diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:18:23 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:18:23 +0000 |
commit | 95596860196fd998a07724598c875186cb18e708 (patch) | |
tree | f699330c2958363bba7dd027f7bba2c1b9de3a55 | |
parent | 0cb2a584727d043ff12da67f185e21b9a852c731 (diff) | |
parent | 1aa43263531a06caef740a97373eead33b2ac2f7 (diff) | |
download | vixl-android14-mainline-uwb-release.tar.gz |
Snap for 10453563 from 1aa43263531a06caef740a97373eead33b2ac2f7 to mainline-uwb-releaseaml_uwb_341513070aml_uwb_341511050aml_uwb_341310300aml_uwb_341310030aml_uwb_341111010aml_uwb_341011000android14-mainline-uwb-release
Change-Id: I7f86314b52728f41a23bf22c4e7178fc36e1396c
87 files changed, 59845 insertions, 15468 deletions
diff --git a/.clang-tidy b/.clang-tidy index 57feba42..256ea716 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -28,6 +28,5 @@ Checks: '-clang-analyzer-security.insecureAPI.rand,google-*,-google-readability-todo,-google-readability-function-size,-google-build-using-namespace,-google-explicit-constructor,-google-readability-braces-around-statements,-google-readability-namespace-comments,-google-readability-casting' HeaderFilterRegex: '\.h$' AnalyzeTemporaryDtors: false -CheckOptions: ... @@ -2,3 +2,4 @@ host=review.linaro.org port=29418 project=arm/vixl +defaultbranch=sve2 @@ -72,10 +72,10 @@ license { cc_defaults { name: "vixl-common", host_supported: true, - clang_cflags: ["-Wimplicit-fallthrough"], cflags: [ "-Wall", "-Werror", + "-Wimplicit-fallthrough", ], cppflags: [ "-DVIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE=0", @@ -163,6 +163,17 @@ cc_defaults { srcs: ["src/*.cc"], export_include_dirs: ["src"], min_sdk_version: "S", + + static: { + cflags: [ + "-fvisibility=hidden", + ], + }, + shared: { + cflags: [ + "-fvisibility=protected", + ], + }, } art_cc_library { @@ -188,17 +199,6 @@ art_cc_library { "com.android.art", "com.android.art.debug", ], - - static: { - cflags: [ - "-fvisibility=hidden", - ], - }, - shared: { - cflags: [ - "-fvisibility=protected", - ], - }, } art_cc_library { @@ -233,6 +233,9 @@ cc_test_host { local_include_dirs: [ "test", ], + exclude_srcs: [ + "test/test-donkey.cc" + ], srcs: [ "test/*.cc", "test/aarch32/*.cc", @@ -1,5 +1,5 @@ -VIXL: ARMv8 Runtime Code Generation Library, Development Version -================================================================ +VIXL: ARMv8 Runtime Code Generation Library +=========================================== Contents: @@ -24,10 +24,14 @@ VIXL contains three components. assembler. The simulator allows generated code to be run on another architecture without the need for a full ISA model. -The VIXL git repository can be found [on 'https://git.linaro.org'][vixl]. 
+The VIXL git repository can be found [on GitHub][vixl]. -Changes from previous versions of VIXL can be found in the -[Changelog](doc/changelog.md). +Build and Test Status +--------------------- + + * [![Build Status](https://ci.linaro.org/buildStatus/icon?job=linaro-art-vixlpresubmit)](https://ci.linaro.org/job/linaro-art-vixlpresubmit/) Simulator + * [![Build Status](https://ci.linaro.org/buildStatus/icon?job=linaro-art-vixlpresubmit-native-armv8)](https://ci.linaro.org/job/linaro-art-vixlpresubmit-native-armv8/) Native + * [![Build Status](https://ci.linaro.org/buildStatus/icon?job=linaro-art-vixlpresubmit-macos)](https://ci.linaro.org/job/linaro-art-vixlpresubmit-macos/) MacOS Licence @@ -36,6 +40,8 @@ Licence This software is covered by the licence described in the [LICENCE](LICENCE) file. +Contributions, as pull requests or via other means, are accepted under the terms +of the same [LICENCE](LICENCE). Requirements ============ @@ -63,6 +69,41 @@ Refer to the 'Usage' section for details. Note that in Ubuntu 18.04, clang-tidy-4.0 will only work if the clang-4.0 package is also installed. 
+Supported Arm Architecture Features +=================================== + +| Feature | VIXL CPUFeatures Flag | Notes | +|------------|-----------------------|---------------------------------| +| BTI | kBTI | Per-page enabling not supported | +| DotProd | kDotProduct | | +| FCMA | kFcma | | +| FHM | kFHM | | +| FP16 | kFPHalf, kNEONHalf | | +| FRINTTS | kFrintToFixedSizedInt | | +| FlagM | kFlagM | | +| FlagM2 | kAXFlag | | +| I8MM | kI8MM | | +| JSCVT | kJSCVT | | +| LOR | kLORegions | | +| LRCPC | kRCpc | | +| LRCPC2 | kRCpcImm | | +| LSE | kAtomics | | +| PAuth | kPAuth, kPAuthGeneric | Not ERETAA, ERETAB | +| RAS | kRAS | | +| RDM | kRDM | | +| SVE | kSVE | | +| SVE2 | kSVE2 | | +| SVEBitPerm | kSVEBitPerm | | +| SVEF32MM | kSVEF32MM | | +| SVEF64MM | kSVEF64MM | | +| SVEI8MM | kSVEI8MM | | + +Enable generating code for an architecture feature by combining a flag with +the MacroAssembler's defaults. For example, to generate code for SVE, use +`masm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);`. + +See [the cpu features header file](src/cpu-features.h) for more information. + Known Limitations ================= @@ -161,9 +202,9 @@ selection. Bug reports =========== -Bug reports may be sent to vixl@arm.com. Please provide any steps required to -recreate a bug, along with build environment and host system information. - +Bug reports may be made in the Issues section of GitHub, or sent to +vixl@arm.com. Please provide any steps required to recreate a bug, along with +build environment and host system information. Usage ===== @@ -213,11 +254,11 @@ aarch32_examples` or `scons aarch64_examples` from the root directory, or use -[cpplint]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py +[cpplint]: https://github.com/google/styleguide/tree/gh-pages/cpplint "Google's cpplint.py script." -[vixl]: https://git.linaro.org/arm/vixl.git - "The VIXL repository at 'https://git.linaro.org'." 
+[vixl]: https://github.com/Linaro/vixl + "The VIXL repository on GitHub." [getting-started-aarch32]: doc/aarch32/getting-started-aarch32.md "Introduction to VIXL for AArch32." @@ -84,6 +84,7 @@ options = { '-pedantic', '-Wwrite-strings', '-Wunused', + '-Wshadow', '-Wno-missing-noreturn'], 'CPPPATH' : [config.dir_src_vixl] }, @@ -115,6 +116,10 @@ options = { 'ubsan:on' : { 'CCFLAGS': ['-fsanitize=undefined'], 'LINKFLAGS': ['-fsanitize=undefined'] + }, + 'coverage:on' : { + 'CCFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping'], + 'LINKFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping'] } } @@ -255,6 +260,8 @@ vars.AddVariables( 'release', allowed_values=config.build_options_modes), EnumVariable('ubsan', 'Enable undefined behavior checks', 'off', allowed_values=['on', 'off']), + EnumVariable('coverage', 'Enable code coverage measurement', + 'off', allowed_values=['on', 'off']), EnumVariable('negative_testing', 'Enable negative testing (needs exceptions)', 'off', allowed_values=['on', 'off']), @@ -482,7 +489,7 @@ top_level_targets.Add('', 'Build the VIXL library.') # Common test code. test_build_dir = PrepareVariantDir('test', TargetBuildDir(env)) -test_objects = [env.Object(Glob(join(test_build_dir, '*.cc')))] +test_objects = [env.Object(Glob(join(test_build_dir, '*.cc'), exclude=join(test_build_dir, 'test-donkey.cc')))] # AArch32 support if CanTargetAArch32(env): @@ -564,6 +571,15 @@ if CanTargetAArch64(env): CPPPATH = env['CPPPATH'] + [config.dir_aarch64_examples] + [config.dir_tests]) test_objects.append(test_aarch64_examples_obj) + # The simulator test generator. 
+ donkey_objects = [] + donkey_objects.append(env.Object( + [join(test_build_dir, 'test-donkey.cc'), join(test_aarch64_build_dir, 'test-utils-aarch64.cc')], + CPPPATH = env['CPPPATH'] + [config.dir_tests], + CCFLAGS = [flag for flag in env['CCFLAGS'] if flag != '-O3'])) + donkey = env.Program(join(test_build_dir, 'test-donkey'), donkey_objects, LIBS=[libvixl]) + env.Alias('tests', donkey) + test = env.Program(join(test_build_dir, 'test-runner'), test_objects, LIBS=[libvixl]) env.Alias('tests', test) diff --git a/doc/aarch64/supported-instructions-aarch64.md b/doc/aarch64/supported-instructions-aarch64.md index 1c16eb2b..5919354f 100644 --- a/doc/aarch64/supported-instructions-aarch64.md +++ b/doc/aarch64/supported-instructions-aarch64.md @@ -6,9 +6,25 @@ disassembler and simulator. The simulator may not support all floating point operations to the precision required by AArch64 - please check the simulator source code for details. +#### AArch64 integer instructions #### +[a](#integer-a) [b](#integer-b) [c](#integer-c) [d](#integer-d) [e](#integer-e) [h](#integer-h) [i](#integer-i) [l](#integer-l) [m](#integer-m) [n](#integer-n) [o](#integer-o) [p](#integer-p) [r](#integer-r) [s](#integer-s) [t](#integer-t) [u](#integer-u) [x](#integer-x) + +#### AArch64 floating point and NEON instructions #### +[a](#float-a) [b](#float-b) [c](#float-c) [d](#float-d) [e](#float-e) [f](#float-f) [i](#float-i) [l](#float-l) [m](#float-m) [n](#float-n) [o](#float-o) [p](#float-p) [r](#float-r) [s](#float-s) [t](#float-t) [u](#float-u) [x](#float-x) [z](#float-z) + +#### AArch64 Scalable Vector Extension (SVE) instructions #### +[a](#sve-a) [b](#sve-b) [c](#sve-c) [d](#sve-d) [e](#sve-e) [f](#sve-f) [h](#sve-h) [i](#sve-i) [l](#sve-l) [m](#sve-m) [n](#sve-n) [o](#sve-o) [p](#sve-p) [r](#sve-r) [s](#sve-s) [t](#sve-t) [u](#sve-u) [w](#sve-w) [x](#sve-x) [z](#sve-z) + +#### Additional or pseudo instructions #### +[b](#pseudo-b) [d](#pseudo-d) [p](#pseudo-p) + +___ + AArch64 integer 
instructions ---------------------------- +<a id="integer-a"> + ### ADC ### Add with carry bit. @@ -100,27 +116,6 @@ Authenticate Data address, using key A _(Armv8.3)_. void autda(const Register& xd, const Register& xn) -### AUTDA1716 ### - -Authenticate Data address, using key A, with address in x17 and modifier in x16 _(Armv8.3)_. - - void autda1716() - - -### AUTDASP ### - -Authenticate Data address, using key A, with address in LR and modifier in SP _(Armv8.3)_. - - void autdasp() - - -### AUTDAZ ### - -Authenticate Data address, using key A, with address in LR and a modifier of zero _(Armv8.3)_. - - void autdaz() - - ### AUTDB ### Authenticate Data address, using key B _(Armv8.3)_. @@ -128,27 +123,6 @@ Authenticate Data address, using key B _(Armv8.3)_. void autdb(const Register& xd, const Register& xn) -### AUTDB1716 ### - -Authenticate Data address, using key B, with address in x17 and modifier in x16 _(Armv8.3)_. - - void autdb1716() - - -### AUTDBSP ### - -Authenticate Data address, using key B, with address in LR and modifier in SP _(Armv8.3)_. - - void autdbsp() - - -### AUTDBZ ### - -Authenticate Data address, using key B, with address in LR and a modifier of zero _(Armv8.3)_. - - void autdbz() - - ### AUTDZA ### Authenticate Data address, using key A and a modifier of zero _(Armv8.3)_. @@ -240,6 +214,8 @@ Convert floating-point condition flags from Arm format to alternative format _(A void axflag() +<a id="integer-b"> + ### B ### Conditional branch to PC offset. @@ -417,6 +393,8 @@ Branch target identification. void bti(BranchTargetIdentifier id) +<a id="integer-c"> + ### CAS ### Compare and Swap word or doubleword in memory _(Armv8.1)_. @@ -773,6 +751,8 @@ Conditional select negation: rd = cond ? rn : -rm. Condition cond) +<a id="integer-d"> + ### DC ### System data cache operation. @@ -794,6 +774,8 @@ Data synchronization barrier. void dsb(BarrierDomain domain, BarrierType type) +<a id="integer-e"> + ### EON ### Bitwise enor/xnor (A ^ ~B). 
@@ -825,6 +807,8 @@ Extract. unsigned lsb) +<a id="integer-h"> + ### HINT ### System hint (named type). @@ -846,6 +830,8 @@ Halting debug-mode breakpoint. void hlt(int code) +<a id="integer-i"> + ### IC ### System instruction cache operation. @@ -860,6 +846,8 @@ Instruction synchronization barrier. void isb() +<a id="integer-l"> + ### LDADD ### Atomic add on word or doubleword in memory _(Armv8.1)_ @@ -1896,6 +1884,8 @@ Logical shift right by variable. void lsrv(const Register& rd, const Register& rn, const Register& rm) +<a id="integer-m"> + ### MADD ### Multiply and accumulate. @@ -1915,6 +1905,13 @@ Negated multiply. ### MOV ### +Move immediate, aliases for movz, movn, orr. + + void mov(const Register& rd, uint64_t imm) + + +### MOV ### + Move register to register. void mov(const Register& rd, const Register& rn) @@ -1979,6 +1976,8 @@ Move inverted operand to register. void mvn(const Register& rd, const Operand& operand) +<a id="integer-n"> + ### NEG ### Negate. @@ -2014,6 +2013,8 @@ No-op. void nop() +<a id="integer-o"> + ### ORN ### Bitwise nor (A | ~B). @@ -2028,6 +2029,8 @@ Bitwise or (A | B). void orr(const Register& rd, const Register& rn, const Operand& operand) +<a id="integer-p"> + ### PACDA ### Pointer Authentication Code for Data address, using key A _(Armv8.3)_. @@ -2035,27 +2038,6 @@ Pointer Authentication Code for Data address, using key A _(Armv8.3)_. void pacda(const Register& xd, const Register& xn) -### PACDA1716 ### - -Pointer Authentication Code for Data address, using key A, with address in x17 and modifier in x16 _(Armv8.3)_. - - void pacda1716() - - -### PACDASP ### - -Pointer Authentication Code for Data address, using key A, with address in LR and modifier in SP _(Armv8.3)_. - - void pacdasp() - - -### PACDAZ ### - -Pointer Authentication Code for Data address, using key A, with address in LR and a modifier of zero _(Armv8.3)_. - - void pacdaz() - - ### PACDB ### Pointer Authentication Code for Data address, using key B _(Armv8.3)_. 
@@ -2063,27 +2045,6 @@ Pointer Authentication Code for Data address, using key B _(Armv8.3)_. void pacdb(const Register& xd, const Register& xn) -### PACDB1716 ### - -Pointer Authentication Code for Data address, using key B, with address in x17 and modifier in x16 _(Armv8.3)_. - - void pacdb1716() - - -### PACDBSP ### - -Pointer Authentication Code for Data address, using key B, with address in LR and modifier in SP _(Armv8.3)_. - - void pacdbsp() - - -### PACDBZ ### - -Pointer Authentication Code for Data address, using key B, with address in LR and a modifier of zero _(Armv8.3)_. - - void pacdbz() - - ### PACDZA ### Pointer Authentication Code for Data address, using key A and a modifier of zero _(Armv8.3)_. @@ -2177,6 +2138,13 @@ Pointer Authentication Code for Instruction address, using key B and a modifier ### PRFM ### +Prefetch from pc + imm19 << 2 (allowing unallocated hints). + + void prfm(int op, int64_t imm19) + + +### PRFM ### + Prefetch from pc + imm19 << 2. void prfm(PrefetchOperation op, int64_t imm19) @@ -2184,6 +2152,22 @@ Prefetch from pc + imm19 << 2. ### PRFM ### +Prefetch memory (allowing unallocated hints). + + void prfm(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset) + + +### PRFM ### + +Prefetch memory in the literal pool (allowing unallocated hints). + + void prfm(int op, RawLiteral* literal) + + +### PRFM ### + Prefetch memory in the literal pool. void prfm(PrefetchOperation op, RawLiteral* literal) @@ -2207,6 +2191,17 @@ Prefetch memory (with unscaled offset). LoadStoreScalingOption option = PreferUnscaledOffset) +### PRFUM ### + +Prefetch memory (with unscaled offset, allowing unallocated hints). + + void prfum(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferUnscaledOffset) + + +<a id="integer-r"> + ### RBIT ### Bit reverse. @@ -2284,6 +2279,8 @@ Rotate right by variable. 
void rorv(const Register& rd, const Register& rn, const Register& rm) +<a id="integer-s"> + ### SBC ### Subtract with carry bit. @@ -3056,6 +3053,8 @@ System instruction. void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr) +<a id="integer-t"> + ### TBNZ ### Test bit and branch to PC offset if not zero. @@ -3091,6 +3090,8 @@ Bit test and set flags. void tst(const Register& rn, const Operand& operand) +<a id="integer-u"> + ### UBFIZ ### Unsigned bitfield insert with zero at right. @@ -3121,6 +3122,13 @@ Unsigned bitfield extract. unsigned width) +### UDF ### + +Generate undefined instruction exception. + + void udf(int code) + + ### UDIV ### Unsigned integer divide. @@ -3183,6 +3191,8 @@ Unsigned extend word. void uxtw(const Register& rd, const Register& rn) +<a id="integer-x"> + ### XAFLAG ### Convert floating-point condition flags from alternative format to Arm format _(Armv8.5)_. @@ -3215,6 +3225,8 @@ Strip Pointer Authentication Code of Instruction address in LR _(Armv8.3)_. AArch64 floating point and NEON instructions -------------------------------------------- +<a id="float-a"> + ### ABS ### Absolute value. @@ -3271,6 +3283,8 @@ Bitwise and. void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-b"> + ### BIC ### Bit clear immediate. @@ -3306,6 +3320,8 @@ Bitwise select. void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-c"> + ### CLS ### Count leading sign bits. @@ -3404,6 +3420,8 @@ Population count per byte. void cnt(const VRegister& vd, const VRegister& vn) +<a id="float-d"> + ### DUP ### Duplicate general-purpose register to vector. @@ -3418,6 +3436,8 @@ Duplicate vector element to vector or scalar. void dup(const VRegister& vd, const VRegister& vn, int vn_index) +<a id="float-e"> + ### EOR ### Bitwise eor. @@ -3435,6 +3455,8 @@ Extract vector from pair of vectors. int index) +<a id="float-f"> + ### FABD ### FP absolute difference. 
@@ -4211,6 +4233,34 @@ FP reciprocal exponent scalar. void frecpx(const VRegister& vd, const VRegister& vn) +### FRINT32X ### + +FP round to 32-bit integer, exact, implicit rounding _(Armv8.5)_. + + void frint32x(const VRegister& vd, const VRegister& vn) + + +### FRINT32Z ### + +FP round to 32-bit integer, towards zero _(Armv8.5)_. + + void frint32z(const VRegister& vd, const VRegister& vn) + + +### FRINT64X ### + +FP round to 64-bit integer, exact, implicit rounding _(Armv8.5)_. + + void frint64x(const VRegister& vd, const VRegister& vn) + + +### FRINT64Z ### + +FP round to 64-bit integer, towards zero _(Armv8.5)_. + + void frint64z(const VRegister& vd, const VRegister& vn) + + ### FRINTA ### FP round to integer, nearest with ties to away. @@ -4288,6 +4338,8 @@ FP subtract. void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-i"> + ### INS ### Insert vector element from another vector element. @@ -4305,6 +4357,8 @@ Insert vector element from general-purpose register. void ins(const VRegister& vd, int vd_index, const Register& rn) +<a id="float-l"> + ### LD1 ### One-element single structure load to one lane. @@ -4443,6 +4497,8 @@ Four-element single structure load to all lanes. const MemOperand& src) +<a id="float-m"> + ### MLA ### Multiply-add by scalar element. @@ -4559,6 +4615,8 @@ Vector move inverted immediate. const int shift_amount = 0) +<a id="float-n"> + ### NEG ### Negate. @@ -4573,6 +4631,8 @@ Bitwise not. void not_(const VRegister& vd, const VRegister& vn) +<a id="float-o"> + ### ORN ### Bitwise orn. @@ -4594,6 +4654,8 @@ Bitwise or. void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-p"> + ### PMUL ### Polynomial multiply. @@ -4615,6 +4677,8 @@ Polynomial multiply long (second part). void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-r"> + ### RADDHN ### Rounding add narrow returning high half. 
@@ -4685,6 +4749,8 @@ Rounding subtract narrow returning high half (second part). void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-s"> + ### SABA ### Signed absolute difference and accumulate. @@ -4973,6 +5039,13 @@ Signed long multiply-sub by scalar element (second part). int vm_index) +### SMMLA ### + +Signed 8-bit integer matrix multiply-accumulate (vector). + + void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) + + ### SMOV ### Signed move vector element to general-purpose register. @@ -5546,6 +5619,16 @@ Subtract narrow returning high half (second part). void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm) +### SUDOT ### + +Dot product with signed and unsigned integers (vector, by element). + + void sudot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) + + ### SUQADD ### Signed saturating accumulate of unsigned value. @@ -5567,6 +5650,8 @@ Signed extend long (second part). void sxtl2(const VRegister& vd, const VRegister& vn) +<a id="float-t"> + ### TBL ### Table lookup from four registers. @@ -5661,6 +5746,8 @@ Transpose vectors (secondary). void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-u"> + ### UABA ### Unsigned absolute difference and accumulate. @@ -5907,6 +5994,13 @@ Unsigned long multiply-sub by scalar element (second part). int vm_index) +### UMMLA ### + +Unsigned 8-bit integer matrix multiply-accumulate (vector). + + void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm) + + ### UMOV ### Unsigned move vector element to general-purpose register. @@ -6067,6 +6161,23 @@ Unsigned rounding shift right by immediate and accumulate. void ursra(const VRegister& vd, const VRegister& vn, int shift) +### USDOT ### + +Dot Product with unsigned and signed integers (vector). 
+ + void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm) + + +### USDOT ### + +Dot product with unsigned and signed integers (vector, by element). + + void usdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) + + ### USHL ### Unsigned shift left by register. @@ -6095,6 +6206,13 @@ Unsigned shift right by immediate. void ushr(const VRegister& vd, const VRegister& vn, int shift) +### USMMLA ### + +Unsigned and signed 8-bit integer matrix multiply-accumulate (vector). + + void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) + + ### USQADD ### Unsigned saturating accumulate of signed value. @@ -6165,6 +6283,8 @@ Unzip vectors (secondary). void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm) +<a id="float-x"> + ### XTN ### Extract narrow. @@ -6179,6 +6299,8 @@ Extract narrow (second part). void xtn2(const VRegister& vd, const VRegister& vn) +<a id="float-z"> + ### ZIP1 ### Zip vectors (primary). @@ -6194,9 +6316,6086 @@ Zip vectors (secondary). +AArch64 Scalable Vector Extension (SVE) instructions +---------------------------------------------------- + +<a id="sve-a"> + +### ABS ### + +Absolute value (predicated). + + void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### ADCLB ### + +Add with carry long (bottom). + + void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### ADCLT ### + +Add with carry long (top). + + void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### ADD ### + +Add immediate (unpredicated). + + void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1) + + +### ADD ### + +Add vectors (predicated). + + void add(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### ADD ### + +Add vectors (unpredicated). 
+ + void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### ADDHNB ### + +Add narrow high part (bottom). + + void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### ADDHNT ### + +Add narrow high part (top). + + void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### ADDP ### + +Add pairwise. + + void addp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### ADDPL ### + +Add multiple of predicate register size to scalar register. + + void addpl(const Register& xd, const Register& xn, int imm6) + + +### ADDVL ### + +Add multiple of vector register size to scalar register. + + void addvl(const Register& xd, const Register& xn, int imm6) + + +### ADR ### + +Compute vector address. + + void adr(const ZRegister& zd, const SVEMemOperand& addr) + + +### AND ### + +Bitwise AND predicates. + + void and_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### AND ### + +Bitwise AND vectors (predicated). + + void and_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### AND ### + +Bitwise AND vectors (unpredicated). + + void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### AND ### + +Bitwise AND with immediate (unpredicated). + + void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm) + + +### ANDS ### + +Bitwise AND predicates. + + void ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### ANDV ### + +Bitwise AND reduction to scalar. + + void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### ASR ### + +Arithmetic shift right by 64-bit wide elements (predicated). 
+ + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### ASR ### + +Arithmetic shift right by 64-bit wide elements (unpredicated). + + void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### ASR ### + +Arithmetic shift right by immediate (predicated). + + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### ASR ### + +Arithmetic shift right by immediate (unpredicated). + + void asr(const ZRegister& zd, const ZRegister& zn, int shift) + + +### ASRD ### + +Arithmetic shift right for divide by immediate (predicated). + + void asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### ASRR ### + +Reversed arithmetic shift right by vector (predicated). + + void asrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +<a id="sve-b"> + +### BCAX ### + +Bitwise clear and exclusive OR. + + void bcax(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) + + +### BDEP ### + +Scatter lower bits into positions selected by bitmask. + + void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### BEXT ### + +Gather lower bits from positions selected by bitmask. + + void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### BGRP ### + +Group bits to right or left as selected by bitmask. + + void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### BIC ### + +Bitwise clear bits using immediate (unpredicated). + + void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) + + +### BIC ### + +Bitwise clear predicates. + + void bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BIC ### + +Bitwise clear vectors (predicated). 
+ + void bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### BIC ### + +Bitwise clear vectors (unpredicated). + + void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### BICS ### + +Bitwise clear predicates. + + void bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BRKA ### + +Break after first true condition. + + void brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) + + +### BRKAS ### + +Break after first true condition. + + void brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) + + +### BRKB ### + +Break before first true condition. + + void brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) + + +### BRKBS ### + +Break before first true condition. + + void brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) + + +### BRKN ### + +Propagate break to next partition. + + void brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BRKNS ### + +Propagate break to next partition. + + void brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BRKPA ### + +Break after first true condition, propagating from previous partition. + + void brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BRKPAS ### + +Break after first true condition, propagating from previous partition. 
+ + void brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BRKPB ### + +Break before first true condition, propagating from previous partition. + + void brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BRKPBS ### + +Break before first true condition, propagating from previous partition. + + void brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### BSL ### + +Bitwise select. + + void bsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) + + +### BSL1N ### + +Bitwise select with first input inverted. + + void bsl1n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) + + +### BSL2N ### + +Bitwise select with second input inverted. + + void bsl2n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) + + +<a id="sve-c"> + +### CADD ### + +Complex integer add with rotate. + + void cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) + + +### CDOT ### + +Complex integer dot product (indexed). + + void cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) + + +### CDOT ### + +Complex integer dot product. + + void cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) + + +### CLASTA ### + +Conditionally extract element after last to SIMD&FP scalar register. + + void clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) + + +### CLASTA ### + +Conditionally extract element after last to general-purpose register. 
+ + void clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) + + +### CLASTA ### + +Conditionally extract element after last to vector register. + + void clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CLASTB ### + +Conditionally extract last element to SIMD&FP scalar register. + + void clastb(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) + + +### CLASTB ### + +Conditionally extract last element to general-purpose register. + + void clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) + + +### CLASTB ### + +Conditionally extract last element to vector register. + + void clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CLS ### + +Count leading sign bits (predicated). + + void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### CLZ ### + +Count leading zero bits (predicated). + + void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### CMLA ### + +Complex integer multiply-add with rotate (indexed). + + void cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) + + +### CMLA ### + +Complex integer multiply-add with rotate. + + void cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) + + +### CMP ### + + + + void cmp(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPEQ ### + +Compare vector to 64-bit wide elements. + + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPEQ ### + +Compare vector to immediate. 
+ + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### CMPGE ### + +Compare vector to 64-bit wide elements. + + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPGE ### + +Compare vector to immediate. + + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### CMPGT ### + +Compare vector to 64-bit wide elements. + + void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPGT ### + +Compare vector to immediate. + + void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### CMPHI ### + +Compare vector to 64-bit wide elements. + + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPHI ### + +Compare vector to immediate. + + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) + + +### CMPHS ### + +Compare vector to 64-bit wide elements. + + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPHS ### + +Compare vector to immediate. + + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) + + +### CMPLE ### + +Compare vector to 64-bit wide elements. + + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPLE ### + +Compare vector to immediate. + + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### CMPLO ### + +Compare vector to 64-bit wide elements. 
+ + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPLO ### + +Compare vector to immediate. + + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) + + +### CMPLS ### + +Compare vector to 64-bit wide elements. + + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPLS ### + +Compare vector to immediate. + + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) + + +### CMPLT ### + +Compare vector to 64-bit wide elements. + + void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPLT ### + +Compare vector to immediate. + + void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### CMPNE ### + +Compare vector to 64-bit wide elements. + + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### CMPNE ### + +Compare vector to immediate. + + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### CNOT ### + +Logically invert boolean condition in vector (predicated). + + void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### CNT ### + +Count non-zero bits (predicated). + + void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### CNTB ### + +Set scalar to multiple of predicate constraint element count. + + void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) + + +### CNTD ### + +Set scalar to multiple of predicate constraint element count. + + void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) + + +### CNTH ### + +Set scalar to multiple of predicate constraint element count. 
+ + void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) + + +### CNTP ### + +Set scalar to active predicate element count. + + void cntp(const Register& xd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) + + +### CNTW ### + +Set scalar to multiple of predicate constraint element count. + + void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) + + +### COMPACT ### + +Shuffle active elements of vector to the right and fill with zero. + + void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) + + +### CPY ### + +Copy SIMD&FP scalar register to vector elements (predicated). + + void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) + + +### CPY ### + +Copy general-purpose register to vector elements (predicated). + + void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn) + + +### CPY ### + +Copy signed integer immediate to vector elements (predicated). + + void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1) + + +### CTERMEQ ### + +Compare and terminate loop. + + void ctermeq(const Register& rn, const Register& rm) + + +### CTERMNE ### + +Compare and terminate loop. + + void ctermne(const Register& rn, const Register& rm) + + +<a id="sve-d"> + +### DECB ### + +Decrement scalar by multiple of predicate constraint element count. + + void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### DECD ### + +Decrement scalar by multiple of predicate constraint element count. + + void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### DECD ### + +Decrement vector by multiple of predicate constraint element count. + + void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### DECH ### + +Decrement scalar by multiple of predicate constraint element count. 
+ + void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### DECH ### + +Decrement vector by multiple of predicate constraint element count. + + void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### DECP ### + +Decrement scalar by active predicate element count. + + void decp(const Register& rdn, const PRegisterWithLaneSize& pg) + + +### DECP ### + +Decrement vector by active predicate element count. + + void decp(const ZRegister& zdn, const PRegister& pg) + + +### DECW ### + +Decrement scalar by multiple of predicate constraint element count. + + void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### DECW ### + +Decrement vector by multiple of predicate constraint element count. + + void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### DUP ### + +Broadcast general-purpose register to vector elements (unpredicated). + + void dup(const ZRegister& zd, const Register& xn) + + +### DUP ### + +Broadcast indexed element to vector (unpredicated). + + void dup(const ZRegister& zd, const ZRegister& zn, unsigned index) + + +### DUP ### + +Broadcast signed immediate to vector elements (unpredicated). + + void dup(const ZRegister& zd, int imm8, int shift = -1) + + +### DUPM ### + +Broadcast logical bitmask immediate to vector (unpredicated). + + void dupm(const ZRegister& zd, uint64_t imm) + + +<a id="sve-e"> + +### EON ### + +Bitwise exclusive OR with inverted immediate (unpredicated). + + void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) + + +### EOR ### + +Bitwise exclusive OR predicates. + + void eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### EOR ### + +Bitwise exclusive OR vectors (predicated). 
+ + void eor(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### EOR ### + +Bitwise exclusive OR vectors (unpredicated). + + void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### EOR ### + +Bitwise exclusive OR with immediate (unpredicated). + + void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) + + +### EOR3 ### + +Bitwise exclusive OR of three vectors. + + void eor3(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) + + +### EORBT ### + +Interleaving exclusive OR (bottom, top). + + void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### EORS ### + +Bitwise exclusive OR predicates. + + void eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### EORTB ### + +Interleaving exclusive OR (top, bottom). + + void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### EORV ### + +Bitwise XOR reduction to scalar. + + void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### EXT ### + +Extract vector from pair of vectors. + + void ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset) + + +<a id="sve-f"> + +### FABD ### + +Floating-point absolute difference (predicated). + + void fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FABS ### + +Floating-point absolute value (predicated). + + void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FACGE ### + +Floating-point absolute compare vectors. + + void facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FACGT ### + +Floating-point absolute compare vectors. 
+ + void facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FADD ### + +Floating-point add immediate (predicated). + + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FADD ### + +Floating-point add vector (predicated). + + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FADD ### + +Floating-point add vector (unpredicated). + + void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### FADDA ### + +Floating-point add strictly-ordered reduction, accumulating in scalar. + + void fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) + + +### FADDP ### + +Floating-point add pairwise. + + void faddp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FADDV ### + +Floating-point add recursive reduction to scalar. + + void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### FCADD ### + +Floating-point complex add with rotate (predicated). + + void fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) + + +### FCMEQ ### + +Floating-point compare vector with zero. + + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) + + +### FCMEQ ### + +Floating-point compare vectors. + + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FCMGE ### + +Floating-point compare vector with zero. + + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) + + +### FCMGE ### + +Floating-point compare vectors. 
+ + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FCMGT ### + +Floating-point compare vector with zero. + + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) + + +### FCMGT ### + +Floating-point compare vectors. + + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FCMLA ### + +Floating-point complex multiply-add by indexed values with rotate. + + void fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) + + +### FCMLA ### + +Floating-point complex multiply-add with rotate (predicated). + + void fcmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) + + +### FCMLE ### + +Floating-point compare vector with zero. + + void fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) + + +### FCMLT ### + +Floating-point compare vector with zero. + + void fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) + + +### FCMNE ### + +Floating-point compare vector with zero. + + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) + + +### FCMNE ### + +Floating-point compare vectors. + + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FCMUO ### + +Floating-point compare vectors. + + void fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FCPY ### + +Copy floating-point immediate to vector elements (predicated). + + void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm) + + +### FCPY ### + +Copy half-precision floating-point immediate to vector elements (predicated). 
+ + void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) + + +### FCVT ### + +Floating-point convert precision (predicated). + + void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FCVTLT ### + +Floating-point up convert long (top, predicated). + + void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FCVTNT ### + +Floating-point down convert and narrow (top, predicated). + + void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FCVTX ### + +Floating-point down convert, rounding to odd (predicated). + + void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FCVTXNT ### + +Floating-point down convert, rounding to odd (top, predicated). + + void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FCVTZS ### + +Floating-point convert to signed integer, rounding toward zero (predicated). + + void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FCVTZU ### + +Floating-point convert to unsigned integer, rounding toward zero (predicated). + + void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FDIV ### + +Floating-point divide by vector (predicated). + + void fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FDIVR ### + +Floating-point reversed divide by vector (predicated). + + void fdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FDUP ### + +Broadcast floating-point immediate to vector elements. + + void fdup(const ZRegister& zd, double imm) + + +### FDUP ### + +Broadcast half-precision floating-point immediate to vector elements. + + void fdup(const ZRegister& zd, Float16 imm) + + +### FEXPA ### + +Floating-point exponential accelerator. 
+ + void fexpa(const ZRegister& zd, const ZRegister& zn) + + +### FLOGB ### + +Floating-point base 2 logarithm as integer. + + void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FMAD ### + +Floating-point fused multiply-add vectors (predicated), writing multiplicand [Zdn = Za + Zdn * Zm]. + + void fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) + + +### FMAX ### + +Floating-point maximum (predicated). + + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMAX ### + +Floating-point maximum with immediate (predicated). + + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FMAXNM ### + +Floating-point maximum number (predicated). + + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMAXNM ### + +Floating-point maximum number with immediate (predicated). + + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FMAXNMP ### + +Floating-point maximum number pairwise. + + void fmaxnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMAXNMV ### + +Floating-point maximum number recursive reduction to scalar. + + void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### FMAXP ### + +Floating-point maximum pairwise. + + void fmaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMAXV ### + +Floating-point maximum recursive reduction to scalar. + + void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### FMIN ### + +Floating-point minimum (predicated). 
+ + void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMIN ### + +Floating-point minimum with immediate (predicated). + + void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FMINNM ### + +Floating-point minimum number (predicated). + + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMINNM ### + +Floating-point minimum number with immediate (predicated). + + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FMINNMP ### + +Floating-point minimum number pairwise. + + void fminnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMINNMV ### + +Floating-point minimum number recursive reduction to scalar. + + void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### FMINP ### + +Floating-point minimum pairwise. + + void fminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMINV ### + +Floating-point minimum recursive reduction to scalar. + + void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### FMLA ### + +Floating-point fused multiply-add by indexed elements (Zda = Zda + Zn * Zm[indexed]). + + void fmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### FMLA ### + +Floating-point fused multiply-add vectors (predicated), writing addend [Zda = Zda + Zn * Zm]. + + void fmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMLALB ### + +Half-precision floating-point multiply-add long to single-precision (bottom). + + void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### FMLALB ### + +Half-precision floating-point multiply-add long to single-precision (bottom, indexed). 
+ + void fmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### FMLALT ### + +Half-precision floating-point multiply-add long to single-precision (top). + + void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### FMLALT ### + +Half-precision floating-point multiply-add long to single-precision (top, indexed). + + void fmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### FMLS ### + +Floating-point fused multiply-subtract by indexed elements (Zda = Zda + -Zn * Zm[indexed]). + + void fmls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### FMLS ### + +Floating-point fused multiply-subtract vectors (predicated), writing addend [Zda = Zda + -Zn * Zm]. + + void fmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMLSLB ### + +Half-precision floating-point multiply-subtract long from single-precision (bottom). + + void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### FMLSLB ### + +Half-precision floating-point multiply-subtract long from single-precision (bottom, indexed). + + void fmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### FMLSLT ### + +Half-precision floating-point multiply-subtract long from single-precision (top). + + void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### FMLSLT ### + +Half-precision floating-point multiply-subtract long from single-precision (top, indexed). + + void fmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### FMMLA ### + +Floating-point matrix multiply-accumulate. + + void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### FMOV ### + +Move 8-bit floating-point immediate to vector elements (predicated). 
+ + void fmov(const ZRegister& zd, const PRegisterM& pg, double imm) + + +### FMOV ### + +Move 8-bit floating-point immediate to vector elements (unpredicated). + + void fmov(const ZRegister& zd, double imm) + + +### FMSB ### + +Floating-point fused multiply-subtract vectors (predicated), writing multiplicand [Zdn = Za + -Zdn * Zm]. + + void fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) + + +### FMUL ### + +Floating-point multiply by immediate (predicated). + + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FMUL ### + +Floating-point multiply by indexed elements. + + void fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index) + + +### FMUL ### + +Floating-point multiply vectors (predicated). + + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FMUL ### + +Floating-point multiply vectors (unpredicated). + + void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### FMULX ### + +Floating-point multiply-extended vectors (predicated). + + void fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FNEG ### + +Floating-point negate (predicated). + + void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FNMAD ### + +Floating-point negated fused multiply-add vectors (predicated), writing multiplicand [Zdn = -Za + -Zdn * Zm]. + + void fnmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) + + +### FNMLA ### + +Floating-point negated fused multiply-add vectors (predicated), writing addend [Zda = -Zda + -Zn * Zm]. 
+ + void fnmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FNMLS ### + +Floating-point negated fused multiply-subtract vectors (predicated), writing addend [Zda = -Zda + Zn * Zm]. + + void fnmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FNMSB ### + +Floating-point negated fused multiply-subtract vectors (predicated), writing multiplicand [Zdn = -Za + Zdn * Zm]. + + void fnmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) + + +### FRECPE ### + +Floating-point reciprocal estimate (unpredicated). + + void frecpe(const ZRegister& zd, const ZRegister& zn) + + +### FRECPS ### + +Floating-point reciprocal step (unpredicated). + + void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### FRECPX ### + +Floating-point reciprocal exponent (predicated). + + void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRINTA ### + +Floating-point round to integral value (predicated). + + void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRINTI ### + +Floating-point round to integral value (predicated). + + void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRINTM ### + +Floating-point round to integral value (predicated). + + void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRINTN ### + +Floating-point round to integral value (predicated). + + void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRINTP ### + +Floating-point round to integral value (predicated). + + void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRINTX ### + +Floating-point round to integral value (predicated). 
+ + void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRINTZ ### + +Floating-point round to integral value (predicated). + + void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FRSQRTE ### + +Floating-point reciprocal square root estimate (unpredicated). + + void frsqrte(const ZRegister& zd, const ZRegister& zn) + + +### FRSQRTS ### + +Floating-point reciprocal square root step (unpredicated). + + void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### FSCALE ### + +Floating-point adjust exponent by vector (predicated). + + void fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FSQRT ### + +Floating-point square root (predicated). + + void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### FSUB ### + +Floating-point subtract immediate (predicated). + + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FSUB ### + +Floating-point subtract vectors (predicated). + + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FSUB ### + +Floating-point subtract vectors (unpredicated). + + void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### FSUBR ### + +Floating-point reversed subtract from immediate (predicated). + + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) + + +### FSUBR ### + +Floating-point reversed subtract vectors (predicated). + + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### FTMAD ### + +Floating-point trigonometric multiply-add coefficient. + + void ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3) + + +### FTSMUL ### + +Floating-point trigonometric starting value. 
+ + void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### FTSSEL ### + +Floating-point trigonometric select coefficient. + + void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +<a id="sve-h"> + +### HISTCNT ### + +Count matching elements in vector. + + void histcnt(const ZRegister& zd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### HISTSEG ### + +Count matching elements in vector segments. + + void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +<a id="sve-i"> + +### INCB ### + +Increment scalar by multiple of predicate constraint element count. + + void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### INCD ### + +Increment scalar by multiple of predicate constraint element count. + + void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### INCD ### + +Increment vector by multiple of predicate constraint element count. + + void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### INCH ### + +Increment scalar by multiple of predicate constraint element count. + + void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### INCH ### + +Increment vector by multiple of predicate constraint element count. + + void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### INCP ### + +Increment scalar by active predicate element count. + + void incp(const Register& rdn, const PRegisterWithLaneSize& pg) + + +### INCP ### + +Increment vector by active predicate element count. + + void incp(const ZRegister& zdn, const PRegister& pg) + + +### INCW ### + +Increment scalar by multiple of predicate constraint element count. + + void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1) + + +### INCW ### + +Increment vector by multiple of predicate constraint element count. 
+ + void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### INDEX ### + +Create index starting from and incremented by general-purpose register. + + void index(const ZRegister& zd, const Register& rn, const Register& rm) + + +### INDEX ### + +Create index starting from and incremented by immediate. + + void index(const ZRegister& zd, int start, int step) + + +### INDEX ### + +Create index starting from general-purpose register and incremented by immediate. + + void index(const ZRegister& zd, const Register& rn, int imm5) + + +### INDEX ### + +Create index starting from immediate and incremented by general-purpose register. + + void index(const ZRegister& zd, int imm5, const Register& rm) + + +### INSR ### + +Insert SIMD&FP scalar register in shifted vector. + + void insr(const ZRegister& zdn, const VRegister& vm) + + +### INSR ### + +Insert general-purpose register in shifted vector. + + void insr(const ZRegister& zdn, const Register& rm) + + +<a id="sve-l"> + +### LASTA ### + +Extract element after last to SIMD&FP scalar register. + + void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### LASTA ### + +Extract element after last to general-purpose register. + + void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn) + + +### LASTB ### + +Extract last element to SIMD&FP scalar register. + + void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### LASTB ### + +Extract last element to general-purpose register. + + void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn) + + +### LD1B ### + +Contiguous/gather load bytes to vector. + + void ld1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1D ### + +Contiguous/gather load doublewords to vector. + + void ld1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1H ### + +Contiguous/gather load halfwords to vector. 
+ + void ld1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RB ### + +Load and broadcast unsigned byte to vector. + + void ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RD ### + +Load and broadcast doubleword to vector. + + void ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RH ### + +Load and broadcast unsigned halfword to vector. + + void ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1ROB ### + +Contiguous load and replicate thirty-two bytes. + + void ld1rob(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1ROD ### + +Contiguous load and replicate four doublewords. + + void ld1rod(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1ROH ### + +Contiguous load and replicate sixteen halfwords. + + void ld1roh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1ROW ### + +Contiguous load and replicate eight words. + + void ld1row(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RQB ### + +Contiguous load and replicate sixteen bytes. + + void ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RQD ### + +Contiguous load and replicate two doublewords. + + void ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RQH ### + +Contiguous load and replicate eight halfwords. + + void ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RQW ### + +Contiguous load and replicate four words. + + void ld1rqw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RSB ### + +Load and broadcast signed byte to vector. 
+ + void ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RSH ### + +Load and broadcast signed halfword to vector. + + void ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RSW ### + +Load and broadcast signed word to vector. + + void ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1RW ### + +Load and broadcast unsigned word to vector. + + void ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1SB ### + +Contiguous/gather load signed bytes to vector. + + void ld1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1SH ### + +Contiguous/gather load signed halfwords to vector. + + void ld1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1SW ### + +Contiguous/gather load signed words to vector. + + void ld1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD1W ### + +Contiguous/gather load words to vector. + + void ld1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD2B ### + +Contiguous load two-byte structures to two vectors. + + void ld2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD2D ### + +Contiguous load two-doubleword structures to two vectors. + + void ld2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD2H ### + +Contiguous load two-halfword structures to two vectors. + + void ld2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD2W ### + +Contiguous load two-word structures to two vectors. 
+ + void ld2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD3B ### + +Contiguous load three-byte structures to three vectors. + + void ld3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD3D ### + +Contiguous load three-doubleword structures to three vectors. + + void ld3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD3H ### + +Contiguous load three-halfword structures to three vectors. + + void ld3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD3W ### + +Contiguous load three-word structures to three vectors. + + void ld3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD4B ### + +Contiguous load four-byte structures to four vectors. + + void ld4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD4D ### + +Contiguous load four-doubleword structures to four vectors. + + void ld4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD4H ### + +Contiguous load four-halfword structures to four vectors. + + void ld4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LD4W ### + +Contiguous load four-word structures to four vectors. 
+ + void ld4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1B ### + +Contiguous load first-fault unsigned bytes to vector. + + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1B ### + +Gather load first-fault unsigned bytes to vector (immediate index). + + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### LDFF1B ### + +Gather load first-fault unsigned bytes to vector. + + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) + + +### LDFF1D ### + +Contiguous load first-fault doublewords to vector. + + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1D ### + +Gather load first-fault doublewords to vector (immediate index). + + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### LDFF1D ### + +Gather load first-fault doublewords to vector (vector index). + + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) + + +### LDFF1H ### + +Contiguous load first-fault unsigned halfwords to vector. + + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1H ### + +Gather load first-fault unsigned halfwords to vector (immediate index). + + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### LDFF1H ### + +Gather load first-fault unsigned halfwords to vector (vector index). + + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) + + +### LDFF1SB ### + +Contiguous load first-fault signed bytes to vector. 
+ + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1SB ### + +Gather load first-fault signed bytes to vector (immediate index). + + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### LDFF1SB ### + +Gather load first-fault signed bytes to vector (vector index). + + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) + + +### LDFF1SH ### + +Contiguous load first-fault signed halfwords to vector. + + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1SH ### + +Gather load first-fault signed halfwords to vector (immediate index). + + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### LDFF1SH ### + +Gather load first-fault signed halfwords to vector (vector index). + + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) + + +### LDFF1SW ### + +Contiguous load first-fault signed words to vector. + + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1SW ### + +Gather load first-fault signed words to vector (immediate index). + + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### LDFF1SW ### + +Gather load first-fault signed words to vector (vector index). + + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) + + +### LDFF1W ### + +Contiguous load first-fault unsigned words to vector. + + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDFF1W ### + +Gather load first-fault unsigned words to vector (immediate index). 
+ + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) + + +### LDFF1W ### + +Gather load first-fault unsigned words to vector (vector index). + + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) + + +### LDNF1B ### + +Contiguous load non-fault unsigned bytes to vector (immediate index). + + void ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNF1D ### + +Contiguous load non-fault doublewords to vector (immediate index). + + void ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNF1H ### + +Contiguous load non-fault unsigned halfwords to vector (immediate index). + + void ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNF1SB ### + +Contiguous load non-fault signed bytes to vector (immediate index). + + void ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNF1SH ### + +Contiguous load non-fault signed halfwords to vector (immediate index). + + void ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNF1SW ### + +Contiguous load non-fault signed words to vector (immediate index). + + void ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNF1W ### + +Contiguous load non-fault unsigned words to vector (immediate index). + + void ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNT1B ### + +Contiguous load non-temporal bytes to vector. + + void ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNT1D ### + +Contiguous load non-temporal doublewords to vector. + + void ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNT1H ### + +Contiguous load non-temporal halfwords to vector. 
+ + void ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNT1SB ### + +Gather load non-temporal signed bytes. + + void ldnt1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNT1SH ### + +Gather load non-temporal signed halfwords. + + void ldnt1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNT1SW ### + +Gather load non-temporal signed words. + + void ldnt1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDNT1W ### + +Contiguous load non-temporal words to vector. + + void ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) + + +### LDR ### + +Load SVE predicate/vector register. + + void ldr(const CPURegister& rt, const SVEMemOperand& addr) + + +### LSL ### + +Logical shift left by 64-bit wide elements (predicated). + + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### LSL ### + +Logical shift left by 64-bit wide elements (unpredicated). + + void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### LSL ### + +Logical shift left by immediate (predicated). + + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### LSL ### + +Logical shift left by immediate (unpredicated). + + void lsl(const ZRegister& zd, const ZRegister& zn, int shift) + + +### LSLR ### + +Reversed logical shift left by vector (predicated). + + void lslr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### LSR ### + +Logical shift right by 64-bit wide elements (predicated). + + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### LSR ### + +Logical shift right by 64-bit wide elements (unpredicated). 
+ + void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### LSR ### + +Logical shift right by immediate (predicated). + + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### LSR ### + +Logical shift right by immediate (unpredicated). + + void lsr(const ZRegister& zd, const ZRegister& zn, int shift) + + +### LSRR ### + +Reversed logical shift right by vector (predicated). + + void lsrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +<a id="sve-m"> + +### MAD ### + +Multiply-add vectors (predicated), writing multiplicand [Zdn = Za + Zdn * Zm]. + + void mad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) + + +### MATCH ### + +Detect any matching elements, setting the condition flags. + + void match(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### MLA ### + +Multiply-add to accumulator (indexed). + + void mla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### MLA ### + +Multiply-add vectors (predicated), writing addend [Zda = Zda + Zn * Zm]. + + void mla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### MLS ### + +Multiply-subtract from accumulator (indexed). + + void mls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### MLS ### + +Multiply-subtract vectors (predicated), writing addend [Zda = Zda - Zn * Zm]. 
+ + void mls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### MOV ### + +Move SIMD&FP scalar register to vector elements (predicated) + + void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) + + +### MOV ### + +Move SIMD&FP scalar register to vector elements (unpredicated) + + void mov(const ZRegister& zd, const VRegister& vn) + + +### MOV ### + +Move general-purpose register to vector elements (predicated) + + void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn) + + +### MOV ### + +Move general-purpose register to vector elements (unpredicated) + + void mov(const ZRegister& zd, const Register& xn) + + +### MOV ### + +Move indexed element to vector elements (unpredicated) + + void mov(const ZRegister& zd, const ZRegister& zn, unsigned index) + + +### MOV ### + +Move logical bitmask immediate to vector (unpredicated). + + void mov(const ZRegister& zd, uint64_t imm) + + +### MOV ### + +Move predicates (merging) + + void mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn) + + +### MOV ### + +Move predicates (unpredicated) + + void mov(const PRegister& pd, const PRegister& pn) + + +### MOV ### + +Move predicates (zeroing) + + void mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) + + +### MOV ### + +Move signed immediate to vector elements (unpredicated). + + void mov(const ZRegister& zd, int imm8, int shift) + + +### MOV ### + +Move signed integer immediate to vector elements (predicated) + + void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1) + + +### MOV ### + +Move vector elements (predicated) + + void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### MOV ### + +Move vector register (unpredicated) + + void mov(const ZRegister& zd, const ZRegister& zn) + + +### MOVPRFX ### + +Move prefix (predicated). 
+ + void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) + + +### MOVPRFX ### + +Move prefix (unpredicated). + + void movprfx(const ZRegister& zd, const ZRegister& zn) + + +### MOVS ### + +Move predicate (unpredicated), setting the condition flags + + void movs(const PRegister& pd, const PRegister& pn) + + +### MOVS ### + +Move predicates (zeroing), setting the condition flags + + void movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) + + +### MSB ### + +Multiply-subtract vectors (predicated), writing multiplicand [Zdn = Za - Zdn * Zm]. + + void msb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) + + +### MUL ### + +Multiply (indexed). + + void mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### MUL ### + +Multiply by immediate (unpredicated). + + void mul(const ZRegister& zd, const ZRegister& zn, int imm8) + + +### MUL ### + +Multiply vectors (predicated). + + void mul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### MUL ### + +Multiply vectors (unpredicated). + + void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +<a id="sve-n"> + +### NAND ### + +Bitwise NAND predicates. + + void nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### NANDS ### + +Bitwise NAND predicates. + + void nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### NBSL ### + +Bitwise inverted select. + + void nbsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) + + +### NEG ### + +Negate (predicated). 
+ + void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### NMATCH ### + +Detect no matching elements, setting the condition flags. + + void nmatch(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### NOR ### + +Bitwise NOR predicates. + + void nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### NORS ### + +Bitwise NOR predicates. + + void nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### NOT ### + +Bitwise invert predicate. + + void not_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) + + +### NOT ### + +Bitwise invert vector (predicated). + + void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### NOTS ### + +Bitwise invert predicate, setting the condition flags. + + void nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) + + +<a id="sve-o"> + +### ORN ### + +Bitwise OR inverted predicate. + + void orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### ORN ### + +Bitwise OR with inverted immediate (unpredicated). + + void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) + + +### ORNS ### + +Bitwise OR inverted predicate. + + void orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### ORR ### + +Bitwise OR predicate. + + void orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### ORR ### + +Bitwise OR vectors (predicated). 
+ + void orr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### ORR ### + +Bitwise OR vectors (unpredicated). + + void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### ORR ### + +Bitwise OR with immediate (unpredicated). + + void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) + + +### ORRS ### + +Bitwise OR predicate. + + void orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### ORV ### + +Bitwise OR reduction to scalar. + + void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +<a id="sve-p"> + +### PFALSE ### + +Set all predicate elements to false. + + void pfalse(const PRegisterWithLaneSize& pd) + + +### PFIRST ### + +Set the first active predicate element to true. + + void pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) + + +### PMUL ### + +Polynomial multiply vectors (unpredicated). + + void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### PMULLB ### + +Polynomial multiply long (bottom). + + void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### PMULLT ### + +Polynomial multiply long (top). + + void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### PNEXT ### + +Find next active predicate. + + void pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) + + +### PRFB ### + +Prefetch bytes. + + void prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) + + +### PRFD ### + +Prefetch doublewords. + + void prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) + + +### PRFH ### + +Prefetch halfwords. + + void prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) + + +### PRFW ### + +Prefetch words. 
+ + void prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) + + +### PTEST ### + +Set condition flags for predicate. + + void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) + + +### PTRUE ### + +Initialise predicate from named constraint. + + void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL) + + +### PTRUES ### + +Initialise predicate from named constraint. + + void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL) + + +### PUNPKHI ### + +Unpack and widen half of predicate. + + void punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) + + +### PUNPKLO ### + +Unpack and widen half of predicate. + + void punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) + + +<a id="sve-r"> + +### RADDHNB ### + +Rounding add narrow high part (bottom). + + void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### RADDHNT ### + +Rounding add narrow high part (top). + + void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### RBIT ### + +Reverse bits (predicated). + + void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### RDFFR ### + +Read the first-fault register. + + void rdffr(const PRegisterWithLaneSize& pd) + + +### RDFFR ### + +Return predicate of successfully loaded elements. + + void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) + + +### RDFFRS ### + +Return predicate of successfully loaded elements. + + void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) + + +### RDVL ### + +Read multiple of vector register size to scalar register. + + void rdvl(const Register& xd, int imm6) + + +### REV ### + +Reverse all elements in a predicate. + + void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn) + + +### REV ### + +Reverse all elements in a vector (unpredicated). 
+ + void rev(const ZRegister& zd, const ZRegister& zn) + + +### REVB ### + +Reverse bytes / halfwords / words within elements (predicated). + + void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### REVH ### + +Reverse bytes / halfwords / words within elements (predicated). + + void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### REVW ### + +Reverse bytes / halfwords / words within elements (predicated). + + void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### RSHRNB ### + +Rounding shift right narrow by immediate (bottom). + + void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### RSHRNT ### + +Rounding shift right narrow by immediate (top). + + void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### RSUBHNB ### + +Rounding subtract narrow high part (bottom). + + void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### RSUBHNT ### + +Rounding subtract narrow high part (top). + + void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +<a id="sve-s"> + +### SABA ### + +Signed absolute difference and accumulate. + + void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SABALB ### + +Signed absolute difference and accumulate long (bottom). + + void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SABALT ### + +Signed absolute difference and accumulate long (top). + + void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SABD ### + +Signed absolute difference (predicated). + + void sabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SABDLB ### + +Signed absolute difference long (bottom). + + void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SABDLT ### + +Signed absolute difference long (top). 
+ + void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SADALP ### + +Signed add and accumulate long pairwise. + + void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) + + +### SADDLB ### + +Signed add long (bottom). + + void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SADDLBT ### + +Signed add long (bottom + top). + + void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SADDLT ### + +Signed add long (top). + + void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SADDV ### + +Signed add reduction to scalar. + + void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) + + +### SADDWB ### + +Signed add wide (bottom). + + void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SADDWT ### + +Signed add wide (top). + + void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SBCLB ### + +Subtract with carry long (bottom). + + void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SBCLT ### + +Subtract with carry long (top). + + void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SCVTF ### + +Signed integer convert to floating-point (predicated). + + void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### SDIV ### + +Signed divide (predicated). + + void sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SDIVR ### + +Signed reversed divide (predicated). + + void sdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SDOT ### + +Signed dot product by indexed quadtuplet. + + void sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SDOT ### + +Signed dot product. 
+ + void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SEL ### + +Conditionally select elements from two predicates. + + void sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### SEL ### + +Conditionally select elements from two vectors. + + void sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SETFFR ### + +Initialise the first-fault register to all true. + + void setffr() + + +### SHADD ### + +Signed halving addition. + + void shadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SHRNB ### + +Shift right narrow by immediate (bottom). + + void shrnb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SHRNT ### + +Shift right narrow by immediate (top). + + void shrnt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SHSUB ### + +Signed halving subtract. + + void shsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SHSUBR ### + +Signed halving subtract reversed vectors. + + void shsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SLI ### + +Shift left and insert (immediate). + + void sli(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SMAX ### + +Signed maximum vectors (predicated). + + void smax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SMAX ### + +Signed maximum with immediate (unpredicated). + + void smax(const ZRegister& zd, const ZRegister& zn, int imm8) + + +### SMAXP ### + +Signed maximum pairwise. + + void smaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SMAXV ### + +Signed maximum reduction to scalar. 
+ + void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### SMIN ### + +Signed minimum vectors (predicated). + + void smin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SMIN ### + +Signed minimum with immediate (unpredicated). + + void smin(const ZRegister& zd, const ZRegister& zn, int imm8) + + +### SMINP ### + +Signed minimum pairwise. + + void sminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SMINV ### + +Signed minimum reduction to scalar. + + void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### SMLALB ### + +Signed multiply-add long to accumulator (bottom). + + void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SMLALB ### + +Signed multiply-add long to accumulator (bottom, indexed). + + void smlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SMLALT ### + +Signed multiply-add long to accumulator (top). + + void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SMLALT ### + +Signed multiply-add long to accumulator (top, indexed). + + void smlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SMLSLB ### + +Signed multiply-subtract long from accumulator (bottom). + + void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SMLSLB ### + +Signed multiply-subtract long from accumulator (bottom, indexed). + + void smlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SMLSLT ### + +Signed multiply-subtract long from accumulator (top). + + void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SMLSLT ### + +Signed multiply-subtract long from accumulator (top, indexed). 
+ + void smlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SMMLA ### + +Signed integer matrix multiply-accumulate. + + void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SMULH ### + +Signed multiply returning high half (predicated). + + void smulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SMULH ### + +Signed multiply returning high half (unpredicated). + + void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SMULLB ### + +Signed multiply long (bottom). + + void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SMULLB ### + +Signed multiply long (bottom, indexed). + + void smullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SMULLT ### + +Signed multiply long (top). + + void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SMULLT ### + +Signed multiply long (top, indexed). + + void smullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SPLICE ### + +Splice two vectors under predicate control. + + void splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQABS ### + +Signed saturating absolute value. + + void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### SQADD ### + +Signed saturating add immediate (unpredicated). + + void sqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1) + + +### SQADD ### + +Signed saturating add vectors (unpredicated). + + void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SQADD ### + +Signed saturating addition (predicated). + + void sqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQCADD ### + +Saturating complex integer add with rotate. 
+ + void sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) + + +### SQDECB ### + +Signed saturating decrement scalar by multiple of 8-bit predicate constraint element count. + + void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQDECB ### + +Signed saturating decrement scalar by multiple of 8-bit predicate constraint element count. + + void sqdecb(const Register& xd, + const Register& wn, + int pattern, + int multiplier) + + +### SQDECD ### + +Signed saturating decrement scalar by multiple of 64-bit predicate constraint element count. + + void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQDECD ### + +Signed saturating decrement scalar by multiple of 64-bit predicate constraint element count. + + void sqdecd(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) + + +### SQDECD ### + +Signed saturating decrement vector by multiple of 64-bit predicate constraint element count. + + void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQDECH ### + +Signed saturating decrement scalar by multiple of 16-bit predicate constraint element count. + + void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQDECH ### + +Signed saturating decrement scalar by multiple of 16-bit predicate constraint element count. + + void sqdech(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) + + +### SQDECH ### + +Signed saturating decrement vector by multiple of 16-bit predicate constraint element count. + + void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQDECP ### + +Signed saturating decrement scalar by active predicate element count. 
+ + void sqdecp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) + + +### SQDECP ### + +Signed saturating decrement scalar by active predicate element count. + + void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) + + +### SQDECP ### + +Signed saturating decrement vector by active predicate element count. + + void sqdecp(const ZRegister& zdn, const PRegister& pg) + + +### SQDECW ### + +Signed saturating decrement scalar by multiple of 32-bit predicate constraint element count. + + void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQDECW ### + +Signed saturating decrement scalar by multiple of 32-bit predicate constraint element count. + + void sqdecw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) + + +### SQDECW ### + +Signed saturating decrement vector by multiple of 32-bit predicate constraint element count. + + void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQDMLALB ### + +Signed saturating doubling multiply-add long to accumulator (bottom). + + void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SQDMLALB ### + +Signed saturating doubling multiply-add long to accumulator (bottom, indexed). + + void sqdmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQDMLALBT ### + +Signed saturating doubling multiply-add long to accumulator (bottom x top). + + void sqdmlalbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) + + +### SQDMLALT ### + +Signed saturating doubling multiply-add long to accumulator (top). + + void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SQDMLALT ### + +Signed saturating doubling multiply-add long to accumulator (top, indexed). 
+ + void sqdmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQDMLSLB ### + +Signed saturating doubling multiply-subtract long from accumulator (bottom). + + void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SQDMLSLB ### + +Signed saturating doubling multiply-subtract long from accumulator (bottom, indexed). + + void sqdmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQDMLSLBT ### + +Signed saturating doubling multiply-subtract long from accumulator (bottom x top). + + void sqdmlslbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) + + +### SQDMLSLT ### + +Signed saturating doubling multiply-subtract long from accumulator (top). + + void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SQDMLSLT ### + +Signed saturating doubling multiply-subtract long from accumulator (top, indexed). + + void sqdmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQDMULH ### + +Signed saturating doubling multiply high (indexed). + + void sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQDMULH ### + +Signed saturating doubling multiply high (unpredicated). + + void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SQDMULLB ### + +Signed saturating doubling multiply long (bottom). + + void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SQDMULLB ### + +Signed saturating doubling multiply long (bottom, indexed). + + void sqdmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQDMULLT ### + +Signed saturating doubling multiply long (top). + + void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SQDMULLT ### + +Signed saturating doubling multiply long (top, indexed). 
+ + void sqdmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQINCB ### + +Signed saturating increment scalar by multiple of 8-bit predicate constraint element count. + + void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQINCB ### + +Signed saturating increment scalar by multiple of 8-bit predicate constraint element count. + + void sqincb(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) + + +### SQINCD ### + +Signed saturating increment scalar by multiple of 64-bit predicate constraint element count. + + void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQINCD ### + +Signed saturating increment scalar by multiple of 64-bit predicate constraint element count. + + void sqincd(const Register& xd, + const Register& wn, + int pattern, + int multiplier) + + +### SQINCD ### + +Signed saturating increment vector by multiple of 64-bit predicate constraint element count. + + void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQINCH ### + +Signed saturating increment scalar by multiple of 16-bit predicate constraint element count. + + void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQINCH ### + +Signed saturating increment scalar by multiple of 16-bit predicate constraint element count. + + void sqinch(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) + + +### SQINCH ### + +Signed saturating increment vector by multiple of 16-bit predicate constraint element count. + + void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQINCP ### + +Signed saturating increment scalar by active predicate element count. 
+ + void sqincp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) + + +### SQINCP ### + +Signed saturating increment scalar by active predicate element count. + + void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) + + +### SQINCP ### + +Signed saturating increment vector by active predicate element count. + + void sqincp(const ZRegister& zdn, const PRegister& pg) + + +### SQINCW ### + +Signed saturating increment scalar by multiple of 32-bit predicate constraint element count. + + void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQINCW ### + +Signed saturating increment scalar by multiple of 32-bit predicate constraint element count. + + void sqincw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) + + +### SQINCW ### + +Signed saturating increment vector by multiple of 32-bit predicate constraint element count. + + void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### SQNEG ### + +Signed saturating negate. + + void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### SQRDCMLAH ### + +Saturating rounding doubling complex integer multiply-add high with rotate (indexed). + + void sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) + + +### SQRDCMLAH ### + +Saturating rounding doubling complex integer multiply-add high with rotate. + + void sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) + + +### SQRDMLAH ### + +Signed saturating rounding doubling multiply-add high to accumulator (indexed). + + void sqrdmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQRDMLAH ### + +Signed saturating rounding doubling multiply-add high to accumulator (unpredicated). 
+ + void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SQRDMLSH ### + +Signed saturating rounding doubling multiply-subtract high from accumulator (indexed). + + void sqrdmlsh(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQRDMLSH ### + +Signed saturating rounding doubling multiply-subtract high from accumulator (unpredicated). + + void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### SQRDMULH ### + +Signed saturating rounding doubling multiply high (indexed). + + void sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### SQRDMULH ### + +Signed saturating rounding doubling multiply high (unpredicated). + + void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SQRSHL ### + +Signed saturating rounding shift left by vector (predicated). + + void sqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQRSHLR ### + +Signed saturating rounding shift left reversed vectors (predicated). + + void sqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQRSHRNB ### + +Signed saturating rounding shift right narrow by immediate (bottom). + + void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQRSHRNT ### + +Signed saturating rounding shift right narrow by immediate (top). + + void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQRSHRUNB ### + +Signed saturating rounding shift right unsigned narrow by immediate (bottom). + + void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQRSHRUNT ### + +Signed saturating rounding shift right unsigned narrow by immediate (top). + + void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQSHL ### + +Signed saturating shift left by immediate. 
+ + void sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### SQSHL ### + +Signed saturating shift left by vector (predicated). + + void sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQSHLR ### + +Signed saturating shift left reversed vectors (predicated). + + void sqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQSHLU ### + +Signed saturating shift left unsigned by immediate. + + void sqshlu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### SQSHRNB ### + +Signed saturating shift right narrow by immediate (bottom). + + void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQSHRNT ### + +Signed saturating shift right narrow by immediate (top). + + void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQSHRUNB ### + +Signed saturating shift right unsigned narrow by immediate (bottom). + + void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQSHRUNT ### + +Signed saturating shift right unsigned narrow by immediate (top). + + void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SQSUB ### + +Signed saturating subtract immediate (unpredicated). + + void sqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1) + + +### SQSUB ### + +Signed saturating subtract vectors (unpredicated). + + void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SQSUB ### + +Signed saturating subtraction (predicated). + + void sqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQSUBR ### + +Signed saturating subtraction reversed vectors (predicated). 
+ + void sqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SQXTNB ### + +Signed saturating extract narrow (bottom). + + void sqxtnb(const ZRegister& zd, const ZRegister& zn) + + +### SQXTNT ### + +Signed saturating extract narrow (top). + + void sqxtnt(const ZRegister& zd, const ZRegister& zn) + + +### SQXTUNB ### + +Signed saturating unsigned extract narrow (bottom). + + void sqxtunb(const ZRegister& zd, const ZRegister& zn) + + +### SQXTUNT ### + +Signed saturating unsigned extract narrow (top). + + void sqxtunt(const ZRegister& zd, const ZRegister& zn) + + +### SRHADD ### + +Signed rounding halving addition. + + void srhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SRI ### + +Shift right and insert (immediate). + + void sri(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SRSHL ### + +Signed rounding shift left by vector (predicated). + + void srshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SRSHLR ### + +Signed rounding shift left reversed vectors (predicated). + + void srshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SRSHR ### + +Signed rounding shift right by immediate. + + void srshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### SRSRA ### + +Signed rounding shift right and accumulate (immediate). + + void srsra(const ZRegister& zda, const ZRegister& zn, int shift) + + +### SSHLLB ### + +Signed shift left long by immediate (bottom). + + void sshllb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SSHLLT ### + +Signed shift left long by immediate (top). + + void sshllt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### SSRA ### + +Signed shift right and accumulate (immediate). 
+
+    void ssra(const ZRegister& zda, const ZRegister& zn, int shift)
+
+
+### SSUBLB ###
+
+Signed subtract long (bottom).
+
+    void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBLBT ###
+
+Signed subtract long (bottom - top).
+
+    void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBLT ###
+
+Signed subtract long (top).
+
+    void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBLTB ###
+
+Signed subtract long (top - bottom).
+
+    void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBWB ###
+
+Signed subtract wide (bottom).
+
+    void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBWT ###
+
+Signed subtract wide (top).
+
+    void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ST1B ###
+
+Contiguous/scatter store bytes from vector.
+
+    void st1b(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr)
+
+
+### ST1D ###
+
+Contiguous/scatter store doublewords from vector.
+
+    void st1d(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr)
+
+
+### ST1H ###
+
+Contiguous/scatter store halfwords from vector.
+
+    void st1h(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr)
+
+
+### ST1W ###
+
+Contiguous/scatter store words from vector.
+
+    void st1w(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr)
+
+
+### ST2B ###
+
+Contiguous store two-byte structures from two vectors.
+
+    void st2b(const ZRegister& zt1,
+              const ZRegister& zt2,
+              const PRegister& pg,
+              const SVEMemOperand& addr)
+
+
+### ST2D ###
+
+Contiguous store two-doubleword structures from two vectors.
+
+    void st2d(const ZRegister& zt1,
+              const ZRegister& zt2,
+              const PRegister& pg,
+              const SVEMemOperand& addr)
+
+
+### ST2H ###
+
+Contiguous store two-halfword structures from two vectors.
+ + void st2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST2W ### + +Contiguous store two-word structures from two vectors. + + void st2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST3B ### + +Contiguous store three-byte structures from three vectors. + + void st3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST3D ### + +Contiguous store three-doubleword structures from three vectors. + + void st3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST3H ### + +Contiguous store three-halfword structures from three vectors. + + void st3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST3W ### + +Contiguous store three-word structures from three vectors. + + void st3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST4B ### + +Contiguous store four-byte structures from four vectors. + + void st4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST4D ### + +Contiguous store four-doubleword structures from four vectors. + + void st4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST4H ### + +Contiguous store four-halfword structures from four vectors. + + void st4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) + + +### ST4W ### + +Contiguous store four-word structures from four vectors. 
+ + void st4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) + + +### STNT1B ### + +Contiguous store non-temporal bytes from vector. + + void stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) + + +### STNT1D ### + +Contiguous store non-temporal doublewords from vector. + + void stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) + + +### STNT1H ### + +Contiguous store non-temporal halfwords from vector. + + void stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) + + +### STNT1W ### + +Contiguous store non-temporal words from vector. + + void stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) + + +### STR ### + +Store SVE predicate/vector register. + + void str(const CPURegister& rt, const SVEMemOperand& addr) + + +### SUB ### + +Subtract immediate (unpredicated). + + void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1) + + +### SUB ### + +Subtract vectors (predicated). + + void sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### SUB ### + +Subtract vectors (unpredicated). + + void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SUBHNB ### + +Subtract narrow high part (bottom). + + void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SUBHNT ### + +Subtract narrow high part (top). + + void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### SUBR ### + +Reversed subtract from immediate (unpredicated). + + void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1) + + +### SUBR ### + +Reversed subtract vectors (predicated). 
+
+    void subr(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              const ZRegister& zm)
+
+
+### SUDOT ###
+
+Signed by unsigned integer indexed dot product.
+
+    void sudot(const ZRegister& zda,
+               const ZRegister& zn,
+               const ZRegister& zm,
+               int index)
+
+
+### SUNPKHI ###
+
+Signed unpack and extend half of vector.
+
+    void sunpkhi(const ZRegister& zd, const ZRegister& zn)
+
+
+### SUNPKLO ###
+
+Signed unpack and extend half of vector.
+
+    void sunpklo(const ZRegister& zd, const ZRegister& zn)
+
+
+### SUQADD ###
+
+Signed saturating addition of unsigned value.
+
+    void suqadd(const ZRegister& zd,
+                const PRegisterM& pg,
+                const ZRegister& zn,
+                const ZRegister& zm)
+
+
+### SXTB ###
+
+Signed byte extend (predicated).
+
+    void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SXTH ###
+
+Signed halfword extend (predicated).
+
+    void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SXTW ###
+
+Signed word extend (predicated).
+
+    void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+<a id="sve-t">
+
+### TBL ###
+
+Programmable table lookup in one or two vector tables (zeroing).
+
+    void tbl(const ZRegister& zd,
+             const ZRegister& zn1,
+             const ZRegister& zn2,
+             const ZRegister& zm)
+
+
+### TBL ###
+
+Programmable table lookup/permute using vector of indices into a vector.
+
+    void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### TBX ###
+
+Programmable table lookup in a single vector table (merging).
+
+    void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### TRN1 ###
+
+Interleave even or odd elements from two predicates.
+
+    void trn1(const PRegisterWithLaneSize& pd,
+              const PRegisterWithLaneSize& pn,
+              const PRegisterWithLaneSize& pm)
+
+
+### TRN1 ###
+
+Interleave even or odd elements from two vectors.
+ + void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### TRN2 ### + +Interleave even or odd elements from two predicates. + + void trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### TRN2 ### + +Interleave even or odd elements from two vectors. + + void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +<a id="sve-u"> + +### UABA ### + +Unsigned absolute difference and accumulate. + + void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UABALB ### + +Unsigned absolute difference and accumulate long (bottom). + + void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UABALT ### + +Unsigned absolute difference and accumulate long (top). + + void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UABD ### + +Unsigned absolute difference (predicated). + + void uabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UABDLB ### + +Unsigned absolute difference long (bottom). + + void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UABDLT ### + +Unsigned absolute difference long (top). + + void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UADALP ### + +Unsigned add and accumulate long pairwise. + + void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) + + +### UADDLB ### + +Unsigned add long (bottom). + + void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UADDLT ### + +Unsigned add long (top). + + void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UADDV ### + +Unsigned add reduction to scalar. + + void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) + + +### UADDWB ### + +Unsigned add wide (bottom). 
+ + void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UADDWT ### + +Unsigned add wide (top). + + void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UCVTF ### + +Unsigned integer convert to floating-point (predicated). + + void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### UDIV ### + +Unsigned divide (predicated). + + void udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UDIVR ### + +Unsigned reversed divide (predicated). + + void udivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UDOT ### + +Unsigned dot product by indexed quadtuplet. + + void udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### UDOT ### + +Unsigned dot product. + + void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UHADD ### + +Unsigned halving addition. + + void uhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UHSUB ### + +Unsigned halving subtract. + + void uhsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UHSUBR ### + +Unsigned halving subtract reversed vectors. + + void uhsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UMAX ### + +Unsigned maximum vectors (predicated). + + void umax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UMAX ### + +Unsigned maximum with immediate (unpredicated). + + void umax(const ZRegister& zd, const ZRegister& zn, int imm8) + + +### UMAXP ### + +Unsigned maximum pairwise. + + void umaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UMAXV ### + +Unsigned maximum reduction to scalar. 
+ + void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### UMIN ### + +Unsigned minimum vectors (predicated). + + void umin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UMIN ### + +Unsigned minimum with immediate (unpredicated). + + void umin(const ZRegister& zd, const ZRegister& zn, int imm8) + + +### UMINP ### + +Unsigned minimum pairwise. + + void uminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UMINV ### + +Unsigned minimum reduction to scalar. + + void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) + + +### UMLALB ### + +Unsigned multiply-add long to accumulator (bottom). + + void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UMLALB ### + +Unsigned multiply-add long to accumulator (bottom, indexed). + + void umlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### UMLALT ### + +Unsigned multiply-add long to accumulator (top). + + void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UMLALT ### + +Unsigned multiply-add long to accumulator (top, indexed). + + void umlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### UMLSLB ### + +Unsigned multiply-subtract long from accumulator (bottom). + + void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UMLSLB ### + +Unsigned multiply-subtract long from accumulator (bottom, indexed). + + void umlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### UMLSLT ### + +Unsigned multiply-subtract long from accumulator (top). + + void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UMLSLT ### + +Unsigned multiply-subtract long from accumulator (top, indexed). 
+ + void umlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### UMMLA ### + +Unsigned integer matrix multiply-accumulate. + + void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### UMULH ### + +Unsigned multiply returning high half (predicated). + + void umulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UMULH ### + +Unsigned multiply returning high half (unpredicated). + + void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UMULLB ### + +Unsigned multiply long (bottom). + + void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UMULLB ### + +Unsigned multiply long (bottom, indexed). + + void umullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### UMULLT ### + +Unsigned multiply long (top). + + void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UMULLT ### + +Unsigned multiply long (top, indexed). + + void umullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### UQADD ### + +Unsigned saturating add immediate (unpredicated). + + void uqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1) + + +### UQADD ### + +Unsigned saturating add vectors (unpredicated). + + void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UQADD ### + +Unsigned saturating addition (predicated). + + void uqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UQDECB ### + +Unsigned saturating decrement scalar by multiple of 8-bit predicate constraint element count. + + void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQDECD ### + +Unsigned saturating decrement scalar by multiple of 64-bit predicate constraint element count. 
+ + void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQDECD ### + +Unsigned saturating decrement vector by multiple of 64-bit predicate constraint element count. + + void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQDECH ### + +Unsigned saturating decrement scalar by multiple of 16-bit predicate constraint element count. + + void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQDECH ### + +Unsigned saturating decrement vector by multiple of 16-bit predicate constraint element count. + + void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQDECP ### + +Unsigned saturating decrement scalar by active predicate element count. + + void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) + + +### UQDECP ### + +Unsigned saturating decrement vector by active predicate element count. + + void uqdecp(const ZRegister& zdn, const PRegister& pg) + + +### UQDECW ### + +Unsigned saturating decrement scalar by multiple of 32-bit predicate constraint element count. + + void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQDECW ### + +Unsigned saturating decrement vector by multiple of 32-bit predicate constraint element count. + + void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQINCB ### + +Unsigned saturating increment scalar by multiple of 8-bit predicate constraint element count. + + void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQINCD ### + +Unsigned saturating increment scalar by multiple of 64-bit predicate constraint element count. + + void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQINCD ### + +Unsigned saturating increment vector by multiple of 64-bit predicate constraint element count. 
+ + void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQINCH ### + +Unsigned saturating increment scalar by multiple of 16-bit predicate constraint element count. + + void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQINCH ### + +Unsigned saturating increment vector by multiple of 16-bit predicate constraint element count. + + void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQINCP ### + +Unsigned saturating increment scalar by active predicate element count. + + void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) + + +### UQINCP ### + +Unsigned saturating increment vector by active predicate element count. + + void uqincp(const ZRegister& zdn, const PRegister& pg) + + +### UQINCW ### + +Unsigned saturating increment scalar by multiple of 32-bit predicate constraint element count. + + void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQINCW ### + +Unsigned saturating increment vector by multiple of 32-bit predicate constraint element count. + + void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) + + +### UQRSHL ### + +Unsigned saturating rounding shift left by vector (predicated). + + void uqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UQRSHLR ### + +Unsigned saturating rounding shift left reversed vectors (predicated). + + void uqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UQRSHRNB ### + +Unsigned saturating rounding shift right narrow by immediate (bottom). + + void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### UQRSHRNT ### + +Unsigned saturating rounding shift right narrow by immediate (top). + + void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### UQSHL ### + +Unsigned saturating shift left by immediate. 
+ + void uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### UQSHL ### + +Unsigned saturating shift left by vector (predicated). + + void uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UQSHLR ### + +Unsigned saturating shift left reversed vectors (predicated). + + void uqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UQSHRNB ### + +Unsigned saturating shift right narrow by immediate (bottom). + + void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### UQSHRNT ### + +Unsigned saturating shift right narrow by immediate (top). + + void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### UQSUB ### + +Unsigned saturating subtract immediate (unpredicated). + + void uqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1) + + +### UQSUB ### + +Unsigned saturating subtract vectors (unpredicated). + + void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UQSUB ### + +Unsigned saturating subtraction (predicated). + + void uqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UQSUBR ### + +Unsigned saturating subtraction reversed vectors (predicated). + + void uqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### UQXTNB ### + +Unsigned saturating extract narrow (bottom). + + void uqxtnb(const ZRegister& zd, const ZRegister& zn) + + +### UQXTNT ### + +Unsigned saturating extract narrow (top). + + void uqxtnt(const ZRegister& zd, const ZRegister& zn) + + +### URECPE ### + +Unsigned reciprocal estimate (predicated). + + void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### URHADD ### + +Unsigned rounding halving addition. 
+ + void urhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### URSHL ### + +Unsigned rounding shift left by vector (predicated). + + void urshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### URSHLR ### + +Unsigned rounding shift left reversed vectors (predicated). + + void urshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### URSHR ### + +Unsigned rounding shift right by immediate. + + void urshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) + + +### URSQRTE ### + +Unsigned reciprocal square root estimate (predicated). + + void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### URSRA ### + +Unsigned rounding shift right and accumulate (immediate). + + void ursra(const ZRegister& zda, const ZRegister& zn, int shift) + + +### USDOT ### + +Unsigned by signed integer dot product. + + void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### USDOT ### + +Unsigned by signed integer indexed dot product. + + void usdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) + + +### USHLLB ### + +Unsigned shift left long by immediate (bottom). + + void ushllb(const ZRegister& zd, const ZRegister& zn, int shift) + + +### USHLLT ### + +Unsigned shift left long by immediate (top). + + void ushllt(const ZRegister& zd, const ZRegister& zn, int shift) + + +### USMMLA ### + +Unsigned by signed integer matrix multiply-accumulate. + + void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) + + +### USQADD ### + +Unsigned saturating addition of signed value. + + void usqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) + + +### USRA ### + +Unsigned shift right and accumulate (immediate). 
+ + void usra(const ZRegister& zda, const ZRegister& zn, int shift) + + +### USUBLB ### + +Unsigned subtract long (bottom). + + void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### USUBLT ### + +Unsigned subtract long (top). + + void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### USUBWB ### + +Unsigned subtract wide (bottom). + + void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### USUBWT ### + +Unsigned subtract wide (top). + + void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UUNPKHI ### + +Unsigned unpack and extend half of vector. + + void uunpkhi(const ZRegister& zd, const ZRegister& zn) + + +### UUNPKLO ### + +Unsigned unpack and extend half of vector. + + void uunpklo(const ZRegister& zd, const ZRegister& zn) + + +### UXTB ### + +Unsigned byte extend (predicated). + + void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### UXTH ### + +Unsigned halfword extend (predicated). + + void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### UXTW ### + +Unsigned word extend (predicated). + + void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) + + +### UZP1 ### + +Concatenate even or odd elements from two predicates. + + void uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### UZP1 ### + +Concatenate even or odd elements from two vectors. + + void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### UZP2 ### + +Concatenate even or odd elements from two predicates. + + void uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### UZP2 ### + +Concatenate even or odd elements from two vectors. 
+ + void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +<a id="sve-w"> + +### WHILEGE ### + +While decrementing signed scalar greater than or equal to scalar. + + void whilege(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILEGT ### + +While decrementing signed scalar greater than scalar. + + void whilegt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILEHI ### + +While decrementing unsigned scalar higher than scalar. + + void whilehi(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILEHS ### + +While decrementing unsigned scalar higher or same as scalar. + + void whilehs(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILELE ### + +While incrementing signed scalar less than or equal to scalar. + + void whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILELO ### + +While incrementing unsigned scalar lower than scalar. + + void whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILELS ### + +While incrementing unsigned scalar lower or same as scalar. + + void whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILELT ### + +While incrementing signed scalar less than scalar. + + void whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILERW ### + +While free of read-after-write conflicts. + + void whilerw(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WHILEWR ### + +While free of write-after-read/write conflicts. + + void whilewr(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) + + +### WRFFR ### + +Write the first-fault register. 
+ + void wrffr(const PRegisterWithLaneSize& pn) + + +<a id="sve-x"> + +### XAR ### + +Bitwise exclusive OR and rotate right by immediate. + + void xar(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int shift) + + +<a id="sve-z"> + +### ZIP1 ### + +Interleave elements from two half predicates. + + void zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### ZIP1 ### + +Interleave elements from two half vectors. + + void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + +### ZIP2 ### + +Interleave elements from two half predicates. + + void zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) + + +### ZIP2 ### + +Interleave elements from two half vectors. + + void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) + + + Additional or pseudo instructions --------------------------------- +<a id="pseudo-b"> + ### BIND ### Bind a label to the current PC. @@ -6204,6 +12403,8 @@ Bind a label to the current PC. void bind(Label* label) +<a id="pseudo-d"> + ### DC ### Emit data in the instruction stream. @@ -6232,6 +12433,8 @@ Emit raw instructions into the instruction stream. void dci(Instr raw_inst) +<a id="pseudo-p"> + ### PLACE ### Place a literal at the current PC. diff --git a/doc/changelog.md b/doc/changelog.md deleted file mode 100644 index 70d0755b..00000000 --- a/doc/changelog.md +++ /dev/null @@ -1,124 +0,0 @@ -VIXL Change Log -=============== - -* 1.13 - + Improve code formatting and add tests using clang-format. - + Fix bugs in disassembly of unallocated instruction encodings. - + Fix some execution trace bugs, and add tests. - + Other small bug fixes and improvements. - -* 1.12 - + Bug fixes for toolchain compatibility. - -* 1.11 - + Fix bug in simulation of add with carry. - + Fix use-after-free bug in Literal handling. - + Build system updates for Android. 
- + Add option to run test.py under Valgrind. - + Other small bug fixes and improvements. - -* 1.10 - + Improved support for externally managed literals. - + Reworked build and test infrastructure. - + Other small bug fixes and improvements. - -* 1.9 - + Improved compatibility with Android build system. - + Improved compatibility with Clang toolchain. - + Added support for `umulh` instruction. - + Added support for `fcmpe` and `fccmpe` instructions. - + Other small bug fixes and improvements. - -* 1.8 - + Complete NEON instruction set support. - + Support long branches using veneers. - + Improved handling of literal pools. - + Support some `ic` and `dc` cache op instructions. - + Support CRC32 instructions. - + Support half-precision floating point instructions. - + MacroAssembler support for `bfm`, `ubfm` and `sbfm`. - + Other small bug fixes and improvements. - -* 1.7 - + Added support for `prfm` prefetch instructions. - + Added support for all `frint` instruction variants. - + Add support for disassembling as an offset from a given address. - + Fixed the disassembly of `movz` and `movn`. - + Provide static helpers for immediate generation. - + Provide helpers to create CPURegList from list unions or intersections. - + Improved register value tracing. - + Multithreading test fixes. - + Other small bug fixes and build system improvements. - -* 1.6 - + Make literal pool management the responsibility of the macro assembler. - + Move code buffer management out of the Assembler. - + Support `ldrsw` for literals. - + Support binding a label to a specific offset. - + Add macro assembler support for load/store pair with arbitrary offset. - + Support Peek and Poke for CPURegLists. - + Fix disassembly of branch targets. - + Fix Decoder visitor insertion order. - + Separate Decoder visitors into const and non-const variants. - + Fix simulator for branches to tagged addresses. - + Add a VIM YouCompleteMe configuration file. 
- + Other small bug fixes and build system improvements. - -* 1.5 - + Tagged pointer support. - + Implement support for exclusive access instructions. - + Implement support for `adrp` instruction. - + Faster code for logical immediate identification. - + Generate better code for immediates passed to shift-capable instructions. - + Allow explicit use of unscaled-offset loads and stores. - + Build and test infrastructure improvements. - + Corrected computation of cache line size. - + Fix simulation of `extr` instruction. - + Fixed a bug when moving kWMinInt to a register. - + Other small bug fixes. - -* 1.4 - + Added support for `frintm`. - + Fixed simulation of `frintn` and `frinta` for corner cases. - + Added more tests for floating point instruction simulation. - + Modified `CalleeSave()` and `CalleeRestore()` to push general purpose - registers before floating point registers on the stack. - + Fixed Printf for mixed argument types, and use on real hardware. - + Improved compatibility with some 32-bit compilers. - -* 1.3 - + Address inaccuracies in the simulated floating point instructions. - + Implement Default-NaN floating point mode. - + Introduce `UseScratchRegisterScope` for controlling the use of temporary - registers. - + Enable building VIXL on 32-bit hosts. - + Other small bug fixes and improvements. - -* 1.2 - + Added support for `fmadd`, `fnmadd`, `fnmsub`, `fminnm`, `fmaxnm`, - `frinta`, `fcvtau` and `fcvtas`. - + Added support for assembling and disassembling `isb`, `dsb` and `dmb`. - + Added support for automatic inversion of compare instructions when using - negative immediates. - + Added support for using `movn` when generating immediates. - + Added explicit flag-setting 'S' instructions, and removed - `SetFlags` and `LeaveFlags` arguments. - + Added support for `Movk` in macro assembler. - + Added support for W register parameters to `Tbz` and `Tbnz`. - + Added support for using immediate operands with `Csel`. 
- + Added new debugger syntax for memory inspection. - + Fixed `smull`, `fmsub` and `sdiv` simulation. - + Fixed sign extension for W->X conversions using `sxtb`, `sxth` and `sxtw`. - + Prevented code generation for certain side-effect free operations, - such as `add r, r, #0`, in the macro assembler. - + Other small bug fixes. - -* 1.1 - + Improved robustness of instruction decoder and disassembler. - + Added support for double-to-float conversions using `fcvt`. - + Added support for more fixed-point to floating-point conversions (`ucvtf` - and `scvtf`). - + Added instruction statistics collection class `instrument-a64.cc`. - -* 1.0 - + Initial release. diff --git a/examples/aarch32/custom-aarch32-disasm.cc b/examples/aarch32/custom-aarch32-disasm.cc index e4df9ff3..9577ec78 100644 --- a/examples/aarch32/custom-aarch32-disasm.cc +++ b/examples/aarch32/custom-aarch32-disasm.cc @@ -73,11 +73,11 @@ class CustomDisassembler : public PrintDisassembler { CustomStream* GetStream() const { return reinterpret_cast<CustomStream*>(&os()); } - virtual void PrintCodeAddress(uint32_t pc) VIXL_OVERRIDE { + virtual void PrintCodeAddress(uint32_t addr) VIXL_OVERRIDE { // If the address matches a label, then print the label. Otherwise, print // nothing. std::map<Location::Offset, const char*>::iterator symbol = - GetStream()->GetSymbols().find(pc); + GetStream()->GetSymbols().find(addr); if (symbol != GetStream()->GetSymbols().end()) { os().os() << symbol->second << ":" << std::endl; } diff --git a/examples/aarch64/custom-disassembler.cc b/examples/aarch64/custom-disassembler.cc index 97c94c80..9ea6aacf 100644 --- a/examples/aarch64/custom-disassembler.cc +++ b/examples/aarch64/custom-disassembler.cc @@ -24,6 +24,8 @@ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#include <regex> + #include "custom-disassembler.h" #include "examples.h" @@ -102,13 +104,20 @@ void CustomDisassembler::AppendCodeRelativeCodeAddressToOutput( } -// We override this method to add a comment to this type of instruction. Helpers -// from the vixl::Instruction class can be used to analyse the instruction being +// We override this method to add a comment to some instructions. Helpers from +// the vixl::Instruction class can be used to analyse the instruction being // disasssembled. -void CustomDisassembler::VisitAddSubShifted(const Instruction* instr) { - vixl::aarch64::Disassembler::VisitAddSubShifted(instr); - if (instr->GetRd() == 10) { - AppendToOutput(" // add/sub to x10"); +void CustomDisassembler::Visit(Metadata* metadata, const Instruction* instr) { + vixl::aarch64::Disassembler::Visit(metadata, instr); + const std::string& form = (*metadata)["form"]; + + // Match the forms for 32/64-bit add/subtract with shift, with optional flag + // setting. + if (std::regex_match(form, // NOLINT: avoid clang-tidy-4.0 errors. 
+ std::regex("(?:add|sub)s?_(?:32|64)_addsub_shift"))) { + if (instr->GetRd() == 10) { + AppendToOutput(" // add/sub to x10"); + } } ProcessOutput(instr); } diff --git a/examples/aarch64/custom-disassembler.h b/examples/aarch64/custom-disassembler.h index cfff489f..261a7853 100644 --- a/examples/aarch64/custom-disassembler.h +++ b/examples/aarch64/custom-disassembler.h @@ -40,8 +40,8 @@ class CustomDisassembler : public vixl::aarch64::Disassembler { CustomDisassembler() : vixl::aarch64::Disassembler() {} virtual ~CustomDisassembler() {} - virtual void VisitAddSubShifted(const vixl::aarch64::Instruction* instr) - VIXL_OVERRIDE; + virtual void Visit(vixl::aarch64::Metadata* metadata, + const vixl::aarch64::Instruction* instr) VIXL_OVERRIDE; protected: virtual void AppendRegisterNameToOutput( diff --git a/examples/aarch64/executable-memory.h b/examples/aarch64/executable-memory.h new file mode 100644 index 00000000..8a9ef1eb --- /dev/null +++ b/examples/aarch64/executable-memory.h @@ -0,0 +1,88 @@ +// Copyright 2020, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_EXAMPLE_EXECUTABLE_MEMORY_H_ +#define VIXL_EXAMPLE_EXECUTABLE_MEMORY_H_ + +extern "C" { +#include <stdint.h> +#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64 +#include <sys/mman.h> +#endif +} + +#include <cstdio> +#include <string> + +#include "aarch64/assembler-aarch64.h" +#include "aarch64/constants-aarch64.h" +#include "aarch64/cpu-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" + +#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64 +class ExecutableMemory { + public: + ExecutableMemory(const vixl::byte* code_start, size_t size) + : size_(size), + buffer_(reinterpret_cast<vixl::byte*>(mmap(NULL, + size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + -1, + 0))) { + VIXL_CHECK(buffer_ != MAP_FAILED); + memcpy(buffer_, code_start, size_); + + vixl::aarch64::CPU::EnsureIAndDCacheCoherency(buffer_, size_); + int res = mprotect(buffer_, size_, PROT_READ | PROT_EXEC); + VIXL_CHECK(res == 0); + } + ~ExecutableMemory() { munmap(buffer_, size_); } + + template <typename T> + T GetEntryPoint(const vixl::aarch64::Label& entry_point) const { + int64_t location = entry_point.GetLocation(); + return GetOffsetAddress<T>(location); + } + + private: + template <typename T> + T 
GetOffsetAddress(int64_t offset) const { + VIXL_ASSERT((offset >= 0) && (static_cast<size_t>(offset) <= size_)); + T function_address; + vixl::byte* buffer_address = buffer_ + offset; + + VIXL_STATIC_ASSERT(sizeof(T) == sizeof(buffer_address)); + memcpy(&function_address, &buffer_address, sizeof(T)); + return function_address; + } + + size_t size_; + vixl::byte* buffer_; +}; +#endif + +#endif // VIXL_EXAMPLE_EXECUTABLE_MEMORY_H_ diff --git a/examples/aarch64/getting-started.cc b/examples/aarch64/getting-started.cc index c5da4c8a..a0834989 100644 --- a/examples/aarch64/getting-started.cc +++ b/examples/aarch64/getting-started.cc @@ -27,6 +27,8 @@ #include "aarch64/macro-assembler-aarch64.h" #include "aarch64/simulator-aarch64.h" +#include "executable-memory.h" + using namespace vixl; using namespace vixl::aarch64; @@ -43,25 +45,34 @@ void GenerateDemoFunction(MacroAssembler *masm) { #ifndef TEST_EXAMPLES -#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 int main() { MacroAssembler masm; - Decoder decoder; - Simulator simulator(&decoder); - Label demo_function; - masm.Bind(&demo_function); + Label demo; + masm.Bind(&demo); GenerateDemoFunction(&masm); masm.FinalizeCode(); +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + Decoder decoder; + Simulator simulator(&decoder); + simulator.WriteXRegister(0, 0x8899aabbccddeeff); - simulator.RunFrom(masm.GetLabelAddress<Instruction *>(&demo_function)); + simulator.RunFrom(masm.GetLabelAddress<Instruction *>(&demo)); printf("x0 = %" PRIx64 "\n", simulator.ReadXRegister(0)); - return 0; -} #else -// Without the simulator there is nothing to test. -int main(void) { return 0; } + byte* code = masm.GetBuffer()->GetStartAddress<byte*>(); + size_t code_size = masm.GetSizeOfCodeGenerated(); + ExecutableMemory memory(code, code_size); + // Run the example function. 
+ uint64_t (*demo_function)(uint64_t) = + memory.GetEntryPoint<uint64_t (*)(uint64_t)>(demo); + uint64_t input_value = 0x8899aabbccddeeff; + uint64_t output_value = (*demo_function)(input_value); + printf("native: demo(0x%016lx) = 0x%016lx\n", input_value, output_value); #endif // VIXL_INCLUDE_SIMULATOR_AARCH64 + + return 0; +} #endif // TEST_EXAMPLES diff --git a/examples/aarch64/non-const-visitor.cc b/examples/aarch64/non-const-visitor.cc index d4c54fb1..307b618f 100644 --- a/examples/aarch64/non-const-visitor.cc +++ b/examples/aarch64/non-const-visitor.cc @@ -24,8 +24,10 @@ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "non-const-visitor.h" +#include <regex> + #include "examples.h" +#include "non-const-visitor.h" using namespace vixl; using namespace vixl::aarch64; @@ -33,20 +35,28 @@ using namespace vixl::aarch64; #define __ masm-> -void SwitchAddSubRegisterSources::VisitAddSubShifted(const Instruction* instr) { - int rn = instr->GetRn(); - int rm = instr->GetRm(); - // Only non-const visitors are allowed to discard constness of the visited - // instruction. - Instruction* mutable_instr = MutableInstruction(instr); - Instr instr_bits = mutable_instr->GetInstructionBits(); +void SwitchAddSubRegisterSources::Visit(Metadata* metadata, + const Instruction* instr) { + const std::string& form = (*metadata)["form"]; - // Switch the bitfields for the `rn` and `rm` registers. - instr_bits &= ~(Rn_mask | Rm_mask); - instr_bits |= (rn << Rm_offset) | (rm << Rn_offset); + // Match the forms for 32/64-bit add/subtract with shift, with optional flag + // setting. + if (std::regex_match(form, // NOLINT: avoid clang-tidy-4.0 errors. + std::regex("(?:add|sub)s?_(?:32|64)_addsub_shift"))) { + int rn = instr->GetRn(); + int rm = instr->GetRm(); + // Only non-const visitors are allowed to discard constness of the visited + // instruction. 
+ Instruction* mutable_instr = MutableInstruction(instr); + Instr instr_bits = mutable_instr->GetInstructionBits(); - // Rewrite the instruction. - mutable_instr->SetInstructionBits(instr_bits); + // Switch the bitfields for the `rn` and `rm` registers. + instr_bits &= ~(Rn_mask | Rm_mask); + instr_bits |= (rn << Rm_offset) | (rm << Rn_offset); + + // Rewrite the instruction. + mutable_instr->SetInstructionBits(instr_bits); + } } diff --git a/examples/aarch64/non-const-visitor.h b/examples/aarch64/non-const-visitor.h index 243cc156..b7c50797 100644 --- a/examples/aarch64/non-const-visitor.h +++ b/examples/aarch64/non-const-visitor.h @@ -30,17 +30,16 @@ #include "aarch64/decoder-aarch64.h" #include "aarch64/macro-assembler-aarch64.h" -class SwitchAddSubRegisterSources - : public vixl::aarch64::DecoderVisitorWithDefaults { +class SwitchAddSubRegisterSources : public vixl::aarch64::DecoderVisitor { public: SwitchAddSubRegisterSources() - : vixl::aarch64::DecoderVisitorWithDefaults(kNonConstVisitor) {} + : vixl::aarch64::DecoderVisitor(kNonConstVisitor) {} // Our visitor switches the register sources for some add and sub instructions // (not all add and sub instructions). 
- virtual void VisitAddSubShifted(const vixl::aarch64::Instruction* instr) - VIXL_OVERRIDE; + virtual void Visit(vixl::aarch64::Metadata* metadata, + const vixl::aarch64::Instruction* instr) VIXL_OVERRIDE; }; diff --git a/src/aarch32/assembler-aarch32.cc b/src/aarch32/assembler-aarch32.cc index 5f636981..64126664 100644 --- a/src/aarch32/assembler-aarch32.cc +++ b/src/aarch32/assembler-aarch32.cc @@ -2557,13 +2557,13 @@ void Assembler::adr(Condition cond, public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= 0) && (offset <= 1020) && - ((offset & 0x3) == 0)); - const int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0)); + const int32_t target = off >> 2; return instr | (target & 0xff); } } immop; @@ -2588,15 +2588,16 @@ void Assembler::adr(Condition cond, public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); int32_t target; - if ((offset >= 0) && (offset <= 4095)) { - target = offset; + if ((off >= 0) && (off <= 4095)) { + target = off; } else { - target = -offset; + target = -off; VIXL_ASSERT((target >= 0) && (target <= 4095)); // Emit the T2 encoding. 
instr |= 0x00a00000; @@ -2622,19 +2623,20 @@ void Assembler::adr(Condition cond, public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); int32_t target; - ImmediateA32 positive_immediate_a32(offset); - if (positive_immediate_a32.IsValid()) { - target = positive_immediate_a32.GetEncodingValue(); + ImmediateA32 pos_imm_a32(off); + if (pos_imm_a32.IsValid()) { + target = pos_imm_a32.GetEncodingValue(); } else { - ImmediateA32 negative_immediate_a32(-offset); - VIXL_ASSERT(negative_immediate_a32.IsValid()); + ImmediateA32 neg_imm_a32(-off); + VIXL_ASSERT(neg_imm_a32.IsValid()); // Emit the A2 encoding. - target = negative_immediate_a32.GetEncodingValue(); + target = neg_imm_a32.GetEncodingValue(); instr = (instr & ~0x00f00000) | 0x00400000; } return instr | (target & 0xfff); @@ -3024,13 +3026,12 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= -256) && (offset <= 254) && - ((offset & 0x1) == 0)); - const int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= -256) && (off <= 254) && ((off & 0x1) == 0)); + const int32_t target = off >> 1; return instr | (target & 0xff); } } immop; @@ -3051,13 +3052,12 @@ void 
Assembler::b(Condition cond, EncodingSize size, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= -2048) && (offset <= 2046) && - ((offset & 0x1) == 0)); - const int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= -2048) && (off <= 2046) && ((off & 0x1) == 0)); + const int32_t target = off >> 1; return instr | (target & 0x7ff); } } immop; @@ -3075,13 +3075,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= -1048576) && (offset <= 1048574) && - ((offset & 0x1) == 0)); - const int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= -1048576) && (off <= 1048574) && + ((off & 0x1) == 0)); + const int32_t target = off >> 1; return instr | (target & 0x7ff) | ((target & 0x1f800) << 5) | ((target & 0x20000) >> 4) | ((target & 0x40000) >> 7) | ((target & 0x80000) << 7); @@ -3104,13 +3104,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - 
pc; - VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) && - ((offset & 0x1) == 0)); - int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= -16777216) && (off <= 16777214) && + ((off & 0x1) == 0)); + int32_t target = off >> 1; uint32_t S = target & (1 << 23); target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21); return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) | @@ -3132,13 +3132,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) && - ((offset & 0x3) == 0)); - const int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= -33554432) && (off <= 33554428) && + ((off & 0x3) == 0)); + const int32_t target = off >> 2; return instr | (target & 0xffffff); } } immop; @@ -3462,13 +3462,13 @@ void Assembler::bl(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) && - ((offset & 0x1) == 0)); - int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + 
VIXL_ASSERT((off >= -16777216) && (off <= 16777214) && + ((off & 0x1) == 0)); + int32_t target = off >> 1; uint32_t S = target & (1 << 23); target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21); return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) | @@ -3490,13 +3490,13 @@ void Assembler::bl(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) && - ((offset & 0x3) == 0)); - const int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= -33554432) && (off <= 33554428) && + ((off & 0x3) == 0)); + const int32_t target = off >> 2; return instr | (target & 0xffffff); } } immop; @@ -3549,13 +3549,14 @@ void Assembler::blx(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -16777216) && (offset <= 16777212) && - ((offset & 0x3) == 0)); - int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -16777216) && (off <= 16777212) && + ((off & 0x3) == 0)); + int32_t target = off >> 2; uint32_t S = target & (1 << 22); target ^= ((S >> 1) | (S >> 2)) ^ (3 << 20); return instr | ((target & 0x3ff) << 1) | ((target & 0xffc00) << 6) | @@ -3577,15 +3578,14 @@ void 
Assembler::blx(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const - VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = - location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -33554432) && (offset <= 33554430) && - ((offset & 0x1) == 0)); - const int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -33554432) && (off <= 33554430) && + ((off & 0x1) == 0)); + const int32_t target = off >> 1; return instr | ((target & 0x1) << 24) | ((target & 0x1fffffe) >> 1); } } immop; @@ -3698,13 +3698,12 @@ void Assembler::cbnz(Register rn, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= 0) && (offset <= 126) && - ((offset & 0x1) == 0)); - const int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0)); + const int32_t target = off >> 1; return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4); } } immop; @@ -3748,13 +3747,12 @@ void Assembler::cbz(Register rn, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - pc; - VIXL_ASSERT((offset >= 0) && (offset <= 126) && - ((offset 
& 0x1) == 0)); - const int32_t target = offset >> 1; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = loc->GetLocation() - program_counter; + VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0)); + const int32_t target = off >> 1; return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4); } } immop; @@ -4790,7 +4788,7 @@ void Assembler::ldm(Condition cond, } // LDM{<c>}{<q>} SP!, <registers> ; T1 if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() && - ((registers.GetList() & ~0x80ff) == 0)) { + registers.IsR0toR7orPC()) { EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) | GetRegisterListEncoding(registers, 0, 8)); AdvanceIT(); @@ -5208,13 +5206,13 @@ void Assembler::ldr(Condition cond, public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= 0) && (offset <= 1020) && - ((offset & 0x3) == 0)); - const int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0)); + const int32_t target = off >> 2; return instr | (target & 0xff); } } immop; @@ -5233,13 +5231,14 @@ void Assembler::ldr(Condition cond, public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + 
Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -5259,13 +5258,14 @@ void Assembler::ldr(Condition cond, public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -5505,13 +5505,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); 
return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -5531,13 +5532,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -5747,13 +5749,13 @@ void Assembler::ldrd(Condition cond, public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -1020) && (offset <= 1020) && - ((offset & 0x3) == 0)); - int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0)); + int32_t target = off >> 2; uint32_t U = (target >= 0); target = abs(target) | (U << 8); return instr | (target & 0xff) | ((target & 0x100) << 15); @@ -5777,13 +5779,14 @@ void Assembler::ldrd(Condition cond, public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE 
{ - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -255) && (offset <= 255)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 8); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -255) && (off <= 255)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 8); return instr | (target & 0xf) | ((target & 0xf0) << 4) | ((target & 0x100) << 15); } @@ -6129,13 +6132,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -6155,13 +6159,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -255) && (offset <= 255)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 8); + Location::Offset program_counter, + const 
Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -255) && (off <= 255)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 8); return instr | (target & 0xf) | ((target & 0xf0) << 4) | ((target & 0x100) << 15); } @@ -6382,13 +6387,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -6408,13 +6414,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -255) && (offset <= 255)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 8); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -255) && (off <= 255)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 8); 
return instr | (target & 0xf) | ((target & 0xf0) << 4) | ((target & 0x100) << 15); } @@ -6635,13 +6642,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -6661,13 +6669,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -255) && (offset <= 255)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 8); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -255) && (off <= 255)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 8); return instr | (target & 0xf) | ((target & 0xf0) << 4) | ((target & 0x100) << 15); } @@ -8039,13 +8048,14 @@ void Assembler::pld(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - 
Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -8062,15 +8072,14 @@ void Assembler::pld(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const - VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = - location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -8403,13 +8412,14 @@ void Assembler::pli(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + 
Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -8426,15 +8436,14 @@ void Assembler::pli(Condition cond, Location* location) { public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const - VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = - location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -4095) && (offset <= 4095)); - uint32_t U = (offset >= 0); - int32_t target = abs(offset) | (U << 12); + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -4095) && (off <= 4095)); + uint32_t U = (off >= 0); + int32_t target = abs(off) | (U << 12); return instr | (target & 0xfff) | ((target & 0x1000) << 11); } } immop; @@ -8471,29 +8480,39 @@ bool Assembler::pli_info(Condition cond, void Assembler::pop(Condition cond, EncodingSize size, RegisterList registers) { VIXL_ASSERT(AllowAssembler()); CheckIT(cond); - if (IsUsingT32()) { - // POP{<c>}{<q>} <registers> ; T1 - if (!size.IsWide() && ((registers.GetList() & ~0x80ff) == 0)) { - EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) | - GetRegisterListEncoding(registers, 0, 8)); - AdvanceIT(); - return; - } - // POP{<c>}{<q>} <registers> ; T2 - if (!size.IsNarrow() && ((registers.GetList() & ~0xdfff) == 0)) { - EmitT32_32(0xe8bd0000U | - (GetRegisterListEncoding(registers, 15, 1) << 15) | - (GetRegisterListEncoding(registers, 14, 1) << 14) | - GetRegisterListEncoding(registers, 0, 13)); - AdvanceIT(); - 
return; - } - } else { - // POP{<c>}{<q>} <registers> ; A1 - if (cond.IsNotNever()) { - EmitA32(0x08bd0000U | (cond.GetCondition() << 28) | - GetRegisterListEncoding(registers, 0, 16)); - return; + if (!registers.IsEmpty() || AllowUnpredictable()) { + if (IsUsingT32()) { + // A branch out of an IT block should be the last instruction in the + // block. + if (!registers.Includes(pc) || OutsideITBlockAndAlOrLast(cond) || + AllowUnpredictable()) { + // POP{<c>}{<q>} <registers> ; T1 + if (!size.IsWide() && registers.IsR0toR7orPC()) { + EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) | + GetRegisterListEncoding(registers, 0, 8)); + AdvanceIT(); + return; + } + // POP{<c>}{<q>} <registers> ; T2 + // Alias of: LDM{<c>}{<q>} SP!, <registers> ; T2 + if (!size.IsNarrow() && + ((!registers.Includes(sp) && (registers.GetCount() > 1) && + !(registers.Includes(pc) && registers.Includes(lr))) || + AllowUnpredictable())) { + EmitT32_32(0xe8bd0000U | GetRegisterListEncoding(registers, 0, 16)); + AdvanceIT(); + return; + } + } + } else { + // POP{<c>}{<q>} <registers> ; A1 + // Alias of: LDM{<c>}{<q>} SP!, <registers> ; A1 + if (cond.IsNotNever() && + (!registers.Includes(sp) || AllowUnpredictable())) { + EmitA32(0x08bd0000U | (cond.GetCondition() << 28) | + GetRegisterListEncoding(registers, 0, 16)); + return; + } } } Delegate(kPop, &Assembler::pop, cond, size, registers); @@ -8502,19 +8521,24 @@ void Assembler::pop(Condition cond, EncodingSize size, RegisterList registers) { void Assembler::pop(Condition cond, EncodingSize size, Register rt) { VIXL_ASSERT(AllowAssembler()); CheckIT(cond); - if (IsUsingT32()) { - // POP{<c>}{<q>} <single_register_list> ; T4 - if (!size.IsNarrow() && ((!rt.IsPC() || OutsideITBlockAndAlOrLast(cond)) || - AllowUnpredictable())) { - EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12)); - AdvanceIT(); - return; - } - } else { - // POP{<c>}{<q>} <single_register_list> ; A1 - if (cond.IsNotNever()) { - EmitA32(0x049d0004U | 
(cond.GetCondition() << 28) | (rt.GetCode() << 12)); - return; + if (!rt.IsSP() || AllowUnpredictable()) { + if (IsUsingT32()) { + // POP{<c>}{<q>} <single_register_list> ; T4 + // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T4 + if (!size.IsNarrow() && (!rt.IsPC() || OutsideITBlockAndAlOrLast(cond) || + AllowUnpredictable())) { + EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12)); + AdvanceIT(); + return; + } + } else { + // POP{<c>}{<q>} <single_register_list> ; A1 + // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T1 + if (cond.IsNotNever()) { + EmitA32(0x049d0004U | (cond.GetCondition() << 28) | + (rt.GetCode() << 12)); + return; + } } } Delegate(kPop, &Assembler::pop, cond, size, rt); @@ -8525,28 +8549,37 @@ void Assembler::push(Condition cond, RegisterList registers) { VIXL_ASSERT(AllowAssembler()); CheckIT(cond); - if (IsUsingT32()) { - // PUSH{<c>}{<q>} <registers> ; T1 - if (!size.IsWide() && ((registers.GetList() & ~0x40ff) == 0)) { - EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) | - GetRegisterListEncoding(registers, 0, 8)); - AdvanceIT(); - return; - } - // PUSH{<c>}{<q>} <registers> ; T1 - if (!size.IsNarrow() && ((registers.GetList() & ~0x5fff) == 0)) { - EmitT32_32(0xe92d0000U | - (GetRegisterListEncoding(registers, 14, 1) << 14) | - GetRegisterListEncoding(registers, 0, 13)); - AdvanceIT(); - return; - } - } else { - // PUSH{<c>}{<q>} <registers> ; A1 - if (cond.IsNotNever()) { - EmitA32(0x092d0000U | (cond.GetCondition() << 28) | - GetRegisterListEncoding(registers, 0, 16)); - return; + if (!registers.IsEmpty() || AllowUnpredictable()) { + if (IsUsingT32()) { + // PUSH{<c>}{<q>} <registers> ; T1 + if (!size.IsWide() && registers.IsR0toR7orLR()) { + EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) | + GetRegisterListEncoding(registers, 0, 8)); + AdvanceIT(); + return; + } + // PUSH{<c>}{<q>} <registers> ; T1 + // Alias of: STMDB SP!, <registers> ; T1 + if (!size.IsNarrow() && !registers.Includes(pc) && + 
((!registers.Includes(sp) && (registers.GetCount() > 1)) || + AllowUnpredictable())) { + EmitT32_32(0xe92d0000U | GetRegisterListEncoding(registers, 0, 15)); + AdvanceIT(); + return; + } + } else { + // PUSH{<c>}{<q>} <registers> ; A1 + // Alias of: STMDB SP!, <registers> ; A1 + if (cond.IsNotNever() && + // For A32, sp can appear in the list, but stores an UNKNOWN value if + // it is not the lowest-valued register. + (!registers.Includes(sp) || + registers.GetFirstAvailableRegister().IsSP() || + AllowUnpredictable())) { + EmitA32(0x092d0000U | (cond.GetCondition() << 28) | + GetRegisterListEncoding(registers, 0, 16)); + return; + } } } Delegate(kPush, &Assembler::push, cond, size, registers); @@ -8557,14 +8590,17 @@ void Assembler::push(Condition cond, EncodingSize size, Register rt) { CheckIT(cond); if (IsUsingT32()) { // PUSH{<c>}{<q>} <single_register_list> ; T4 - if (!size.IsNarrow() && (!rt.IsPC() || AllowUnpredictable())) { + // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! ; T4 + if (!size.IsNarrow() && + ((!rt.IsPC() && !rt.IsSP()) || AllowUnpredictable())) { EmitT32_32(0xf84d0d04U | (rt.GetCode() << 12)); AdvanceIT(); return; } } else { // PUSH{<c>}{<q>} <single_register_list> ; A1 - if (cond.IsNotNever() && (!rt.IsPC() || AllowUnpredictable())) { + // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! 
; A1 + if (cond.IsNotNever() && (!rt.IsSP() || AllowUnpredictable())) { EmitA32(0x052d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12)); return; } @@ -11177,7 +11213,7 @@ void Assembler::stmdb(Condition cond, if (IsUsingT32()) { // STMDB{<c>}{<q>} SP!, <registers> ; T1 if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() && - ((registers.GetList() & ~0x40ff) == 0)) { + registers.IsR0toR7orLR()) { EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) | GetRegisterListEncoding(registers, 0, 8)); AdvanceIT(); @@ -19589,13 +19625,13 @@ void Assembler::vldr(Condition cond, public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -1020) && (offset <= 1020) && - ((offset & 0x3) == 0)); - int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0)); + int32_t target = off >> 2; uint32_t U = (target >= 0); target = abs(target) | (U << 8); return instr | (target & 0xff) | ((target & 0x100) << 15); @@ -19619,13 +19655,13 @@ void Assembler::vldr(Condition cond, public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -1020) && (offset <= 1020) && - ((offset & 0x3) == 0)); - int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - 
AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0)); + int32_t target = off >> 2; uint32_t U = (target >= 0); target = abs(target) | (U << 8); return instr | (target & 0xff) | ((target & 0x100) << 15); @@ -19743,13 +19779,13 @@ void Assembler::vldr(Condition cond, public: EmitOp() : Location::EmitOperator(T32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kT32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -1020) && (offset <= 1020) && - ((offset & 0x3) == 0)); - int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kT32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0)); + int32_t target = off >> 2; uint32_t U = (target >= 0); target = abs(target) | (U << 8); return instr | (target & 0xff) | ((target & 0x100) << 15); @@ -19773,13 +19809,13 @@ void Assembler::vldr(Condition cond, public: EmitOp() : Location::EmitOperator(A32) {} virtual uint32_t Encode(uint32_t instr, - Location::Offset pc, - const Location* location) const VIXL_OVERRIDE { - pc += kA32PcDelta; - Location::Offset offset = location->GetLocation() - AlignDown(pc, 4); - VIXL_ASSERT((offset >= -1020) && (offset <= 1020) && - ((offset & 0x3) == 0)); - int32_t target = offset >> 2; + Location::Offset program_counter, + const Location* loc) const VIXL_OVERRIDE { + program_counter += kA32PcDelta; + Location::Offset off = + loc->GetLocation() - AlignDown(program_counter, 4); + VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0)); + int32_t target = off >> 2; uint32_t U = (target >= 0); target = abs(target) | (U << 8); return instr | (target & 0xff) | ((target & 0x100) << 15); diff --git a/src/aarch32/instructions-aarch32.cc 
b/src/aarch32/instructions-aarch32.cc index 2d1cb905..92450d41 100644 --- a/src/aarch32/instructions-aarch32.cc +++ b/src/aarch32/instructions-aarch32.cc @@ -95,10 +95,10 @@ QRegister VRegister::Q() const { Register RegisterList::GetFirstAvailableRegister() const { - for (uint32_t i = 0; i < kNumberOfRegisters; i++) { - if (((list_ >> i) & 1) != 0) return Register(i); + if (list_ == 0) { + return Register(); } - return Register(); + return Register(CountTrailingZeros(list_)); } diff --git a/src/aarch32/instructions-aarch32.h b/src/aarch32/instructions-aarch32.h index f11f2b02..e2c95d19 100644 --- a/src/aarch32/instructions-aarch32.h +++ b/src/aarch32/instructions-aarch32.h @@ -38,7 +38,7 @@ extern "C" { #include "utils-vixl.h" #include "aarch32/constants-aarch32.h" -#ifdef __arm__ +#if defined(__arm__) && !defined(__SOFTFP__) #define HARDFLOAT __attribute__((noinline, pcs("aapcs-vfp"))) #else #define HARDFLOAT __attribute__((noinline)) @@ -491,6 +491,8 @@ class RegisterList { } Register GetFirstAvailableRegister() const; bool IsEmpty() const { return list_ == 0; } + bool IsSingleRegister() const { return IsPowerOf2(list_); } + int GetCount() const { return CountSetBits(list_); } static RegisterList Union(const RegisterList& list_1, const RegisterList& list_2) { return RegisterList(list_1.list_ | list_2.list_); diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h index 6d76642f..390b9088 100644 --- a/src/aarch32/macro-assembler-aarch32.h +++ b/src/aarch32/macro-assembler-aarch32.h @@ -402,13 +402,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { VIXL_ASSERT(GetBuffer()->Is32bitAligned()); } // If we need to add padding, check if we have to emit the pool. 
- const int32_t pc = GetCursorOffset(); - if (label->Needs16BitPadding(pc)) { + const int32_t cursor = GetCursorOffset(); + if (label->Needs16BitPadding(cursor)) { const int kPaddingBytes = 2; - if (pool_manager_.MustEmit(pc, kPaddingBytes)) { - int32_t new_pc = pool_manager_.Emit(this, pc, kPaddingBytes); - USE(new_pc); - VIXL_ASSERT(new_pc == GetCursorOffset()); + if (pool_manager_.MustEmit(cursor, kPaddingBytes)) { + int32_t new_cursor = pool_manager_.Emit(this, cursor, kPaddingBytes); + USE(new_cursor); + VIXL_ASSERT(new_cursor == GetCursorOffset()); } } pool_manager_.Bind(this, label, GetCursorOffset()); @@ -430,30 +430,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { Location* location, Condition* cond = NULL) { int size = info->size; - int32_t pc = GetCursorOffset(); + int32_t cursor = GetCursorOffset(); // If we need to emit a branch over the instruction, take this into account. if ((cond != NULL) && NeedBranch(cond)) { size += kBranchSize; - pc += kBranchSize; + cursor += kBranchSize; } - int32_t from = pc; + int32_t from = cursor; from += IsUsingT32() ? 
kT32PcDelta : kA32PcDelta; if (info->pc_needs_aligning) from = AlignDown(from, 4); int32_t min = from + info->min_offset; int32_t max = from + info->max_offset; - ForwardReference<int32_t> temp_ref(pc, + ForwardReference<int32_t> temp_ref(cursor, info->size, min, max, info->alignment); if (pool_manager_.MustEmit(GetCursorOffset(), size, &temp_ref, location)) { - int32_t new_pc = pool_manager_.Emit(this, - GetCursorOffset(), - info->size, - &temp_ref, - location); - USE(new_pc); - VIXL_ASSERT(new_pc == GetCursorOffset()); + int32_t new_cursor = pool_manager_.Emit(this, + GetCursorOffset(), + info->size, + &temp_ref, + location); + USE(new_cursor); + VIXL_ASSERT(new_cursor == GetCursorOffset()); } } @@ -464,13 +464,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // into account, as well as potential 16-bit padding needed to reach the // minimum accessible location. int alignment = literal->GetMaxAlignment(); - int32_t pc = GetCursorOffset(); - int total_size = AlignUp(pc, alignment) - pc + literal->GetSize(); - if (literal->Needs16BitPadding(pc)) total_size += 2; - if (pool_manager_.MustEmit(pc, total_size)) { - int32_t new_pc = pool_manager_.Emit(this, pc, total_size); - USE(new_pc); - VIXL_ASSERT(new_pc == GetCursorOffset()); + int32_t cursor = GetCursorOffset(); + int total_size = AlignUp(cursor, alignment) - cursor + literal->GetSize(); + if (literal->Needs16BitPadding(cursor)) total_size += 2; + if (pool_manager_.MustEmit(cursor, total_size)) { + int32_t new_cursor = pool_manager_.Emit(this, cursor, total_size); + USE(new_cursor); + VIXL_ASSERT(new_cursor == GetCursorOffset()); } pool_manager_.Bind(this, literal, GetCursorOffset()); literal->EmitPoolObject(this); @@ -2897,7 +2897,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { VIXL_ASSERT(OutsideITBlock()); MacroEmissionCheckScope guard(this); ITScope it_scope(this, &cond, guard); - pop(cond, registers); + if (registers.IsSingleRegister() && + 
(!IsUsingT32() || !registers.IsR0toR7orPC())) { + pop(cond, registers.GetFirstAvailableRegister()); + } else if (!registers.IsEmpty()) { + pop(cond, registers); + } } void Pop(RegisterList registers) { Pop(al, registers); } @@ -2917,7 +2922,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { VIXL_ASSERT(OutsideITBlock()); MacroEmissionCheckScope guard(this); ITScope it_scope(this, &cond, guard); - push(cond, registers); + if (registers.IsSingleRegister() && !registers.Includes(sp) && + (!IsUsingT32() || !registers.IsR0toR7orLR())) { + push(cond, registers.GetFirstAvailableRegister()); + } else if (!registers.IsEmpty()) { + push(cond, registers); + } } void Push(RegisterList registers) { Push(al, registers); } @@ -2927,7 +2937,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { VIXL_ASSERT(OutsideITBlock()); MacroEmissionCheckScope guard(this); ITScope it_scope(this, &cond, guard); - push(cond, rt); + if (IsUsingA32() && rt.IsSP()) { + // Only the A32 multiple-register form can push sp. 
+ push(cond, RegisterList(rt)); + } else { + push(cond, rt); + } } void Push(Register rt) { Push(al, rt); } diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc index 534e1d9b..895e8c50 100644 --- a/src/aarch64/assembler-aarch64.cc +++ b/src/aarch64/assembler-aarch64.cc @@ -1054,7 +1054,7 @@ void Assembler::cls(const Register& rd, const Register& rn) { void Assembler::PRE##za(const Register& xd) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \ VIXL_ASSERT(xd.Is64Bits()); \ - Emit(SF(xd) | OP##ZA | Rd(xd)); \ + Emit(SF(xd) | OP##ZA | Rd(xd) | Rn(xzr)); \ } \ \ void Assembler::PRE##b(const Register& xd, const Register& xn) { \ @@ -1066,7 +1066,7 @@ void Assembler::cls(const Register& rd, const Register& rn) { void Assembler::PRE##zb(const Register& xd) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \ VIXL_ASSERT(xd.Is64Bits()); \ - Emit(SF(xd) | OP##ZB | Rd(xd)); \ + Emit(SF(xd) | OP##ZB | Rd(xd) | Rn(xzr)); \ } PAUTH_VARIATIONS(VIXL_DEFINE_ASM_FUNC) @@ -1083,13 +1083,13 @@ void Assembler::pacga(const Register& xd, void Assembler::xpaci(const Register& xd) { VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); VIXL_ASSERT(xd.Is64Bits()); - Emit(SF(xd) | XPACI | Rd(xd)); + Emit(SF(xd) | XPACI | Rd(xd) | Rn(xzr)); } void Assembler::xpacd(const Register& xd) { VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); VIXL_ASSERT(xd.Is64Bits()); - Emit(SF(xd) | XPACD | Rd(xd)); + Emit(SF(xd) | XPACD | Rd(xd) | Rn(xzr)); } @@ -1134,10 +1134,10 @@ void Assembler::LoadStorePair(const CPURegister& rt, if (addr.IsImmediateOffset()) { addrmodeop = LoadStorePairOffsetFixed; } else { - if (addr.IsPreIndex()) { + if (addr.IsImmediatePreIndex()) { addrmodeop = LoadStorePairPreIndexFixed; } else { - VIXL_ASSERT(addr.IsPostIndex()); + VIXL_ASSERT(addr.IsImmediatePostIndex()); addrmodeop = LoadStorePairPostIndexFixed; } } @@ -3852,6 +3852,15 @@ void Assembler::udot(const VRegister& vd, Emit(VFormat(vd) | NEON_UDOT | Rm(vm) | Rn(vn) | Rd(vd)); } +void Assembler::usdot(const VRegister& 
vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM)); + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B()) || (vd.Is4S() && vn.Is16B())); + + Emit(VFormat(vd) | 0x0e809c00 | Rm(vm) | Rn(vn) | Rd(vd)); +} void Assembler::faddp(const VRegister& vd, const VRegister& vn) { VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); @@ -4166,6 +4175,32 @@ void Assembler::udot(const VRegister& vd, ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd)); } +void Assembler::sudot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + int q = vd.Is4S() ? (1U << NEONQ_offset) : 0; + int index_num_bits = 2; + Emit(q | 0x0f00f000 | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | + Rd(vd)); +} + + +void Assembler::usdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + int q = vd.Is4S() ? 
(1U << NEONQ_offset) : 0; + int index_num_bits = 2; + Emit(q | 0x0f80f000 | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | + Rd(vd)); +} // clang-format off #define NEON_BYELEMENT_LIST(V) \ @@ -5224,6 +5259,32 @@ void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) { NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN); } +void Assembler::smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM)); + VIXL_ASSERT(vd.IsLaneSizeS()); + VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB()); + + Emit(0x4e80a400 | Rd(vd) | Rn(vn) | Rm(vm)); +} + +void Assembler::usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM)); + VIXL_ASSERT(vd.IsLaneSizeS()); + VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB()); + + Emit(0x4e80ac00 | Rd(vd) | Rn(vn) | Rm(vm)); +} + +void Assembler::ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM)); + VIXL_ASSERT(vd.IsLaneSizeS()); + VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB()); + + Emit(0x6e80a400 | Rd(vd) | Rn(vn) | Rm(vm)); +} // Note: // For all ToImm instructions below, a difference in case @@ -5287,6 +5348,44 @@ Instr Assembler::ImmFP64(double imm) { return FP64ToImm8(imm) << ImmFP_offset; } // Code generation helpers. +bool Assembler::OneInstrMoveImmediateHelper(Assembler* assm, + const Register& dst, + uint64_t imm) { + bool emit_code = assm != NULL; + unsigned n, imm_s, imm_r; + int reg_size = dst.GetSizeInBits(); + + if (IsImmMovz(imm, reg_size) && !dst.IsSP()) { + // Immediate can be represented in a move zero instruction. Movz can't write + // to the stack pointer. 
+ if (emit_code) { + assm->movz(dst, imm); + } + return true; + } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) { + // Immediate can be represented in a move negative instruction. Movn can't + // write to the stack pointer. + if (emit_code) { + assm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask)); + } + return true; + } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) { + // Immediate can be represented in a logical orr instruction. + VIXL_ASSERT(!dst.IsZero()); + if (emit_code) { + assm->LogicalImmediate(dst, + AppropriateZeroRegFor(dst), + n, + imm_s, + imm_r, + ORR); + } + return true; + } + return false; +} + + void Assembler::MoveWide(const Register& rd, uint64_t imm, int shift, @@ -5694,11 +5793,11 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr, ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0); } - if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) { + if (addr.IsImmediatePreIndex() && IsImmLSUnscaled(offset)) { return base | LoadStorePreIndexFixed | ImmLS(offset); } - if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) { + if (addr.IsImmediatePostIndex() && IsImmLSUnscaled(offset)) { return base | LoadStorePostIndexFixed | ImmLS(offset); } @@ -5720,10 +5819,10 @@ void Assembler::LoadStorePAC(const Register& xt, const MemOperand& addr, LoadStorePACOp op) { VIXL_ASSERT(xt.Is64Bits()); - VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsPreIndex()); + VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePreIndex()); Instr pac_op = op; - if (addr.IsPreIndex()) { + if (addr.IsImmediatePreIndex()) { pac_op |= LoadStorePACPreBit; } diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index f7aafd07..65c55cc4 100644 --- a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -2105,6 +2105,13 @@ class Assembler : public vixl::internal::AssemblerBase { MoveWide(rd, imm, shift, MOVZ); } + // Move immediate, aliases for movz, movn, orr. 
+ void mov(const Register& rd, uint64_t imm) { + if (!OneInstrMoveImmediateHelper(this, rd, imm)) { + VIXL_UNIMPLEMENTED(); + } + } + // Misc instructions. // Monitor debug-mode breakpoint. @@ -3360,6 +3367,21 @@ class Assembler : public vixl::internal::AssemblerBase { // Unsigned dot product [Armv8.2]. void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm); + // Dot Product with unsigned and signed integers (vector). + void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Dot product with signed and unsigned integers (vector, by element). + void sudot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Dot product with unsigned and signed integers (vector, by element). + void usdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + // Signed saturating rounding doubling multiply subtract returning high half // [Armv8.1]. void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm); @@ -3586,6 +3608,15 @@ class Assembler : public vixl::internal::AssemblerBase { const VRegister& vm, int rot); + // Signed 8-bit integer matrix multiply-accumulate (vector). + void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector). + void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned 8-bit integer matrix multiply-accumulate (vector). + void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + // Scalable Vector Extensions. // Absolute value (predicated). @@ -4584,6 +4615,26 @@ class Assembler : public vixl::internal::AssemblerBase { const PRegisterZ& pg, const SVEMemOperand& addr); + // Contiguous load and replicate thirty-two bytes. + void ld1rob(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate sixteen halfwords. 
+ void ld1roh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate eight words. + void ld1row(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate four doublewords. + void ld1rod(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + // Load and broadcast signed byte to vector. void ld1rsb(const ZRegister& zt, const PRegisterZ& pg, @@ -5266,6 +5317,12 @@ class Assembler : public vixl::internal::AssemblerBase { const ZRegister& zn, const ZRegister& zm); + // Splice two vectors under predicate control (constructive). + void splice_con(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + // Signed saturating add vectors (unpredicated). void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); @@ -5820,6 +5877,1030 @@ class Assembler : public vixl::internal::AssemblerBase { // Interleave elements from two half vectors. void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + // Add with carry long (bottom). + void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Add with carry long (top). + void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Add narrow high part (bottom). + void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add narrow high part (top). + void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add pairwise. + void addp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear and exclusive OR. + void bcax(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Scatter lower bits into positions selected by bitmask. 
+ void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Gather lower bits from positions selected by bitmask. + void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Group bits to right or left as selected by bitmask. + void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise select. + void bsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Bitwise select with first input inverted. + void bsl1n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Bitwise select with second input inverted. + void bsl2n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Complex integer add with rotate. + void cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Complex integer dot product (indexed). + void cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Complex integer dot product. + void cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Complex integer multiply-add with rotate (indexed). + void cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Complex integer multiply-add with rotate. + void cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Bitwise exclusive OR of three vectors. + void eor3(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Interleaving exclusive OR (bottom, top). + void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleaving exclusive OR (top, bottom). + void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point add pairwise. 
+ void faddp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point up convert long (top, predicated). + void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point down convert and narrow (top, predicated). + void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point down convert, rounding to odd (predicated). + void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point down convert, rounding to odd (top, predicated). + void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point base 2 logarithm as integer. + void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point maximum number pairwise. + void fmaxnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum pairwise. + void fmaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number pairwise. + void fminnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum pairwise. + void fminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Half-precision floating-point multiply-add long to single-precision + // (bottom). + void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Half-precision floating-point multiply-add long to single-precision + // (top). + void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Half-precision floating-point multiply-subtract long from + // single-precision (bottom). 
+ void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Half-precision floating-point multiply-subtract long from + // single-precision (top, indexed). + void fmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-add long to single-precision + // (bottom, indexed). + void fmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-add long to single-precision + // (top, indexed). + void fmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-subtract long from + // single-precision (bottom, indexed). + void fmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-subtract long from + // single-precision (top). + void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Count matching elements in vector. + void histcnt(const ZRegister& zd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Count matching elements in vector segments. + void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Gather load non-temporal signed bytes. + void ldnt1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Gather load non-temporal signed halfwords. + void ldnt1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Gather load non-temporal signed words. + void ldnt1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Detect any matching elements, setting the condition flags. + void match(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply-add to accumulator (indexed). 
+ void mla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Multiply-subtract from accumulator (indexed). + void mls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Multiply (indexed). + void mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Multiply vectors (unpredicated). + void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise inverted select. + void nbsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Detect no matching elements, setting the condition flags. + void nmatch(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Polynomial multiply vectors (unpredicated). + void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Polynomial multiply long (bottom). + void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Polynomial multiply long (top). + void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding add narrow high part (bottom). + void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding add narrow high part (top). + void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding shift right narrow by immediate (bottom). + void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Rounding shift right narrow by immediate (top). + void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Rounding subtract narrow high part (bottom). + void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding subtract narrow high part (top). + void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference and accumulate. 
+ void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference and accumulate long (bottom). + void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference and accumulate long (top). + void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference long (bottom). + void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference long (top). + void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add and accumulate long pairwise. + void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn); + + // Signed add long (bottom). + void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add long (bottom + top). + void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add long (top). + void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add wide (bottom). + void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add wide (top). + void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract with carry long (bottom). + void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Subtract with carry long (top). + void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed halving addition. + void shadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Shift right narrow by immediate (bottom). + void shrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Shift right narrow by immediate (top). + void shrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed halving subtract. 
+ void shsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed halving subtract reversed vectors. + void shsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Shift left and insert (immediate). + void sli(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed maximum pairwise. + void smaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed minimum pairwise. + void sminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed multiply-add long to accumulator (bottom, indexed). + void smlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-add long to accumulator (bottom). + void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply-add long to accumulator (top, indexed). + void smlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-add long to accumulator (top). + void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply-subtract long from accumulator (bottom, indexed). + void smlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-subtract long from accumulator (bottom). + void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply-subtract long from accumulator (top, indexed). + void smlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-subtract long from accumulator (top). + void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply returning high half (unpredicated). 
+ void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply long (bottom, indexed). + void smullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply long (bottom). + void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply long (top, indexed). + void smullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply long (top). + void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating absolute value. + void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed saturating addition (predicated). + void sqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Saturating complex integer add with rotate. + void sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Signed saturating doubling multiply-add long to accumulator (bottom, + // indexed). + void sqdmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-add long to accumulator (bottom). + void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply-add long to accumulator (bottom x + // top). + void sqdmlalbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating doubling multiply-add long to accumulator (top, + // indexed). + void sqdmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-add long to accumulator (top). + void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply-subtract long from accumulator + // (bottom, indexed). 
+ void sqdmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-subtract long from accumulator + // (bottom). + void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply-subtract long from accumulator + // (bottom x top). + void sqdmlslbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating doubling multiply-subtract long from accumulator + // (top, indexed). + void sqdmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-subtract long from accumulator + // (top). + void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply high (indexed). + void sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply high (unpredicated). + void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply long (bottom, indexed). + void sqdmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply long (bottom). + void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply long (top, indexed). + void sqdmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply long (top). + void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating negate. + void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Saturating rounding doubling complex integer multiply-add high with + // rotate (indexed). 
+ void sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Saturating rounding doubling complex integer multiply-add high with + // rotate. + void sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Signed saturating rounding doubling multiply-add high to accumulator + // (indexed). + void sqrdmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating rounding doubling multiply-add high to accumulator + // (unpredicated). + void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating rounding doubling multiply-subtract high from + // accumulator (indexed). + void sqrdmlsh(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating rounding doubling multiply-subtract high from + // accumulator (unpredicated). + void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating rounding doubling multiply high (indexed). + void sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating rounding doubling multiply high (unpredicated). + void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating rounding shift left by vector (predicated). + void sqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating rounding shift left reversed vectors (predicated). + void sqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating rounding shift right narrow by immediate (bottom). + void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating rounding shift right narrow by immediate (top). 
+ void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating rounding shift right unsigned narrow by immediate + // (bottom). + void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating rounding shift right unsigned narrow by immediate + // (top). + void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift left by immediate. + void sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Signed saturating shift left by vector (predicated). + void sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating shift left reversed vectors (predicated). + void sqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating shift left unsigned by immediate. + void sqshlu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Signed saturating shift right narrow by immediate (bottom). + void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift right narrow by immediate (top). + void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift right unsigned narrow by immediate (bottom). + void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift right unsigned narrow by immediate (top). + void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating subtraction (predicated). + void sqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating subtraction reversed vectors (predicated). + void sqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating extract narrow (bottom). 
+ void sqxtnb(const ZRegister& zd, const ZRegister& zn); + + // Signed saturating extract narrow (top). + void sqxtnt(const ZRegister& zd, const ZRegister& zn); + + // Signed saturating unsigned extract narrow (bottom). + void sqxtunb(const ZRegister& zd, const ZRegister& zn); + + // Signed saturating unsigned extract narrow (top). + void sqxtunt(const ZRegister& zd, const ZRegister& zn); + + // Signed rounding halving addition. + void srhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Shift right and insert (immediate). + void sri(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed rounding shift left by vector (predicated). + void srshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed rounding shift left reversed vectors (predicated). + void srshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed rounding shift right by immediate. + void srshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Signed rounding shift right and accumulate (immediate). + void srsra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Signed shift left long by immediate (bottom). + void sshllb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed shift left long by immediate (top). + void sshllt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed shift right and accumulate (immediate). + void ssra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Signed subtract long (bottom). + void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract long (bottom - top). + void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract long (top). 
+ void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract long (top - bottom). + void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract wide (bottom). + void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract wide (top). + void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract narrow high part (bottom). + void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract narrow high part (top). + void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating addition of unsigned value. + void suqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Programmable table lookup in one or two vector table (zeroing). + void tbl(const ZRegister& zd, + const ZRegister& zn1, + const ZRegister& zn2, + const ZRegister& zm); + + // Programmable table lookup in single vector table (merging). + void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference and accumulate. + void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference and accumulate long (bottom). + void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference and accumulate long (top). + void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference long (bottom). + void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference long (top). + void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add and accumulate long pairwise. + void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned add long (bottom). 
+ void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add long (top). + void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add wide (bottom). + void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add wide (top). + void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned halving addition. + void uhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned halving subtract. + void uhsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned halving subtract reversed vectors. + void uhsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned maximum pairwise. + void umaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned minimum pairwise. + void uminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned multiply-add long to accumulator (bottom, indexed). + void umlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-add long to accumulator (bottom). + void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply-add long to accumulator (top, indexed). + void umlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-add long to accumulator (top). + void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply-subtract long from accumulator (bottom, indexed). + void umlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-subtract long from accumulator (bottom). 
+ void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply-subtract long from accumulator (top, indexed). + void umlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-subtract long from accumulator (top). + void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply returning high half (unpredicated). + void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply long (bottom, indexed). + void umullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply long (bottom). + void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply long (top, indexed). + void umullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply long (top). + void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating addition (predicated). + void uqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating rounding shift left by vector (predicated). + void uqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating rounding shift left reversed vectors (predicated). + void uqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating rounding shift right narrow by immediate (bottom). + void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating rounding shift right narrow by immediate (top). + void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating shift left by immediate. 
+ void uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Unsigned saturating shift left by vector (predicated). + void uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating shift left reversed vectors (predicated). + void uqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating shift right narrow by immediate (bottom). + void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating shift right narrow by immediate (top). + void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating subtraction (predicated). + void uqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating subtraction reversed vectors (predicated). + void uqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating extract narrow (bottom). + void uqxtnb(const ZRegister& zd, const ZRegister& zn); + + // Unsigned saturating extract narrow (top). + void uqxtnt(const ZRegister& zd, const ZRegister& zn); + + // Unsigned reciprocal estimate (predicated). + void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned rounding halving addition. + void urhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned rounding shift left by vector (predicated). + void urshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned rounding shift left reversed vectors (predicated). + void urshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned rounding shift right by immediate. 
+ void urshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Unsigned reciprocal square root estimate (predicated). + void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned rounding shift right and accumulate (immediate). + void ursra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Unsigned shift left long by immediate (bottom). + void ushllb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned shift left long by immediate (top). + void ushllt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating addition of signed value. + void usqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned shift right and accumulate (immediate). + void usra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Unsigned subtract long (bottom). + void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned subtract long (top). + void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned subtract wide (bottom). + void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned subtract wide (top). + void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // While decrementing signed scalar greater than or equal to scalar. + void whilege(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While decrementing signed scalar greater than scalar. + void whilegt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While decrementing unsigned scalar higher than scalar. + void whilehi(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While decrementing unsigned scalar higher or same as scalar. 
+ void whilehs(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While free of read-after-write conflicts. + void whilerw(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While free of write-after-read/write conflicts. + void whilewr(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // Bitwise exclusive OR and rotate right by immediate. + void xar(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int shift); + + // Floating-point matrix multiply-accumulate. + void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed integer matrix multiply-accumulate. + void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned by signed integer matrix multiply-accumulate. + void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned integer matrix multiply-accumulate. + void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned by signed integer dot product. + void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned by signed integer indexed dot product. + void usdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed by unsigned integer indexed dot product. + void sudot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + // Emit generic instructions. // Emit raw instructions into the instruction stream. @@ -5850,6 +6931,9 @@ class Assembler : public vixl::internal::AssemblerBase { } // Code generation helpers. + static bool OneInstrMoveImmediateHelper(Assembler* assm, + const Register& dst, + uint64_t imm); // Register encoding. 
template <int hibit, int lobit> @@ -5983,11 +7067,11 @@ class Assembler : public vixl::internal::AssemblerBase { static Instr ImmTestBranchBit(unsigned bit_pos) { VIXL_ASSERT(IsUint6(bit_pos)); // Subtract five from the shift offset, as we need bit 5 from bit_pos. - unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5); - unsigned b40 = bit_pos << ImmTestBranchBit40_offset; - b5 &= ImmTestBranchBit5_mask; - b40 &= ImmTestBranchBit40_mask; - return b5 | b40; + unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5); + unsigned bit40 = bit_pos << ImmTestBranchBit40_offset; + bit5 &= ImmTestBranchBit5_mask; + bit40 &= ImmTestBranchBit40_mask; + return bit5 | bit40; } // Data Processing encoding. @@ -6660,6 +7744,16 @@ class Assembler : public vixl::internal::AssemblerBase { Instr immoffset_op, int imm_divisor = 1); + void SVELd1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize, + bool is_signed); + void SVESt1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize); + void Prefetch(PrefetchOperation op, const MemOperand& addr, LoadStoreScalingOption option = PreferScaledOffset); @@ -6724,27 +7818,30 @@ class Assembler : public vixl::internal::AssemblerBase { int pattern, int multiplier); - Instr EncodeSVEShiftImmediate(Shift shift_op, - int shift, - int lane_size_in_bits); + Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits); + + Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits); void SVEBitwiseShiftImmediate(const ZRegister& zd, const ZRegister& zn, Instr encoded_imm, - SVEBitwiseShiftUnpredicatedOp op); + Instr op); void SVEBitwiseShiftImmediatePred(const ZRegister& zdn, const PRegisterM& pg, Instr encoded_imm, - SVEBitwiseShiftByImm_PredicatedOp op); + Instr op); + + Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d); - Instr 
SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, - const ZRegister& zm, - int index, - Instr op_h, - Instr op_s, - Instr op_d); + Instr SVEMulLongIndexHelper(const ZRegister& zm, int index); + Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index); void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop, const PRegister& pg, diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc index f7cf8b21..84d4d517 100644 --- a/src/aarch64/assembler-sve-aarch64.cc +++ b/src/aarch64/assembler-sve-aarch64.cc @@ -165,11 +165,10 @@ void Assembler::orr(const ZRegister& zd, // SVEBitwiseShiftPredicated. -void Assembler::SVEBitwiseShiftImmediatePred( - const ZRegister& zdn, - const PRegisterM& pg, - Instr encoded_imm_and_tsz, - SVEBitwiseShiftByImm_PredicatedOp op) { +void Assembler::SVEBitwiseShiftImmediatePred(const ZRegister& zdn, + const PRegisterM& pg, + Instr encoded_imm_and_tsz, + Instr op) { Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) << 5; Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; @@ -189,7 +188,7 @@ void Assembler::asr(const ZRegister& zd, VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); VIXL_ASSERT(zd.Is(zn)); Instr encoded_imm = - EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASR_z_p_zi); } @@ -229,7 +228,7 @@ void Assembler::asrd(const ZRegister& zd, VIXL_ASSERT(zd.Is(zn)); Instr encoded_imm = - EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASRD_z_p_zi); } @@ -264,7 +263,7 @@ void Assembler::lsl(const ZRegister& zd, VIXL_ASSERT(zd.Is(zn)); Instr encoded_imm = - EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); 
SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSL_z_p_zi); } @@ -321,7 +320,7 @@ void Assembler::lsr(const ZRegister& zd, VIXL_ASSERT(zd.Is(zn)); Instr encoded_imm = - EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSR_z_p_zi); } @@ -366,15 +365,13 @@ void Assembler::lsrr(const ZRegister& zd, // SVEBitwiseShiftUnpredicated. -Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op, - int shift, - int lane_size_in_bits) { - if (shift_op == LSL) { - VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); - return lane_size_in_bits + shift; - } +Instr Assembler::EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits) { + VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); + return lane_size_in_bits + shift; +} - VIXL_ASSERT((shift_op == ASR) || (shift_op == LSR)); +Instr Assembler::EncodeSVEShiftRightImmediate(int shift, + int lane_size_in_bits) { VIXL_ASSERT((shift > 0) && (shift <= lane_size_in_bits)); return (2 * lane_size_in_bits) - shift; } @@ -382,7 +379,7 @@ Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op, void Assembler::SVEBitwiseShiftImmediate(const ZRegister& zd, const ZRegister& zn, Instr encoded_imm_and_tsz, - SVEBitwiseShiftUnpredicatedOp op) { + Instr op) { Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) << 16; Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; @@ -393,7 +390,7 @@ void Assembler::asr(const ZRegister& zd, const ZRegister& zn, int shift) { VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); VIXL_ASSERT(AreSameLaneSize(zd, zn)); Instr encoded_imm = - EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); SVEBitwiseShiftImmediate(zd, zn, encoded_imm, ASR_z_zi); } @@ -410,7 +407,7 @@ void Assembler::asr(const ZRegister& zd, void Assembler::lsl(const ZRegister& zd, const ZRegister& zn, int 
shift) { VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); Instr encoded_imm = - EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSL_z_zi); } @@ -427,7 +424,7 @@ void Assembler::lsl(const ZRegister& zd, void Assembler::lsr(const ZRegister& zd, const ZRegister& zn, int shift) { VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); Instr encoded_imm = - EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSR_z_zi); } @@ -1318,26 +1315,10 @@ void Assembler::fcmla(const ZRegister& zda, VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); - VIXL_ASSERT(index >= 0); - - int lane_size = zda.GetLaneSizeInBytes(); - - Instr zm_and_idx = 0; - Instr op = FCMLA_z_zzzi_h; - if (lane_size == kHRegSizeInBytes) { - // Zm<18:16> | i2<20:19> - VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3)); - zm_and_idx = (index << 19) | Rx<18, 16>(zm); - } else { - // Zm<19:16> | i1<20> - VIXL_ASSERT(lane_size == kSRegSizeInBytes); - VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1)); - zm_and_idx = (index << 20) | Rx<19, 16>(zm); - op = FCMLA_z_zzzi_s; - } Instr rotate_bit = (rot / 90) << 10; - Emit(op | zm_and_idx | rotate_bit | Rd(zda) | Rn(zn)); + Emit(FCMLA_z_zzzi_h | SVEMulComplexIndexHelper(zm, index) | rotate_bit | + Rd(zda) | Rn(zn)); } // SVEFPFastReduction. 
@@ -1539,12 +1520,12 @@ void Assembler::fnmsb(const ZRegister& zdn, Emit(FNMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); } -Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, - const ZRegister& zm, - int index, - Instr op_h, - Instr op_s, - Instr op_d) { +Instr Assembler::SVEMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d) { Instr size = lane_size_in_bytes_log2 << SVESize_offset; Instr zm_with_index = Rm(zm); Instr op = 0xffffffff; @@ -1563,15 +1544,15 @@ Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, op = op_h; break; case kSRegSizeInBytesLog2: - VIXL_ASSERT(zm.GetCode() <= 7); - VIXL_ASSERT(IsUint2(index)); + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint2(index)); // Top two bits of "zm" encode the index. zm_with_index |= (index & 3) << (Rm_offset + 3); op = op_s; break; case kDRegSizeInBytesLog2: - VIXL_ASSERT(zm.GetCode() <= 15); - VIXL_ASSERT(IsUint1(index)); + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint1(index)); // Top bit of "zm" encodes the index. zm_with_index |= (index & 1) << (Rm_offset + 4); op = op_d; @@ -1582,6 +1563,45 @@ Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, return op | zm_with_index | size; } +Instr Assembler::SVEMulLongIndexHelper(const ZRegister& zm, int index) { + Instr imm_field; + Instr zm_id; + if (zm.IsLaneSizeH()) { + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint3(index)); + imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19; + zm_id = Rx<18, 16>(zm); + } else { + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint2(index)); + imm_field = ExtractBit(index, 1) << 20; + zm_id = Rx<19, 16>(zm); + } + + // Synthesize the low part of immediate encoding. 
+ imm_field |= ExtractBit(index, 0) << 11; + + return zm_id | imm_field; +} + +Instr Assembler::SVEMulComplexIndexHelper(const ZRegister& zm, int index) { + Instr zm_idx_size; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i2<20:19> + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint2(index)); + zm_idx_size = (index << 19) | Rx<18, 16>(zm) | 0; + } else { + VIXL_ASSERT(zm.IsLaneSizeS()); + // Zm<19:16> | i1<20> + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint1(index)); + zm_idx_size = (index << 20) | Rx<19, 16>(zm) | (1 << 22); + } + return zm_idx_size; +} + // SVEFPMulAddIndex. void Assembler::fmla(const ZRegister& zda, @@ -1593,12 +1613,12 @@ void Assembler::fmla(const ZRegister& zda, // The encoding of opcode, index, Zm, and size are synthesized in this // variable. - Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), - zm, - index, - FMLA_z_zzzi_h, - FMLA_z_zzzi_s, - FMLA_z_zzzi_d); + Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLA_z_zzzi_h, + FMLA_z_zzzi_s, + FMLA_z_zzzi_d); Emit(synthesized_op | Rd(zda) | Rn(zn)); } @@ -1612,12 +1632,12 @@ void Assembler::fmls(const ZRegister& zda, // The encoding of opcode, index, Zm, and size are synthesized in this // variable. - Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), - zm, - index, - FMLS_z_zzzi_h, - FMLS_z_zzzi_s, - FMLS_z_zzzi_d); + Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLS_z_zzzi_h, + FMLS_z_zzzi_s, + FMLS_z_zzzi_d); Emit(synthesized_op | Rd(zda) | Rn(zn)); } @@ -1638,12 +1658,12 @@ void Assembler::fmul(const ZRegister& zd, // The encoding of opcode, index, Zm, and size are synthesized in this // variable. 
- Instr synthesized_op = SVEFPMulIndexHelper(zd.GetLaneSizeInBytesLog2(), - zm, - index, - FMUL_z_zzi_h, - FMUL_z_zzi_s, - FMUL_z_zzi_d); + Instr synthesized_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + FMUL_z_zzi_h, + FMUL_z_zzi_s, + FMUL_z_zzi_d); Emit(synthesized_op | Rd(zd) | Rn(zn)); } @@ -4743,57 +4763,67 @@ void Assembler::SVELd1St1ScaImmHelper(const ZRegister& zt, Emit(op | Rt(zt) | PgLow8(pg) | RnSP(addr.GetScalarBase())); } -void Assembler::ld1rqb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(0)); - VIXL_ASSERT(zt.IsLaneSizeB()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQB_z_p_br_contiguous, - LD1RQB_z_p_bi_u8, - 16); -} +void Assembler::SVELd1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize_bytes_log2, + bool is_signed) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(addr.IsVectorPlusScalar()); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + VIXL_ASSERT(AreSameLaneSize(zn, zt)); -void Assembler::ld1rqd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(3)); - VIXL_ASSERT(zt.IsLaneSizeD()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQD_z_p_br_contiguous, - LD1RQD_z_p_bi_u64, - 16); -} + uint32_t esize = zn.GetLaneSizeInBytesLog2(); + uint32_t b14_13 = 0; + if (!is_signed) b14_13 = zn.IsLaneSizeS() ? 0x1 : 0x2; -void Assembler::ld1rqh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(1)); - VIXL_ASSERT(zt.IsLaneSizeH()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQH_z_p_br_contiguous, - LD1RQH_z_p_bi_u16, - 16); + Instr op = 0x04008000; // LDNT1 with vector plus scalar addressing mode. 
+ op |= (esize << 30) | (msize_bytes_log2 << 23) | (b14_13 << 13); + Emit(op | Rt(zt) | PgLow8(pg) | + SVEMemOperandHelper(msize_bytes_log2, 1, addr, true)); } -void Assembler::ld1rqw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(2)); - VIXL_ASSERT(zt.IsLaneSizeS()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQW_z_p_br_contiguous, - LD1RQW_z_p_bi_u32, - 16); -} +void Assembler::SVESt1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize_bytes_log2) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(addr.IsVectorPlusScalar()); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + VIXL_ASSERT(AreSameLaneSize(zn, zt)); + + uint32_t bit22 = zn.IsLaneSizeS() ? (1 << 22) : 0; + Instr op = 0xe4002000; // STNT1 with vector plus scalar addressing mode. + op |= bit22 | (msize_bytes_log2 << 23); + Emit(op | Rt(zt) | PgLow8(pg) | + SVEMemOperandHelper(msize_bytes_log2, 1, addr, true)); +} + +#define VIXL_SVE_LD1R_LIST(V) \ + V(qb, 0, B, LD1RQB_z_p_br_contiguous, LD1RQB_z_p_bi_u8, 16) \ + V(qh, 1, H, LD1RQH_z_p_br_contiguous, LD1RQH_z_p_bi_u16, 16) \ + V(qw, 2, S, LD1RQW_z_p_br_contiguous, LD1RQW_z_p_bi_u32, 16) \ + V(qd, 3, D, LD1RQD_z_p_br_contiguous, LD1RQD_z_p_bi_u64, 16) \ + V(ob, 0, B, 0xa4200000, 0xa4202000, 32) \ + V(oh, 1, H, 0xa4a00000, 0xa4a02000, 32) \ + V(ow, 2, S, 0xa5200000, 0xa5202000, 32) \ + V(od, 3, D, 0xa5a00000, 0xa5a02000, 32) + +#define VIXL_DEFINE_ASM_FUNC(FN, SH, SZ, SCA, IMM, BYTES) \ + void Assembler::ld1r##FN(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT((BYTES == 16) || \ + ((BYTES == 32) && (CPUHas(CPUFeatures::kSVEF64MM)))); \ + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(SH)); \ + VIXL_ASSERT(zt.IsLaneSize##SZ()); \ + SVELd1St1ScaImmHelper(zt, pg, addr, SCA, IMM, BYTES); \ + } 
+VIXL_SVE_LD1R_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC +#undef VIXL_SVE_LD1R_LIST #define VIXL_DEFINE_LDFF1(MSZ, LANE_SIZE) \ void Assembler::ldff1##MSZ(const ZRegister& zt, \ @@ -4930,12 +4960,17 @@ void Assembler::ldnt1b(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1B_z_p_br_contiguous, - LDNT1B_z_p_bi_contiguous); + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 0, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1B_z_p_br_contiguous, + LDNT1B_z_p_bi_contiguous); + } } void Assembler::ldnt1d(const ZRegister& zt, @@ -4943,12 +4978,17 @@ void Assembler::ldnt1d(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1D_z_p_br_contiguous, - LDNT1D_z_p_bi_contiguous); + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 3, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1D_z_p_br_contiguous, + LDNT1D_z_p_bi_contiguous); + } } void Assembler::ldnt1h(const ZRegister& zt, @@ -4956,12 +4996,17 @@ void Assembler::ldnt1h(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1H_z_p_br_contiguous, - LDNT1H_z_p_bi_contiguous); + 
(addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 1, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1H_z_p_br_contiguous, + LDNT1H_z_p_bi_contiguous); + } } void Assembler::ldnt1w(const ZRegister& zt, @@ -4969,12 +5014,38 @@ void Assembler::ldnt1w(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1W_z_p_br_contiguous, - LDNT1W_z_p_bi_contiguous); + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 2, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1W_z_p_br_contiguous, + LDNT1W_z_p_bi_contiguous); + } +} + +void Assembler::ldnt1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)); + SVELd1VecScaHelper(zt, pg, addr, 0, /* is_signed = */ true); +} + +void Assembler::ldnt1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)); + SVELd1VecScaHelper(zt, pg, addr, 1, /* is_signed = */ true); +} + +void Assembler::ldnt1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)); + SVELd1VecScaHelper(zt, pg, addr, 2, /* is_signed = */ true); } Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2, @@ -5002,7 +5073,13 @@ Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2, VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); 
VIXL_ASSERT(IsMultiple(imm, (1 << msize_in_bytes_log2))); op = Rn(zn) | ImmUnsignedField<20, 16>(imm >> msize_in_bytes_log2); - + } else if (addr.IsVectorPlusScalar()) { + VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER); + VIXL_ASSERT(addr.GetShiftAmount() == 0); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + Register xm = addr.GetScalarOffset(); + op = Rn(zn) | Rm(xm); } else if (addr.IsScalarPlusVector()) { // We have to support several different addressing modes. Some instructions // support a subset of these, but the SVEMemOperand encoding is consistent. @@ -5156,12 +5233,17 @@ void Assembler::stnt1b(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1B_z_p_br_contiguous, - STNT1B_z_p_bi_contiguous); + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVESt1VecScaHelper(zt, pg, addr, 0); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1B_z_p_br_contiguous, + STNT1B_z_p_bi_contiguous); + } } void Assembler::stnt1d(const ZRegister& zt, @@ -5169,12 +5251,17 @@ void Assembler::stnt1d(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1D_z_p_br_contiguous, - STNT1D_z_p_bi_contiguous); + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVESt1VecScaHelper(zt, pg, addr, 3); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1D_z_p_br_contiguous, + STNT1D_z_p_bi_contiguous); + } } void 
Assembler::stnt1h(const ZRegister& zt, @@ -5182,12 +5269,17 @@ void Assembler::stnt1h(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1H_z_p_br_contiguous, - STNT1H_z_p_bi_contiguous); + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVESt1VecScaHelper(zt, pg, addr, 1); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1H_z_p_br_contiguous, + STNT1H_z_p_bi_contiguous); + } } void Assembler::stnt1w(const ZRegister& zt, @@ -5195,12 +5287,17 @@ void Assembler::stnt1w(const ZRegister& zt, const SVEMemOperand& addr) { VIXL_ASSERT(addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1W_z_p_br_contiguous, - STNT1W_z_p_bi_contiguous); + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVESt1VecScaHelper(zt, pg, addr, 2); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1W_z_p_br_contiguous, + STNT1W_z_p_bi_contiguous); + } } void Assembler::str(const CPURegister& rt, const SVEMemOperand& addr) { @@ -5471,14 +5568,27 @@ void Assembler::ext(const ZRegister& zd, // 0000 0101 001. .... 000. .... .... .... // imm8h<20:16> | imm8l<12:10> | Zm<9:5> | Zdn<4:0> - USE(zn); + // EXT <Zd>.B, { <Zn1>.B, <Zn2>.B }, #<imm> + // 0000 0101 011. .... 000. .... .... .... 
+ // imm8h<20:16> | imm8l<12:10> | Zn<9:5> | Zd<4:0> + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); VIXL_ASSERT(IsUint8(offset)); int imm8h = ExtractUnsignedBitfield32(7, 3, offset); int imm8l = ExtractUnsignedBitfield32(2, 0, offset); - Emit(EXT_z_zi_des | Rd(zd) | Rn(zm) | ImmUnsignedField<20, 16>(imm8h) | + + Instr op; + if (zd.Is(zn)) { + // Destructive form. + op = EXT_z_zi_des | Rn(zm); + } else { + // Constructive form (requires SVE2). + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm)); + op = 0x05600000 | Rn(zn); + } + + Emit(op | Rd(zd) | ImmUnsignedField<20, 16>(imm8h) | ImmUnsignedField<12, 10>(imm8l)); } @@ -5814,16 +5924,37 @@ void Assembler::splice(const ZRegister& zd, const PRegister& pg, const ZRegister& zn, const ZRegister& zm) { - // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> - // 0000 0101 ..10 1100 100. .... .... .... - // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); + if (zd.Aliases(zn)) { + // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> + // 0000 0101 ..10 1100 100. .... .... .... + // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); + } else { + splice_con(zd, pg, zn, zm); + } +} + +void Assembler::splice_con(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn1, + const ZRegister& zn2) { + // SPLICE <Zd>.<T>, <Pg>, { <Zn1>.<T>, <Zn2>.<T> } + // 0000 0101 ..10 1101 100. .... .... .... 
+ // size<23:22> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + USE(zn2); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreConsecutive(zn1, zn2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn1, zn2)); + + Emit(0x052d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn1)); } // SVEPermuteVectorUnpredicated. @@ -6485,5 +6616,3284 @@ void Assembler::nots(const PRegisterWithLaneSize& pd, eors(pd, pg, pn, pg.VnB()); } +// SVE2 + +void Assembler::adclb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // ADCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 0.0. .... 1101 00.. .... .... + // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0; + Emit(0x4500d000 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::adclt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // ADCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 0.0. .... 1101 01.. .... .... + // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0; + Emit(0x4500d400 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::addhnb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ADDHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..1. .... 0110 00.. .... .... 
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206000 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::addhnt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ADDHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..1. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206400 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::addp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0001 101. .... .... .... + // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4411a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::bcax(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BCAX <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D + // 0000 0100 011. .... 0011 10.. .... .... 
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04603800 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::bdep(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // BDEP <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..0. .... 1011 01.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4500b400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // BEXT <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..0. .... 1011 00.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4500b000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bgrp(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // BGRP <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..0. .... 1011 10.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4500b800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D + // 0000 0100 001. .... 0011 11.. .... .... 
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04203c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::bsl1n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BSL1N <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D + // 0000 0100 011. .... 0011 11.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04603c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::bsl2n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BSL2N <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D + // 0000 0100 101. .... 0011 11.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04a03c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // CADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const> + // 0100 0101 ..00 0000 1101 1... .... .... + // size<23:22> | op<16> | rot<10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT((rot == 90) || (rot == 270)); + + Instr rotate_bit = (rot == 90) ? 0 : (1 << 10); + Emit(0x4500d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm)); +} + +void Assembler::cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const> + // 0100 0100 111. .... 0100 .... .... .... 
+ // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeB()) { + // Zm<18:16> | i2<20:19> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3)); + zm_and_idx = (index << 19) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i1<20> + VIXL_ASSERT(zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1)); + zm_and_idx = (index << 20) | Rx<19, 16>(zm); + } + + Instr rotate_bits = (rot / 90) << 10; + Emit(0x44a04000 | zm_and_idx | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // CDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const> + // 0100 0100 ..0. .... 0001 .... .... .... + // size<23:22> | Zm<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + + Instr rotate_bits = (rot / 90) << 10; + Emit(0x44001000 | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const> + // 0100 0100 101. .... 0110 .... .... .... 
+ // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44a06000 | SVEMulComplexIndexHelper(zm, index) | rotate_bit | Rd(zda) | + Rn(zn)); +} + +void Assembler::cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // CMLA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const> + // 0100 0100 ..0. .... 0010 .... .... .... + // size<23:22> | Zm<20:16> | op<12> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44002000 | rotate_bit | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::eor3(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // EOR3 <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D + // 0000 0100 001. .... 0011 10.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04203800 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::eorbt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // EORBT <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..0. .... 1001 00.. .... .... + // size<23:22> | Zm<20:16> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x45009000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::eortb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // EORTB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..0. .... 1001 01.. .... .... 
+ // size<23:22> | Zm<20:16> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x45009400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::faddp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0100 ..01 0000 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64108000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fcvtlt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTLT <Zd>.S, <Pg>/M, <Zn>.H + // 0110 0100 1000 1001 101. .... .... .... + // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Instr op; + if (zd.IsLaneSizeD() && zn.IsLaneSizeS()) { + op = 0x64cba000; + } else { + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeH()); + op = 0x6489a000; + } + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtnt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTNT <Zd>.S, <Pg>/M, <Zn>.D + // 0110 0100 1100 1010 101. .... .... .... + // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Instr op; + if (zd.IsLaneSizeS() && zn.IsLaneSizeD()) { + op = 0x64caa000; + } else { + VIXL_ASSERT(zd.IsLaneSizeH() && zn.IsLaneSizeS()); + op = 0x6488a000; + } + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTX <Zd>.S, <Pg>/M, <Zn>.D + // 0110 0101 0000 1010 101. .... .... .... 
+ // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeD()); + + Emit(0x650aa000 | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtxnt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTXNT <Zd>.S, <Pg>/M, <Zn>.D + // 0110 0100 0000 1010 101. .... .... .... + // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0x640aa000 | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::flogb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FLOGB <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0110 0101 0001 1..0 101. .... .... .... + // opc<23:22> | opc2<18:17> | U<16> | Pg<12:10> | Zn<9:5> | Zd<4:0> | size<> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + // Size field is encoded in bits <18:17> rather than <23:22>. + Instr size = SVESize(zd) >> 5; + Emit(0x6518a000 | size | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAXNMP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0100 ..01 0100 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64148000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0100 ..01 0110 100. .... .... .... 
+ // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64168000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fminnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMINNMP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0100 ..01 0101 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64158000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0100 ..01 0111 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64178000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLALB <Zda>.S, <Zn>.H, <Zm>.H + // 0110 0100 101. .... 1000 00.. .... .... 
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a08000 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a04000 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::fmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLALT <Zda>.S, <Zn>.H, <Zm>.H + // 0110 0100 101. .... 1000 01.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a08400 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // FMLALT <Zda>.S, <Zn>.H, <Zm>.H + // 0110 0100 101. .... 1000 01.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a04400 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::fmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLSLB <Zda>.S, <Zn>.H, <Zm>.H + // 0110 0100 101. .... 1010 00.. .... .... 
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a0a000 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // FMLSLB <Zda>.S, <Zn>.H, <Zm>.H + // 0110 0100 101. .... 1010 00.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a06000 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::fmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLSLT <Zda>.S, <Zn>.H, <Zm>.H + // 0110 0100 101. .... 1010 01.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a0a400 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // FMLSLT <Zda>.S, <Zn>.H, <Zm>.H + // 0110 0100 101. .... 1010 01.. .... .... 
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a06400 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::histcnt(const ZRegister& zd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // HISTCNT <Zd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..1. .... 110. .... .... .... + // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(0x4520c000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::histseg(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // HISTSEG <Zd>.B, <Zn>.B, <Zm>.B + // 0100 0101 ..1. .... 1010 00.. .... .... + // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeB()); + + Emit(0x4520a000 | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::match(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..1. .... 100. .... ...0 .... + // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | op<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(pd, zn, zm)); + VIXL_ASSERT(zm.IsLaneSizeB() || zm.IsLaneSizeH()); + + Emit(0x45208000 | SVESize(zm) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::mla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // MLA <Zda>.D, <Zn>.D, <Zm>.D[<imm>] + // 0100 0100 111. 
.... 0000 10.. .... .... + // size<23:22> | opc<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + 0x44200800, + 0x44a00800, + 0x44e00800); + + Emit(synthesised_op | Rd(zda) | Rn(zn)); +} + +void Assembler::mls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // MLS <Zda>.D, <Zn>.D, <Zm>.D[<imm>] + // 0100 0100 111. .... 0000 11.. .... .... + // size<23:22> | opc<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + 0x44200c00, + 0x44a00c00, + 0x44e00c00); + + Emit(synthesised_op | Rd(zda) | Rn(zn)); +} + +void Assembler::mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + // 0100 0100 111. .... 1111 10.. .... .... + // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + 0x4420f800, + 0x44a0f800, + 0x44e0f800); + + Emit(synthesised_op | Rd(zd) | Rn(zn)); +} + +void Assembler::mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // MUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04206000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::nbsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // NBSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D + // 0000 0100 111. .... 0011 11.. .... .... 
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04e03c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::nmatch(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // NMATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..1. .... 100. .... ...1 .... + // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | op<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(pd, zn, zm)); + VIXL_ASSERT(zm.IsLaneSizeB() || zm.IsLaneSizeH()); + + Emit(0x45208010 | SVESize(zm) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::pmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // PMUL <Zd>.B, <Zn>.B, <Zm>.B + // 0000 0100 001. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0x04206400 | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::pmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // PMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + // SVEPmull128 is not supported + VIXL_ASSERT(!zd.IsLaneSizeQ()); + + Emit(0x45006800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::pmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // PMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0110 11.. .... .... 
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + // SVEPmull128 is not supported + VIXL_ASSERT(!zd.IsLaneSizeQ()); + + Emit(0x45006c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::raddhnb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RADDHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..1. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206800 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::raddhnt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RADDHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..1. .... 0110 11.. .... .... 
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206c00 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +#define VIXL_SVE_SHR_LIST(V) \ + V(rshrnb, 0x45201800) \ + V(rshrnt, 0x45201c00) \ + V(shrnb, 0x45201000) \ + V(shrnt, 0x45201400) \ + V(sqrshrnb, 0x45202800) \ + V(sqrshrnt, 0x45202c00) \ + V(sqrshrunb, 0x45200800) \ + V(sqrshrunt, 0x45200c00) \ + V(sqshrnb, 0x45202000) \ + V(sqshrnt, 0x45202400) \ + V(sqshrunb, 0x45200000) \ + V(sqshrunt, 0x45200400) \ + V(uqrshrnb, 0x45203800) \ + V(uqrshrnt, 0x45203c00) \ + V(uqshrnb, 0x45203000) \ + V(uqshrnt, 0x45203400) + +#define VIXL_DEFINE_ASM_FUNC(MNE, X) \ + void Assembler::MNE(const ZRegister& zd, const ZRegister& zn, int shift) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \ + VIXL_ASSERT(!zd.IsLaneSizeD() && !zd.IsLaneSizeQ()); \ + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); \ + Instr encoded_imm = \ + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); \ + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, X); \ + } +VIXL_SVE_SHR_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::rsubhnb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RSUBHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..1. .... 0111 10.. .... .... 
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45207800 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::rsubhnt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RSUBHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..1. .... 0111 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45207c00 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saba(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..0. .... 1111 10.. .... .... + // size<23:22> | Zm<20:16> | U<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x4500f800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SABALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 1100 00.. .... .... + // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500c000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SABALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 
1100 01.. .... .... + // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500c400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabdlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SABDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0011 00.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabdlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SABDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0011 01.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sadalp(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn) { + // SADALP <Zda>.<T>, <Pg>/M, <Zn>.<Tb> + // 0100 0100 ..00 0100 101. .... .... .... 
+ // size<23:22> | U<16> | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4404a000 | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::saddlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0000 00.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddlbt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDLBT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 1000 00.. .... .... + // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS()); + + Emit(0x45008000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0000 01.. .... .... 
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0100 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0100 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sbclb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SBCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 1.0. .... 1101 00.. .... .... + // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? 
(1 << 22) : 0; + Emit(0x4580d000 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sbclt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SBCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 1.0. .... 1101 01.. .... .... + // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0; + Emit(0x4580d400 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::shadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0000 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44108000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::shsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SHSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0010 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44128000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::shsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SHSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0110 100. .... .... .... 
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44168000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sli(const ZRegister& zd, const ZRegister& zn, int shift) { + // SLI <Zd>.<T>, <Zn>.<T>, #<const> + // 0100 0101 ..0. .... 1111 01.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | op<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500f400); +} + +void Assembler::smaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0100 101. .... .... .... + // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4414a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0110 101. .... .... .... 
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4416a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +#define VIXL_SVE_MULL_INDEX_LIST(V) \ + V(smullb, 0x44a0c000) \ + V(smullt, 0x44a0c400) \ + V(umullb, 0x44a0d000) \ + V(umullt, 0x44a0d400) \ + V(smlalb, 0x44a08000) \ + V(smlalt, 0x44a08400) \ + V(smlslb, 0x44a0a000) \ + V(smlslt, 0x44a0a400) \ + V(umlalb, 0x44a09000) \ + V(umlalt, 0x44a09400) \ + V(umlslb, 0x44a0b000) \ + V(umlslt, 0x44a0b400) \ + V(sqdmullb, 0x44a0e000) \ + V(sqdmullt, 0x44a0e400) + +#define VIXL_DEFINE_ASM_FUNC(MNE, OP) \ + void Assembler::MNE(const ZRegister& zda, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \ + VIXL_ASSERT(AreSameLaneSize(zn, zm)); \ + VIXL_ASSERT(zda.IsLaneSizeD() || zda.IsLaneSizeS()); \ + VIXL_ASSERT(zda.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); \ + Instr zm_with_index = SVEMulLongIndexHelper(zm, index); \ + Emit(OP | SVESize(zda) | Rd(zda) | Rn(zn) | zm_with_index); \ + } +VIXL_SVE_MULL_INDEX_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::smlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0100 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0100 01.. .... .... 
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0101 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0101 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04206800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::smullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0111 00.. .... .... 
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::smullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqabs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SQABS <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0100 0100 ..00 1000 101. .... .... .... + // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(0x4408a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 1000 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44188000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // SQCADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const> + // 0100 0101 ..00 0001 1101 1... .... .... 
+ // size<23:22> | op<16> | rot<10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT((rot == 90) || (rot == 270)); + + Instr rotate_bit = (rot == 90) ? 0 : (1 << 10); + Emit(0x4501d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlalb_z_zzzi_d +// sqdmlalb_z_zzzi_s +void Assembler::sqdmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + // 0100 0100 111. .... 0010 .0.. .... .... + // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44202000 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0110 00.. .... .... 
+ // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmlalbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLALBT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0000 10.. .... .... + // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44000800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlalt_z_zzzi_d +// sqdmlalt_z_zzzi_s +void Assembler::sqdmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + // 0100 0100 111. .... 0010 .1.. .... .... 
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44202400 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlslb_z_zzzi_d +// sqdmlslb_z_zzzi_s +void Assembler::sqdmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + // 0100 0100 111. .... 0011 .0.. .... .... 
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44203000 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmlslbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLSLBT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0000 11.. .... .... 
+ // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44000c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlslt_z_zzzi_d +// sqdmlslt_z_zzzi_s +void Assembler::sqdmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + // 0100 0100 111. .... 0011 .1.. .... .... + // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44203400 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0110 11.. .... .... 
+ // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMULH <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + // 0100 0100 111. .... 1111 00.. .... .... + // size<23:22> | opc<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + 0x4420f000, + 0x44a0f000, + 0x44e0f000); + + Emit(synthesised_op | Rd(zd) | Rn(zn)); +} + +void Assembler::sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04207000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45006000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0110 01.. .... .... 
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45006400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqneg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SQNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0100 0100 ..00 1001 101. .... .... .... + // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(0x4409a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const> + // 0100 0100 101. .... 0111 .... .... .... + // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44a07000 | SVEMulComplexIndexHelper(zm, index) | rotate_bit | Rd(zda) | + Rn(zn)); +} + +void Assembler::sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // SQRDCMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const> + // 0100 0100 ..0. .... 0011 .... .... .... 
+ // size<23:22> | Zm<20:16> | op<12> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44003000 | rotate_bit | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 3 instruction encodings: +// sqrdmlah_z_zzzi_d +// sqrdmlah_z_zzzi_h +// sqrdmlah_z_zzzi_s +void Assembler::sqrdmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr op_h = 0x44201000; + Instr op_s = op_h | (1 << 23); + Instr op_d = op_h | (3 << 22); + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + op_h, + op_s, + op_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +void Assembler::sqrdmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQRDMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0100 ..0. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x44007000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 3 instruction encodings: +// sqrdmlsh_z_zzzi_d +// sqrdmlsh_z_zzzi_h +// sqrdmlsh_z_zzzi_s +void Assembler::sqrdmlsh(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr op_h = 0x44201400; + Instr op_s = op_h | (1 << 23); + Instr op_d = op_h | (3 << 22); + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. 
+ Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + op_h, + op_s, + op_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +void Assembler::sqrdmlsh(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQRDMLSH <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0100 ..0. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x44007400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQRDMULH <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + // 0100 0100 111. .... 1111 01.. .... .... + // size<23:22> | opc<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + 0x4420f400, + 0x44a0f400, + 0x44e0f400); + + Emit(synthesised_op | Rd(zd) | Rn(zn)); +} + +void Assembler::sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQRDMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04207400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1010 100. .... .... .... 
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440a8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1110 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440e8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // SQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0110 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x04068000); +} + +void Assembler::sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1000 100. .... .... .... 
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44088000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1100 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440c8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqshlu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // SQSHLU <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 1111 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040f8000); +} + +void Assembler::sqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 1010 100. .... .... .... 
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441a8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 1110 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441e8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqxtnb(const ZRegister& zd, const ZRegister& zn) { + // SQXTNB <Zd>.<T>, <Zn>.<Tb> + // 0100 0101 0.1. .000 0100 00.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204000); +} + +void Assembler::sqxtnt(const ZRegister& zd, const ZRegister& zn) { + // SQXTNT <Zd>.<T>, <Zn>.<Tb> + // 0100 0101 0.1. .000 0100 01.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. 
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204400); +} + +void Assembler::sqxtunb(const ZRegister& zd, const ZRegister& zn) { + // SQXTUNB <Zd>.<T>, <Zn>.<Tb> + // 0100 0101 0.1. .000 0101 00.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45205000); +} + +void Assembler::sqxtunt(const ZRegister& zd, const ZRegister& zn) { + // SQXTUNT <Zd>.<T>, <Zn>.<Tb> + // 0100 0101 0.1. .000 0101 01.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45205400); +} + +void Assembler::srhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SRHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0100 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44148000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sri(const ZRegister& zd, const ZRegister& zn, int shift) { + // SRI <Zd>.<T>, <Zn>.<T>, #<const> + // 0100 0101 ..0. .... 1111 00.. .... 
.... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | op<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500f000); +} + +void Assembler::srshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 0010 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44028000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::srshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 0110 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44068000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::srshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // SRSHR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 1100 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040c8000); +} + +void Assembler::srsra(const ZRegister& zda, const ZRegister& zn, int shift) { + // SRSRA <Zda>.<T>, <Zn>.<T>, #<const> + // 0100 0101 ..0. .... 
1110 10.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e800); +} + +void Assembler::sshllb(const ZRegister& zd, const ZRegister& zn, int shift) { + // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const> + // 0100 0101 0.0. .... 1010 00.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a000); +} + +void Assembler::sshllt(const ZRegister& zd, const ZRegister& zn, int shift) { + // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const> + // 0100 0101 0.0. .... 1010 01.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a400); +} + +void Assembler::ssra(const ZRegister& zda, const ZRegister& zn, int shift) { + // SSRA <Zda>.<T>, <Zn>.<T>, #<const> + // 0100 0101 ..0. .... 1110 00.. .... .... 
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e000); +} + +void Assembler::ssublb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0001 00.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssublbt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLBT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 1000 10.. .... .... + // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS()); + + Emit(0x45008800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssublt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0001 01.. .... .... 
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssubltb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLTB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 1000 11.. .... .... + // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS()); + + Emit(0x45008c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssubwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0101 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45005000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssubwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0101 01.. .... .... 
  //  size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>

  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
  VIXL_ASSERT(AreSameLaneSize(zd, zn));
  VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
  VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);

  Emit(0x45005400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
}

// NOTE(review): the non-temporal scatter stores below are compiled out.
// Presumably they are pending full scatter-store support elsewhere in the
// assembler -- confirm before enabling.
#if 0
// This prototype maps to 2 instruction encodings:
//  stnt1b_z_p_ar_d_64_unscaled
//  stnt1b_z_p_ar_s_x32_unscaled
void Assembler::stnt1b(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
  // STNT1B { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
  //  1110 0100 000. .... 001. .... .... ....
  //  msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>

  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));

  Emit(0xe4002000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
}

void Assembler::stnt1d(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
  // STNT1D { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
  //  1110 0101 100. .... 001. .... .... ....
  //  msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>

  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));

  Emit(0xe5802000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
}

// This prototype maps to 2 instruction encodings:
//  stnt1h_z_p_ar_d_64_unscaled
//  stnt1h_z_p_ar_s_x32_unscaled
void Assembler::stnt1h(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
  // STNT1H { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
  //  1110 0100 100. .... 001. .... .... ....
  //  msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>

  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));

  Emit(0xe4802000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
}

// This prototype maps to 2 instruction encodings:
//  stnt1w_z_p_ar_d_64_unscaled
//  stnt1w_z_p_ar_s_x32_unscaled
void Assembler::stnt1w(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
  // STNT1W { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
  //  1110 0101 000. .... 001. .... .... ....
  //  msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>

  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));

  Emit(0xe5002000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
}
#endif

// Subtract narrow high part (bottom): narrowing form, so the destination
// lanes are half the width of the sources.
void Assembler::subhnb(const ZRegister& zd,
                       const ZRegister& zn,
                       const ZRegister& zm) {
  // SUBHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
  //  0100 0101 ..1. .... 0111 00.. .... ....
  //  size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>

  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
  VIXL_ASSERT(AreSameLaneSize(zn, zm));
  VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
  VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);

  // The size field encodes the *wider* (source) lane size, hence SVESize(zn)
  // rather than SVESize(zd).
  Emit(0x45207000 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
}

// Subtract narrow high part (top): as subhnb, but writes the odd-numbered
// (top) destination lanes.
void Assembler::subhnt(const ZRegister& zd,
                       const ZRegister& zn,
                       const ZRegister& zm) {
  // SUBHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
  //  0100 0101 ..1. .... 0111 01.. .... ....
  //  size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>

  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
  VIXL_ASSERT(AreSameLaneSize(zn, zm));
  VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
  VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);

  // As in subhnb, the size field encodes the wider source lane size.
  Emit(0x45207400 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
}

void Assembler::suqadd(const ZRegister& zd,
                       const PRegisterM& pg,
                       const ZRegister& zn,
                       const ZRegister& zm) {
  // SUQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
  //  0100 0100 ..01 1100 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441c8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::tbl(const ZRegister& zd, + const ZRegister& zn1, + const ZRegister& zn2, + const ZRegister& zm) { + // TBL <Zd>.<T>, { <Zn1>.<T>, <Zn2>.<T> }, <Zm>.<T> + // 0000 0101 ..1. .... 0010 10.. .... .... + // size<23:22> | Zm<20:16> | op<10> | Zn<9:5> | Zd<4:0> + + USE(zn2); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreConsecutive(zn1, zn2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn1, zn2, zm)); + + Emit(0x05202800 | SVESize(zd) | Rd(zd) | Rn(zn1) | Rn(zn2) | Rm(zm)); +} + +void Assembler::tbx(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TBX <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0010 11.. .... .... + // size<23:22> | Zm<20:16> | op<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x05202c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaba(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T> + // 0100 0101 ..0. .... 1111 11.. .... .... + // size<23:22> | Zm<20:16> | U<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x4500fc00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UABALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 1100 10.. .... .... 
+ // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500c800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UABALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 1100 11.. .... .... + // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500cc00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabdlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UABDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0011 10.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabdlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UABDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0011 11.. .... .... 
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uadalp(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn) { + // UADALP <Zda>.<T>, <Pg>/M, <Zn>.<Tb> + // 0100 0100 ..00 0101 101. .... .... .... + // size<23:22> | U<16> | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4405a000 | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uaddlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0000 10.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaddlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0000 11.. .... .... 
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaddwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0100 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaddwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0100 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0001 100. .... .... .... 
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44118000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uhsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UHSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0011 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44138000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uhsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UHSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0111 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44178000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0101 101. .... .... .... + // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4415a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0111 101. .... .... .... 
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4417a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0100 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0100 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 0101 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0100 ..0. .... 
0101 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0110 11.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04206c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::umullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0111 10.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::umullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0111 11.. .... .... 
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 1001 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44198000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1011 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440b8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1111 100. .... .... .... 
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440f8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // UQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0111 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x04078000); +} + +void Assembler::uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1001 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44098000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 1101 100. .... .... .... 
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 1011 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441b8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 1111 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441f8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqxtnb(const ZRegister& zd, const ZRegister& zn) { + // UQXTNB <Zd>.<T>, <Zn>.<Tb> + // 0100 0101 0.1. .000 0100 10.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204800); +} + +void Assembler::uqxtnt(const ZRegister& zd, const ZRegister& zn) { + // UQXTNT <Zd>.<T>, <Zn>.<Tb> + // 0100 0101 0.1. 
.000 0100 11.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204c00); +} + +void Assembler::urecpe(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // URECPE <Zd>.S, <Pg>/M, <Zn>.S + // 0100 0100 ..00 0000 101. .... .... .... + // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeS()); + + Emit(0x4400a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::urhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // URHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 0101 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44158000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::urshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // URSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 0011 100. .... .... .... 
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44038000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::urshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // URSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..00 0111 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44078000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::urshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // URSHR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 1101 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040d8000); +} + +void Assembler::ursqrte(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // URSQRTE <Zd>.S, <Pg>/M, <Zn>.S + // 0100 0100 ..00 0001 101. .... .... .... + // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeS()); + + Emit(0x4401a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::ursra(const ZRegister& zda, const ZRegister& zn, int shift) { + // URSRA <Zda>.<T>, <Zn>.<T>, #<const> + // 0100 0101 ..0. .... 1110 11.. .... .... 
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500ec00); +} + +void Assembler::ushllb(const ZRegister& zd, const ZRegister& zn, int shift) { + // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const> + // 0100 0101 0.0. .... 1010 10.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a800); +} + +void Assembler::ushllt(const ZRegister& zd, const ZRegister& zn, int shift) { + // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const> + // 0100 0101 0.0. .... 1010 11.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500ac00); +} + +void Assembler::usqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // USQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0100 0100 ..01 1101 100. .... .... .... 
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::usra(const ZRegister& zda, const ZRegister& zn, int shift) { + // USRA <Zda>.<T>, <Zn>.<T>, #<const> + // 0100 0101 ..0. .... 1110 01.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e400); +} + +void Assembler::usublb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0001 10.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::usublt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0001 11.. .... .... 
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::usubwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0101 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45005800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::usubwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb> + // 0100 0101 ..0. .... 0101 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45005c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::whilege(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEGE <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 00.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 
0x00001000 : 0x00000000; + + Emit(0x25200000 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilegt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEGT <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 00.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(0x25200010 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilehi(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEHI <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 10.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(0x25200810 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilehs(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEHS <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 10.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(0x25200800 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilerw(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILERW <Pd>.<T>, <Xn>, <Xm> + // 0010 0101 ..1. .... 0011 00.. ...1 .... 
+ // size<23:22> | Rm<20:16> | Rn<9:5> | rw<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(rn.IsX() && rm.IsX()); + + Emit(0x25203010 | SVESize(pd) | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilewr(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEWR <Pd>.<T>, <Xn>, <Xm> + // 0010 0101 ..1. .... 0011 00.. ...0 .... + // size<23:22> | Rm<20:16> | Rn<9:5> | rw<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(rn.IsX() && rm.IsX()); + + Emit(0x25203000 | SVESize(pd) | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::xar(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int shift) { + // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + // 0000 0100 ..1. .... 0011 01.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zm)); + + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zm, encoded_imm, 0x04203400); +} + +void Assembler::fmmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT((CPUHas(CPUFeatures::kSVEF32MM) && zda.IsLaneSizeS()) || + (CPUHas(CPUFeatures::kSVEF64MM) && zda.IsLaneSizeD())); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x6420e400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x45009800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::usmmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + 
VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x45809800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::ummla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x45c09800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::usdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x44807800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::usdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint2(index)); + + Emit(0x44a01800 | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn)); +} + +void Assembler::sudot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint2(index)); + + Emit(0x44a01c00 | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn)); +} + } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h index d17c8894..f7512b2c 100644 --- a/src/aarch64/constants-aarch64.h +++ b/src/aarch64/constants-aarch64.h @@ -4404,11 +4404,11 @@ enum SVEVectorSelectOp { SEL_z_p_zz = SVEVectorSelectFixed }; 
-enum SVEVectorSplice_DestructiveOp { - SVEVectorSplice_DestructiveFixed = 0x052C8000, - SVEVectorSplice_DestructiveFMask = 0xFF3FE000, - SVEVectorSplice_DestructiveMask = 0xFF3FE000, - SPLICE_z_p_zz_des = SVEVectorSplice_DestructiveFixed +enum SVEVectorSpliceOp { + SVEVectorSpliceFixed = 0x052C8000, + SVEVectorSpliceFMask = 0xFF3FE000, + SVEVectorSpliceMask = 0xFF3FE000, + SPLICE_z_p_zz_des = SVEVectorSpliceFixed }; enum ReservedOp { diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc index a31e010d..ae519928 100644 --- a/src/aarch64/cpu-aarch64.cc +++ b/src/aarch64/cpu-aarch64.cc @@ -78,11 +78,21 @@ const IDRegister::Field AA64ISAR1::kBF16(44); const IDRegister::Field AA64ISAR1::kDGH(48); const IDRegister::Field AA64ISAR1::kI8MM(52); +const IDRegister::Field AA64ISAR2::kRPRES(4); + +const IDRegister::Field AA64MMFR0::kECV(60); + const IDRegister::Field AA64MMFR1::kLO(16); +const IDRegister::Field AA64MMFR1::kAFP(44); const IDRegister::Field AA64MMFR2::kAT(32); +const IDRegister::Field AA64ZFR0::kSVEver(0); +const IDRegister::Field AA64ZFR0::kAES(4); +const IDRegister::Field AA64ZFR0::kBitPerm(16); const IDRegister::Field AA64ZFR0::kBF16(20); +const IDRegister::Field AA64ZFR0::kSHA3(32); +const IDRegister::Field AA64ZFR0::kSM4(40); const IDRegister::Field AA64ZFR0::kI8MM(44); const IDRegister::Field AA64ZFR0::kF32MM(52); const IDRegister::Field AA64ZFR0::kF64MM(56); @@ -168,9 +178,22 @@ CPUFeatures AA64ISAR1::GetCPUFeatures() const { return f; } +CPUFeatures AA64ISAR2::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES); + return f; +} + +CPUFeatures AA64MMFR0::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV); + return f; +} + CPUFeatures AA64MMFR1::GetCPUFeatures() const { CPUFeatures f; if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions); + if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP); return f; } @@ -187,7 +210,13 @@ CPUFeatures 
AA64ZFR0::GetCPUFeatures() const { if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM); if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM); if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM); + if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4); + if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3); if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16); + if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm); + if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES); + if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128); + if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2); return f; } @@ -262,14 +291,15 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( CPUFeatures::kPAuthGeneric, // Bits 32-39 CPUFeatures::kDCCVADP, - CPUFeatures::kNone, // "sve2" - CPUFeatures::kNone, // "sveaes" - CPUFeatures::kNone, // "svepmull" - CPUFeatures::kNone, // "svebitperm" - CPUFeatures::kNone, // "svesha3" - CPUFeatures::kNone, // "svesm4" - CPUFeatures::kFrintToFixedSizedInt, + CPUFeatures::kSVE2, + CPUFeatures::kSVEAES, + CPUFeatures::kSVEPmull128, + CPUFeatures::kSVEBitPerm, + CPUFeatures::kSVESHA3, + CPUFeatures::kSVESM4, + CPUFeatures::kAXFlag, // Bits 40-47 + CPUFeatures::kFrintToFixedSizedInt, CPUFeatures::kSVEI8MM, CPUFeatures::kSVEF32MM, CPUFeatures::kSVEF64MM, @@ -277,9 +307,13 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( CPUFeatures::kI8MM, CPUFeatures::kBF16, CPUFeatures::kDGH, - CPUFeatures::kRNG, // Bits 48+ - CPUFeatures::kBTI}; + CPUFeatures::kRNG, + CPUFeatures::kBTI, + CPUFeatures::kMTE, + CPUFeatures::kECV, + CPUFeatures::kAFP, + CPUFeatures::kRPRES}; uint64_t hwcap_low32 = getauxval(AT_HWCAP); uint64_t hwcap_high32 = getauxval(AT_HWCAP2); @@ -291,6 +325,10 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) { if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]); } + // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support + if (features.Has(CPUFeatures::kMTE)) { + 
features.Combine(CPUFeatures::kMTEInstructions); + } #endif // VIXL_USE_LINUX_HWCAP if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) && diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h index 2bf1e60f..892f48f2 100644 --- a/src/aarch64/cpu-aarch64.h +++ b/src/aarch64/cpu-aarch64.h @@ -160,6 +160,26 @@ class AA64ISAR1 : public IDRegister { static const Field kI8MM; }; +class AA64ISAR2 : public IDRegister { + public: + explicit AA64ISAR2(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kRPRES; +}; + +class AA64MMFR0 : public IDRegister { + public: + explicit AA64MMFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kECV; +}; + class AA64MMFR1 : public IDRegister { public: explicit AA64MMFR1(uint64_t value) : IDRegister(value) {} @@ -168,6 +188,7 @@ class AA64MMFR1 : public IDRegister { private: static const Field kLO; + static const Field kAFP; }; class AA64MMFR2 : public IDRegister { @@ -187,7 +208,12 @@ class AA64ZFR0 : public IDRegister { CPUFeatures GetCPUFeatures() const; private: + static const Field kSVEver; + static const Field kAES; + static const Field kBitPerm; static const Field kBF16; + static const Field kSHA3; + static const Field kSM4; static const Field kI8MM; static const Field kF32MM; static const Field kF64MM; @@ -255,9 +281,11 @@ class CPU { V(AA64PFR1, "ID_AA64PFR1_EL1") \ V(AA64ISAR0, "ID_AA64ISAR0_EL1") \ V(AA64ISAR1, "ID_AA64ISAR1_EL1") \ + V(AA64MMFR0, "ID_AA64MMFR0_EL1") \ V(AA64MMFR1, "ID_AA64MMFR1_EL1") \ /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \ /* read them, but some compilers don't accept the symbolic names. 
*/ \ + V(AA64ISAR2, "S3_0_C0_C6_2") \ V(AA64MMFR2, "S3_0_C0_C7_2") \ V(AA64ZFR0, "S3_0_C0_C4_4") diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc index abe63d39..63249b04 100644 --- a/src/aarch64/cpu-features-auditor-aarch64.cc +++ b/src/aarch64/cpu-features-auditor-aarch64.cc @@ -34,6 +34,43 @@ namespace vixl { namespace aarch64 { + +const CPUFeaturesAuditor::FormToVisitorFnMap* +CPUFeaturesAuditor::GetFormToVisitorFnMap() { + static const FormToVisitorFnMap form_to_visitor = { + DEFAULT_FORM_TO_VISITOR_MAP(CPUFeaturesAuditor), + SIM_AUD_VISITOR_MAP(CPUFeaturesAuditor), + {"fcmla_asimdelem_c_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fcmla_asimdelem_c_s"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlal2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlal_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmla_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmla_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlsl2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlsl_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmls_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmls_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmulx_asimdelem_rh_h"_h, + &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmulx_asimdelem_r_sd"_h, + &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmul_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmul_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sdot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"smlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"smlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"smull_asimdelem_l"_h, 
&CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sqdmlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sqdmlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sqdmull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"udot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"umlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"umlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"umull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + }; + return &form_to_visitor; +} + // Every instruction must update last_instruction_, even if only to clear it, // and every instruction must also update seen_ once it has been fully handled. // This scope makes that simple, and allows early returns in the decode logic. @@ -1186,8 +1223,8 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) { V(SVEIntMulImm_Unpredicated) \ V(SVEIntMulVectors_Predicated) \ V(SVELoadAndBroadcastElement) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \ V(SVELoadMultipleStructures_ScalarPlusImm) \ V(SVELoadMultipleStructures_ScalarPlusScalar) \ V(SVELoadPredicateRegister) \ @@ -1214,7 +1251,7 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) { V(SVETableLookup) \ V(SVEUnpackPredicateElements) \ V(SVEUnpackVectorElements) \ - V(SVEVectorSplice_Destructive) + V(SVEVectorSplice) #define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \ void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \ @@ -1352,6 +1389,351 @@ void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) { USE(instr); } +void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) { + VIXL_ASSERT(metadata->count("form") > 0); + const std::string& form = (*metadata)["form"]; + 
uint32_t form_hash = Hash(form.c_str()); + const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap(); + FormToVisitorFnMap::const_iterator it = fv->find(form_hash); + if (it == fv->end()) { + RecordInstructionFeaturesScope scope(this); + std::map<uint32_t, const CPUFeatures> features = { + {"adclb_z_zzz"_h, CPUFeatures::kSVE2}, + {"adclt_z_zzz"_h, CPUFeatures::kSVE2}, + {"addhnb_z_zz"_h, CPUFeatures::kSVE2}, + {"addhnt_z_zz"_h, CPUFeatures::kSVE2}, + {"addp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"bcax_z_zzz"_h, CPUFeatures::kSVE2}, + {"bdep_z_zz"_h, + CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)}, + {"bext_z_zz"_h, + CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)}, + {"bgrp_z_zz"_h, + CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)}, + {"bsl1n_z_zzz"_h, CPUFeatures::kSVE2}, + {"bsl2n_z_zzz"_h, CPUFeatures::kSVE2}, + {"bsl_z_zzz"_h, CPUFeatures::kSVE2}, + {"cadd_z_zz"_h, CPUFeatures::kSVE2}, + {"cdot_z_zzz"_h, CPUFeatures::kSVE2}, + {"cdot_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"cdot_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"cmla_z_zzz"_h, CPUFeatures::kSVE2}, + {"cmla_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"cmla_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"eor3_z_zzz"_h, CPUFeatures::kSVE2}, + {"eorbt_z_zz"_h, CPUFeatures::kSVE2}, + {"eortb_z_zz"_h, CPUFeatures::kSVE2}, + {"ext_z_zi_con"_h, CPUFeatures::kSVE2}, + {"faddp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"fcvtlt_z_p_z_h2s"_h, CPUFeatures::kSVE2}, + {"fcvtlt_z_p_z_s2d"_h, CPUFeatures::kSVE2}, + {"fcvtnt_z_p_z_d2s"_h, CPUFeatures::kSVE2}, + {"fcvtnt_z_p_z_s2h"_h, CPUFeatures::kSVE2}, + {"fcvtx_z_p_z_d2s"_h, CPUFeatures::kSVE2}, + {"fcvtxnt_z_p_z_d2s"_h, CPUFeatures::kSVE2}, + {"flogb_z_p_z"_h, CPUFeatures::kSVE2}, + {"fmaxnmp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"fmaxp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"fminnmp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"fminp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"fmlalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"fmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + 
{"fmlalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"fmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"fmlslb_z_zzz"_h, CPUFeatures::kSVE2}, + {"fmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"fmlslt_z_zzz"_h, CPUFeatures::kSVE2}, + {"fmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"histcnt_z_p_zz"_h, CPUFeatures::kSVE2}, + {"histseg_z_zz"_h, CPUFeatures::kSVE2}, + {"ldnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1sb_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1sh_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1sw_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"ldnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"match_p_p_zz"_h, CPUFeatures::kSVE2}, + {"mla_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"mla_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"mla_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"mls_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"mls_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"mls_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"mul_z_zz"_h, CPUFeatures::kSVE2}, + {"mul_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"mul_z_zzi_h"_h, CPUFeatures::kSVE2}, + {"mul_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"nbsl_z_zzz"_h, CPUFeatures::kSVE2}, + {"nmatch_p_p_zz"_h, CPUFeatures::kSVE2}, + {"pmul_z_zz"_h, CPUFeatures::kSVE2}, + {"pmullb_z_zz"_h, CPUFeatures::kSVE2}, + {"pmullt_z_zz"_h, CPUFeatures::kSVE2}, + {"raddhnb_z_zz"_h, CPUFeatures::kSVE2}, + {"raddhnt_z_zz"_h, CPUFeatures::kSVE2}, + {"rshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"rshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"rsubhnb_z_zz"_h, CPUFeatures::kSVE2}, + {"rsubhnt_z_zz"_h, CPUFeatures::kSVE2}, + {"saba_z_zzz"_h, CPUFeatures::kSVE2}, + 
{"sabalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sabalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sabdlb_z_zz"_h, CPUFeatures::kSVE2}, + {"sabdlt_z_zz"_h, CPUFeatures::kSVE2}, + {"sadalp_z_p_z"_h, CPUFeatures::kSVE2}, + {"saddlb_z_zz"_h, CPUFeatures::kSVE2}, + {"saddlbt_z_zz"_h, CPUFeatures::kSVE2}, + {"saddlt_z_zz"_h, CPUFeatures::kSVE2}, + {"saddwb_z_zz"_h, CPUFeatures::kSVE2}, + {"saddwt_z_zz"_h, CPUFeatures::kSVE2}, + {"sbclb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sbclt_z_zzz"_h, CPUFeatures::kSVE2}, + {"shadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"shrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"shrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"shsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"shsubr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sli_z_zzi"_h, CPUFeatures::kSVE2}, + {"smaxp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sminp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"smlalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlalb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlalb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smlalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlalt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlalt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smlslb_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlslb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlslb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smlslt_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlslt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlslt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smulh_z_zz"_h, CPUFeatures::kSVE2}, + {"smullb_z_zz"_h, CPUFeatures::kSVE2}, + {"smullb_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"smullb_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"smullt_z_zz"_h, CPUFeatures::kSVE2}, + {"smullt_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"smullt_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"splice_z_p_zz_con"_h, CPUFeatures::kSVE2}, + {"sqabs_z_p_z"_h, CPUFeatures::kSVE2}, + {"sqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqcadd_z_zz"_h, CPUFeatures::kSVE2}, + {"sqdmlalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlalb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmlalbt_z_zzz"_h, 
CPUFeatures::kSVE2}, + {"sqdmlalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlalt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmlslb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlslb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmlslbt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlslt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlslt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zz"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zzi_h"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmullb_z_zz"_h, CPUFeatures::kSVE2}, + {"sqdmullb_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmullb_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmullt_z_zz"_h, CPUFeatures::kSVE2}, + {"sqdmullt_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmullt_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqneg_z_p_z"_h, CPUFeatures::kSVE2}, + {"sqrdcmlah_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqrdcmlah_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdcmlah_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zz"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqrshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqrshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqrshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqrshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqrshrunb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqrshrunt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshl_z_p_zi"_h, CPUFeatures::kSVE2}, + {"sqshl_z_p_zz"_h, 
CPUFeatures::kSVE2}, + {"sqshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqshlu_z_p_zi"_h, CPUFeatures::kSVE2}, + {"sqshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshrunb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshrunt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqsubr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqxtnb_z_zz"_h, CPUFeatures::kSVE2}, + {"sqxtnt_z_zz"_h, CPUFeatures::kSVE2}, + {"sqxtunb_z_zz"_h, CPUFeatures::kSVE2}, + {"sqxtunt_z_zz"_h, CPUFeatures::kSVE2}, + {"srhadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sri_z_zzi"_h, CPUFeatures::kSVE2}, + {"srshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"srshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"srshr_z_p_zi"_h, CPUFeatures::kSVE2}, + {"srsra_z_zi"_h, CPUFeatures::kSVE2}, + {"sshllb_z_zi"_h, CPUFeatures::kSVE2}, + {"sshllt_z_zi"_h, CPUFeatures::kSVE2}, + {"ssra_z_zi"_h, CPUFeatures::kSVE2}, + {"ssublb_z_zz"_h, CPUFeatures::kSVE2}, + {"ssublbt_z_zz"_h, CPUFeatures::kSVE2}, + {"ssublt_z_zz"_h, CPUFeatures::kSVE2}, + {"ssubltb_z_zz"_h, CPUFeatures::kSVE2}, + {"ssubwb_z_zz"_h, CPUFeatures::kSVE2}, + {"ssubwt_z_zz"_h, CPUFeatures::kSVE2}, + {"stnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"subhnb_z_zz"_h, CPUFeatures::kSVE2}, + {"subhnt_z_zz"_h, CPUFeatures::kSVE2}, + {"suqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"tbl_z_zz_2"_h, CPUFeatures::kSVE2}, + {"tbx_z_zz"_h, CPUFeatures::kSVE2}, + {"uaba_z_zzz"_h, CPUFeatures::kSVE2}, + {"uabalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"uabalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"uabdlb_z_zz"_h, CPUFeatures::kSVE2}, + {"uabdlt_z_zz"_h, CPUFeatures::kSVE2}, + {"uadalp_z_p_z"_h, CPUFeatures::kSVE2}, 
+ {"uaddlb_z_zz"_h, CPUFeatures::kSVE2}, + {"uaddlt_z_zz"_h, CPUFeatures::kSVE2}, + {"uaddwb_z_zz"_h, CPUFeatures::kSVE2}, + {"uaddwt_z_zz"_h, CPUFeatures::kSVE2}, + {"uhadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uhsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uhsubr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"umaxp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uminp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"umlalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlalb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlalb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umlalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlalt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlalt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umlslb_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlslb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlslb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umlslt_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlslt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlslt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umulh_z_zz"_h, CPUFeatures::kSVE2}, + {"umullb_z_zz"_h, CPUFeatures::kSVE2}, + {"umullb_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"umullb_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"umullt_z_zz"_h, CPUFeatures::kSVE2}, + {"umullt_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"umullt_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"uqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqrshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqrshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqrshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"uqrshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"uqshl_z_p_zi"_h, CPUFeatures::kSVE2}, + {"uqshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"uqshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"uqsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqsubr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqxtnb_z_zz"_h, CPUFeatures::kSVE2}, + {"uqxtnt_z_zz"_h, CPUFeatures::kSVE2}, + {"urecpe_z_p_z"_h, CPUFeatures::kSVE2}, + {"urhadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"urshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"urshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"urshr_z_p_zi"_h, 
CPUFeatures::kSVE2}, + {"ursqrte_z_p_z"_h, CPUFeatures::kSVE2}, + {"ursra_z_zi"_h, CPUFeatures::kSVE2}, + {"ushllb_z_zi"_h, CPUFeatures::kSVE2}, + {"ushllt_z_zi"_h, CPUFeatures::kSVE2}, + {"usqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"usra_z_zi"_h, CPUFeatures::kSVE2}, + {"usublb_z_zz"_h, CPUFeatures::kSVE2}, + {"usublt_z_zz"_h, CPUFeatures::kSVE2}, + {"usubwb_z_zz"_h, CPUFeatures::kSVE2}, + {"usubwt_z_zz"_h, CPUFeatures::kSVE2}, + {"whilege_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilegt_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilehi_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilehs_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilerw_p_rr"_h, CPUFeatures::kSVE2}, + {"whilewr_p_rr"_h, CPUFeatures::kSVE2}, + {"xar_z_zzi"_h, CPUFeatures::kSVE2}, + {"smmla_z_zzz"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"ummla_z_zzz"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"usmmla_z_zzz"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"fmmla_z_zzz_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM)}, + {"fmmla_z_zzz_d"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"smmla_asimdsame2_g"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"ummla_asimdsame2_g"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"usmmla_asimdsame2_g"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"ld1row_z_p_bi_u32"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1row_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1rod_z_p_bi_u64"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1rod_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1rob_z_p_bi_u8"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1rob_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1roh_z_p_bi_u16"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + 
{"ld1roh_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"usdot_asimdsame2_d"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"sudot_asimdelem_d"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"usdot_asimdelem_d"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"usdot_z_zzz_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"usdot_z_zzzi_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"sudot_z_zzzi_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + }; + + if (features.count(form_hash) > 0) { + scope.Record(features[form_hash]); + } + } else { + (it->second)(this, instr); + } +} } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/cpu-features-auditor-aarch64.h b/src/aarch64/cpu-features-auditor-aarch64.h index 23aec066..041bc88e 100644 --- a/src/aarch64/cpu-features-auditor-aarch64.h +++ b/src/aarch64/cpu-features-auditor-aarch64.h @@ -27,10 +27,13 @@ #ifndef VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_ #define VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_ +#include <functional> #include <iostream> +#include <unordered_map> #include "cpu-features.h" #include "decoder-aarch64.h" +#include "decoder-visitor-map-aarch64.h" namespace vixl { namespace aarch64 { @@ -100,15 +103,16 @@ class CPUFeaturesAuditor : public DecoderVisitor { SetAvailableFeatures(available); } -// Declare all Visitor functions. 
-#define DECLARE(A) \ - virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE; - VISITOR_LIST(DECLARE) -#undef DECLARE + virtual void Visit(Metadata* metadata, + const Instruction* instr) VIXL_OVERRIDE; private: class RecordInstructionFeaturesScope; +#define DECLARE(A) virtual void Visit##A(const Instruction* instr); + VISITOR_LIST(DECLARE) +#undef DECLARE + void LoadStoreHelper(const Instruction* instr); void LoadStorePairHelper(const Instruction* instr); @@ -117,6 +121,11 @@ class CPUFeaturesAuditor : public DecoderVisitor { CPUFeatures available_; Decoder* decoder_; + + using FormToVisitorFnMap = std::unordered_map< + uint32_t, + std::function<void(CPUFeaturesAuditor*, const Instruction*)>>; + static const FormToVisitorFnMap* GetFormToVisitorFnMap(); }; } // namespace aarch64 diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc index c6859bbc..a4e2989e 100644 --- a/src/aarch64/decoder-aarch64.cc +++ b/src/aarch64/decoder-aarch64.cc @@ -49,7 +49,9 @@ void Decoder::Decode(Instruction* instr) { } void Decoder::AddDecodeNode(const DecodeNode& node) { - decode_nodes_.insert(std::make_pair(node.GetName(), node)); + if (decode_nodes_.count(node.GetName()) == 0) { + decode_nodes_.insert(std::make_pair(node.GetName(), node)); + } } DecodeNode* Decoder::GetDecodeNode(std::string name) { @@ -64,13 +66,22 @@ void Decoder::ConstructDecodeGraph() { // Add all of the decoding nodes to the Decoder. for (unsigned i = 0; i < ArrayLength(kDecodeMapping); i++) { AddDecodeNode(DecodeNode(kDecodeMapping[i], this)); - } - // Add the visitor function wrapping nodes to the Decoder. - for (unsigned i = 0; i < ArrayLength(kVisitorNodes); i++) { - AddDecodeNode(DecodeNode(kVisitorNodes[i], this)); + // Add a node for each instruction form named, identified by having no '_' + // prefix on the node name. 
+ const DecodeMapping& map = kDecodeMapping[i]; + for (unsigned j = 0; j < map.mapping.size(); j++) { + if ((map.mapping[j].handler != NULL) && + (map.mapping[j].handler[0] != '_')) { + AddDecodeNode(DecodeNode(map.mapping[j].handler, this)); + } + } } + // Add an "unallocated" node, used when an instruction encoding is not + // recognised by the decoding graph. + AddDecodeNode(DecodeNode("unallocated", this)); + // Compile the graph from the root. compiled_decoder_root_ = GetDecodeNode("Root")->Compile(this); } @@ -122,43 +133,18 @@ void Decoder::RemoveVisitor(DecoderVisitor* visitor) { visitors_.remove(visitor); } -#define DEFINE_VISITOR_CALLERS(A) \ - void Decoder::Visit##A(const Instruction* instr) { \ - VIXL_ASSERT(((A##FMask == 0) && (A##Fixed == 0)) || \ - (instr->Mask(A##FMask) == A##Fixed)); \ - std::list<DecoderVisitor*>::iterator it; \ - for (it = visitors_.begin(); it != visitors_.end(); it++) { \ - (*it)->Visit##A(instr); \ - } \ - } -VISITOR_LIST(DEFINE_VISITOR_CALLERS) -#undef DEFINE_VISITOR_CALLERS - -void DecodeNode::SetSampledBits(const uint8_t* bits, int bit_count) { - VIXL_ASSERT(!IsCompiled()); - - sampled_bits_.resize(bit_count); - for (int i = 0; i < bit_count; i++) { - sampled_bits_[i] = bits[i]; +void Decoder::VisitNamedInstruction(const Instruction* instr, + const std::string& name) { + std::list<DecoderVisitor*>::iterator it; + Metadata m = {{"form", name}}; + for (it = visitors_.begin(); it != visitors_.end(); it++) { + (*it)->Visit(&m, instr); } } -std::vector<uint8_t> DecodeNode::GetSampledBits() const { - return sampled_bits_; -} - -size_t DecodeNode::GetSampledBitsCount() const { return sampled_bits_.size(); } - -void DecodeNode::AddPatterns(const DecodePattern* patterns) { - VIXL_ASSERT(!IsCompiled()); - for (unsigned i = 0; i < kMaxDecodeMappings; i++) { - // Empty string indicates end of patterns. 
- if (patterns[i].pattern == NULL) break; - VIXL_ASSERT((strlen(patterns[i].pattern) == GetSampledBitsCount()) || - (strcmp(patterns[i].pattern, "otherwise") == 0)); - pattern_table_.push_back(patterns[i]); - } -} +// Initialise empty vectors for sampled bits and pattern table. +const std::vector<uint8_t> DecodeNode::kEmptySampledBits; +const std::vector<DecodePattern> DecodeNode::kEmptyPatternTable; void DecodeNode::CompileNodeForBits(Decoder* decoder, std::string name, @@ -172,191 +158,246 @@ void DecodeNode::CompileNodeForBits(Decoder* decoder, compiled_node_->SetNodeForBits(bits, n->GetCompiledNode()); } -BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) { - // Instantiate a templated bit extraction function for every pattern we - // might encounter. If the assertion in the default clause is reached, add a - // new instantiation below using the information in the failure message. - BitExtractFn bit_extract_fn = NULL; - switch (mask) { -#define INSTANTIATE_TEMPLATE(M) \ - case M: \ - bit_extract_fn = &Instruction::ExtractBits<M>; \ + +#define INSTANTIATE_TEMPLATE_M(M) \ + case 0x##M: \ + bit_extract_fn = &Instruction::ExtractBits<0x##M>; \ + break; +#define INSTANTIATE_TEMPLATE_MV(M, V) \ + case 0x##M##V: \ + bit_extract_fn = &Instruction::IsMaskedValue<0x##M, 0x##V>; \ break; - INSTANTIATE_TEMPLATE(0x000001e0); - INSTANTIATE_TEMPLATE(0x00000400); - INSTANTIATE_TEMPLATE(0x00000800); - INSTANTIATE_TEMPLATE(0x00000c00); - INSTANTIATE_TEMPLATE(0x00001000); - INSTANTIATE_TEMPLATE(0x00001800); - INSTANTIATE_TEMPLATE(0x00001c00); - INSTANTIATE_TEMPLATE(0x00004000); - INSTANTIATE_TEMPLATE(0x00008000); - INSTANTIATE_TEMPLATE(0x0000f000); - INSTANTIATE_TEMPLATE(0x0000fc00); - INSTANTIATE_TEMPLATE(0x00060010); - INSTANTIATE_TEMPLATE(0x00093e00); - INSTANTIATE_TEMPLATE(0x000c1000); - INSTANTIATE_TEMPLATE(0x00100000); - INSTANTIATE_TEMPLATE(0x00101800); - INSTANTIATE_TEMPLATE(0x00140000); - INSTANTIATE_TEMPLATE(0x00180000); - INSTANTIATE_TEMPLATE(0x00181000); - 
INSTANTIATE_TEMPLATE(0x00190000); - INSTANTIATE_TEMPLATE(0x00191400); - INSTANTIATE_TEMPLATE(0x001c0000); - INSTANTIATE_TEMPLATE(0x001c1800); - INSTANTIATE_TEMPLATE(0x001f0000); - INSTANTIATE_TEMPLATE(0x0020fc00); - INSTANTIATE_TEMPLATE(0x0038f000); - INSTANTIATE_TEMPLATE(0x00400000); - INSTANTIATE_TEMPLATE(0x00400010); - INSTANTIATE_TEMPLATE(0x0040f000); - INSTANTIATE_TEMPLATE(0x00500000); - INSTANTIATE_TEMPLATE(0x00800000); - INSTANTIATE_TEMPLATE(0x00800010); - INSTANTIATE_TEMPLATE(0x00801800); - INSTANTIATE_TEMPLATE(0x009f0000); - INSTANTIATE_TEMPLATE(0x00c00000); - INSTANTIATE_TEMPLATE(0x00c00010); - INSTANTIATE_TEMPLATE(0x00cf8000); - INSTANTIATE_TEMPLATE(0x00db0000); - INSTANTIATE_TEMPLATE(0x00dc0000); - INSTANTIATE_TEMPLATE(0x00e00003); - INSTANTIATE_TEMPLATE(0x00f80400); - INSTANTIATE_TEMPLATE(0x01e00000); - INSTANTIATE_TEMPLATE(0x03800000); - INSTANTIATE_TEMPLATE(0x04c0f000); - INSTANTIATE_TEMPLATE(0x10800400); - INSTANTIATE_TEMPLATE(0x1e000000); - INSTANTIATE_TEMPLATE(0x20000000); - INSTANTIATE_TEMPLATE(0x20000410); - INSTANTIATE_TEMPLATE(0x20007000); - INSTANTIATE_TEMPLATE(0x20007800); - INSTANTIATE_TEMPLATE(0x2000f000); - INSTANTIATE_TEMPLATE(0x2000f800); - INSTANTIATE_TEMPLATE(0x201e0c00); - INSTANTIATE_TEMPLATE(0x20803800); - INSTANTIATE_TEMPLATE(0x20c0cc00); - INSTANTIATE_TEMPLATE(0x20c0f000); - INSTANTIATE_TEMPLATE(0x20c0f800); - INSTANTIATE_TEMPLATE(0x20c1f000); - INSTANTIATE_TEMPLATE(0x51e00000); - INSTANTIATE_TEMPLATE(0x60007800); - INSTANTIATE_TEMPLATE(0x6000f800); - INSTANTIATE_TEMPLATE(0x601e0000); - INSTANTIATE_TEMPLATE(0x80007c00); - INSTANTIATE_TEMPLATE(0x80017c00); - INSTANTIATE_TEMPLATE(0x80408000); - INSTANTIATE_TEMPLATE(0x80a07c00); - INSTANTIATE_TEMPLATE(0x80df0000); - INSTANTIATE_TEMPLATE(0x80e08000); - INSTANTIATE_TEMPLATE(0xa0c00000); - INSTANTIATE_TEMPLATE(0xb5a00000); - INSTANTIATE_TEMPLATE(0xc0c00c00); - INSTANTIATE_TEMPLATE(0xc4400000); - INSTANTIATE_TEMPLATE(0xc4c00000); - INSTANTIATE_TEMPLATE(0xe0400000); - 
INSTANTIATE_TEMPLATE(0xe120e000); - INSTANTIATE_TEMPLATE(0xe3c00000); - INSTANTIATE_TEMPLATE(0xf1200000); -#undef INSTANTIATE_TEMPLATE - default: - printf("Node %s: No template instantiated for extracting 0x%08x.\n", - GetName().c_str(), - GenerateSampledBitsMask()); - printf("Add one in %s above line %d:\n", __FILE__, __LINE__); - printf(" INSTANTIATE_TEMPLATE(0x%08x);\n", GenerateSampledBitsMask()); - VIXL_UNREACHABLE(); - } - return bit_extract_fn; -} -BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) { +BitExtractFn DecodeNode::GetBitExtractFunctionHelper(uint32_t x, uint32_t y) { // Instantiate a templated bit extraction function for every pattern we - // might encounter. If the assertion in the following check fails, add a + // might encounter. If the assertion in the default clause is reached, add a // new instantiation below using the information in the failure message. - bool instantiated = false; BitExtractFn bit_extract_fn = NULL; -#define INSTANTIATE_TEMPLATE(M, V) \ - if ((mask == M) && (value == V)) { \ - bit_extract_fn = &Instruction::IsMaskedValue<M, V>; \ - instantiated = true; \ - } - INSTANTIATE_TEMPLATE(0x0000001c, 0x00000000); - INSTANTIATE_TEMPLATE(0x00000210, 0x00000000); - INSTANTIATE_TEMPLATE(0x000003c0, 0x00000000); - INSTANTIATE_TEMPLATE(0x00001c00, 0x00000000); - INSTANTIATE_TEMPLATE(0x00001c0f, 0x00000000); - INSTANTIATE_TEMPLATE(0x00003000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00007800, 0x00000000); - INSTANTIATE_TEMPLATE(0x0000e000, 0x0000a000); - INSTANTIATE_TEMPLATE(0x0000f000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00030400, 0x00000000); - INSTANTIATE_TEMPLATE(0x0003801f, 0x0000000d); - INSTANTIATE_TEMPLATE(0x00060210, 0x00000000); - INSTANTIATE_TEMPLATE(0x00060810, 0x00000000); - INSTANTIATE_TEMPLATE(0x00060a10, 0x00000000); - INSTANTIATE_TEMPLATE(0x00060bf0, 0x00000000); - INSTANTIATE_TEMPLATE(0x00061e10, 0x00000000); - INSTANTIATE_TEMPLATE(0x00061e10, 0x00000400); - INSTANTIATE_TEMPLATE(0x00070200, 
0x00000000); - INSTANTIATE_TEMPLATE(0x000b1e10, 0x00000000); - INSTANTIATE_TEMPLATE(0x000f0000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00130e1f, 0x00000000); - INSTANTIATE_TEMPLATE(0x00130fff, 0x00000000); - INSTANTIATE_TEMPLATE(0x00180000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00180000, 0x00100000); - INSTANTIATE_TEMPLATE(0x001e0000, 0x00000000); - INSTANTIATE_TEMPLATE(0x001f0000, 0x00000000); - INSTANTIATE_TEMPLATE(0x001f0000, 0x001f0000); - INSTANTIATE_TEMPLATE(0x0038e000, 0x00000000); - INSTANTIATE_TEMPLATE(0x0039e000, 0x00002000); - INSTANTIATE_TEMPLATE(0x003ae000, 0x00002000); - INSTANTIATE_TEMPLATE(0x003ce000, 0x00042000); - INSTANTIATE_TEMPLATE(0x005f0000, 0x001f0000); - INSTANTIATE_TEMPLATE(0x00780000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00870210, 0x00000000); - INSTANTIATE_TEMPLATE(0x00c00000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00c00000, 0x00800000); - INSTANTIATE_TEMPLATE(0x00c00000, 0x00c00000); - INSTANTIATE_TEMPLATE(0x00c00010, 0x00800000); - INSTANTIATE_TEMPLATE(0x00ca1e10, 0x00000000); - INSTANTIATE_TEMPLATE(0x01000010, 0x00000000); - INSTANTIATE_TEMPLATE(0x20000800, 0x00000000); - INSTANTIATE_TEMPLATE(0x20008000, 0x00000000); - INSTANTIATE_TEMPLATE(0x20040000, 0x00000000); - INSTANTIATE_TEMPLATE(0x201e8000, 0x00000000); - INSTANTIATE_TEMPLATE(0x60000000, 0x00000000); - INSTANTIATE_TEMPLATE(0x60000000, 0x20000000); - INSTANTIATE_TEMPLATE(0x60000000, 0x60000000); - INSTANTIATE_TEMPLATE(0x60200000, 0x00000000); - INSTANTIATE_TEMPLATE(0x80008000, 0x00000000); - INSTANTIATE_TEMPLATE(0x80008000, 0x00008000); - INSTANTIATE_TEMPLATE(0x80400000, 0x00400000); - INSTANTIATE_TEMPLATE(0xa00003e0, 0x00000000); - INSTANTIATE_TEMPLATE(0xa000c007, 0x00000000); - INSTANTIATE_TEMPLATE(0xa0100000, 0x00000000); - INSTANTIATE_TEMPLATE(0xc4000000, 0xc0000000); - INSTANTIATE_TEMPLATE(0xc4000000, 0xc4000000); - INSTANTIATE_TEMPLATE(0xe0000010, 0xa0000000); - INSTANTIATE_TEMPLATE(0xe01c0000, 0x20000000); - INSTANTIATE_TEMPLATE(0xe1ff0000, 0x00000000); -#undef 
INSTANTIATE_TEMPLATE - - if (!instantiated) { - printf( - "Node %s: no template instantiated for mask 0x%08x, value = " - "0x%08x.\n", - GetName().c_str(), - mask, - value); - printf("Add one in %s above line %d:\n", __FILE__, __LINE__); - printf(" INSTANTIATE_TEMPLATE(0x%08x, 0x%08x);\n", mask, value); - VIXL_UNREACHABLE(); + + // The arguments x and y represent the mask and value. If y is 0, x is the + // mask. Otherwise, y is the mask, and x is the value to compare against a + // masked result. + uint64_t signature = (static_cast<uint64_t>(y) << 32) | x; + switch (signature) { + INSTANTIATE_TEMPLATE_M(00000001); + INSTANTIATE_TEMPLATE_M(00000010); + INSTANTIATE_TEMPLATE_M(0000001f); + INSTANTIATE_TEMPLATE_M(00000060); + INSTANTIATE_TEMPLATE_M(00000100); + INSTANTIATE_TEMPLATE_M(00000200); + INSTANTIATE_TEMPLATE_M(00000400); + INSTANTIATE_TEMPLATE_M(00000800); + INSTANTIATE_TEMPLATE_M(00000c00); + INSTANTIATE_TEMPLATE_M(00000c10); + INSTANTIATE_TEMPLATE_M(00000fc0); + INSTANTIATE_TEMPLATE_M(00001000); + INSTANTIATE_TEMPLATE_M(00001400); + INSTANTIATE_TEMPLATE_M(00001800); + INSTANTIATE_TEMPLATE_M(00001c00); + INSTANTIATE_TEMPLATE_M(00002000); + INSTANTIATE_TEMPLATE_M(00002010); + INSTANTIATE_TEMPLATE_M(00002400); + INSTANTIATE_TEMPLATE_M(00003000); + INSTANTIATE_TEMPLATE_M(00003020); + INSTANTIATE_TEMPLATE_M(00003400); + INSTANTIATE_TEMPLATE_M(00003800); + INSTANTIATE_TEMPLATE_M(00003c00); + INSTANTIATE_TEMPLATE_M(00013000); + INSTANTIATE_TEMPLATE_M(00020000); + INSTANTIATE_TEMPLATE_M(00020010); + INSTANTIATE_TEMPLATE_M(000203e0); + INSTANTIATE_TEMPLATE_M(000303e0); + INSTANTIATE_TEMPLATE_M(00060000); + INSTANTIATE_TEMPLATE_M(00061000); + INSTANTIATE_TEMPLATE_M(00070000); + INSTANTIATE_TEMPLATE_M(000703c0); + INSTANTIATE_TEMPLATE_M(00080000); + INSTANTIATE_TEMPLATE_M(00090000); + INSTANTIATE_TEMPLATE_M(000f0000); + INSTANTIATE_TEMPLATE_M(000f0010); + INSTANTIATE_TEMPLATE_M(00100000); + INSTANTIATE_TEMPLATE_M(00180000); + INSTANTIATE_TEMPLATE_M(001d1c00); + 
INSTANTIATE_TEMPLATE_M(001f0000); + INSTANTIATE_TEMPLATE_M(001f2000); + INSTANTIATE_TEMPLATE_M(001f3000); + INSTANTIATE_TEMPLATE_M(00400000); + INSTANTIATE_TEMPLATE_M(00400800); + INSTANTIATE_TEMPLATE_M(00403000); + INSTANTIATE_TEMPLATE_M(00500800); + INSTANTIATE_TEMPLATE_M(00583000); + INSTANTIATE_TEMPLATE_M(005f0000); + INSTANTIATE_TEMPLATE_M(00800000); + INSTANTIATE_TEMPLATE_M(00800400); + INSTANTIATE_TEMPLATE_M(00800c1e); + INSTANTIATE_TEMPLATE_M(0080101f); + INSTANTIATE_TEMPLATE_M(00801c00); + INSTANTIATE_TEMPLATE_M(00803000); + INSTANTIATE_TEMPLATE_M(00803c00); + INSTANTIATE_TEMPLATE_M(009f0000); + INSTANTIATE_TEMPLATE_M(009f2000); + INSTANTIATE_TEMPLATE_M(00c00000); + INSTANTIATE_TEMPLATE_M(00c00010); + INSTANTIATE_TEMPLATE_M(00c0001f); + INSTANTIATE_TEMPLATE_M(00c00200); + INSTANTIATE_TEMPLATE_M(00c00400); + INSTANTIATE_TEMPLATE_M(00c00c00); + INSTANTIATE_TEMPLATE_M(00c00c1c); + INSTANTIATE_TEMPLATE_M(00c01000); + INSTANTIATE_TEMPLATE_M(00c01400); + INSTANTIATE_TEMPLATE_M(00c01c00); + INSTANTIATE_TEMPLATE_M(00c02000); + INSTANTIATE_TEMPLATE_M(00c03000); + INSTANTIATE_TEMPLATE_M(00c03c00); + INSTANTIATE_TEMPLATE_M(00c83000); + INSTANTIATE_TEMPLATE_M(00cf0000); + INSTANTIATE_TEMPLATE_M(00d00200); + INSTANTIATE_TEMPLATE_M(00d80800); + INSTANTIATE_TEMPLATE_M(00d81800); + INSTANTIATE_TEMPLATE_M(00d81c00); + INSTANTIATE_TEMPLATE_M(00d82800); + INSTANTIATE_TEMPLATE_M(00d82c00); + INSTANTIATE_TEMPLATE_M(00d92400); + INSTANTIATE_TEMPLATE_M(00d93000); + INSTANTIATE_TEMPLATE_M(00db0000); + INSTANTIATE_TEMPLATE_M(00dc0000); + INSTANTIATE_TEMPLATE_M(00dc2000); + INSTANTIATE_TEMPLATE_M(00dd2000); + INSTANTIATE_TEMPLATE_M(00df0000); + INSTANTIATE_TEMPLATE_M(40000000); + INSTANTIATE_TEMPLATE_M(40000010); + INSTANTIATE_TEMPLATE_M(40000c00); + INSTANTIATE_TEMPLATE_M(40002000); + INSTANTIATE_TEMPLATE_M(40002010); + INSTANTIATE_TEMPLATE_M(40003000); + INSTANTIATE_TEMPLATE_M(40003c00); + INSTANTIATE_TEMPLATE_M(400f0000); + INSTANTIATE_TEMPLATE_M(400f0400); + 
INSTANTIATE_TEMPLATE_M(401f2000); + INSTANTIATE_TEMPLATE_M(40400800); + INSTANTIATE_TEMPLATE_M(40400c00); + INSTANTIATE_TEMPLATE_M(40403c00); + INSTANTIATE_TEMPLATE_M(40800000); + INSTANTIATE_TEMPLATE_M(40800c00); + INSTANTIATE_TEMPLATE_M(40802000); + INSTANTIATE_TEMPLATE_M(40802010); + INSTANTIATE_TEMPLATE_M(40803400); + INSTANTIATE_TEMPLATE_M(40803c00); + INSTANTIATE_TEMPLATE_M(40c00000); + INSTANTIATE_TEMPLATE_M(40c00c00); + INSTANTIATE_TEMPLATE_M(40c00c10); + INSTANTIATE_TEMPLATE_M(40c01c00); + INSTANTIATE_TEMPLATE_M(40c02000); + INSTANTIATE_TEMPLATE_M(40c02010); + INSTANTIATE_TEMPLATE_M(40c02c00); + INSTANTIATE_TEMPLATE_M(40c03c00); + INSTANTIATE_TEMPLATE_M(40c80000); + INSTANTIATE_TEMPLATE_M(40c90000); + INSTANTIATE_TEMPLATE_M(40cf0000); + INSTANTIATE_TEMPLATE_M(40d02000); + INSTANTIATE_TEMPLATE_M(40d02010); + INSTANTIATE_TEMPLATE_M(40d80000); + INSTANTIATE_TEMPLATE_M(40d81800); + INSTANTIATE_TEMPLATE_M(bf20c000); + INSTANTIATE_TEMPLATE_MV(00000003, 00000000); + INSTANTIATE_TEMPLATE_MV(00000003, 00000003); + INSTANTIATE_TEMPLATE_MV(0000001f, 0000001f); + INSTANTIATE_TEMPLATE_MV(00000210, 00000000); + INSTANTIATE_TEMPLATE_MV(000003e0, 00000000); + INSTANTIATE_TEMPLATE_MV(000003e0, 000003e0); + INSTANTIATE_TEMPLATE_MV(000003e1, 000003e0); + INSTANTIATE_TEMPLATE_MV(000003e3, 000003e0); + INSTANTIATE_TEMPLATE_MV(000003e3, 000003e3); + INSTANTIATE_TEMPLATE_MV(00000c00, 00000000); + INSTANTIATE_TEMPLATE_MV(00000fc0, 00000000); + INSTANTIATE_TEMPLATE_MV(000013e0, 00001000); + INSTANTIATE_TEMPLATE_MV(00001c00, 00000000); + INSTANTIATE_TEMPLATE_MV(00002400, 00000000); + INSTANTIATE_TEMPLATE_MV(00003000, 00000000); + INSTANTIATE_TEMPLATE_MV(00003000, 00001000); + INSTANTIATE_TEMPLATE_MV(00003000, 00002000); + INSTANTIATE_TEMPLATE_MV(00003000, 00003000); + INSTANTIATE_TEMPLATE_MV(00003010, 00000000); + INSTANTIATE_TEMPLATE_MV(00060000, 00000000); + INSTANTIATE_TEMPLATE_MV(00061000, 00000000); + INSTANTIATE_TEMPLATE_MV(00070000, 00030000); + 
INSTANTIATE_TEMPLATE_MV(0007309f, 0000001f); + INSTANTIATE_TEMPLATE_MV(00073ee0, 00033060); + INSTANTIATE_TEMPLATE_MV(000f0000, 00000000); + INSTANTIATE_TEMPLATE_MV(000f0010, 00000000); + INSTANTIATE_TEMPLATE_MV(00100200, 00000000); + INSTANTIATE_TEMPLATE_MV(00100210, 00000000); + INSTANTIATE_TEMPLATE_MV(00160000, 00000000); + INSTANTIATE_TEMPLATE_MV(00170000, 00000000); + INSTANTIATE_TEMPLATE_MV(001c0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001d0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001e0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001f0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001f0000, 00010000); + INSTANTIATE_TEMPLATE_MV(001f0000, 00100000); + INSTANTIATE_TEMPLATE_MV(001f0000, 001f0000); + INSTANTIATE_TEMPLATE_MV(001f3000, 00000000); + INSTANTIATE_TEMPLATE_MV(001f3000, 001f0000); + INSTANTIATE_TEMPLATE_MV(001f300f, 0000000d); + INSTANTIATE_TEMPLATE_MV(001f301f, 0000000d); + INSTANTIATE_TEMPLATE_MV(001f33e0, 000103e0); + INSTANTIATE_TEMPLATE_MV(001f3800, 00000000); + INSTANTIATE_TEMPLATE_MV(00401000, 00400000); + INSTANTIATE_TEMPLATE_MV(00403000, 00000000); + INSTANTIATE_TEMPLATE_MV(005f3000, 001f0000); + INSTANTIATE_TEMPLATE_MV(005f3000, 001f1000); + INSTANTIATE_TEMPLATE_MV(00800010, 00000000); + INSTANTIATE_TEMPLATE_MV(00800400, 00000000); + INSTANTIATE_TEMPLATE_MV(00800410, 00000000); + INSTANTIATE_TEMPLATE_MV(00803000, 00002000); + INSTANTIATE_TEMPLATE_MV(00870000, 00000000); + INSTANTIATE_TEMPLATE_MV(009f0000, 00010000); + INSTANTIATE_TEMPLATE_MV(00c00000, 00000000); + INSTANTIATE_TEMPLATE_MV(00c00000, 00400000); + INSTANTIATE_TEMPLATE_MV(00c0001f, 00000000); + INSTANTIATE_TEMPLATE_MV(00c001ff, 00000000); + INSTANTIATE_TEMPLATE_MV(00c00200, 00400000); + INSTANTIATE_TEMPLATE_MV(00c0020f, 00400000); + INSTANTIATE_TEMPLATE_MV(00c003e0, 00000000); + INSTANTIATE_TEMPLATE_MV(00c00800, 00000000); + INSTANTIATE_TEMPLATE_MV(00d80800, 00000000); + INSTANTIATE_TEMPLATE_MV(00df0000, 00000000); + INSTANTIATE_TEMPLATE_MV(00df3800, 001f0800); + 
INSTANTIATE_TEMPLATE_MV(40002000, 40000000); + INSTANTIATE_TEMPLATE_MV(40003c00, 00000000); + INSTANTIATE_TEMPLATE_MV(40040000, 00000000); + INSTANTIATE_TEMPLATE_MV(40800c00, 40000400); + INSTANTIATE_TEMPLATE_MV(40c00000, 00000000); + INSTANTIATE_TEMPLATE_MV(40c00000, 00400000); + INSTANTIATE_TEMPLATE_MV(40c00000, 40000000); + INSTANTIATE_TEMPLATE_MV(40c00000, 40800000); + INSTANTIATE_TEMPLATE_MV(40df0000, 00000000); + default: { + static bool printed_preamble = false; + if (!printed_preamble) { + printf("One or more missing template instantiations.\n"); + printf( + "Add the following to either GetBitExtractFunction() " + "implementations\n"); + printf("in %s near line %d:\n", __FILE__, __LINE__); + printed_preamble = true; + } + + if (y == 0) { + printf(" INSTANTIATE_TEMPLATE_M(%08x);\n", x); + bit_extract_fn = &Instruction::ExtractBitsAbsent; + } else { + printf(" INSTANTIATE_TEMPLATE_MV(%08x, %08x);\n", y, x); + bit_extract_fn = &Instruction::IsMaskedValueAbsent; + } + } } return bit_extract_fn; } +#undef INSTANTIATE_TEMPLATE_M +#undef INSTANTIATE_TEMPLATE_MV + bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // EitherOr optimisation: if there are only one or two patterns in the table, // try to optimise the node to exploit that. @@ -364,21 +405,22 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { if ((table_size <= 2) && (GetSampledBitsCount() > 1)) { // TODO: support 'x' in this optimisation by dropping the sampled bit // positions before making the mask/value. - if ((strchr(pattern_table_[0].pattern, 'x') == NULL) && - ((table_size == 1) || - (strcmp(pattern_table_[1].pattern, "otherwise") == 0))) { + if (!PatternContainsSymbol(pattern_table_[0].pattern, + PatternSymbol::kSymbolX) && + (table_size == 1)) { // A pattern table consisting of a fixed pattern with no x's, and an // "otherwise" or absent case. Optimise this into an instruction mask and // value test. 
uint32_t single_decode_mask = 0; uint32_t single_decode_value = 0; - std::vector<uint8_t> bits = GetSampledBits(); + const std::vector<uint8_t>& bits = GetSampledBits(); // Construct the instruction mask and value from the pattern. - VIXL_ASSERT(bits.size() == strlen(pattern_table_[0].pattern)); + VIXL_ASSERT(bits.size() == GetPatternLength(pattern_table_[0].pattern)); for (size_t i = 0; i < bits.size(); i++) { single_decode_mask |= 1U << bits[i]; - if (pattern_table_[0].pattern[i] == '1') { + if (GetSymbolAt(pattern_table_[0].pattern, i) == + PatternSymbol::kSymbol1) { single_decode_value |= 1U << bits[i]; } } @@ -391,9 +433,7 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // Set DecodeNode for when the instruction after masking doesn't match the // value. - const char* doesnt_match_handler = - (table_size == 1) ? "VisitUnallocated" : pattern_table_[1].handler; - CompileNodeForBits(decoder, doesnt_match_handler, 0); + CompileNodeForBits(decoder, "unallocated", 0); // Set DecodeNode for when it does match. CompileNodeForBits(decoder, pattern_table_[0].handler, 1); @@ -411,21 +451,14 @@ CompiledDecodeNode* DecodeNode::Compile(Decoder* decoder) { CreateVisitorNode(); } else if (!TryCompileOptimisedDecodeTable(decoder)) { // The "otherwise" node is the default next node if no pattern matches. - std::string otherwise = "VisitUnallocated"; + std::string otherwise = "unallocated"; // For each pattern in pattern_table_, create an entry in matches that // has a corresponding mask and value for the pattern. std::vector<MaskValuePair> matches; for (size_t i = 0; i < pattern_table_.size(); i++) { - if (strcmp(pattern_table_[i].pattern, "otherwise") == 0) { - // "otherwise" must be the last pattern in the list, otherwise the - // indices won't match for pattern_table_ and matches. 
- VIXL_ASSERT(i == pattern_table_.size() - 1); - otherwise = pattern_table_[i].handler; - } else { - matches.push_back(GenerateMaskValuePair( - GenerateOrderedPattern(pattern_table_[i].pattern))); - } + matches.push_back(GenerateMaskValuePair( + GenerateOrderedPattern(pattern_table_[i].pattern))); } BitExtractFn bit_extract_fn = @@ -466,7 +499,7 @@ void CompiledDecodeNode::Decode(const Instruction* instr) const { if (IsLeafNode()) { // If this node is a leaf, call the registered visitor function. VIXL_ASSERT(decoder_ != NULL); - (decoder_->*visitor_fn_)(instr); + decoder_->VisitNamedInstruction(instr, instruction_name_); } else { // Otherwise, using the sampled bit extractor for this node, look up the // next node in the decode tree, and call its Decode method. @@ -478,41 +511,53 @@ void CompiledDecodeNode::Decode(const Instruction* instr) const { } DecodeNode::MaskValuePair DecodeNode::GenerateMaskValuePair( - std::string pattern) const { + uint32_t pattern) const { uint32_t mask = 0, value = 0; - for (size_t i = 0; i < pattern.size(); i++) { - mask |= ((pattern[i] == 'x') ? 0 : 1) << i; - value |= ((pattern[i] == '1') ? 1 : 0) << i; + for (size_t i = 0; i < GetPatternLength(pattern); i++) { + PatternSymbol sym = GetSymbolAt(pattern, i); + mask = (mask << 1) | ((sym == PatternSymbol::kSymbolX) ? 0 : 1); + value = (value << 1) | (static_cast<uint32_t>(sym) & 1); } return std::make_pair(mask, value); } -std::string DecodeNode::GenerateOrderedPattern(std::string pattern) const { - std::vector<uint8_t> sampled_bits = GetSampledBits(); - // Construct a temporary 32-character string containing '_', then at each - // sampled bit position, set the corresponding pattern character. - std::string temp(32, '_'); +uint32_t DecodeNode::GenerateOrderedPattern(uint32_t pattern) const { + const std::vector<uint8_t>& sampled_bits = GetSampledBits(); + uint64_t temp = 0xffffffffffffffff; + + // Place symbols into the field of set bits. 
Symbols are two bits wide and + // take values 0, 1 or 2, so 3 will represent "no symbol". for (size_t i = 0; i < sampled_bits.size(); i++) { - temp[sampled_bits[i]] = pattern[i]; + int shift = sampled_bits[i] * 2; + temp ^= static_cast<uint64_t>(kEndOfPattern) << shift; + temp |= static_cast<uint64_t>(GetSymbolAt(pattern, i)) << shift; } - // Iterate through the temporary string, filtering out the non-'_' characters - // into a new ordered pattern result string. - std::string result; - for (size_t i = 0; i < temp.size(); i++) { - if (temp[i] != '_') { - result.push_back(temp[i]); + // Iterate over temp and extract new pattern ordered by sample position. + uint32_t result = kEndOfPattern; // End of pattern marker. + + // Iterate over the pattern one symbol (two bits) at a time. + for (int i = 62; i >= 0; i -= 2) { + uint32_t sym = (temp >> i) & kPatternSymbolMask; + + // If this is a valid symbol, shift into the result. + if (sym != kEndOfPattern) { + result = (result << 2) | sym; } } - VIXL_ASSERT(result.size() == sampled_bits.size()); + + // The length of the ordered pattern must be the same as the input pattern, + // and the number of sampled bits. + VIXL_ASSERT(GetPatternLength(result) == GetPatternLength(pattern)); + VIXL_ASSERT(GetPatternLength(result) == sampled_bits.size()); + return result; } uint32_t DecodeNode::GenerateSampledBitsMask() const { - std::vector<uint8_t> sampled_bits = GetSampledBits(); uint32_t mask = 0; - for (size_t i = 0; i < sampled_bits.size(); i++) { - mask |= 1 << sampled_bits[i]; + for (int bit : GetSampledBits()) { + mask |= 1 << bit; } return mask; } diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h index 38540195..22c66e82 100644 --- a/src/aarch64/decoder-aarch64.h +++ b/src/aarch64/decoder-aarch64.h @@ -35,9 +35,7 @@ #include "instructions-aarch64.h" - // List macro containing all visitors needed by the decoder class. 
- #define VISITOR_LIST_THAT_RETURN(V) \ V(AddSubExtended) \ V(AddSubImmediate) \ @@ -231,8 +229,8 @@ V(SVEIntMulImm_Unpredicated) \ V(SVEIntMulVectors_Predicated) \ V(SVELoadAndBroadcastElement) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \ V(SVELoadMultipleStructures_ScalarPlusImm) \ V(SVELoadMultipleStructures_ScalarPlusScalar) \ V(SVELoadPredicateRegister) \ @@ -259,7 +257,7 @@ V(SVETableLookup) \ V(SVEUnpackPredicateElements) \ V(SVEUnpackVectorElements) \ - V(SVEVectorSplice_Destructive) \ + V(SVEVectorSplice) \ V(System) \ V(TestBranch) \ V(Unallocated) \ @@ -276,14 +274,12 @@ namespace vixl { namespace aarch64 { -// The Visitor interface. Disassembler and simulator (and other tools) -// must provide implementations for all of these functions. -// -// Note that this class must change in breaking ways with even minor additions -// to VIXL, and so its API should be considered unstable. User classes that -// inherit from this one should be expected to break even on minor version -// updates. If this is a problem, consider using DecoderVisitorWithDefaults -// instead. +using Metadata = std::map<std::string, std::string>; + +// The Visitor interface consists only of the Visit() method. User classes +// that inherit from this one must provide an implementation of the method. +// Information about the instruction encountered by the Decoder is available +// via the metadata pointer. 
class DecoderVisitor { public: enum VisitorConstness { kConstVisitor, kNonConstVisitor }; @@ -292,9 +288,7 @@ class DecoderVisitor { virtual ~DecoderVisitor() {} -#define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0; - VISITOR_LIST(DECLARE) -#undef DECLARE + virtual void Visit(Metadata* metadata, const Instruction* instr) = 0; bool IsConstVisitor() const { return constness_ == kConstVisitor; } Instruction* MutableInstruction(const Instruction* instr) { @@ -306,25 +300,6 @@ class DecoderVisitor { const VisitorConstness constness_; }; -// As above, but a default (no-op) implementation for each visitor is provided. -// This is useful for derived class that only care about specific visitors. -// -// A minor version update may add a visitor, but will never remove one, so it is -// safe (and recommended) to use `override` in derived classes. -class DecoderVisitorWithDefaults : public DecoderVisitor { - public: - explicit DecoderVisitorWithDefaults( - VisitorConstness constness = kConstVisitor) - : DecoderVisitor(constness) {} - - virtual ~DecoderVisitorWithDefaults() {} - -#define DECLARE(A) \ - virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE { USE(instr); } - VISITOR_LIST(DECLARE) -#undef DECLARE -}; - class DecodeNode; class CompiledDecodeNode; @@ -389,9 +364,7 @@ class Decoder { // of visitors stored by the decoder. 
void RemoveVisitor(DecoderVisitor* visitor); -#define DECLARE(A) void Visit##A(const Instruction* instr); - VISITOR_LIST(DECLARE) -#undef DECLARE + void VisitNamedInstruction(const Instruction* instr, const std::string& name); std::list<DecoderVisitor*>* visitors() { return &visitors_; } @@ -421,8 +394,6 @@ class Decoder { std::map<std::string, DecodeNode> decode_nodes_; }; -const int kMaxDecodeSampledBits = 16; -const int kMaxDecodeMappings = 100; typedef void (Decoder::*DecodeFnPtr)(const Instruction*); typedef uint32_t (Instruction::*BitExtractFn)(void) const; @@ -436,10 +407,14 @@ struct VisitorNode { // compilation stage. After compilation, the decoder is embodied in the graph // of CompiledDecodeNodes pointer to by compiled_decoder_root_. -// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits as a -// string to the name of its handler. +// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits encoded +// as uint32_t to its handler. +// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10. +// 0b11 marks the edge of the most-significant bits of the pattern, which is +// required to determine the length. For example, the pattern "1x01"_b is +// encoded in a uint32_t as 0b11_01_10_00_01. struct DecodePattern { - const char* pattern; + uint32_t pattern; const char* handler; }; @@ -448,8 +423,8 @@ struct DecodePattern { // sampled bits match to the corresponding name of a node. struct DecodeMapping { const char* name; - const uint8_t sampled_bits[kMaxDecodeSampledBits]; - const DecodePattern mapping[kMaxDecodeMappings]; + const std::vector<uint8_t> sampled_bits; + const std::vector<DecodePattern> mapping; }; // For speed, before nodes can be used for decoding instructions, they must @@ -463,7 +438,7 @@ class CompiledDecodeNode { // function that extracts the bits to be sampled. 
CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size) : bit_extract_fn_(bit_extract_fn), - visitor_fn_(NULL), + instruction_name_("node"), decode_table_size_(decode_table_size), decoder_(NULL) { decode_table_ = new CompiledDecodeNode*[decode_table_size_]; @@ -472,9 +447,9 @@ class CompiledDecodeNode { // Constructor for wrappers around visitor functions. These require no // decoding, so no bit extraction function or decode table is assigned. - explicit CompiledDecodeNode(DecodeFnPtr visitor_fn, Decoder* decoder) + explicit CompiledDecodeNode(std::string iname, Decoder* decoder) : bit_extract_fn_(NULL), - visitor_fn_(visitor_fn), + instruction_name_(iname), decode_table_(NULL), decode_table_size_(0), decoder_(decoder) {} @@ -494,9 +469,9 @@ class CompiledDecodeNode { // A leaf node is a wrapper for a visitor function. bool IsLeafNode() const { - VIXL_ASSERT(((visitor_fn_ == NULL) && (bit_extract_fn_ != NULL)) || - ((visitor_fn_ != NULL) && (bit_extract_fn_ == NULL))); - return visitor_fn_ != NULL; + VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) || + ((instruction_name_ != "node") && (bit_extract_fn_ == NULL))); + return instruction_name_ != "node"; } // Get a pointer to the next node required in the decode process, based on the @@ -521,7 +496,7 @@ class CompiledDecodeNode { // Visitor function that handles the instruction identified. Set only for // leaf nodes, where no extra decoding is required, otherwise NULL. - const DecodeFnPtr visitor_fn_; + std::string instruction_name_; // Mapping table from instruction bits to next decode stage. CompiledDecodeNode** decode_table_; @@ -535,30 +510,35 @@ class CompiledDecodeNode { class DecodeNode { public: // Default constructor needed for map initialisation. 
- DecodeNode() : compiled_node_(NULL) {} + DecodeNode() + : sampled_bits_(DecodeNode::kEmptySampledBits), + pattern_table_(DecodeNode::kEmptyPatternTable), + compiled_node_(NULL) {} // Constructor for DecodeNode wrappers around visitor functions. These are // marked as "compiled", as there is no decoding left to do. - explicit DecodeNode(const VisitorNode& visitor, Decoder* decoder) - : name_(visitor.name), - visitor_fn_(visitor.visitor_fn), + explicit DecodeNode(const std::string& iname, Decoder* decoder) + : name_(iname), + sampled_bits_(DecodeNode::kEmptySampledBits), + instruction_name_(iname), + pattern_table_(DecodeNode::kEmptyPatternTable), decoder_(decoder), compiled_node_(NULL) {} // Constructor for DecodeNodes that map bit patterns to other DecodeNodes. explicit DecodeNode(const DecodeMapping& map, Decoder* decoder = NULL) : name_(map.name), - visitor_fn_(NULL), + sampled_bits_(map.sampled_bits), + instruction_name_("node"), + pattern_table_(map.mapping), decoder_(decoder), compiled_node_(NULL) { - // The length of the bit string in the first mapping determines the number - // of sampled bits. When adding patterns later, we assert that all mappings - // sample the same number of bits. - VIXL_CHECK(strcmp(map.mapping[0].pattern, "otherwise") != 0); - int bit_count = static_cast<int>(strlen(map.mapping[0].pattern)); - VIXL_CHECK((bit_count > 0) && (bit_count <= 32)); - SetSampledBits(map.sampled_bits, bit_count); - AddPatterns(map.mapping); + // With the current two bits per symbol encoding scheme, the maximum pattern + // length is (32 - 2) / 2 = 15 bits. + VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15); + for (const DecodePattern& p : map.mapping) { + VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size()); + } } ~DecodeNode() { @@ -568,21 +548,15 @@ class DecodeNode { } } - // Set the bits sampled from the instruction by this node. 
- void SetSampledBits(const uint8_t* bits, int bit_count); - // Get the bits sampled from the instruction by this node. - std::vector<uint8_t> GetSampledBits() const; + const std::vector<uint8_t>& GetSampledBits() const { return sampled_bits_; } // Get the number of bits sampled from the instruction by this node. - size_t GetSampledBitsCount() const; - - // Add patterns to this node's internal pattern table. - void AddPatterns(const DecodePattern* patterns); + size_t GetSampledBitsCount() const { return sampled_bits_.size(); } // A leaf node is a DecodeNode that wraps the visitor function for the // identified instruction class. - bool IsLeafNode() const { return visitor_fn_ != NULL; } + bool IsLeafNode() const { return instruction_name_ != "node"; } std::string GetName() const { return name_; } @@ -597,7 +571,7 @@ class DecodeNode { // Create a CompiledDecodeNode wrapping a visitor function. No decoding is // required for this node; the visitor function is called instead. void CreateVisitorNode() { - compiled_node_ = new CompiledDecodeNode(visitor_fn_, decoder_); + compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_); } // Find and compile the DecodeNode named "name", and set it as the node for @@ -609,12 +583,16 @@ class DecodeNode { // contiguous sequence, suitable for indexing an array. // For example, a mask of 0b1010 returns a function that, given an instruction // 0bXYZW, will return 0bXZ. - BitExtractFn GetBitExtractFunction(uint32_t mask); + BitExtractFn GetBitExtractFunction(uint32_t mask) { + return GetBitExtractFunctionHelper(mask, 0); + } // Get a pointer to an Instruction method that applies a mask to the // instruction bits, and tests if the result is equal to value. The returned // function gives a 1 result if (inst & mask == value), 0 otherwise. 
- BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value); + BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) { + return GetBitExtractFunctionHelper(value, mask); + } // Compile this DecodeNode into a new CompiledDecodeNode and returns a pointer // to it. This pointer is also stored inside the DecodeNode itself. Destroying @@ -626,22 +604,53 @@ class DecodeNode { CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; } bool IsCompiled() const { return GetCompiledNode() != NULL; } + enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 }; + static const uint32_t kEndOfPattern = 3; + static const uint32_t kPatternSymbolMask = 3; + + size_t GetPatternLength(uint32_t pattern) const { + uint32_t hsb = HighestSetBitPosition(pattern); + // The pattern length is signified by two set bits in a two bit-aligned + // position. Ensure that the pattern has a highest set bit, it's at an odd + // bit position, and that the bit to the right of the hsb is also set. + VIXL_ASSERT(((hsb % 2) == 1) && (pattern >> (hsb - 1)) == kEndOfPattern); + return hsb / 2; + } + + bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const { + while ((pattern & kPatternSymbolMask) != kEndOfPattern) { + if (static_cast<PatternSymbol>(pattern & kPatternSymbolMask) == symbol) + return true; + pattern >>= 2; + } + return false; + } + + PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const { + size_t len = GetPatternLength(pattern); + VIXL_ASSERT((pos < 15) && (pos < len)); + uint32_t shift = static_cast<uint32_t>(2 * (len - pos - 1)); + uint32_t sym = (pattern >> shift) & kPatternSymbolMask; + return static_cast<PatternSymbol>(sym); + } + private: - // Generate a mask and value pair from a string constructed from 0, 1 and x - // (don't care) characters. - // For example "10x1" should return mask = 0b1101, value = 0b1001. 
+ // Generate a mask and value pair from a pattern constructed from 0, 1 and x + // (don't care) 2-bit symbols. + // For example "10x1"_b should return mask = 0b1101, value = 0b1001. typedef std::pair<Instr, Instr> MaskValuePair; - MaskValuePair GenerateMaskValuePair(std::string pattern) const; - - // Generate a pattern string ordered by the bit positions sampled by this - // node. The first character in the string corresponds to the lowest sampled - // bit. - // For example, a pattern of "1x0" expected when sampling bits 31, 1 and 30 - // returns the pattern "x01"; bit 1 should be 'x', bit 30 '0' and bit 31 '1'. + MaskValuePair GenerateMaskValuePair(uint32_t pattern) const; + + // Generate a pattern ordered by the bit positions sampled by this node. + // The symbol corresponding to the lowest sample position is placed in the + // least-significant bits of the result pattern. + // For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30 + // returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31 + // '1'. // This output makes comparisons easier between the pattern and bits sampled // from an instruction using the fast "compress" algorithm. See // Instruction::Compress(). - std::string GenerateOrderedPattern(std::string pattern) const; + uint32_t GenerateOrderedPattern(uint32_t pattern) const; // Generate a mask with a bit set at each sample position. uint32_t GenerateSampledBitsMask() const; @@ -650,20 +659,26 @@ class DecodeNode { // true if successful. bool TryCompileOptimisedDecodeTable(Decoder* decoder); + // Helper function that returns a bit extracting function. If y is zero, + // x is a bit extraction mask. Otherwise, y is the mask, and x is the value + // to match after masking. + BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y); + // Name of this decoder node, used to construct edges in the decode graph. 
std::string name_; // Vector of bits sampled from an instruction to determine which node to look // up next in the decode process. - std::vector<uint8_t> sampled_bits_; + const std::vector<uint8_t>& sampled_bits_; + static const std::vector<uint8_t> kEmptySampledBits; - // Visitor function that handles the instruction identified. Set only for leaf - // nodes, where no extra decoding is required. For non-leaf decoding nodes, - // this pointer is NULL. - DecodeFnPtr visitor_fn_; + // For leaf nodes, this is the name of the instruction form that the node + // represents. For other nodes, this is always set to "node". + std::string instruction_name_; // Source mapping from bit pattern to name of next decode stage. - std::vector<DecodePattern> pattern_table_; + const std::vector<DecodePattern>& pattern_table_; + static const std::vector<DecodePattern> kEmptyPatternTable; // Pointer to the decoder containing this node, used to call its visitor // function for leaf nodes. diff --git a/src/aarch64/decoder-constants-aarch64.h b/src/aarch64/decoder-constants-aarch64.h index 53f283bb..ddfdff6e 100644 --- a/src/aarch64/decoder-constants-aarch64.h +++ b/src/aarch64/decoder-constants-aarch64.h @@ -27,2112 +27,8816 @@ namespace vixl { namespace aarch64 { +// Recursively construct a uint32_t encoded bit pattern from a string literal. +// The string characters are mapped as two-bit symbols '0'=>0, '1'=>1, 'x'=>2. +// The remaining symbol, 3, is used to mark the end of the pattern, allowing +// its length to be found. For example, the pattern "1x01"_b is encoded in a +// uint32_t as 0b11_01_00_01. The maximum pattern string length is 15 +// characters, encoded as 3 in the most significant bits, followed by 15 2-bit +// symbols. +constexpr uint32_t str_to_two_bit_pattern(const char* x, size_t s, uint32_t a) { + if (s == 0) return a; + uint32_t r = (x[0] == 'x') ? 
2 : (x[0] - '0'); + return str_to_two_bit_pattern(x + 1, s - 1, (a << 2) | r); +} + +constexpr uint32_t operator"" _b(const char* x, size_t s) { + return str_to_two_bit_pattern(x, s, DecodeNode::kEndOfPattern); +} + // This decode table is derived from the AArch64 ISA XML specification, // available from https://developer.arm.com/products/architecture/a-profile/ -// -// The data below are based on the "Index by Encoding" tables, reformatted into -// structures of C++ strings, suitable for processing into an instruction -// decoding tree. // clang-format off static const DecodeMapping kDecodeMapping[] = { - { "Root", - {28, 27, 26, 25}, - { {"0000", "DecodeReserved"}, - {"0010", "DecodeSVE"}, - {"100x", "DecodeDataProcessingImmediate"}, - {"101x", "DecodeBranchesExceptionAndSystem"}, - {"x1x0", "DecodeLoadsAndStores"}, - {"x101", "DecodeDataProcessingRegister"}, - {"x111", "DecodeDataProcessingFPAndNEON"}, - }, - }, - - { "DecodeReserved", - {31, 30, 29, 24, 23, 22, 21, 20, 19, 18, 17, 16}, - { {"000000000000", "VisitReserved"}, - {"otherwise", "VisitUnallocated"}, - }, - }, - - { "DecodeDataProcessingImmediate", - {25, 24, 23}, - { {"00x", "VisitPCRelAddressing"}, - {"01x", "UnallocAddSubImmediate"}, - {"100", "UnallocLogicalImmediate"}, - {"101", "UnallocMoveWideImmediate"}, - {"110", "UnallocBitfield"}, - {"111", "UnallocExtract"}, - }, - }, - - { "DecodeBranchesExceptionAndSystem", - {31, 30, 29, 25, 24, 23, 22}, - { {"0100xxx", "UnallocConditionalBranch"}, - {"11000xx", "UnallocException"}, - {"1100100", "UnallocSystem"}, - {"1101xxx", "UnallocUnconditionalBranchToRegister"}, - {"x00xxxx", "VisitUnconditionalBranch"}, - {"x010xxx", "VisitCompareBranch"}, - {"x011xxx", "VisitTestBranch"}, - }, - }, - - { "DecodeLoadsAndStores", - {31, 29, 28, 26, 24, 23, 21}, - { {"x0000xx", "UnallocLoadStoreExclusive"}, - {"x01x0xx", "UnallocLoadLiteral"}, - {"x0101x0", "UnallocLoadStoreRCpcUnscaledOffset"}, - {"x10x00x", "UnallocLoadStorePairNonTemporal"}, - {"x10x01x", 
"UnallocLoadStorePairPostIndex"}, - {"x10x10x", "UnallocLoadStorePairOffset"}, - {"x10x11x", "UnallocLoadStorePairPreIndex"}, - {"0001000", "DecodeNEONLoadStoreMulti"}, - {"0001010", "UnallocNEONLoadStoreMultiStructPostIndex"}, - {"000110x", "DecodeNEONLoadStoreSingle"}, - {"000111x", "UnallocNEONLoadStoreSingleStructPostIndex"}, - {"x11x0x0", "DecodeLoadStore"}, - {"x11x0x1", "DecodeLoadStoreRegister"}, - {"x11x1xx", "UnallocLoadStoreUnsignedOffset"}, - }, - }, - - { "DecodeDataProcessingRegister", - {30, 28, 24, 23, 22, 21}, - { {"010110", "UnallocDataProcessing2Source"}, - {"110110", "UnallocDataProcessing1Source"}, - {"x00xxx", "UnallocLogicalShifted"}, - {"x01xx0", "UnallocAddSubShifted"}, - {"x01xx1", "UnallocAddSubExtended"}, - {"x10000", "UnallocAddSubWithCarry"}, - {"x10010", "DecodeCondCmp"}, - {"x10100", "UnallocConditionalSelect"}, - {"x11xxx", "UnallocDataProcessing3Source"}, - }, - }, - - { "DecodeDataProcessingFPAndNEON", - {31, 30, 29, 28, 24, 21}, - { {"0xx000", "DecodeNEONOther"}, - {"0xx001", "DecodeNEON3Op"}, - {"0xx01x", "DecodeNEONImmAndIndex"}, - {"01x100", "DecodeNEONScalarAnd3SHA"}, - {"01x101", "DecodeNEONScalarAnd2SHA"}, - {"01x11x", "DecodeNEONScalar"}, - {"x0x100", "UnallocFPFixedPointConvert"}, - {"x0x101", "DecodeFP"}, - {"x0x11x", "UnallocFPDataProcessing3Source"}, - }, - }, - - { "DecodeSVE", - {31, 30, 29, 24, 21, 15, 14, 13}, - { {"00000x1x", "VisitSVEIntMulAddPredicated"}, - {"00000000", "DecodeSVE00000000"}, - {"00000001", "DecodeSVE00000001"}, - {"00000100", "DecodeSVE00000100"}, - {"00000101", "VisitSVEIntUnaryArithmeticPredicated"}, - {"00001000", "VisitSVEIntArithmeticUnpredicated"}, - {"00001001", "VisitSVEBitwiseLogicalUnpredicated"}, - {"00001010", "DecodeSVE00001010"}, - {"00001100", "VisitSVEBitwiseShiftUnpredicated"}, - {"00001101", "DecodeSVE00001101"}, - {"00001110", "DecodeSVE00001110"}, - {"00001111", "DecodeSVE00001111"}, - {"000100xx", "DecodeSVE000100xx"}, - {"0001010x", "DecodeSVE0001010x"}, - {"00010110", 
"DecodeSVE00010110"}, - {"00010111", "DecodeSVE00010111"}, - {"00011000", "VisitSVEPermuteVectorExtract"}, - {"00011001", "DecodeSVE00011001"}, - {"00011010", "DecodeSVE00011010"}, - {"00011011", "VisitSVEPermuteVectorInterleaving"}, - {"00011100", "DecodeSVE00011100"}, - {"00011101", "DecodeSVE00011101"}, - {"0001111x", "VisitSVEVectorSelect"}, - {"00100xxx", "VisitSVEIntCompareVectors"}, - {"00101xxx", "VisitSVEIntCompareUnsignedImm"}, - {"00110x0x", "VisitSVEIntCompareSignedImm"}, - {"0011001x", "DecodeSVE0011001x"}, - {"00110110", "DecodeSVE00110110"}, - {"00110111", "DecodeSVE00110111"}, - {"00111000", "VisitSVEIntCompareScalarCountAndLimit"}, - {"00111001", "UnallocSVEConditionallyTerminateScalars"}, - {"00111100", "DecodeSVE00111100"}, - {"00111101", "UnallocSVEPredicateCount"}, - {"0011111x", "DecodeSVE0011111x"}, - {"010000xx", "VisitSVEIntMulAddUnpredicated"}, - {"01001xxx", "VisitSVEMulIndex"}, - {"011000xx", "VisitSVEFPComplexMulAdd"}, - {"01100100", "UnallocSVEFPComplexAddition"}, - {"01101000", "DecodeSVE01101000"}, - {"01101001", "UnallocSVEFPMulIndex"}, - {"01110x1x", "VisitSVEFPCompareVectors"}, - {"01110000", "VisitSVEFPArithmeticUnpredicated"}, - {"01110001", "DecodeSVE01110001"}, - {"01110100", "DecodeSVE01110100"}, - {"01110101", "DecodeSVE01110101"}, - {"01111xxx", "VisitSVEFPMulAdd"}, - {"100x010x", "UnallocSVELoadAndBroadcastElement"}, - {"100x0110", "DecodeSVE100x0110"}, - {"100x0111", "DecodeSVE100x0111"}, - {"100x11xx", "DecodeSVE100x11xx"}, - {"100000xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"100010xx", "DecodeSVE100010xx"}, - {"100100x1", "DecodeSVE100100x1"}, - {"10010000", "DecodeSVE10010000"}, - {"10010010", "DecodeSVE10010010"}, - {"100110x1", "DecodeSVE100110x1"}, - {"10011000", "DecodeSVE10011000"}, - {"10011010", "DecodeSVE10011010"}, - {"101xx000", "VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar"}, - {"101xx001", "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm"}, - {"101xx010", 
"VisitSVEContiguousLoad_ScalarPlusScalar"}, - {"101xx011", "VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar"}, - {"101xx101", "DecodeSVE101xx101"}, - {"101x0110", "DecodeSVE101x0110"}, - {"101x0111", "DecodeSVE101x0111"}, - {"101x1110", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, - {"101x1111", "DecodeSVE101x1111"}, - {"110x00xx", "VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets"}, - {"110x0111", "DecodeSVE110x0111"}, - {"1100010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"11000110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"110010xx", "DecodeSVE110010xx"}, - {"110011xx", "DecodeSVE110011xx"}, - {"1101010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"11010110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"110110xx", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, - {"110111xx", "DecodeSVE110111xx"}, - {"111x0011", "DecodeSVE111x0011"}, - {"111x01x0", "DecodeSVE111x01x0"}, - {"111x0101", "DecodeSVE111x0101"}, - {"111x0111", "DecodeSVE111x0111"}, - {"111x1011", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, - {"111x11x0", "DecodeSVE111x11x0"}, - {"111x1101", "DecodeSVE111x1101"}, - {"111x1111", "DecodeSVE111x1111"}, - {"1110x010", "VisitSVEContiguousStore_ScalarPlusScalar"}, - {"1111x000", "UnallocSVEStorePredicateRegister"}, - {"1111x010", "DecodeSVE1111x010"}, - }, - }, - - { "DecodeSVE00000000", - {20, 19, 18}, - { {"00x", "VisitSVEIntAddSubtractVectors_Predicated"}, - {"01x", "VisitSVEIntMinMaxDifference_Predicated"}, - {"100", "VisitSVEIntMulVectors_Predicated"}, - {"101", "VisitSVEIntDivideVectors_Predicated"}, - {"11x", "VisitSVEBitwiseLogical_Predicated"}, + { "_gggyqx", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtnu_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtnu_asimdmisc_r"}, + {"1111001"_b, "fcvtpu_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtpu_asimdmisc_r"}, + {"xx10000"_b, "umaxv_asimdall_only"}, + {"xx10001"_b, 
"uminv_asimdall_only"}, + }, + }, + + { "_ggvztl", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_qpzynz"}, + }, + }, + + { "_ghmzhr", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "rbit_32_dp_1src"}, + {"0000001"_b, "clz_32_dp_1src"}, + }, + }, + + { "_ghnljt", + {23, 22, 20, 19, 18, 17, 16}, + { {"0000000"_b, "fcvtns_64s_float2int"}, + {"0000001"_b, "fcvtnu_64s_float2int"}, + {"0000010"_b, "scvtf_s64_float2int"}, + {"0000011"_b, "ucvtf_s64_float2int"}, + {"0000100"_b, "fcvtas_64s_float2int"}, + {"0000101"_b, "fcvtau_64s_float2int"}, + {"0001000"_b, "fcvtps_64s_float2int"}, + {"0001001"_b, "fcvtpu_64s_float2int"}, + {"0010000"_b, "fcvtms_64s_float2int"}, + {"0010001"_b, "fcvtmu_64s_float2int"}, + {"0011000"_b, "fcvtzs_64s_float2int"}, + {"0011001"_b, "fcvtzu_64s_float2int"}, + {"0100000"_b, "fcvtns_64d_float2int"}, + {"0100001"_b, "fcvtnu_64d_float2int"}, + {"0100010"_b, "scvtf_d64_float2int"}, + {"0100011"_b, "ucvtf_d64_float2int"}, + {"0100100"_b, "fcvtas_64d_float2int"}, + {"0100101"_b, "fcvtau_64d_float2int"}, + {"0100110"_b, "fmov_64d_float2int"}, + {"0100111"_b, "fmov_d64_float2int"}, + {"0101000"_b, "fcvtps_64d_float2int"}, + {"0101001"_b, "fcvtpu_64d_float2int"}, + {"0110000"_b, "fcvtms_64d_float2int"}, + {"0110001"_b, "fcvtmu_64d_float2int"}, + {"0111000"_b, "fcvtzs_64d_float2int"}, + {"0111001"_b, "fcvtzu_64d_float2int"}, + {"1001110"_b, "fmov_64vx_float2int"}, + {"1001111"_b, "fmov_v64i_float2int"}, + {"1100000"_b, "fcvtns_64h_float2int"}, + {"1100001"_b, "fcvtnu_64h_float2int"}, + {"1100010"_b, "scvtf_h64_float2int"}, + {"1100011"_b, "ucvtf_h64_float2int"}, + {"1100100"_b, "fcvtas_64h_float2int"}, + {"1100101"_b, "fcvtau_64h_float2int"}, + {"1100110"_b, "fmov_64h_float2int"}, + {"1100111"_b, "fmov_h64_float2int"}, + {"1101000"_b, "fcvtps_64h_float2int"}, + {"1101001"_b, "fcvtpu_64h_float2int"}, + {"1110000"_b, "fcvtms_64h_float2int"}, + {"1110001"_b, "fcvtmu_64h_float2int"}, + {"1111000"_b, "fcvtzs_64h_float2int"}, + {"1111001"_b, 
"fcvtzu_64h_float2int"}, + }, + }, + + { "_gjprmg", + {11}, + { {"0"_b, "_llpsqq"}, }, }, - { "DecodeSVE00000100", - {20, 19}, - { {"0x", "VisitSVEBitwiseShiftByImm_Predicated"}, - {"10", "VisitSVEBitwiseShiftByVector_Predicated"}, - {"11", "VisitSVEBitwiseShiftByWideElements_Predicated"}, + { "_gjsnly", + {16, 13, 12}, + { {"000"_b, "rev16_64_dp_1src"}, + {"001"_b, "cls_64_dp_1src"}, + {"100"_b, "pacib_64p_dp_1src"}, + {"101"_b, "autib_64p_dp_1src"}, + {"110"_b, "_ksvxxm"}, + {"111"_b, "_xsgxyy"}, + }, + }, + + { "_gjylrt", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fcvtns_32h_float2int"}, + {"00001"_b, "fcvtnu_32h_float2int"}, + {"00010"_b, "scvtf_h32_float2int"}, + {"00011"_b, "ucvtf_h32_float2int"}, + {"00100"_b, "fcvtas_32h_float2int"}, + {"00101"_b, "fcvtau_32h_float2int"}, + {"00110"_b, "fmov_32h_float2int"}, + {"00111"_b, "fmov_h32_float2int"}, + {"01000"_b, "fcvtps_32h_float2int"}, + {"01001"_b, "fcvtpu_32h_float2int"}, + {"10000"_b, "fcvtms_32h_float2int"}, + {"10001"_b, "fcvtmu_32h_float2int"}, + {"11000"_b, "fcvtzs_32h_float2int"}, + {"11001"_b, "fcvtzu_32h_float2int"}, + }, + }, + + { "_gkhhjm", + {30, 23, 22}, + { {"000"_b, "sbfm_32m_bitfield"}, + {"100"_b, "ubfm_32m_bitfield"}, + }, + }, + + { "_gkkpjz", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtmu_asisdmiscfp16_r"}, + {"0x00001"_b, "fcvtmu_asisdmisc_r"}, + {"1111001"_b, "fcvtzu_asisdmiscfp16_r"}, + {"1x00001"_b, "fcvtzu_asisdmisc_r"}, + {"xx00000"_b, "neg_asisdmisc_r"}, + }, + }, + + { "_gkpvxz", + {10}, + { {"0"_b, "blraa_64p_branch_reg"}, + {"1"_b, "blrab_64p_branch_reg"}, + }, + }, + + { "_gkpzhr", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmsub_s_floatdp3"}, + {"001xxxx"_b, "fnmsub_d_floatdp3"}, + {"011xxxx"_b, "fnmsub_h_floatdp3"}, + {"10001x0"_b, "fmul_asisdelem_rh_h"}, + {"10x0101"_b, "sqshrn_asisdshf_n"}, + {"10x0111"_b, "sqrshrn_asisdshf_n"}, + {"11x01x0"_b, "fmul_asisdelem_r_sd"}, + {"1xx11x0"_b, "sqdmull_asisdelem_l"}, + }, + }, + + { "_gkxgsn", + {30, 23, 22, 11, 
10}, + { {"00000"_b, "stlur_32_ldapstl_unscaled"}, + {"00100"_b, "ldapur_32_ldapstl_unscaled"}, + {"01000"_b, "ldapursw_64_ldapstl_unscaled"}, + {"10000"_b, "stlur_64_ldapstl_unscaled"}, + {"10100"_b, "ldapur_64_ldapstl_unscaled"}, + }, + }, + + { "_glgrjy", + {23, 22, 20, 19, 18, 17, 16}, + { {"0000000"_b, "not_asimdmisc_r"}, + {"0100000"_b, "rbit_asimdmisc_r"}, + }, + }, + + { "_glhxyj", + {17}, + { {"0"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1"_b, "ld3_asisdlsop_b3_i3b"}, + }, + }, + + { "_glkzlv", + {20, 19, 18, 17, 16}, + { {"00000"_b, "rev16_asimdmisc_r"}, + }, + }, + + { "_gmjhll", + {17}, + { {"0"_b, "st1_asisdlsep_r4_r4"}, + {"1"_b, "st1_asisdlsep_i4_i4"}, + }, + }, + + { "_gmrxlp", + {30}, + { {"0"_b, "orr_32_log_shift"}, + {"1"_b, "ands_32_log_shift"}, }, }, - { "DecodeSVE00001010", - {23, 12, 11}, - { {"x0x", "VisitSVEIndexGeneration"}, - {"010", "VisitSVEStackFrameAdjustment"}, - {"110", "UnallocSVEStackFrameSize"}, + { "_gmrxqq", + {30, 23, 22}, + { {"000"_b, "stp_q_ldstpair_off"}, + {"001"_b, "ldp_q_ldstpair_off"}, + {"010"_b, "stp_q_ldstpair_pre"}, + {"011"_b, "ldp_q_ldstpair_pre"}, }, }, - { "UnallocSVEStackFrameSize", + { "_gmsgqz", + {30, 23, 22}, + { {"100"_b, "eor3_vvv16_crypto4"}, + {"101"_b, "sm3ss1_vvv4_crypto4"}, + {"110"_b, "xar_vvv2_crypto3_imm6"}, + }, + }, + + { "_gmvjgn", + {23}, + { {"0"_b, "fmax_asimdsame_only"}, + {"1"_b, "fmin_asimdsame_only"}, + }, + }, + + { "_gmvrxn", + {18, 17, 12}, + { {"000"_b, "st4_asisdlso_d4_4d"}, + }, + }, + + { "_gmvtss", + {30}, + { {"0"_b, "ldr_q_loadlit"}, + }, + }, + + { "_gngjxr", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cadd_z_zz"}, + {"00001"_b, "sqcadd_z_zz"}, + }, + }, + + { "_gnqhsl", + {23, 22, 20, 19, 18, 17, 16}, + { {"0010000"_b, "punpklo_p_p"}, + {"0010001"_b, "punpkhi_p_p"}, + {"xx0xxxx"_b, "zip1_p_pp"}, + {"xx10100"_b, "rev_p_p"}, + }, + }, + + { "_gnqjhz", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "rev16_32_dp_1src"}, + {"0000001"_b, "cls_32_dp_1src"}, + }, + }, + + { "_gntpyh", + 
{23, 13, 12, 11, 10}, + { {"00010"_b, "_gqspys"}, + {"00110"_b, "_ymgrgx"}, + {"01001"_b, "fcmge_asisdsame_only"}, + {"01011"_b, "facge_asisdsame_only"}, + {"01110"_b, "_kjyphv"}, + {"10010"_b, "_myjqrl"}, + {"10101"_b, "fabd_asisdsame_only"}, + {"10110"_b, "_vlsmsn"}, + {"11001"_b, "fcmgt_asisdsame_only"}, + {"11011"_b, "facgt_asisdsame_only"}, + {"11110"_b, "_pxtsvn"}, + }, + }, + + { "_gnxgxs", + {30, 18}, + { {"00"_b, "_krlpjl"}, + }, + }, + + { "_gnytkh", + {1, 0}, + { {"11"_b, "braaz_64_branch_reg"}, + }, + }, + + { "_gpxltv", + {23, 18, 17, 16}, + { {"0000"_b, "uqxtnt_z_zz"}, + }, + }, + + { "_gqspys", {22, 20, 19, 18, 17, 16}, - { {"011111", "VisitSVEStackFrameSize"}, + { {"111001"_b, "fcvtau_asisdmiscfp16_r"}, + {"x00001"_b, "fcvtau_asisdmisc_r"}, + {"x10000"_b, "fmaxnmp_asisdpair_only_sd"}, }, }, - { "DecodeSVE00001101", - {12, 11, 10}, - { {"0xx", "VisitSVEAddressGeneration"}, - {"10x", "VisitSVEFPTrigSelectCoefficient"}, - {"110", "VisitSVEFPExponentialAccelerator"}, - {"111", "VisitSVEConstructivePrefix_Unpredicated"}, + { "_gqykqv", + {23, 22, 12}, + { {"000"_b, "_rjmyyl"}, + {"001"_b, "_zqltpy"}, + {"010"_b, "_hstvrp"}, + {"011"_b, "_yhqyzj"}, + {"110"_b, "_mxtskk"}, + {"111"_b, "_qmjqhq"}, }, }, - { "DecodeSVE00001110", - {20, 12, 11}, - { {"00x", "VisitSVESaturatingIncDecVectorByElementCount"}, - {"100", "VisitSVEIncDecVectorByElementCount"}, + { "_grqnlm", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmadd_s_floatdp3"}, + {"001xxxx"_b, "fnmadd_d_floatdp3"}, + {"011xxxx"_b, "fnmadd_h_floatdp3"}, + {"10001x0"_b, "fmla_asisdelem_rh_h"}, + {"10x0001"_b, "sshr_asisdshf_r"}, + {"10x0101"_b, "ssra_asisdshf_r"}, + {"10x1001"_b, "srshr_asisdshf_r"}, + {"10x1101"_b, "srsra_asisdshf_r"}, + {"11x01x0"_b, "fmla_asisdelem_r_sd"}, + {"1xx11x0"_b, "sqdmlal_asisdelem_l"}, }, }, - { "DecodeSVE00001111", - {20, 12, 11}, - { {"x1x", "VisitSVESaturatingIncDecRegisterByElementCount"}, - {"000", "VisitSVEElementCount"}, - {"100", 
"VisitSVEIncDecRegisterByElementCount"}, + { "_grrjlh", + {30}, + { {"1"_b, "_jlqxvj"}, }, }, - { "DecodeSVE000100xx", - {23, 22, 20, 19, 18}, - { {"xx1xx", "VisitSVECopyIntImm_Predicated"}, - {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, + { "_grxzzg", + {23, 22}, + { {"00"_b, "tbx_asimdtbl_l2_2"}, }, }, - { "DecodeSVE0001010x", - {23, 22, 20, 19, 18}, - { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, + { "_gsgzpg", + {17}, + { {"0"_b, "ld2_asisdlso_h2_2h"}, }, }, - { "DecodeSVE00010110", - {23, 22, 20, 19, 18}, - { {"xx1xx", "VisitSVECopyFPImm_Predicated"}, - {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, + { "_gshrzq", + {22, 20, 11}, + { {"010"_b, "decb_r_rs"}, + {"110"_b, "dech_r_rs"}, }, }, - { "DecodeSVE00010111", - {23, 22, 20, 19, 18}, - { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, + { "_gskkxk", + {17}, + { {"0"_b, "st1_asisdlso_h1_1h"}, + }, + }, + + { "_gsttpm", + {12}, + { {"0"_b, "ld3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_gszlvl", + {30}, + { {"0"_b, "_tvsszp"}, + {"1"_b, "_njtngm"}, + }, + }, + + { "_gszxkp", + {13, 12}, + { {"11"_b, "cmgt_asisdsame_only"}, + }, + }, + + { "_gtjskz", + {30, 23, 22, 13, 12, 11, 10}, + { {"1011011"_b, "bfmmla_asimdsame2_e"}, + {"x011111"_b, "bfdot_asimdsame2_d"}, + {"x111111"_b, "bfmlal_asimdsame2_f"}, + {"xxx0xx1"_b, "fcmla_asimdsame2_c"}, + {"xxx1x01"_b, "fcadd_asimdsame2_c"}, + }, + }, + + { "_gttglx", + {17}, + { {"0"_b, "st4_asisdlso_h4_4h"}, + }, + }, + + { "_gtvhmp", + {30, 13}, + { {"00"_b, "_rjyrnt"}, + {"01"_b, "_mzhsrq"}, + {"10"_b, "_xtzlzy"}, + {"11"_b, 
"_kqxhzx"}, + }, + }, + + { "_gtxpgx", + {30, 23, 13, 4}, + { {"0000"_b, "prfw_i_p_bz_s_x32_scaled"}, + {"0010"_b, "prfd_i_p_bz_s_x32_scaled"}, + {"010x"_b, "ld1h_z_p_bz_s_x32_scaled"}, + {"011x"_b, "ldff1h_z_p_bz_s_x32_scaled"}, + {"1000"_b, "prfw_i_p_bz_d_x32_scaled"}, + {"1010"_b, "prfd_i_p_bz_d_x32_scaled"}, + {"110x"_b, "ld1h_z_p_bz_d_x32_scaled"}, + {"111x"_b, "ldff1h_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_gvjgyp", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmls_asimdelem_rh_h"}, + {"0x0101"_b, "shl_asimdshf_r"}, + {"0x1101"_b, "sqshl_asimdshf_r"}, + {"1000x0"_b, "fmlsl_asimdelem_lh"}, + {"1x01x0"_b, "fmls_asimdelem_r_sd"}, + {"xx10x0"_b, "smlsl_asimdelem_l"}, + {"xx11x0"_b, "sqdmlsl_asimdelem_l"}, + }, + }, + + { "_gvstrp", + {17}, + { {"0"_b, "ld2_asisdlsop_bx2_r2b"}, + {"1"_b, "ld2_asisdlsop_b2_i2b"}, + }, + }, + + { "_gvykrp", + {30, 23, 22, 13, 12, 11, 10}, + { {"10001x0"_b, "fmulx_asisdelem_rh_h"}, + {"10x0001"_b, "sqshrun_asisdshf_n"}, + {"10x0011"_b, "sqrshrun_asisdshf_n"}, + {"10x0101"_b, "uqshrn_asisdshf_n"}, + {"10x0111"_b, "uqrshrn_asisdshf_n"}, + {"11x01x0"_b, "fmulx_asisdelem_r_sd"}, + }, + }, + + { "_gxlvsg", + {13}, + { {"0"_b, "_vpxvjs"}, + {"1"_b, "_lpslrz"}, + }, + }, + + { "_gxmnkl", + {23, 22}, + { {"10"_b, "cdot_z_zzzi_s"}, + {"11"_b, "cdot_z_zzzi_d"}, + }, + }, + + { "_gxnlxg", + {20, 19, 18, 17, 16}, + { {"00001"_b, "uqxtn_asisdmisc_n"}, + }, + }, + + { "_gxslgq", + {23, 22, 20, 19, 17, 16}, + { {"000010"_b, "scvtf_s32_float2fix"}, + {"000011"_b, "ucvtf_s32_float2fix"}, + {"001100"_b, "fcvtzs_32s_float2fix"}, + {"001101"_b, "fcvtzu_32s_float2fix"}, + {"010010"_b, "scvtf_d32_float2fix"}, + {"010011"_b, "ucvtf_d32_float2fix"}, + {"011100"_b, "fcvtzs_32d_float2fix"}, + {"011101"_b, "fcvtzu_32d_float2fix"}, + {"110010"_b, "scvtf_h32_float2fix"}, + {"110011"_b, "ucvtf_h32_float2fix"}, + {"111100"_b, "fcvtzs_32h_float2fix"}, + {"111101"_b, "fcvtzu_32h_float2fix"}, + }, + }, + + { "_gygnsz", + {17}, + { {"0"_b, "ld2_asisdlsop_hx2_r2h"}, 
+ {"1"_b, "ld2_asisdlsop_h2_i2h"}, + }, + }, + + { "_gymljg", + {23}, + { {"0"_b, "fmulx_asimdsame_only"}, + }, + }, + + { "_gyrjrm", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cpy_z_p_v"}, + {"00001"_b, "compact_z_p_z"}, + {"00010"_b, "lasta_v_p_z"}, + {"00011"_b, "lastb_v_p_z"}, + {"00100"_b, "revb_z_z"}, + {"00101"_b, "revh_z_z"}, + {"00110"_b, "revw_z_z"}, + {"00111"_b, "rbit_z_p_z"}, + {"01000"_b, "clasta_z_p_zz"}, + {"01001"_b, "clastb_z_p_zz"}, + {"01010"_b, "clasta_v_p_z"}, + {"01011"_b, "clastb_v_p_z"}, + {"01100"_b, "splice_z_p_zz_des"}, + {"01101"_b, "splice_z_p_zz_con"}, + }, + }, + + { "_gznnvh", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "frinta_asimdmiscfp16_r"}, + {"0x00001"_b, "frinta_asimdmisc_r"}, + {"xx00000"_b, "cmge_asimdmisc_z"}, + }, + }, + + { "_gzqvnk", + {23, 12, 4, 3, 2, 1, 0}, + { {"1000000"_b, "ctermeq_rr"}, + {"1010000"_b, "ctermne_rr"}, + {"x10xxxx"_b, "whilewr_p_rr"}, + {"x11xxxx"_b, "whilerw_p_rr"}, + }, + }, + + { "_gzvgmh", + {18, 17, 12}, + { {"0x0"_b, "ld4_asisdlsop_dx4_r4d"}, + {"100"_b, "ld4_asisdlsop_dx4_r4d"}, + {"110"_b, "ld4_asisdlsop_d4_i4d"}, + }, + }, + + { "_gzylzp", + {17}, + { {"0"_b, "st3_asisdlsop_hx3_r3h"}, + {"1"_b, "st3_asisdlsop_h3_i3h"}, + }, + }, + + { "_hggmnk", + {13, 12}, + { {"10"_b, "lslv_32_dp_2src"}, + }, + }, + + { "_hgxqpp", + {18, 17}, + { {"00"_b, "st3_asisdlso_s3_3s"}, + }, + }, + + { "_hgxtqy", + {30, 23, 22, 13}, + { {"0001"_b, "ldnt1w_z_p_ar_s_x32_unscaled"}, + {"0010"_b, "ld1rsh_z_p_bi_s64"}, + {"0011"_b, "ld1rsh_z_p_bi_s32"}, + {"0110"_b, "ld1rsb_z_p_bi_s64"}, + {"0111"_b, "ld1rsb_z_p_bi_s32"}, + {"1000"_b, "ldnt1sw_z_p_ar_d_64_unscaled"}, + {"1010"_b, "ld1sw_z_p_bz_d_64_unscaled"}, + {"1011"_b, "ldff1sw_z_p_bz_d_64_unscaled"}, + }, + }, + + { "_hhhqjk", + {4, 3, 2, 1, 0}, + { {"11111"_b, "_pqpzkt"}, + }, + }, + + { "_hhkhkk", + {30, 23, 11, 10}, + { {"1001"_b, "_lkvynm"}, + }, + }, + + { "_hhkqtn", + {20, 19, 18, 17, 16}, + { {"00000"_b, "lasta_r_p_z"}, + {"00001"_b, "lastb_r_p_z"}, + 
{"01000"_b, "cpy_z_p_r"}, + {"10000"_b, "clasta_r_p_z"}, + {"10001"_b, "clastb_r_p_z"}, + }, + }, + + { "_hhnjjk", + {9, 8, 7, 6, 5}, + { {"11111"_b, "pacdzb_64z_dp_1src"}, + }, + }, + + { "_hhymvj", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000011"_b, "sqabs_asisdmisc_r"}, + {"0000100"_b, "sqxtn_asisdmisc_n"}, + }, + }, + + { "_hjgylh", + {30, 23, 22}, + { {"000"_b, "str_s_ldst_pos"}, + {"001"_b, "ldr_s_ldst_pos"}, + {"100"_b, "str_d_ldst_pos"}, + {"101"_b, "ldr_d_ldst_pos"}, + }, + }, + + { "_hjqtrt", + {12}, + { {"0"_b, "st1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_hjtvvm", + {13, 12}, + { {"00"_b, "sdiv_64_dp_2src"}, + {"10"_b, "rorv_64_dp_2src"}, + }, + }, + + { "_hljrqn", + {22}, + { {"0"_b, "str_32_ldst_regoff"}, + {"1"_b, "ldr_32_ldst_regoff"}, + }, + }, + + { "_hlshjk", + {23, 22}, + { {"00"_b, "fmlal_asimdsame_f"}, + {"10"_b, "fmlsl_asimdsame_f"}, + }, + }, + + { "_hmsgpj", + {13, 12, 10}, + { {"000"_b, "_hthxvr"}, + {"100"_b, "ptrue_p_s"}, + {"101"_b, "_kkvrzq"}, + {"110"_b, "_xxjrsy"}, + }, + }, + + { "_hmtmlq", + {4}, + { {"0"_b, "nor_p_p_pp_z"}, + {"1"_b, "nand_p_p_pp_z"}, + }, + }, + + { "_hmtxlh", + {9, 8, 7, 6, 5, 1, 0}, + { {"1111111"_b, "retaa_64e_branch_reg"}, + }, + }, + + { "_hmxlny", + {13, 12, 11, 10}, + { {"0000"_b, "addhn_asimddiff_n"}, + {"0001"_b, "sshl_asimdsame_only"}, + {"0010"_b, "_lyghyg"}, + {"0011"_b, "sqshl_asimdsame_only"}, + {"0100"_b, "sabal_asimddiff_l"}, + {"0101"_b, "srshl_asimdsame_only"}, + {"0110"_b, "_htgzzx"}, + {"0111"_b, "sqrshl_asimdsame_only"}, + {"1000"_b, "subhn_asimddiff_n"}, + {"1001"_b, "smax_asimdsame_only"}, + {"1010"_b, "_sqpjtr"}, + {"1011"_b, "smin_asimdsame_only"}, + {"1100"_b, "sabdl_asimddiff_l"}, + {"1101"_b, "sabd_asimdsame_only"}, + {"1110"_b, "_rnrzsj"}, + {"1111"_b, "saba_asimdsame_only"}, + }, + }, + + { "_hngpgx", + {23, 10, 4}, + { {"000"_b, "_vxsjgg"}, + }, + }, + + { "_hngpxg", + {1, 0}, + { {"00"_b, "br_64_branch_reg"}, + }, + }, + + { "_hnjrmp", + {4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, 
"cmpls_p_p_zi"}, + }, + }, + + { "_hnzzkj", + {30, 18}, + { {"00"_b, "_gxslgq"}, + }, + }, + + { "_hpgqlp", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_s_floatimm"}, + }, + }, + + { "_hqhzgj", + {17}, + { {"0"_b, "ld2_asisdlso_b2_2b"}, + }, + }, + + { "_hqlskj", + {18, 17}, + { {"00"_b, "ld1_asisdlse_r1_1v"}, + }, + }, + + { "_hqnxvt", + {13, 12, 11, 10}, + { {"0000"_b, "saddl_asimddiff_l"}, + {"0001"_b, "shadd_asimdsame_only"}, + {"0010"_b, "_rykykh"}, + {"0011"_b, "sqadd_asimdsame_only"}, + {"0100"_b, "saddw_asimddiff_w"}, + {"0101"_b, "srhadd_asimdsame_only"}, + {"0110"_b, "_glkzlv"}, + {"0111"_b, "_rnktts"}, + {"1000"_b, "ssubl_asimddiff_l"}, + {"1001"_b, "shsub_asimdsame_only"}, + {"1010"_b, "_rgztzl"}, + {"1011"_b, "sqsub_asimdsame_only"}, + {"1100"_b, "ssubw_asimddiff_w"}, + {"1101"_b, "cmgt_asimdsame_only"}, + {"1110"_b, "_nyxxks"}, + {"1111"_b, "cmge_asimdsame_only"}, + }, + }, + + { "_hqsvmh", + {18, 17}, + { {"00"_b, "st4_asisdlso_s4_4s"}, + }, + }, + + { "_hrhzqy", + {17}, + { {"0"_b, "ld4_asisdlse_r4"}, + }, + }, + + { "_hrktgs", + {12}, + { {"0"_b, "st2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_hrllsn", + {18, 17, 16}, + { {"000"_b, "fadd_z_p_zz"}, + {"001"_b, "fsub_z_p_zz"}, + {"010"_b, "fmul_z_p_zz"}, + {"011"_b, "fsubr_z_p_zz"}, + {"100"_b, "fmaxnm_z_p_zz"}, + {"101"_b, "fminnm_z_p_zz"}, + {"110"_b, "fmax_z_p_zz"}, + {"111"_b, "fmin_z_p_zz"}, + }, + }, + + { "_hrxyts", + {23, 22, 20, 19, 18, 13}, + { {"00000x"_b, "orr_z_zi"}, + {"01000x"_b, "eor_z_zi"}, + {"10000x"_b, "and_z_zi"}, + {"11000x"_b, "dupm_z_i"}, + {"xx1xx0"_b, "fcpy_z_p_i"}, + }, + }, + + { "_hsjynv", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + }, + }, + + { "_hstvrp", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fmov_d_floatdp1"}, + {"000010"_b, "fneg_d_floatdp1"}, + {"000100"_b, "fcvt_sd_floatdp1"}, + {"000110"_b, "bfcvt_bs_floatdp1"}, + {"001000"_b, "frintn_d_floatdp1"}, + {"001010"_b, "frintm_d_floatdp1"}, + {"001100"_b, "frinta_d_floatdp1"}, + {"001110"_b, "frintx_d_floatdp1"}, + 
{"010000"_b, "frint32z_d_floatdp1"}, + {"010010"_b, "frint64z_d_floatdp1"}, + }, + }, + + { "_hsvgnt", + {23, 22, 4, 3, 2, 1, 0}, + { {"0000001"_b, "svc_ex_exception"}, + {"0000010"_b, "hvc_ex_exception"}, + {"0000011"_b, "smc_ex_exception"}, + {"0100000"_b, "hlt_ex_exception"}, + }, + }, + + { "_htgzzx", + {20, 18, 17, 16}, + { {"0000"_b, "_mqgtsq"}, + }, + }, + + { "_hthxvr", + {23, 22, 9}, + { {"010"_b, "pfirst_p_p_p"}, + }, + }, + + { "_htmthz", + {22, 20, 19, 18, 17, 16, 13, 12}, + { {"01111100"_b, "_msztzv"}, + }, + }, + + { "_htnmls", + {22, 13, 12}, + { {"000"_b, "ldapr_32l_memop"}, + }, + }, + + { "_htplsj", + {4}, + { {"0"_b, "cmpeq_p_p_zz"}, + {"1"_b, "cmpne_p_p_zz"}, + }, + }, + + { "_htppjj", + {30, 23, 22}, + { {"000"_b, "msub_64a_dp_3src"}, + }, + }, + + { "_htqpks", + {30, 20, 19, 18, 17, 16, 13}, + { {"000000x"_b, "add_z_zi"}, + {"000001x"_b, "sub_z_zi"}, + {"000011x"_b, "subr_z_zi"}, + {"000100x"_b, "sqadd_z_zi"}, + {"000101x"_b, "uqadd_z_zi"}, + {"000110x"_b, "sqsub_z_zi"}, + {"000111x"_b, "uqsub_z_zi"}, + {"0010000"_b, "smax_z_zi"}, + {"0010010"_b, "umax_z_zi"}, + {"0010100"_b, "smin_z_zi"}, + {"0010110"_b, "umin_z_zi"}, + {"0100000"_b, "mul_z_zi"}, + {"011000x"_b, "dup_z_i"}, + {"0110010"_b, "fdup_z_i"}, + {"1xxxxx0"_b, "fnmad_z_p_zzz"}, + {"1xxxxx1"_b, "fnmsb_z_p_zzz"}, + }, + }, + + { "_hvvyhl", + {23, 22, 20, 19, 18, 17, 16}, + { {"0x00001"_b, "frint32z_asimdmisc_r"}, + {"1111000"_b, "fcmlt_asimdmiscfp16_fz"}, + {"1x00000"_b, "fcmlt_asimdmisc_fz"}, + }, + }, + + { "_hvyjnk", + {11}, + { {"0"_b, "sqrdmulh_z_zzi_h"}, + }, + }, + + { "_hxglyp", + {17}, + { {"0"_b, "ld4_asisdlsep_r4_r"}, + {"1"_b, "ld4_asisdlsep_i4_i"}, + }, + }, + + { "_hxmjhn", + {30, 23, 22, 19, 16}, + { {"10010"_b, "aese_b_cryptoaes"}, + {"xxx00"_b, "cls_asimdmisc_r"}, + {"xxx01"_b, "sqxtn_asimdmisc_n"}, + }, + }, + + { "_hxnmsl", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld2w_z_p_bi_contiguous"}, + {"000x0"_b, "ld2w_z_p_br_contiguous"}, + {"00101"_b, 
"ld4w_z_p_bi_contiguous"}, + {"001x0"_b, "ld4w_z_p_br_contiguous"}, + {"01001"_b, "ld2d_z_p_bi_contiguous"}, + {"010x0"_b, "ld2d_z_p_br_contiguous"}, + {"01101"_b, "ld4d_z_p_bi_contiguous"}, + {"011x0"_b, "ld4d_z_p_br_contiguous"}, + {"10011"_b, "st2w_z_p_bi_contiguous"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"}, + {"10111"_b, "st4w_z_p_bi_contiguous"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"}, + {"10x01"_b, "st1w_z_p_bi"}, + {"11011"_b, "st2d_z_p_bi_contiguous"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"}, + {"11111"_b, "st4d_z_p_bi_contiguous"}, + {"11x01"_b, "st1d_z_p_bi"}, + }, + }, + + { "_hxrtsq", + {23, 22, 12}, + { {"000"_b, "_gxlvsg"}, + {"001"_b, "_kxhjtk"}, + {"010"_b, "_hyxhpl"}, + {"011"_b, "_kvgjzh"}, + {"110"_b, "_tpsylx"}, + {"111"_b, "_zhpxqz"}, + }, + }, + + { "_hxzlmm", + {30, 23, 22}, + { {"000"_b, "stxp_sp32_ldstexcl"}, + {"001"_b, "ldxp_lp32_ldstexcl"}, + {"100"_b, "stxp_sp64_ldstexcl"}, + {"101"_b, "ldxp_lp64_ldstexcl"}, + }, + }, + + { "_hykhmt", + {20, 19, 18, 17, 16}, + { {"00000"_b, "saddv_r_p_z"}, + {"00001"_b, "uaddv_r_p_z"}, + {"01000"_b, "smaxv_r_p_z"}, + {"01001"_b, "umaxv_r_p_z"}, + {"01010"_b, "sminv_r_p_z"}, + {"01011"_b, "uminv_r_p_z"}, + {"1000x"_b, "movprfx_z_p_z"}, + {"11000"_b, "orv_r_p_z"}, + {"11001"_b, "eorv_r_p_z"}, + {"11010"_b, "andv_r_p_z"}, + }, + }, + + { "_hyxhpl", + {13}, + { {"0"_b, "_yrrppk"}, + {"1"_b, "_pnxggm"}, + }, + }, + + { "_hyymjs", + {18, 17, 12}, + { {"0x0"_b, "ld2_asisdlsop_dx2_r2d"}, + {"100"_b, "ld2_asisdlsop_dx2_r2d"}, + {"110"_b, "ld2_asisdlsop_d2_i2d"}, + }, + }, + + { "_hzkglv", + {30, 23, 22, 13}, + { {"0000"_b, "ld1b_z_p_br_u8"}, + {"0001"_b, "ldff1b_z_p_br_u8"}, + {"0010"_b, "ld1b_z_p_br_u32"}, + {"0011"_b, "ldff1b_z_p_br_u32"}, + {"0100"_b, "ld1sw_z_p_br_s64"}, + {"0101"_b, "ldff1sw_z_p_br_s64"}, + {"0110"_b, "ld1h_z_p_br_u32"}, + {"0111"_b, "ldff1h_z_p_br_u32"}, + {"1001"_b, "stnt1b_z_p_br_contiguous"}, + {"1011"_b, "st3b_z_p_br_contiguous"}, + {"10x0"_b, "st1b_z_p_br"}, + {"1101"_b, 
"stnt1h_z_p_br_contiguous"}, + {"1111"_b, "st3h_z_p_br_contiguous"}, + {"11x0"_b, "st1h_z_p_br"}, + }, + }, + + { "_hzllgl", + {17}, + { {"0"_b, "st1_asisdlse_r4_4v"}, + }, + }, + + { "_hzmlps", + {19}, + { {"0"_b, "_rpqgjl"}, + {"1"_b, "sys_cr_systeminstrs"}, + }, + }, + + { "_hzxjsp", + {23, 22, 20, 19, 16, 13, 10}, + { {"0000000"_b, "_shgkvq"}, + {"0000001"_b, "_vytxll"}, + {"0000010"_b, "_hqsvmh"}, + {"0000011"_b, "_gmvrxn"}, + {"0100000"_b, "_ygyxvx"}, + {"0100001"_b, "_tszvvk"}, + {"0100010"_b, "_tyjqvt"}, + {"0100011"_b, "_ylqnqt"}, + {"100xx00"_b, "st2_asisdlsop_sx2_r2s"}, + {"100xx01"_b, "_hrktgs"}, + {"100xx10"_b, "st4_asisdlsop_sx4_r4s"}, + {"100xx11"_b, "_mmrtvz"}, + {"1010x00"_b, "st2_asisdlsop_sx2_r2s"}, + {"1010x01"_b, "_lmtnzv"}, + {"1010x10"_b, "st4_asisdlsop_sx4_r4s"}, + {"1010x11"_b, "_qrykhm"}, + {"1011000"_b, "st2_asisdlsop_sx2_r2s"}, + {"1011001"_b, "_nyssqn"}, + {"1011010"_b, "st4_asisdlsop_sx4_r4s"}, + {"1011011"_b, "_kpqgsn"}, + {"1011100"_b, "_knpsmq"}, + {"1011101"_b, "_jzyzjh"}, + {"1011110"_b, "_vhhktl"}, + {"1011111"_b, "_yjxvkp"}, + {"110xx00"_b, "ld2_asisdlsop_sx2_r2s"}, + {"110xx01"_b, "_zppjvk"}, + {"110xx10"_b, "ld4_asisdlsop_sx4_r4s"}, + {"110xx11"_b, "_kqjmvy"}, + {"1110x00"_b, "ld2_asisdlsop_sx2_r2s"}, + {"1110x01"_b, "_ptkrvg"}, + {"1110x10"_b, "ld4_asisdlsop_sx4_r4s"}, + {"1110x11"_b, "_kjryvx"}, + {"1111000"_b, "ld2_asisdlsop_sx2_r2s"}, + {"1111001"_b, "_mlvpxh"}, + {"1111010"_b, "ld4_asisdlsop_sx4_r4s"}, + {"1111011"_b, "_xqjrgk"}, + {"1111100"_b, "_msgqps"}, + {"1111101"_b, "_hyymjs"}, + {"1111110"_b, "_qsnqpz"}, + {"1111111"_b, "_gzvgmh"}, + }, + }, + + { "_jggvph", + {30}, + { {"0"_b, "bic_64_log_shift"}, + {"1"_b, "eon_64_log_shift"}, + }, + }, + + { "_jgmlpk", + {4}, + { {"0"_b, "match_p_p_zz"}, + {"1"_b, "nmatch_p_p_zz"}, + }, + }, + + { "_jgyhrh", + {4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, "cmpls_p_p_zi"}, + }, + }, + + { "_jhkglp", + {30, 23, 22}, + { {"110"_b, "xar_vvv2_crypto3_imm6"}, + }, + }, + + { "_jhllmn", 
+ {4}, + { {"0"_b, "cmpge_p_p_zz"}, + {"1"_b, "cmpgt_p_p_zz"}, + }, + }, + + { "_jhqlkv", + {30, 23, 22}, + { {"000"_b, "stxr_sr32_ldstexcl"}, + {"001"_b, "ldxr_lr32_ldstexcl"}, + {"010"_b, "stllr_sl32_ldstexcl"}, + {"011"_b, "ldlar_lr32_ldstexcl"}, + {"100"_b, "stxr_sr64_ldstexcl"}, + {"101"_b, "ldxr_lr64_ldstexcl"}, + {"110"_b, "stllr_sl64_ldstexcl"}, + {"111"_b, "ldlar_lr64_ldstexcl"}, + }, + }, + + { "_jhytlg", + {30, 23, 22, 13, 11, 10}, + { {"000010"_b, "str_b_ldst_regoff"}, + {"000110"_b, "str_bl_ldst_regoff"}, + {"001010"_b, "ldr_b_ldst_regoff"}, + {"001110"_b, "ldr_bl_ldst_regoff"}, + {"010x10"_b, "str_q_ldst_regoff"}, + {"011x10"_b, "ldr_q_ldst_regoff"}, + {"100x10"_b, "str_h_ldst_regoff"}, + {"101x10"_b, "ldr_h_ldst_regoff"}, + }, + }, + + { "_jkkqvy", + {22, 20, 11}, + { {"100"_b, "uqinch_z_zs"}, + {"101"_b, "uqdech_z_zs"}, + {"110"_b, "dech_z_zs"}, + }, + }, + + { "_jkpsxk", + {20}, + { {"0"_b, "_kyygzs"}, + {"1"_b, "msr_sr_systemmove"}, + }, + }, + + { "_jkqktg", + {20, 19, 18, 17, 16}, + { {"00000"_b, "sqneg_asimdmisc_r"}, + }, + }, + + { "_jkrlsg", + {23, 22}, + { {"00"_b, "fmsub_s_floatdp3"}, + {"01"_b, "fmsub_d_floatdp3"}, + {"11"_b, "fmsub_h_floatdp3"}, + }, + }, + + { "_jksztq", + {22, 20, 19, 13, 12}, + { {"0x100"_b, "sri_asisdshf_r"}, + {"0x101"_b, "sli_asisdshf_r"}, + {"0x110"_b, "sqshlu_asisdshf_r"}, + {"0x111"_b, "uqshl_asisdshf_r"}, + {"10x00"_b, "sri_asisdshf_r"}, + {"10x01"_b, "sli_asisdshf_r"}, + {"10x10"_b, "sqshlu_asisdshf_r"}, + {"10x11"_b, "uqshl_asisdshf_r"}, + {"11100"_b, "sri_asisdshf_r"}, + {"11101"_b, "sli_asisdshf_r"}, + {"11110"_b, "sqshlu_asisdshf_r"}, + {"11111"_b, "uqshl_asisdshf_r"}, + {"x1000"_b, "sri_asisdshf_r"}, + {"x1001"_b, "sli_asisdshf_r"}, + {"x1010"_b, "sqshlu_asisdshf_r"}, + {"x1011"_b, "uqshl_asisdshf_r"}, + }, + }, + + { "_jkxlnq", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_nhzyvv"}, + }, + }, + + { "_jlqjzr", + {30, 23}, + { {"00"_b, "adds_64s_addsub_imm"}, + {"10"_b, "subs_64s_addsub_imm"}, + }, 
+ }, + + { "_jlqxvj", + {23, 22}, + { {"01"_b, "_mplgqv"}, + {"10"_b, "xar_vvv2_crypto3_imm6"}, + {"11"_b, "_ljhtkq"}, + }, + }, + + { "_jlrrlt", + {11, 10, 4}, + { {"000"_b, "whilege_p_p_rr"}, + {"001"_b, "whilegt_p_p_rr"}, + {"010"_b, "whilelt_p_p_rr"}, + {"011"_b, "whilele_p_p_rr"}, + {"100"_b, "whilehs_p_p_rr"}, + {"101"_b, "whilehi_p_p_rr"}, + {"110"_b, "whilelo_p_p_rr"}, + {"111"_b, "whilels_p_p_rr"}, + }, + }, + + { "_jlrvpl", + {17}, + { {"0"_b, "st2_asisdlse_r2"}, + }, + }, + + { "_jmgkrl", + {30}, + { {"0"_b, "orn_32_log_shift"}, + {"1"_b, "bics_32_log_shift"}, + }, + }, + + { "_jmvgsp", + {22, 20, 11}, + { {"100"_b, "sqinch_z_zs"}, + {"101"_b, "sqdech_z_zs"}, + {"110"_b, "inch_z_zs"}, + }, + }, + + { "_jmxstz", + {13, 12, 11, 10}, + { {"0000"_b, "sqdecp_z_p_z"}, + {"0010"_b, "sqdecp_r_p_r_sx"}, + {"0011"_b, "sqdecp_r_p_r_x"}, + }, + }, + + { "_jmyslr", + {17}, + { {"0"_b, "ld1_asisdlsep_r4_r4"}, + {"1"_b, "ld1_asisdlsep_i4_i4"}, + }, + }, + + { "_jnjlsh", + {12}, + { {"0"_b, "st1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_jnmgrh", + {30, 19, 18, 17, 16}, + { {"11000"_b, "ins_asimdins_iv_v"}, + {"1x100"_b, "ins_asimdins_iv_v"}, + {"1xx10"_b, "ins_asimdins_iv_v"}, + {"1xxx1"_b, "ins_asimdins_iv_v"}, + }, + }, + + { "_jplmmr", + {23, 22, 20, 19, 16, 13, 12}, + { {"0111100"_b, "fcvtas_asisdmiscfp16_r"}, + {"0111101"_b, "scvtf_asisdmiscfp16_r"}, + {"0x00100"_b, "fcvtas_asisdmisc_r"}, + {"0x00101"_b, "scvtf_asisdmisc_r"}, + {"0x10000"_b, "fmaxnmp_asisdpair_only_h"}, + {"0x10001"_b, "faddp_asisdpair_only_h"}, + {"0x10011"_b, "fmaxp_asisdpair_only_h"}, + {"1111000"_b, "fcmgt_asisdmiscfp16_fz"}, + {"1111001"_b, "fcmeq_asisdmiscfp16_fz"}, + {"1111010"_b, "fcmlt_asisdmiscfp16_fz"}, + {"1111101"_b, "frecpe_asisdmiscfp16_r"}, + {"1111111"_b, "frecpx_asisdmiscfp16_r"}, + {"1x00000"_b, "fcmgt_asisdmisc_fz"}, + {"1x00001"_b, "fcmeq_asisdmisc_fz"}, + {"1x00010"_b, "fcmlt_asisdmisc_fz"}, + {"1x00101"_b, "frecpe_asisdmisc_r"}, + {"1x00111"_b, "frecpx_asisdmisc_r"}, + 
{"1x10000"_b, "fminnmp_asisdpair_only_h"}, + {"1x10011"_b, "fminp_asisdpair_only_h"}, + }, + }, + + { "_jpvljz", + {23, 22}, + { {"01"_b, "fcmeq_asimdsamefp16_only"}, + }, + }, + + { "_jpxgqh", + {30, 23, 22}, + { {"000"_b, "sbfm_32m_bitfield"}, + {"100"_b, "ubfm_32m_bitfield"}, + }, + }, + + { "_jqjnrv", + {18, 17}, + { {"00"_b, "st1_asisdlso_s1_1s"}, + }, + }, + + { "_jqnglz", + {23, 22, 20, 19, 11}, + { {"00010"_b, "ucvtf_asisdshf_c"}, + {"001x0"_b, "ucvtf_asisdshf_c"}, + {"01xx0"_b, "ucvtf_asisdshf_c"}, + }, + }, + + { "_jqnhrj", + {12, 10}, + { {"00"_b, "_mzynlp"}, + {"01"_b, "_mvglql"}, + {"10"_b, "_tylqpt"}, + {"11"_b, "_lrjyhr"}, + }, + }, + + { "_jqplxx", + {20, 19, 18, 17, 16, 13, 12}, + { {"1111100"_b, "_xpvpqq"}, + }, + }, + + { "_jqtltz", + {13}, + { {"0"_b, "mul_asimdelem_r"}, + {"1"_b, "smull_asimdelem_l"}, + }, + }, + + { "_jqxqql", + {22, 20, 11}, + { {"000"_b, "uqincw_z_zs"}, + {"001"_b, "uqdecw_z_zs"}, + {"010"_b, "decw_z_zs"}, + {"100"_b, "uqincd_z_zs"}, + {"101"_b, "uqdecd_z_zs"}, + {"110"_b, "decd_z_zs"}, + }, + }, + + { "_jrgzxt", + {18, 17}, + { {"00"_b, "ld3_asisdlse_r3"}, + }, + }, + + { "_jrlynj", + {11, 10}, + { {"00"_b, "_gzqvnk"}, + }, + }, + + { "_jrnlzs", + {13, 12, 11}, + { {"000"_b, "fminnmp_asimdsamefp16_only"}, + {"010"_b, "fabd_asimdsamefp16_only"}, + {"100"_b, "fcmgt_asimdsamefp16_only"}, + {"101"_b, "facgt_asimdsamefp16_only"}, + {"110"_b, "fminp_asimdsamefp16_only"}, + }, + }, + + { "_jrnxzh", + {12}, + { {"0"_b, "cmla_z_zzz"}, + {"1"_b, "sqrdcmlah_z_zzz"}, + }, + }, + + { "_jrsptt", + {13, 12}, + { {"00"_b, "sqadd_asisdsame_only"}, + {"10"_b, "sqsub_asisdsame_only"}, + {"11"_b, "cmge_asisdsame_only"}, + }, + }, + + { "_jryylt", + {30, 23, 22, 19, 18, 17, 16}, + { {"00000x1"_b, "smov_asimdins_w_w"}, + {"0000x10"_b, "smov_asimdins_w_w"}, + {"00010xx"_b, "smov_asimdins_w_w"}, + {"0001110"_b, "smov_asimdins_w_w"}, + {"000x10x"_b, "smov_asimdins_w_w"}, + {"000x111"_b, "smov_asimdins_w_w"}, + {"10000x1"_b, "smov_asimdins_x_x"}, + 
{"1000x10"_b, "smov_asimdins_x_x"}, + {"10010xx"_b, "smov_asimdins_x_x"}, + {"1001110"_b, "smov_asimdins_x_x"}, + {"100x10x"_b, "smov_asimdins_x_x"}, + {"100x111"_b, "smov_asimdins_x_x"}, + }, + }, + + { "_jsygzs", + {30, 23, 22, 12, 11, 10}, + { {"0000xx"_b, "add_64_addsub_ext"}, + {"000100"_b, "add_64_addsub_ext"}, + {"1000xx"_b, "sub_64_addsub_ext"}, + {"100100"_b, "sub_64_addsub_ext"}, + }, + }, + + { "_jtqlhs", + {22}, + { {"0"_b, "str_64_ldst_regoff"}, + {"1"_b, "ldr_64_ldst_regoff"}, + }, + }, + + { "_jvhnxl", + {23}, + { {"0"_b, "fcmge_asimdsame_only"}, + {"1"_b, "fcmgt_asimdsame_only"}, + }, + }, + + { "_jvpqrp", + {23, 22}, + { {"00"_b, "fmla_asisdelem_rh_h"}, + {"1x"_b, "fmla_asisdelem_r_sd"}, + }, + }, + + { "_jvvzjq", + {23, 22}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, + }, + }, + + { "_jxrlyh", + {12}, + { {"0"_b, "_mtgksl"}, + }, + }, + + { "_jxszhy", + {23, 22, 11}, + { {"000"_b, "_rqhryp"}, + }, + }, + + { "_jxtgtx", + {30, 23, 22}, + { {"000"_b, "str_b_ldst_pos"}, + {"001"_b, "ldr_b_ldst_pos"}, + {"010"_b, "str_q_ldst_pos"}, + {"011"_b, "ldr_q_ldst_pos"}, + {"100"_b, "str_h_ldst_pos"}, + {"101"_b, "ldr_h_ldst_pos"}, + }, + }, + + { "_jxyskn", + {13, 12, 11, 10}, + { {"0000"_b, "uqincp_z_p_z"}, + {"0010"_b, "uqincp_r_p_r_uw"}, + {"0011"_b, "uqincp_r_p_r_x"}, + }, + }, + + { "_jxzrxm", + {20, 19, 18, 17, 16}, + { {"00000"_b, "usqadd_asisdmisc_r"}, + }, + }, + + { "_jymnkk", + {23, 22, 12, 11, 10}, + { {"01000"_b, "bfdot_z_zzzi"}, + {"100x0"_b, "fmlalb_z_zzzi_s"}, + {"100x1"_b, "fmlalt_z_zzzi_s"}, + {"110x0"_b, "bfmlalb_z_zzzi"}, + {"110x1"_b, "bfmlalt_z_zzzi"}, + }, + }, + + { "_jyxszq", + {30, 4}, + { {"0x"_b, "b_only_branch_imm"}, + {"10"_b, "b_only_condbranch"}, + }, + }, + + { "_jzjvtv", + {19, 18, 17, 16, 4}, + { {"00000"_b, "brkbs_p_p_p_z"}, + }, + }, + + { "_jzkqhn", + {23, 22, 12, 11, 10}, + { {"10000"_b, "fmlslb_z_zzz"}, + {"10001"_b, "fmlslt_z_zzz"}, + }, + }, + + { "_jzyzjh", + 
{18, 17, 12}, + { {"0x0"_b, "st2_asisdlsop_dx2_r2d"}, + {"100"_b, "st2_asisdlsop_dx2_r2d"}, + {"110"_b, "st2_asisdlsop_d2_i2d"}, + }, + }, + + { "_kgmqkh", + {30, 23, 22, 13}, + { {"0000"_b, "ld1w_z_p_ai_s"}, + {"0001"_b, "ldff1w_z_p_ai_s"}, + {"0010"_b, "ld1rw_z_p_bi_u32"}, + {"0011"_b, "ld1rw_z_p_bi_u64"}, + {"0110"_b, "ld1rsb_z_p_bi_s16"}, + {"0111"_b, "ld1rd_z_p_bi_u64"}, + {"1000"_b, "ld1w_z_p_ai_d"}, + {"1001"_b, "ldff1w_z_p_ai_d"}, + {"1010"_b, "ld1w_z_p_bz_d_64_scaled"}, + {"1011"_b, "ldff1w_z_p_bz_d_64_scaled"}, + {"1100"_b, "ld1d_z_p_ai_d"}, + {"1101"_b, "ldff1d_z_p_ai_d"}, + {"1110"_b, "ld1d_z_p_bz_d_64_scaled"}, + {"1111"_b, "ldff1d_z_p_bz_d_64_scaled"}, + }, + }, + + { "_kgpgly", + {23, 22, 10}, + { {"100"_b, "smlslb_z_zzzi_s"}, + {"101"_b, "smlslt_z_zzzi_s"}, + {"110"_b, "smlslb_z_zzzi_d"}, + {"111"_b, "smlslt_z_zzzi_d"}, + }, + }, + + { "_khjvqq", + {22, 11}, + { {"00"_b, "sqrdmulh_z_zzi_s"}, + {"10"_b, "sqrdmulh_z_zzi_d"}, + }, + }, + + { "_kjghlk", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "orr_asimdimm_l_sl"}, + {"00x100"_b, "ssra_asimdshf_r"}, + {"00x110"_b, "srsra_asimdshf_r"}, + {"010x00"_b, "ssra_asimdshf_r"}, + {"010x10"_b, "srsra_asimdshf_r"}, + {"011100"_b, "ssra_asimdshf_r"}, + {"011110"_b, "srsra_asimdshf_r"}, + {"0x1000"_b, "ssra_asimdshf_r"}, + {"0x1010"_b, "srsra_asimdshf_r"}, + }, + }, + + { "_kjngjl", + {23, 22}, + { {"00"_b, "tbx_asimdtbl_l1_1"}, + }, + }, + + { "_kjpxvh", + {20, 19, 18}, + { {"000"_b, "_yyrkmn"}, + }, + }, + + { "_kjqynn", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_kjrxpx", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "ucvtf_asimdmiscfp16_r"}, + {"0x00001"_b, "ucvtf_asimdmisc_r"}, + {"1111000"_b, "fcmle_asimdmiscfp16_fz"}, + {"1111001"_b, "frsqrte_asimdmiscfp16_r"}, + {"1x00000"_b, "fcmle_asimdmisc_fz"}, + {"1x00001"_b, "frsqrte_asimdmisc_r"}, + }, + }, + + { "_kjryvx", + {12}, + { {"0"_b, "ld4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_kjyphv", + {20, 19, 18, 17, 16}, + { 
{"10000"_b, "fmaxp_asisdpair_only_sd"}, + }, + }, + + { "_kkgpjl", + {20, 19, 18, 17}, + { {"0000"_b, "_msqkyy"}, + }, + }, + + { "_kkgzst", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmla_asimdelem_rh_h"}, + {"0x0001"_b, "sshr_asimdshf_r"}, + {"0x0101"_b, "ssra_asimdshf_r"}, + {"0x1001"_b, "srshr_asimdshf_r"}, + {"0x1101"_b, "srsra_asimdshf_r"}, + {"1000x0"_b, "fmlal_asimdelem_lh"}, + {"1x01x0"_b, "fmla_asimdelem_r_sd"}, + {"xx10x0"_b, "smlal_asimdelem_l"}, + {"xx11x0"_b, "sqdmlal_asimdelem_l"}, + }, + }, + + { "_kkmjyr", + {0}, + { {"1"_b, "blrabz_64_branch_reg"}, }, }, - { "UnallocSVEBroadcastIndexElement", + { "_kkmxxx", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_jqplxx"}, + }, + }, + + { "_kknjng", + {23, 22, 20, 19, 11}, + { {"00010"_b, "ssra_asisdshf_r"}, + {"001x0"_b, "ssra_asisdshf_r"}, + {"01xx0"_b, "ssra_asisdshf_r"}, + }, + }, + + { "_kktglv", + {30, 13, 12}, + { {"000"_b, "_njvkjq"}, + {"001"_b, "_rpzykx"}, + {"010"_b, "_zzvxvh"}, + {"011"_b, "_yqxnzl"}, + {"100"_b, "_gxmnkl"}, + {"110"_b, "_lkxgjy"}, + {"111"_b, "_vjmklj"}, + }, + }, + + { "_kkvrzq", + {23, 22, 9, 8, 7, 6, 5}, + { {"0000000"_b, "pfalse_p"}, + }, + }, + + { "_klkgqk", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtms_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtms_asimdmisc_r"}, + {"1111001"_b, "fcvtzs_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtzs_asimdmisc_r"}, + {"xx00000"_b, "abs_asimdmisc_r"}, + {"xx10001"_b, "addv_asimdall_only"}, + }, + }, + + { "_klnhpj", + {9, 8, 7, 6, 5, 1, 0}, + { {"1111111"_b, "eretab_64e_branch_reg"}, + }, + }, + + { "_klthpn", + {30, 23, 22, 11, 10}, + { {"01000"_b, "csel_64_condsel"}, + {"01001"_b, "csinc_64_condsel"}, + {"11000"_b, "csinv_64_condsel"}, + {"11001"_b, "csneg_64_condsel"}, + }, + }, + + { "_kmhtqp", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + }, + }, + + { "_kmkpnj", + {17}, + { {"0"_b, "ld3_asisdlso_h3_3h"}, + }, + }, + + { "_knkjnz", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1sh_z_p_bi_s32"}, + {"00011"_b, 
"ldnf1sh_z_p_bi_s32"}, + {"00101"_b, "ld1w_z_p_bi_u64"}, + {"00111"_b, "ldnf1w_z_p_bi_u64"}, + {"01001"_b, "ld1sb_z_p_bi_s32"}, + {"01011"_b, "ldnf1sb_z_p_bi_s32"}, + {"01101"_b, "ld1d_z_p_bi_u64"}, + {"01111"_b, "ldnf1d_z_p_bi_u64"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"}, + {"100x1"_b, "st1w_z_p_bz_d_64_scaled"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"}, + {"101x1"_b, "st1w_z_p_ai_s"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"}, + {"110x1"_b, "st1d_z_p_bz_d_64_scaled"}, + }, + }, + + { "_knpsmq", + {18, 17}, + { {"0x"_b, "st2_asisdlsop_sx2_r2s"}, + {"10"_b, "st2_asisdlsop_sx2_r2s"}, + {"11"_b, "st2_asisdlsop_s2_i2s"}, + }, + }, + + { "_kpmvkn", + {30, 23, 22, 11, 10}, + { {"00000"_b, "stur_b_ldst_unscaled"}, + {"00001"_b, "str_b_ldst_immpost"}, + {"00011"_b, "str_b_ldst_immpre"}, + {"00100"_b, "ldur_b_ldst_unscaled"}, + {"00101"_b, "ldr_b_ldst_immpost"}, + {"00111"_b, "ldr_b_ldst_immpre"}, + {"01000"_b, "stur_q_ldst_unscaled"}, + {"01001"_b, "str_q_ldst_immpost"}, + {"01011"_b, "str_q_ldst_immpre"}, + {"01100"_b, "ldur_q_ldst_unscaled"}, + {"01101"_b, "ldr_q_ldst_immpost"}, + {"01111"_b, "ldr_q_ldst_immpre"}, + {"10000"_b, "stur_h_ldst_unscaled"}, + {"10001"_b, "str_h_ldst_immpost"}, + {"10011"_b, "str_h_ldst_immpre"}, + {"10100"_b, "ldur_h_ldst_unscaled"}, + {"10101"_b, "ldr_h_ldst_immpost"}, + {"10111"_b, "ldr_h_ldst_immpre"}, + }, + }, + + { "_kpqgsn", + {12}, + { {"0"_b, "st4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_kpxtsp", + {6, 5}, + { {"00"_b, "cfinv_m_pstate"}, + {"01"_b, "xaflag_m_pstate"}, + {"10"_b, "axflag_m_pstate"}, + }, + }, + + { "_kpyqyv", + {12}, + { {"0"_b, "_vjxqhp"}, + }, + }, + + { "_kqjmvy", + {12}, + { {"0"_b, "ld4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_kqkhtz", + {9, 8, 7, 6, 5}, + { {"11111"_b, "autiza_64z_dp_1src"}, + }, + }, + + { "_kqvljp", + {18, 17, 16}, + { {"000"_b, "fabd_z_p_zz"}, + {"001"_b, "fscale_z_p_zz"}, + {"010"_b, "fmulx_z_p_zz"}, + {"100"_b, "fdivr_z_p_zz"}, + {"101"_b, "fdiv_z_p_zz"}, + }, + }, + + { "_kqxhzx", + {20, 
19, 18, 16, 12, 11, 10}, + { {"0000xxx"_b, "_zmzxjm"}, + {"0010xxx"_b, "_tmshps"}, + {"0011xxx"_b, "_tsksxr"}, + {"0110100"_b, "_pnzphx"}, + {"0111100"_b, "_xpkkpn"}, + {"1000xxx"_b, "_psqpkp"}, + {"1001xxx"_b, "_phxkzh"}, + {"1100xxx"_b, "_vsvrgt"}, + }, + }, + + { "_kqzmtr", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1b_z_p_bi_u16"}, + {"00011"_b, "ldnf1b_z_p_bi_u16"}, + {"00101"_b, "ld1b_z_p_bi_u64"}, + {"00111"_b, "ldnf1b_z_p_bi_u64"}, + {"01001"_b, "ld1h_z_p_bi_u16"}, + {"01011"_b, "ldnf1h_z_p_bi_u16"}, + {"01101"_b, "ld1h_z_p_bi_u64"}, + {"01111"_b, "ldnf1h_z_p_bi_u64"}, + {"101x1"_b, "st1b_z_p_ai_s"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_scaled"}, + {"110x1"_b, "st1h_z_p_bz_d_64_scaled"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_scaled"}, + {"111x1"_b, "st1h_z_p_ai_s"}, + }, + }, + + { "_krhrrr", + {12, 10}, + { {"00"_b, "_xyzpvp"}, + {"01"_b, "_nlyntn"}, + {"10"_b, "_zhkjzg"}, + {"11"_b, "_zmpzkg"}, + }, + }, + + { "_krlpjl", + {23, 22, 20, 19, 17, 16}, + { {"000010"_b, "scvtf_s64_float2fix"}, + {"000011"_b, "ucvtf_s64_float2fix"}, + {"001100"_b, "fcvtzs_64s_float2fix"}, + {"001101"_b, "fcvtzu_64s_float2fix"}, + {"010010"_b, "scvtf_d64_float2fix"}, + {"010011"_b, "ucvtf_d64_float2fix"}, + {"011100"_b, "fcvtzs_64d_float2fix"}, + {"011101"_b, "fcvtzu_64d_float2fix"}, + {"110010"_b, "scvtf_h64_float2fix"}, + {"110011"_b, "ucvtf_h64_float2fix"}, + {"111100"_b, "fcvtzs_64h_float2fix"}, + {"111101"_b, "fcvtzu_64h_float2fix"}, + }, + }, + + { "_kstltt", + {18, 17, 12}, + { {"0x0"_b, "ld3_asisdlsop_dx3_r3d"}, + {"100"_b, "ld3_asisdlsop_dx3_r3d"}, + {"110"_b, "ld3_asisdlsop_d3_i3d"}, + }, + }, + + { "_ksvxxm", + {9, 8, 7, 6, 5}, + { {"11111"_b, "pacizb_64z_dp_1src"}, + }, + }, + + { "_ktnjrx", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmadd_s_floatdp3"}, + {"001xxxx"_b, "fnmadd_d_floatdp3"}, + {"011xxxx"_b, "fnmadd_h_floatdp3"}, + {"10001x0"_b, "fmls_asisdelem_rh_h"}, + {"10x0101"_b, "shl_asisdshf_r"}, + {"10x1101"_b, "sqshl_asisdshf_r"}, + {"11x01x0"_b, 
"fmls_asisdelem_r_sd"}, + {"1xx11x0"_b, "sqdmlsl_asisdelem_l"}, + }, + }, + + { "_ktrkrp", + {17}, + { {"0"_b, "st3_asisdlso_h3_3h"}, + }, + }, + + { "_ktyppm", + {11, 10}, + { {"00"_b, "asr_z_zw"}, + {"01"_b, "lsr_z_zw"}, + {"11"_b, "lsl_z_zw"}, + }, + }, + + { "_kvgjzh", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_d_floatimm"}, + }, + }, + + { "_kvmrng", + {23, 22}, + { {"00"_b, "tbl_asimdtbl_l1_1"}, + }, + }, + + { "_kvnqhn", + {22, 20, 11}, + { {"000"_b, "sqincw_r_rs_sx"}, + {"001"_b, "sqdecw_r_rs_sx"}, + {"010"_b, "sqincw_r_rs_x"}, + {"011"_b, "sqdecw_r_rs_x"}, + {"100"_b, "sqincd_r_rs_sx"}, + {"101"_b, "sqdecd_r_rs_sx"}, + {"110"_b, "sqincd_r_rs_x"}, + {"111"_b, "sqdecd_r_rs_x"}, + }, + }, + + { "_kvyysq", + {12, 9, 8, 7, 6, 5}, + { {"100000"_b, "_sjrqth"}, + }, + }, + + { "_kxhjtk", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_s_floatimm"}, + }, + }, + + { "_kxjgsz", + {23, 22, 20, 19, 11}, + { {"00000"_b, "movi_asimdimm_m_sm"}, + }, + }, + + { "_kxkyqr", + {17}, + { {"0"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1"_b, "ld4_asisdlsop_h4_i4h"}, + }, + }, + + { "_kxprqm", + {13, 12, 11, 10}, + { {"0000"_b, "raddhn_asimddiff_n"}, + {"0001"_b, "ushl_asimdsame_only"}, + {"0010"_b, "_mmknzp"}, + {"0011"_b, "uqshl_asimdsame_only"}, + {"0100"_b, "uabal_asimddiff_l"}, + {"0101"_b, "urshl_asimdsame_only"}, + {"0110"_b, "_glgrjy"}, + {"0111"_b, "uqrshl_asimdsame_only"}, + {"1000"_b, "rsubhn_asimddiff_n"}, + {"1001"_b, "umax_asimdsame_only"}, + {"1010"_b, "_pxlnhs"}, + {"1011"_b, "umin_asimdsame_only"}, + {"1100"_b, "uabdl_asimddiff_l"}, + {"1101"_b, "uabd_asimdsame_only"}, + {"1110"_b, "_jkqktg"}, + {"1111"_b, "uaba_asimdsame_only"}, + }, + }, + + { "_kxsysq", + {30}, + { {"0"_b, "tbnz_only_testbranch"}, + }, + }, + + { "_kxvvkq", + {30, 23, 13}, + { {"000"_b, "ld1b_z_p_bz_s_x32_unscaled"}, + {"001"_b, "ldff1b_z_p_bz_s_x32_unscaled"}, + {"010"_b, "ld1h_z_p_bz_s_x32_unscaled"}, + {"011"_b, "ldff1h_z_p_bz_s_x32_unscaled"}, + {"100"_b, "ld1b_z_p_bz_d_x32_unscaled"}, + {"101"_b, 
"ldff1b_z_p_bz_d_x32_unscaled"}, + {"110"_b, "ld1h_z_p_bz_d_x32_unscaled"}, + {"111"_b, "ldff1h_z_p_bz_d_x32_unscaled"}, + }, + }, + + { "_kyjxrr", + {30, 13}, + { {"00"_b, "_qtxpky"}, + {"01"_b, "_hnjrmp"}, + {"11"_b, "_vzjvtv"}, + }, + }, + + { "_kykymg", + {30}, + { {"1"_b, "_rsyhtj"}, + }, + }, + + { "_kypqpy", + {30, 23, 22, 13, 12, 11, 10}, + { {"1010000"_b, "sm3partw1_vvv4_cryptosha512_3"}, + {"1010001"_b, "sm3partw2_vvv4_cryptosha512_3"}, + {"1010010"_b, "sm4ekey_vvv4_cryptosha512_3"}, + }, + }, + + { "_kyspnn", + {22}, + { {"0"_b, "sqdmullb_z_zzi_s"}, + {"1"_b, "sqdmullb_z_zzi_d"}, + }, + }, + + { "_kyxqgg", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "stgm_64bulk_ldsttags"}, + }, + }, + + { "_kyxrqg", {10}, - { {"0", "VisitSVEBroadcastIndexElement"}, + { {"0"_b, "uabalb_z_zzz"}, + {"1"_b, "uabalt_z_zzz"}, + }, + }, + + { "_kyygzs", + {19}, + { {"0"_b, "_nnkyzr"}, + {"1"_b, "sys_cr_systeminstrs"}, }, }, - { "UnallocSVETableLookup", + { "_kyyzks", + {13, 12}, + { {"00"_b, "sdiv_32_dp_2src"}, + {"10"_b, "rorv_32_dp_2src"}, + }, + }, + + { "_kzmvpk", + {23, 22, 10}, + { {"100"_b, "smlalb_z_zzzi_s"}, + {"101"_b, "smlalt_z_zzzi_s"}, + {"110"_b, "smlalb_z_zzzi_d"}, + {"111"_b, "smlalt_z_zzzi_d"}, + }, + }, + + { "_kzrklp", + {17}, + { {"0"_b, "ld4_asisdlso_b4_4b"}, + }, + }, + + { "_lgglzy", + {30, 23, 22, 19, 16}, + { {"10010"_b, "aesimc_b_cryptoaes"}, + {"x0x01"_b, "fcvtl_asimdmisc_l"}, + {"xxx00"_b, "sqabs_asimdmisc_r"}, + }, + }, + + { "_lhmlrj", + {30, 23, 22, 20, 19}, + { {"0xxxx"_b, "bl_only_branch_imm"}, + {"10001"_b, "sysl_rc_systeminstrs"}, + {"1001x"_b, "mrs_rs_systemmove"}, + }, + }, + + { "_lhpgsn", + {13, 12, 10}, + { {"000"_b, "sqdmulh_asisdelem_r"}, + {"010"_b, "sqrdmulh_asisdelem_r"}, + {"101"_b, "_mxkgnq"}, + {"111"_b, "_sgnknz"}, + }, + }, + + { "_lhtyjq", + {23, 22, 20, 19, 18, 16, 13}, + { {"0000000"_b, "_gskkxk"}, + {"0000001"_b, "_ktrkrp"}, + {"0100000"_b, "_nmtkjv"}, + {"0100001"_b, "_kmkpnj"}, + {"100xxx0"_b, 
"st1_asisdlsop_hx1_r1h"}, + {"100xxx1"_b, "st3_asisdlsop_hx3_r3h"}, + {"1010xx0"_b, "st1_asisdlsop_hx1_r1h"}, + {"1010xx1"_b, "st3_asisdlsop_hx3_r3h"}, + {"10110x0"_b, "st1_asisdlsop_hx1_r1h"}, + {"10110x1"_b, "st3_asisdlsop_hx3_r3h"}, + {"1011100"_b, "st1_asisdlsop_hx1_r1h"}, + {"1011101"_b, "st3_asisdlsop_hx3_r3h"}, + {"1011110"_b, "_mgmgqh"}, + {"1011111"_b, "_gzylzp"}, + {"110xxx0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"110xxx1"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1110xx0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"1110xx1"_b, "ld3_asisdlsop_hx3_r3h"}, + {"11110x0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"11110x1"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1111100"_b, "ld1_asisdlsop_hx1_r1h"}, + {"1111101"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1111110"_b, "_mrkkps"}, + {"1111111"_b, "_xygxsv"}, + }, + }, + + { "_lhvtrp", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "orr_asimdimm_l_hl"}, + {"00x100"_b, "sqshrn_asimdshf_n"}, + {"00x101"_b, "sqrshrn_asimdshf_n"}, + {"010x00"_b, "sqshrn_asimdshf_n"}, + {"010x01"_b, "sqrshrn_asimdshf_n"}, + {"011100"_b, "sqshrn_asimdshf_n"}, + {"011101"_b, "sqrshrn_asimdshf_n"}, + {"0x1000"_b, "sqshrn_asimdshf_n"}, + {"0x1001"_b, "sqrshrn_asimdshf_n"}, + }, + }, + + { "_ljhtkq", + {20, 19, 18, 17, 16, 13, 12, 11}, + { {"00000000"_b, "_yvyxkx"}, + }, + }, + + { "_ljljkv", + {30, 23, 22, 13, 12, 11, 10}, + { {"0001100"_b, "and_z_zz"}, + {"0001110"_b, "eor3_z_zzz"}, + {"0001111"_b, "bsl_z_zzz"}, + {"0011100"_b, "orr_z_zz"}, + {"0011110"_b, "bcax_z_zzz"}, + {"0011111"_b, "bsl1n_z_zzz"}, + {"0101100"_b, "eor_z_zz"}, + {"0101111"_b, "bsl2n_z_zzz"}, + {"0111100"_b, "bic_z_zz"}, + {"0111111"_b, "nbsl_z_zzz"}, + {"0xx0000"_b, "add_z_zz"}, + {"0xx0001"_b, "sub_z_zz"}, + {"0xx0100"_b, "sqadd_z_zz"}, + {"0xx0101"_b, "uqadd_z_zz"}, + {"0xx0110"_b, "sqsub_z_zz"}, + {"0xx0111"_b, "uqsub_z_zz"}, + {"0xx1101"_b, "xar_z_zzi"}, + {"10x0010"_b, "mla_z_zzzi_h"}, + {"10x0011"_b, "mls_z_zzzi_h"}, + {"10x0100"_b, "sqrdmlah_z_zzzi_h"}, + {"10x0101"_b, "sqrdmlsh_z_zzzi_h"}, + {"1100000"_b, 
"sdot_z_zzzi_s"}, + {"1100001"_b, "udot_z_zzzi_s"}, + {"1100010"_b, "mla_z_zzzi_s"}, + {"1100011"_b, "mls_z_zzzi_s"}, + {"1100100"_b, "sqrdmlah_z_zzzi_s"}, + {"1100101"_b, "sqrdmlsh_z_zzzi_s"}, + {"1100110"_b, "usdot_z_zzzi_s"}, + {"1100111"_b, "sudot_z_zzzi_s"}, + {"11010x0"_b, "sqdmlalb_z_zzzi_s"}, + {"11010x1"_b, "sqdmlalt_z_zzzi_s"}, + {"11011x0"_b, "sqdmlslb_z_zzzi_s"}, + {"11011x1"_b, "sqdmlslt_z_zzzi_s"}, + {"1110000"_b, "sdot_z_zzzi_d"}, + {"1110001"_b, "udot_z_zzzi_d"}, + {"1110010"_b, "mla_z_zzzi_d"}, + {"1110011"_b, "mls_z_zzzi_d"}, + {"1110100"_b, "sqrdmlah_z_zzzi_d"}, + {"1110101"_b, "sqrdmlsh_z_zzzi_d"}, + {"11110x0"_b, "sqdmlalb_z_zzzi_d"}, + {"11110x1"_b, "sqdmlalt_z_zzzi_d"}, + {"11111x0"_b, "sqdmlslb_z_zzzi_d"}, + {"11111x1"_b, "sqdmlslt_z_zzzi_d"}, + }, + }, + + { "_ljxhnq", + {12}, + { {"0"_b, "ld1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_lkttgy", {10}, - { {"0", "VisitSVETableLookup"}, + { {"0"_b, "saba_z_zzz"}, + {"1"_b, "uaba_z_zzz"}, }, }, - { "UnallocSVEBroadcastGeneralRegister", - {17, 16, 10}, - { {"000", "VisitSVEBroadcastGeneralRegister"}, + { "_lkvynm", + {22, 20, 19, 13, 12}, + { {"0x100"_b, "ushr_asisdshf_r"}, + {"0x101"_b, "usra_asisdshf_r"}, + {"0x110"_b, "urshr_asisdshf_r"}, + {"0x111"_b, "ursra_asisdshf_r"}, + {"10x00"_b, "ushr_asisdshf_r"}, + {"10x01"_b, "usra_asisdshf_r"}, + {"10x10"_b, "urshr_asisdshf_r"}, + {"10x11"_b, "ursra_asisdshf_r"}, + {"11100"_b, "ushr_asisdshf_r"}, + {"11101"_b, "usra_asisdshf_r"}, + {"11110"_b, "urshr_asisdshf_r"}, + {"11111"_b, "ursra_asisdshf_r"}, + {"x1000"_b, "ushr_asisdshf_r"}, + {"x1001"_b, "usra_asisdshf_r"}, + {"x1010"_b, "urshr_asisdshf_r"}, + {"x1011"_b, "ursra_asisdshf_r"}, }, }, - { "UnallocSVEInsertGeneralRegister", - {17, 16, 10}, - { {"000", "VisitSVEInsertGeneralRegister"}, + { "_lkxgjy", + {23, 22}, + { {"10"_b, "cmla_z_zzzi_h"}, + {"11"_b, "cmla_z_zzzi_s"}, }, }, - { "UnallocSVEUnpackVectorElements", + { "_llnzlv", + {20, 19, 18, 17, 16}, + { {"00000"_b, "sqneg_asisdmisc_r"}, + }, + 
}, + + { "_llpsqq", + {13, 12, 10}, + { {"001"_b, "_zjjxjl"}, + {"100"_b, "ptrues_p_s"}, + {"110"_b, "_njngkk"}, + }, + }, + + { "_llqjlh", {10}, - { {"0", "VisitSVEUnpackVectorElements"}, + { {"0"_b, "_lhtyjq"}, }, }, - { "UnallocSVEInsertSIMDFPScalarRegister", - {17, 16, 10}, - { {"000", "VisitSVEInsertSIMDFPScalarRegister"}, + { "_llvrrk", + {23, 18, 17, 16}, + { {"0000"_b, "sqxtnb_z_zz"}, }, }, - { "UnallocSVEReverseVectorElements", - {17, 16, 10}, - { {"000", "VisitSVEReverseVectorElements"}, + { "_llxlqz", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cmge_asisdmisc_z"}, }, }, - { "DecodeSVE00011001", - {20, 19, 18, 12, 11}, - { {"xxx00", "UnallocSVEBroadcastIndexElement"}, - {"xxx10", "UnallocSVETableLookup"}, - {"00011", "UnallocSVEBroadcastGeneralRegister"}, - {"00111", "UnallocSVEInsertGeneralRegister"}, - {"10011", "UnallocSVEUnpackVectorElements"}, - {"10111", "UnallocSVEInsertSIMDFPScalarRegister"}, - {"11011", "UnallocSVEReverseVectorElements"}, + { "_lmtnzv", + {12}, + { {"0"_b, "st2_asisdlsop_dx2_r2d"}, }, }, - { "UnallocSVEPermutePredicateElements", + { "_lmyxhr", {9, 4}, - { {"00", "VisitSVEPermutePredicateElements"}, + { {"00"_b, "_gnqhsl"}, + }, + }, + + { "_lnjpjs", + {18, 17}, + { {"0x"_b, "ld3_asisdlsop_sx3_r3s"}, + {"10"_b, "ld3_asisdlsop_sx3_r3s"}, + {"11"_b, "ld3_asisdlsop_s3_i3s"}, + }, + }, + + { "_lnkqjp", + {18, 17, 12}, + { {"000"_b, "ld3_asisdlso_d3_3d"}, + }, + }, + + { "_lnnyzt", + {23, 22}, + { {"01"_b, "fmax_asimdsamefp16_only"}, + {"11"_b, "fmin_asimdsamefp16_only"}, + }, + }, + + { "_lnpvky", + {23, 22, 19, 13, 12}, + { {"00100"_b, "sha1h_ss_cryptosha2"}, + {"00101"_b, "sha1su1_vv_cryptosha2"}, + {"00110"_b, "sha256su0_vv_cryptosha2"}, + {"xx011"_b, "suqadd_asisdmisc_r"}, + }, + }, + + { "_lpkqzl", + {30, 23, 22, 12, 11, 10}, + { {"0000xx"_b, "adds_64s_addsub_ext"}, + {"000100"_b, "adds_64s_addsub_ext"}, + {"1000xx"_b, "subs_64s_addsub_ext"}, + {"100100"_b, "subs_64s_addsub_ext"}, + }, + }, + + { "_lpslrz", + {4, 3, 2, 1, 0}, + { 
{"00000"_b, "fcmp_s_floatcmp"}, + {"01000"_b, "fcmp_sz_floatcmp"}, + {"10000"_b, "fcmpe_s_floatcmp"}, + {"11000"_b, "fcmpe_sz_floatcmp"}, + }, + }, + + { "_lpsvyy", + {30, 13}, + { {"00"_b, "_jlrrlt"}, + {"01"_b, "_jrlynj"}, + {"10"_b, "fmla_z_p_zzz"}, + {"11"_b, "fmls_z_p_zzz"}, + }, + }, + + { "_lpsxhz", + {22, 20, 19, 18, 17, 16, 13, 12}, + { {"01111101"_b, "ld64b_64l_memop"}, + }, + }, + + { "_lqmksm", + {30, 23, 22, 20, 13, 4}, + { {"00001x"_b, "ld1row_z_p_bi_u32"}, + {"000x0x"_b, "ld1row_z_p_br_contiguous"}, + {"01001x"_b, "ld1rod_z_p_bi_u64"}, + {"010x0x"_b, "ld1rod_z_p_br_contiguous"}, + {"110x00"_b, "str_p_bi"}, + }, + }, + + { "_lqnvvj", + {22, 13, 12}, + { {"000"_b, "swp_32_memop"}, + {"100"_b, "swpl_32_memop"}, + }, + }, + + { "_lrjyhr", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "bic_asimdimm_l_hl"}, + {"00x100"_b, "uqshrn_asimdshf_n"}, + {"00x101"_b, "uqrshrn_asimdshf_n"}, + {"010x00"_b, "uqshrn_asimdshf_n"}, + {"010x01"_b, "uqrshrn_asimdshf_n"}, + {"011100"_b, "uqshrn_asimdshf_n"}, + {"011101"_b, "uqrshrn_asimdshf_n"}, + {"0x1000"_b, "uqshrn_asimdshf_n"}, + {"0x1001"_b, "uqrshrn_asimdshf_n"}, + }, + }, + + { "_lrntmz", + {13, 12, 11, 10}, + { {"0000"_b, "saddlb_z_zz"}, + {"0001"_b, "saddlt_z_zz"}, + {"0010"_b, "uaddlb_z_zz"}, + {"0011"_b, "uaddlt_z_zz"}, + {"0100"_b, "ssublb_z_zz"}, + {"0101"_b, "ssublt_z_zz"}, + {"0110"_b, "usublb_z_zz"}, + {"0111"_b, "usublt_z_zz"}, + {"1100"_b, "sabdlb_z_zz"}, + {"1101"_b, "sabdlt_z_zz"}, + {"1110"_b, "uabdlb_z_zz"}, + {"1111"_b, "uabdlt_z_zz"}, + }, + }, + + { "_lrqkvp", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "ldadd_32_memop"}, + {"0000100"_b, "ldclr_32_memop"}, + {"0001000"_b, "ldeor_32_memop"}, + {"0001100"_b, "ldset_32_memop"}, + {"000xx10"_b, "str_32_ldst_regoff"}, + {"0010000"_b, "ldaddl_32_memop"}, + {"0010100"_b, "ldclrl_32_memop"}, + {"0011000"_b, "ldeorl_32_memop"}, + {"0011100"_b, "ldsetl_32_memop"}, + {"001xx10"_b, "ldr_32_ldst_regoff"}, + {"0100000"_b, "ldadda_32_memop"}, + {"0100100"_b, 
"ldclra_32_memop"}, + {"0101000"_b, "ldeora_32_memop"}, + {"0101100"_b, "ldseta_32_memop"}, + {"010xx10"_b, "ldrsw_64_ldst_regoff"}, + {"0110000"_b, "ldaddal_32_memop"}, + {"0110100"_b, "ldclral_32_memop"}, + {"0111000"_b, "ldeoral_32_memop"}, + {"0111100"_b, "ldsetal_32_memop"}, + {"1000000"_b, "ldadd_64_memop"}, + {"1000100"_b, "ldclr_64_memop"}, + {"1001000"_b, "ldeor_64_memop"}, + {"1001100"_b, "ldset_64_memop"}, + {"100xx10"_b, "str_64_ldst_regoff"}, + {"1010000"_b, "ldaddl_64_memop"}, + {"1010100"_b, "ldclrl_64_memop"}, + {"1011000"_b, "ldeorl_64_memop"}, + {"1011100"_b, "ldsetl_64_memop"}, + {"101xx10"_b, "ldr_64_ldst_regoff"}, + {"10xxx01"_b, "ldraa_64_ldst_pac"}, + {"10xxx11"_b, "ldraa_64w_ldst_pac"}, + {"1100000"_b, "ldadda_64_memop"}, + {"1100100"_b, "ldclra_64_memop"}, + {"1101000"_b, "ldeora_64_memop"}, + {"1101100"_b, "ldseta_64_memop"}, + {"110xx10"_b, "prfm_p_ldst_regoff"}, + {"1110000"_b, "ldaddal_64_memop"}, + {"1110100"_b, "ldclral_64_memop"}, + {"1111000"_b, "ldeoral_64_memop"}, + {"1111100"_b, "ldsetal_64_memop"}, + {"11xxx01"_b, "ldrab_64_ldst_pac"}, + {"11xxx11"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_lspzrv", + {30, 23, 13}, + { {"000"_b, "ld1sb_z_p_bz_s_x32_unscaled"}, + {"001"_b, "ldff1sb_z_p_bz_s_x32_unscaled"}, + {"010"_b, "ld1sh_z_p_bz_s_x32_unscaled"}, + {"011"_b, "ldff1sh_z_p_bz_s_x32_unscaled"}, + {"100"_b, "ld1sb_z_p_bz_d_x32_unscaled"}, + {"101"_b, "ldff1sb_z_p_bz_d_x32_unscaled"}, + {"110"_b, "ld1sh_z_p_bz_d_x32_unscaled"}, + {"111"_b, "ldff1sh_z_p_bz_d_x32_unscaled"}, + }, + }, + + { "_ltvrrg", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_htmthz"}, + }, + }, + + { "_lvshqt", + {23, 22}, + { {"00"_b, "_qtkpxg"}, + }, + }, + + { "_lxgltj", + {30, 23, 22}, + { {"000"_b, "stlxr_sr32_ldstexcl"}, + {"001"_b, "ldaxr_lr32_ldstexcl"}, + {"010"_b, "stlr_sl32_ldstexcl"}, + {"011"_b, "ldar_lr32_ldstexcl"}, + {"100"_b, "stlxr_sr64_ldstexcl"}, + {"101"_b, "ldaxr_lr64_ldstexcl"}, + {"110"_b, "stlr_sl64_ldstexcl"}, + {"111"_b, 
"ldar_lr64_ldstexcl"}, + }, + }, + + { "_lxhlkx", + {12, 11, 10}, + { {"000"_b, "ftmad_z_zzi"}, + }, + }, + + { "_lxmyjh", + {30, 23, 11, 10}, + { {"0000"_b, "_lqnvvj"}, + {"0010"_b, "_tmthqm"}, + {"0100"_b, "_rxjrmn"}, + {"0110"_b, "_ypqgyp"}, + {"1000"_b, "_zpsymj"}, + {"1001"_b, "ldraa_64_ldst_pac"}, + {"1010"_b, "_rsyzrs"}, + {"1011"_b, "ldraa_64w_ldst_pac"}, + {"1100"_b, "_nrrmtx"}, + {"1101"_b, "ldrab_64_ldst_pac"}, + {"1110"_b, "_tgqsyg"}, + {"1111"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_lxqynh", + {23, 22, 19, 18, 17, 16}, + { {"0000x1"_b, "dup_asimdins_dr_r"}, + {"000x10"_b, "dup_asimdins_dr_r"}, + {"0010xx"_b, "dup_asimdins_dr_r"}, + {"001110"_b, "dup_asimdins_dr_r"}, + {"00x10x"_b, "dup_asimdins_dr_r"}, + {"00x111"_b, "dup_asimdins_dr_r"}, + {"01xxxx"_b, "fmla_asimdsamefp16_only"}, + {"11xxxx"_b, "fmls_asimdsamefp16_only"}, + }, + }, + + { "_lxvnxm", + {23, 22, 12}, + { {"100"_b, "fmlsl2_asimdelem_lh"}, + {"xx1"_b, "sqrdmlah_asimdelem_r"}, + }, + }, + + { "_lyghyg", + {20, 18, 17}, + { {"000"_b, "_hxmjhn"}, + }, + }, + + { "_lylpyx", + {10}, + { {"0"_b, "sabalb_z_zzz"}, + {"1"_b, "sabalt_z_zzz"}, + }, + }, + + { "_lynsgm", + {13}, + { {"0"_b, "_ttplgp"}, + }, + }, + + { "_lytkrx", + {12, 11, 10}, + { {"000"_b, "dup_z_zi"}, + {"010"_b, "tbl_z_zz_2"}, + {"011"_b, "tbx_z_zz"}, + {"100"_b, "tbl_z_zz_1"}, + {"110"_b, "_ylnsvy"}, + }, + }, + + { "_lyzxhr", + {23, 22, 20, 19, 18, 17, 16, 13, 12, 11}, + { {"0011111001"_b, "_smplhv"}, + }, + }, + + { "_lzpykk", + {30, 23, 22}, + { {"000"_b, "bfm_32m_bitfield"}, + }, + }, + + { "_mgmgqh", + {17}, + { {"0"_b, "st1_asisdlsop_hx1_r1h"}, + {"1"_b, "st1_asisdlsop_h1_i1h"}, + }, + }, + + { "_mgmkyq", + {23}, + { {"0"_b, "fmaxp_asimdsame_only"}, + {"1"_b, "fminp_asimdsame_only"}, + }, + }, + + { "_mgqvvn", + {9, 8, 7, 6, 5}, + { {"11111"_b, "pacdza_64z_dp_1src"}, + }, + }, + + { "_mgsvlj", + {13, 12}, + { {"00"_b, "udiv_32_dp_2src"}, + {"10"_b, "asrv_32_dp_2src"}, + }, + }, + + { "_mhrjvp", + {30, 13}, + { {"00"_b, 
"_vxhgzz"}, + {"01"_b, "_lytkrx"}, + {"10"_b, "_rlyvpn"}, + {"11"_b, "_yvptvx"}, + }, + }, + + { "_mjqvxq", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmul_asimdelem_rh_h"}, + {"0x0001"_b, "shrn_asimdshf_n"}, + {"0x0011"_b, "rshrn_asimdshf_n"}, + {"0x0101"_b, "sqshrn_asimdshf_n"}, + {"0x0111"_b, "sqrshrn_asimdshf_n"}, + {"0x1001"_b, "sshll_asimdshf_l"}, + {"1x01x0"_b, "fmul_asimdelem_r_sd"}, + {"xx00x0"_b, "mul_asimdelem_r"}, + {"xx10x0"_b, "smull_asimdelem_l"}, + {"xx11x0"_b, "sqdmull_asimdelem_l"}, + }, + }, + + { "_mjxzks", + {4}, + { {"0"_b, "ccmp_64_condcmp_reg"}, + }, + }, + + { "_mkgsly", + {19, 18, 17, 16, 4}, + { {"00000"_b, "brkas_p_p_p_z"}, + {"10000"_b, "brkns_p_p_pp"}, + }, + }, + + { "_mkklrm", + {18, 17}, + { {"00"_b, "ld3_asisdlso_s3_3s"}, + }, + }, + + { "_mkskxj", + {30, 23, 22, 13}, + { {"0000"_b, "ld1sh_z_p_br_s32"}, + {"0001"_b, "ldff1sh_z_p_br_s32"}, + {"0010"_b, "ld1w_z_p_br_u64"}, + {"0011"_b, "ldff1w_z_p_br_u64"}, + {"0100"_b, "ld1sb_z_p_br_s32"}, + {"0101"_b, "ldff1sb_z_p_br_s32"}, + {"0110"_b, "ld1d_z_p_br_u64"}, + {"0111"_b, "ldff1d_z_p_br_u64"}, + {"1001"_b, "st2w_z_p_br_contiguous"}, + {"1011"_b, "st4w_z_p_br_contiguous"}, + {"10x0"_b, "st1w_z_p_br"}, + {"1100"_b, "str_z_bi"}, + {"1101"_b, "st2d_z_p_br_contiguous"}, + {"1110"_b, "st1d_z_p_br"}, + {"1111"_b, "st4d_z_p_br_contiguous"}, + }, + }, + + { "_mlnqrm", + {30}, + { {"0"_b, "_nhzrqr"}, + {"1"_b, "_zpmkvt"}, + }, + }, + + { "_mlvpxh", + {12}, + { {"0"_b, "ld2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_mlxtxs", + {10}, + { {"0"_b, "ssra_z_zi"}, + {"1"_b, "usra_z_zi"}, + }, + }, + + { "_mlyynz", + {12}, + { {"0"_b, "st3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_mmhkmp", + {18, 17}, + { {"0x"_b, "ld1_asisdlsop_sx1_r1s"}, + {"10"_b, "ld1_asisdlsop_sx1_r1s"}, + {"11"_b, "ld1_asisdlsop_s1_i1s"}, + }, + }, + + { "_mmknzp", + {20, 19, 18, 17, 16}, + { {"00000"_b, "clz_asimdmisc_r"}, + {"00001"_b, "uqxtn_asimdmisc_n"}, + }, + }, + + { "_mmmjkx", + {20, 19, 18, 17, 16, 13, 12}, + { 
{"0000000"_b, "rev_32_dp_1src"}, + }, + }, + + { "_mmrtvz", + {12}, + { {"0"_b, "st4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_mnmtql", + {10}, + { {"0"_b, "srsra_z_zi"}, + {"1"_b, "ursra_z_zi"}, }, }, - { "UnallocSVEUnpackPredicateElements", - {23, 22, 19, 17, 12, 11, 10, 9, 4}, - { {"000000000", "VisitSVEUnpackPredicateElements"}, + { "_mnxmst", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtns_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtns_asimdmisc_r"}, + {"1111001"_b, "fcvtps_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtps_asimdmisc_r"}, + {"xx00000"_b, "cmlt_asimdmisc_z"}, + {"xx10000"_b, "smaxv_asimdall_only"}, + {"xx10001"_b, "sminv_asimdall_only"}, }, }, - { "UnallocSVEReversePredicateElements", - {19, 17, 16, 12, 11, 10, 9, 4}, - { {"00000000", "VisitSVEReversePredicateElements"}, + { "_mpgrgp", + {30, 22, 13, 12, 11, 10}, + { {"000001"_b, "rmif_only_rmif"}, + {"01xx00"_b, "ccmn_64_condcmp_reg"}, + {"01xx10"_b, "ccmn_64_condcmp_imm"}, + {"11xx00"_b, "ccmp_64_condcmp_reg"}, + {"11xx10"_b, "ccmp_64_condcmp_imm"}, + }, + }, + + { "_mplgqv", + {11, 10}, + { {"00"_b, "sm3tt1a_vvv4_crypto3_imm2"}, + {"01"_b, "sm3tt1b_vvv4_crypto3_imm2"}, + {"10"_b, "sm3tt2a_vvv4_crypto3_imm2"}, + {"11"_b, "sm3tt2b_vvv_crypto3_imm2"}, + }, + }, + + { "_mplskr", + {13, 12}, + { {"00"_b, "add_asisdsame_only"}, + {"11"_b, "sqdmulh_asisdsame_only"}, + }, + }, + + { "_mpstrr", + {23, 22, 8, 7, 6, 5, 4, 3, 2, 1, 0}, + { {"00000000000"_b, "setffr_f"}, + }, + }, + + { "_mpvsng", + {30}, + { {"0"_b, "_vvtnrv"}, + {"1"_b, "_yykhjv"}, + }, + }, + + { "_mpyhkm", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmsub_s_floatdp3"}, + {"001xxxx"_b, "fnmsub_d_floatdp3"}, + {"011xxxx"_b, "fnmsub_h_floatdp3"}, + {"10x1001"_b, "scvtf_asisdshf_c"}, + {"10x1111"_b, "fcvtzs_asisdshf_c"}, + {"1xx00x0"_b, "sqdmulh_asisdelem_r"}, + {"1xx01x0"_b, "sqrdmulh_asisdelem_r"}, + }, + }, + + { "_mpyklp", + {23, 22, 20, 19, 16, 13, 10}, + { {"0000000"_b, "_jqjnrv"}, + {"0000001"_b, "_yqmqzp"}, + {"0000010"_b, 
"_hgxqpp"}, + {"0000011"_b, "_rvzhhx"}, + {"0100000"_b, "_nnllqy"}, + {"0100001"_b, "_vhmsgj"}, + {"0100010"_b, "_mkklrm"}, + {"0100011"_b, "_lnkqjp"}, + {"100xx00"_b, "st1_asisdlsop_sx1_r1s"}, + {"100xx01"_b, "_yxmkzr"}, + {"100xx10"_b, "st3_asisdlsop_sx3_r3s"}, + {"100xx11"_b, "_mlyynz"}, + {"1010x00"_b, "st1_asisdlsop_sx1_r1s"}, + {"1010x01"_b, "_jnjlsh"}, + {"1010x10"_b, "st3_asisdlsop_sx3_r3s"}, + {"1010x11"_b, "_svrnxq"}, + {"1011000"_b, "st1_asisdlsop_sx1_r1s"}, + {"1011001"_b, "_hjqtrt"}, + {"1011010"_b, "st3_asisdlsop_sx3_r3s"}, + {"1011011"_b, "_vqlytp"}, + {"1011100"_b, "_qqpqnm"}, + {"1011101"_b, "_thvvzp"}, + {"1011110"_b, "_srglgl"}, + {"1011111"_b, "_qzrjss"}, + {"110xx00"_b, "ld1_asisdlsop_sx1_r1s"}, + {"110xx01"_b, "_ljxhnq"}, + {"110xx10"_b, "ld3_asisdlsop_sx3_r3s"}, + {"110xx11"_b, "_nkrqgn"}, + {"1110x00"_b, "ld1_asisdlsop_sx1_r1s"}, + {"1110x01"_b, "_vmplgv"}, + {"1110x10"_b, "ld3_asisdlsop_sx3_r3s"}, + {"1110x11"_b, "_gsttpm"}, + {"1111000"_b, "ld1_asisdlsop_sx1_r1s"}, + {"1111001"_b, "_xmqvpl"}, + {"1111010"_b, "ld3_asisdlsop_sx3_r3s"}, + {"1111011"_b, "_stqmps"}, + {"1111100"_b, "_mmhkmp"}, + {"1111101"_b, "_srvnql"}, + {"1111110"_b, "_lnjpjs"}, + {"1111111"_b, "_kstltt"}, + }, + }, + + { "_mpzqxm", + {23, 22, 20, 19, 18, 16, 13}, + { {"0000000"_b, "_vpkhvh"}, + {"0000001"_b, "_gttglx"}, + {"0100000"_b, "_gsgzpg"}, + {"0100001"_b, "_ynqsgl"}, + {"100xxx0"_b, "st2_asisdlsop_hx2_r2h"}, + {"100xxx1"_b, "st4_asisdlsop_hx4_r4h"}, + {"1010xx0"_b, "st2_asisdlsop_hx2_r2h"}, + {"1010xx1"_b, "st4_asisdlsop_hx4_r4h"}, + {"10110x0"_b, "st2_asisdlsop_hx2_r2h"}, + {"10110x1"_b, "st4_asisdlsop_hx4_r4h"}, + {"1011100"_b, "st2_asisdlsop_hx2_r2h"}, + {"1011101"_b, "st4_asisdlsop_hx4_r4h"}, + {"1011110"_b, "_sjsltg"}, + {"1011111"_b, "_xrpmzt"}, + {"110xxx0"_b, "ld2_asisdlsop_hx2_r2h"}, + {"110xxx1"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1110xx0"_b, "ld2_asisdlsop_hx2_r2h"}, + {"1110xx1"_b, "ld4_asisdlsop_hx4_r4h"}, + {"11110x0"_b, "ld2_asisdlsop_hx2_r2h"}, + 
{"11110x1"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1111100"_b, "ld2_asisdlsop_hx2_r2h"}, + {"1111101"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1111110"_b, "_gygnsz"}, + {"1111111"_b, "_kxkyqr"}, + }, + }, + + { "_mqgtsq", + {30, 23, 22, 19}, + { {"1001"_b, "aesd_b_cryptoaes"}, + {"xxx0"_b, "cnt_asimdmisc_r"}, + }, + }, + + { "_mqkjxj", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_lyzxhr"}, + }, + }, + + { "_mqrzzk", + {22, 20, 11}, + { {"000"_b, "sqincw_z_zs"}, + {"001"_b, "sqdecw_z_zs"}, + {"010"_b, "incw_z_zs"}, + {"100"_b, "sqincd_z_zs"}, + {"101"_b, "sqdecd_z_zs"}, + {"110"_b, "incd_z_zs"}, + }, + }, + + { "_mrhtxt", + {23, 22, 20, 9}, + { {"0000"_b, "brkpb_p_p_pp"}, + {"0100"_b, "brkpbs_p_p_pp"}, + }, + }, + + { "_mrkkps", + {17}, + { {"0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"1"_b, "ld1_asisdlsop_h1_i1h"}, + }, + }, + + { "_mrmpgh", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "stlxp_sp32_ldstexcl"}, + {"001xxxx"_b, "ldaxp_lp32_ldstexcl"}, + {"0101111"_b, "casl_c32_ldstexcl"}, + {"0111111"_b, "casal_c32_ldstexcl"}, + {"100xxxx"_b, "stlxp_sp64_ldstexcl"}, + {"101xxxx"_b, "ldaxp_lp64_ldstexcl"}, + {"1101111"_b, "casl_c64_ldstexcl"}, + {"1111111"_b, "casal_c64_ldstexcl"}, }, }, - { "DecodeSVE00011010", - {20, 18}, - { {"0x", "UnallocSVEPermutePredicateElements"}, - {"10", "UnallocSVEUnpackPredicateElements"}, - {"11", "UnallocSVEReversePredicateElements"}, + { "_mrqqlp", + {30, 11, 10}, + { {"000"_b, "_gqykqv"}, + {"001"_b, "_xgvgmk"}, + {"010"_b, "_tjpjng"}, + {"011"_b, "_pjkylt"}, + {"101"_b, "_yrgnqz"}, + {"110"_b, "_hhymvj"}, + {"111"_b, "_xpmvjv"}, }, }, - { "DecodeSVE00011100", + { "_msgqps", + {18, 17}, + { {"0x"_b, "ld2_asisdlsop_sx2_r2s"}, + {"10"_b, "ld2_asisdlsop_sx2_r2s"}, + {"11"_b, "ld2_asisdlsop_s2_i2s"}, + }, + }, + + { "_msnsjp", {23, 20, 19, 18, 17, 16}, - { {"x00000", "VisitSVECopySIMDFPScalarRegisterToVector_Predicated"}, - {"x0001x", "VisitSVEExtractElementToSIMDFPScalarRegister"}, - {"x001xx", "VisitSVEReverseWithinElements"}, - {"x0100x", 
"VisitSVEConditionallyBroadcastElementToVector"}, - {"x0101x", "VisitSVEConditionallyExtractElementToSIMDFPScalar"}, - {"x01100", "VisitSVEVectorSplice_Destructive"}, - {"100001", "VisitSVECompressActiveElements"}, + { {"000001"_b, "fcvtxn_asisdmisc_n"}, + }, + }, + + { "_msqkyy", + {16, 13, 12}, + { {"000"_b, "rbit_64_dp_1src"}, + {"001"_b, "clz_64_dp_1src"}, + {"100"_b, "pacia_64p_dp_1src"}, + {"101"_b, "autia_64p_dp_1src"}, + {"110"_b, "_sqgxzn"}, + {"111"_b, "_kqkhtz"}, + }, + }, + + { "_mstthg", + {13, 12, 11, 10}, + { {"0000"_b, "umull_asimddiff_l"}, + {"0001"_b, "_qptvrm"}, + {"0010"_b, "_qqzrhz"}, + {"0011"_b, "_yxhrpk"}, + {"0101"_b, "_vsqpzr"}, + {"0110"_b, "_kjrxpx"}, + {"0111"_b, "_qnvgmh"}, + {"1001"_b, "_jvhnxl"}, + {"1010"_b, "_zyzzhm"}, + {"1011"_b, "_slhpgp"}, + {"1101"_b, "_mgmkyq"}, + {"1110"_b, "_qvlytr"}, + {"1111"_b, "_qtmjkr"}, + }, + }, + + { "_msztzv", + {23, 11, 10, 4, 3, 2, 1}, + { {"0000000"_b, "_vvprhx"}, + {"0101111"_b, "_nqysxy"}, + {"0111111"_b, "_kkmjyr"}, + {"1000000"_b, "_ypjyqh"}, + }, + }, + + { "_mtgksl", + {23, 22, 20, 19, 18, 16, 13}, + { {"0000000"_b, "_vnrnmg"}, + {"0000001"_b, "_hzllgl"}, + {"0100000"_b, "_hrhzqy"}, + {"0100001"_b, "_qtjzhs"}, + {"100xxx0"_b, "st4_asisdlsep_r4_r"}, + {"100xxx1"_b, "st1_asisdlsep_r4_r4"}, + {"1010xx0"_b, "st4_asisdlsep_r4_r"}, + {"1010xx1"_b, "st1_asisdlsep_r4_r4"}, + {"10110x0"_b, "st4_asisdlsep_r4_r"}, + {"10110x1"_b, "st1_asisdlsep_r4_r4"}, + {"1011100"_b, "st4_asisdlsep_r4_r"}, + {"1011101"_b, "st1_asisdlsep_r4_r4"}, + {"1011110"_b, "_nzkhrj"}, + {"1011111"_b, "_gmjhll"}, + {"110xxx0"_b, "ld4_asisdlsep_r4_r"}, + {"110xxx1"_b, "ld1_asisdlsep_r4_r4"}, + {"1110xx0"_b, "ld4_asisdlsep_r4_r"}, + {"1110xx1"_b, "ld1_asisdlsep_r4_r4"}, + {"11110x0"_b, "ld4_asisdlsep_r4_r"}, + {"11110x1"_b, "ld1_asisdlsep_r4_r4"}, + {"1111100"_b, "ld4_asisdlsep_r4_r"}, + {"1111101"_b, "ld1_asisdlsep_r4_r4"}, + {"1111110"_b, "_hxglyp"}, + {"1111111"_b, "_jmyslr"}, + }, + }, + + { "_mthzvm", + {30, 23, 13, 12, 11, 
10}, + { {"100001"_b, "ushr_asisdshf_r"}, + {"100101"_b, "usra_asisdshf_r"}, + {"101001"_b, "urshr_asisdshf_r"}, + {"101101"_b, "ursra_asisdshf_r"}, + }, + }, + + { "_mtjrtt", + {13, 12}, + { {"00"_b, "subps_64s_dp_2src"}, }, }, - { "DecodeSVE00011101", + { "_mtlhnl", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "movi_asimdimm_l_sl"}, + {"00x100"_b, "sshr_asimdshf_r"}, + {"00x110"_b, "srshr_asimdshf_r"}, + {"010x00"_b, "sshr_asimdshf_r"}, + {"010x10"_b, "srshr_asimdshf_r"}, + {"011100"_b, "sshr_asimdshf_r"}, + {"011110"_b, "srshr_asimdshf_r"}, + {"0x1000"_b, "sshr_asimdshf_r"}, + {"0x1010"_b, "srshr_asimdshf_r"}, + }, + }, + + { "_mtnpmr", + {13, 12, 11, 10}, + { {"0000"_b, "smull_asimddiff_l"}, + {"0001"_b, "_ypznsm"}, + {"0010"_b, "_sgztlj"}, + {"0011"_b, "_nsnyxt"}, + {"0100"_b, "sqdmull_asimddiff_l"}, + {"0101"_b, "_plltlx"}, + {"0110"_b, "_qtystr"}, + {"0111"_b, "_gymljg"}, + {"1000"_b, "pmull_asimddiff_l"}, + {"1001"_b, "_rpmrkq"}, + {"1010"_b, "_hvvyhl"}, + {"1011"_b, "_hlshjk"}, + {"1101"_b, "_gmvjgn"}, + {"1110"_b, "_rsyjqj"}, + {"1111"_b, "_yvlhjg"}, + }, + }, + + { "_mtzgpn", + {30}, + { {"0"_b, "cbz_32_compbranch"}, + }, + }, + + { "_mvglql", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "mvni_asimdimm_l_hl"}, + {"00x100"_b, "sqshrun_asimdshf_n"}, + {"00x101"_b, "sqrshrun_asimdshf_n"}, + {"00x110"_b, "ushll_asimdshf_l"}, + {"010x00"_b, "sqshrun_asimdshf_n"}, + {"010x01"_b, "sqrshrun_asimdshf_n"}, + {"010x10"_b, "ushll_asimdshf_l"}, + {"011100"_b, "sqshrun_asimdshf_n"}, + {"011101"_b, "sqrshrun_asimdshf_n"}, + {"011110"_b, "ushll_asimdshf_l"}, + {"0x1000"_b, "sqshrun_asimdshf_n"}, + {"0x1001"_b, "sqrshrun_asimdshf_n"}, + {"0x1010"_b, "ushll_asimdshf_l"}, + }, + }, + + { "_mvgsjr", {20, 19, 18, 17, 16}, - { {"0000x", "VisitSVEExtractElementToGeneralRegister"}, - {"01000", "VisitSVECopyGeneralRegisterToVector_Predicated"}, - {"1000x", "VisitSVEConditionallyExtractElementToGeneralRegister"}, + { {"00000"_b, "usqadd_asimdmisc_r"}, + {"00001"_b, 
"shll_asimdmisc_s"}, + {"10000"_b, "uaddlv_asimdall_only"}, }, }, - { "UnallocSVEPartitionBreakCondition", - {18, 17, 16, 9}, - { {"0000", "VisitSVEPartitionBreakCondition"}, + { "_mvzvpk", + {30}, + { {"0"_b, "orn_64_log_shift"}, + {"1"_b, "bics_64_log_shift"}, }, }, - { "UnallocSVEPropagateBreakToNextPartition", - {23, 18, 17, 16, 9, 4}, - { {"000000", "VisitSVEPropagateBreakToNextPartition"}, + { "_mxgykv", + {19, 18, 17, 16}, + { {"0000"_b, "cntp_r_p_p"}, + {"1000"_b, "_lynsgm"}, + {"1001"_b, "_jxyskn"}, + {"1010"_b, "_jmxstz"}, + {"1011"_b, "_yjzknm"}, + {"1100"_b, "_zmtkvx"}, + {"1101"_b, "_yhmlxk"}, }, }, - { "DecodeSVE0011001x", - {20, 19}, - { {"0x", "VisitSVEPredicateLogical"}, - {"10", "UnallocSVEPartitionBreakCondition"}, - {"11", "UnallocSVEPropagateBreakToNextPartition"}, + { "_mxkgnq", + {23, 22, 20, 19, 11}, + { {"00010"_b, "scvtf_asisdshf_c"}, + {"001x0"_b, "scvtf_asisdshf_c"}, + {"01xx0"_b, "scvtf_asisdshf_c"}, }, }, - { "UnallocSVEPredicateTest", - {18, 17, 9, 4}, - { {"0000", "VisitSVEPredicateTest"}, + { "_mxnzyr", + {19, 16}, + { {"00"_b, "_nhxxmh"}, + {"10"_b, "_qgymsy"}, + {"11"_b, "_gjprmg"}, }, }, - { "UnallocSVEPredicateFirstActive", - {18, 17, 12, 11, 10, 9, 4}, - { {"0000000", "VisitSVEPredicateFirstActive"}, + { "_mxtskk", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fmov_h_floatdp1"}, + {"000010"_b, "fneg_h_floatdp1"}, + {"000100"_b, "fcvt_sh_floatdp1"}, + {"001000"_b, "frintn_h_floatdp1"}, + {"001010"_b, "frintm_h_floatdp1"}, + {"001100"_b, "frinta_h_floatdp1"}, + {"001110"_b, "frintx_h_floatdp1"}, }, }, - { "UnallocSVEPredicateNextActive", - {18, 17, 12, 11, 10, 9, 4}, - { {"0000100", "VisitSVEPredicateNextActive"}, + { "_mxvjxx", + {20, 19, 18, 16}, + { {"0000"_b, "_nshjhk"}, }, }, - { "DecodeSVE00110110", - {20, 19, 16}, - { {"0xx", "VisitSVEPropagateBreak"}, - {"100", "UnallocSVEPredicateTest"}, - {"110", "UnallocSVEPredicateFirstActive"}, - {"111", "UnallocSVEPredicateNextActive"}, + { "_myjqrl", + {22, 20, 19, 18, 17, 16}, + { 
{"111000"_b, "fcmge_asisdmiscfp16_fz"}, + {"x00000"_b, "fcmge_asisdmisc_fz"}, + {"x10000"_b, "fminnmp_asisdpair_only_sd"}, }, }, - { "UnallocSVEPredicateTest", - {18, 17, 9, 4}, - { {"0000", "VisitSVEPredicateTest"}, + { "_mykjss", + {17}, + { {"0"_b, "st2_asisdlsop_bx2_r2b"}, + {"1"_b, "st2_asisdlsop_b2_i2b"}, }, }, - { "UnallocSVEPredicateInitialize", - {18, 17, 11, 4}, - { {"0000", "VisitSVEPredicateInitialize"}, + { "_mylphg", + {30, 13, 4}, + { {"000"_b, "cmpge_p_p_zw"}, + {"001"_b, "cmpgt_p_p_zw"}, + {"010"_b, "cmplt_p_p_zw"}, + {"011"_b, "cmple_p_p_zw"}, + {"1xx"_b, "fcmla_z_p_zzz"}, }, }, - { "UnallocSVEPredicateZero", - {18, 17, 11, 9, 8, 7, 6, 5, 4}, - { {"000000000", "VisitSVEPredicateZero"}, + { "_myrshl", + {4}, + { {"0"_b, "ccmn_32_condcmp_imm"}, }, }, - { "UnallocSVEPredicateReadFromFFR_Predicated", - {18, 17, 11, 9, 4}, - { {"00000", "VisitSVEPredicateReadFromFFR_Predicated"}, + { "_myxhpq", + {12}, + { {"0"_b, "udot_asimdelem_d"}, + {"1"_b, "sqrdmlsh_asimdelem_r"}, }, }, - { "UnallocSVEPredicateReadFromFFR_Unpredicated", - {18, 17, 11, 9, 8, 7, 6, 5, 4}, - { {"000000000", "VisitSVEPredicateReadFromFFR_Unpredicated"}, + { "_mzhsrq", + {4}, + { {"0"_b, "cmplt_p_p_zi"}, + {"1"_b, "cmple_p_p_zi"}, }, }, - { "DecodeSVE00110111", - {20, 19, 16, 12, 10}, - { {"0xxxx", "VisitSVEPropagateBreak"}, - {"100xx", "UnallocSVEPredicateTest"}, - {"11x00", "UnallocSVEPredicateInitialize"}, - {"11001", "UnallocSVEPredicateZero"}, - {"11010", "UnallocSVEPredicateReadFromFFR_Predicated"}, - {"11110", "UnallocSVEPredicateReadFromFFR_Unpredicated"}, + { "_mzqzhq", + {23, 22, 20, 19, 11}, + { {"00000"_b, "mvni_asimdimm_m_sm"}, }, }, - { "UnallocSVEConditionallyTerminateScalars", - {12, 11, 10, 3, 2, 1, 0}, - { {"0000000", "VisitSVEConditionallyTerminateScalars"}, + { "_mzynlp", + {23, 22, 13}, + { {"100"_b, "fmlal2_asimdelem_lh"}, + {"xx1"_b, "umull_asimdelem_l"}, }, }, - { "UnallocSVEPredicateCount_2", - {20}, - { {"0", "VisitSVEPredicateCount"}, + { "_ngttyj", + {30, 
23, 22, 13}, + { {"0000"_b, "ld1b_z_p_br_u16"}, + {"0001"_b, "ldff1b_z_p_br_u16"}, + {"0010"_b, "ld1b_z_p_br_u64"}, + {"0011"_b, "ldff1b_z_p_br_u64"}, + {"0100"_b, "ld1h_z_p_br_u16"}, + {"0101"_b, "ldff1h_z_p_br_u16"}, + {"0110"_b, "ld1h_z_p_br_u64"}, + {"0111"_b, "ldff1h_z_p_br_u64"}, + {"1001"_b, "st2b_z_p_br_contiguous"}, + {"1011"_b, "st4b_z_p_br_contiguous"}, + {"10x0"_b, "st1b_z_p_br"}, + {"1101"_b, "st2h_z_p_br_contiguous"}, + {"1111"_b, "st4h_z_p_br_contiguous"}, + {"11x0"_b, "st1h_z_p_br"}, }, }, - { "UnallocSVEIncDecByPredicateCount", - {20}, - { {"0", "VisitSVEIncDecByPredicateCount"}, + { "_ngxkmp", + {18, 17}, + { {"0x"_b, "st3_asisdlsep_r3_r"}, + {"10"_b, "st3_asisdlsep_r3_r"}, + {"11"_b, "st3_asisdlsep_i3_i"}, }, }, - { "UnallocSVEFFRWriteFromPredicate", - {20, 17, 16, 11, 10, 9, 4, 3, 2, 1, 0}, - { {"00000000000", "VisitSVEFFRWriteFromPredicate"}, + { "_ngzyqj", + {11, 10}, + { {"00"_b, "asr_z_zi"}, + {"01"_b, "lsr_z_zi"}, + {"11"_b, "lsl_z_zi"}, }, }, - { "UnallocSVEFFRInitialise", - {20, 17, 16, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, - { {"000000000000000", "VisitSVEFFRInitialise"}, + { "_nhhpqz", + {23, 22, 13, 12}, + { {"0000"_b, "fmul_s_floatdp2"}, + {"0001"_b, "fdiv_s_floatdp2"}, + {"0010"_b, "fadd_s_floatdp2"}, + {"0011"_b, "fsub_s_floatdp2"}, + {"0100"_b, "fmul_d_floatdp2"}, + {"0101"_b, "fdiv_d_floatdp2"}, + {"0110"_b, "fadd_d_floatdp2"}, + {"0111"_b, "fsub_d_floatdp2"}, + {"1100"_b, "fmul_h_floatdp2"}, + {"1101"_b, "fdiv_h_floatdp2"}, + {"1110"_b, "fadd_h_floatdp2"}, + {"1111"_b, "fsub_h_floatdp2"}, }, }, - { "DecodeSVE00111100", - {19, 18, 12}, - { {"0xx", "UnallocSVEPredicateCount_2"}, - {"1x0", "UnallocSVEIncDecByPredicateCount"}, - {"101", "UnallocSVEFFRWriteFromPredicate"}, - {"111", "UnallocSVEFFRInitialise"}, + { "_nhkstj", + {30, 23, 22}, + { {"00x"_b, "add_64_addsub_shift"}, + {"010"_b, "add_64_addsub_shift"}, + {"10x"_b, "sub_64_addsub_shift"}, + {"110"_b, "sub_64_addsub_shift"}, }, }, - { "UnallocSVEPredicateCount", - {20, 19}, 
- { {"00", "VisitSVEPredicateCount"}, + { "_nhxxmh", + {23, 22, 9, 3, 2, 1, 0}, + { {"0100000"_b, "ptest_p_p"}, + }, + }, + + { "_nhzrqr", + {23, 22}, + { {"00"_b, "fmadd_s_floatdp3"}, + {"01"_b, "fmadd_d_floatdp3"}, + {"11"_b, "fmadd_h_floatdp3"}, + }, + }, + + { "_nhzyvv", + {23, 22, 4, 3, 2, 1, 0}, + { {"0000000"_b, "brk_ex_exception"}, + {"0100000"_b, "tcancel_ex_exception"}, + {"1000001"_b, "dcps1_dc_exception"}, + {"1000010"_b, "dcps2_dc_exception"}, + {"1000011"_b, "dcps3_dc_exception"}, + }, + }, + + { "_njgmvx", + {18, 17}, + { {"00"_b, "_rzqzlq"}, }, }, - { "DecodeSVE0011111x", - {20, 19, 16}, - { {"00x", "VisitSVEIntAddSubtractImm_Unpredicated"}, - {"01x", "VisitSVEIntMinMaxImm_Unpredicated"}, - {"10x", "VisitSVEIntMulImm_Unpredicated"}, - {"110", "VisitSVEBroadcastIntImm_Unpredicated"}, - {"111", "VisitSVEBroadcastFPImm_Unpredicated"}, + { "_njgxlz", + {30}, + { {"0"_b, "_txzxzs"}, + {"1"_b, "_vprkpq"}, }, }, - { "UnallocSVEFPComplexAddition", + { "_njngkk", + {23, 22, 9, 8, 7, 6, 5}, + { {"0000000"_b, "rdffr_p_f"}, + }, + }, + + { "_njtngm", + {13, 12, 10}, + { {"001"_b, "_qkzlkj"}, + {"010"_b, "_jvpqrp"}, + {"011"_b, "_kknjng"}, + {"101"_b, "_xmtlmj"}, + {"110"_b, "sqdmlal_asisdelem_l"}, + {"111"_b, "_zgjpym"}, + }, + }, + + { "_njvkjq", + {11, 10}, + { {"00"_b, "index_z_ii"}, + {"01"_b, "index_z_ri"}, + {"10"_b, "index_z_ir"}, + {"11"_b, "index_z_rr"}, + }, + }, + + { "_njxtpv", + {30, 23, 22, 11, 10, 4}, + { {"001000"_b, "ccmn_32_condcmp_reg"}, + {"001100"_b, "ccmn_32_condcmp_imm"}, + {"101000"_b, "ccmp_32_condcmp_reg"}, + {"101100"_b, "ccmp_32_condcmp_imm"}, + }, + }, + + { "_nkjgpq", + {23, 20, 19, 18, 17, 16, 13}, + { {"0000000"_b, "ld1r_asisdlso_r1"}, + {"0000001"_b, "ld3r_asisdlso_r3"}, + {"10xxxx0"_b, "ld1r_asisdlsop_rx1_r"}, + {"10xxxx1"_b, "ld3r_asisdlsop_rx3_r"}, + {"110xxx0"_b, "ld1r_asisdlsop_rx1_r"}, + {"110xxx1"_b, "ld3r_asisdlsop_rx3_r"}, + {"1110xx0"_b, "ld1r_asisdlsop_rx1_r"}, + {"1110xx1"_b, "ld3r_asisdlsop_rx3_r"}, + {"11110x0"_b, 
"ld1r_asisdlsop_rx1_r"}, + {"11110x1"_b, "ld3r_asisdlsop_rx3_r"}, + {"1111100"_b, "ld1r_asisdlsop_rx1_r"}, + {"1111101"_b, "ld3r_asisdlsop_rx3_r"}, + {"1111110"_b, "ld1r_asisdlsop_r1_i"}, + {"1111111"_b, "ld3r_asisdlsop_r3_i"}, + }, + }, + + { "_nkrqgn", + {12}, + { {"0"_b, "ld3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_nkxhsy", + {22, 20, 11}, + { {"000"_b, "cntb_r_s"}, + {"010"_b, "incb_r_rs"}, + {"100"_b, "cnth_r_s"}, + {"110"_b, "inch_r_rs"}, + }, + }, + + { "_nlgqsk", + {30, 23, 13, 12, 11, 10}, + { {"100001"_b, "sri_asisdshf_r"}, + {"100101"_b, "sli_asisdshf_r"}, + {"101001"_b, "sqshlu_asisdshf_r"}, + {"101101"_b, "uqshl_asisdshf_r"}, + }, + }, + + { "_nlkkyx", + {23, 13, 12}, + { {"001"_b, "fmulx_asisdsame_only"}, + {"011"_b, "frecps_asisdsame_only"}, + {"111"_b, "frsqrts_asisdsame_only"}, + }, + }, + + { "_nllnsg", + {30, 23, 22, 19, 16}, + { {"10010"_b, "aesmc_b_cryptoaes"}, + {"x0x01"_b, "fcvtn_asimdmisc_n"}, + {"x1001"_b, "bfcvtn_asimdmisc_4s"}, + {"xxx00"_b, "sadalp_asimdmisc_p"}, + }, + }, + + { "_nlpmvl", + {30, 13}, + { {"00"_b, "mad_z_p_zzz"}, + {"01"_b, "msb_z_p_zzz"}, + }, + }, + + { "_nlqglq", + {13, 10}, + { {"00"_b, "_lxvnxm"}, + {"01"_b, "_mzqzhq"}, + {"10"_b, "_myxhpq"}, + {"11"_b, "_pslllp"}, + }, + }, + + { "_nlyntn", + {23, 22, 20, 19, 11}, + { {"00000"_b, "movi_asimdimm_l_sl"}, + }, + }, + + { "_nmkqzt", {20, 19, 18, 17}, - { {"0000", "VisitSVEFPComplexAddition"}, + { {"0000"_b, "_nvqlyn"}, + }, + }, + + { "_nmtkjv", + {17}, + { {"0"_b, "ld1_asisdlso_h1_1h"}, + }, + }, + + { "_nmzyvt", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "ldsmaxb_32_memop"}, + {"0000100"_b, "ldsminb_32_memop"}, + {"0000x10"_b, "strb_32b_ldst_regoff"}, + {"0001000"_b, "ldumaxb_32_memop"}, + {"0001100"_b, "lduminb_32_memop"}, + {"0001x10"_b, "strb_32bl_ldst_regoff"}, + {"0010000"_b, "ldsmaxlb_32_memop"}, + {"0010100"_b, "ldsminlb_32_memop"}, + {"0010x10"_b, "ldrb_32b_ldst_regoff"}, + {"0011000"_b, "ldumaxlb_32_memop"}, + {"0011100"_b, "lduminlb_32_memop"}, + 
{"0011x10"_b, "ldrb_32bl_ldst_regoff"}, + {"0100000"_b, "ldsmaxab_32_memop"}, + {"0100100"_b, "ldsminab_32_memop"}, + {"0100x10"_b, "ldrsb_64b_ldst_regoff"}, + {"0101000"_b, "ldumaxab_32_memop"}, + {"0101100"_b, "lduminab_32_memop"}, + {"0101x10"_b, "ldrsb_64bl_ldst_regoff"}, + {"0110000"_b, "ldsmaxalb_32_memop"}, + {"0110100"_b, "ldsminalb_32_memop"}, + {"0110x10"_b, "ldrsb_32b_ldst_regoff"}, + {"0111000"_b, "ldumaxalb_32_memop"}, + {"0111100"_b, "lduminalb_32_memop"}, + {"0111x10"_b, "ldrsb_32bl_ldst_regoff"}, + {"1000000"_b, "ldsmaxh_32_memop"}, + {"1000100"_b, "ldsminh_32_memop"}, + {"1001000"_b, "ldumaxh_32_memop"}, + {"1001100"_b, "lduminh_32_memop"}, + {"100xx10"_b, "strh_32_ldst_regoff"}, + {"1010000"_b, "ldsmaxlh_32_memop"}, + {"1010100"_b, "ldsminlh_32_memop"}, + {"1011000"_b, "ldumaxlh_32_memop"}, + {"1011100"_b, "lduminlh_32_memop"}, + {"101xx10"_b, "ldrh_32_ldst_regoff"}, + {"1100000"_b, "ldsmaxah_32_memop"}, + {"1100100"_b, "ldsminah_32_memop"}, + {"1101000"_b, "ldumaxah_32_memop"}, + {"1101100"_b, "lduminah_32_memop"}, + {"110xx10"_b, "ldrsh_64_ldst_regoff"}, + {"1110000"_b, "ldsmaxalh_32_memop"}, + {"1110100"_b, "ldsminalh_32_memop"}, + {"1111000"_b, "ldumaxalh_32_memop"}, + {"1111100"_b, "lduminalh_32_memop"}, + {"111xx10"_b, "ldrsh_32_ldst_regoff"}, + }, + }, + + { "_nnhprs", + {1, 0}, + { {"00"_b, "ret_64r_branch_reg"}, + }, + }, + + { "_nnkxgr", + {11, 10}, + { {"00"_b, "ftssel_z_zz"}, + {"10"_b, "_yhlntp"}, + {"11"_b, "_rsqmgk"}, }, }, - { "DecodeSVE01101000", - {12, 11}, - { {"00", "VisitSVEFPMulAddIndex"}, - {"1x", "VisitSVEFPComplexMulAddIndex"}, + { "_nnkyzr", + {18, 17, 16}, + { {"011"_b, "_yvgqjx"}, }, }, - { "UnallocSVEFPMulIndex", - {12, 11, 10}, - { {"000", "VisitSVEFPMulIndex"}, + { "_nnllqy", + {18, 17}, + { {"00"_b, "ld1_asisdlso_s1_1s"}, }, }, - { "DecodeSVE01110001", - {20, 19, 12}, - { {"00x", "VisitSVEFPFastReduction"}, - {"011", "VisitSVEFPUnaryOpUnpredicated"}, - {"10x", "VisitSVEFPCompareWithZero"}, - {"11x", 
"VisitSVEFPAccumulatingReduction"}, + { "_nnlvqz", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_d_floatimm"}, }, }, - { "UnallocSVEFPTrigMulAddCoefficient", - {12, 11, 10}, - { {"000", "VisitSVEFPTrigMulAddCoefficient"}, + { "_nnzhgm", + {19, 18, 17, 16, 4}, + { {"0000x"_b, "brka_p_p_p"}, + {"10000"_b, "brkn_p_p_pp"}, }, }, - { "UnallocSVEFPArithmeticWithImm_Predicated", - {9, 8, 7, 6}, - { {"0000", "VisitSVEFPArithmeticWithImm_Predicated"}, + { "_nqgqjh", + {30, 23, 22, 20, 19}, + { {"0xxxx"_b, "bl_only_branch_imm"}, + {"10001"_b, "sys_cr_systeminstrs"}, + {"1001x"_b, "msr_sr_systemmove"}, }, }, - { "DecodeSVE01110100", - {20, 19}, - { {"0x", "VisitSVEFPArithmetic_Predicated"}, - {"10", "UnallocSVEFPTrigMulAddCoefficient"}, - {"11", "UnallocSVEFPArithmeticWithImm_Predicated"}, + { "_nqkhrv", + {30, 13}, + { {"10"_b, "fnmla_z_p_zzz"}, + {"11"_b, "fnmls_z_p_zzz"}, }, }, - { "DecodeSVE01110101", - {20, 19, 18}, - { {"00x", "VisitSVEFPRoundToIntegralValue"}, - {"010", "VisitSVEFPConvertPrecision"}, - {"011", "VisitSVEFPUnaryOp"}, - {"10x", "VisitSVEIntConvertToFP"}, - {"11x", "VisitSVEFPConvertToInt"}, + { "_nqlgtn", + {23, 20, 19, 18, 17, 16, 13}, + { {"0000000"_b, "ld2r_asisdlso_r2"}, + {"0000001"_b, "ld4r_asisdlso_r4"}, + {"10xxxx0"_b, "ld2r_asisdlsop_rx2_r"}, + {"10xxxx1"_b, "ld4r_asisdlsop_rx4_r"}, + {"110xxx0"_b, "ld2r_asisdlsop_rx2_r"}, + {"110xxx1"_b, "ld4r_asisdlsop_rx4_r"}, + {"1110xx0"_b, "ld2r_asisdlsop_rx2_r"}, + {"1110xx1"_b, "ld4r_asisdlsop_rx4_r"}, + {"11110x0"_b, "ld2r_asisdlsop_rx2_r"}, + {"11110x1"_b, "ld4r_asisdlsop_rx4_r"}, + {"1111100"_b, "ld2r_asisdlsop_rx2_r"}, + {"1111101"_b, "ld4r_asisdlsop_rx4_r"}, + {"1111110"_b, "ld2r_asisdlsop_r2_i"}, + {"1111111"_b, "ld4r_asisdlsop_r4_i"}, }, }, - { "UnallocSVELoadAndBroadcastElement", - {22}, - { {"1", "VisitSVELoadAndBroadcastElement"}, + { "_nqysxy", + {0}, + { {"1"_b, "blraaz_64_branch_reg"}, }, }, - { "DecodeSVE100x0110", - {22, 4}, - { {"00", "VisitSVEContiguousPrefetch_ScalarPlusScalar"}, - {"1x", 
"VisitSVELoadAndBroadcastElement"}, + { "_nrrmtx", + {22, 13, 12}, + { {"000"_b, "swpa_64_memop"}, + {"100"_b, "swpal_64_memop"}, }, }, - { "DecodeSVE100x0111", - {22, 4}, - { {"00", "VisitSVE32BitGatherPrefetch_VectorPlusImm"}, - {"1x", "VisitSVELoadAndBroadcastElement"}, + { "_nrssjz", + {17}, + { {"0"_b, "ld3_asisdlso_b3_3b"}, }, }, - { "DecodeSVE100x11xx", - {22}, - { {"0", "VisitSVE32BitGatherLoad_VectorPlusImm"}, - {"1", "VisitSVELoadAndBroadcastElement"}, + { "_nshjhk", + {17, 9, 8, 7, 6, 5}, + { {"000000"_b, "aesimc_z_z"}, + {"1xxxxx"_b, "aesd_z_zz"}, }, }, - { "DecodeSVE100010xx", - {23, 4}, - { {"00", "VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets"}, - {"1x", "VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets"}, + { "_nsjhhg", + {30, 13}, + { {"00"_b, "_jhllmn"}, + {"01"_b, "_htplsj"}, + {"10"_b, "_rztvnl"}, + {"11"_b, "_vgtnjh"}, }, }, - { "DecodeSVE100100x1", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + { "_nsnyxt", + {23}, + { {"0"_b, "fmla_asimdsame_only"}, + {"1"_b, "fmls_asimdsame_only"}, }, }, - { "DecodeSVE10010000", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"100", "VisitSVELoadPredicateRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + { "_nssrnm", + {20, 18, 17, 16}, + { {"0000"_b, "_lnpvky"}, }, }, - { "DecodeSVE10010010", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"10x", "VisitSVELoadVectorRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + { "_nszhhy", + {17}, + { {"0"_b, "ld2_asisdlsep_r2_r"}, + {"1"_b, "ld2_asisdlsep_i2_i"}, }, }, - { "DecodeSVE100110x1", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + { "_nthvqx", + {23, 22}, + { {"00"_b, "eor_asimdsame_only"}, + {"01"_b, "bsl_asimdsame_only"}, + 
{"10"_b, "bit_asimdsame_only"}, + {"11"_b, "bif_asimdsame_only"}, }, }, - { "DecodeSVE10011000", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, - {"100", "VisitSVELoadPredicateRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + { "_ntjpsx", + {22, 20, 11}, + { {"000"_b, "uqincb_r_rs_uw"}, + {"001"_b, "uqdecb_r_rs_uw"}, + {"010"_b, "uqincb_r_rs_x"}, + {"011"_b, "uqdecb_r_rs_x"}, + {"100"_b, "uqinch_r_rs_uw"}, + {"101"_b, "uqdech_r_rs_uw"}, + {"110"_b, "uqinch_r_rs_x"}, + {"111"_b, "uqdech_r_rs_x"}, }, }, - { "DecodeSVE10011010", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, - {"10x", "VisitSVELoadVectorRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + { "_ntkhsm", + {13, 12}, + { {"00"_b, "cmtst_asisdsame_only"}, + }, + }, + + { "_ntkqhk", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "yield_hi_hints"}, + {"000001"_b, "wfi_hi_hints"}, + {"000010"_b, "sevl_hi_hints"}, + {"000011"_b, "xpaclri_hi_hints"}, + {"001000"_b, "psb_hc_hints"}, + {"0010x1"_b, "hint_hm_hints"}, + {"001100"_b, "paciasp_hi_hints"}, + {"001101"_b, "pacibsp_hi_hints"}, + {"001110"_b, "autiasp_hi_hints"}, + {"001111"_b, "autibsp_hi_hints"}, + {"0x01xx"_b, "hint_hm_hints"}, + {"0x1010"_b, "hint_hm_hints"}, + {"10x0xx"_b, "hint_hm_hints"}, + {"10x1xx"_b, "hint_hm_hints"}, + {"1101xx"_b, "hint_hm_hints"}, + {"111010"_b, "hint_hm_hints"}, + {"x100xx"_b, "hint_hm_hints"}, + {"x1100x"_b, "hint_hm_hints"}, + {"x11011"_b, "hint_hm_hints"}, + {"x111xx"_b, "hint_hm_hints"}, + }, + }, + + { "_nvkthr", + {30, 13}, + { {"00"_b, "_kjqynn"}, + {"01"_b, "_jgyhrh"}, + {"10"_b, "_jymnkk"}, + {"11"_b, "_pqjjsh"}, + }, + }, + + { "_nvqlyn", + {16, 13, 12}, + { {"000"_b, "rev_64_dp_1src"}, + {"100"_b, "pacdb_64p_dp_1src"}, + {"101"_b, "autdb_64p_dp_1src"}, + {"110"_b, "_hhnjjk"}, + {"111"_b, "_yvnjkr"}, + }, + }, + + { "_nvthzh", + {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5}, + { {"000010011111"_b, 
"xpacd_64z_dp_1src"}, + }, + }, + + { "_nvyxmh", + {20, 19, 18, 17, 16}, + { {"00000"_b, "add_z_p_zz"}, + {"00001"_b, "sub_z_p_zz"}, + {"00011"_b, "subr_z_p_zz"}, + {"01000"_b, "smax_z_p_zz"}, + {"01001"_b, "umax_z_p_zz"}, + {"01010"_b, "smin_z_p_zz"}, + {"01011"_b, "umin_z_p_zz"}, + {"01100"_b, "sabd_z_p_zz"}, + {"01101"_b, "uabd_z_p_zz"}, + {"10000"_b, "mul_z_p_zz"}, + {"10010"_b, "smulh_z_p_zz"}, + {"10011"_b, "umulh_z_p_zz"}, + {"10100"_b, "sdiv_z_p_zz"}, + {"10101"_b, "udiv_z_p_zz"}, + {"10110"_b, "sdivr_z_p_zz"}, + {"10111"_b, "udivr_z_p_zz"}, + {"11000"_b, "orr_z_p_zz"}, + {"11001"_b, "eor_z_p_zz"}, + {"11010"_b, "and_z_p_zz"}, + {"11011"_b, "bic_z_p_zz"}, + }, + }, + + { "_nxjgmm", + {17}, + { {"0"_b, "st3_asisdlsop_bx3_r3b"}, + {"1"_b, "st3_asisdlsop_b3_i3b"}, + }, + }, + + { "_nxjkqs", + {23, 22, 12, 11, 10}, + { {"0x000"_b, "fmla_z_zzzi_h"}, + {"0x001"_b, "fmls_z_zzzi_h"}, + {"10000"_b, "fmla_z_zzzi_s"}, + {"10001"_b, "fmls_z_zzzi_s"}, + {"101xx"_b, "fcmla_z_zzzi_h"}, + {"11000"_b, "fmla_z_zzzi_d"}, + {"11001"_b, "fmls_z_zzzi_d"}, + {"111xx"_b, "fcmla_z_zzzi_s"}, + }, + }, + + { "_nxmjvy", + {30, 23, 11, 10}, + { {"1001"_b, "_jksztq"}, + }, + }, + + { "_nxqygl", + {13}, + { {"0"_b, "mla_asimdelem_r"}, + {"1"_b, "umlal_asimdelem_l"}, + }, + }, + + { "_nxyhyv", + {30, 11, 10}, + { {"000"_b, "_kvyysq"}, + {"001"_b, "_rvjzgt"}, + {"010"_b, "_vjlnqj"}, + {"011"_b, "_jvvzjq"}, + {"100"_b, "_tzzhsk"}, + {"101"_b, "_mplskr"}, + {"110"_b, "_njgmvx"}, + {"111"_b, "_ntkhsm"}, + }, + }, + + { "_nykvly", + {16, 13, 12}, + { {"000"_b, "rev32_64_dp_1src"}, + {"100"_b, "pacda_64p_dp_1src"}, + {"101"_b, "autda_64p_dp_1src"}, + {"110"_b, "_mgqvvn"}, + {"111"_b, "_xvlnmy"}, + }, + }, + + { "_nyssqn", + {12}, + { {"0"_b, "st2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_nyxxks", + {20, 19, 18, 17, 16}, + { {"00000"_b, "suqadd_asimdmisc_r"}, + {"10000"_b, "saddlv_asimdall_only"}, + }, + }, + + { "_nzkhrj", + {17}, + { {"0"_b, "st4_asisdlsep_r4_r"}, + {"1"_b, "st4_asisdlsep_i4_i"}, 
+ }, + }, + + { "_nzqkky", + {20, 19, 18, 17, 16}, + { {"00000"_b, "rev32_asimdmisc_r"}, + }, + }, + + { "_pgjjsz", + {30, 13, 12, 11, 10}, + { {"00000"_b, "_lmyxhr"}, + {"00001"_b, "_tmhlvh"}, + {"00010"_b, "_qvtxpr"}, + {"00011"_b, "_ymkthj"}, + {"00100"_b, "_rhmxyp"}, + {"00101"_b, "_zryvjk"}, + {"01000"_b, "zip1_z_zz"}, + {"01001"_b, "zip2_z_zz"}, + {"01010"_b, "uzp1_z_zz"}, + {"01011"_b, "uzp2_z_zz"}, + {"01100"_b, "trn1_z_zz"}, + {"01101"_b, "trn2_z_zz"}, + {"10000"_b, "_llvrrk"}, + {"10001"_b, "_qyjvqr"}, + {"10010"_b, "_tmtnkq"}, + {"10011"_b, "_gpxltv"}, + {"10100"_b, "_pnlnzt"}, + {"10101"_b, "_pygvrr"}, + {"11000"_b, "addhnb_z_zz"}, + {"11001"_b, "addhnt_z_zz"}, + {"11010"_b, "raddhnb_z_zz"}, + {"11011"_b, "raddhnt_z_zz"}, + {"11100"_b, "subhnb_z_zz"}, + {"11101"_b, "subhnt_z_zz"}, + {"11110"_b, "rsubhnb_z_zz"}, + {"11111"_b, "rsubhnt_z_zz"}, + }, + }, + + { "_phthqj", + {30, 13}, + { {"00"_b, "_sntyqy"}, + {"01"_b, "_xhlhmh"}, + {"10"_b, "_rtrlts"}, + {"11"_b, "_jzkqhn"}, + }, + }, + + { "_phtnny", + {18, 17}, + { {"0x"_b, "ld1_asisdlsep_r3_r3"}, + {"10"_b, "ld1_asisdlsep_r3_r3"}, + {"11"_b, "ld1_asisdlsep_i3_i3"}, + }, + }, + + { "_phvnqh", + {30}, + { {"0"_b, "bic_32_log_shift"}, + {"1"_b, "eon_32_log_shift"}, + }, + }, + + { "_phxkzh", + {17, 4}, + { {"00"_b, "fcmlt_p_p_z0"}, + {"01"_b, "fcmle_p_p_z0"}, + {"10"_b, "fcmne_p_p_z0"}, + }, + }, + + { "_pjgkjs", + {18, 17}, + { {"00"_b, "_mxnzyr"}, + }, + }, + + { "_pjkylt", + {23, 22}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, + }, + }, + + { "_plktrh", + {30, 23}, + { {"00"_b, "adds_32s_addsub_imm"}, + {"10"_b, "subs_32s_addsub_imm"}, + }, + }, + + { "_plltlx", + {23}, + { {"0"_b, "fadd_asimdsame_only"}, + {"1"_b, "fsub_asimdsame_only"}, + }, + }, + + { "_pmkxlj", + {17}, + { {"0"_b, "st1_asisdlse_r2_2v"}, + }, + }, + + { "_pmrngh", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_snkqvp"}, + }, + }, + + { "_pnlnzt", + {23, 18, 17, 16}, + { 
{"0000"_b, "sqxtunb_z_zz"}, + }, + }, + + { "_pnqxjg", + {4}, + { {"0"_b, "ccmn_32_condcmp_reg"}, + }, + }, + + { "_pnxggm", + {4, 3, 2, 1, 0}, + { {"00000"_b, "fcmp_d_floatcmp"}, + {"01000"_b, "fcmp_dz_floatcmp"}, + {"10000"_b, "fcmpe_d_floatcmp"}, + {"11000"_b, "fcmpe_dz_floatcmp"}, + }, + }, + + { "_pnxgrg", + {30, 23, 22}, + { {"000"_b, "madd_32a_dp_3src"}, + }, + }, + + { "_pnzphx", + {17}, + { {"1"_b, "frecpe_z_z"}, + }, + }, + + { "_pphhym", + {30, 23, 22}, + { {"00x"_b, "add_32_addsub_shift"}, + {"010"_b, "add_32_addsub_shift"}, + {"10x"_b, "sub_32_addsub_shift"}, + {"110"_b, "sub_32_addsub_shift"}, + }, + }, + + { "_ppllxt", + {18, 17}, + { {"00"_b, "ld1_asisdlse_r3_3v"}, + }, + }, + + { "_ppnssm", + {30, 13, 12}, + { {"000"_b, "_ktyppm"}, + {"001"_b, "_ngzyqj"}, + {"010"_b, "_yxnslx"}, + {"011"_b, "_nnkxgr"}, + {"100"_b, "_kzmvpk"}, + {"101"_b, "_thrxph"}, + {"110"_b, "_kgpgly"}, + {"111"_b, "_yppszx"}, + }, + }, + + { "_pppsmg", + {30}, + { {"0"_b, "_xyhmgh"}, + {"1"_b, "_rlrjxp"}, + }, + }, + + { "_ppqkym", + {30, 23, 22, 11, 10}, + { {"10001"_b, "stg_64spost_ldsttags"}, + {"10010"_b, "stg_64soffset_ldsttags"}, + {"10011"_b, "stg_64spre_ldsttags"}, + {"10100"_b, "ldg_64loffset_ldsttags"}, + {"10101"_b, "stzg_64spost_ldsttags"}, + {"10110"_b, "stzg_64soffset_ldsttags"}, + {"10111"_b, "stzg_64spre_ldsttags"}, + {"11001"_b, "st2g_64spost_ldsttags"}, + {"11010"_b, "st2g_64soffset_ldsttags"}, + {"11011"_b, "st2g_64spre_ldsttags"}, + {"11101"_b, "stz2g_64spost_ldsttags"}, + {"11110"_b, "stz2g_64soffset_ldsttags"}, + {"11111"_b, "stz2g_64spre_ldsttags"}, + }, + }, + + { "_pqjjsh", + {23, 22, 12, 10}, + { {"1000"_b, "fmlslb_z_zzzi_s"}, + {"1001"_b, "fmlslt_z_zzzi_s"}, + }, + }, + + { "_pqpzkt", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "nop_hi_hints"}, + {"000001"_b, "wfe_hi_hints"}, + {"000010"_b, "sev_hi_hints"}, + {"000011"_b, "dgh_hi_hints"}, + {"000100"_b, "pacia1716_hi_hints"}, + {"000101"_b, "pacib1716_hi_hints"}, + {"000110"_b, "autia1716_hi_hints"}, + 
{"000111"_b, "autib1716_hi_hints"}, + {"001000"_b, "esb_hi_hints"}, + {"001001"_b, "tsb_hc_hints"}, + {"001010"_b, "csdb_hi_hints"}, + {"001100"_b, "paciaz_hi_hints"}, + {"001101"_b, "pacibz_hi_hints"}, + {"001110"_b, "autiaz_hi_hints"}, + {"001111"_b, "autibz_hi_hints"}, + {"0100xx"_b, "bti_hb_hints"}, + {"0x1011"_b, "hint_hm_hints"}, + {"10x0xx"_b, "hint_hm_hints"}, + {"10x1xx"_b, "hint_hm_hints"}, + {"1100xx"_b, "hint_hm_hints"}, + {"111011"_b, "hint_hm_hints"}, + {"x1100x"_b, "hint_hm_hints"}, + {"x11010"_b, "hint_hm_hints"}, + {"x1x1xx"_b, "hint_hm_hints"}, + }, + }, + + { "_pqtjgx", + {23, 22, 13, 12, 11, 10}, + { {"01x1x0"_b, "fcmla_asimdelem_c_h"}, + {"0x0001"_b, "sri_asimdshf_r"}, + {"0x0101"_b, "sli_asimdshf_r"}, + {"0x1001"_b, "sqshlu_asimdshf_r"}, + {"0x1101"_b, "uqshl_asimdshf_r"}, + {"10x1x0"_b, "fcmla_asimdelem_c_s"}, + {"xx00x0"_b, "mls_asimdelem_r"}, + {"xx10x0"_b, "umlsl_asimdelem_l"}, + }, + }, + + { "_prkmty", + {23, 22, 9}, + { {"000"_b, "brkpa_p_p_pp"}, + {"010"_b, "brkpas_p_p_pp"}, + }, + }, + + { "_pslllp", + {30, 23, 22, 20, 19, 12, 11}, + { {"0000000"_b, "movi_asimdimm_d_ds"}, + {"1000000"_b, "movi_asimdimm_d2_d"}, + {"1000010"_b, "fmov_asimdimm_d2_d"}, + {"x00x100"_b, "ucvtf_asimdshf_c"}, + {"x00x111"_b, "fcvtzu_asimdshf_c"}, + {"x010x00"_b, "ucvtf_asimdshf_c"}, + {"x010x11"_b, "fcvtzu_asimdshf_c"}, + {"x011100"_b, "ucvtf_asimdshf_c"}, + {"x011111"_b, "fcvtzu_asimdshf_c"}, + {"x0x1000"_b, "ucvtf_asimdshf_c"}, + {"x0x1011"_b, "fcvtzu_asimdshf_c"}, + }, + }, + + { "_psqpkp", + {17, 4}, + { {"00"_b, "fcmge_p_p_z0"}, + {"01"_b, "fcmgt_p_p_z0"}, + {"10"_b, "fcmeq_p_p_z0"}, + }, + }, + + { "_ptjyqx", + {13}, + { {"0"_b, "fcmuo_p_p_zz"}, + }, + }, + + { "_ptkrvg", + {12}, + { {"0"_b, "ld2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_ptsjnr", + {30, 20, 19, 18, 17, 16, 13}, + { {"0000000"_b, "asr_z_p_zi"}, + {"0000010"_b, "lsr_z_p_zi"}, + {"0000110"_b, "lsl_z_p_zi"}, + {"0001000"_b, "asrd_z_p_zi"}, + {"0001100"_b, "sqshl_z_p_zi"}, + {"0001110"_b, 
"uqshl_z_p_zi"}, + {"0011000"_b, "srshr_z_p_zi"}, + {"0011010"_b, "urshr_z_p_zi"}, + {"0011110"_b, "sqshlu_z_p_zi"}, + {"0100000"_b, "asr_z_p_zz"}, + {"0100001"_b, "sxtb_z_p_z"}, + {"0100010"_b, "lsr_z_p_zz"}, + {"0100011"_b, "uxtb_z_p_z"}, + {"0100101"_b, "sxth_z_p_z"}, + {"0100110"_b, "lsl_z_p_zz"}, + {"0100111"_b, "uxth_z_p_z"}, + {"0101000"_b, "asrr_z_p_zz"}, + {"0101001"_b, "sxtw_z_p_z"}, + {"0101010"_b, "lsrr_z_p_zz"}, + {"0101011"_b, "uxtw_z_p_z"}, + {"0101101"_b, "abs_z_p_z"}, + {"0101110"_b, "lslr_z_p_zz"}, + {"0101111"_b, "neg_z_p_z"}, + {"0110000"_b, "asr_z_p_zw"}, + {"0110001"_b, "cls_z_p_z"}, + {"0110010"_b, "lsr_z_p_zw"}, + {"0110011"_b, "clz_z_p_z"}, + {"0110101"_b, "cnt_z_p_z"}, + {"0110110"_b, "lsl_z_p_zw"}, + {"0110111"_b, "cnot_z_p_z"}, + {"0111001"_b, "fabs_z_p_z"}, + {"0111011"_b, "fneg_z_p_z"}, + {"0111101"_b, "not_z_p_z"}, + {"1000001"_b, "urecpe_z_p_z"}, + {"1000011"_b, "ursqrte_z_p_z"}, + {"1000100"_b, "srshl_z_p_zz"}, + {"1000110"_b, "urshl_z_p_zz"}, + {"1001001"_b, "sadalp_z_p_z"}, + {"1001011"_b, "uadalp_z_p_z"}, + {"1001100"_b, "srshlr_z_p_zz"}, + {"1001110"_b, "urshlr_z_p_zz"}, + {"1010000"_b, "sqshl_z_p_zz"}, + {"1010001"_b, "sqabs_z_p_z"}, + {"1010010"_b, "uqshl_z_p_zz"}, + {"1010011"_b, "sqneg_z_p_z"}, + {"1010100"_b, "sqrshl_z_p_zz"}, + {"1010110"_b, "uqrshl_z_p_zz"}, + {"1011000"_b, "sqshlr_z_p_zz"}, + {"1011010"_b, "uqshlr_z_p_zz"}, + {"1011100"_b, "sqrshlr_z_p_zz"}, + {"1011110"_b, "uqrshlr_z_p_zz"}, + {"1100000"_b, "shadd_z_p_zz"}, + {"1100010"_b, "uhadd_z_p_zz"}, + {"1100011"_b, "addp_z_p_zz"}, + {"1100100"_b, "shsub_z_p_zz"}, + {"1100110"_b, "uhsub_z_p_zz"}, + {"1101000"_b, "srhadd_z_p_zz"}, + {"1101001"_b, "smaxp_z_p_zz"}, + {"1101010"_b, "urhadd_z_p_zz"}, + {"1101011"_b, "umaxp_z_p_zz"}, + {"1101100"_b, "shsubr_z_p_zz"}, + {"1101101"_b, "sminp_z_p_zz"}, + {"1101110"_b, "uhsubr_z_p_zz"}, + {"1101111"_b, "uminp_z_p_zz"}, + {"1110000"_b, "sqadd_z_p_zz"}, + {"1110010"_b, "uqadd_z_p_zz"}, + {"1110100"_b, "sqsub_z_p_zz"}, + 
{"1110110"_b, "uqsub_z_p_zz"}, + {"1111000"_b, "suqadd_z_p_zz"}, + {"1111010"_b, "usqadd_z_p_zz"}, + {"1111100"_b, "sqsubr_z_p_zz"}, + {"1111110"_b, "uqsubr_z_p_zz"}, + }, + }, + + { "_ptslzg", + {30, 23, 22, 13, 4}, + { {"01000"_b, "ldr_p_bi"}, + {"01100"_b, "prfb_i_p_bi_s"}, + {"01110"_b, "prfh_i_p_bi_s"}, + {"10x0x"_b, "ld1sw_z_p_bz_d_x32_scaled"}, + {"10x1x"_b, "ldff1sw_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_pvkmmv", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "ldsmax_32_memop"}, + {"0000100"_b, "ldsmin_32_memop"}, + {"0001000"_b, "ldumax_32_memop"}, + {"0001100"_b, "ldumin_32_memop"}, + {"000xx10"_b, "str_32_ldst_regoff"}, + {"0010000"_b, "ldsmaxl_32_memop"}, + {"0010100"_b, "ldsminl_32_memop"}, + {"0011000"_b, "ldumaxl_32_memop"}, + {"0011100"_b, "lduminl_32_memop"}, + {"001xx10"_b, "ldr_32_ldst_regoff"}, + {"0100000"_b, "ldsmaxa_32_memop"}, + {"0100100"_b, "ldsmina_32_memop"}, + {"0101000"_b, "ldumaxa_32_memop"}, + {"0101100"_b, "ldumina_32_memop"}, + {"010xx10"_b, "ldrsw_64_ldst_regoff"}, + {"0110000"_b, "ldsmaxal_32_memop"}, + {"0110100"_b, "ldsminal_32_memop"}, + {"0111000"_b, "ldumaxal_32_memop"}, + {"0111100"_b, "lduminal_32_memop"}, + {"1000000"_b, "ldsmax_64_memop"}, + {"1000100"_b, "ldsmin_64_memop"}, + {"1001000"_b, "ldumax_64_memop"}, + {"1001100"_b, "ldumin_64_memop"}, + {"100xx10"_b, "str_64_ldst_regoff"}, + {"1010000"_b, "ldsmaxl_64_memop"}, + {"1010100"_b, "ldsminl_64_memop"}, + {"1011000"_b, "ldumaxl_64_memop"}, + {"1011100"_b, "lduminl_64_memop"}, + {"101xx10"_b, "ldr_64_ldst_regoff"}, + {"10xxx01"_b, "ldraa_64_ldst_pac"}, + {"10xxx11"_b, "ldraa_64w_ldst_pac"}, + {"1100000"_b, "ldsmaxa_64_memop"}, + {"1100100"_b, "ldsmina_64_memop"}, + {"1101000"_b, "ldumaxa_64_memop"}, + {"1101100"_b, "ldumina_64_memop"}, + {"110xx10"_b, "prfm_p_ldst_regoff"}, + {"1110000"_b, "ldsmaxal_64_memop"}, + {"1110100"_b, "ldsminal_64_memop"}, + {"1111000"_b, "ldumaxal_64_memop"}, + {"1111100"_b, "lduminal_64_memop"}, + {"11xxx01"_b, "ldrab_64_ldst_pac"}, + 
{"11xxx11"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_pvrylp", + {13, 12}, + { {"00"_b, "sbc_64_addsub_carry"}, + }, + }, + + { "_pxgztg", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "bic_asimdimm_l_sl"}, + {"00x100"_b, "sli_asimdshf_r"}, + {"00x110"_b, "uqshl_asimdshf_r"}, + {"010x00"_b, "sli_asimdshf_r"}, + {"010x10"_b, "uqshl_asimdshf_r"}, + {"011100"_b, "sli_asimdshf_r"}, + {"011110"_b, "uqshl_asimdshf_r"}, + {"0x1000"_b, "sli_asimdshf_r"}, + {"0x1010"_b, "uqshl_asimdshf_r"}, + }, + }, + + { "_pxkqxn", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cmle_asisdmisc_z"}, + }, + }, + + { "_pxlnhs", + {23, 20, 19, 18, 17, 16}, + { {"000001"_b, "fcvtxn_asimdmisc_n"}, + {"x00000"_b, "uadalp_asimdmisc_p"}, + }, + }, + + { "_pxnnrz", + {20, 19, 18, 17, 16, 13, 12, 3, 2, 1, 0}, + { {"00000001101"_b, "setf16_only_setf"}, + }, + }, + + { "_pxtsvn", + {20, 19, 18, 17, 16}, + { {"10000"_b, "fminp_asisdpair_only_sd"}, + }, + }, + + { "_pxyrpm", + {22, 11}, + { {"00"_b, "sqdmulh_z_zzi_s"}, + {"01"_b, "mul_z_zzi_s"}, + {"10"_b, "sqdmulh_z_zzi_d"}, + {"11"_b, "mul_z_zzi_d"}, + }, + }, + + { "_pxzkjy", + {30}, + { {"1"_b, "_yplktv"}, + }, + }, + + { "_pygvrr", + {23, 18, 17, 16}, + { {"0000"_b, "sqxtunt_z_zz"}, + }, + }, + + { "_qghmks", + {13, 12}, + { {"00"_b, "subp_64s_dp_2src"}, + {"01"_b, "irg_64i_dp_2src"}, + {"10"_b, "lslv_64_dp_2src"}, + {"11"_b, "pacga_64p_dp_2src"}, + }, + }, + + { "_qgmngg", + {30, 23}, + { {"00"_b, "orr_64_log_imm"}, + {"10"_b, "ands_64s_log_imm"}, + {"11"_b, "movk_64_movewide"}, + }, + }, + + { "_qgryzh", + {18, 17}, + { {"0x"_b, "st1_asisdlsep_r3_r3"}, + {"10"_b, "st1_asisdlsep_r3_r3"}, + {"11"_b, "st1_asisdlsep_i3_i3"}, + }, + }, + + { "_qgymsy", + {11}, + { {"0"_b, "_hmsgpj"}, + }, + }, + + { "_qhgtvk", + {30, 23, 22}, + { {"00x"_b, "adds_32_addsub_shift"}, + {"010"_b, "adds_32_addsub_shift"}, + {"10x"_b, "subs_32_addsub_shift"}, + {"110"_b, "subs_32_addsub_shift"}, + }, + }, + + { "_qhsplz", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, 
"frintn_asimdmiscfp16_r"}, + {"0x00001"_b, "frintn_asimdmisc_r"}, + {"1111001"_b, "frintp_asimdmiscfp16_r"}, + {"1x00001"_b, "frintp_asimdmisc_r"}, + {"xx00000"_b, "cmgt_asimdmisc_z"}, + }, + }, + + { "_qhtqrj", + {30, 23, 22}, + { {"000"_b, "stnp_s_ldstnapair_offs"}, + {"001"_b, "ldnp_s_ldstnapair_offs"}, + {"010"_b, "stp_s_ldstpair_post"}, + {"011"_b, "ldp_s_ldstpair_post"}, + {"100"_b, "stnp_d_ldstnapair_offs"}, + {"101"_b, "ldnp_d_ldstnapair_offs"}, + {"110"_b, "stp_d_ldstpair_post"}, + {"111"_b, "ldp_d_ldstpair_post"}, + }, + }, + + { "_qhtrnn", + {30, 23, 22, 11, 10}, + { {"00000"_b, "stur_32_ldst_unscaled"}, + {"00001"_b, "str_32_ldst_immpost"}, + {"00010"_b, "sttr_32_ldst_unpriv"}, + {"00011"_b, "str_32_ldst_immpre"}, + {"00100"_b, "ldur_32_ldst_unscaled"}, + {"00101"_b, "ldr_32_ldst_immpost"}, + {"00110"_b, "ldtr_32_ldst_unpriv"}, + {"00111"_b, "ldr_32_ldst_immpre"}, + {"01000"_b, "ldursw_64_ldst_unscaled"}, + {"01001"_b, "ldrsw_64_ldst_immpost"}, + {"01010"_b, "ldtrsw_64_ldst_unpriv"}, + {"01011"_b, "ldrsw_64_ldst_immpre"}, + {"10000"_b, "stur_64_ldst_unscaled"}, + {"10001"_b, "str_64_ldst_immpost"}, + {"10010"_b, "sttr_64_ldst_unpriv"}, + {"10011"_b, "str_64_ldst_immpre"}, + {"10100"_b, "ldur_64_ldst_unscaled"}, + {"10101"_b, "ldr_64_ldst_immpost"}, + {"10110"_b, "ldtr_64_ldst_unpriv"}, + {"10111"_b, "ldr_64_ldst_immpre"}, + {"11000"_b, "prfum_p_ldst_unscaled"}, + }, + }, + + { "_qhxzxl", + {17}, + { {"0"_b, "ld1_asisdlse_r2_2v"}, + }, + }, + + { "_qjyvln", + {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5}, + { {"000010011111"_b, "xpaci_64z_dp_1src"}, }, }, - { "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm", + { "_qkyjhg", + {30}, + { {"0"_b, "ldr_32_loadlit"}, + {"1"_b, "ldr_64_loadlit"}, + }, + }, + + { "_qkzlkj", + {23, 22, 20, 19, 11}, + { {"00010"_b, "sshr_asisdshf_r"}, + {"001x0"_b, "sshr_asisdshf_r"}, + {"01xx0"_b, "sshr_asisdshf_r"}, + }, + }, + + { "_qljhnp", + {22}, + { {"0"_b, "sqdmullt_z_zzi_s"}, + {"1"_b, "sqdmullt_z_zzi_d"}, + }, + }, + + { 
"_qlqhzg", {20}, - { {"0", "VisitSVELoadAndBroadcastQuadword_ScalarPlusImm"}, + { {"0"_b, "_hzmlps"}, + {"1"_b, "msr_sr_systemmove"}, + }, + }, + + { "_qlxksl", + {30}, + { {"0"_b, "_hrxyts"}, + {"1"_b, "_tytvjk"}, + }, + }, + + { "_qmgtyq", + {17}, + { {"0"_b, "ld2_asisdlse_r2"}, + }, + }, + + { "_qmjqhq", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_h_floatimm"}, + }, + }, + + { "_qmqmpj", + {12, 10}, + { {"00"_b, "_nxqygl"}, + {"01"_b, "_skglrt"}, + {"10"_b, "_sjlpxn"}, + {"11"_b, "_qzxvsk"}, + }, + }, + + { "_qmrgkn", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_hsvgnt"}, + }, + }, + + { "_qmzqsy", + {20, 19, 18, 17}, + { {"0000"_b, "_nykvly"}, + }, + }, + + { "_qnprqt", + {4}, + { {"0"_b, "eor_p_p_pp_z"}, + {"1"_b, "sel_p_p_pp"}, + }, + }, + + { "_qnsxkj", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fabs_d_floatdp1"}, + {"000010"_b, "fsqrt_d_floatdp1"}, + {"000110"_b, "fcvt_hd_floatdp1"}, + {"001000"_b, "frintp_d_floatdp1"}, + {"001010"_b, "frintz_d_floatdp1"}, + {"001110"_b, "frinti_d_floatdp1"}, + {"010000"_b, "frint32x_d_floatdp1"}, + {"010010"_b, "frint64x_d_floatdp1"}, + }, + }, + + { "_qntssm", + {30, 11, 10}, + { {"000"_b, "_hxrtsq"}, + {"001"_b, "_ygxhyg"}, + {"010"_b, "_nhhpqz"}, + {"011"_b, "_vjymzn"}, + {"101"_b, "_gszxkp"}, + {"110"_b, "_nssrnm"}, + {"111"_b, "_jrsptt"}, + }, + }, + + { "_qntygx", + {13, 12, 11, 10}, + { {"0000"_b, "uaddl_asimddiff_l"}, + {"0001"_b, "uhadd_asimdsame_only"}, + {"0010"_b, "_nzqkky"}, + {"0011"_b, "uqadd_asimdsame_only"}, + {"0100"_b, "uaddw_asimddiff_w"}, + {"0101"_b, "urhadd_asimdsame_only"}, + {"0111"_b, "_nthvqx"}, + {"1000"_b, "usubl_asimddiff_l"}, + {"1001"_b, "uhsub_asimdsame_only"}, + {"1010"_b, "_srmhlk"}, + {"1011"_b, "uqsub_asimdsame_only"}, + {"1100"_b, "usubw_asimddiff_w"}, + {"1101"_b, "cmhi_asimdsame_only"}, + {"1110"_b, "_mvgsjr"}, + {"1111"_b, "cmhs_asimdsame_only"}, + }, + }, + + { "_qnvgmh", + {23}, + { {"0"_b, "fmul_asimdsame_only"}, + }, + }, + + { "_qptvrm", + {23}, + { {"0"_b, 
"fmaxnmp_asimdsame_only"}, + {"1"_b, "fminnmp_asimdsame_only"}, }, }, - { "DecodeSVE101xx101", + { "_qpvgnh", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld2b_z_p_bi_contiguous"}, + {"000x0"_b, "ld2b_z_p_br_contiguous"}, + {"00101"_b, "ld4b_z_p_bi_contiguous"}, + {"001x0"_b, "ld4b_z_p_br_contiguous"}, + {"01001"_b, "ld2h_z_p_bi_contiguous"}, + {"010x0"_b, "ld2h_z_p_br_contiguous"}, + {"01101"_b, "ld4h_z_p_bi_contiguous"}, + {"011x0"_b, "ld4h_z_p_br_contiguous"}, + {"10011"_b, "st2b_z_p_bi_contiguous"}, + {"10111"_b, "st4b_z_p_bi_contiguous"}, + {"10x01"_b, "st1b_z_p_bi"}, + {"11011"_b, "st2h_z_p_bi_contiguous"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_scaled"}, + {"11111"_b, "st4h_z_p_bi_contiguous"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_scaled"}, + {"11x01"_b, "st1h_z_p_bi"}, + }, + }, + + { "_qpzynz", + {23, 22}, + { {"00"_b, "_jkpsxk"}, + }, + }, + + { "_qqpkkm", + {9, 8, 7, 6, 5, 1, 0}, + { {"1111111"_b, "eretaa_64e_branch_reg"}, + }, + }, + + { "_qqpqnm", + {18, 17}, + { {"0x"_b, "st1_asisdlsop_sx1_r1s"}, + {"10"_b, "st1_asisdlsop_sx1_r1s"}, + {"11"_b, "st1_asisdlsop_s1_i1s"}, + }, + }, + + { "_qqsmlt", + {4}, + { {"0"_b, "ccmp_32_condcmp_imm"}, + }, + }, + + { "_qqtpln", + {17}, + { {"0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"1"_b, "ld1_asisdlsop_b1_i1b"}, + }, + }, + + { "_qqyryl", + {30, 23, 22, 13, 4}, + { {"00x0x"_b, "ld1w_z_p_bz_s_x32_unscaled"}, + {"00x1x"_b, "ldff1w_z_p_bz_s_x32_unscaled"}, + {"0100x"_b, "ldr_z_bi"}, + {"01100"_b, "prfw_i_p_bi_s"}, + {"01110"_b, "prfd_i_p_bi_s"}, + {"10x0x"_b, "ld1w_z_p_bz_d_x32_unscaled"}, + {"10x1x"_b, "ldff1w_z_p_bz_d_x32_unscaled"}, + {"11x0x"_b, "ld1d_z_p_bz_d_x32_unscaled"}, + {"11x1x"_b, "ldff1d_z_p_bz_d_x32_unscaled"}, + }, + }, + + { "_qqzrhz", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtau_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtau_asimdmisc_r"}, + {"0x10000"_b, "fmaxnmv_asimdall_only_sd"}, + {"1111000"_b, "fcmge_asimdmiscfp16_fz"}, + {"1x00000"_b, "fcmge_asimdmisc_fz"}, + {"1x00001"_b, "ursqrte_asimdmisc_r"}, + 
{"1x10000"_b, "fminnmv_asimdall_only_sd"}, + }, + }, + + { "_qrygny", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1b_z_p_bi_u8"}, + {"00011"_b, "ldnf1b_z_p_bi_u8"}, + {"00101"_b, "ld1b_z_p_bi_u32"}, + {"00111"_b, "ldnf1b_z_p_bi_u32"}, + {"01001"_b, "ld1sw_z_p_bi_s64"}, + {"01011"_b, "ldnf1sw_z_p_bi_s64"}, + {"01101"_b, "ld1h_z_p_bi_u32"}, + {"01111"_b, "ldnf1h_z_p_bi_u32"}, + {"100x0"_b, "st1b_z_p_bz_d_x32_unscaled"}, + {"100x1"_b, "st1b_z_p_bz_d_64_unscaled"}, + {"101x0"_b, "st1b_z_p_bz_s_x32_unscaled"}, + {"101x1"_b, "st1b_z_p_ai_d"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_unscaled"}, + {"110x1"_b, "st1h_z_p_bz_d_64_unscaled"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_unscaled"}, + {"111x1"_b, "st1h_z_p_ai_d"}, + }, + }, + + { "_qrykhm", + {12}, + { {"0"_b, "st4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_qsnqpz", + {18, 17}, + { {"0x"_b, "ld4_asisdlsop_sx4_r4s"}, + {"10"_b, "ld4_asisdlsop_sx4_r4s"}, + {"11"_b, "ld4_asisdlsop_s4_i4s"}, + }, + }, + + { "_qsqqxg", + {30, 23, 22, 13, 12, 11, 10}, + { {"1010000"_b, "sha512h_qqv_cryptosha512_3"}, + {"1010001"_b, "sha512h2_qqv_cryptosha512_3"}, + {"1010010"_b, "sha512su1_vvv2_cryptosha512_3"}, + {"1010011"_b, "rax1_vvv2_cryptosha512_3"}, + }, + }, + + { "_qsrlql", + {30, 23, 22, 13, 12, 11, 10}, + { {"010xx00"_b, "csel_32_condsel"}, + {"010xx01"_b, "csinc_32_condsel"}, + {"0110000"_b, "crc32b_32c_dp_2src"}, + {"0110001"_b, "crc32h_32c_dp_2src"}, + {"0110010"_b, "crc32w_32c_dp_2src"}, + {"0110100"_b, "crc32cb_32c_dp_2src"}, + {"0110101"_b, "crc32ch_32c_dp_2src"}, + {"0110110"_b, "crc32cw_32c_dp_2src"}, + {"110xx00"_b, "csinv_32_condsel"}, + {"110xx01"_b, "csneg_32_condsel"}, + }, + }, + + { "_qsrtzz", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_lvshqt"}, + }, + }, + + { "_qssyls", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "stzgm_64bulk_ldsttags"}, + }, + }, + + { "_qsxpyq", + {20, 19, 18, 17, 16, 13, 12, 4, 3, 2, 1, 0}, + { {"000000001101"_b, "setf8_only_setf"}, + }, + }, + + { "_qsygjs", + {30, 23, 22, 12, 11, 10}, + { 
{"0000xx"_b, "add_32_addsub_ext"}, + {"000100"_b, "add_32_addsub_ext"}, + {"1000xx"_b, "sub_32_addsub_ext"}, + {"100100"_b, "sub_32_addsub_ext"}, + }, + }, + + { "_qtgvhn", + {17}, + { {"0"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1"_b, "ld4_asisdlsop_b4_i4b"}, + }, + }, + + { "_qtjzhs", + {17}, + { {"0"_b, "ld1_asisdlse_r4_4v"}, + }, + }, + + { "_qtknlp", + {30, 11, 10}, + { {"000"_b, "_skpjrp"}, + {"001"_b, "_sjnqvx"}, + {"011"_b, "_rgnxpp"}, + {"100"_b, "_rtlzxv"}, + {"101"_b, "_zvlxrl"}, + {"110"_b, "_ynnrny"}, + {"111"_b, "_nlkkyx"}, + }, + }, + + { "_qtkpxg", {20}, - { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, - {"1", "VisitSVEContiguousNonFaultLoad_ScalarPlusImm"}, + { {"0"_b, "_srggzy"}, + {"1"_b, "mrs_rs_systemmove"}, }, }, - { "DecodeSVE101x0110", - {22}, - { {"0", "VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar"}, - {"1", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, + { "_qtmjkr", + {23}, + { {"0"_b, "fdiv_asimdsame_only"}, + }, + }, + + { "_qtxpky", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_qtxypt", + {9, 8, 7, 6, 5, 1, 0}, + { {"1111111"_b, "retab_64e_branch_reg"}, + }, + }, + + { "_qtystr", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "scvtf_asimdmiscfp16_r"}, + {"0x00001"_b, "scvtf_asimdmisc_r"}, + {"1111000"_b, "fcmeq_asimdmiscfp16_fz"}, + {"1111001"_b, "frecpe_asimdmiscfp16_r"}, + {"1x00000"_b, "fcmeq_asimdmisc_fz"}, + {"1x00001"_b, "frecpe_asimdmisc_r"}, + }, + }, + + { "_qvlnll", + {22, 20, 11}, + { {"010"_b, "decw_r_rs"}, + {"110"_b, "decd_r_rs"}, + }, + }, + + { "_qvlytr", + {23, 22, 20, 19, 18, 17, 16}, + { {"0x00001"_b, "frint64x_asimdmisc_r"}, + {"0x10000"_b, "fmaxv_asimdall_only_sd"}, + {"1111000"_b, "fneg_asimdmiscfp16_r"}, + {"1111001"_b, "fsqrt_asimdmiscfp16_r"}, + {"1x00000"_b, "fneg_asimdmisc_r"}, + {"1x00001"_b, "fsqrt_asimdmisc_r"}, + {"1x10000"_b, "fminv_asimdall_only_sd"}, + }, + }, + + { "_qvsypn", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ldnt1w_z_p_bi_contiguous"}, + {"000x0"_b, 
"ldnt1w_z_p_br_contiguous"}, + {"00101"_b, "ld3w_z_p_bi_contiguous"}, + {"001x0"_b, "ld3w_z_p_br_contiguous"}, + {"01001"_b, "ldnt1d_z_p_bi_contiguous"}, + {"010x0"_b, "ldnt1d_z_p_br_contiguous"}, + {"01101"_b, "ld3d_z_p_bi_contiguous"}, + {"011x0"_b, "ld3d_z_p_br_contiguous"}, + {"10011"_b, "stnt1w_z_p_bi_contiguous"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"}, + {"10111"_b, "st3w_z_p_bi_contiguous"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"}, + {"10x01"_b, "st1w_z_p_bi"}, + {"11011"_b, "stnt1d_z_p_bi_contiguous"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"}, + {"11111"_b, "st3d_z_p_bi_contiguous"}, + {"11x01"_b, "st1d_z_p_bi"}, + }, + }, + + { "_qvtxpr", + {20, 9, 4}, + { {"000"_b, "uzp1_p_pp"}, + }, + }, + + { "_qxrzgv", + {17}, + { {"0"_b, "ld1_asisdlsep_r2_r2"}, + {"1"_b, "ld1_asisdlsep_i2_i2"}, + }, + }, + + { "_qxtvzy", + {13, 12, 11, 10}, + { {"0000"_b, "umlal_asimddiff_l"}, + {"0001"_b, "sub_asimdsame_only"}, + {"0010"_b, "_gznnvh"}, + {"0011"_b, "cmeq_asimdsame_only"}, + {"0101"_b, "mls_asimdsame_only"}, + {"0110"_b, "_vsqlkr"}, + {"0111"_b, "pmul_asimdsame_only"}, + {"1000"_b, "umlsl_asimddiff_l"}, + {"1001"_b, "umaxp_asimdsame_only"}, + {"1010"_b, "_gggyqx"}, + {"1011"_b, "uminp_asimdsame_only"}, + {"1101"_b, "sqrdmulh_asimdsame_only"}, + {"1110"_b, "_slnkst"}, + }, + }, + + { "_qyjvqr", + {23, 18, 17, 16}, + { {"0000"_b, "sqxtnt_z_zz"}, + }, + }, + + { "_qytrjj", + {30, 23, 22}, + { {"100"_b, "bcax_vvv16_crypto4"}, + }, + }, + + { "_qzjnpr", + {30, 23, 22, 20, 19, 18, 17, 16}, + { {"00000000"_b, "udf_only_perm_undef"}, + }, + }, + + { "_qzrjss", + {18, 17, 12}, + { {"0x0"_b, "st3_asisdlsop_dx3_r3d"}, + {"100"_b, "st3_asisdlsop_dx3_r3d"}, + {"110"_b, "st3_asisdlsop_d3_i3d"}, + }, + }, + + { "_qzsthq", + {30, 23, 22}, + { {"000"_b, "strb_32_ldst_pos"}, + {"001"_b, "ldrb_32_ldst_pos"}, + {"010"_b, "ldrsb_64_ldst_pos"}, + {"011"_b, "ldrsb_32_ldst_pos"}, + {"100"_b, "strh_32_ldst_pos"}, + {"101"_b, "ldrh_32_ldst_pos"}, + {"110"_b, "ldrsh_64_ldst_pos"}, 
+ {"111"_b, "ldrsh_32_ldst_pos"}, + }, + }, + + { "_qzxvsk", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "bic_asimdimm_l_sl"}, + {"00x100"_b, "usra_asimdshf_r"}, + {"00x110"_b, "ursra_asimdshf_r"}, + {"010x00"_b, "usra_asimdshf_r"}, + {"010x10"_b, "ursra_asimdshf_r"}, + {"011100"_b, "usra_asimdshf_r"}, + {"011110"_b, "ursra_asimdshf_r"}, + {"0x1000"_b, "usra_asimdshf_r"}, + {"0x1010"_b, "ursra_asimdshf_r"}, + }, + }, + + { "_qzzlhq", + {30, 23, 22}, + { {"000"_b, "and_32_log_imm"}, + {"010"_b, "movn_32_movewide"}, + {"100"_b, "eor_32_log_imm"}, + {"110"_b, "movz_32_movewide"}, + }, + }, + + { "_qzzlpv", + {13, 12}, + { {"01"_b, "gmi_64g_dp_2src"}, + {"10"_b, "lsrv_64_dp_2src"}, + }, + }, + + { "_rgjqzs", + {30, 23, 22}, + { {"001"_b, "sbfm_64m_bitfield"}, + {"101"_b, "ubfm_64m_bitfield"}, + }, + }, + + { "_rgnxpp", + {23, 22}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, + }, + }, + + { "_rgztzl", + {20, 19, 18, 17, 16}, + { {"00000"_b, "saddlp_asimdmisc_p"}, + {"00001"_b, "xtn_asimdmisc_n"}, + }, + }, + + { "_rhhrhg", + {30, 13, 4}, + { {"000"_b, "cmphs_p_p_zw"}, + {"001"_b, "cmphi_p_p_zw"}, + {"010"_b, "cmplo_p_p_zw"}, + {"011"_b, "cmpls_p_p_zw"}, + }, + }, + + { "_rhmxyp", + {20, 9, 4}, + { {"000"_b, "trn1_p_pp"}, + }, + }, + + { "_rhpmjz", + {12, 11}, + { {"00"_b, "incp_z_p_z"}, + {"01"_b, "incp_r_p_r"}, + {"10"_b, "_mpstrr"}, + }, + }, + + { "_rhttgj", + {12, 10}, + { {"00"_b, "_xxpzrl"}, + {"01"_b, "_vlzrlm"}, + {"10"_b, "_vxylhh"}, + {"11"_b, "_pxgztg"}, + }, + }, + + { "_rhvksm", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtnu_asisdmiscfp16_r"}, + {"0x00001"_b, "fcvtnu_asisdmisc_r"}, + {"1111001"_b, "fcvtpu_asisdmiscfp16_r"}, + {"1x00001"_b, "fcvtpu_asisdmisc_r"}, + }, + }, + + { "_rhzhyz", + {13, 12, 4}, + { {"000"_b, "rmif_only_rmif"}, + }, + }, + + { "_rjmyyl", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fmov_s_floatdp1"}, + {"000010"_b, "fneg_s_floatdp1"}, + {"001000"_b, 
"frintn_s_floatdp1"}, + {"001010"_b, "frintm_s_floatdp1"}, + {"001100"_b, "frinta_s_floatdp1"}, + {"001110"_b, "frintx_s_floatdp1"}, + {"010000"_b, "frint32z_s_floatdp1"}, + {"010010"_b, "frint64z_s_floatdp1"}, + }, + }, + + { "_rjyrnt", + {4}, + { {"0"_b, "cmpge_p_p_zi"}, + {"1"_b, "cmpgt_p_p_zi"}, + }, + }, + + { "_rjysnh", + {18, 17, 16, 9, 8, 7, 6}, + { {"0000000"_b, "fadd_z_p_zs"}, + {"0010000"_b, "fsub_z_p_zs"}, + {"0100000"_b, "fmul_z_p_zs"}, + {"0110000"_b, "fsubr_z_p_zs"}, + {"1000000"_b, "fmaxnm_z_p_zs"}, + {"1010000"_b, "fminnm_z_p_zs"}, + {"1100000"_b, "fmax_z_p_zs"}, + {"1110000"_b, "fmin_z_p_zs"}, + }, + }, + + { "_rkqtvs", + {23, 22, 13}, + { {"100"_b, "fmlal_asimdelem_lh"}, + {"xx1"_b, "smlal_asimdelem_l"}, + }, + }, + + { "_rkrltp", + {17}, + { {"0"_b, "st3_asisdlso_b3_3b"}, + }, + }, + + { "_rksxpn", + {30, 23, 22, 11, 10}, + { {"00010"_b, "str_b_ldst_regoff"}, + {"00110"_b, "ldr_b_ldst_regoff"}, + {"01010"_b, "str_q_ldst_regoff"}, + {"01110"_b, "ldr_q_ldst_regoff"}, + {"10010"_b, "str_h_ldst_regoff"}, + {"10110"_b, "ldr_h_ldst_regoff"}, + }, + }, + + { "_rkvyqk", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "movi_asimdimm_l_hl"}, + {"00x100"_b, "shrn_asimdshf_n"}, + {"00x101"_b, "rshrn_asimdshf_n"}, + {"00x110"_b, "sshll_asimdshf_l"}, + {"010x00"_b, "shrn_asimdshf_n"}, + {"010x01"_b, "rshrn_asimdshf_n"}, + {"010x10"_b, "sshll_asimdshf_l"}, + {"011100"_b, "shrn_asimdshf_n"}, + {"011101"_b, "rshrn_asimdshf_n"}, + {"011110"_b, "sshll_asimdshf_l"}, + {"0x1000"_b, "shrn_asimdshf_n"}, + {"0x1001"_b, "rshrn_asimdshf_n"}, + {"0x1010"_b, "sshll_asimdshf_l"}, + }, + }, + + { "_rlrjxp", + {13, 4}, + { {"00"_b, "fcmge_p_p_zz"}, + {"01"_b, "fcmgt_p_p_zz"}, + {"10"_b, "fcmeq_p_p_zz"}, + {"11"_b, "fcmne_p_p_zz"}, + }, + }, + + { "_rlyvpn", + {23, 12, 11, 10}, + { {"0000"_b, "sqshrunb_z_zi"}, + {"0001"_b, "sqshrunt_z_zi"}, + {"0010"_b, "sqrshrunb_z_zi"}, + {"0011"_b, "sqrshrunt_z_zi"}, + {"0100"_b, "shrnb_z_zi"}, + {"0101"_b, "shrnt_z_zi"}, + {"0110"_b, 
"rshrnb_z_zi"}, + {"0111"_b, "rshrnt_z_zi"}, + }, + }, + + { "_rmltms", + {9, 8, 7, 6, 5, 1, 0}, + { {"1111100"_b, "eret_64e_branch_reg"}, + }, + }, + + { "_rmmmjj", + {30, 23, 22}, + { {"000"_b, "smaddl_64wa_dp_3src"}, + {"010"_b, "umaddl_64wa_dp_3src"}, + }, + }, + + { "_rmxjsn", + {30}, + { {"0"_b, "orr_64_log_shift"}, + {"1"_b, "ands_64_log_shift"}, + }, + }, + + { "_rnktts", + {23, 22}, + { {"00"_b, "and_asimdsame_only"}, + {"01"_b, "bic_asimdsame_only"}, + {"10"_b, "orr_asimdsame_only"}, + {"11"_b, "orn_asimdsame_only"}, + }, + }, + + { "_rnqtmt", + {30}, + { {"0"_b, "_zyjjgs"}, + {"1"_b, "_lrntmz"}, + }, + }, + + { "_rnrzsj", + {20, 18, 17}, + { {"000"_b, "_lgglzy"}, + }, + }, + + { "_rnypvh", + {17}, + { {"0"_b, "st1_asisdlsop_bx1_r1b"}, + {"1"_b, "st1_asisdlsop_b1_i1b"}, + }, + }, + + { "_rpmrkq", + {23}, + { {"0"_b, "fcmeq_asimdsame_only"}, + }, + }, + + { "_rpqgjl", + {18, 17, 16, 13, 12, 7, 4, 3, 2, 1, 0}, + { {"00000011111"_b, "_kpxtsp"}, + }, + }, + + { "_rpzykx", + {11}, + { {"0"_b, "_svvyrz"}, }, }, - { "DecodeSVE101x0111", - {22, 20}, - { {"00", "VisitSVEContiguousNonTemporalLoad_ScalarPlusImm"}, - {"10", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, + { "_rqhryp", + {12, 10}, + { {"00"_b, "_kjpxvh"}, + {"01"_b, "_mxvjxx"}, + {"10"_b, "sm4ekey_z_zz"}, + {"11"_b, "rax1_z_zz"}, }, }, - { "DecodeSVE101x1111", - {22, 20}, - { {"x0", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, + { "_rshyht", + {13}, + { {"0"_b, "facge_p_p_zz"}, + {"1"_b, "facgt_p_p_zz"}, }, }, - { "DecodeSVE110x0111", - {22, 4}, - { {"00", "VisitSVE64BitGatherPrefetch_VectorPlusImm"}, - {"1x", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + { "_rsqmgk", + {23, 22, 20, 19, 18, 17, 16}, + { {"0000000"_b, "movprfx_z_z"}, }, }, - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + { "_rsyhtj", + {13, 12, 11, 10}, + { {"0001"_b, "ushl_asisdsame_only"}, + {"0010"_b, "_gxnlxg"}, + {"0011"_b, "uqshl_asisdsame_only"}, + {"0101"_b, "urshl_asisdsame_only"}, + 
{"0111"_b, "uqrshl_asisdsame_only"}, + {"1010"_b, "_msnsjp"}, + {"1110"_b, "_llnzlv"}, + }, + }, + + { "_rsyjqj", + {23, 22, 20, 19, 18, 17, 16}, + { {"0010000"_b, "fmaxv_asimdall_only_h"}, + {"0x00001"_b, "frint64z_asimdmisc_r"}, + {"1010000"_b, "fminv_asimdall_only_h"}, + {"1111000"_b, "fabs_asimdmiscfp16_r"}, + {"1x00000"_b, "fabs_asimdmisc_r"}, + }, + }, + + { "_rsyzrs", {22}, - { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + { {"0"_b, "str_64_ldst_regoff"}, + {"1"_b, "ldr_64_ldst_regoff"}, }, }, - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + { "_rtgkkg", + {30, 23, 22, 13, 12, 11, 10}, + { {"1101001"_b, "smmla_asimdsame2_g"}, + {"1101011"_b, "usmmla_asimdsame2_g"}, + {"x100111"_b, "usdot_asimdsame2_d"}, + {"xxx0101"_b, "sdot_asimdsame2_d"}, + }, + }, + + { "_rtlzxv", + {13, 12}, + { {"01"_b, "sqdmull_asisddiff_only"}, + }, + }, + + { "_rtpztp", {22}, - { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + { {"0"_b, "umullb_z_zzi_s"}, + {"1"_b, "umullb_z_zzi_d"}, }, }, - { "DecodeSVE110010xx", - {23, 4}, - { {"00", "VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets"}, - {"1x", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, + { "_rtrlts", + {23, 22, 12, 11, 10}, + { {"01000"_b, "bfdot_z_zzz"}, + {"10000"_b, "fmlalb_z_zzz"}, + {"10001"_b, "fmlalt_z_zzz"}, + {"11000"_b, "bfmlalb_z_zzz"}, + {"11001"_b, "bfmlalt_z_zzz"}, }, }, - { "DecodeSVE110011xx", + { "_rvjzgt", {23, 22, 4}, - { {"x0x", "VisitSVE64BitGatherLoad_VectorPlusImm"}, - {"010", "VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets"}, - {"11x", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, "fccmpe_h_floatccmp"}, }, }, - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", - {22}, - { {"1", 
"VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + { "_rvzhhx", + {18, 17, 12}, + { {"000"_b, "st3_asisdlso_d3_3d"}, }, }, - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", - {22}, - { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + { "_rxjrmn", + {22, 13, 12}, + { {"000"_b, "swpa_32_memop"}, + {"100"_b, "swpal_32_memop"}, }, }, - { "DecodeSVE110111xx", - {22}, - { {"0", "VisitSVE64BitGatherLoad_VectorPlusImm"}, - {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, + { "_rxpspy", + {30, 23, 22, 12, 11, 10}, + { {"0000xx"_b, "adds_32s_addsub_ext"}, + {"000100"_b, "adds_32s_addsub_ext"}, + {"1000xx"_b, "subs_32s_addsub_ext"}, + {"100100"_b, "subs_32s_addsub_ext"}, }, }, - { "DecodeSVE111x0011", - {22}, - { {"0", "VisitSVEContiguousNonTemporalStore_ScalarPlusScalar"}, - {"1", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, + { "_ryglvl", + {4}, + { {"0"_b, "ccmp_32_condcmp_reg"}, }, }, - { "DecodeSVE111x01x0", - {22}, - { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets"}, - {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets"}, + { "_rykykh", + {20, 19, 18, 17, 16}, + { {"00000"_b, "rev64_asimdmisc_r"}, }, }, - { "DecodeSVE111x0101", + { "_rzkmny", + {30}, + { {"0"_b, "and_64_log_shift"}, + {"1"_b, "eor_64_log_shift"}, + }, + }, + + { "_rznrqt", {22}, - { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets"}, - {"1", "VisitSVE64BitScatterStore_VectorPlusImm"}, + { {"0"_b, "umullt_z_zzi_s"}, + {"1"_b, "umullt_z_zzi_d"}, }, }, - { "DecodeSVE111x0111", - {22, 20}, - { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, - {"01", "VisitSVEContiguousNonTemporalStore_ScalarPlusImm"}, - {"11", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, + { "_rzqzlq", + {23, 22, 20, 19, 16, 13, 12}, + { {"0111110"_b, "fcvtns_asisdmiscfp16_r"}, + {"0111111"_b, "fcvtms_asisdmiscfp16_r"}, + {"0x00110"_b, "fcvtns_asisdmisc_r"}, + {"0x00111"_b, "fcvtms_asisdmisc_r"}, + {"1111110"_b, 
"fcvtps_asisdmiscfp16_r"}, + {"1111111"_b, "fcvtzs_asisdmiscfp16_r"}, + {"1x00110"_b, "fcvtps_asisdmisc_r"}, + {"1x00111"_b, "fcvtzs_asisdmisc_r"}, + {"xx00000"_b, "cmgt_asisdmisc_z"}, + {"xx00001"_b, "cmeq_asisdmisc_z"}, + {"xx00010"_b, "cmlt_asisdmisc_z"}, + {"xx00011"_b, "abs_asisdmisc_r"}, + {"xx10111"_b, "addp_asisdpair_only"}, }, }, - { "DecodeSVE111x11x0", - {22}, - { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets"}, - {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets"}, + { "_rztvnl", + {20, 19, 18, 17, 16}, + { {"0000x"_b, "fcadd_z_p_zz"}, + {"10000"_b, "faddp_z_p_zz"}, + {"10100"_b, "fmaxnmp_z_p_zz"}, + {"10101"_b, "fminnmp_z_p_zz"}, + {"10110"_b, "fmaxp_z_p_zz"}, + {"10111"_b, "fminp_z_p_zz"}, }, }, - { "DecodeSVE111x1101", - {22}, - { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets"}, - {"1", "VisitSVE32BitScatterStore_VectorPlusImm"}, + { "_rzzxsn", + {30, 13}, + { {"00"_b, "_nvyxmh"}, + {"01"_b, "_hykhmt"}, + {"10"_b, "_yszjsm"}, + {"11"_b, "_jrnxzh"}, + }, + }, + + { "_sghgtk", + {4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, "cmpls_p_p_zi"}, + }, + }, + + { "_sgnknz", + {23, 22, 20, 19, 11}, + { {"00011"_b, "fcvtzs_asisdshf_c"}, + {"001x1"_b, "fcvtzs_asisdshf_c"}, + {"01xx1"_b, "fcvtzs_asisdshf_c"}, + }, + }, + + { "_sgztlj", + {23, 22, 20, 19, 18, 17, 16}, + { {"0010000"_b, "fmaxnmv_asimdall_only_h"}, + {"0111001"_b, "fcvtas_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtas_asimdmisc_r"}, + {"1010000"_b, "fminnmv_asimdall_only_h"}, + {"1111000"_b, "fcmgt_asimdmiscfp16_fz"}, + {"1x00000"_b, "fcmgt_asimdmisc_fz"}, + {"1x00001"_b, "urecpe_asimdmisc_r"}, + }, + }, + + { "_shgkvq", + {18, 17}, + { {"00"_b, "st2_asisdlso_s2_2s"}, + }, + }, + + { "_shqygv", + {30, 4}, + { {"00"_b, "_thvxym"}, + {"01"_b, "_mrhtxt"}, + {"10"_b, "_ptjyqx"}, + {"11"_b, "_rshyht"}, }, }, - { "DecodeSVE111x1111", - {22, 20}, - { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, - {"x1", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, + { 
"_shrsxr", + {30, 23, 22}, + { {"000"_b, "stnp_64_ldstnapair_offs"}, + {"001"_b, "ldnp_64_ldstnapair_offs"}, + {"010"_b, "stp_64_ldstpair_post"}, + {"011"_b, "ldp_64_ldstpair_post"}, }, }, - { "UnallocSVEStorePredicateRegister", + { "_shzysp", + {30, 23, 22, 19, 18, 17, 16}, + { {"1001000"_b, "ins_asimdins_ir_r"}, + {"100x100"_b, "ins_asimdins_ir_r"}, + {"100xx10"_b, "ins_asimdins_ir_r"}, + {"100xxx1"_b, "ins_asimdins_ir_r"}, + {"x01xxxx"_b, "fmulx_asimdsamefp16_only"}, + }, + }, + + { "_sjlpxn", + {23, 22}, + { {"01"_b, "fcmla_asimdelem_c_h"}, + {"10"_b, "fcmla_asimdelem_c_s"}, + }, + }, + + { "_sjlrxn", + {10}, + { {"0"_b, "_mpzqxm"}, + }, + }, + + { "_sjnqvx", {23, 22, 4}, - { {"100", "VisitSVEStorePredicateRegister"}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, "fccmpe_h_floatccmp"}, + }, + }, + + { "_sjnspg", + {4}, + { {"0"_b, "nors_p_p_pp_z"}, + {"1"_b, "nands_p_p_pp_z"}, }, }, - { "DecodeSVE1111x010", + { "_sjnxky", + {30}, + { {"1"_b, "_ylyskq"}, + }, + }, + + { "_sjrqth", {23, 22}, - { {"0x", "VisitSVEContiguousStore_ScalarPlusScalar"}, - {"10", "VisitSVEStoreVectorRegister"}, - {"11", "VisitSVEContiguousStore_ScalarPlusScalar"}, + { {"00"_b, "fmov_s_floatimm"}, + {"01"_b, "fmov_d_floatimm"}, + {"11"_b, "fmov_h_floatimm"}, }, }, - { "DecodeNEONScalarAnd3SHA", - {29, 23, 22, 15, 14, 11, 10}, - { {"0xx0x00", "VisitCrypto3RegSHA"}, - {"x000xx1", "UnallocNEONScalarCopy"}, - {"xxx1xx1", "UnallocNEONScalar3SameExtra"}, - {"xx100x1", "UnallocNEONScalar3SameFP16"}, + { "_sjsltg", + {17}, + { {"0"_b, "st2_asisdlsop_hx2_r2h"}, + {"1"_b, "st2_asisdlsop_h2_i2h"}, }, }, - { "DecodeNEONScalarAnd2SHA", - {29, 20, 19, 18, 17, 11, 10}, - { {"0010010", "VisitCrypto2RegSHA"}, - {"x000010", "UnallocNEONScalar2RegMisc"}, - {"x100010", "UnallocNEONScalarPairwise"}, - {"x110010", "UnallocNEONScalar2RegMiscFP16"}, - {"xxxxxx1", 
"UnallocNEONScalar3Same"}, - {"xxxxx00", "UnallocNEONScalar3Diff"}, + { "_sjtrhm", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1rqb_z_p_bi_u8"}, + {"000x0"_b, "ld1rqb_z_p_br_contiguous"}, + {"01001"_b, "ld1rqh_z_p_bi_u16"}, + {"010x0"_b, "ld1rqh_z_p_br_contiguous"}, + {"100x1"_b, "stnt1b_z_p_ar_d_64_unscaled"}, + {"101x1"_b, "stnt1b_z_p_ar_s_x32_unscaled"}, + {"110x1"_b, "stnt1h_z_p_ar_d_64_unscaled"}, + {"111x1"_b, "stnt1h_z_p_ar_s_x32_unscaled"}, }, }, - { "DecodeNEONScalar", - {28, 23, 10}, - { {"101", "UnallocNEONScalarShiftImmediate"}, - {"1x0", "UnallocNEONScalarByIndexedElement"}, + { "_sjvhlq", + {22}, + { {"0"_b, "smullb_z_zzi_s"}, + {"1"_b, "smullb_z_zzi_d"}, }, }, - { "DecodeNEONLoadStoreMulti", - {20, 19, 18, 17, 16}, - { {"00000", "UnallocNEONLoadStoreMultiStruct"}, + { "_sjzsvv", + {30, 23, 13, 12, 11, 10}, + { {"101001"_b, "ucvtf_asisdshf_c"}, + {"101111"_b, "fcvtzu_asisdshf_c"}, + {"1x01x0"_b, "sqrdmlah_asisdelem_r"}, + {"1x11x0"_b, "sqrdmlsh_asisdelem_r"}, + }, + }, + + { "_skglrt", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "mvni_asimdimm_l_sl"}, + {"00x100"_b, "ushr_asimdshf_r"}, + {"00x110"_b, "urshr_asimdshf_r"}, + {"010x00"_b, "ushr_asimdshf_r"}, + {"010x10"_b, "urshr_asimdshf_r"}, + {"011100"_b, "ushr_asimdshf_r"}, + {"011110"_b, "urshr_asimdshf_r"}, + {"0x1000"_b, "ushr_asimdshf_r"}, + {"0x1010"_b, "urshr_asimdshf_r"}, + }, + }, + + { "_skpjrp", + {23, 22, 12}, + { {"000"_b, "_xzyylk"}, + {"001"_b, "_hpgqlp"}, + {"010"_b, "_qnsxkj"}, + {"011"_b, "_nnlvqz"}, + {"110"_b, "_vylhvl"}, + {"111"_b, "_stgkpy"}, + }, + }, + + { "_slhpgp", + {23}, + { {"0"_b, "facge_asimdsame_only"}, + {"1"_b, "facgt_asimdsame_only"}, + }, + }, + + { "_sllkpt", + {13, 12}, + { {"10"_b, "lsrv_32_dp_2src"}, + }, + }, + + { "_slnkst", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtmu_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtmu_asimdmisc_r"}, + {"1111001"_b, "fcvtzu_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtzu_asimdmisc_r"}, + {"xx00000"_b, "neg_asimdmisc_r"}, + }, 
+ }, + + { "_sltqpy", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "strb_32b_ldst_regoff"}, + {"001xx10"_b, "ldrb_32b_ldst_regoff"}, + {"0100000"_b, "ldaprb_32l_memop"}, + {"010xx10"_b, "ldrsb_64b_ldst_regoff"}, + {"011xx10"_b, "ldrsb_32b_ldst_regoff"}, + {"100xx10"_b, "strh_32_ldst_regoff"}, + {"101xx10"_b, "ldrh_32_ldst_regoff"}, + {"1100000"_b, "ldaprh_32l_memop"}, + {"110xx10"_b, "ldrsh_64_ldst_regoff"}, + {"111xx10"_b, "ldrsh_32_ldst_regoff"}, + }, + }, + + { "_smplhv", + {10}, + { {"0"_b, "braa_64p_branch_reg"}, + {"1"_b, "brab_64p_branch_reg"}, + }, + }, + + { "_smqvrs", + {18, 17}, + { {"00"_b, "st1_asisdlse_r1_1v"}, + }, + }, + + { "_smrtxq", + {13, 12}, + { {"00"_b, "sbcs_32_addsub_carry"}, + }, + }, + + { "_snjpvy", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmulx_asimdelem_rh_h"}, + {"0x0001"_b, "sqshrun_asimdshf_n"}, + {"0x0011"_b, "sqrshrun_asimdshf_n"}, + {"0x0101"_b, "uqshrn_asimdshf_n"}, + {"0x0111"_b, "uqrshrn_asimdshf_n"}, + {"0x1001"_b, "ushll_asimdshf_l"}, + {"1000x0"_b, "fmlal2_asimdelem_lh"}, + {"1x01x0"_b, "fmulx_asimdelem_r_sd"}, + {"xx10x0"_b, "umull_asimdelem_l"}, + }, + }, + + { "_snkqvp", + {23, 22, 20, 19, 18, 17, 16, 13, 12, 11}, + { {"0011111001"_b, "_gkpvxz"}, + }, + }, + + { "_sntyqy", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_sntzjg", + {23, 22, 11, 10}, + { {"0000"_b, "_qssyls"}, + {"0001"_b, "stg_64spost_ldsttags"}, + {"0010"_b, "stg_64soffset_ldsttags"}, + {"0011"_b, "stg_64spre_ldsttags"}, + {"0100"_b, "ldg_64loffset_ldsttags"}, + {"0101"_b, "stzg_64spost_ldsttags"}, + {"0110"_b, "stzg_64soffset_ldsttags"}, + {"0111"_b, "stzg_64spre_ldsttags"}, + {"1000"_b, "_kyxqgg"}, + {"1001"_b, "st2g_64spost_ldsttags"}, + {"1010"_b, "st2g_64soffset_ldsttags"}, + {"1011"_b, "st2g_64spre_ldsttags"}, + {"1100"_b, "_stjrgx"}, + {"1101"_b, "stz2g_64spost_ldsttags"}, + {"1110"_b, "stz2g_64soffset_ldsttags"}, + {"1111"_b, "stz2g_64spre_ldsttags"}, + }, + }, + + { "_spglxn", + {4, 3, 2, 1, 0}, + { 
{"11111"_b, "_yqmvxk"}, + }, + }, + + { "_sphpkr", + {4, 3, 2, 1, 0}, + { {"11111"_b, "_thsxvg"}, + }, + }, + + { "_spjjkg", + {23, 22, 13, 12, 11, 10}, + { {"0011x0"_b, "sudot_asimdelem_d"}, + {"0111x0"_b, "bfdot_asimdelem_e"}, + {"0x1001"_b, "scvtf_asimdshf_c"}, + {"0x1111"_b, "fcvtzs_asimdshf_c"}, + {"1011x0"_b, "usdot_asimdelem_d"}, + {"1111x0"_b, "bfmlal_asimdelem_f"}, + {"xx00x0"_b, "sqdmulh_asimdelem_r"}, + {"xx01x0"_b, "sqrdmulh_asimdelem_r"}, + {"xx10x0"_b, "sdot_asimdelem_d"}, + }, + }, + + { "_spmkmm", + {30, 19, 18, 17, 16, 10}, + { {"110001"_b, "ins_asimdins_iv_v"}, + {"1x1001"_b, "ins_asimdins_iv_v"}, + {"1xx101"_b, "ins_asimdins_iv_v"}, + {"1xxx11"_b, "ins_asimdins_iv_v"}, + {"xxxxx0"_b, "ext_asimdext_only"}, + }, + }, + + { "_spzgkt", + {23, 22, 13, 12, 11, 10}, + { {"0x1001"_b, "ucvtf_asimdshf_c"}, + {"0x1111"_b, "fcvtzu_asimdshf_c"}, + {"1000x0"_b, "fmlsl2_asimdelem_lh"}, + {"xx01x0"_b, "sqrdmlah_asimdelem_r"}, + {"xx10x0"_b, "udot_asimdelem_d"}, + {"xx11x0"_b, "sqrdmlsh_asimdelem_r"}, + }, + }, + + { "_sqgjmn", + {20, 9}, + { {"00"_b, "_mxgykv"}, + }, + }, + + { "_sqgxzn", + {9, 8, 7, 6, 5}, + { {"11111"_b, "paciza_64z_dp_1src"}, + }, + }, + + { "_sqjpsl", + {30, 13, 12, 11, 10}, + { {"10001"_b, "sqrdmlah_asisdsame2_only"}, + {"10011"_b, "sqrdmlsh_asisdsame2_only"}, + }, + }, + + { "_sqpjtr", + {20, 18, 17}, + { {"000"_b, "_nllnsg"}, + }, + }, + + { "_srggzy", + {19}, + { {"0"_b, "_xqgxjp"}, + {"1"_b, "sysl_rc_systeminstrs"}, + }, + }, + + { "_srglgl", + {18, 17}, + { {"0x"_b, "st3_asisdlsop_sx3_r3s"}, + {"10"_b, "st3_asisdlsop_sx3_r3s"}, + {"11"_b, "st3_asisdlsop_s3_i3s"}, + }, + }, + + { "_srmhjk", + {30}, + { {"0"_b, "ldr_s_loadlit"}, + {"1"_b, "ldr_d_loadlit"}, }, }, - { "DecodeNEONLoadStoreSingle", + { "_srmhlk", {20, 19, 18, 17, 16}, - { {"00000", "UnallocNEONLoadStoreSingleStruct"}, + { {"00000"_b, "uaddlp_asimdmisc_p"}, + {"00001"_b, "sqxtun_asimdmisc_n"}, }, }, - { "DecodeNEONOther", - {29, 23, 22, 15, 14, 11, 10}, - { {"0xx0x00", 
"UnallocNEONTable"}, - {"0xx0x10", "UnallocNEONPerm"}, - {"1xx0xx0", "UnallocNEONExtract"}, - {"x000xx1", "UnallocNEONCopy"}, - {"xx100x1", "UnallocNEON3SameFP16"}, - {"xxx1xx1", "UnallocNEON3SameExtra"}, + { "_srvnql", + {18, 17, 12}, + { {"0x0"_b, "ld1_asisdlsop_dx1_r1d"}, + {"100"_b, "ld1_asisdlsop_dx1_r1d"}, + {"110"_b, "ld1_asisdlsop_d1_i1d"}, }, }, - { "DecodeNEON2OpAndAcross", - {30, 29, 20, 19, 18, 17}, - { {"100100", "VisitCryptoAES"}, - {"xx1100", "UnallocNEON2RegMiscFP16"}, - {"xx0000", "UnallocNEON2RegMisc"}, - {"xx1000", "UnallocNEONAcrossLanes"}, + { "_stgkpy", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_h_floatimm"}, }, }, - { "DecodeNEON3Op", - {11, 10}, - { {"00", "UnallocNEON3Different"}, - {"10", "DecodeNEON2OpAndAcross"}, - {"x1", "UnallocNEON3Same"}, + { "_stjrgx", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "ldgm_64bulk_ldsttags"}, }, }, - { "DecodeNEONImmAndIndex", - {23, 22, 21, 20, 19, 10}, - { {"000001", "UnallocNEONModifiedImmediate"}, - {"0xxx11", "UnallocNEONShiftImmediate"}, - {"0xx1x1", "UnallocNEONShiftImmediate"}, - {"0x1xx1", "UnallocNEONShiftImmediate"}, - {"01xxx1", "UnallocNEONShiftImmediate"}, - {"xxxxx0", "UnallocNEONByIndexedElement"}, + { "_stqmps", + {12}, + { {"0"_b, "ld3_asisdlsop_dx3_r3d"}, }, }, - { "DecodeFP", - {15, 14, 13, 12, 11, 10}, - { {"000000", "UnallocFPIntegerConvert"}, - {"x10000", "UnallocFPDataProcessing1Source"}, - {"xx1000", "UnallocFPCompare"}, - {"xxx100", "UnallocFPImmediate"}, - {"xxxx01", "UnallocFPConditionalCompare"}, - {"xxxx10", "UnallocFPDataProcessing2Source"}, - {"xxxx11", "UnallocFPConditionalSelect"}, + { "_strkph", + {23, 22}, + { {"00"_b, "tbl_asimdtbl_l2_2"}, }, }, - { "DecodeLoadStore", - {11, 10}, - { {"00", "UnallocLoadStoreUnscaledOffset"}, - {"01", "UnallocLoadStorePostIndex"}, - {"10", "VisitUnimplemented"}, // LoadStoreUnprivileged. 
- {"11", "UnallocLoadStorePreIndex"}, + { "_svnyyx", + {13, 12}, + { {"00"_b, "adcs_32_addsub_carry"}, }, }, - { "DecodeLoadStoreRegister", - {11, 10}, - { {"00", "UnallocAtomicMemory"}, - {"10", "UnallocLoadStoreRegisterOffset"}, - {"x1", "UnallocLoadStorePAC"}, + { "_svrnxq", + {12}, + { {"0"_b, "st3_asisdlsop_dx3_r3d"}, }, }, - { "DecodeCondCmp", - {11}, - { {"0", "UnallocConditionalCompareRegister"}, - {"1", "UnallocConditionalCompareImmediate"}, + { "_svvyrz", + {23, 22, 20, 19, 18, 17, 16}, + { {"00xxxxx"_b, "addvl_r_ri"}, + {"01xxxxx"_b, "addpl_r_ri"}, + {"1011111"_b, "rdvl_r_i"}, + }, + }, + + { "_sxnkrh", + {23}, + { {"1"_b, "_xxkvsy"}, + }, + }, + + { "_sxpvym", + {30, 23, 22, 13}, + { {"0000"_b, "ldnt1sb_z_p_ar_s_x32_unscaled"}, + {"0001"_b, "ldnt1b_z_p_ar_s_x32_unscaled"}, + {"0010"_b, "ld1rb_z_p_bi_u8"}, + {"0011"_b, "ld1rb_z_p_bi_u16"}, + {"0100"_b, "ldnt1sh_z_p_ar_s_x32_unscaled"}, + {"0101"_b, "ldnt1h_z_p_ar_s_x32_unscaled"}, + {"0110"_b, "ld1rsw_z_p_bi_s64"}, + {"0111"_b, "ld1rh_z_p_bi_u16"}, + {"1000"_b, "ldnt1sb_z_p_ar_d_64_unscaled"}, + {"1010"_b, "ld1sb_z_p_bz_d_64_unscaled"}, + {"1011"_b, "ldff1sb_z_p_bz_d_64_unscaled"}, + {"1100"_b, "ldnt1sh_z_p_ar_d_64_unscaled"}, + {"1110"_b, "ld1sh_z_p_bz_d_64_unscaled"}, + {"1111"_b, "ldff1sh_z_p_bz_d_64_unscaled"}, + }, + }, + + { "_syktsg", + {13, 12}, + { {"00"_b, "udiv_64_dp_2src"}, + {"10"_b, "asrv_64_dp_2src"}, + }, + }, + + { "_syzjtz", + {13, 12, 10}, + { {"010"_b, "sqrdmlah_asisdelem_r"}, + {"101"_b, "_jqnglz"}, + {"110"_b, "sqrdmlsh_asisdelem_r"}, + {"111"_b, "_zslsvj"}, + }, + }, + + { "_szttjy", + {30, 23, 22, 19, 18, 17, 16}, + { {"00000x1"_b, "umov_asimdins_w_w"}, + {"0000x10"_b, "umov_asimdins_w_w"}, + {"00010xx"_b, "umov_asimdins_w_w"}, + {"0001110"_b, "umov_asimdins_w_w"}, + {"000x10x"_b, "umov_asimdins_w_w"}, + {"000x111"_b, "umov_asimdins_w_w"}, + {"1001000"_b, "umov_asimdins_x_x"}, + {"x01xxxx"_b, "frecps_asimdsamefp16_only"}, + {"x11xxxx"_b, "frsqrts_asimdsamefp16_only"}, + }, + }, + 
+ { "_tgmljr", + {23, 22, 20, 19, 12, 11}, + { {"000000"_b, "movi_asimdimm_n_b"}, + {"000010"_b, "fmov_asimdimm_s_s"}, + {"000011"_b, "fmov_asimdimm_h_h"}, + {"00x100"_b, "scvtf_asimdshf_c"}, + {"00x111"_b, "fcvtzs_asimdshf_c"}, + {"010x00"_b, "scvtf_asimdshf_c"}, + {"010x11"_b, "fcvtzs_asimdshf_c"}, + {"011100"_b, "scvtf_asimdshf_c"}, + {"011111"_b, "fcvtzs_asimdshf_c"}, + {"0x1000"_b, "scvtf_asimdshf_c"}, + {"0x1011"_b, "fcvtzs_asimdshf_c"}, + }, + }, + + { "_tgqsyg", + {22}, + { {"0"_b, "prfm_p_ldst_regoff"}, + }, + }, + + { "_thqvrp", + {17}, + { {"0"_b, "st1_asisdlsep_r2_r2"}, + {"1"_b, "st1_asisdlsep_i2_i2"}, + }, + }, + + { "_thrxph", + {23, 22, 10}, + { {"100"_b, "umlalb_z_zzzi_s"}, + {"101"_b, "umlalt_z_zzzi_s"}, + {"110"_b, "umlalb_z_zzzi_d"}, + {"111"_b, "umlalt_z_zzzi_d"}, + }, + }, + + { "_thsxvg", + {11, 10, 9, 8, 7, 6}, + { {"000010"_b, "ssbb_only_barriers"}, + {"010010"_b, "pssbb_only_barriers"}, + {"0x1010"_b, "dsb_bo_barriers"}, + {"0xx110"_b, "dsb_bo_barriers"}, + {"1xxx10"_b, "dsb_bo_barriers"}, + {"xxxx01"_b, "clrex_bn_barriers"}, + {"xxxx11"_b, "isb_bi_barriers"}, + }, + }, + + { "_thvvzp", + {18, 17, 12}, + { {"0x0"_b, "st1_asisdlsop_dx1_r1d"}, + {"100"_b, "st1_asisdlsop_dx1_r1d"}, + {"110"_b, "st1_asisdlsop_d1_i1d"}, + }, + }, + + { "_thvxym", + {20}, + { {"0"_b, "_prkmty"}, + {"1"_b, "_pjgkjs"}, + }, + }, + + { "_tjktkm", + {30}, + { {"1"_b, "_gntpyh"}, + }, + }, + + { "_tjltls", + {18, 17}, + { {"0x"_b, "st1_asisdlsep_r1_r1"}, + {"10"_b, "st1_asisdlsep_r1_r1"}, + {"11"_b, "st1_asisdlsep_i1_i1"}, + }, + }, + + { "_tjpjng", + {23, 22, 13, 12}, + { {"0000"_b, "fmax_s_floatdp2"}, + {"0001"_b, "fmin_s_floatdp2"}, + {"0010"_b, "fmaxnm_s_floatdp2"}, + {"0011"_b, "fminnm_s_floatdp2"}, + {"0100"_b, "fmax_d_floatdp2"}, + {"0101"_b, "fmin_d_floatdp2"}, + {"0110"_b, "fmaxnm_d_floatdp2"}, + {"0111"_b, "fminnm_d_floatdp2"}, + {"1100"_b, "fmax_h_floatdp2"}, + {"1101"_b, "fmin_h_floatdp2"}, + {"1110"_b, "fmaxnm_h_floatdp2"}, + {"1111"_b, 
"fminnm_h_floatdp2"}, + }, + }, + + { "_tjtgjy", + {20, 19, 18, 17}, + { {"0000"_b, "_gjsnly"}, + }, + }, + + { "_tjzqnp", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ldnt1b_z_p_bi_contiguous"}, + {"000x0"_b, "ldnt1b_z_p_br_contiguous"}, + {"00101"_b, "ld3b_z_p_bi_contiguous"}, + {"001x0"_b, "ld3b_z_p_br_contiguous"}, + {"01001"_b, "ldnt1h_z_p_bi_contiguous"}, + {"010x0"_b, "ldnt1h_z_p_br_contiguous"}, + {"01101"_b, "ld3h_z_p_bi_contiguous"}, + {"011x0"_b, "ld3h_z_p_br_contiguous"}, + {"10011"_b, "stnt1b_z_p_bi_contiguous"}, + {"100x0"_b, "st1b_z_p_bz_d_x32_unscaled"}, + {"10111"_b, "st3b_z_p_bi_contiguous"}, + {"101x0"_b, "st1b_z_p_bz_s_x32_unscaled"}, + {"10x01"_b, "st1b_z_p_bi"}, + {"11011"_b, "stnt1h_z_p_bi_contiguous"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_unscaled"}, + {"11111"_b, "st3h_z_p_bi_contiguous"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_unscaled"}, + {"11x01"_b, "st1h_z_p_bi"}, + }, + }, + + { "_tkjtgp", + {30}, + { {"0"_b, "_sqgjmn"}, + {"1"_b, "_ztpryr"}, + }, + }, + + { "_tkzqqp", + {4, 3, 2, 1, 0}, + { {"11111"_b, "_ntkqhk"}, + }, + }, + + { "_tlstgz", + {30, 23, 22}, + { {"000"_b, "stlxp_sp32_ldstexcl"}, + {"001"_b, "ldaxp_lp32_ldstexcl"}, + {"100"_b, "stlxp_sp64_ldstexcl"}, + {"101"_b, "ldaxp_lp64_ldstexcl"}, + }, + }, + + { "_tlzlrj", + {17}, + { {"0"_b, "st2_asisdlso_b2_2b"}, + }, + }, + + { "_tmhlvh", + {20, 9, 4}, + { {"000"_b, "zip2_p_pp"}, + }, + }, + + { "_tmrnzq", + {17}, + { {"0"_b, "st2_asisdlsep_r2_r"}, + {"1"_b, "st2_asisdlsep_i2_i"}, + }, + }, + + { "_tmshps", + {17}, + { {"0"_b, "fmaxnmv_v_p_z"}, + {"1"_b, "fmaxv_v_p_z"}, + }, + }, + + { "_tmthqm", + {22}, + { {"0"_b, "str_32_ldst_regoff"}, + {"1"_b, "ldr_32_ldst_regoff"}, + }, + }, + + { "_tmtnkq", + {23, 18, 17, 16}, + { {"0000"_b, "uqxtnb_z_zz"}, + }, + }, + + { "_tnhmpx", + {30, 23, 22, 13, 12, 11, 10}, + { {"1011001"_b, "fcmge_asisdsamefp16_only"}, + {"1011011"_b, "facge_asisdsamefp16_only"}, + {"1110101"_b, "fabd_asisdsamefp16_only"}, + {"1111001"_b, "fcmgt_asisdsamefp16_only"}, + 
{"1111011"_b, "facgt_asisdsamefp16_only"}, + }, + }, + + { "_tnrrjk", + {30, 23, 22, 11, 10}, + { {"01000"_b, "csel_32_condsel"}, + {"01001"_b, "csinc_32_condsel"}, + {"11000"_b, "csinv_32_condsel"}, + {"11001"_b, "csneg_32_condsel"}, + }, + }, + + { "_tnxlnl", + {13, 12}, + { {"00"_b, "crc32x_64c_dp_2src"}, + {"01"_b, "crc32cx_64c_dp_2src"}, + }, + }, + + { "_tnzytv", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "wfet_only_systeminstrswithreg"}, + }, + }, + + { "_tpkslq", + {30, 23, 22, 20, 13, 4}, + { {"00001x"_b, "ld1rqw_z_p_bi_u32"}, + {"000x0x"_b, "ld1rqw_z_p_br_contiguous"}, + {"01001x"_b, "ld1rqd_z_p_bi_u64"}, + {"010x0x"_b, "ld1rqd_z_p_br_contiguous"}, + {"100x1x"_b, "stnt1w_z_p_ar_d_64_unscaled"}, + {"101x1x"_b, "stnt1w_z_p_ar_s_x32_unscaled"}, + {"110x00"_b, "str_p_bi"}, + {"110x1x"_b, "stnt1d_z_p_ar_d_64_unscaled"}, + }, + }, + + { "_tpkzxg", + {4}, + { {"0"_b, "ccmp_64_condcmp_imm"}, + }, + }, + + { "_tpsylx", + {13}, + { {"0"_b, "_gjylrt"}, + {"1"_b, "_ygjslq"}, + }, + }, + + { "_trlhgn", + {30, 23, 22, 11, 10}, + { {"00010"_b, "str_b_ldst_regoff"}, + {"00110"_b, "ldr_b_ldst_regoff"}, + {"01010"_b, "str_q_ldst_regoff"}, + {"01110"_b, "ldr_q_ldst_regoff"}, + {"10010"_b, "str_h_ldst_regoff"}, + {"10110"_b, "ldr_h_ldst_regoff"}, }, }, - // Unallocation decode nodes. These are used to mark encodings within an - // instruction class as unallocated. 
- { "UnallocAddSubExtended", + { "_tsksxr", + {17}, + { {"0"_b, "fminnmv_v_p_z"}, + {"1"_b, "fminv_v_p_z"}, + }, + }, + + { "_tssqsr", + {30}, + { {"1"_b, "_syzjtz"}, + }, + }, + + { "_tsvsgh", + {17}, + { {"0"_b, "st1_asisdlso_b1_1b"}, + }, + }, + + { "_tszvvk", + {18, 17, 12}, + { {"000"_b, "ld2_asisdlso_d2_2d"}, + }, + }, + + { "_ttplgp", {12, 11, 10}, - { {"1x1", "VisitUnallocated"}, - {"11x", "VisitUnallocated"}, - {"otherwise", "UnallocAddSubExtended_2"}, + { {"000"_b, "sqincp_z_p_z"}, + {"010"_b, "sqincp_r_p_r_sx"}, + {"011"_b, "sqincp_r_p_r_x"}, + {"100"_b, "_zqmrhp"}, }, }, - { "UnallocAddSubExtended_2", + { "_ttstyt", + {12, 10}, + { {"00"_b, "_rkqtvs"}, + {"01"_b, "_mtlhnl"}, + {"10"_b, "_zlmgyp"}, + {"11"_b, "_kjghlk"}, + }, + }, + + { "_tvgvvq", + {30}, + { {"0"_b, "cbnz_32_compbranch"}, + }, + }, + + { "_tvsszp", {23, 22}, - { {"1x", "VisitUnallocated"}, - {"x1", "VisitUnallocated"}, - {"otherwise", "VisitAddSubExtended"}, + { {"00"_b, "fmadd_s_floatdp3"}, + {"01"_b, "fmadd_d_floatdp3"}, + {"11"_b, "fmadd_h_floatdp3"}, }, }, - { "UnallocAddSubImmediate", - {23}, - { {"0", "VisitAddSubImmediate"}, - {"1", "VisitUnallocated"}, + { "_txhzxq", + {30, 22, 11}, + { {"000"_b, "_svnyyx"}, + {"001"_b, "_qsxpyq"}, + {"010"_b, "_pnqxjg"}, + {"011"_b, "_myrshl"}, + {"100"_b, "_smrtxq"}, + {"110"_b, "_ryglvl"}, + {"111"_b, "_qqsmlt"}, + }, + }, + + { "_txjyxr", + {18, 17}, + { {"0x"_b, "ld1_asisdlsep_r1_r1"}, + {"10"_b, "ld1_asisdlsep_r1_r1"}, + {"11"_b, "ld1_asisdlsep_i1_i1"}, + }, + }, + + { "_txnqzy", + {30, 23, 22}, + { {"000"_b, "smsubl_64wa_dp_3src"}, + {"010"_b, "umsubl_64wa_dp_3src"}, + }, + }, + + { "_txsmts", + {13, 12, 11, 10}, + { {"0000"_b, "smlal_asimddiff_l"}, + {"0001"_b, "add_asimdsame_only"}, + {"0010"_b, "_qhsplz"}, + {"0011"_b, "cmtst_asimdsame_only"}, + {"0100"_b, "sqdmlal_asimddiff_l"}, + {"0101"_b, "mla_asimdsame_only"}, + {"0110"_b, "_yvxgrr"}, + {"0111"_b, "mul_asimdsame_only"}, + {"1000"_b, "smlsl_asimddiff_l"}, + {"1001"_b, 
"smaxp_asimdsame_only"}, + {"1010"_b, "_mnxmst"}, + {"1011"_b, "sminp_asimdsame_only"}, + {"1100"_b, "sqdmlsl_asimddiff_l"}, + {"1101"_b, "sqdmulh_asimdsame_only"}, + {"1110"_b, "_klkgqk"}, + {"1111"_b, "addp_asimdsame_only"}, + }, + }, + + { "_txzxzs", + {23, 22, 20, 19, 18}, + { {"00000"_b, "orr_z_zi"}, + {"01000"_b, "eor_z_zi"}, + {"10000"_b, "and_z_zi"}, + {"11000"_b, "dupm_z_i"}, + {"xx1xx"_b, "cpy_z_p_i"}, }, }, - { "UnallocAddSubShifted", + { "_tyjqvt", + {18, 17}, + { {"00"_b, "ld4_asisdlso_s4_4s"}, + }, + }, + + { "_tylqpt", + {23, 22, 13}, + { {"000"_b, "fmulx_asimdelem_rh_h"}, + {"1x0"_b, "fmulx_asimdelem_r_sd"}, + }, + }, + + { "_typysz", + {23, 22, 20, 19, 13, 11, 10}, + { {"00x1001"_b, "sqshrn_asisdshf_n"}, + {"00x1011"_b, "sqrshrn_asisdshf_n"}, + {"00xx0x0"_b, "fmul_asisdelem_rh_h"}, + {"010x001"_b, "sqshrn_asisdshf_n"}, + {"010x011"_b, "sqrshrn_asisdshf_n"}, + {"0111001"_b, "sqshrn_asisdshf_n"}, + {"0111011"_b, "sqrshrn_asisdshf_n"}, + {"0x10001"_b, "sqshrn_asisdshf_n"}, + {"0x10011"_b, "sqrshrn_asisdshf_n"}, + {"1xxx0x0"_b, "fmul_asisdelem_r_sd"}, + {"xxxx1x0"_b, "sqdmull_asisdelem_l"}, + }, + }, + + { "_tytvjk", + {13, 12, 11}, + { {"000"_b, "_lylpyx"}, + {"001"_b, "_kyxrqg"}, + {"010"_b, "_zmkqxl"}, + {"011"_b, "_gngjxr"}, + {"100"_b, "_mlxtxs"}, + {"101"_b, "_mnmtql"}, + {"110"_b, "_xmxpnx"}, + {"111"_b, "_lkttgy"}, + }, + }, + + { "_tzzhsk", + {13, 12}, + { {"01"_b, "sqdmlal_asisddiff_only"}, + {"11"_b, "sqdmlsl_asisddiff_only"}, + }, + }, + + { "_tzzssm", + {12, 11, 10}, + { {"000"_b, "histseg_z_zz"}, + }, + }, + + { "_tzzzxz", + {30, 23, 22, 20, 19}, + { {"0xxxx"_b, "bl_only_branch_imm"}, + {"10001"_b, "sysl_rc_systeminstrs"}, + {"1001x"_b, "mrs_rs_systemmove"}, + }, + }, + + { "_vgrhsz", + {30, 23, 11, 10}, + { {"0010"_b, "_hljrqn"}, + {"0100"_b, "_htnmls"}, + {"0110"_b, "_vxgzqy"}, + {"1000"_b, "_lpsxhz"}, + {"1001"_b, "ldraa_64_ldst_pac"}, + {"1010"_b, "_jtqlhs"}, + {"1011"_b, "ldraa_64w_ldst_pac"}, + {"1100"_b, "_yrlzqp"}, + {"1101"_b, 
"ldrab_64_ldst_pac"}, + {"1110"_b, "_xyhxzt"}, + {"1111"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_vgrtjz", + {12}, + { {"0"_b, "sqdmulh_asimdelem_r"}, + {"1"_b, "sqrdmulh_asimdelem_r"}, + }, + }, + + { "_vgtnjh", + {23, 22, 20, 19, 18, 17, 16}, + { {"0001010"_b, "fcvtxnt_z_p_z_d2s"}, + {"1001000"_b, "fcvtnt_z_p_z_s2h"}, + {"1001001"_b, "fcvtlt_z_p_z_h2s"}, + {"1001010"_b, "bfcvtnt_z_p_z_s2bf"}, + {"1101010"_b, "fcvtnt_z_p_z_d2s"}, + {"1101011"_b, "fcvtlt_z_p_z_s2d"}, + }, + }, + + { "_vhhktl", + {18, 17}, + { {"0x"_b, "st4_asisdlsop_sx4_r4s"}, + {"10"_b, "st4_asisdlsop_sx4_r4s"}, + {"11"_b, "st4_asisdlsop_s4_i4s"}, + }, + }, + + { "_vhmsgj", + {18, 17, 12}, + { {"000"_b, "ld1_asisdlso_d1_1d"}, + }, + }, + + { "_vjlnqj", + {23, 22, 13, 12}, + { {"0000"_b, "fnmul_s_floatdp2"}, + {"0100"_b, "fnmul_d_floatdp2"}, + {"1100"_b, "fnmul_h_floatdp2"}, + }, + }, + + { "_vjmklj", + {23, 22}, + { {"10"_b, "sqrdcmlah_z_zzzi_h"}, + {"11"_b, "sqrdcmlah_z_zzzi_s"}, + }, + }, + + { "_vjqsqs", + {30}, + { {"0"_b, "and_32_log_shift"}, + {"1"_b, "eor_32_log_shift"}, + }, + }, + + { "_vjxqhp", + {23, 22, 20, 19, 18, 16, 13}, + { {"0000000"_b, "_jlrvpl"}, + {"0000001"_b, "_pmkxlj"}, + {"0100000"_b, "_qmgtyq"}, + {"0100001"_b, "_qhxzxl"}, + {"100xxx0"_b, "st2_asisdlsep_r2_r"}, + {"100xxx1"_b, "st1_asisdlsep_r2_r2"}, + {"1010xx0"_b, "st2_asisdlsep_r2_r"}, + {"1010xx1"_b, "st1_asisdlsep_r2_r2"}, + {"10110x0"_b, "st2_asisdlsep_r2_r"}, + {"10110x1"_b, "st1_asisdlsep_r2_r2"}, + {"1011100"_b, "st2_asisdlsep_r2_r"}, + {"1011101"_b, "st1_asisdlsep_r2_r2"}, + {"1011110"_b, "_tmrnzq"}, + {"1011111"_b, "_thqvrp"}, + {"110xxx0"_b, "ld2_asisdlsep_r2_r"}, + {"110xxx1"_b, "ld1_asisdlsep_r2_r2"}, + {"1110xx0"_b, "ld2_asisdlsep_r2_r"}, + {"1110xx1"_b, "ld1_asisdlsep_r2_r2"}, + {"11110x0"_b, "ld2_asisdlsep_r2_r"}, + {"11110x1"_b, "ld1_asisdlsep_r2_r2"}, + {"1111100"_b, "ld2_asisdlsep_r2_r"}, + {"1111101"_b, "ld1_asisdlsep_r2_r2"}, + {"1111110"_b, "_nszhhy"}, + {"1111111"_b, "_qxrzgv"}, + }, + }, + + { 
"_vjymzn", {23, 22}, - { {"11", "VisitUnallocated"}, - {"otherwise", "UnallocAddSubShifted_2"}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, }, }, - { "UnallocAddSubShifted_2", - {31, 15}, - { {"01", "VisitUnallocated"}, - {"otherwise", "VisitAddSubShifted"}, + { "_vkhhkk", + {30, 23, 22, 11, 10, 4}, + { {"001000"_b, "ccmn_64_condcmp_reg"}, + {"001100"_b, "ccmn_64_condcmp_imm"}, + {"101000"_b, "ccmp_64_condcmp_reg"}, + {"101100"_b, "ccmp_64_condcmp_imm"}, }, }, - { "UnallocAddSubWithCarry", - {15, 14, 13, 12, 11, 10}, - { {"000000", "VisitAddSubWithCarry"}, - {"x00001", "UnallocRotateRightIntoFlags"}, - {"xx0010", "UnallocEvaluateIntoFlags"}, - {"otherwise", "VisitUnallocated"}, + { "_vkrkks", + {30, 23, 22, 13, 4}, + { {"00000"_b, "prfb_i_p_br_s"}, + {"00010"_b, "prfb_i_p_ai_s"}, + {"0010x"_b, "ld1rb_z_p_bi_u32"}, + {"0011x"_b, "ld1rb_z_p_bi_u64"}, + {"01000"_b, "prfh_i_p_br_s"}, + {"01010"_b, "prfh_i_p_ai_s"}, + {"0110x"_b, "ld1rh_z_p_bi_u32"}, + {"0111x"_b, "ld1rh_z_p_bi_u64"}, + {"1000x"_b, "ldnt1b_z_p_ar_d_64_unscaled"}, + {"10010"_b, "prfb_i_p_ai_d"}, + {"1010x"_b, "ld1b_z_p_bz_d_64_unscaled"}, + {"1011x"_b, "ldff1b_z_p_bz_d_64_unscaled"}, + {"1100x"_b, "ldnt1h_z_p_ar_d_64_unscaled"}, + {"11010"_b, "prfh_i_p_ai_d"}, + {"1110x"_b, "ld1h_z_p_bz_d_64_unscaled"}, + {"1111x"_b, "ldff1h_z_p_bz_d_64_unscaled"}, }, }, - { "UnallocAtomicMemory", - {26, 23, 22, 15, 14, 13, 12}, - { {"0xx1001", "VisitUnallocated"}, - {"0xx101x", "VisitUnallocated"}, - {"0xx1101", "VisitUnallocated"}, - {"0xx111x", "VisitUnallocated"}, - {"00x1100", "VisitUnallocated"}, - {"0111100", "VisitUnallocated"}, - {"1xxxxxx", "VisitUnallocated"}, - {"otherwise", "VisitAtomicMemory"}, + { "_vkvgnm", + {30, 13}, + { {"10"_b, "_vyygqs"}, }, }, - { "UnallocBitfield", - {31, 30, 29, 22}, - { {"x11x", "VisitUnallocated"}, - {"0xx1", "VisitUnallocated"}, - {"1xx0", "VisitUnallocated"}, - {"otherwise", "VisitBitfield"}, + { "_vkyngx", + {23, 22, 19, 
18, 17, 16}, + { {"0000x1"_b, "dup_asimdins_dv_v"}, + {"000x10"_b, "dup_asimdins_dv_v"}, + {"0010xx"_b, "dup_asimdins_dv_v"}, + {"001110"_b, "dup_asimdins_dv_v"}, + {"00x10x"_b, "dup_asimdins_dv_v"}, + {"00x111"_b, "dup_asimdins_dv_v"}, + {"01xxxx"_b, "fmaxnm_asimdsamefp16_only"}, + {"11xxxx"_b, "fminnm_asimdsamefp16_only"}, }, }, - { "UnallocConditionalBranch", - {24, 4}, - { {"00", "VisitConditionalBranch"}, - {"otherwise", "VisitUnallocated"}, + { "_vllqmp", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "stxp_sp32_ldstexcl"}, + {"001xxxx"_b, "ldxp_lp32_ldstexcl"}, + {"0101111"_b, "cas_c32_ldstexcl"}, + {"0111111"_b, "casa_c32_ldstexcl"}, + {"100xxxx"_b, "stxp_sp64_ldstexcl"}, + {"101xxxx"_b, "ldxp_lp64_ldstexcl"}, + {"1101111"_b, "cas_c64_ldstexcl"}, + {"1111111"_b, "casa_c64_ldstexcl"}, }, }, - { "UnallocConditionalCompareImmediate", - {10, 4, 29}, - { {"1xx", "VisitUnallocated"}, - {"x1x", "VisitUnallocated"}, - {"xx0", "VisitUnallocated"}, - {"otherwise", "VisitConditionalCompareImmediate"}, + { "_vlrhpy", + {30, 23, 22, 13, 4}, + { {"0000x"_b, "ld1sb_z_p_ai_s"}, + {"0001x"_b, "ldff1sb_z_p_ai_s"}, + {"0010x"_b, "ld1rb_z_p_bi_u8"}, + {"0011x"_b, "ld1rb_z_p_bi_u16"}, + {"0100x"_b, "ld1sh_z_p_ai_s"}, + {"0101x"_b, "ldff1sh_z_p_ai_s"}, + {"0110x"_b, "ld1rsw_z_p_bi_s64"}, + {"0111x"_b, "ld1rh_z_p_bi_u16"}, + {"1000x"_b, "ld1sb_z_p_ai_d"}, + {"1001x"_b, "ldff1sb_z_p_ai_d"}, + {"10100"_b, "prfb_i_p_bz_d_64_scaled"}, + {"10110"_b, "prfh_i_p_bz_d_64_scaled"}, + {"1100x"_b, "ld1sh_z_p_ai_d"}, + {"1101x"_b, "ldff1sh_z_p_ai_d"}, + {"1110x"_b, "ld1sh_z_p_bz_d_64_scaled"}, + {"1111x"_b, "ldff1sh_z_p_bz_d_64_scaled"}, }, }, - { "UnallocConditionalCompareRegister", - {10, 4, 29}, - { {"1xx", "VisitUnallocated"}, - {"x1x", "VisitUnallocated"}, - {"xx0", "VisitUnallocated"}, - {"otherwise", "VisitConditionalCompareRegister"}, + { "_vlrrtz", + {30, 23, 22}, + { {"001"_b, "bfm_64m_bitfield"}, }, }, - { "UnallocConditionalSelect", - {11, 29}, - { {"00", 
"VisitConditionalSelect"}, - {"otherwise", "VisitUnallocated"}, + { "_vlsmsn", + {22, 20, 19, 18, 17, 16}, + { {"111000"_b, "fcmle_asisdmiscfp16_fz"}, + {"111001"_b, "frsqrte_asisdmiscfp16_r"}, + {"x00000"_b, "fcmle_asisdmisc_fz"}, + {"x00001"_b, "frsqrte_asisdmisc_r"}, }, }, - { "UnallocDataProcessing1Source", - {31, 16, 14, 13, 12, 11, 10}, - { {"x0xx11x", "VisitUnallocated"}, - {"0000011", "VisitUnallocated"}, - {"1001xxx", "VisitUnallocated"}, - {"x01xxxx", "VisitUnallocated"}, - {"x0x1xxx", "VisitUnallocated"}, - {"01xxxxx", "VisitUnallocated"}, - {"111xx1x", "VisitUnallocated"}, - {"111x1xx", "VisitUnallocated"}, - {"1111xxx", "VisitUnallocated"}, - {"otherwise", "UnallocDataProcessing1Source_2"}, + { "_vlzrlm", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "mvni_asimdimm_l_sl"}, + {"00x100"_b, "sri_asimdshf_r"}, + {"00x110"_b, "sqshlu_asimdshf_r"}, + {"010x00"_b, "sri_asimdshf_r"}, + {"010x10"_b, "sqshlu_asimdshf_r"}, + {"011100"_b, "sri_asimdshf_r"}, + {"011110"_b, "sqshlu_asimdshf_r"}, + {"0x1000"_b, "sri_asimdshf_r"}, + {"0x1010"_b, "sqshlu_asimdshf_r"}, }, }, - { "UnallocDataProcessing1Source_2", - {29, 20, 19, 18, 17, 15}, - { {"000000", "VisitDataProcessing1Source"}, - {"otherwise", "VisitUnallocated"}, + { "_vmjgmg", + {30, 23, 22}, + { {"000"_b, "stxrb_sr32_ldstexcl"}, + {"001"_b, "ldxrb_lr32_ldstexcl"}, + {"010"_b, "stllrb_sl32_ldstexcl"}, + {"011"_b, "ldlarb_lr32_ldstexcl"}, + {"100"_b, "stxrh_sr32_ldstexcl"}, + {"101"_b, "ldxrh_lr32_ldstexcl"}, + {"110"_b, "stllrh_sl32_ldstexcl"}, + {"111"_b, "ldlarh_lr32_ldstexcl"}, }, }, - { "UnallocDataProcessing2Source", - {31, 14, 13, 12, 11, 10}, - { {"x0000x", "VisitUnallocated"}, - {"x11xxx", "VisitUnallocated"}, - {"010x11", "VisitUnallocated"}, - {"110xx0", "VisitUnallocated"}, - {"110x0x", "VisitUnallocated"}, - {"otherwise", "UnallocDataProcessing2Source_2"}, + { "_vmjtrx", + {23, 22, 12}, + { {"001"_b, "sudot_asimdelem_d"}, + {"011"_b, "bfdot_asimdelem_e"}, + {"101"_b, "usdot_asimdelem_d"}, + {"111"_b, 
"bfmlal_asimdelem_f"}, + {"xx0"_b, "sdot_asimdelem_d"}, }, }, - { "UnallocDataProcessing2Source_2", - {29, 15}, - { {"00", "VisitDataProcessing2Source"}, - {"otherwise", "VisitUnallocated"}, + { "_vmjzyk", + {30, 23, 22}, + { {"000"_b, "stp_32_ldstpair_off"}, + {"001"_b, "ldp_32_ldstpair_off"}, + {"010"_b, "stp_32_ldstpair_pre"}, + {"011"_b, "ldp_32_ldstpair_pre"}, + {"100"_b, "stgp_64_ldstpair_off"}, + {"101"_b, "ldpsw_64_ldstpair_off"}, + {"110"_b, "stgp_64_ldstpair_pre"}, + {"111"_b, "ldpsw_64_ldstpair_pre"}, }, }, - { "UnallocDataProcessing3Source", - {23, 22, 21, 15, 31}, - { {"00100", "VisitUnallocated"}, - {"00110", "VisitUnallocated"}, - {"01000", "VisitUnallocated"}, - {"0101x", "VisitUnallocated"}, - {"011xx", "VisitUnallocated"}, - {"100xx", "VisitUnallocated"}, - {"10100", "VisitUnallocated"}, - {"10110", "VisitUnallocated"}, - {"11000", "VisitUnallocated"}, - {"1101x", "VisitUnallocated"}, - {"111xx", "VisitUnallocated"}, - {"otherwise", "UnallocDataProcessing3Source_2"}, + { "_vmplgv", + {12}, + { {"0"_b, "ld1_asisdlsop_dx1_r1d"}, }, }, - { "UnallocDataProcessing3Source_2", - {30, 29}, - { {"00", "VisitDataProcessing3Source"}, - {"otherwise", "VisitUnallocated"}, + { "_vmpnlv", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "wfit_only_systeminstrswithreg"}, }, }, - { "UnallocEvaluateIntoFlags", - {31, 30, 29, 20, 19, 18}, - { {"001000", "UnallocEvaluateIntoFlags_2"}, - {"otherwise", "VisitUnallocated"}, + { "_vnpqrh", + {30, 23, 22}, + { {"000"_b, "stp_s_ldstpair_off"}, + {"001"_b, "ldp_s_ldstpair_off"}, + {"010"_b, "stp_s_ldstpair_pre"}, + {"011"_b, "ldp_s_ldstpair_pre"}, + {"100"_b, "stp_d_ldstpair_off"}, + {"101"_b, "ldp_d_ldstpair_off"}, + {"110"_b, "stp_d_ldstpair_pre"}, + {"111"_b, "ldp_d_ldstpair_pre"}, }, }, - { "UnallocEvaluateIntoFlags_2", - {17, 16, 15, 4, 3, 2, 1, 0}, - { {"00001101", "VisitEvaluateIntoFlags"}, - {"otherwise", "VisitUnallocated"}, + { "_vnrnmg", + {17}, + { {"0"_b, "st4_asisdlse_r4"}, }, }, - { "UnallocException", - {23, 22, 21, 
1, 0}, - { {"00000", "VisitUnallocated"}, - {"001x1", "VisitUnallocated"}, - {"0011x", "VisitUnallocated"}, - {"010x1", "VisitUnallocated"}, - {"0101x", "VisitUnallocated"}, - {"011xx", "VisitUnallocated"}, - {"100xx", "VisitUnallocated"}, - {"10100", "VisitUnallocated"}, - {"11xxx", "VisitUnallocated"}, - {"otherwise", "UnallocException_2"}, + { "_vpkhvh", + {17}, + { {"0"_b, "st2_asisdlso_h2_2h"}, }, }, - { "UnallocException_2", - {4, 3, 2}, - { {"000", "VisitException"}, - {"otherwise", "VisitUnallocated"}, + { "_vpkptr", + {30, 23, 22}, + { {"000"_b, "stnp_32_ldstnapair_offs"}, + {"001"_b, "ldnp_32_ldstnapair_offs"}, + {"010"_b, "stp_32_ldstpair_post"}, + {"011"_b, "ldp_32_ldstpair_post"}, + {"110"_b, "stgp_64_ldstpair_post"}, + {"111"_b, "ldpsw_64_ldstpair_post"}, }, }, - { "UnallocExtract", - {30, 29, 21}, - { {"000", "UnallocExtract_2"}, - {"otherwise", "VisitUnallocated"}, + { "_vpmxrj", + {13}, + { {"0"_b, "histcnt_z_p_zz"}, + {"1"_b, "_jxszhy"}, }, }, - { "UnallocExtract_2", - {31, 22, 15}, - { {"10x", "VisitUnallocated"}, - {"01x", "VisitUnallocated"}, - {"0x1", "VisitUnallocated"}, - {"otherwise", "VisitExtract"}, + { "_vppthj", + {30, 23}, + { {"00"_b, "add_32_addsub_imm"}, + {"10"_b, "sub_32_addsub_imm"}, }, }, - { "UnallocFPCompare", - {31, 29, 15, 14, 2, 1, 0}, - { {"0000000", "UnallocFPCompare_2"}, - {"otherwise", "VisitUnallocated"}, + { "_vprkpq", + {13, 12, 11, 10}, + { {"0000"_b, "saddwb_z_zz"}, + {"0001"_b, "saddwt_z_zz"}, + {"0010"_b, "uaddwb_z_zz"}, + {"0011"_b, "uaddwt_z_zz"}, + {"0100"_b, "ssubwb_z_zz"}, + {"0101"_b, "ssubwt_z_zz"}, + {"0110"_b, "usubwb_z_zz"}, + {"0111"_b, "usubwt_z_zz"}, + {"1000"_b, "sqdmullb_z_zz"}, + {"1001"_b, "sqdmullt_z_zz"}, + {"1010"_b, "pmullb_z_zz"}, + {"1011"_b, "pmullt_z_zz"}, + {"1100"_b, "smullb_z_zz"}, + {"1101"_b, "smullt_z_zz"}, + {"1110"_b, "umullb_z_zz"}, + {"1111"_b, "umullt_z_zz"}, }, }, - { "UnallocFPCompare_2", - {23, 22}, - { {"10", "VisitUnallocated"}, - {"otherwise", "VisitFPCompare"}, + { 
"_vpxvjs", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fcvtns_32s_float2int"}, + {"00001"_b, "fcvtnu_32s_float2int"}, + {"00010"_b, "scvtf_s32_float2int"}, + {"00011"_b, "ucvtf_s32_float2int"}, + {"00100"_b, "fcvtas_32s_float2int"}, + {"00101"_b, "fcvtau_32s_float2int"}, + {"00110"_b, "fmov_32s_float2int"}, + {"00111"_b, "fmov_s32_float2int"}, + {"01000"_b, "fcvtps_32s_float2int"}, + {"01001"_b, "fcvtpu_32s_float2int"}, + {"10000"_b, "fcvtms_32s_float2int"}, + {"10001"_b, "fcvtmu_32s_float2int"}, + {"11000"_b, "fcvtzs_32s_float2int"}, + {"11001"_b, "fcvtzu_32s_float2int"}, + }, + }, + + { "_vpykkg", + {23, 22, 10}, + { {"000"_b, "ext_asimdext_only"}, + {"001"_b, "_jnmgrh"}, + {"011"_b, "_vytgtz"}, + {"111"_b, "_jrnlzs"}, }, }, - { "UnallocFPConditionalCompare", - {31, 29, 23, 22}, - { {"xx10", "VisitUnallocated"}, - {"x1xx", "VisitUnallocated"}, - {"1xxx", "VisitUnallocated"}, - {"otherwise", "VisitFPConditionalCompare"}, + { "_vqlytp", + {12}, + { {"0"_b, "st3_asisdlsop_dx3_r3d"}, }, }, - { "UnallocFPConditionalSelect", - {31, 29, 23, 22}, - { {"xx10", "VisitUnallocated"}, - {"x1xx", "VisitUnallocated"}, - {"1xxx", "VisitUnallocated"}, - {"otherwise", "VisitFPConditionalSelect"}, + { "_vqqrjl", + {23, 22, 20, 19, 13, 11, 10}, + { {"0001001"_b, "shl_asisdshf_r"}, + {"0001101"_b, "sqshl_asisdshf_r"}, + {"001x001"_b, "shl_asisdshf_r"}, + {"001x101"_b, "sqshl_asisdshf_r"}, + {"00xx0x0"_b, "fmls_asisdelem_rh_h"}, + {"01xx001"_b, "shl_asisdshf_r"}, + {"01xx101"_b, "sqshl_asisdshf_r"}, + {"1xxx0x0"_b, "fmls_asisdelem_r_sd"}, + {"xxxx1x0"_b, "sqdmlsl_asisdelem_l"}, }, }, - { "UnallocFPDataProcessing1Source", - {31, 29, 20}, - { {"000", "UnallocFPDataProcessing1Source_2"}, - {"otherwise", "VisitUnallocated"}, + { "_vqvqhp", + {30, 23, 22}, + { {"000"_b, "str_32_ldst_pos"}, + {"001"_b, "ldr_32_ldst_pos"}, + {"010"_b, "ldrsw_64_ldst_pos"}, + {"100"_b, "str_64_ldst_pos"}, + {"101"_b, "ldr_64_ldst_pos"}, + {"110"_b, "prfm_p_ldst_pos"}, }, }, - { 
"UnallocFPDataProcessing1Source_2", - {23, 22, 19, 18, 17, 16, 15}, - { {"0000100", "VisitUnallocated"}, - {"0000110", "VisitUnallocated"}, - {"0001101", "VisitUnallocated"}, - {"00101xx", "VisitUnallocated"}, - {"0011xxx", "VisitUnallocated"}, - {"0100101", "VisitUnallocated"}, - {"0101101", "VisitUnallocated"}, - {"01101xx", "VisitUnallocated"}, - {"0111xxx", "VisitUnallocated"}, - {"10xxxxx", "VisitUnallocated"}, - {"110011x", "VisitUnallocated"}, - {"1101101", "VisitUnallocated"}, - {"111xxxx", "VisitUnallocated"}, - {"otherwise", "VisitFPDataProcessing1Source"}, + { "_vqzlzt", + {30, 23}, + { {"00"_b, "and_64_log_imm"}, + {"01"_b, "movn_64_movewide"}, + {"10"_b, "eor_64_log_imm"}, + {"11"_b, "movz_64_movewide"}, }, }, - { "UnallocFPDataProcessing2Source", - {15, 14, 13, 12}, - { {"1xx1", "VisitUnallocated"}, - {"1x1x", "VisitUnallocated"}, - {"11xx", "VisitUnallocated"}, - {"otherwise", "UnallocFPDataProcessing2Source_2"}, + { "_vsqlkr", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "frintx_asimdmiscfp16_r"}, + {"0x00001"_b, "frintx_asimdmisc_r"}, + {"1111001"_b, "frinti_asimdmiscfp16_r"}, + {"1x00001"_b, "frinti_asimdmisc_r"}, + {"xx00000"_b, "cmle_asimdmisc_z"}, }, }, - { "UnallocFPDataProcessing2Source_2", - {31, 29, 23, 22}, - { {"xx10", "VisitUnallocated"}, - {"x1xx", "VisitUnallocated"}, - {"1xxx", "VisitUnallocated"}, - {"otherwise", "VisitFPDataProcessing2Source"}, + { "_vsqpzr", + {23}, + { {"0"_b, "faddp_asimdsame_only"}, + {"1"_b, "fabd_asimdsame_only"}, }, }, - { "UnallocFPDataProcessing3Source", - {31, 29, 23, 22}, - { {"xx10", "VisitUnallocated"}, - {"x1xx", "VisitUnallocated"}, - {"1xxx", "VisitUnallocated"}, - {"otherwise", "VisitFPDataProcessing3Source"}, + { "_vsvrgt", + {17}, + { {"0"_b, "fadda_v_p_z"}, }, }, - { "UnallocFPFixedPointConvert", - {23, 22, 20, 19, 17, 16}, - { {"10xxxx", "VisitUnallocated"}, - {"xxx00x", "VisitUnallocated"}, - {"xxx11x", "VisitUnallocated"}, - {"xx0x0x", "VisitUnallocated"}, - {"xx1x1x", 
"VisitUnallocated"}, - {"otherwise", "UnallocFPFixedPointConvert_2"}, + { "_vsvtqz", + {30, 23, 22}, + { {"00x"_b, "add_64_addsub_imm"}, + {"010"_b, "addg_64_addsub_immtags"}, + {"10x"_b, "sub_64_addsub_imm"}, + {"110"_b, "subg_64_addsub_immtags"}, }, }, - { "UnallocFPFixedPointConvert_2", - {29, 18}, - { {"00", "UnallocFPFixedPointConvert_3"}, - {"otherwise", "VisitUnallocated"}, + { "_vtxyxz", + {23, 22, 13, 12, 11, 10}, + { {"01x1x0"_b, "fcmla_asimdelem_c_h"}, + {"0x0001"_b, "ushr_asimdshf_r"}, + {"0x0101"_b, "usra_asimdshf_r"}, + {"0x1001"_b, "urshr_asimdshf_r"}, + {"0x1101"_b, "ursra_asimdshf_r"}, + {"10x1x0"_b, "fcmla_asimdelem_c_s"}, + {"xx00x0"_b, "mla_asimdelem_r"}, + {"xx10x0"_b, "umlal_asimdelem_l"}, }, }, - { "UnallocFPFixedPointConvert_3", - {31, 15}, - { {"00", "VisitUnallocated"}, - {"otherwise", "VisitFPFixedPointConvert"}, + { "_vvhzhv", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "swpb_32_memop"}, + {"000xx10"_b, "strb_32b_ldst_regoff"}, + {"0010000"_b, "swplb_32_memop"}, + {"001xx10"_b, "ldrb_32b_ldst_regoff"}, + {"0100000"_b, "swpab_32_memop"}, + {"010xx10"_b, "ldrsb_64b_ldst_regoff"}, + {"0110000"_b, "swpalb_32_memop"}, + {"011xx10"_b, "ldrsb_32b_ldst_regoff"}, + {"1000000"_b, "swph_32_memop"}, + {"100xx10"_b, "strh_32_ldst_regoff"}, + {"1010000"_b, "swplh_32_memop"}, + {"101xx10"_b, "ldrh_32_ldst_regoff"}, + {"1100000"_b, "swpah_32_memop"}, + {"110xx10"_b, "ldrsh_64_ldst_regoff"}, + {"1110000"_b, "swpalh_32_memop"}, + {"111xx10"_b, "ldrsh_32_ldst_regoff"}, }, }, - { "UnallocFPImmediate", + { "_vvprhx", + {0}, + { {"0"_b, "blr_64_branch_reg"}, + }, + }, + + { "_vvrmvg", + {12}, + { {"1"_b, "_typysz"}, + }, + }, + + { "_vvtnrv", + {23, 22, 20, 19, 18}, + { {"00000"_b, "orr_z_zi"}, + {"01000"_b, "eor_z_zi"}, + {"10000"_b, "and_z_zi"}, + {"11000"_b, "dupm_z_i"}, + }, + }, + + { "_vvxsxt", + {4}, + { {"0"_b, "ands_p_p_pp_z"}, + {"1"_b, "bics_p_p_pp_z"}, + }, + }, + + { "_vxgzqy", + {22}, + { {"0"_b, "ldrsw_64_ldst_regoff"}, + }, + }, + + { 
"_vxhgzz", + {23, 22, 12, 11, 10}, + { {"00xxx"_b, "ext_z_zi_des"}, + {"01xxx"_b, "ext_z_zi_con"}, + {"10000"_b, "zip1_z_zz_q"}, + {"10001"_b, "zip2_z_zz_q"}, + {"10010"_b, "uzp1_z_zz_q"}, + {"10011"_b, "uzp2_z_zz_q"}, + {"10110"_b, "trn1_z_zz_q"}, + {"10111"_b, "trn2_z_zz_q"}, + }, + }, + + { "_vxsjgg", + {30, 22, 11}, + { {"001"_b, "_pxnnrz"}, + {"010"_b, "ccmn_32_condcmp_reg"}, + {"011"_b, "ccmn_32_condcmp_imm"}, + {"110"_b, "ccmp_32_condcmp_reg"}, + {"111"_b, "ccmp_32_condcmp_imm"}, + }, + }, + + { "_vxsvhs", + {13, 12}, + { {"00"_b, "adcs_64_addsub_carry"}, + }, + }, + + { "_vxylhh", {23, 22}, - { {"10", "VisitUnallocated"}, - {"otherwise", "UnallocFPImmediate_2"}, + { {"01"_b, "fcmla_asimdelem_c_h"}, + {"10"_b, "fcmla_asimdelem_c_s"}, + }, + }, + + { "_vylhvl", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fabs_h_floatdp1"}, + {"000010"_b, "fsqrt_h_floatdp1"}, + {"000100"_b, "fcvt_dh_floatdp1"}, + {"001000"_b, "frintp_h_floatdp1"}, + {"001010"_b, "frintz_h_floatdp1"}, + {"001110"_b, "frinti_h_floatdp1"}, + }, + }, + + { "_vytgtz", + {13, 12, 11}, + { {"000"_b, "fmaxnmp_asimdsamefp16_only"}, + {"010"_b, "faddp_asimdsamefp16_only"}, + {"011"_b, "fmul_asimdsamefp16_only"}, + {"100"_b, "fcmge_asimdsamefp16_only"}, + {"101"_b, "facge_asimdsamefp16_only"}, + {"110"_b, "fmaxp_asimdsamefp16_only"}, + {"111"_b, "fdiv_asimdsamefp16_only"}, + }, + }, + + { "_vytxll", + {18, 17, 12}, + { {"000"_b, "st2_asisdlso_d2_2d"}, + }, + }, + + { "_vyygqs", + {23, 22, 20, 19, 12, 11, 10}, + { {"00x1001"_b, "sqshrun_asisdshf_n"}, + {"00x1011"_b, "sqrshrun_asisdshf_n"}, + {"00x1101"_b, "uqshrn_asisdshf_n"}, + {"00x1111"_b, "uqrshrn_asisdshf_n"}, + {"00xx1x0"_b, "fmulx_asisdelem_rh_h"}, + {"010x001"_b, "sqshrun_asisdshf_n"}, + {"010x011"_b, "sqrshrun_asisdshf_n"}, + {"010x101"_b, "uqshrn_asisdshf_n"}, + {"010x111"_b, "uqrshrn_asisdshf_n"}, + {"0111001"_b, "sqshrun_asisdshf_n"}, + {"0111011"_b, "sqrshrun_asisdshf_n"}, + {"0111101"_b, "uqshrn_asisdshf_n"}, + {"0111111"_b, 
"uqrshrn_asisdshf_n"}, + {"0x10001"_b, "sqshrun_asisdshf_n"}, + {"0x10011"_b, "sqrshrun_asisdshf_n"}, + {"0x10101"_b, "uqshrn_asisdshf_n"}, + {"0x10111"_b, "uqrshrn_asisdshf_n"}, + {"1xxx1x0"_b, "fmulx_asisdelem_r_sd"}, + }, + }, + + { "_vyztqx", + {8}, + { {"0"_b, "tstart_br_systemresult"}, + {"1"_b, "ttest_br_systemresult"}, + }, + }, + + { "_vzjvtv", + {23, 22, 12, 11, 10}, + { {"01001"_b, "bfmmla_z_zzz"}, + {"10001"_b, "fmmla_z_zzz_s"}, + {"11001"_b, "fmmla_z_zzz_d"}, + }, + }, + + { "_vzzvlr", + {23, 22, 20, 19, 18, 16, 13}, + { {"0000000"_b, "_tlzlrj"}, + {"0000001"_b, "_yhxvhy"}, + {"0100000"_b, "_hqhzgj"}, + {"0100001"_b, "_kzrklp"}, + {"100xxx0"_b, "st2_asisdlsop_bx2_r2b"}, + {"100xxx1"_b, "st4_asisdlsop_bx4_r4b"}, + {"1010xx0"_b, "st2_asisdlsop_bx2_r2b"}, + {"1010xx1"_b, "st4_asisdlsop_bx4_r4b"}, + {"10110x0"_b, "st2_asisdlsop_bx2_r2b"}, + {"10110x1"_b, "st4_asisdlsop_bx4_r4b"}, + {"1011100"_b, "st2_asisdlsop_bx2_r2b"}, + {"1011101"_b, "st4_asisdlsop_bx4_r4b"}, + {"1011110"_b, "_mykjss"}, + {"1011111"_b, "_xkkggt"}, + {"110xxx0"_b, "ld2_asisdlsop_bx2_r2b"}, + {"110xxx1"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1110xx0"_b, "ld2_asisdlsop_bx2_r2b"}, + {"1110xx1"_b, "ld4_asisdlsop_bx4_r4b"}, + {"11110x0"_b, "ld2_asisdlsop_bx2_r2b"}, + {"11110x1"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1111100"_b, "ld2_asisdlsop_bx2_r2b"}, + {"1111101"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1111110"_b, "_gvstrp"}, + {"1111111"_b, "_qtgvhn"}, + }, + }, + + { "_xgvgmk", + {23, 22, 4}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, "fccmpe_h_floatccmp"}, + }, + }, + + { "_xhkgqh", + {30, 23, 22}, + { {"000"_b, "stp_64_ldstpair_off"}, + {"001"_b, "ldp_64_ldstpair_off"}, + {"010"_b, "stp_64_ldstpair_pre"}, + {"011"_b, "ldp_64_ldstpair_pre"}, + }, + }, + + { "_xhktsk", + {22}, + { {"0"_b, "smullt_z_zzi_s"}, + {"1"_b, "smullt_z_zzi_d"}, + }, + }, + + { "_xhlhmh", + 
{4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, "cmpls_p_p_zi"}, }, }, - { "UnallocFPImmediate_2", - {31, 29, 9, 8, 7, 6, 5}, - { {"0000000", "VisitFPImmediate"}, - {"otherwise", "VisitUnallocated"}, + { "_xhltxn", + {12, 10}, + { {"00"_b, "_jqtltz"}, + {"01"_b, "_rkvyqk"}, + {"10"_b, "_zpnsrv"}, + {"11"_b, "_lhvtrp"}, }, }, - { "UnallocFPIntegerConvert", - {29}, - { {"0", "UnallocFPIntegerConvert_2"}, - {"1", "VisitUnallocated"}, + { "_xhmpmy", + {4}, + { {"0"_b, "and_p_p_pp_z"}, + {"1"_b, "bic_p_p_pp_z"}, }, }, - { "UnallocFPIntegerConvert_2", - {31, 23, 22, 20, 19, 18, 17, 16}, - { {"0001x11x", "VisitUnallocated"}, - {"0010x11x", "VisitUnallocated"}, - {"0011011x", "VisitUnallocated"}, - {"00111111", "VisitUnallocated"}, - {"010xx11x", "VisitUnallocated"}, - {"100xx11x", "VisitUnallocated"}, - {"1011x11x", "VisitUnallocated"}, - {"101x111x", "VisitUnallocated"}, - {"1101x11x", "VisitUnallocated"}, - {"110x011x", "VisitUnallocated"}, - {"xxx1x01x", "VisitUnallocated"}, - {"xxx1x10x", "VisitUnallocated"}, - {"xxxx110x", "VisitUnallocated"}, - {"xxxx101x", "VisitUnallocated"}, - {"otherwise", "VisitFPIntegerConvert"}, + { "_xhvtjg", + {11}, + { {"0"_b, "_mpyklp"}, }, }, - { "UnallocLoadLiteral", - {26, 31, 30}, - { {"111", "VisitUnallocated"}, - {"otherwise", "VisitLoadLiteral"}, + { "_xhxrnt", + {30}, + { {"0"_b, "_zxhhny"}, + {"1"_b, "_lhpgsn"}, }, }, - { "UnallocLoadStoreExclusive", - {31, 23, 21, 14, 13, 12, 11, 10}, - { {"001xxxx0", "VisitUnallocated"}, - {"001xxx0x", "VisitUnallocated"}, - {"001xx0xx", "VisitUnallocated"}, - {"001x0xxx", "VisitUnallocated"}, - {"0010xxxx", "VisitUnallocated"}, - {"x11xxxx0", "VisitUnallocated"}, - {"x11xxx0x", "VisitUnallocated"}, - {"x11xx0xx", "VisitUnallocated"}, - {"x11x0xxx", "VisitUnallocated"}, - {"x110xxxx", "VisitUnallocated"}, - {"otherwise", "VisitLoadStoreExclusive"}, + { "_xjghst", + {13, 12, 11, 10}, + { {"0000"_b, "_kvmrng"}, + {"0001"_b, "_vkyngx"}, + {"0011"_b, "_lxqynh"}, + {"0100"_b, "_kjngjl"}, + {"0101"_b, 
"_xmqgmz"}, + {"0110"_b, "uzp1_asimdperm_only"}, + {"0111"_b, "_shzysp"}, + {"1000"_b, "_strkph"}, + {"1001"_b, "_jpvljz"}, + {"1010"_b, "trn1_asimdperm_only"}, + {"1011"_b, "_jryylt"}, + {"1100"_b, "_grxzzg"}, + {"1101"_b, "_lnnyzt"}, + {"1110"_b, "zip1_asimdperm_only"}, + {"1111"_b, "_szttjy"}, }, }, - { "UnallocLoadStorePAC", - {31, 30, 26}, - { {"110", "VisitLoadStorePAC"}, - {"otherwise", "VisitUnallocated"}, + { "_xjxppp", + {1, 0}, + { {"11"_b, "brabz_64_branch_reg"}, }, }, - { "UnallocLoadStoreRCpcUnscaledOffset", - {31, 30, 23, 22, 11, 10}, - { {"xxxxx1", "VisitUnallocated"}, - {"xxxx1x", "VisitUnallocated"}, - {"101100", "VisitUnallocated"}, - {"111000", "VisitUnallocated"}, - {"111100", "VisitUnallocated"}, - {"otherwise", "VisitLoadStoreRCpcUnscaledOffset"}, + { "_xkkggt", + {17}, + { {"0"_b, "st4_asisdlsop_bx4_r4b"}, + {"1"_b, "st4_asisdlsop_b4_i4b"}, }, }, - { "UnallocLoadStorePairNonTemporal", - {26, 31, 30, 22}, - { {"001x", "VisitUnallocated"}, - {"x11x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStorePairNonTemporal"}, + { "_xlhjhx", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_zhrtts"}, }, }, - { "UnallocLoadStorePairOffset", - {26, 31, 30, 22}, - { {"0010", "VisitUnallocated"}, - {"x11x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStorePairOffset"}, + { "_xmqgmz", + {23, 22}, + { {"01"_b, "fadd_asimdsamefp16_only"}, + {"11"_b, "fsub_asimdsamefp16_only"}, }, }, - { "UnallocLoadStorePairPostIndex", - {26, 31, 30, 22}, - { {"0010", "VisitUnallocated"}, - {"x11x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStorePairPostIndex"}, + { "_xmqvpl", + {12}, + { {"0"_b, "ld1_asisdlsop_dx1_r1d"}, }, }, - { "UnallocLoadStorePairPreIndex", - {26, 31, 30, 22}, - { {"0010", "VisitUnallocated"}, - {"x11x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStorePairPreIndex"}, + { "_xmtlmj", + {23, 22, 20, 19, 11}, + { {"00010"_b, "srshr_asisdshf_r"}, + {"001x0"_b, "srshr_asisdshf_r"}, + {"01xx0"_b, "srshr_asisdshf_r"}, }, }, - { 
"UnallocLoadStorePostIndex", - {26, 23, 22, 31, 30}, - { {"01011", "VisitUnallocated"}, - {"0111x", "VisitUnallocated"}, - {"11xx1", "VisitUnallocated"}, - {"11x1x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStorePostIndex"}, + { "_xmxpnx", + {10}, + { {"0"_b, "sri_z_zzi"}, + {"1"_b, "sli_z_zzi"}, }, }, - { "UnallocLoadStorePreIndex", - {26, 23, 22, 31, 30}, - { {"01011", "VisitUnallocated"}, - {"0111x", "VisitUnallocated"}, - {"11xx1", "VisitUnallocated"}, - {"11x1x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStorePreIndex"}, + { "_xnsrny", + {30, 23, 22}, + { {"000"_b, "madd_64a_dp_3src"}, + {"001"_b, "smulh_64_dp_3src"}, + {"011"_b, "umulh_64_dp_3src"}, }, }, - { "UnallocLoadStoreRegisterOffset", - {14}, - { {"0", "VisitUnallocated"}, - {"1", "UnallocLoadStoreRegisterOffset_2"}, + { "_xpkkpn", + {17}, + { {"1"_b, "frsqrte_z_z"}, }, }, - { "UnallocLoadStoreRegisterOffset_2", - {26, 23, 22, 31, 30}, - { {"0111x", "VisitUnallocated"}, - {"11xx1", "VisitUnallocated"}, - {"11x1x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStoreRegisterOffset"}, + { "_xpmvjv", + {13, 12}, + { {"00"_b, "sqshl_asisdsame_only"}, + {"01"_b, "sqrshl_asisdsame_only"}, }, }, - { "UnallocLoadStoreUnscaledOffset", - {26, 23, 22, 31, 30}, - { {"0111x", "VisitUnallocated"}, - {"11xx1", "VisitUnallocated"}, - {"11x1x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStoreUnscaledOffset"}, + { "_xpqglq", + {4}, + { {"0"_b, "cmpeq_p_p_zi"}, + {"1"_b, "cmpne_p_p_zi"}, }, }, - { "UnallocLoadStoreUnsignedOffset", - {26, 23, 22, 31, 30}, - { {"0111x", "VisitUnallocated"}, - {"11xx1", "VisitUnallocated"}, - {"11x1x", "VisitUnallocated"}, - {"otherwise", "VisitLoadStoreUnsignedOffset"}, + { "_xprlgy", + {30, 23, 22, 11, 10}, + { {"00010"_b, "str_s_ldst_regoff"}, + {"00110"_b, "ldr_s_ldst_regoff"}, + {"10010"_b, "str_d_ldst_regoff"}, + {"10110"_b, "ldr_d_ldst_regoff"}, }, }, - { "UnallocLogicalImmediate", - {31, 22}, - { {"01", "VisitUnallocated"}, - {"otherwise", "VisitLogicalImmediate"}, + 
{ "_xpvpqq", + {23, 22, 11, 10, 4, 3, 2}, + { {"0000000"_b, "_hngpxg"}, + {"0010111"_b, "_gnytkh"}, + {"0011111"_b, "_xjxppp"}, + {"0100000"_b, "_nnhprs"}, + {"0110111"_b, "_hmtxlh"}, + {"0111111"_b, "_qtxypt"}, + {"1000000"_b, "_rmltms"}, + {"1010111"_b, "_qqpkkm"}, + {"1011111"_b, "_klnhpj"}, }, }, - { "UnallocLogicalShifted", - {31, 15}, - { {"01", "VisitUnallocated"}, - {"otherwise", "VisitLogicalShifted"}, + { "_xqgxjp", + {18, 17, 16, 13, 12, 11, 10, 9, 7, 6, 5}, + { {"01111000011"_b, "_vyztqx"}, }, }, - { "UnallocMoveWideImmediate", - {30, 29}, - { {"01", "VisitUnallocated"}, - {"otherwise", "UnallocMoveWideImmediate_2"}, + { "_xqhgkk", + {30}, + { {"0"_b, "b_only_branch_imm"}, }, }, - { "UnallocMoveWideImmediate_2", - {31, 22}, - { {"01", "VisitUnallocated"}, - {"otherwise", "VisitMoveWideImmediate"}, + { "_xqjrgk", + {12}, + { {"0"_b, "ld4_asisdlsop_dx4_r4d"}, }, }, - { "UnallocNEON2RegMisc", - {16, 15, 14, 13, 12, 23, 22, 29}, - { {"00001xx1", "VisitUnallocated"}, - {"001011x1", "VisitUnallocated"}, - {"01010xx1", "VisitUnallocated"}, - {"011xx0xx", "VisitUnallocated"}, - {"011101x1", "VisitUnallocated"}, - {"1000xxxx", "VisitUnallocated"}, - {"10011xx0", "VisitUnallocated"}, - {"10101xxx", "VisitUnallocated"}, - {"101101xx", "VisitUnallocated"}, - {"101110x1", "VisitUnallocated"}, - {"101111xx", "VisitUnallocated"}, - {"110001x1", "VisitUnallocated"}, - {"111101xx", "VisitUnallocated"}, - {"111111x0", "VisitUnallocated"}, - {"otherwise", "VisitNEON2RegMisc"}, + { "_xrhhjz", + {11}, + { {"0"_b, "_hzxjsp"}, }, }, - { "UnallocNEON2RegMiscFP16", - {29, 23, 22, 16, 15, 14, 13, 12}, - { {"xx0xxxxx", "VisitUnallocated"}, - {"xxx00xxx", "VisitUnallocated"}, - {"xxx010xx", "VisitUnallocated"}, - {"xxx10xxx", "VisitUnallocated"}, - {"xxx11110", "VisitUnallocated"}, - {"x0x011xx", "VisitUnallocated"}, - {"x0x11111", "VisitUnallocated"}, - {"x1x11100", "VisitUnallocated"}, - {"01x11111", "VisitUnallocated"}, - {"11x01110", "VisitUnallocated"}, - {"11x11000", 
"VisitUnallocated"}, - {"otherwise", "VisitNEON2RegMiscFP16"}, + { "_xrhmtg", + {30, 23, 22, 11, 10}, + { {"00000"_b, "stur_s_ldst_unscaled"}, + {"00001"_b, "str_s_ldst_immpost"}, + {"00011"_b, "str_s_ldst_immpre"}, + {"00100"_b, "ldur_s_ldst_unscaled"}, + {"00101"_b, "ldr_s_ldst_immpost"}, + {"00111"_b, "ldr_s_ldst_immpre"}, + {"10000"_b, "stur_d_ldst_unscaled"}, + {"10001"_b, "str_d_ldst_immpost"}, + {"10011"_b, "str_d_ldst_immpre"}, + {"10100"_b, "ldur_d_ldst_unscaled"}, + {"10101"_b, "ldr_d_ldst_immpost"}, + {"10111"_b, "ldr_d_ldst_immpre"}, }, }, - { "UnallocNEON3Different", - {15, 14, 13, 12, 29}, - { {"10011", "VisitUnallocated"}, - {"10111", "VisitUnallocated"}, - {"11011", "VisitUnallocated"}, - {"11101", "VisitUnallocated"}, - {"1111x", "VisitUnallocated"}, - {"otherwise", "VisitNEON3Different"}, - }, - }, - - { "UnallocNEON3Same", - {29, 23, 22, 15, 14, 13, 12, 11}, - { {"00111101", "VisitUnallocated"}, - {"01x11011", "VisitUnallocated"}, - {"01x11100", "VisitUnallocated"}, - {"01111101", "VisitUnallocated"}, - {"1xx10111", "VisitUnallocated"}, - {"10111001", "VisitUnallocated"}, - {"11x11011", "VisitUnallocated"}, - {"11x11111", "VisitUnallocated"}, - {"11111001", "VisitUnallocated"}, - {"otherwise", "VisitNEON3Same"}, - }, - }, - - { "UnallocNEON3SameExtra", - {29, 14, 13, 12, 11}, - { {"x0011", "VisitUnallocated"}, - {"x01xx", "VisitUnallocated"}, - {"00000", "VisitUnallocated"}, - {"00001", "VisitUnallocated"}, - {"01xxx", "VisitUnallocated"}, - {"111x1", "VisitUnallocated"}, - {"otherwise", "VisitNEON3SameExtra"}, - }, - }, - - { "UnallocNEON3SameFP16", - {29, 23, 13, 12, 11}, - { {"00101", "VisitUnallocated"}, - {"01011", "VisitUnallocated"}, - {"01100", "VisitUnallocated"}, - {"01101", "VisitUnallocated"}, - {"10001", "VisitUnallocated"}, - {"11001", "VisitUnallocated"}, - {"11011", "VisitUnallocated"}, - {"11111", "VisitUnallocated"}, - {"otherwise", "VisitNEON3SameFP16"}, - }, - }, - - { "UnallocNEONAcrossLanes", - {29, 23, 22, 16, 15, 14, 13, 
12}, - { {"xxx0000x", "VisitUnallocated"}, - {"xxx00010", "VisitUnallocated"}, - {"xxx001xx", "VisitUnallocated"}, - {"xxx0100x", "VisitUnallocated"}, - {"xxx01011", "VisitUnallocated"}, - {"xxx01101", "VisitUnallocated"}, - {"xxx01110", "VisitUnallocated"}, - {"xxx10xxx", "VisitUnallocated"}, - {"xxx1100x", "VisitUnallocated"}, - {"xxx111xx", "VisitUnallocated"}, - {"00101100", "VisitUnallocated"}, - {"00101111", "VisitUnallocated"}, - {"01101100", "VisitUnallocated"}, - {"01101111", "VisitUnallocated"}, - {"1xx11011", "VisitUnallocated"}, - {"otherwise", "VisitNEONAcrossLanes"}, - }, - }, - - { "UnallocNEONByIndexedElement", - {29, 23, 22, 15, 14, 13, 12}, - { {"0010001", "VisitUnallocated"}, - {"0010101", "VisitUnallocated"}, - {"0011001", "VisitUnallocated"}, - {"00x0000", "VisitUnallocated"}, - {"00x0100", "VisitUnallocated"}, - {"0xx1111", "VisitUnallocated"}, - {"1000001", "VisitUnallocated"}, - {"1000011", "VisitUnallocated"}, - {"1000101", "VisitUnallocated"}, - {"1000111", "VisitUnallocated"}, - {"10x1000", "VisitUnallocated"}, - {"10x1100", "VisitUnallocated"}, - {"1110001", "VisitUnallocated"}, - {"1110011", "VisitUnallocated"}, - {"1110101", "VisitUnallocated"}, - {"1110111", "VisitUnallocated"}, - {"1xx1011", "VisitUnallocated"}, - {"x011001", "VisitUnallocated"}, - {"otherwise", "VisitNEONByIndexedElement"}, - }, - }, - - { "UnallocNEONCopy", - {14, 13, 12, 11, 30, 29}, - { {"xxxx01", "VisitUnallocated"}, - {"0010x0", "VisitUnallocated"}, - {"001100", "VisitUnallocated"}, - {"0100x0", "VisitUnallocated"}, - {"0110x0", "VisitUnallocated"}, - {"1xxxx0", "VisitUnallocated"}, - {"otherwise", "UnallocNEONCopy_2"}, - }, - }, - - { "UnallocNEONCopy_2", - {19, 18, 17, 16}, - { {"0000", "VisitUnallocated"}, - {"otherwise", "VisitNEONCopy"}, + { "_xrpmzt", + {17}, + { {"0"_b, "st4_asisdlsop_hx4_r4h"}, + {"1"_b, "st4_asisdlsop_h4_i4h"}, }, }, - { "UnallocNEONExtract", + { "_xrxvpr", {23, 22}, - { {"00", "VisitNEONExtract"}, - {"otherwise", "VisitUnallocated"}, 
- }, - }, - - { "UnallocNEONLoadStoreMultiStruct", - {22, 15, 14, 13, 12}, - { {"00001", "VisitUnallocated"}, - {"00011", "VisitUnallocated"}, - {"00101", "VisitUnallocated"}, - {"01001", "VisitUnallocated"}, - {"01011", "VisitUnallocated"}, - {"011xx", "VisitUnallocated"}, - {"10001", "VisitUnallocated"}, - {"10011", "VisitUnallocated"}, - {"10101", "VisitUnallocated"}, - {"11001", "VisitUnallocated"}, - {"11011", "VisitUnallocated"}, - {"111xx", "VisitUnallocated"}, - {"otherwise", "VisitNEONLoadStoreMultiStruct"}, - }, - }, - - { "UnallocNEONLoadStoreMultiStructPostIndex", - {22, 15, 14, 13, 12}, - { {"00001", "VisitUnallocated"}, - {"00011", "VisitUnallocated"}, - {"00101", "VisitUnallocated"}, - {"01001", "VisitUnallocated"}, - {"01011", "VisitUnallocated"}, - {"011xx", "VisitUnallocated"}, - {"10001", "VisitUnallocated"}, - {"10011", "VisitUnallocated"}, - {"10101", "VisitUnallocated"}, - {"11001", "VisitUnallocated"}, - {"11011", "VisitUnallocated"}, - {"111xx", "VisitUnallocated"}, - {"otherwise", "VisitNEONLoadStoreMultiStructPostIndex"}, - }, - }, - - { "UnallocNEONLoadSingleStruct", - {21, 15, 14, 13, 12, 11, 10}, - { {"0010xx1", "VisitUnallocated"}, - {"0011xx1", "VisitUnallocated"}, - {"0100101", "VisitUnallocated"}, - {"0100x1x", "VisitUnallocated"}, - {"0101011", "VisitUnallocated"}, - {"01011x1", "VisitUnallocated"}, - {"0101x10", "VisitUnallocated"}, - {"01101xx", "VisitUnallocated"}, - {"01111xx", "VisitUnallocated"}, - {"1010xx1", "VisitUnallocated"}, - {"1011xx1", "VisitUnallocated"}, - {"1100011", "VisitUnallocated"}, - {"11001x1", "VisitUnallocated"}, - {"1100x10", "VisitUnallocated"}, - {"1101011", "VisitUnallocated"}, - {"11011x1", "VisitUnallocated"}, - {"1101x10", "VisitUnallocated"}, - {"11101xx", "VisitUnallocated"}, - {"11111xx", "VisitUnallocated"}, - {"otherwise", "VisitNEONLoadStoreSingleStruct"}, - }, - }, - - { "UnallocNEONLoadStoreSingleStruct", + { {"00"_b, "_spmkmm"}, + }, + }, + + { "_xryzqs", + {30, 23, 22, 13, 12, 11, 10}, + 
{ {"0001111"_b, "caspl_cp32_ldstexcl"}, + {"0011111"_b, "caspal_cp32_ldstexcl"}, + {"0101111"_b, "caslb_c32_ldstexcl"}, + {"0111111"_b, "casalb_c32_ldstexcl"}, + {"1001111"_b, "caspl_cp64_ldstexcl"}, + {"1011111"_b, "caspal_cp64_ldstexcl"}, + {"1101111"_b, "caslh_c32_ldstexcl"}, + {"1111111"_b, "casalh_c32_ldstexcl"}, + }, + }, + + { "_xsgxyy", + {9, 8, 7, 6, 5}, + { {"11111"_b, "autizb_64z_dp_1src"}, + }, + }, + + { "_xstkrn", + {20, 19}, + { {"00"_b, "_hrllsn"}, + {"01"_b, "_kqvljp"}, + {"10"_b, "_lxhlkx"}, + {"11"_b, "_rjysnh"}, + }, + }, + + { "_xtgtyz", + {19, 18, 17, 16}, + { {"0000"_b, "brkb_p_p_p"}, + }, + }, + + { "_xtqmyj", + {30, 23, 22}, + { {"000"_b, "orr_32_log_imm"}, + {"100"_b, "ands_32s_log_imm"}, + {"110"_b, "movk_32_movewide"}, + }, + }, + + { "_xtxyxj", + {4}, + { {"0"_b, "orr_p_p_pp_z"}, + {"1"_b, "orn_p_p_pp_z"}, + }, + }, + + { "_xtzlzy", + {12, 11, 10}, + { {"000"_b, "fadd_z_zz"}, + {"001"_b, "fsub_z_zz"}, + {"010"_b, "fmul_z_zz"}, + {"011"_b, "ftsmul_z_zz"}, + {"110"_b, "frecps_z_zz"}, + {"111"_b, "frsqrts_z_zz"}, + }, + }, + + { "_xvlnmy", + {9, 8, 7, 6, 5}, + { {"11111"_b, "autdza_64z_dp_1src"}, + }, + }, + + { "_xvnyxq", + {30, 23, 13, 4}, + { {"0000"_b, "prfb_i_p_bz_s_x32_scaled"}, + {"0010"_b, "prfh_i_p_bz_s_x32_scaled"}, + {"010x"_b, "ld1sh_z_p_bz_s_x32_scaled"}, + {"011x"_b, "ldff1sh_z_p_bz_s_x32_scaled"}, + {"1000"_b, "prfb_i_p_bz_d_x32_scaled"}, + {"1010"_b, "prfh_i_p_bz_d_x32_scaled"}, + {"110x"_b, "ld1sh_z_p_bz_d_x32_scaled"}, + {"111x"_b, "ldff1sh_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_xvppmm", + {30, 23, 22, 13, 12, 11, 10}, + { {"0xx0xxx"_b, "mla_z_p_zzz"}, + {"0xx1xxx"_b, "mls_z_p_zzz"}, + {"1101110"_b, "usdot_z_zzz_s"}, + {"1xx0000"_b, "smlalb_z_zzz"}, + {"1xx0001"_b, "smlalt_z_zzz"}, + {"1xx0010"_b, "umlalb_z_zzz"}, + {"1xx0011"_b, "umlalt_z_zzz"}, + {"1xx0100"_b, "smlslb_z_zzz"}, + {"1xx0101"_b, "smlslt_z_zzz"}, + {"1xx0110"_b, "umlslb_z_zzz"}, + {"1xx0111"_b, "umlslt_z_zzz"}, + {"1xx1000"_b, "sqdmlalb_z_zzz"}, + 
{"1xx1001"_b, "sqdmlalt_z_zzz"}, + {"1xx1010"_b, "sqdmlslb_z_zzz"}, + {"1xx1011"_b, "sqdmlslt_z_zzz"}, + {"1xx1100"_b, "sqrdmlah_z_zzz"}, + {"1xx1101"_b, "sqrdmlsh_z_zzz"}, + }, + }, + + { "_xxjrsy", + {23, 22, 9}, + { {"000"_b, "rdffr_p_p_f"}, + {"010"_b, "rdffrs_p_p_f"}, + }, + }, + + { "_xxkvsy", + {30, 22, 11, 10}, + { {"0000"_b, "csel_64_condsel"}, + {"0001"_b, "csinc_64_condsel"}, + {"0111"_b, "_tnxlnl"}, + {"1000"_b, "csinv_64_condsel"}, + {"1001"_b, "csneg_64_condsel"}, + {"1100"_b, "_qjyvln"}, + {"1101"_b, "_nvthzh"}, + }, + }, + + { "_xxpqgg", + {30, 23, 22}, + { {"001"_b, "sbfm_64m_bitfield"}, + {"011"_b, "extr_64_extract"}, + {"101"_b, "ubfm_64m_bitfield"}, + }, + }, + + { "_xxpzrl", + {13}, + { {"0"_b, "mls_asimdelem_r"}, + {"1"_b, "umlsl_asimdelem_l"}, + }, + }, + + { "_xxxxlh", + {4}, + { {"0"_b, "ccmn_64_condcmp_imm"}, + }, + }, + + { "_xxyklv", + {23, 22, 13, 12, 11, 10}, + { {"000000"_b, "tbl_asimdtbl_l3_3"}, + {"000100"_b, "tbx_asimdtbl_l3_3"}, + {"001000"_b, "tbl_asimdtbl_l4_4"}, + {"001100"_b, "tbx_asimdtbl_l4_4"}, + {"xx0110"_b, "uzp2_asimdperm_only"}, + {"xx1010"_b, "trn2_asimdperm_only"}, + {"xx1110"_b, "zip2_asimdperm_only"}, + }, + }, + + { "_xygxsv", + {17}, + { {"0"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1"_b, "ld3_asisdlsop_h3_i3h"}, + }, + }, + + { "_xyhmgh", + {23, 22, 20, 9}, + { {"0000"_b, "_xhmpmy"}, + {"0001"_b, "_qnprqt"}, + {"0010"_b, "_nnzhgm"}, + {"0100"_b, "_vvxsxt"}, + {"0101"_b, "_yzmjhn"}, + {"0110"_b, "_mkgsly"}, + {"1000"_b, "_xtxyxj"}, + {"1001"_b, "_hmtmlq"}, + {"1010"_b, "_xtgtyz"}, + {"1100"_b, "_yynmjl"}, + {"1101"_b, "_sjnspg"}, + {"1110"_b, "_jzjvtv"}, + }, + }, + + { "_xyhxzt", {22}, - { {"0", "UnallocNEONStoreSingleStruct"}, - {"1", "UnallocNEONLoadSingleStruct"}, - }, - }, - - { "UnallocNEONLoadSingleStructPostIndex", - {21, 15, 14, 13, 12, 11, 10}, - { {"0010xx1", "VisitUnallocated"}, - {"0011xx1", "VisitUnallocated"}, - {"0100101", "VisitUnallocated"}, - {"0100x1x", "VisitUnallocated"}, - {"0101011", 
"VisitUnallocated"}, - {"01011x1", "VisitUnallocated"}, - {"0101x10", "VisitUnallocated"}, - {"01101xx", "VisitUnallocated"}, - {"01111xx", "VisitUnallocated"}, - {"1010xx1", "VisitUnallocated"}, - {"1011xx1", "VisitUnallocated"}, - {"1100011", "VisitUnallocated"}, - {"11001x1", "VisitUnallocated"}, - {"1100x10", "VisitUnallocated"}, - {"1101011", "VisitUnallocated"}, - {"11011x1", "VisitUnallocated"}, - {"1101x10", "VisitUnallocated"}, - {"11101xx", "VisitUnallocated"}, - {"11111xx", "VisitUnallocated"}, - {"otherwise", "VisitNEONLoadStoreSingleStructPostIndex"}, - }, - }, - - { "UnallocNEONLoadStoreSingleStructPostIndex", + { {"0"_b, "prfm_p_ldst_regoff"}, + }, + }, + + { "_xyljvp", + {30, 23, 22, 11, 10}, + { {"00000"_b, "_yjpstj"}, + {"01000"_b, "csel_64_condsel"}, + {"01001"_b, "csinc_64_condsel"}, + {"01100"_b, "_qghmks"}, + {"01101"_b, "_qzzlpv"}, + {"01110"_b, "_syktsg"}, + {"01111"_b, "_hjtvvm"}, + {"10000"_b, "_pvrylp"}, + {"11000"_b, "csinv_64_condsel"}, + {"11001"_b, "csneg_64_condsel"}, + {"11100"_b, "_kkgpjl"}, + {"11101"_b, "_tjtgjy"}, + {"11110"_b, "_qmzqsy"}, + {"11111"_b, "_nmkqzt"}, + }, + }, + + { "_xylmmp", + {22, 12}, + { {"10"_b, "_nkjgpq"}, + }, + }, + + { "_xyzpvp", + {23, 22, 13}, + { {"100"_b, "fmlsl_asimdelem_lh"}, + {"xx1"_b, "smlsl_asimdelem_l"}, + }, + }, + + { "_xzmjxk", + {30}, + { {"1"_b, "_sntzjg"}, + }, + }, + + { "_xznsqh", + {22, 20, 11}, + { {"000"_b, "cntw_r_s"}, + {"010"_b, "incw_r_rs"}, + {"100"_b, "cntd_r_s"}, + {"110"_b, "incd_r_rs"}, + }, + }, + + { "_xzyxnr", + {30, 23, 22, 11, 10}, + { {"10001"_b, "stg_64spost_ldsttags"}, + {"10010"_b, "stg_64soffset_ldsttags"}, + {"10011"_b, "stg_64spre_ldsttags"}, + {"10100"_b, "ldg_64loffset_ldsttags"}, + {"10101"_b, "stzg_64spost_ldsttags"}, + {"10110"_b, "stzg_64soffset_ldsttags"}, + {"10111"_b, "stzg_64spre_ldsttags"}, + {"11001"_b, "st2g_64spost_ldsttags"}, + {"11010"_b, "st2g_64soffset_ldsttags"}, + {"11011"_b, "st2g_64spre_ldsttags"}, + {"11101"_b, "stz2g_64spost_ldsttags"}, + 
{"11110"_b, "stz2g_64soffset_ldsttags"}, + {"11111"_b, "stz2g_64spre_ldsttags"}, + }, + }, + + { "_xzyylk", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fabs_s_floatdp1"}, + {"000010"_b, "fsqrt_s_floatdp1"}, + {"000100"_b, "fcvt_ds_floatdp1"}, + {"000110"_b, "fcvt_hs_floatdp1"}, + {"001000"_b, "frintp_s_floatdp1"}, + {"001010"_b, "frintz_s_floatdp1"}, + {"001110"_b, "frinti_s_floatdp1"}, + {"010000"_b, "frint32x_s_floatdp1"}, + {"010010"_b, "frint64x_s_floatdp1"}, + }, + }, + + { "_ygjslq", + {4, 3, 2, 1, 0}, + { {"00000"_b, "fcmp_h_floatcmp"}, + {"01000"_b, "fcmp_hz_floatcmp"}, + {"10000"_b, "fcmpe_h_floatcmp"}, + {"11000"_b, "fcmpe_hz_floatcmp"}, + }, + }, + + { "_ygnypk", + {22, 12}, + { {"10"_b, "_nqlgtn"}, + }, + }, + + { "_ygpjrl", + {13, 12}, + { {"00"_b, "adc_32_addsub_carry"}, + }, + }, + + { "_ygxhyg", + {23, 22, 4}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, "fccmpe_h_floatccmp"}, + }, + }, + + { "_ygyxvx", + {18, 17}, + { {"00"_b, "ld2_asisdlso_s2_2s"}, + }, + }, + + { "_yhlntp", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fexpa_z_z"}, + }, + }, + + { "_yhmlxk", + {13, 12, 11, 10}, + { {"0000"_b, "decp_z_p_z"}, + {"0010"_b, "decp_r_p_r"}, + }, + }, + + { "_yhqyzj", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_d_floatimm"}, + }, + }, + + { "_yhxvhy", + {17}, + { {"0"_b, "st4_asisdlso_b4_4b"}, + }, + }, + + { "_yjjrgg", + {30}, + { {"0"_b, "cbnz_64_compbranch"}, + }, + }, + + { "_yjmngt", + {30}, + { {"0"_b, "sel_z_p_zz"}, + {"1"_b, "_vpmxrj"}, + }, + }, + + { "_yjpstj", + {13, 12}, + { {"00"_b, "adc_64_addsub_carry"}, + }, + }, + + { "_yjsjvt", + {30, 23, 22, 11, 10}, + { {"00000"_b, "_vxsvhs"}, + {"00001"_b, "_rhzhyz"}, + {"00100"_b, "_zjsgkm"}, + {"00110"_b, "_xxxxlh"}, + {"01100"_b, "_mtjrtt"}, + {"10000"_b, "_yskkjs"}, + {"10100"_b, "_mjxzks"}, + {"10110"_b, "_tpkzxg"}, + }, + }, + + { "_yjxshz", + {30, 23, 22, 11, 
10}, + { {"00000"_b, "stlurb_32_ldapstl_unscaled"}, + {"00100"_b, "ldapurb_32_ldapstl_unscaled"}, + {"01000"_b, "ldapursb_64_ldapstl_unscaled"}, + {"01100"_b, "ldapursb_32_ldapstl_unscaled"}, + {"10000"_b, "stlurh_32_ldapstl_unscaled"}, + {"10100"_b, "ldapurh_32_ldapstl_unscaled"}, + {"11000"_b, "ldapursh_64_ldapstl_unscaled"}, + {"11100"_b, "ldapursh_32_ldapstl_unscaled"}, + }, + }, + + { "_yjxvkp", + {18, 17, 12}, + { {"0x0"_b, "st4_asisdlsop_dx4_r4d"}, + {"100"_b, "st4_asisdlsop_dx4_r4d"}, + {"110"_b, "st4_asisdlsop_d4_i4d"}, + }, + }, + + { "_yjzknm", + {13, 12, 11, 10}, + { {"0000"_b, "uqdecp_z_p_z"}, + {"0010"_b, "uqdecp_r_p_r_uw"}, + {"0011"_b, "uqdecp_r_p_r_x"}, + }, + }, + + { "_yjztsq", + {20, 19, 18, 17, 16}, + { {"11111"_b, "st64b_64l_memop"}, + }, + }, + + { "_ylhxlt", + {30}, + { {"0"_b, "ldrsw_64_loadlit"}, + {"1"_b, "prfm_p_loadlit"}, + }, + }, + + { "_ylnsvy", + {20, 19, 18, 17, 16}, + { {"00000"_b, "dup_z_r"}, + {"00100"_b, "insr_z_r"}, + {"10000"_b, "sunpklo_z_z"}, + {"10001"_b, "sunpkhi_z_z"}, + {"10010"_b, "uunpklo_z_z"}, + {"10011"_b, "uunpkhi_z_z"}, + {"10100"_b, "insr_z_v"}, + {"11000"_b, "rev_z_z"}, + }, + }, + + { "_ylqnqt", + {18, 17, 12}, + { {"000"_b, "ld4_asisdlso_d4_4d"}, + }, + }, + + { "_ylyskq", + {13, 12, 11, 10}, + { {"0011"_b, "uqadd_asisdsame_only"}, + {"1010"_b, "_yzqtyl"}, + {"1011"_b, "uqsub_asisdsame_only"}, + {"1101"_b, "cmhi_asisdsame_only"}, + {"1110"_b, "_jxzrxm"}, + {"1111"_b, "cmhs_asisdsame_only"}, + }, + }, + + { "_ymgrgx", + {22, 20, 19, 18, 17, 16}, + { {"111001"_b, "ucvtf_asisdmiscfp16_r"}, + {"x00001"_b, "ucvtf_asisdmisc_r"}, + {"x10000"_b, "faddp_asisdpair_only_sd"}, + }, + }, + + { "_ymhgxg", + {30, 13}, + { {"00"_b, "_yrmmmg"}, + {"01"_b, "_sghgtk"}, + {"10"_b, "_nxjkqs"}, + {"11"_b, "_yvyhlh"}, + }, + }, + + { "_ymhkrx", + {30, 23, 22, 13, 4}, + { {"0000x"_b, "ld1b_z_p_ai_s"}, + {"0001x"_b, "ldff1b_z_p_ai_s"}, + {"0010x"_b, "ld1rb_z_p_bi_u32"}, + {"0011x"_b, "ld1rb_z_p_bi_u64"}, + {"0100x"_b, 
"ld1h_z_p_ai_s"}, + {"0101x"_b, "ldff1h_z_p_ai_s"}, + {"0110x"_b, "ld1rh_z_p_bi_u32"}, + {"0111x"_b, "ld1rh_z_p_bi_u64"}, + {"1000x"_b, "ld1b_z_p_ai_d"}, + {"1001x"_b, "ldff1b_z_p_ai_d"}, + {"10100"_b, "prfw_i_p_bz_d_64_scaled"}, + {"10110"_b, "prfd_i_p_bz_d_64_scaled"}, + {"1100x"_b, "ld1h_z_p_ai_d"}, + {"1101x"_b, "ldff1h_z_p_ai_d"}, + {"1110x"_b, "ld1h_z_p_bz_d_64_scaled"}, + {"1111x"_b, "ldff1h_z_p_bz_d_64_scaled"}, + }, + }, + + { "_ymkthj", + {20, 9, 4}, + { {"000"_b, "uzp2_p_pp"}, + }, + }, + + { "_ympyng", + {30, 23, 22, 13}, + { {"0000"_b, "ld1sh_z_p_br_s64"}, + {"0001"_b, "ldff1sh_z_p_br_s64"}, + {"0010"_b, "ld1w_z_p_br_u32"}, + {"0011"_b, "ldff1w_z_p_br_u32"}, + {"0100"_b, "ld1sb_z_p_br_s64"}, + {"0101"_b, "ldff1sb_z_p_br_s64"}, + {"0110"_b, "ld1sb_z_p_br_s16"}, + {"0111"_b, "ldff1sb_z_p_br_s16"}, + {"1001"_b, "stnt1w_z_p_br_contiguous"}, + {"1011"_b, "st3w_z_p_br_contiguous"}, + {"10x0"_b, "st1w_z_p_br"}, + {"1100"_b, "str_z_bi"}, + {"1101"_b, "stnt1d_z_p_br_contiguous"}, + {"1111"_b, "st3d_z_p_br_contiguous"}, + }, + }, + + { "_ymznlj", + {13, 10}, + { {"00"_b, "_vgrtjz"}, + {"01"_b, "_kxjgsz"}, + {"10"_b, "_vmjtrx"}, + {"11"_b, "_tgmljr"}, + }, + }, + + { "_ynnrny", + {18, 17}, + { {"00"_b, "_jplmmr"}, + }, + }, + + { "_ynqsgl", + {17}, + { {"0"_b, "ld4_asisdlso_h4_4h"}, + }, + }, + + { "_ypjyqh", + {9, 8, 7, 6, 5, 0}, + { {"111110"_b, "drps_64e_branch_reg"}, + }, + }, + + { "_yplktv", + {13, 12, 11, 10}, + { {"0001"_b, "sub_asisdsame_only"}, + {"0010"_b, "_llxlqz"}, + {"0011"_b, "cmeq_asisdsame_only"}, + {"0110"_b, "_pxkqxn"}, + {"1010"_b, "_rhvksm"}, + {"1101"_b, "sqrdmulh_asisdsame_only"}, + {"1110"_b, "_gkkpjz"}, + }, + }, + + { "_yppszx", + {23, 22, 10}, + { {"100"_b, "umlslb_z_zzzi_s"}, + {"101"_b, "umlslt_z_zzzi_s"}, + {"110"_b, "umlslb_z_zzzi_d"}, + {"111"_b, "umlslt_z_zzzi_d"}, + }, + }, + + { "_yppyky", + {30, 13}, + { {"00"_b, "_gyrjrm"}, + {"01"_b, "_hhkqtn"}, + {"10"_b, "_jgmlpk"}, + {"11"_b, "_tzzssm"}, + }, + }, + + { "_ypqgyp", {22}, - 
{ {"0", "UnallocNEONStoreSingleStructPostIndex"}, - {"1", "UnallocNEONLoadSingleStructPostIndex"}, + { {"0"_b, "ldrsw_64_ldst_regoff"}, }, }, - { "UnallocNEONModifiedImmediate", - {30, 29, 15, 14, 13, 12, 11}, - { { "x00xxx1", "VisitUnallocated"}, - { "x010xx1", "VisitUnallocated"}, - { "x0110x1", "VisitUnallocated"}, - { "x011101", "VisitUnallocated"}, - { "0111110", "VisitUnallocated"}, - { "x1xxxx1", "VisitUnallocated"}, - { "otherwise", "VisitNEONModifiedImmediate"}, + { "_ypznsm", + {23}, + { {"0"_b, "fmaxnm_asimdsame_only"}, + {"1"_b, "fminnm_asimdsame_only"}, + }, + }, + + { "_yqmqzp", + {18, 17, 12}, + { {"000"_b, "st1_asisdlso_d1_1d"}, + }, + }, + + { "_yqmvxk", + {11, 10, 9, 8, 7, 6}, + { {"000001"_b, "tcommit_only_barriers"}, + {"xx1000"_b, "dsb_bon_barriers"}, + {"xxxx10"_b, "dmb_bo_barriers"}, + {"xxxx11"_b, "sb_only_barriers"}, + }, + }, + + { "_yqsgrt", + {23, 22, 20, 19, 16, 13, 12}, + { {"0000000"_b, "_znmhps"}, + {"0000010"_b, "_zssjpv"}, + {"0000011"_b, "_smqvrs"}, + {"0100000"_b, "_jrgzxt"}, + {"0100010"_b, "_ppllxt"}, + {"0100011"_b, "_hqlskj"}, + {"100xx00"_b, "st3_asisdlsep_r3_r"}, + {"100xx10"_b, "st1_asisdlsep_r3_r3"}, + {"100xx11"_b, "st1_asisdlsep_r1_r1"}, + {"1010x00"_b, "st3_asisdlsep_r3_r"}, + {"1010x10"_b, "st1_asisdlsep_r3_r3"}, + {"1010x11"_b, "st1_asisdlsep_r1_r1"}, + {"1011000"_b, "st3_asisdlsep_r3_r"}, + {"1011010"_b, "st1_asisdlsep_r3_r3"}, + {"1011011"_b, "st1_asisdlsep_r1_r1"}, + {"1011100"_b, "_ngxkmp"}, + {"1011110"_b, "_qgryzh"}, + {"1011111"_b, "_tjltls"}, + {"110xx00"_b, "ld3_asisdlsep_r3_r"}, + {"110xx10"_b, "ld1_asisdlsep_r3_r3"}, + {"110xx11"_b, "ld1_asisdlsep_r1_r1"}, + {"1110x00"_b, "ld3_asisdlsep_r3_r"}, + {"1110x10"_b, "ld1_asisdlsep_r3_r3"}, + {"1110x11"_b, "ld1_asisdlsep_r1_r1"}, + {"1111000"_b, "ld3_asisdlsep_r3_r"}, + {"1111010"_b, "ld1_asisdlsep_r3_r3"}, + {"1111011"_b, "ld1_asisdlsep_r1_r1"}, + {"1111100"_b, "_zzgrjz"}, + {"1111110"_b, "_phtnny"}, + {"1111111"_b, "_txjyxr"}, + }, + }, + + { "_yqvqtx", + {30, 
23, 22, 20, 13}, + { {"00001"_b, "ld1rob_z_p_bi_u8"}, + {"000x0"_b, "ld1rob_z_p_br_contiguous"}, + {"01001"_b, "ld1roh_z_p_bi_u16"}, + {"010x0"_b, "ld1roh_z_p_br_contiguous"}, + }, + }, + + { "_yqxnzl", + {11, 10}, + { {"00"_b, "sqdmulh_z_zz"}, + {"01"_b, "sqrdmulh_z_zz"}, }, }, - { "UnallocNEONPerm", + { "_yrgnqz", {13, 12}, - { {"00", "VisitUnallocated"}, - {"otherwise", "VisitNEONPerm"}, - }, - }, - - { "UnallocNEONScalar2RegMisc", - {16, 15, 14, 13, 12, 23, 22, 29}, - { {"0000xxxx", "VisitUnallocated"}, - {"00010xxx", "VisitUnallocated"}, - {"0010xxxx", "VisitUnallocated"}, - {"00110xxx", "VisitUnallocated"}, - {"01010xx1", "VisitUnallocated"}, - {"011xx0xx", "VisitUnallocated"}, - {"011101x1", "VisitUnallocated"}, - {"01111xxx", "VisitUnallocated"}, - {"1000xxxx", "VisitUnallocated"}, - {"10010xx0", "VisitUnallocated"}, - {"10011xxx", "VisitUnallocated"}, - {"10101xxx", "VisitUnallocated"}, - {"101100x0", "VisitUnallocated"}, - {"101101xx", "VisitUnallocated"}, - {"10111xxx", "VisitUnallocated"}, - {"1100xxxx", "VisitUnallocated"}, - {"111001xx", "VisitUnallocated"}, - {"11110xxx", "VisitUnallocated"}, - {"111110xx", "VisitUnallocated"}, - {"111111x1", "VisitUnallocated"}, - {"otherwise", "VisitNEONScalar2RegMisc"}, - }, - }, - - { "UnallocNEONScalar2RegMiscFP16", - {29, 23, 22, 16, 15, 14, 13, 12}, - { {"xx0xxxxx", "VisitUnallocated"}, - {"xx100xxx", "VisitUnallocated"}, - {"xx1010xx", "VisitUnallocated"}, - {"xx110xxx", "VisitUnallocated"}, - {"xx11100x", "VisitUnallocated"}, - {"xx111110", "VisitUnallocated"}, - {"x01011xx", "VisitUnallocated"}, - {"x0111111", "VisitUnallocated"}, - {"x1101111", "VisitUnallocated"}, - {"x1111100", "VisitUnallocated"}, - {"11101110", "VisitUnallocated"}, - {"11111111", "VisitUnallocated"}, - {"otherwise", "VisitNEONScalar2RegMiscFP16"}, - }, - }, - - { "UnallocNEONScalar3Diff", - {14, 13, 12, 29}, - { {"000x", "VisitUnallocated"}, - {"0011", "VisitUnallocated"}, - {"010x", "VisitUnallocated"}, - {"0111", "VisitUnallocated"}, 
- {"100x", "VisitUnallocated"}, - {"1011", "VisitUnallocated"}, - {"11xx", "VisitUnallocated"}, - {"otherwise", "UnallocNEONScalar3Diff_2"}, - }, - }, - - { "UnallocNEONScalar3Diff_2", - {15}, - { {"0", "VisitUnallocated"}, - {"1", "VisitNEONScalar3Diff"}, - }, - }, - - { "UnallocNEONScalar3Same", - {15, 14, 13, 12, 11, 23, 22, 29}, - { {"00000xxx", "VisitUnallocated"}, - {"0001xxxx", "VisitUnallocated"}, - {"00100xxx", "VisitUnallocated"}, - {"011xxxxx", "VisitUnallocated"}, - {"1001xxxx", "VisitUnallocated"}, - {"1010xxxx", "VisitUnallocated"}, - {"10111xxx", "VisitUnallocated"}, - {"1100xxxx", "VisitUnallocated"}, - {"110100xx", "VisitUnallocated"}, - {"110101x0", "VisitUnallocated"}, - {"110110x1", "VisitUnallocated"}, - {"110111xx", "VisitUnallocated"}, - {"111001x0", "VisitUnallocated"}, - {"111010x0", "VisitUnallocated"}, - {"111011x0", "VisitUnallocated"}, - {"11110xxx", "VisitUnallocated"}, - {"111110x1", "VisitUnallocated"}, - {"111111x1", "VisitUnallocated"}, - {"otherwise", "VisitNEONScalar3Same"}, - }, - }, - - { "UnallocNEONScalar3SameExtra", - {29, 14, 13, 12, 11}, - { {"x001x", "VisitUnallocated"}, - {"x01xx", "VisitUnallocated"}, - {"x1xxx", "VisitUnallocated"}, - {"00000", "VisitUnallocated"}, - {"00001", "VisitUnallocated"}, - {"otherwise", "VisitNEONScalar3SameExtra"}, - }, - }, - - { "UnallocNEONScalar3SameFP16", - {29, 23, 13, 12, 11}, - { {"00011", "VisitNEONScalar3SameFP16"}, - {"00100", "VisitNEONScalar3SameFP16"}, - {"00111", "VisitNEONScalar3SameFP16"}, - {"01111", "VisitNEONScalar3SameFP16"}, - {"10100", "VisitNEONScalar3SameFP16"}, - {"10101", "VisitNEONScalar3SameFP16"}, - {"11010", "VisitNEONScalar3SameFP16"}, - {"11100", "VisitNEONScalar3SameFP16"}, - {"11101", "VisitNEONScalar3SameFP16"}, - {"otherwise", "VisitUnallocated"}, - }, - }, - - { "UnallocNEONScalarByIndexedElement", - {29, 23, 22, 15, 14, 13, 12}, - { {"0xx1111", "VisitUnallocated"}, - {"1000001", "VisitUnallocated"}, - {"1000101", "VisitUnallocated"}, - {"11x0001", 
"VisitUnallocated"}, - {"11x0101", "VisitUnallocated"}, - {"1xx0011", "VisitUnallocated"}, - {"1xx0111", "VisitUnallocated"}, - {"1xx1011", "VisitUnallocated"}, - {"1xx1100", "VisitUnallocated"}, - {"x010001", "VisitUnallocated"}, - {"x010101", "VisitUnallocated"}, - {"x011001", "VisitUnallocated"}, - {"xxx0000", "VisitUnallocated"}, - {"xxx0010", "VisitUnallocated"}, - {"xxx0100", "VisitUnallocated"}, - {"xxx0110", "VisitUnallocated"}, - {"xxx1000", "VisitUnallocated"}, - {"xxx1010", "VisitUnallocated"}, - {"xxx1110", "VisitUnallocated"}, - {"otherwise", "VisitNEONScalarByIndexedElement"}, - }, - }, - - { "UnallocNEONScalarCopy", - {14, 13, 12, 11}, - { {"0000", "UnallocNEONScalarCopy_2"}, - {"otherwise", "VisitUnallocated"}, - }, - }, - - { "UnallocNEONScalarCopy_2", - {19, 18, 17, 16}, - { {"0000", "VisitUnallocated"}, - {"otherwise", "UnallocNEONScalarCopy_3"}, - }, - }, - - { "UnallocNEONScalarCopy_3", - {29}, - { {"0", "VisitNEONScalarCopy"}, - {"1", "VisitUnallocated"}, - }, - }, - - { "UnallocNEONScalarPairwise", - {29, 23, 22, 16, 15, 14, 13, 12}, - { {"xxx00xxx", "VisitUnallocated"}, - {"xxx010xx", "VisitUnallocated"}, - {"xxx01110", "VisitUnallocated"}, - {"xxx10xxx", "VisitUnallocated"}, - {"xxx1100x", "VisitUnallocated"}, - {"xxx11010", "VisitUnallocated"}, - {"xxx111xx", "VisitUnallocated"}, - {"x1x01101", "VisitUnallocated"}, - {"00101100", "VisitUnallocated"}, - {"00101101", "VisitUnallocated"}, - {"00101111", "VisitUnallocated"}, - {"01101100", "VisitUnallocated"}, - {"01101111", "VisitUnallocated"}, - {"1xx11011", "VisitUnallocated"}, - {"otherwise", "VisitNEONScalarPairwise"}, - }, - }, - - { "UnallocNEONScalarShiftImmediate", - {15, 14, 13, 12, 11, 29}, - { {"00001x", "VisitUnallocated"}, - {"00011x", "VisitUnallocated"}, - {"00101x", "VisitUnallocated"}, - {"00111x", "VisitUnallocated"}, - {"010000", "VisitUnallocated"}, - {"01001x", "VisitUnallocated"}, - {"01011x", "VisitUnallocated"}, - {"011000", "VisitUnallocated"}, - {"01101x", 
"VisitUnallocated"}, - {"01111x", "VisitUnallocated"}, - {"100000", "VisitUnallocated"}, - {"100010", "VisitUnallocated"}, - {"101xxx", "VisitUnallocated"}, - {"110xxx", "VisitUnallocated"}, - {"11101x", "VisitUnallocated"}, - {"11110x", "VisitUnallocated"}, - {"otherwise", "UnallocNEONScalarShiftImmediate_2"}, - }, - }, - - { "UnallocNEONScalarShiftImmediate_2", - {22, 21, 20, 19}, - { {"0000", "VisitUnallocated"}, - {"otherwise", "VisitNEONScalarShiftImmediate"}, - }, - }, - - { "UnallocNEONShiftImmediate", - {15, 14, 13, 12, 11, 29}, - { {"00001x", "VisitUnallocated"}, - {"00011x", "VisitUnallocated"}, - {"00101x", "VisitUnallocated"}, - {"00111x", "VisitUnallocated"}, - {"010000", "VisitUnallocated"}, - {"01001x", "VisitUnallocated"}, - {"01011x", "VisitUnallocated"}, - {"011000", "VisitUnallocated"}, - {"01101x", "VisitUnallocated"}, - {"01111x", "VisitUnallocated"}, - {"10101x", "VisitUnallocated"}, - {"1011xx", "VisitUnallocated"}, - {"110xxx", "VisitUnallocated"}, - {"11101x", "VisitUnallocated"}, - {"11110x", "VisitUnallocated"}, - {"otherwise", "VisitNEONShiftImmediate"}, - }, - }, - - { "UnallocNEONStoreSingleStruct", - {21, 15, 14, 13, 12, 11, 10}, - { {"0010xx1", "VisitUnallocated"}, - {"0011xx1", "VisitUnallocated"}, - {"0100101", "VisitUnallocated"}, - {"0100x1x", "VisitUnallocated"}, - {"0101011", "VisitUnallocated"}, - {"01011x1", "VisitUnallocated"}, - {"0101x10", "VisitUnallocated"}, - {"1010xx1", "VisitUnallocated"}, - {"1011xx1", "VisitUnallocated"}, - {"1100011", "VisitUnallocated"}, - {"11001x1", "VisitUnallocated"}, - {"1100x10", "VisitUnallocated"}, - {"1101011", "VisitUnallocated"}, - {"11011x1", "VisitUnallocated"}, - {"1101x10", "VisitUnallocated"}, - {"x11xxxx", "VisitUnallocated"}, - {"otherwise", "VisitNEONLoadStoreSingleStruct"}, - }, - }, - - { "UnallocNEONStoreSingleStructPostIndex", - {21, 15, 14, 13, 12, 11, 10}, - { {"0010xx1", "VisitUnallocated"}, - {"0011xx1", "VisitUnallocated"}, - {"0100101", "VisitUnallocated"}, - 
{"0100x1x", "VisitUnallocated"}, - {"0101011", "VisitUnallocated"}, - {"01011x1", "VisitUnallocated"}, - {"0101x10", "VisitUnallocated"}, - {"1010xx1", "VisitUnallocated"}, - {"1011xx1", "VisitUnallocated"}, - {"1100011", "VisitUnallocated"}, - {"11001x1", "VisitUnallocated"}, - {"1100x10", "VisitUnallocated"}, - {"1101011", "VisitUnallocated"}, - {"11011x1", "VisitUnallocated"}, - {"1101x10", "VisitUnallocated"}, - {"x11xxxx", "VisitUnallocated"}, - {"otherwise", "VisitNEONLoadStoreSingleStructPostIndex"}, - }, - }, - - { "UnallocNEONTable", + { {"00"_b, "sshl_asisdsame_only"}, + {"01"_b, "srshl_asisdsame_only"}, + }, + }, + + { "_yrlzqp", + {22, 13, 12}, + { {"000"_b, "ldapr_64l_memop"}, + }, + }, + + { "_yrmmmg", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_yrrppk", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fcvtns_32d_float2int"}, + {"00001"_b, "fcvtnu_32d_float2int"}, + {"00010"_b, "scvtf_d32_float2int"}, + {"00011"_b, "ucvtf_d32_float2int"}, + {"00100"_b, "fcvtas_32d_float2int"}, + {"00101"_b, "fcvtau_32d_float2int"}, + {"01000"_b, "fcvtps_32d_float2int"}, + {"01001"_b, "fcvtpu_32d_float2int"}, + {"10000"_b, "fcvtms_32d_float2int"}, + {"10001"_b, "fcvtmu_32d_float2int"}, + {"11000"_b, "fcvtzs_32d_float2int"}, + {"11001"_b, "fcvtzu_32d_float2int"}, + {"11110"_b, "fjcvtzs_32d_float2int"}, + }, + }, + + { "_ysjqhn", + {30, 23, 22}, + { {"00x"_b, "adds_64_addsub_shift"}, + {"010"_b, "adds_64_addsub_shift"}, + {"10x"_b, "subs_64_addsub_shift"}, + {"110"_b, "subs_64_addsub_shift"}, + }, + }, + + { "_yskkjs", + {13, 12}, + { {"00"_b, "sbcs_64_addsub_carry"}, + }, + }, + + { "_yszjsm", + {12, 11, 10}, + { {"000"_b, "sdot_z_zzz"}, + {"001"_b, "udot_z_zzz"}, + {"010"_b, "sqdmlalbt_z_zzz"}, + {"011"_b, "sqdmlslbt_z_zzz"}, + {"1xx"_b, "cdot_z_zzz"}, + }, + }, + + { "_ytkjxx", + {30, 23, 22, 13, 4}, + { {"00x0x"_b, "ld1w_z_p_bz_s_x32_scaled"}, + {"00x1x"_b, "ldff1w_z_p_bz_s_x32_scaled"}, + {"0100x"_b, "ldr_z_bi"}, + {"01100"_b, 
"prfw_i_p_bi_s"}, + {"01110"_b, "prfd_i_p_bi_s"}, + {"10x0x"_b, "ld1w_z_p_bz_d_x32_scaled"}, + {"10x1x"_b, "ldff1w_z_p_bz_d_x32_scaled"}, + {"11x0x"_b, "ld1d_z_p_bz_d_x32_scaled"}, + {"11x1x"_b, "ldff1d_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_ytsghm", + {30, 23, 22}, + { {"000"_b, "msub_32a_dp_3src"}, + }, + }, + + { "_ytvtqn", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1sh_z_p_bi_s64"}, + {"00011"_b, "ldnf1sh_z_p_bi_s64"}, + {"00101"_b, "ld1w_z_p_bi_u32"}, + {"00111"_b, "ldnf1w_z_p_bi_u32"}, + {"01001"_b, "ld1sb_z_p_bi_s64"}, + {"01011"_b, "ldnf1sb_z_p_bi_s64"}, + {"01101"_b, "ld1sb_z_p_bi_s16"}, + {"01111"_b, "ldnf1sb_z_p_bi_s16"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"}, + {"100x1"_b, "st1w_z_p_bz_d_64_unscaled"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"}, + {"101x1"_b, "st1w_z_p_ai_d"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"}, + {"110x1"_b, "st1d_z_p_bz_d_64_unscaled"}, + {"111x1"_b, "st1d_z_p_ai_d"}, + }, + }, + + { "_ytvxsl", + {30, 23, 22}, + { {"000"_b, "stlxrb_sr32_ldstexcl"}, + {"001"_b, "ldaxrb_lr32_ldstexcl"}, + {"010"_b, "stlrb_sl32_ldstexcl"}, + {"011"_b, "ldarb_lr32_ldstexcl"}, + {"100"_b, "stlxrh_sr32_ldstexcl"}, + {"101"_b, "ldaxrh_lr32_ldstexcl"}, + {"110"_b, "stlrh_sl32_ldstexcl"}, + {"111"_b, "ldarh_lr32_ldstexcl"}, + }, + }, + + { "_yvgqjx", + {13, 12, 5}, + { {"010"_b, "_tnzytv"}, + {"011"_b, "_vmpnlv"}, + {"100"_b, "_hhhqjk"}, + {"101"_b, "_tkzqqp"}, + {"110"_b, "_sphpkr"}, + {"111"_b, "_spglxn"}, + }, + }, + + { "_yvhnlk", + {30, 23, 22, 13, 12, 11, 10}, + { {"0001111"_b, "casp_cp32_ldstexcl"}, + {"0011111"_b, "caspa_cp32_ldstexcl"}, + {"0101111"_b, "casb_c32_ldstexcl"}, + {"0111111"_b, "casab_c32_ldstexcl"}, + {"1001111"_b, "casp_cp64_ldstexcl"}, + {"1011111"_b, "caspa_cp64_ldstexcl"}, + {"1101111"_b, "cash_c32_ldstexcl"}, + {"1111111"_b, "casah_c32_ldstexcl"}, + }, + }, + + { "_yvlhjg", + {23}, + { {"0"_b, "frecps_asimdsame_only"}, + {"1"_b, "frsqrts_asimdsame_only"}, + }, + }, + + { "_yvnjkr", + {9, 8, 7, 6, 5}, + { {"11111"_b, 
"autdzb_64z_dp_1src"}, + }, + }, + + { "_yvptvx", + {23, 12, 11, 10}, + { {"0000"_b, "sqshrnb_z_zi"}, + {"0001"_b, "sqshrnt_z_zi"}, + {"0010"_b, "sqrshrnb_z_zi"}, + {"0011"_b, "sqrshrnt_z_zi"}, + {"0100"_b, "uqshrnb_z_zi"}, + {"0101"_b, "uqshrnt_z_zi"}, + {"0110"_b, "uqrshrnb_z_zi"}, + {"0111"_b, "uqrshrnt_z_zi"}, + }, + }, + + { "_yvxgrr", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "frintm_asimdmiscfp16_r"}, + {"0x00001"_b, "frintm_asimdmisc_r"}, + {"1111001"_b, "frintz_asimdmiscfp16_r"}, + {"1x00001"_b, "frintz_asimdmisc_r"}, + {"xx00000"_b, "cmeq_asimdmisc_z"}, + }, + }, + + { "_yvygml", + {30}, + { {"0"_b, "_jkrlsg"}, + {"1"_b, "_vvrmvg"}, + }, + }, + + { "_yvyhlh", + {23, 22, 12, 11, 10}, + { {"0x000"_b, "fmul_z_zzi_h"}, + {"10000"_b, "fmul_z_zzi_s"}, + {"11000"_b, "fmul_z_zzi_d"}, + }, + }, + + { "_yvyxkx", + {10}, + { {"0"_b, "sha512su0_vv2_cryptosha512_2"}, + {"1"_b, "sm4e_vv4_cryptosha512_2"}, + }, + }, + + { "_yxhrpk", {23, 22}, - { {"00", "VisitNEONTable"}, - {"otherwise", "VisitUnallocated"}, + { {"00"_b, "fmlal2_asimdsame_f"}, + {"10"_b, "fmlsl2_asimdsame_f"}, }, }, - { "UnallocRotateRightIntoFlags", - {31, 30, 29, 4}, - { {"1010", "VisitRotateRightIntoFlags"}, - {"otherwise", "VisitUnallocated"}, + { "_yxmkzr", + {12}, + { {"0"_b, "st1_asisdlsop_dx1_r1d"}, }, }, - { "UnallocSystem", - {21, 20, 19, 15, 14, 13, 12}, - { {"0000101", "VisitUnallocated"}, - {"000011x", "VisitUnallocated"}, - {"0001xxx", "VisitUnallocated"}, - {"100xxxx", "VisitUnallocated"}, - {"otherwise", "UnallocSystem_2"}, + { "_yxnslx", + {23, 22}, + { {"00"_b, "adr_z_az_d_s32_scaled"}, + {"01"_b, "adr_z_az_d_u32_scaled"}, + {"1x"_b, "adr_z_az_sd_same_scaled"}, + }, + }, + + { "_yykhjv", + {23, 22, 13, 12, 11, 10}, + { {"000110"_b, "smmla_z_zzz"}, + {"0x1000"_b, "sshllb_z_zi"}, + {"0x1001"_b, "sshllt_z_zi"}, + {"0x1010"_b, "ushllb_z_zi"}, + {"0x1011"_b, "ushllt_z_zi"}, + {"100110"_b, "usmmla_z_zzz"}, + {"110110"_b, "ummla_z_zzz"}, + {"xx0000"_b, "saddlbt_z_zz"}, + {"xx0010"_b, 
"ssublbt_z_zz"}, + {"xx0011"_b, "ssubltb_z_zz"}, + {"xx0100"_b, "eorbt_z_zz"}, + {"xx0101"_b, "eortb_z_zz"}, + {"xx1100"_b, "bext_z_zz"}, + {"xx1101"_b, "bdep_z_zz"}, + {"xx1110"_b, "bgrp_z_zz"}, }, }, - { "UnallocSystem_2", - {21, 20, 19, 15, 14, 13}, - { {"000000", "VisitUnallocated"}, - {"otherwise", "UnallocSystem_3"}, + { "_yynmjl", + {4}, + { {"0"_b, "orrs_p_p_pp_z"}, + {"1"_b, "orns_p_p_pp_z"}, }, }, - { "UnallocSystem_3", - {21, 20, 19, 16, 15, 14, 13}, - { {"0000001", "VisitUnallocated"}, - {"otherwise", "UnallocSystem_4"}, + { "_yyrkmn", + {17, 16, 9, 8, 7, 6, 5}, + { {"0000000"_b, "aesmc_z_z"}, + {"10xxxxx"_b, "aese_z_zz"}, + {"11xxxxx"_b, "sm4e_z_zz"}, }, }, - { "UnallocSystem_4", - {21, 20, 19, 17, 15, 14, 13}, - { {"0000001", "VisitUnallocated"}, - {"otherwise", "UnallocSystem_5"}, + { "_yytvxh", + {30, 23, 22, 13, 4}, + { {"00000"_b, "prfw_i_p_br_s"}, + {"00010"_b, "prfw_i_p_ai_s"}, + {"0010x"_b, "ld1rw_z_p_bi_u32"}, + {"0011x"_b, "ld1rw_z_p_bi_u64"}, + {"01000"_b, "prfd_i_p_br_s"}, + {"01010"_b, "prfd_i_p_ai_s"}, + {"0110x"_b, "ld1rsb_z_p_bi_s16"}, + {"0111x"_b, "ld1rd_z_p_bi_u64"}, + {"1000x"_b, "ldnt1w_z_p_ar_d_64_unscaled"}, + {"10010"_b, "prfw_i_p_ai_d"}, + {"1010x"_b, "ld1w_z_p_bz_d_64_unscaled"}, + {"1011x"_b, "ldff1w_z_p_bz_d_64_unscaled"}, + {"1100x"_b, "ldnt1d_z_p_ar_d_64_unscaled"}, + {"11010"_b, "prfd_i_p_ai_d"}, + {"1110x"_b, "ld1d_z_p_bz_d_64_unscaled"}, + {"1111x"_b, "ldff1d_z_p_bz_d_64_unscaled"}, }, }, - { "UnallocSystem_5", - {21, 20, 19, 18, 15, 14, 13}, - { {"0001001", "VisitUnallocated"}, - {"otherwise", "VisitSystem"}, + { "_yyyshx", + {30, 13, 4}, + { {"000"_b, "cmphs_p_p_zz"}, + {"001"_b, "cmphi_p_p_zz"}, + {"010"_b, "cmpeq_p_p_zw"}, + {"011"_b, "cmpne_p_p_zw"}, + {"1xx"_b, "fcmla_z_p_zzz"}, }, }, - { "UnallocUnconditionalBranchToRegister", - {15, 14, 13, 12}, - { {"0000", "UnallocUnconditionalBranchToRegister_2"}, - {"otherwise", "VisitUnallocated"}, + { "_yzmjhn", + {4}, + { {"0"_b, "eors_p_p_pp_z"}, }, }, - { 
"UnallocUnconditionalBranchToRegister_2", + { "_yzqtyl", {20, 19, 18, 17, 16}, - { {"11111", "UnallocUnconditionalBranchToRegister_3"}, - {"otherwise", "VisitUnallocated"}, + { {"00001"_b, "sqxtun_asisdmisc_n"}, }, }, - { "UnallocUnconditionalBranchToRegister_3", - {24, 23, 22, 21}, - { {"0011", "VisitUnallocated"}, - {"011x", "VisitUnallocated"}, - {"otherwise", "VisitUnconditionalBranchToRegister"}, + { "_yzzlxs", + {23, 4}, + { {"00"_b, "_mpgrgp"}, }, }, - { "DecodeSVE101xxxxx", - {15, 14, 13}, - { {"101", "DecodeSVE101xx101"}, - {"010", "VisitSVEContiguousLoad_ScalarPlusScalar"}, - {"otherwise", "VisitSVEMemContiguousLoad"}, + { "_zgjpym", + {23, 22, 20, 19, 11}, + { {"00010"_b, "srsra_asisdshf_r"}, + {"001x0"_b, "srsra_asisdshf_r"}, + {"01xx0"_b, "srsra_asisdshf_r"}, }, }, - { "DecodeSVE101xx101", - {20}, - { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, - {"1", "VisitSVEMemContiguousLoad"}, + { "_zglksl", + {30, 23, 22, 13, 12, 11, 10}, + { {"1101001"_b, "ummla_asimdsame2_g"}, + {"xxx0001"_b, "sqrdmlah_asimdsame2_only"}, + {"xxx0011"_b, "sqrdmlsh_asimdsame2_only"}, + {"xxx0101"_b, "udot_asimdsame2_d"}, }, }, - { "DecodeSVE00000001", - {20, 19}, - { {"10", "VisitSVEMovprfx"}, - {"otherwise", "VisitSVEIntReduction"}, + { "_zgysvr", + {30, 13}, + { {"00"_b, "_xpqglq"}, + {"10"_b, "_xstkrn"}, + {"11"_b, "_zjzmvh"}, + }, + }, + + { "_zgzlhq", + {17}, + { {"0"_b, "ld1_asisdlso_b1_1b"}, + }, + }, + + { "_zhkjzg", + {23, 22, 13}, + { {"000"_b, "fmls_asimdelem_rh_h"}, + {"1x0"_b, "fmls_asimdelem_r_sd"}, + {"xx1"_b, "sqdmlsl_asimdelem_l"}, + }, + }, + + { "_zhpxqz", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_h_floatimm"}, + }, + }, + + { "_zhrtts", + {23, 22}, + { {"00"_b, "_qlqhzg"}, + }, + }, + + { "_zjgvyp", + {30, 13, 12, 11, 10}, + { {"00000"_b, "_ghnljt"}, + }, + }, + + { "_zjjxjl", + {9}, + { {"0"_b, "pnext_p_p_p"}, + }, + }, + + { "_zjsgkm", + {4}, + { {"0"_b, "ccmn_64_condcmp_reg"}, + }, + }, + + { "_zjslnr", + {30, 23, 22}, + { {"000"_b, "sbfm_32m_bitfield"}, 
+ {"010"_b, "extr_32_extract"}, + {"100"_b, "ubfm_32m_bitfield"}, + }, + }, + + { "_zjzmvh", + {23, 22, 20, 19, 18, 17, 16}, + { {"0001010"_b, "fcvtx_z_p_z_d2s"}, + {"0011xx0"_b, "flogb_z_p_z"}, + {"0110010"_b, "scvtf_z_p_z_h2fp16"}, + {"0110011"_b, "ucvtf_z_p_z_h2fp16"}, + {"0110100"_b, "scvtf_z_p_z_w2fp16"}, + {"0110101"_b, "ucvtf_z_p_z_w2fp16"}, + {"0110110"_b, "scvtf_z_p_z_x2fp16"}, + {"0110111"_b, "ucvtf_z_p_z_x2fp16"}, + {"0111010"_b, "fcvtzs_z_p_z_fp162h"}, + {"0111011"_b, "fcvtzu_z_p_z_fp162h"}, + {"0111100"_b, "fcvtzs_z_p_z_fp162w"}, + {"0111101"_b, "fcvtzu_z_p_z_fp162w"}, + {"0111110"_b, "fcvtzs_z_p_z_fp162x"}, + {"0111111"_b, "fcvtzu_z_p_z_fp162x"}, + {"1001000"_b, "fcvt_z_p_z_s2h"}, + {"1001001"_b, "fcvt_z_p_z_h2s"}, + {"1001010"_b, "bfcvt_z_p_z_s2bf"}, + {"1010100"_b, "scvtf_z_p_z_w2s"}, + {"1010101"_b, "ucvtf_z_p_z_w2s"}, + {"1011100"_b, "fcvtzs_z_p_z_s2w"}, + {"1011101"_b, "fcvtzu_z_p_z_s2w"}, + {"1101000"_b, "fcvt_z_p_z_d2h"}, + {"1101001"_b, "fcvt_z_p_z_h2d"}, + {"1101010"_b, "fcvt_z_p_z_d2s"}, + {"1101011"_b, "fcvt_z_p_z_s2d"}, + {"1110000"_b, "scvtf_z_p_z_w2d"}, + {"1110001"_b, "ucvtf_z_p_z_w2d"}, + {"1110100"_b, "scvtf_z_p_z_x2s"}, + {"1110101"_b, "ucvtf_z_p_z_x2s"}, + {"1110110"_b, "scvtf_z_p_z_x2d"}, + {"1110111"_b, "ucvtf_z_p_z_x2d"}, + {"1111000"_b, "fcvtzs_z_p_z_d2w"}, + {"1111001"_b, "fcvtzu_z_p_z_d2w"}, + {"1111100"_b, "fcvtzs_z_p_z_s2x"}, + {"1111101"_b, "fcvtzu_z_p_z_s2x"}, + {"1111110"_b, "fcvtzs_z_p_z_d2x"}, + {"1111111"_b, "fcvtzu_z_p_z_d2x"}, + {"xx00000"_b, "frintn_z_p_z"}, + {"xx00001"_b, "frintp_z_p_z"}, + {"xx00010"_b, "frintm_z_p_z"}, + {"xx00011"_b, "frintz_z_p_z"}, + {"xx00100"_b, "frinta_z_p_z"}, + {"xx00110"_b, "frintx_z_p_z"}, + {"xx00111"_b, "frinti_z_p_z"}, + {"xx01100"_b, "frecpx_z_p_z"}, + {"xx01101"_b, "fsqrt_z_p_z"}, + }, + }, + + { "_zkhjsp", + {11}, + { {"0"_b, "sqdmulh_z_zzi_h"}, + {"1"_b, "mul_z_zzi_h"}, + }, + }, + + { "_zkqtrj", + {30}, + { {"0"_b, "b_only_branch_imm"}, + }, + }, + + { "_zkttzl", + {23, 22, 20, 
19, 18, 16, 13}, + { {"0000000"_b, "_tsvsgh"}, + {"0000001"_b, "_rkrltp"}, + {"0100000"_b, "_zgzlhq"}, + {"0100001"_b, "_nrssjz"}, + {"100xxx0"_b, "st1_asisdlsop_bx1_r1b"}, + {"100xxx1"_b, "st3_asisdlsop_bx3_r3b"}, + {"1010xx0"_b, "st1_asisdlsop_bx1_r1b"}, + {"1010xx1"_b, "st3_asisdlsop_bx3_r3b"}, + {"10110x0"_b, "st1_asisdlsop_bx1_r1b"}, + {"10110x1"_b, "st3_asisdlsop_bx3_r3b"}, + {"1011100"_b, "st1_asisdlsop_bx1_r1b"}, + {"1011101"_b, "st3_asisdlsop_bx3_r3b"}, + {"1011110"_b, "_rnypvh"}, + {"1011111"_b, "_nxjgmm"}, + {"110xxx0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"110xxx1"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1110xx0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"1110xx1"_b, "ld3_asisdlsop_bx3_r3b"}, + {"11110x0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"11110x1"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1111100"_b, "ld1_asisdlsop_bx1_r1b"}, + {"1111101"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1111110"_b, "_qqtpln"}, + {"1111111"_b, "_glhxyj"}, + }, + }, + + { "_zlmgyp", + {23, 22, 13}, + { {"000"_b, "fmla_asimdelem_rh_h"}, + {"1x0"_b, "fmla_asimdelem_r_sd"}, + {"xx1"_b, "sqdmlal_asimdelem_l"}, + }, + }, + + { "_zmkqxl", + {23, 10}, + { {"00"_b, "adclb_z_zzz"}, + {"01"_b, "adclt_z_zzz"}, + {"10"_b, "sbclb_z_zzz"}, + {"11"_b, "sbclt_z_zzz"}, + }, + }, + + { "_zmpzkg", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "orr_asimdimm_l_sl"}, + {"00x100"_b, "shl_asimdshf_r"}, + {"00x110"_b, "sqshl_asimdshf_r"}, + {"010x00"_b, "shl_asimdshf_r"}, + {"010x10"_b, "sqshl_asimdshf_r"}, + {"011100"_b, "shl_asimdshf_r"}, + {"011110"_b, "sqshl_asimdshf_r"}, + {"0x1000"_b, "shl_asimdshf_r"}, + {"0x1010"_b, "sqshl_asimdshf_r"}, + }, + }, + + { "_zmtkvx", + {13, 10}, + { {"00"_b, "_rhpmjz"}, + }, + }, + + { "_zmzxjm", + {17}, + { {"0"_b, "faddv_v_p_z"}, + }, + }, + + { "_znmhps", + {18, 17}, + { {"00"_b, "st3_asisdlse_r3"}, + }, + }, + + { "_zpmkvt", + {12}, + { {"1"_b, "_vqqrjl"}, + }, + }, + + { "_zpnsrv", + {23, 22, 13}, + { {"000"_b, "fmul_asimdelem_rh_h"}, + {"1x0"_b, "fmul_asimdelem_r_sd"}, + {"xx1"_b, 
"sqdmull_asimdelem_l"}, + }, + }, + + { "_zppjvk", + {12}, + { {"0"_b, "ld2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_zpsymj", + {22, 13, 12}, + { {"000"_b, "swp_64_memop"}, + {"001"_b, "_yjztsq"}, + {"010"_b, "st64bv0_64_memop"}, + {"011"_b, "st64bv_64_memop"}, + {"100"_b, "swpl_64_memop"}, + }, + }, + + { "_zpzghs", + {30, 23, 22}, + { {"000"_b, "stnp_q_ldstnapair_offs"}, + {"001"_b, "ldnp_q_ldstnapair_offs"}, + {"010"_b, "stp_q_ldstpair_post"}, + {"011"_b, "ldp_q_ldstpair_post"}, + }, + }, + + { "_zqltpy", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_s_floatimm"}, + }, + }, + + { "_zqmmsk", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "ldaddb_32_memop"}, + {"0000100"_b, "ldclrb_32_memop"}, + {"0001000"_b, "ldeorb_32_memop"}, + {"0001100"_b, "ldsetb_32_memop"}, + {"000xx10"_b, "strb_32b_ldst_regoff"}, + {"0010000"_b, "ldaddlb_32_memop"}, + {"0010100"_b, "ldclrlb_32_memop"}, + {"0011000"_b, "ldeorlb_32_memop"}, + {"0011100"_b, "ldsetlb_32_memop"}, + {"001xx10"_b, "ldrb_32b_ldst_regoff"}, + {"0100000"_b, "ldaddab_32_memop"}, + {"0100100"_b, "ldclrab_32_memop"}, + {"0101000"_b, "ldeorab_32_memop"}, + {"0101100"_b, "ldsetab_32_memop"}, + {"010xx10"_b, "ldrsb_64b_ldst_regoff"}, + {"0110000"_b, "ldaddalb_32_memop"}, + {"0110100"_b, "ldclralb_32_memop"}, + {"0111000"_b, "ldeoralb_32_memop"}, + {"0111100"_b, "ldsetalb_32_memop"}, + {"011xx10"_b, "ldrsb_32b_ldst_regoff"}, + {"1000000"_b, "ldaddh_32_memop"}, + {"1000100"_b, "ldclrh_32_memop"}, + {"1001000"_b, "ldeorh_32_memop"}, + {"1001100"_b, "ldseth_32_memop"}, + {"100xx10"_b, "strh_32_ldst_regoff"}, + {"1010000"_b, "ldaddlh_32_memop"}, + {"1010100"_b, "ldclrlh_32_memop"}, + {"1011000"_b, "ldeorlh_32_memop"}, + {"1011100"_b, "ldsetlh_32_memop"}, + {"101xx10"_b, "ldrh_32_ldst_regoff"}, + {"1100000"_b, "ldaddah_32_memop"}, + {"1100100"_b, "ldclrah_32_memop"}, + {"1101000"_b, "ldeorah_32_memop"}, + {"1101100"_b, "ldsetah_32_memop"}, + {"110xx10"_b, "ldrsh_64_ldst_regoff"}, + {"1110000"_b, "ldaddalh_32_memop"}, + 
{"1110100"_b, "ldclralh_32_memop"}, + {"1111000"_b, "ldeoralh_32_memop"}, + {"1111100"_b, "ldsetalh_32_memop"}, + {"111xx10"_b, "ldrsh_32_ldst_regoff"}, + }, + }, + + { "_zqmrhp", + {23, 22, 4, 3, 2, 1, 0}, + { {"0000000"_b, "wrffr_f_p"}, + }, + }, + + { "_zrmgjx", + {30, 23, 22, 13, 4}, + { {"01000"_b, "ldr_p_bi"}, + {"01100"_b, "prfb_i_p_bi_s"}, + {"01110"_b, "prfh_i_p_bi_s"}, + {"10x0x"_b, "ld1sw_z_p_bz_d_x32_unscaled"}, + {"10x1x"_b, "ldff1sw_z_p_bz_d_x32_unscaled"}, }, }, -}; -// clang-format on -static const VisitorNode kVisitorNodes[] = { -#define VISITOR_NODES(A) {"Visit" #A, &Decoder::Visit##A}, - VISITOR_LIST(VISITOR_NODES) -#undef VISITOR_NODES + { "_zrvlnx", + {13, 12}, + { {"00"_b, "sbc_32_addsub_carry"}, + }, + }, + + { "_zryvjk", + {20, 9, 4}, + { {"000"_b, "trn2_p_pp"}, + }, + }, + + { "_zslsvj", + {23, 22, 20, 19, 11}, + { {"00011"_b, "fcvtzu_asisdshf_c"}, + {"001x1"_b, "fcvtzu_asisdshf_c"}, + {"01xx1"_b, "fcvtzu_asisdshf_c"}, + }, + }, + + { "_zsltyl", + {22, 20, 11}, + { {"000"_b, "uqincw_r_rs_uw"}, + {"001"_b, "uqdecw_r_rs_uw"}, + {"010"_b, "uqincw_r_rs_x"}, + {"011"_b, "uqdecw_r_rs_x"}, + {"100"_b, "uqincd_r_rs_uw"}, + {"101"_b, "uqdecd_r_rs_uw"}, + {"110"_b, "uqincd_r_rs_x"}, + {"111"_b, "uqdecd_r_rs_x"}, + }, + }, + + { "_zssjpv", + {18, 17}, + { {"00"_b, "st1_asisdlse_r3_3v"}, + }, + }, + + { "_zsyggq", + {23, 10}, + { {"00"_b, "_txhzxq"}, + }, + }, + + { "_ztpryr", + {13}, + { {"0"_b, "fmad_z_p_zzz"}, + {"1"_b, "fmsb_z_p_zzz"}, + }, + }, + + { "_ztyqrj", + {30, 23, 13, 12, 10}, + { {"00000"_b, "_jmvgsp"}, + {"00001"_b, "_jkkqvy"}, + {"00100"_b, "_nkxhsy"}, + {"00101"_b, "_gshrzq"}, + {"00110"_b, "_zvjrlz"}, + {"00111"_b, "_ntjpsx"}, + {"01000"_b, "_mqrzzk"}, + {"01001"_b, "_jqxqql"}, + {"01100"_b, "_xznsqh"}, + {"01101"_b, "_qvlnll"}, + {"01110"_b, "_kvnqhn"}, + {"01111"_b, "_zsltyl"}, + {"10110"_b, "_zkhjsp"}, + {"10111"_b, "_hvyjnk"}, + {"11000"_b, "_sjvhlq"}, + {"11001"_b, "_xhktsk"}, + {"11010"_b, "_rtpztp"}, + {"11011"_b, "_rznrqt"}, + 
{"11100"_b, "_kyspnn"}, + {"11101"_b, "_qljhnp"}, + {"11110"_b, "_pxyrpm"}, + {"11111"_b, "_khjvqq"}, + }, + }, + + { "_zvjrlz", + {22, 20, 11}, + { {"000"_b, "sqincb_r_rs_sx"}, + {"001"_b, "sqdecb_r_rs_sx"}, + {"010"_b, "sqincb_r_rs_x"}, + {"011"_b, "sqdecb_r_rs_x"}, + {"100"_b, "sqinch_r_rs_sx"}, + {"101"_b, "sqdech_r_rs_sx"}, + {"110"_b, "sqinch_r_rs_x"}, + {"111"_b, "sqdech_r_rs_x"}, + }, + }, + + { "_zvlxrl", + {23, 13, 12}, + { {"010"_b, "fcmeq_asisdsame_only"}, + }, + }, + + { "_zvqghy", + {30, 23, 22, 13, 12, 11, 10}, + { {"1000000"_b, "sha256h_qqv_cryptosha3"}, + {"1000100"_b, "sha256h2_qqv_cryptosha3"}, + {"1001000"_b, "sha256su1_vvv_cryptosha3"}, + }, + }, + + { "_zxhhny", + {23, 22}, + { {"00"_b, "fmsub_s_floatdp3"}, + {"01"_b, "fmsub_d_floatdp3"}, + {"11"_b, "fmsub_h_floatdp3"}, + }, + }, + + { "_zxspnk", + {30, 23, 22, 11, 10}, + { {"00000"_b, "sturb_32_ldst_unscaled"}, + {"00001"_b, "strb_32_ldst_immpost"}, + {"00010"_b, "sttrb_32_ldst_unpriv"}, + {"00011"_b, "strb_32_ldst_immpre"}, + {"00100"_b, "ldurb_32_ldst_unscaled"}, + {"00101"_b, "ldrb_32_ldst_immpost"}, + {"00110"_b, "ldtrb_32_ldst_unpriv"}, + {"00111"_b, "ldrb_32_ldst_immpre"}, + {"01000"_b, "ldursb_64_ldst_unscaled"}, + {"01001"_b, "ldrsb_64_ldst_immpost"}, + {"01010"_b, "ldtrsb_64_ldst_unpriv"}, + {"01011"_b, "ldrsb_64_ldst_immpre"}, + {"01100"_b, "ldursb_32_ldst_unscaled"}, + {"01101"_b, "ldrsb_32_ldst_immpost"}, + {"01110"_b, "ldtrsb_32_ldst_unpriv"}, + {"01111"_b, "ldrsb_32_ldst_immpre"}, + {"10000"_b, "sturh_32_ldst_unscaled"}, + {"10001"_b, "strh_32_ldst_immpost"}, + {"10010"_b, "sttrh_32_ldst_unpriv"}, + {"10011"_b, "strh_32_ldst_immpre"}, + {"10100"_b, "ldurh_32_ldst_unscaled"}, + {"10101"_b, "ldrh_32_ldst_immpost"}, + {"10110"_b, "ldtrh_32_ldst_unpriv"}, + {"10111"_b, "ldrh_32_ldst_immpre"}, + {"11000"_b, "ldursh_64_ldst_unscaled"}, + {"11001"_b, "ldrsh_64_ldst_immpost"}, + {"11010"_b, "ldtrsh_64_ldst_unpriv"}, + {"11011"_b, "ldrsh_64_ldst_immpre"}, + {"11100"_b, 
"ldursh_32_ldst_unscaled"}, + {"11101"_b, "ldrsh_32_ldst_immpost"}, + {"11110"_b, "ldtrsh_32_ldst_unpriv"}, + {"11111"_b, "ldrsh_32_ldst_immpre"}, + }, + }, + + { "_zxtzmv", + {30, 23, 22, 13}, + { {"0010"_b, "ld1rsh_z_p_bi_s64"}, + {"0011"_b, "ld1rsh_z_p_bi_s32"}, + {"0110"_b, "ld1rsb_z_p_bi_s64"}, + {"0111"_b, "ld1rsb_z_p_bi_s32"}, + {"1000"_b, "ld1sw_z_p_ai_d"}, + {"1001"_b, "ldff1sw_z_p_ai_d"}, + {"1010"_b, "ld1sw_z_p_bz_d_64_scaled"}, + {"1011"_b, "ldff1sw_z_p_bz_d_64_scaled"}, + }, + }, + + { "_zyjjgs", + {23, 22, 20, 19, 18}, + { {"00000"_b, "orr_z_zi"}, + {"01000"_b, "eor_z_zi"}, + {"10000"_b, "and_z_zi"}, + {"11000"_b, "dupm_z_i"}, + {"xx1xx"_b, "cpy_z_o_i"}, + }, + }, + + { "_zylnnn", + {30}, + { {"0"_b, "cbz_64_compbranch"}, + }, + }, + + { "_zytrsq", + {30}, + { {"0"_b, "tbz_only_testbranch"}, + }, + }, + + { "_zyzzhm", + {23, 20, 19, 18, 17, 16}, + { {"000001"_b, "frint32x_asimdmisc_r"}, + }, + }, + + { "_zzgrjz", + {18, 17}, + { {"0x"_b, "ld3_asisdlsep_r3_r"}, + {"10"_b, "ld3_asisdlsep_r3_r"}, + {"11"_b, "ld3_asisdlsep_i3_i"}, + }, + }, + + { "_zzhgng", + {30, 23, 22, 13, 12, 11, 10}, + { {"1000000"_b, "sha1c_qsv_cryptosha3"}, + {"1000001"_b, "dup_asisdone_only"}, + {"1000100"_b, "sha1p_qsv_cryptosha3"}, + {"1001000"_b, "sha1m_qsv_cryptosha3"}, + {"1001100"_b, "sha1su0_vvv_cryptosha3"}, + {"1010111"_b, "fmulx_asisdsamefp16_only"}, + {"1011001"_b, "fcmeq_asisdsamefp16_only"}, + {"1011111"_b, "frecps_asisdsamefp16_only"}, + {"1111111"_b, "frsqrts_asisdsamefp16_only"}, + }, + }, + + { "_zzrqlh", + {30, 23, 22, 11, 10}, + { {"00000"_b, "_ygpjrl"}, + {"01000"_b, "csel_32_condsel"}, + {"01001"_b, "csinc_32_condsel"}, + {"01100"_b, "_hggmnk"}, + {"01101"_b, "_sllkpt"}, + {"01110"_b, "_mgsvlj"}, + {"01111"_b, "_kyyzks"}, + {"10000"_b, "_zrvlnx"}, + {"11000"_b, "csinv_32_condsel"}, + {"11001"_b, "csneg_32_condsel"}, + {"11100"_b, "_ghmzhr"}, + {"11101"_b, "_gnqjhz"}, + {"11110"_b, "_mmmjkx"}, + }, + }, + + { "_zzvxvh", + {23, 22, 11, 10}, + { {"0001"_b, 
"pmul_z_zz"}, + {"xx00"_b, "mul_z_zz"}, + {"xx10"_b, "smulh_z_zz"}, + {"xx11"_b, "umulh_z_zz"}, + }, + }, + + { "Root", + {31, 29, 28, 27, 26, 25, 24, 21, 15, 14}, + { {"00000000xx"_b, "_qzjnpr"}, + {"0000100000"_b, "_rzzxsn"}, + {"0000100001"_b, "_xvppmm"}, + {"0000100010"_b, "_ptsjnr"}, + {"0000100011"_b, "_nlpmvl"}, + {"0000100100"_b, "_ljljkv"}, + {"0000100101"_b, "_kktglv"}, + {"0000100110"_b, "_ppnssm"}, + {"0000100111"_b, "_ztyqrj"}, + {"0000101000"_b, "_rnqtmt"}, + {"0000101001"_b, "_njgxlz"}, + {"0000101010"_b, "_mpvsng"}, + {"0000101011"_b, "_qlxksl"}, + {"0000101100"_b, "_mhrjvp"}, + {"0000101101"_b, "_pgjjsz"}, + {"0000101110"_b, "_yppyky"}, + {"0000101111"_b, "_yjmngt"}, + {"000100000x"_b, "_vmjgmg"}, + {"000100001x"_b, "_ytvxsl"}, + {"0001000101"_b, "_yvhnlk"}, + {"0001000111"_b, "_xryzqs"}, + {"000101000x"_b, "_vjqsqs"}, + {"000101010x"_b, "_phvnqh"}, + {"000101100x"_b, "_pphhym"}, + {"00010111xx"_b, "_qsygjs"}, + {"0001100000"_b, "_jxrlyh"}, + {"0001100001"_b, "_yqsgrt"}, + {"0001100010"_b, "_kpyqyv"}, + {"0001101000"_b, "_zkttzl"}, + {"0001101001"_b, "_llqjlh"}, + {"0001101010"_b, "_xhvtjg"}, + {"0001101011"_b, "_xylmmp"}, + {"0001101100"_b, "_vzzvlr"}, + {"0001101101"_b, "_sjlrxn"}, + {"0001101110"_b, "_xrhhjz"}, + {"0001101111"_b, "_ygnypk"}, + {"0001110000"_b, "_xjghst"}, + {"0001110001"_b, "_xxyklv"}, + {"0001110010"_b, "_rtgkkg"}, + {"0001110100"_b, "_hqnxvt"}, + {"0001110101"_b, "_hmxlny"}, + {"0001110110"_b, "_txsmts"}, + {"0001110111"_b, "_mtnpmr"}, + {"0001111000"_b, "_ttstyt"}, + {"0001111001"_b, "_krhrrr"}, + {"0001111010"_b, "_xhltxn"}, + {"0001111011"_b, "_ymznlj"}, + {"0001111100"_b, "_kkgzst"}, + {"0001111101"_b, "_gvjgyp"}, + {"0001111110"_b, "_mjqvxq"}, + {"0001111111"_b, "_spjjkg"}, + {"0010001xxx"_b, "_vppthj"}, + {"0010010xxx"_b, "_qzzlhq"}, + {"001001100x"_b, "_zjslnr"}, + {"001001110x"_b, "_jpxgqh"}, + {"0010011x1x"_b, "_gkhhjm"}, + {"0010100xxx"_b, "_jyxszq"}, + {"0010110xxx"_b, "_xqhgkk"}, + {"00101x1xxx"_b, "_zkqtrj"}, + 
{"0011000xxx"_b, "_qkyjhg"}, + {"00110010xx"_b, "_yjxshz"}, + {"0011010000"_b, "_zzrqlh"}, + {"0011010001"_b, "_qsrlql"}, + {"001101001x"_b, "_tnrrjk"}, + {"001101100x"_b, "_pnxgrg"}, + {"001101101x"_b, "_ytsghm"}, + {"0011100xxx"_b, "_srmhjk"}, + {"0011110000"_b, "_zzhgng"}, + {"0011110001"_b, "_zvqghy"}, + {"001111001x"_b, "_hnzzkj"}, + {"0011110100"_b, "_qntssm"}, + {"0011110101"_b, "_mrqqlp"}, + {"0011110110"_b, "_nxyhyv"}, + {"0011110111"_b, "_qtknlp"}, + {"0011111000"_b, "_gszlvl"}, + {"0011111001"_b, "_mlnqrm"}, + {"0011111010"_b, "_yvygml"}, + {"0011111011"_b, "_xhxrnt"}, + {"0011111100"_b, "_grqnlm"}, + {"0011111101"_b, "_ktnjrx"}, + {"0011111110"_b, "_gkpzhr"}, + {"0011111111"_b, "_mpyhkm"}, + {"0100100000"_b, "_yyyshx"}, + {"0100100001"_b, "_mylphg"}, + {"0100100010"_b, "_nsjhhg"}, + {"0100100011"_b, "_rhhrhg"}, + {"0100100100"_b, "_ymhgxg"}, + {"0100100101"_b, "_nvkthr"}, + {"0100100110"_b, "_phthqj"}, + {"0100100111"_b, "_kyjxrr"}, + {"0100101000"_b, "_gtvhmp"}, + {"0100101001"_b, "_pppsmg"}, + {"0100101010"_b, "_zgysvr"}, + {"0100101011"_b, "_shqygv"}, + {"0100101100"_b, "_lpsvyy"}, + {"0100101101"_b, "_nqkhrv"}, + {"0100101110"_b, "_tkjtgp"}, + {"0100101111"_b, "_htqpks"}, + {"0101000xxx"_b, "_vpkptr"}, + {"0101001xxx"_b, "_vmjzyk"}, + {"010101000x"_b, "_gmrxlp"}, + {"010101010x"_b, "_jmgkrl"}, + {"010101100x"_b, "_qhgtvk"}, + {"01010111xx"_b, "_rxpspy"}, + {"0101100xxx"_b, "_qhtqrj"}, + {"0101101xxx"_b, "_vnpqrh"}, + {"0101110000"_b, "_vpykkg"}, + {"0101110001"_b, "_xrxvpr"}, + {"0101110010"_b, "_zglksl"}, + {"0101110011"_b, "_gtjskz"}, + {"0101110100"_b, "_qntygx"}, + {"0101110101"_b, "_kxprqm"}, + {"0101110110"_b, "_qxtvzy"}, + {"0101110111"_b, "_mstthg"}, + {"0101111000"_b, "_qmqmpj"}, + {"0101111001"_b, "_rhttgj"}, + {"0101111010"_b, "_jqnhrj"}, + {"0101111011"_b, "_nlqglq"}, + {"0101111100"_b, "_vtxyxz"}, + {"0101111101"_b, "_pqtjgx"}, + {"0101111110"_b, "_snjpvy"}, + {"0101111111"_b, "_spzgkt"}, + {"0110001xxx"_b, "_plktrh"}, + 
{"0110010xxx"_b, "_xtqmyj"}, + {"0110011xxx"_b, "_lzpykk"}, + {"0110100xxx"_b, "_mtzgpn"}, + {"0110101xxx"_b, "_tvgvvq"}, + {"01110000xx"_b, "_zxspnk"}, + {"0111000100"_b, "_zqmmsk"}, + {"0111000101"_b, "_nmzyvt"}, + {"0111000110"_b, "_vvhzhv"}, + {"0111000111"_b, "_sltqpy"}, + {"0111001xxx"_b, "_qzsthq"}, + {"0111010000"_b, "_zsyggq"}, + {"0111010001"_b, "_hngpgx"}, + {"011101001x"_b, "_njxtpv"}, + {"01111000xx"_b, "_kpmvkn"}, + {"0111100101"_b, "_jhytlg"}, + {"0111100111"_b, "_rksxpn"}, + {"01111001x0"_b, "_trlhgn"}, + {"0111101xxx"_b, "_jxtgtx"}, + {"0111110000"_b, "_tnhmpx"}, + {"0111110010"_b, "_sqjpsl"}, + {"0111110100"_b, "_sjnxky"}, + {"0111110101"_b, "_kykymg"}, + {"0111110110"_b, "_pxzkjy"}, + {"0111110111"_b, "_tjktkm"}, + {"0111111000"_b, "_hhkhkk"}, + {"0111111001"_b, "_nxmjvy"}, + {"0111111010"_b, "_vkvgnm"}, + {"0111111011"_b, "_tssqsr"}, + {"0111111100"_b, "_mthzvm"}, + {"0111111101"_b, "_nlgqsk"}, + {"0111111110"_b, "_gvykrp"}, + {"0111111111"_b, "_sjzsvv"}, + {"0x10000xxx"_b, "adr_only_pcreladdr"}, + {"1000100000"_b, "_lspzrv"}, + {"1000100001"_b, "_kxvvkq"}, + {"1000100010"_b, "_sxpvym"}, + {"1000100011"_b, "_vkrkks"}, + {"1000100100"_b, "_xvnyxq"}, + {"1000100101"_b, "_gtxpgx"}, + {"1000100110"_b, "_vlrhpy"}, + {"1000100111"_b, "_ymhkrx"}, + {"1000101000"_b, "_zrmgjx"}, + {"1000101001"_b, "_qqyryl"}, + {"1000101010"_b, "_hgxtqy"}, + {"1000101011"_b, "_yytvxh"}, + {"1000101100"_b, "_ptslzg"}, + {"1000101101"_b, "_ytkjxx"}, + {"1000101110"_b, "_zxtzmv"}, + {"1000101111"_b, "_kgmqkh"}, + {"100100000x"_b, "_jhqlkv"}, + {"100100001x"_b, "_lxgltj"}, + {"1001000100"_b, "_hxzlmm"}, + {"1001000101"_b, "_vllqmp"}, + {"1001000110"_b, "_tlstgz"}, + {"1001000111"_b, "_mrmpgh"}, + {"10010100xx"_b, "_rzkmny"}, + {"10010101xx"_b, "_jggvph"}, + {"10010110xx"_b, "_nhkstj"}, + {"10010111xx"_b, "_jsygzs"}, + {"100111000x"_b, "_gmsgqz"}, + {"1001110010"_b, "_grrjlh"}, + {"1001110011"_b, "_jhkglp"}, + {"100111010x"_b, "_qytrjj"}, + {"1001110110"_b, "_qsqqxg"}, + 
{"1001110111"_b, "_kypqpy"}, + {"1010001xxx"_b, "_vsvtqz"}, + {"1010010xxx"_b, "_vqzlzt"}, + {"10100110xx"_b, "_xxpqgg"}, + {"10100111xx"_b, "_rgjqzs"}, + {"10101000xx"_b, "_qmrgkn"}, + {"10101001xx"_b, "_jkxlnq"}, + {"1010101000"_b, "_ggvztl"}, + {"1010101001"_b, "_xlhjhx"}, + {"101010101x"_b, "_nqgqjh"}, + {"1010101100"_b, "_qsrtzz"}, + {"1010101110"_b, "_tzzzxz"}, + {"10101011x1"_b, "_lhmlrj"}, + {"1010110000"_b, "_kkmxxx"}, + {"1010110100"_b, "_ltvrrg"}, + {"1010111000"_b, "_mqkjxj"}, + {"1010111100"_b, "_pmrngh"}, + {"101011xx10"_b, "_hsjynv"}, + {"101011xxx1"_b, "_kmhtqp"}, + {"1011000xxx"_b, "_ylhxlt"}, + {"10110010xx"_b, "_gkxgsn"}, + {"1011001100"_b, "_xzmjxk"}, + {"1011001110"_b, "_ppqkym"}, + {"10110011x1"_b, "_xzyxnr"}, + {"1011010000"_b, "_xyljvp"}, + {"1011010001"_b, "_sxnkrh"}, + {"101101001x"_b, "_klthpn"}, + {"101101100x"_b, "_xnsrny"}, + {"101101101x"_b, "_htppjj"}, + {"101101110x"_b, "_rmmmjj"}, + {"101101111x"_b, "_txnqzy"}, + {"1011100xxx"_b, "_gmvtss"}, + {"10111100xx"_b, "_gnxgxs"}, + {"1011110100"_b, "_zjgvyp"}, + {"1100100000"_b, "_sjtrhm"}, + {"1100100001"_b, "_hzkglv"}, + {"1100100010"_b, "_qrygny"}, + {"1100100011"_b, "_tjzqnp"}, + {"1100100100"_b, "_yqvqtx"}, + {"1100100101"_b, "_ngttyj"}, + {"1100100110"_b, "_kqzmtr"}, + {"1100100111"_b, "_qpvgnh"}, + {"1100101000"_b, "_tpkslq"}, + {"1100101001"_b, "_ympyng"}, + {"1100101010"_b, "_ytvtqn"}, + {"1100101011"_b, "_qvsypn"}, + {"1100101100"_b, "_lqmksm"}, + {"1100101101"_b, "_mkskxj"}, + {"1100101110"_b, "_knkjnz"}, + {"1100101111"_b, "_hxnmsl"}, + {"1101000xxx"_b, "_shrsxr"}, + {"1101001xxx"_b, "_xhkgqh"}, + {"11010100xx"_b, "_rmxjsn"}, + {"11010101xx"_b, "_mvzvpk"}, + {"11010110xx"_b, "_ysjqhn"}, + {"11010111xx"_b, "_lpkqzl"}, + {"1101100xxx"_b, "_zpzghs"}, + {"1101101xxx"_b, "_gmrxqq"}, + {"1110001xxx"_b, "_jlqjzr"}, + {"1110010xxx"_b, "_qgmngg"}, + {"1110011xxx"_b, "_vlrrtz"}, + {"1110100xxx"_b, "_zylnnn"}, + {"1110101xxx"_b, "_yjjrgg"}, + {"11110000xx"_b, "_qhtrnn"}, + 
{"1111000100"_b, "_lrqkvp"}, + {"1111000101"_b, "_pvkmmv"}, + {"1111000110"_b, "_lxmyjh"}, + {"1111000111"_b, "_vgrhsz"}, + {"1111001xxx"_b, "_vqvqhp"}, + {"1111010000"_b, "_yjsjvt"}, + {"1111010010"_b, "_yzzlxs"}, + {"11110100x1"_b, "_vkhhkk"}, + {"11111000xx"_b, "_xrhmtg"}, + {"11111001xx"_b, "_xprlgy"}, + {"1111101xxx"_b, "_hjgylh"}, + {"1x10000xxx"_b, "adrp_only_pcreladdr"}, + {"x110110xxx"_b, "_zytrsq"}, + {"x110111xxx"_b, "_kxsysq"}, + }, + }, }; +// clang-format on } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/decoder-visitor-map-aarch64.h b/src/aarch64/decoder-visitor-map-aarch64.h new file mode 100644 index 00000000..49c27b2b --- /dev/null +++ b/src/aarch64/decoder-visitor-map-aarch64.h @@ -0,0 +1,2973 @@ +// Copyright 2020, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Initialisation data for a std::map, from instruction form to the visitor +// function that handles it. This allows reuse of existing visitor functions +// that support groups of instructions, though they may do extra decoding +// no longer needed. +// In the long term, it's expected that each component that uses the decoder +// will want to group instruction handling in the way most appropriate to +// the component's function, so this map initialisation will no longer be +// shared. 
+ +#define DEFAULT_FORM_TO_VISITOR_MAP(VISITORCLASS) \ + {"abs_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"addpl_r_ri"_h, &VISITORCLASS::VisitSVEStackFrameAdjustment}, \ + {"addvl_r_ri"_h, &VISITORCLASS::VisitSVEStackFrameAdjustment}, \ + {"add_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \ + {"add_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"add_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"adr_z_az_d_s32_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \ + {"adr_z_az_d_u32_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \ + {"adr_z_az_sd_same_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \ + {"ands_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"andv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"and_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"and_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"and_z_zi"_h, \ + &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \ + {"and_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"asrd_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"asrr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"asr_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"asr_z_p_zw"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \ + {"asr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"asr_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"asr_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"bics_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"bic_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"bic_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"bic_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"brkas_p_p_p_z"_h, 
&VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brka_p_p_p"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brkbs_p_p_p_z"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brkb_p_p_p"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brkns_p_p_pp"_h, \ + &VISITORCLASS::VisitSVEPropagateBreakToNextPartition}, \ + {"brkn_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreakToNextPartition}, \ + {"brkpas_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"brkpa_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"brkpbs_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"brkpb_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"clasta_r_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToGeneralRegister}, \ + {"clasta_v_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToSIMDFPScalar}, \ + {"clasta_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEConditionallyBroadcastElementToVector}, \ + {"clastb_r_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToGeneralRegister}, \ + {"clastb_v_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToSIMDFPScalar}, \ + {"clastb_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEConditionallyBroadcastElementToVector}, \ + {"cls_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"clz_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"cmpeq_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpeq_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpeq_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpge_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpge_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpge_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpgt_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpgt_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpgt_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + 
{"cmphi_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmphi_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmphi_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmphs_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmphs_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmphs_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmple_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmple_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmplo_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmplo_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpls_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmpls_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmplt_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmplt_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpne_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpne_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpne_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cnot_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"cntb_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cntd_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cnth_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cntp_r_p_p"_h, &VISITORCLASS::VisitSVEPredicateCount}, \ + {"cntw_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cnt_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"compact_z_p_z"_h, &VISITORCLASS::VisitSVECompressActiveElements}, \ + {"cpy_z_o_i"_h, &VISITORCLASS::VisitSVECopyIntImm_Predicated}, \ + {"cpy_z_p_i"_h, &VISITORCLASS::VisitSVECopyIntImm_Predicated}, \ + {"cpy_z_p_r"_h, \ + &VISITORCLASS::VisitSVECopyGeneralRegisterToVector_Predicated}, \ + {"cpy_z_p_v"_h, \ + &VISITORCLASS::VisitSVECopySIMDFPScalarRegisterToVector_Predicated}, \ + {"ctermeq_rr"_h, 
&VISITORCLASS::VisitSVEConditionallyTerminateScalars}, \ + {"ctermne_rr"_h, &VISITORCLASS::VisitSVEConditionallyTerminateScalars}, \ + {"decb_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"decd_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"decd_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"dech_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"dech_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"decp_r_p_r"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"decp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"decw_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"decw_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"dupm_z_i"_h, &VISITORCLASS::VisitSVEBroadcastBitmaskImm}, \ + {"dup_z_i"_h, &VISITORCLASS::VisitSVEBroadcastIntImm_Unpredicated}, \ + {"dup_z_r"_h, &VISITORCLASS::VisitSVEBroadcastGeneralRegister}, \ + {"dup_z_zi"_h, &VISITORCLASS::VisitSVEBroadcastIndexElement}, \ + {"eors_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"eorv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"eor_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"eor_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"eor_z_zi"_h, \ + &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \ + {"eor_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"ext_z_zi_des"_h, &VISITORCLASS::VisitSVEPermuteVectorExtract}, \ + {"fabd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fabs_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"facge_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"facgt_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fadda_v_p_z"_h, &VISITORCLASS::VisitSVEFPAccumulatingReduction}, \ + {"faddv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fadd_z_p_zs"_h, \ + 
&VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fadd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fadd_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"fcadd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPComplexAddition}, \ + {"fcmeq_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmeq_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmge_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmge_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmgt_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmgt_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPComplexMulAdd}, \ + {"fcmla_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPComplexMulAddIndex}, \ + {"fcmla_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPComplexMulAddIndex}, \ + {"fcmle_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmlt_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmne_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmne_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmuo_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcpy_z_p_i"_h, &VISITORCLASS::VisitSVECopyFPImm_Predicated}, \ + {"fcvtzs_z_p_z_d2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_d2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_fp162h"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_fp162w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_fp162x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_s2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_s2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_d2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_d2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_fp162h"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_fp162w"_h, 
&VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_fp162x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_s2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_s2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvt_z_p_z_d2h"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_d2s"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_h2d"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_h2s"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_s2d"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_s2h"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fdivr_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fdiv_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fdup_z_i"_h, &VISITORCLASS::VisitSVEBroadcastFPImm_Unpredicated}, \ + {"fexpa_z_z"_h, &VISITORCLASS::VisitSVEFPExponentialAccelerator}, \ + {"fmad_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmaxnmv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fmaxnm_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmaxnm_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmaxv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fmax_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmax_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fminnmv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fminnm_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fminnm_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fminv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fmin_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmin_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmla_z_zzzi_d"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + 
{"fmla_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmla_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmls_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmls_z_zzzi_d"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmls_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmls_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmsb_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmulx_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmul_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmul_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmul_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"fmul_z_zzi_d"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \ + {"fmul_z_zzi_h"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \ + {"fmul_z_zzi_s"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \ + {"fneg_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"fnmad_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fnmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fnmls_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fnmsb_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"frecpe_z_z"_h, &VISITORCLASS::VisitSVEFPUnaryOpUnpredicated}, \ + {"frecps_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"frecpx_z_p_z"_h, &VISITORCLASS::VisitSVEFPUnaryOp}, \ + {"frinta_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frinti_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintm_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintn_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintp_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintx_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintz_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frsqrte_z_z"_h, &VISITORCLASS::VisitSVEFPUnaryOpUnpredicated}, \ + {"frsqrts_z_zz"_h, 
&VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"fscale_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fsqrt_z_p_z"_h, &VISITORCLASS::VisitSVEFPUnaryOp}, \ + {"fsubr_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fsubr_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fsub_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fsub_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fsub_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"ftmad_z_zzi"_h, &VISITORCLASS::VisitSVEFPTrigMulAddCoefficient}, \ + {"ftsmul_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"ftssel_z_zz"_h, &VISITORCLASS::VisitSVEFPTrigSelectCoefficient}, \ + {"incb_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"incd_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"incd_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"inch_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"inch_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"incp_r_p_r"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"incp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"incw_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"incw_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"index_z_ii"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"index_z_ir"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"index_z_ri"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"index_z_rr"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"insr_z_r"_h, &VISITORCLASS::VisitSVEInsertGeneralRegister}, \ + {"insr_z_v"_h, &VISITORCLASS::VisitSVEInsertSIMDFPScalarRegister}, \ + {"lasta_r_p_z"_h, \ + &VISITORCLASS::VisitSVEExtractElementToGeneralRegister}, \ + {"lasta_v_p_z"_h, \ + &VISITORCLASS::VisitSVEExtractElementToSIMDFPScalarRegister}, \ + {"lastb_r_p_z"_h, 
\ + &VISITORCLASS::VisitSVEExtractElementToGeneralRegister}, \ + {"lastb_v_p_z"_h, \ + &VISITORCLASS::VisitSVEExtractElementToSIMDFPScalarRegister}, \ + {"ld1b_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1b_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1b_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_bi_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_br_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_br_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1b_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1b_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1d_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1d_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1d_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1d_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1d_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1d_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1d_z_p_bz_d_x32_unscaled"_h, \ + 
&VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1h_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1h_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1h_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1h_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1h_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1h_z_p_br_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1h_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1h_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1h_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1h_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1h_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1h_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1h_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ + {"ld1h_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1rb_z_p_bi_u16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rb_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rb_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rb_z_p_bi_u8"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rd_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rh_z_p_bi_u16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rh_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, 
\ + {"ld1rh_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rqb_z_p_bi_u8"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqb_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rqd_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqd_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rqh_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqh_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rqw_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqw_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rsb_z_p_bi_s16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsb_z_p_bi_s32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsb_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsh_z_p_bi_s32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsh_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsw_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rw_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rw_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1sb_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1sb_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1sb_z_p_bi_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sb_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sb_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sb_z_p_br_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + 
{"ld1sb_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sb_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sb_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1sb_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1sb_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1sh_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1sh_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1sh_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sh_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sh_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sh_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sh_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1sh_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1sh_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1sh_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1sh_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ + {"ld1sh_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1sw_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1sw_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sw_z_p_br_s64"_h, \ + 
&VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sw_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1sw_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1sw_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1sw_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1w_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1w_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1w_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1w_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1w_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1w_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1w_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1w_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1w_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1w_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1w_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets}, \ + {"ld1w_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld2b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld2d_z_p_bi_contiguous"_h, \ + 
&VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld2h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld2w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld3b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld3d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld3h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld3w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld4b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld4d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld4h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + 
{"ld4w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ldff1b_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1b_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1b_z_p_br_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_br_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1b_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1b_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1d_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1d_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1d_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1d_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1d_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1d_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1h_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1h_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1h_z_p_br_u16"_h, \ + 
&VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1h_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1h_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1h_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1h_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1h_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1h_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1h_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ + {"ldff1h_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1sb_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1sb_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1sb_z_p_br_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sb_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sb_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sb_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1sb_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1sb_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1sh_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1sh_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + 
{"ldff1sh_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sh_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sh_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1sh_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1sh_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1sh_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1sh_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ + {"ldff1sh_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1sw_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1sw_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sw_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1sw_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1sw_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1sw_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1w_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1w_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1w_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1w_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1w_z_p_bz_d_64_scaled"_h, \ + 
&VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1w_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1w_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1w_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1w_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets}, \ + {"ldff1w_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldnf1b_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1b_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1b_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1b_z_p_bi_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1d_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1h_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1h_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1h_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sb_z_p_bi_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sb_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sb_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sh_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sh_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sw_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + 
{"ldnf1w_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1w_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnt1b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldnt1d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldnt1h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldnt1w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldr_p_bi"_h, &VISITORCLASS::VisitSVELoadPredicateRegister}, \ + {"ldr_z_bi"_h, &VISITORCLASS::VisitSVELoadVectorRegister}, \ + {"lslr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsl_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"lsl_z_p_zw"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \ + {"lsl_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsl_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"lsl_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"lsrr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsr_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"lsr_z_p_zw"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \ + {"lsr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsr_z_zi"_h, 
&VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"lsr_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"mad_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"mla_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"mls_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"movprfx_z_p_z"_h, &VISITORCLASS::VisitSVEMovprfx}, \ + {"movprfx_z_z"_h, \ + &VISITORCLASS::VisitSVEConstructivePrefix_Unpredicated}, \ + {"msb_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"mul_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \ + {"mul_z_zi"_h, &VISITORCLASS::VisitSVEIntMulImm_Unpredicated}, \ + {"nands_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"nand_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"neg_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"nors_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"nor_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"not_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"orns_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orn_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orrs_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orr_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orr_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"orr_z_zi"_h, \ + &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \ + {"orr_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"orv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"pfalse_p"_h, &VISITORCLASS::VisitSVEPredicateZero}, \ + {"pfirst_p_p_p"_h, &VISITORCLASS::VisitSVEPredicateFirstActive}, \ + {"pnext_p_p_p"_h, &VISITORCLASS::VisitSVEPredicateNextActive}, \ + {"prfb_i_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfb_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + 
{"prfb_i_p_bi_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfb_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfb_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfb_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfb_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"prfd_i_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfd_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + {"prfd_i_p_bi_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfd_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfd_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfd_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfd_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"prfh_i_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfh_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + {"prfh_i_p_bi_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfh_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfh_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfh_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfh_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"prfw_i_p_ai_d"_h, \ + 
&VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfw_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + {"prfw_i_p_bi_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfw_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfw_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfw_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfw_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"ptest_p_p"_h, &VISITORCLASS::VisitSVEPredicateTest}, \ + {"ptrues_p_s"_h, &VISITORCLASS::VisitSVEPredicateInitialize}, \ + {"ptrue_p_s"_h, &VISITORCLASS::VisitSVEPredicateInitialize}, \ + {"punpkhi_p_p"_h, &VISITORCLASS::VisitSVEUnpackPredicateElements}, \ + {"punpklo_p_p"_h, &VISITORCLASS::VisitSVEUnpackPredicateElements}, \ + {"rbit_z_p_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"rdffrs_p_p_f"_h, \ + &VISITORCLASS::VisitSVEPredicateReadFromFFR_Predicated}, \ + {"rdffr_p_f"_h, \ + &VISITORCLASS::VisitSVEPredicateReadFromFFR_Unpredicated}, \ + {"rdffr_p_p_f"_h, \ + &VISITORCLASS::VisitSVEPredicateReadFromFFR_Predicated}, \ + {"rdvl_r_i"_h, &VISITORCLASS::VisitSVEStackFrameSize}, \ + {"revb_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"revh_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"revw_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"rev_p_p"_h, &VISITORCLASS::VisitSVEReversePredicateElements}, \ + {"rev_z_z"_h, &VISITORCLASS::VisitSVEReverseVectorElements}, \ + {"sabd_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"saddv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"scvtf_z_p_z_h2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_w2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, 
\ + {"scvtf_z_p_z_w2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_w2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_x2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_x2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_x2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"sdivr_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"sdiv_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"sdot_z_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddUnpredicated}, \ + {"sdot_z_zzzi_d"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"sdot_z_zzzi_s"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"sel_p_p_pp"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"sel_z_p_zz"_h, &VISITORCLASS::VisitSVEVectorSelect}, \ + {"setffr_f"_h, &VISITORCLASS::VisitSVEFFRInitialise}, \ + {"smaxv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"smax_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"smax_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"sminv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"smin_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"smin_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"smulh_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \ + {"splice_z_p_zz_des"_h, &VISITORCLASS::VisitSVEVectorSplice}, \ + {"sqadd_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sqadd_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"sqdecb_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecb_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecd_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecd_z_zs"_h, \ + 
&VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqdech_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdech_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdech_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqdecp_r_p_r_sx"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqdecp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqdecp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqdecw_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecw_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqincb_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincb_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincd_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincd_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqinch_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqinch_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqinch_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqincp_r_p_r_sx"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqincp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqincp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqincw_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincw_z_zs"_h, \ + 
&VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqsub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sqsub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"st1b_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1b_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \ + {"st1b_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1b_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + {"st1b_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1b_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1b_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \ + {"st1d_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1d_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1d_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + {"st1d_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \ + {"st1d_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1d_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"st1d_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1h_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1h_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \ + {"st1h_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1h_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + 
{"st1h_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \ + {"st1h_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1h_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"st1h_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1h_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets}, \ + {"st1h_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \ + {"st1w_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1w_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \ + {"st1w_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1w_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + {"st1w_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \ + {"st1w_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1w_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"st1w_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1w_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets}, \ + {"st1w_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \ + {"st2b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st2d_z_p_bi_contiguous"_h, \ + 
&VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st2h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st2w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st4b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st4d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st4h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + 
{"st4w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"stnt1b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"stnt1d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"stnt1h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"stnt1w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"str_p_bi"_h, &VISITORCLASS::VisitSVEStorePredicateRegister}, \ + {"str_z_bi"_h, &VISITORCLASS::VisitSVEStoreVectorRegister}, \ + {"subr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \ + {"subr_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sub_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \ + {"sub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"sunpkhi_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"sunpklo_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"sxtb_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"sxth_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"sxtw_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"tbl_z_zz_1"_h, &VISITORCLASS::VisitSVETableLookup}, \ + 
{"trn1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"trn1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"trn2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"trn2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"uabd_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"uaddv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"ucvtf_z_p_z_h2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_w2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_w2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_w2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_x2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_x2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_x2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"udf_only_perm_undef"_h, &VISITORCLASS::VisitReserved}, \ + {"udivr_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"udiv_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"udot_z_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddUnpredicated}, \ + {"udot_z_zzzi_d"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"udot_z_zzzi_s"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"umaxv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"umax_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"umax_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"uminv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"umin_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"umin_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"umulh_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \ + {"uqadd_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"uqadd_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"uqdecb_r_rs_uw"_h, \ + 
&VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecb_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecd_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecd_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqdech_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdech_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdech_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqdecp_r_p_r_uw"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqdecp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqdecp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqdecw_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecw_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqincb_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincb_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincd_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincd_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqinch_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqinch_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqinch_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqincp_r_p_r_uw"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ 
+ {"uqincp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqincp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqincw_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincw_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqsub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"uqsub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"uunpkhi_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"uunpklo_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"uxtb_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"uxth_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"uxtw_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"uzp1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"uzp1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"uzp2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"uzp2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"whilele_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"whilelo_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"whilels_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"whilelt_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"wrffr_f_p"_h, &VISITORCLASS::VisitSVEFFRWriteFromPredicate}, \ + {"zip1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"zip1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"zip2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"zip2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"adds_32s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"adds_64s_addsub_ext"_h, 
&VISITORCLASS::VisitAddSubExtended}, \ + {"add_32_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"add_64_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"subs_32s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"subs_64s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"sub_32_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"sub_64_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"adds_32s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"adds_64s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"add_32_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"add_64_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"subs_32s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"subs_64s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"sub_32_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"sub_64_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"adds_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"adds_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"add_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"add_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"subs_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"subs_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"sub_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"sub_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"adcs_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"adcs_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"adc_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"adc_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"sbcs_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"sbcs_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"sbc_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + 
{"sbc_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"ldaddab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadda_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadda_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadd_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadd_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaprb_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaprh_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldapr_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldapr_64l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclralb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclralh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclral_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclral_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclra_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclra_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + 
{"ldclrlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclr_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclr_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoralb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoralh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoral_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoral_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeora_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeora_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeor_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeor_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldseta_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldseta_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldseth_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetlh_32_memop"_h, 
&VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldset_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldset_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmax_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmax_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmina_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmina_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminlh_32_memop"_h, 
&VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmin_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmin_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumax_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumax_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumina_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumina_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminlh_32_memop"_h, 
&VISITORCLASS::VisitAtomicMemory}, \ + {"lduminl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumin_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumin_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swph_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swplb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swplh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swp_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swp_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"bfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"bfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"sbfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"sbfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"ubfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"ubfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"cbnz_32_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"cbnz_64_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"cbz_32_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"cbz_64_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"b_only_condbranch"_h, &VISITORCLASS::VisitConditionalBranch}, \ + {"ccmn_32_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + 
{"ccmn_64_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + {"ccmp_32_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + {"ccmp_64_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + {"ccmn_32_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"ccmn_64_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"ccmp_32_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"ccmp_64_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"csel_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csel_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinc_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinc_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinv_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinv_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csneg_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csneg_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"sha1h_ss_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \ + {"sha1su1_vv_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \ + {"sha256su0_vv_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \ + {"sha1c_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha1m_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha1p_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha1su0_vvv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha256h2_qqv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha256h_qqv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha256su1_vvv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"aesd_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + {"aese_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + {"aesimc_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + 
{"aesmc_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + {"autda_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autdb_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autdza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autdzb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autia_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autib_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autiza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autizb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"cls_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"cls_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"clz_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"clz_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacda_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacdb_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacdza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacdzb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacia_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacib_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"paciza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacizb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rbit_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rbit_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev16_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev16_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev32_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + 
{"xpacd_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"xpaci_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"asrv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"asrv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32b_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32cb_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32ch_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32cw_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32cx_64c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32h_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32w_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32x_64c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"gmi_64g_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"irg_64i_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lslv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lslv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lsrv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lsrv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"pacga_64p_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"rorv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"rorv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"sdiv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"sdiv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"udiv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"udiv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"madd_32a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"madd_64a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"msub_32a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + 
{"msub_64a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"smaddl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"smsubl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"smulh_64_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"umaddl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"umsubl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"umulh_64_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"setf16_only_setf"_h, &VISITORCLASS::VisitEvaluateIntoFlags}, \ + {"setf8_only_setf"_h, &VISITORCLASS::VisitEvaluateIntoFlags}, \ + {"brk_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"dcps1_dc_exception"_h, &VISITORCLASS::VisitException}, \ + {"dcps2_dc_exception"_h, &VISITORCLASS::VisitException}, \ + {"dcps3_dc_exception"_h, &VISITORCLASS::VisitException}, \ + {"hlt_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"hvc_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"smc_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"svc_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"extr_32_extract"_h, &VISITORCLASS::VisitExtract}, \ + {"extr_64_extract"_h, &VISITORCLASS::VisitExtract}, \ + {"fcmpe_dz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_d_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_hz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_h_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_sz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_s_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_dz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_d_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_hz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_h_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_sz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_s_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fccmpe_d_floatccmp"_h, 
&VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmpe_h_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmpe_s_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmp_d_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmp_h_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmp_s_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fcsel_d_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \ + {"fcsel_h_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \ + {"fcsel_s_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \ + {"bfcvt_bs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fabs_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fabs_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fabs_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_dh_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_ds_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_hd_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_hs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_sd_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_sh_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fmov_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fmov_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fmov_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fneg_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fneg_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fneg_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint32x_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint32x_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint32z_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + 
{"frint32z_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64x_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64x_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64z_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64z_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinta_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinta_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinta_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinti_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinti_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinti_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintm_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintm_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintm_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintn_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintn_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintn_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintp_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintp_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintp_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintx_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintx_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintx_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintz_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintz_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintz_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fsqrt_d_floatdp1"_h, 
&VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fsqrt_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fsqrt_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fadd_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fadd_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fadd_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fdiv_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fdiv_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fdiv_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmaxnm_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmaxnm_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmaxnm_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmax_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmax_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmax_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fminnm_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fminnm_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fminnm_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmin_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmin_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmin_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmul_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmul_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmul_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fnmul_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fnmul_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fnmul_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fsub_d_floatdp2"_h, 
&VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fsub_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fsub_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmadd_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmadd_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmadd_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmsub_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmsub_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmsub_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmadd_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmadd_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmadd_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmsub_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmsub_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmsub_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fcvtzs_32d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_32h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_32s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_64d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_64h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_64s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_32d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_32h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_32s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_64d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_64h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_64s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_d32_float2fix"_h, 
&VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_d64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_h32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_h64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_s32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_s64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_d32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_d64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_h32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_h64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_s32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_s64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fmov_d_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \ + {"fmov_h_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \ + {"fmov_s_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \ + {"fcvtas_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + 
{"fcvtms_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_32d_float2int"_h, 
&VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fjcvtzs_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_64vx_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_v64i_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + 
{"scvtf_d32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_s64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_d32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_s64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ldrsw_64_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_32_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_64_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_d_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_q_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_s_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"prfm_p_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"casab_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casah_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casalb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casalh_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casal_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casal_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casa_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casa_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"cash_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caslb_c32_ldstexcl"_h, 
&VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caslh_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casl_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casl_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspal_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspal_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspa_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspa_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspl_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspl_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casp_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casp_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"cas_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"cas_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldarb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldarh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldar_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldar_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxp_lp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxp_lp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxrb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxrh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxr_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxr_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlarb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlarh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlar_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlar_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxp_lp32_ldstexcl"_h, 
&VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxp_lp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxrb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxrh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxr_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxr_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllrb_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllrh_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllr_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllr_sl64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlrb_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlrh_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlr_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlr_sl64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxp_sp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxp_sp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxrb_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxrh_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxr_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxr_sr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxp_sp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxp_sp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxrb_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxrh_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxr_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxr_sr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldraa_64w_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \ + {"ldraa_64_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \ + {"ldrab_64w_ldst_pac"_h, 
&VISITORCLASS::VisitLoadStorePAC}, \ + {"ldrab_64_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \ + {"ldnp_32_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_64_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_d_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_q_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_s_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_32_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_64_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_d_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_q_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_s_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldpsw_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_32_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_d_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_q_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_s_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_32_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_d_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_q_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_s_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldpsw_64_ldstpair_post"_h, \ + &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_32_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_64_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_d_ldstpair_post"_h, 
&VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_q_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_s_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_32_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_64_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_d_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_q_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_s_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldpsw_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_32_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_d_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_q_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_s_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_32_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_d_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_q_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_s_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldrb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsb_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsh_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsw_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_64_ldst_immpost"_h, 
&VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_b_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_d_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_h_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_q_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_s_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"strb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"strh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_b_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_d_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_h_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_q_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_s_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsb_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsh_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsw_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_b_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_d_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_h_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_q_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_s_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, 
\ + {"strb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"strh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_b_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_d_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_h_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_q_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_s_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldapurb_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapurh_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursb_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursb_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursh_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursh_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursw_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapur_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapur_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlurb_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlurh_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlur_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlur_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldrb_32bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrb_32b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrh_32_ldst_regoff"_h, 
&VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_32bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_32b_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_64bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_64b_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsh_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsw_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_bl_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_d_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_h_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_q_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_s_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"prfm_p_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"strb_32bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"strb_32b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"strh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_bl_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_d_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_h_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_q_ldst_regoff"_h, 
&VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_s_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldurb_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldurh_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursb_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursb_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursh_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursh_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursw_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_b_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_d_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_h_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_q_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_s_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"prfum_p_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"sturb_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"sturh_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_b_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_d_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_h_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_q_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + 
{"stur_s_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldrb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsb_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsh_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsw_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_b_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_d_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_h_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_q_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_s_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"prfm_p_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"strb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"strh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_b_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_d_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_h_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_q_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_s_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ands_32s_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"ands_64s_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"and_32_log_imm"_h, 
&VISITORCLASS::VisitLogicalImmediate}, \ + {"and_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"eor_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"eor_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"orr_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"orr_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"ands_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"ands_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"and_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"and_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bics_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bics_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bic_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bic_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eon_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eon_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eor_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eor_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orn_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orn_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orr_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orr_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"movk_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movk_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movn_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movn_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movz_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movz_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"fabs_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmeq_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmge_asimdmiscfp16_fz"_h, 
&VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmgt_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmle_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmlt_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtas_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtau_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtms_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtmu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtns_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtnu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtps_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtpu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtzs_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtzu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fneg_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frecpe_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frinta_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frinti_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintm_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintn_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintp_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintx_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintz_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frsqrte_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fsqrt_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"scvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"ucvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"addhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"pmull_asimddiff_l"_h, 
&VISITORCLASS::VisitNEON3Different}, \ + {"raddhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"rsubhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sabdl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"saddl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"saddw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"smlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"smlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"smull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sqdmlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sqdmlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sqdmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"ssubl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"ssubw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"subhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uabdl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uaddl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uaddw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"umlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"umlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"umull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"usubl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"usubw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"addp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"add_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmeq_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmhi_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + 
{"cmhs_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmtst_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"facge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"facgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"faddp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcmeq_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcmge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcmgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fdiv_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmaxnmp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmaxnm_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fminnmp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fminnm_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmla_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmls_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmulx_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"frecps_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"frsqrts_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqdmulh_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqrdmulh_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqrshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqsub_asimdsame_only"_h, 
&VISITORCLASS::VisitNEON3Same}, \ + {"srshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqrshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"urshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"ushl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcadd_asimdsame2_c"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"fcmla_asimdsame2_c"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"sdot_asimdsame2_d"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"sqrdmlah_asimdsame2_only"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"sqrdmlsh_asimdsame2_only"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"udot_asimdsame2_d"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"fabd_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"facge_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"facgt_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"faddp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fadd_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fcmeq_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fcmge_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fcmgt_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fdiv_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmaxnmp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmaxnm_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmaxp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmax_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fminnmp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ 
+ {"fminnm_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fminp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmin_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmla_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmls_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmulx_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmul_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"frecps_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"frsqrts_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fsub_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"addv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"saddlv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"smaxv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"sminv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"uaddlv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"umaxv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"uminv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"mla_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"mls_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"mul_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqdmulh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqrdmlah_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqrdmlsh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqrdmulh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"dup_asimdins_dr_r"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"dup_asimdins_dv_v"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"ins_asimdins_ir_r"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"ins_asimdins_iv_v"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"smov_asimdins_w_w"_h, &VISITORCLASS::VisitNEONCopy}, \ + 
{"smov_asimdins_x_x"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"umov_asimdins_w_w"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"umov_asimdins_x_x"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"ext_asimdext_only"_h, &VISITORCLASS::VisitNEONExtract}, \ + {"ld1_asisdlse_r1_1v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlse_r2_2v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlse_r3_3v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlse_r4_4v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld2_asisdlse_r2"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld3_asisdlse_r3"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld4_asisdlse_r4"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r1_1v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r2_2v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r3_3v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r4_4v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st2_asisdlse_r2"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st3_asisdlse_r3"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st4_asisdlse_r4"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlsep_i1_i1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_i2_i2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_i3_i3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_i4_i4"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r1_r1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r2_r2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r3_r3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r4_r4"_h, \ + 
&VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld2_asisdlsep_i2_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld2_asisdlsep_r2_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld3_asisdlsep_i3_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld3_asisdlsep_r3_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld4_asisdlsep_i4_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld4_asisdlsep_r4_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_i1_i1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_i2_i2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_i3_i3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_i4_i4"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r1_r1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r2_r2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r3_r3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r4_r4"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st2_asisdlsep_i2_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st2_asisdlsep_r2_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st3_asisdlsep_i3_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st3_asisdlsep_r3_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st4_asisdlsep_i4_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st4_asisdlsep_r4_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1r_asisdlso_r1"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1_asisdlso_b1_1b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + 
{"ld1_asisdlso_d1_1d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1_asisdlso_h1_1h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1_asisdlso_s1_1s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2r_asisdlso_r2"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_b2_2b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_d2_2d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_h2_2h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_s2_2s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3r_asisdlso_r3"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_b3_3b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_d3_3d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_h3_3h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_s3_3s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4r_asisdlso_r4"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_b4_4b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_d4_4d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_h4_4h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_s4_4s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_b1_1b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_d1_1d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_h1_1h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_s1_1s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_b2_2b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_d2_2d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_h2_2h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_s2_2s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + 
{"st3_asisdlso_b3_3b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st3_asisdlso_d3_3d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st3_asisdlso_h3_3h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st3_asisdlso_s3_3s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_b4_4b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_d4_4d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_h4_4h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_s4_4s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1r_asisdlsop_r1_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1r_asisdlsop_rx1_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_b1_i1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_bx1_r1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_d1_i1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_dx1_r1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_h1_i1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_hx1_r1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_s1_i1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_sx1_r1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2r_asisdlsop_r2_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2r_asisdlsop_rx2_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_b2_i2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_bx2_r2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_d2_i2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + 
{"ld2_asisdlsop_dx2_r2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_h2_i2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_hx2_r2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_s2_i2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_sx2_r2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3r_asisdlsop_r3_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3r_asisdlsop_rx3_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_b3_i3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_bx3_r3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_d3_i3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_dx3_r3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_h3_i3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_hx3_r3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_s3_i3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_sx3_r3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4r_asisdlsop_r4_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4r_asisdlsop_rx4_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_b4_i4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_bx4_r4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_d4_i4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_dx4_r4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_h4_i4h"_h, \ + 
&VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_hx4_r4h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_s4_i4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_sx4_r4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_b1_i1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_bx1_r1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_d1_i1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_dx1_r1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_h1_i1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_hx1_r1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_s1_i1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_sx1_r1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_b2_i2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_bx2_r2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_d2_i2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_dx2_r2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_h2_i2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_hx2_r2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_s2_i2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_sx2_r2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_b3_i3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_bx3_r3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, 
\ + {"st3_asisdlsop_d3_i3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_dx3_r3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_h3_i3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_hx3_r3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_s3_i3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_sx3_r3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_b4_i4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_bx4_r4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_d4_i4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_dx4_r4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_h4_i4h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_hx4_r4h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_s4_i4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_sx4_r4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"bic_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"bic_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"fmov_asimdimm_d2_d"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"fmov_asimdimm_h_h"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"fmov_asimdimm_s_s"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_d2_d"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_d_ds"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_m_sm"_h, 
&VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_n_b"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"mvni_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"mvni_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"mvni_asimdimm_m_sm"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"orr_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"orr_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"trn1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"trn2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"uzp1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"uzp2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"zip1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"zip2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"sqabs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"sqneg_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"sqxtn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"sqxtun_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"suqadd_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"uqxtn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"usqadd_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmeq_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmge_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmgt_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmle_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmlt_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtas_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtau_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtms_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + 
{"fcvtmu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtns_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtnu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtps_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtpu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtzs_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtzu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"frecpe_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"frecpx_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"frsqrte_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"scvtf_asisdmiscfp16_r"_h, &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"ucvtf_asisdmiscfp16_r"_h, &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"sqdmlal_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \ + {"sqdmlsl_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \ + {"sqdmull_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \ + {"sqadd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqdmulh_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqrdmulh_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqrshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqsub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"srshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqadd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqrshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqsub_asisdsame_only"_h, 
&VISITORCLASS::VisitNEONScalar3Same}, \ + {"urshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"ushl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fabd_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"facge_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"facgt_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fcmeq_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fcmge_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fcmgt_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fmulx_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"frecps_asisdsamefp16_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"frsqrts_asisdsamefp16_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"sqdmulh_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqrdmlah_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqrdmlsh_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqrdmulh_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"dup_asisdone_only"_h, &VISITORCLASS::VisitNEONScalarCopy}, \ + {"addp_asisdpair_only"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"faddp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"faddp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxnmp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxnmp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminnmp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminnmp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminp_asisdpair_only_h"_h, 
&VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fcvtzs_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"fcvtzu_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"scvtf_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshlu_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ucvtf_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"uqshl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshlu_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqshl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"uqshl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"shl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sli_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"tbl_asimdtbl_l1_1"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbl_asimdtbl_l2_2"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbl_asimdtbl_l3_3"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbl_asimdtbl_l4_4"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l1_1"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l2_2"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l3_3"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l4_4"_h, &VISITORCLASS::VisitNEONTable}, \ + {"adrp_only_pcreladdr"_h, &VISITORCLASS::VisitPCRelAddressing}, \ + {"adr_only_pcreladdr"_h, &VISITORCLASS::VisitPCRelAddressing}, \ + {"rmif_only_rmif"_h, &VISITORCLASS::VisitRotateRightIntoFlags}, \ + {"bti_hb_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"clrex_bn_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"dmb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"dsb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"hint_hm_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"mrs_rs_systemmove"_h, &VISITORCLASS::VisitSystem}, \ + 
{"msr_si_pstate"_h, &VISITORCLASS::VisitSystem}, \ + {"msr_sr_systemmove"_h, &VISITORCLASS::VisitSystem}, \ + {"psb_hc_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"sb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"sysl_rc_systeminstrs"_h, &VISITORCLASS::VisitSystem}, \ + {"sys_cr_systeminstrs"_h, &VISITORCLASS::VisitSystem}, \ + {"tcommit_only_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"tsb_hc_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"tbnz_only_testbranch"_h, &VISITORCLASS::VisitTestBranch}, \ + {"tbz_only_testbranch"_h, &VISITORCLASS::VisitTestBranch}, \ + {"bl_only_branch_imm"_h, &VISITORCLASS::VisitUnconditionalBranch}, \ + {"b_only_branch_imm"_h, &VISITORCLASS::VisitUnconditionalBranch}, \ + {"blraaz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blraa_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blrabz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blrab_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blr_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"braaz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"braa_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"brabz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"brab_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"br_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"drps_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"eretaa_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"eretab_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"eret_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"retaa_64e_branch_reg"_h, \ + 
&VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"retab_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"ret_64r_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"addg_64_addsub_immtags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bcax_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfcvtn_asimdmisc_4s"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfdot_asimdelem_e"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfdot_asimdsame2_d"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlal_asimdelem_f"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlal_asimdsame2_f_"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmmla_asimdsame2_e"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"dsb_bon_barriers"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"eor3_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ld64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldg_64loffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsb_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsh_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsw_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"rax1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sha512h2_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sha512h_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sha512su0_vv2_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \ + 
{"sha512su1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3partw1_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3partw2_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3ss1_vvv4_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt1a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st2g_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st2g_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st2g_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st64bv_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st64bv0_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stgp_64_ldstpair_off"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stgp_64_ldstpair_post"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stgp_64_ldstpair_pre"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stg_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stg_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stg_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stz2g_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stz2g_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + 
{"stz2g_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stzgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stzg_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stzg_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stzg_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"subg_64_addsub_immtags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"subps_64s_dp_2src"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"subp_64s_dp_2src"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"tcancel_ex_exception"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"tstart_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ttest_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"wfet_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"wfit_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"xar_vvv2_crypto3_imm6"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfcvt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfcvtnt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfdot_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfdot_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalb_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalb_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalt_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalt_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmmla_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, { \ + "unallocated"_h, &VISITORCLASS::VisitUnallocated \ + } + +#define SIM_AUD_VISITOR_MAP(VISITORCLASS) \ + {"autia1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autiasp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autiaz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autib1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autibsp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autibz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"axflag_m_pstate"_h, 
&VISITORCLASS::VisitSystem}, \ + {"cfinv_m_pstate"_h, &VISITORCLASS::VisitSystem}, \ + {"csdb_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"dgh_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"esb_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"isb_bi_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"nop_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacia1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"paciasp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"paciaz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacib1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacibsp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacibz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pssbb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"sev_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"sevl_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"ssbb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"wfe_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"wfi_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"xaflag_m_pstate"_h, &VISITORCLASS::VisitSystem}, \ + {"xpaclri_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"yield_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"abs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cls_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"clz_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmeq_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmge_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmgt_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmle_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmlt_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cnt_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fabs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmeq_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmge_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmgt_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + 
{"fcmle_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmlt_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtas_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtau_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtl_asimdmisc_l"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtms_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtmu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtns_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtnu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtps_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtpu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtxn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtzs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtzu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fneg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frecpe_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint32x_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint32z_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint64x_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint64z_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frinta_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frinti_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintm_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintn_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintp_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintx_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintz_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frsqrte_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fsqrt_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"neg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + 
{"not_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rbit_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rev16_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rev32_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rev64_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sadalp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"saddlp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"scvtf_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"shll_asimdmisc_s"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqabs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqneg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqxtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqxtun_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"suqadd_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"uadalp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"uaddlp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"ucvtf_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"uqxtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"urecpe_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"ursqrte_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"usqadd_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"xtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"mla_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"mls_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"mul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"saba_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"shadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"shsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"smaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"smax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sminp_asimdsame_only"_h, 
&VISITORCLASS::VisitNEON3Same}, \ + {"smin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"srhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uaba_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uhsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"umaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"umax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"umin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"urhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"and_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bic_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bif_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bit_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bsl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"eor_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"orr_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"orn_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"pmul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlal2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlal_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlsl2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlsl_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"ushll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sshll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"shrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"rshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqrshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqshrun_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + 
{"sqrshrun_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"uqshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"uqrshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sri_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"srshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"srsra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ssra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"urshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ursra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ushr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"usra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"scvtf_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ucvtf_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"fcvtzs_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"fcvtzu_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqdmlal_asisdelem_l"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqdmlsl_asisdelem_l"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqdmull_asisdelem_l"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmla_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmla_asisdelem_r_sd"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmls_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmls_asisdelem_r_sd"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmulx_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmulx_asisdelem_r_sd"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmul_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmul_asisdelem_r_sd"_h, \ + 
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fabd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"facge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"facgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fcmeq_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fcmge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fcmgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fmulx_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"frecps_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"frsqrts_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmeq_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmhi_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmhs_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmtst_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"add_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqrdmlah_asisdsame2_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameExtra}, \ + {"sqrdmlsh_asisdsame2_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameExtra}, \ + {"fmaxnmv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fmaxv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminnmv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fmaxnmv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminnmv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fmaxv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"shl_asisdshf_r"_h, 
&VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sli_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sri_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"srshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"srsra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ssra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"urshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ursra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ushr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"usra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqrshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqrshrun_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshrun_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"uqrshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"uqshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"cmeq_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmge_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmgt_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmle_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmlt_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"abs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"neg_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmeq_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmge_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmgt_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmle_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + 
{"fcmlt_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtas_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtau_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtms_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtmu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtns_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtnu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtps_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtpu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtxn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtzs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtzu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"frecpe_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"frecpx_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"frsqrte_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"scvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, { \ + "ucvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc \ + } diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index 83ce3174..3d3e5fd6 100644 --- a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -34,6 +34,648 @@ namespace vixl { namespace aarch64 { +const Disassembler::FormToVisitorFnMap *Disassembler::GetFormToVisitorFnMap() { + static const FormToVisitorFnMap form_to_visitor = { + DEFAULT_FORM_TO_VISITOR_MAP(Disassembler), + {"autia1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autiasp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autiaz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autib1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autibsp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autibz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"axflag_m_pstate"_h, 
&Disassembler::DisassembleNoArgs}, + {"cfinv_m_pstate"_h, &Disassembler::DisassembleNoArgs}, + {"csdb_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"dgh_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"ssbb_only_barriers"_h, &Disassembler::DisassembleNoArgs}, + {"pssbb_only_barriers"_h, &Disassembler::DisassembleNoArgs}, + {"esb_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"isb_bi_barriers"_h, &Disassembler::DisassembleNoArgs}, + {"nop_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacia1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"paciasp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"paciaz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacib1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacibsp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacibz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"sev_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"sevl_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"wfe_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"wfi_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"xaflag_m_pstate"_h, &Disassembler::DisassembleNoArgs}, + {"xpaclri_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"yield_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"abs_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"cls_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"clz_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"cnt_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"neg_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"rev16_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"rev32_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"rev64_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"sqabs_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"sqneg_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"suqadd_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"urecpe_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + 
{"ursqrte_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"usqadd_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"not_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegLogical}, + {"rbit_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegLogical}, + {"xtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"sqxtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"uqxtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"sqxtun_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"shll_asimdmisc_s"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"sadalp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"saddlp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"uadalp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"uaddlp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"cmeq_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmge_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmgt_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmle_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmlt_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"fcmeq_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmge_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmgt_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmle_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmlt_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcvtl_asimdmisc_l"_h, &Disassembler::DisassembleNEON2RegFPConvert}, + {"fcvtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegFPConvert}, + {"fcvtxn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegFPConvert}, + {"fabs_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtas_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtau_asimdmisc_r"_h, 
&Disassembler::DisassembleNEON2RegFP}, + {"fcvtms_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtmu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtns_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtnu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtps_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtpu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtzs_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtzu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fneg_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frecpe_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint32x_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint32z_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint64x_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint64z_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frinta_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frinti_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintm_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintn_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintp_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintx_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintz_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frsqrte_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fsqrt_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"scvtf_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"ucvtf_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"smlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"smlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"smull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"umlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + 
{"umlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"umull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sqdmull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sqdmlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sqdmlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sdot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"udot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"usdot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"sudot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"fmlal2_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fmlal_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fmlsl2_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fmlsl_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fcmla_asimdelem_c_h"_h, + &Disassembler::DisassembleNEONComplexMulByElement}, + {"fcmla_asimdelem_c_s"_h, + &Disassembler::DisassembleNEONComplexMulByElement}, + {"fmla_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmls_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmulx_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmul_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmla_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"fmls_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"fmulx_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"fmul_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"mla_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"mls_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"mul_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + 
{"saba_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"sabd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"shadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"shsub_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"smaxp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"smax_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"sminp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"smin_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"srhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uaba_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uabd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uhsub_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"umaxp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"umax_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uminp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"umin_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"urhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"and_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"bic_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"bif_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"bit_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"bsl_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"eor_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"orr_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"orn_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"pmul_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"fmlal2_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM}, + {"fmlal_asimdsame_f"_h, 
&Disassembler::DisassembleNEON3SameFHM}, + {"fmlsl2_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM}, + {"fmlsl_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM}, + {"sri_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"srshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"srsra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"sshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ssra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"urshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ursra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ushr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"usra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"scvtf_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ucvtf_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"fcvtzs_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"fcvtzu_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ushll_asimdshf_l"_h, &Disassembler::DisassembleNEONShiftLeftLongImm}, + {"sshll_asimdshf_l"_h, &Disassembler::DisassembleNEONShiftLeftLongImm}, + {"shrn_asimdshf_n"_h, &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"rshrn_asimdshf_n"_h, &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqrshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqshrun_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqrshrun_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"uqshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"uqrshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqdmlal_asisdelem_l"_h, + &Disassembler::DisassembleNEONScalarSatMulLongIndex}, + {"sqdmlsl_asisdelem_l"_h, + 
&Disassembler::DisassembleNEONScalarSatMulLongIndex}, + {"sqdmull_asisdelem_l"_h, + &Disassembler::DisassembleNEONScalarSatMulLongIndex}, + {"fmla_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmla_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmls_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmls_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmulx_asisdelem_rh_h"_h, + &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmulx_asisdelem_r_sd"_h, + &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmul_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmul_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fabd_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"facge_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"facgt_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"fcmeq_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"fcmge_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"fcmgt_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"fmulx_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"frecps_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"frsqrts_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"sqrdmlah_asisdsame2_only"_h, &Disassembler::VisitNEONScalar3Same}, + {"sqrdmlsh_asisdsame2_only"_h, &Disassembler::VisitNEONScalar3Same}, + {"cmeq_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmge_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmgt_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmhi_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmhs_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmtst_asisdsame_only"_h, + 
&Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"add_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"sub_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"fmaxnmv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fmaxv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fminnmv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fminv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fmaxnmv_asimdall_only_sd"_h, + &Disassembler::DisassembleNEONFPAcrossLanes}, + {"fminnmv_asimdall_only_sd"_h, + &Disassembler::DisassembleNEONFPAcrossLanes}, + {"fmaxv_asimdall_only_sd"_h, &Disassembler::DisassembleNEONFPAcrossLanes}, + {"fminv_asimdall_only_sd"_h, &Disassembler::DisassembleNEONFPAcrossLanes}, + {"shl_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sli_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sri_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"srshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"srsra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"ssra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"urshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"ursra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"ushr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"usra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sqrshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"sqrshrun_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"sqshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"sqshrun_asisdshf_n"_h, + 
&Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"uqrshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"uqshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"cmeq_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmge_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmgt_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmle_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmlt_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"abs_asisdmisc_r"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"neg_asisdmisc_r"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"fcmeq_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmge_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmgt_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmle_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmlt_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtas_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtau_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtms_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtmu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtns_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtnu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtps_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtpu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtxn_asisdmisc_n"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtzs_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtzu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + 
{"frecpe_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"frecpx_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"frsqrte_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"scvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"ucvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"adclb_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"adclt_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"addhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"addhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"addp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"aesd_z_zz"_h, &Disassembler::Disassemble_ZdnB_ZdnB_ZmB}, + {"aese_z_zz"_h, &Disassembler::Disassemble_ZdnB_ZdnB_ZmB}, + {"aesimc_z_z"_h, &Disassembler::Disassemble_ZdnB_ZdnB}, + {"aesmc_z_z"_h, &Disassembler::Disassemble_ZdnB_ZdnB}, + {"bcax_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"bdep_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"bext_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"bgrp_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"bsl1n_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"bsl2n_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"bsl_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"cadd_z_zz"_h, &Disassembler::DisassembleSVEComplexIntAddition}, + {"cdot_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb_const}, + {"cdot_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const}, + {"cdot_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const}, + {"cmla_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT_const}, + {"cmla_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const}, + {"cmla_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const}, + {"eor3_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"eorbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + 
{"eortb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"ext_z_zi_con"_h, &Disassembler::Disassemble_ZdB_Zn1B_Zn2B_imm}, + {"faddp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fcvtlt_z_p_z_h2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnH}, + {"fcvtlt_z_p_z_s2d"_h, &Disassembler::Disassemble_ZdD_PgM_ZnS}, + {"fcvtnt_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD}, + {"fcvtnt_z_p_z_s2h"_h, &Disassembler::Disassemble_ZdH_PgM_ZnS}, + {"fcvtx_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD}, + {"fcvtxnt_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD}, + {"flogb_z_p_z"_h, &Disassembler::DisassembleSVEFlogb}, + {"fmaxnmp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fmaxp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fminnmp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fminp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fmlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"fmlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"fmlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"fmlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"histcnt_z_p_zz"_h, &Disassembler::Disassemble_ZdT_PgZ_ZnT_ZmT}, + {"histseg_z_zz"_h, &Disassembler::Disassemble_ZdB_ZnB_ZmB}, + {"ldnt1b_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1b_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1d_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1h_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1h_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sb_z_p_ar_d_64_unscaled"_h, + 
&Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sh_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sw_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1w_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1w_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"match_p_p_zz"_h, &Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT}, + {"mla_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"mla_z_zzzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"mla_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"mls_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"mls_z_zzzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"mls_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"mul_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"mul_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"mul_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"mul_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"nbsl_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"nmatch_p_p_zz"_h, &Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT}, + {"pmul_z_zz"_h, &Disassembler::Disassemble_ZdB_ZnB_ZmB}, + {"pmullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"pmullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"raddhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"raddhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"rax1_z_zz"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD}, + {"rshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"rshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"rsubhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"rsubhnt_z_zz"_h, 
&Disassembler::DisassembleSVEAddSubHigh}, + {"saba_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"sabalb_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sabalt_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sabdlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sabdlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sadalp_z_p_z"_h, &Disassembler::Disassemble_ZdaT_PgM_ZnTb}, + {"saddlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"saddlbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"saddlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"saddwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"saddwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"sbclb_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"sbclt_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"shadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"shrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"shrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"shsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"shsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sli_z_zzi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"sm4e_z_zz"_h, &Disassembler::Disassemble_ZdnS_ZdnS_ZmS}, + {"sm4ekey_z_zz"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS}, + {"smaxp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sminp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"smlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"smlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"smlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + 
{"smlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"smlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"smullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"smullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"smullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"splice_z_p_zz_con"_h, &Disassembler::Disassemble_ZdT_Pg_Zn1T_Zn2T}, + {"sqabs_z_p_z"_h, &Disassembler::Disassemble_ZdT_PgM_ZnT}, + {"sqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqcadd_z_zz"_h, &Disassembler::DisassembleSVEComplexIntAddition}, + {"sqdmlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + {"sqdmlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmlalbt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + {"sqdmlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + {"sqdmlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmlslbt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + 
{"sqdmlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"sqdmulh_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"sqdmulh_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"sqdmulh_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"sqdmullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sqdmullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"sqdmullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"sqdmullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sqdmullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"sqdmullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"sqneg_z_p_z"_h, &Disassembler::Disassemble_ZdT_PgM_ZnT}, + {"sqrdcmlah_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT_const}, + {"sqrdcmlah_z_zzzi_h"_h, + &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const}, + {"sqrdcmlah_z_zzzi_s"_h, + &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const}, + {"sqrdmlah_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"sqrdmlah_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm}, + {"sqrdmlah_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm}, + {"sqrdmlah_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm}, + {"sqrdmlsh_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"sqrdmlsh_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm}, + {"sqrdmlsh_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm}, + {"sqrdmlsh_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm}, + {"sqrdmulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"sqrdmulh_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"sqrdmulh_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"sqrdmulh_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"sqrshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqrshlr_z_p_zz"_h, 
&Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqrshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqrshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqrshrunb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqrshrunt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshl_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"sqshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqshlu_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"sqshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshrunb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshrunt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqxtnb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"sqxtnt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"sqxtunb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"sqxtunt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"srhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sri_z_zzi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"srshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"srshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"srshr_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"srsra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"sshllb_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"sshllt_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"ssra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"ssublb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"ssublbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + 
{"ssublt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"ssubltb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"ssubwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"ssubwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"stnt1b_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1b_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm}, + {"stnt1d_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1h_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1h_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm}, + {"stnt1w_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1w_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm}, + {"subhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"subhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"suqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"tbl_z_zz_2"_h, &Disassembler::Disassemble_ZdT_Zn1T_Zn2T_ZmT}, + {"tbx_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"uaba_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"uabalb_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uabalt_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uabdlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uabdlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uadalp_z_p_z"_h, &Disassembler::Disassemble_ZdaT_PgM_ZnTb}, + {"uaddlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uaddlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uaddwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"uaddwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"uhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uhsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uhsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"umaxp_z_p_zz"_h, 
&Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uminp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"umlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"umullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"umullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"umullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"uqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqrshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqrshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqrshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"uqrshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"uqshl_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"uqshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqshrnb_z_zi"_h, 
&Disassembler::DisassembleSVEShiftRightImm}, + {"uqshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"uqsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqxtnb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"uqxtnt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"urecpe_z_p_z"_h, &Disassembler::Disassemble_ZdS_PgM_ZnS}, + {"urhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"urshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"urshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"urshr_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"ursqrte_z_p_z"_h, &Disassembler::Disassemble_ZdS_PgM_ZnS}, + {"ursra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"ushllb_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"ushllt_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"usqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"usra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"usublb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"usublt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"usubwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"usubwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"whilege_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilegt_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilehi_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilehs_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilerw_p_rr"_h, &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilewr_p_rr"_h, &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"xar_z_zzi"_h, &Disassembler::Disassemble_ZdnT_ZdnT_ZmT_const}, + {"fmmla_z_zzz_s"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"fmmla_z_zzz_d"_h, 
&Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"smmla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"ummla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"usmmla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"usdot_z_zzz_s"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"smmla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B}, + {"ummla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B}, + {"usmmla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B}, + {"ld1row_z_p_bi_u32"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1row_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1rod_z_p_bi_u64"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1rod_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1rob_z_p_bi_u8"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1rob_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1roh_z_p_bi_u16"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1roh_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"usdot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex}, + {"sudot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex}, + {"usdot_asimdsame2_d"_h, &Disassembler::VisitNEON3SameExtra}, + }; + return &form_to_visitor; +} // NOLINT(readability/fn_size) + Disassembler::Disassembler() { buffer_size_ = 256; buffer_ = reinterpret_cast<char *>(malloc(buffer_size_)); @@ -42,7 +684,6 @@ Disassembler::Disassembler() { code_address_offset_ = 0; } - Disassembler::Disassembler(char *text_buffer, int buffer_size) { buffer_size_ = buffer_size; buffer_ = text_buffer; @@ -51,61 +692,46 @@ Disassembler::Disassembler(char *text_buffer, int buffer_size) { code_address_offset_ = 0; } - Disassembler::~Disassembler() { if (own_buffer_) { 
free(buffer_); } } - char *Disassembler::GetOutput() { return buffer_; } - void Disassembler::VisitAddSubImmediate(const Instruction *instr) { bool rd_is_zr = RdIsZROrSP(instr); bool stack_op = (rd_is_zr || RnIsZROrSP(instr)) && (instr->GetImmAddSub() == 0) ? true : false; - const char *mnemonic = ""; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Rds, 'Rns, 'IAddSub"; const char *form_cmp = "'Rns, 'IAddSub"; const char *form_mov = "'Rds, 'Rns"; - switch (instr->Mask(AddSubImmediateMask)) { - case ADD_w_imm: - case ADD_x_imm: { - mnemonic = "add"; + switch (form_hash_) { + case "add_32_addsub_imm"_h: + case "add_64_addsub_imm"_h: if (stack_op) { mnemonic = "mov"; form = form_mov; } break; - } - case ADDS_w_imm: - case ADDS_x_imm: { - mnemonic = "adds"; + case "adds_32s_addsub_imm"_h: + case "adds_64s_addsub_imm"_h: if (rd_is_zr) { mnemonic = "cmn"; form = form_cmp; } break; - } - case SUB_w_imm: - case SUB_x_imm: - mnemonic = "sub"; - break; - case SUBS_w_imm: - case SUBS_x_imm: { - mnemonic = "subs"; + case "subs_32s_addsub_imm"_h: + case "subs_64s_addsub_imm"_h: if (rd_is_zr) { mnemonic = "cmp"; form = form_cmp; } break; - } - default: - VIXL_UNREACHABLE(); } Format(instr, mnemonic, form); } @@ -114,37 +740,28 @@ void Disassembler::VisitAddSubImmediate(const Instruction *instr) { void Disassembler::VisitAddSubShifted(const Instruction *instr) { bool rd_is_zr = RdIsZROrSP(instr); bool rn_is_zr = RnIsZROrSP(instr); - const char *mnemonic = ""; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Rd, 'Rn, 'Rm'NDP"; const char *form_cmp = "'Rn, 'Rm'NDP"; const char *form_neg = "'Rd, 'Rm'NDP"; - switch (instr->Mask(AddSubShiftedMask)) { - case ADD_w_shift: - case ADD_x_shift: - mnemonic = "add"; - break; - case ADDS_w_shift: - case ADDS_x_shift: { - mnemonic = "adds"; + switch (form_hash_) { + case "adds_32_addsub_shift"_h: + case "adds_64_addsub_shift"_h: if (rd_is_zr) { mnemonic = "cmn"; form = form_cmp; } break; - } - case SUB_w_shift: 
- case SUB_x_shift: { - mnemonic = "sub"; + case "sub_32_addsub_shift"_h: + case "sub_64_addsub_shift"_h: if (rn_is_zr) { mnemonic = "neg"; form = form_neg; } break; - } - case SUBS_w_shift: - case SUBS_x_shift: { - mnemonic = "subs"; + case "subs_32_addsub_shift"_h: + case "subs_64_addsub_shift"_h: if (rd_is_zr) { mnemonic = "cmp"; form = form_cmp; @@ -152,10 +769,6 @@ void Disassembler::VisitAddSubShifted(const Instruction *instr) { mnemonic = "negs"; form = form_neg; } - break; - } - default: - VIXL_UNREACHABLE(); } Format(instr, mnemonic, form); } @@ -245,40 +858,12 @@ void Disassembler::VisitAddSubWithCarry(const Instruction *instr) { void Disassembler::VisitRotateRightIntoFlags(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(RotateRightIntoFlags)"; - - switch (instr->Mask(RotateRightIntoFlagsMask)) { - case RMIF: - mnemonic = "rmif"; - form = "'Xn, 'IRr, 'INzcv"; - break; - default: - VIXL_UNREACHABLE(); - } - - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Xn, 'IRr, 'INzcv"); } void Disassembler::VisitEvaluateIntoFlags(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(EvaluateIntoFlags)"; - - switch (instr->Mask(EvaluateIntoFlagsMask)) { - case SETF8: - mnemonic = "setf8"; - form = "'Wn"; - break; - case SETF16: - mnemonic = "setf16"; - form = "'Wn"; - break; - default: - VIXL_UNREACHABLE(); - } - - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Wn"); } @@ -361,60 +946,32 @@ bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) { void Disassembler::VisitLogicalShifted(const Instruction *instr) { bool rd_is_zr = RdIsZROrSP(instr); bool rn_is_zr = RnIsZROrSP(instr); - const char *mnemonic = ""; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Rd, 'Rn, 'Rm'NLo"; - switch (instr->Mask(LogicalShiftedMask)) { - case AND_w: - case AND_x: - mnemonic = "and"; - break; - case BIC_w: - case BIC_x: - mnemonic = 
"bic"; - break; - case EOR_w: - case EOR_x: - mnemonic = "eor"; - break; - case EON_w: - case EON_x: - mnemonic = "eon"; - break; - case BICS_w: - case BICS_x: - mnemonic = "bics"; - break; - case ANDS_w: - case ANDS_x: { - mnemonic = "ands"; + switch (form_hash_) { + case "ands_32_log_shift"_h: + case "ands_64_log_shift"_h: if (rd_is_zr) { mnemonic = "tst"; form = "'Rn, 'Rm'NLo"; } break; - } - case ORR_w: - case ORR_x: { - mnemonic = "orr"; + case "orr_32_log_shift"_h: + case "orr_64_log_shift"_h: if (rn_is_zr && (instr->GetImmDPShift() == 0) && (instr->GetShiftDP() == LSL)) { mnemonic = "mov"; form = "'Rd, 'Rm"; } break; - } - case ORN_w: - case ORN_x: { - mnemonic = "orn"; + case "orn_32_log_shift"_h: + case "orn_64_log_shift"_h: if (rn_is_zr) { mnemonic = "mvn"; form = "'Rd, 'Rm'NLo"; } break; - } - default: - VIXL_UNREACHABLE(); } Format(instr, mnemonic, form); @@ -422,42 +979,12 @@ void Disassembler::VisitLogicalShifted(const Instruction *instr) { void Disassembler::VisitConditionalCompareRegister(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'Rn, 'Rm, 'INzcv, 'Cond"; - - switch (instr->Mask(ConditionalCompareRegisterMask)) { - case CCMN_w: - case CCMN_x: - mnemonic = "ccmn"; - break; - case CCMP_w: - case CCMP_x: - mnemonic = "ccmp"; - break; - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Rn, 'Rm, 'INzcv, 'Cond"); } void Disassembler::VisitConditionalCompareImmediate(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'Rn, 'IP, 'INzcv, 'Cond"; - - switch (instr->Mask(ConditionalCompareImmediateMask)) { - case CCMN_w_imm: - case CCMN_x_imm: - mnemonic = "ccmn"; - break; - case CCMP_w_imm: - case CCMP_x_imm: - mnemonic = "ccmp"; - break; - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Rn, 'IP, 'INzcv, 'Cond"); } @@ -531,6 +1058,16 @@ void Disassembler::VisitBitfield(const Instruction 
*instr) { const char *form_bfx = "'Rd, 'Rn, 'IBr, 'IBs-r+1"; const char *form_lsl = "'Rd, 'Rn, 'IBZ-r"; + if (instr->GetSixtyFourBits() != instr->GetBitN()) { + VisitUnallocated(instr); + return; + } + + if ((instr->GetSixtyFourBits() == 0) && ((s > 31) || (r > 31))) { + VisitUnallocated(instr); + return; + } + switch (instr->Mask(BitfieldMask)) { case SBFM_w: case SBFM_x: { @@ -638,217 +1175,102 @@ void Disassembler::VisitPCRelAddressing(const Instruction *instr) { void Disassembler::VisitConditionalBranch(const Instruction *instr) { - switch (instr->Mask(ConditionalBranchMask)) { - case B_cond: - Format(instr, "b.'CBrn", "'TImmCond"); - break; - default: - VIXL_UNREACHABLE(); - } + // We can't use the mnemonic directly here, as there's no space between it and + // the condition. Assert that we have the correct mnemonic, then use "b" + // explicitly for formatting the output. + VIXL_ASSERT(form_hash_ == "b_only_condbranch"_h); + Format(instr, "b.'CBrn", "'TImmCond"); } void Disassembler::VisitUnconditionalBranchToRegister( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form; + const char *form = "'Xn"; - switch (instr->Mask(UnconditionalBranchToRegisterMask)) { - case BR: - mnemonic = "br"; - form = "'Xn"; - break; - case BLR: - mnemonic = "blr"; - form = "'Xn"; - break; - case RET: { - mnemonic = "ret"; + switch (form_hash_) { + case "ret_64r_branch_reg"_h: if (instr->GetRn() == kLinkRegCode) { - form = NULL; - } else { - form = "'Xn"; + form = ""; } break; - } - case BRAAZ: - mnemonic = "braaz"; - form = "'Xn"; - break; - case BRABZ: - mnemonic = "brabz"; - form = "'Xn"; - break; - case BLRAAZ: - mnemonic = "blraaz"; - form = "'Xn"; - break; - case BLRABZ: - mnemonic = "blrabz"; - form = "'Xn"; - break; - case RETAA: - mnemonic = "retaa"; - form = NULL; - break; - case RETAB: - mnemonic = "retab"; - form = NULL; - break; - case BRAA: - mnemonic = "braa"; - form = "'Xn, 'Xds"; - break; - case BRAB: - mnemonic = "brab"; - form 
= "'Xn, 'Xds"; - break; - case BLRAA: - mnemonic = "blraa"; - form = "'Xn, 'Xds"; + case "retaa_64e_branch_reg"_h: + case "retab_64e_branch_reg"_h: + form = ""; break; - case BLRAB: - mnemonic = "blrab"; + case "braa_64p_branch_reg"_h: + case "brab_64p_branch_reg"_h: + case "blraa_64p_branch_reg"_h: + case "blrab_64p_branch_reg"_h: form = "'Xn, 'Xds"; break; - default: - form = "(UnconditionalBranchToRegister)"; } - Format(instr, mnemonic, form); + + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitUnconditionalBranch(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'TImmUncn"; - - switch (instr->Mask(UnconditionalBranchMask)) { - case B: - mnemonic = "b"; - break; - case BL: - mnemonic = "bl"; - break; - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'TImmUncn"); } void Disassembler::VisitDataProcessing1Source(const Instruction *instr) { - const char *mnemonic = ""; const char *form = "'Rd, 'Rn"; - switch (instr->Mask(DataProcessing1SourceMask)) { -#define FORMAT(A, B) \ - case A##_w: \ - case A##_x: \ - mnemonic = B; \ - break; - FORMAT(RBIT, "rbit"); - FORMAT(REV16, "rev16"); - FORMAT(REV, "rev"); - FORMAT(CLZ, "clz"); - FORMAT(CLS, "cls"); -#undef FORMAT - -#define PAUTH_VARIATIONS(V) \ - V(PACI, "paci") \ - V(PACD, "pacd") \ - V(AUTI, "auti") \ - V(AUTD, "autd") -#define PAUTH_CASE(NAME, MN) \ - case NAME##A: \ - mnemonic = MN "a"; \ - form = "'Xd, 'Xns"; \ - break; \ - case NAME##ZA: \ - mnemonic = MN "za"; \ - form = "'Xd"; \ - break; \ - case NAME##B: \ - mnemonic = MN "b"; \ - form = "'Xd, 'Xns"; \ - break; \ - case NAME##ZB: \ - mnemonic = MN "zb"; \ - form = "'Xd"; \ - break; - - PAUTH_VARIATIONS(PAUTH_CASE) -#undef PAUTH_CASE - - case XPACI: - mnemonic = "xpaci"; - form = "'Xd"; - break; - case XPACD: - mnemonic = "xpacd"; + switch (form_hash_) { + case "pacia_64p_dp_1src"_h: + case "pacda_64p_dp_1src"_h: + case "autia_64p_dp_1src"_h: + case 
"autda_64p_dp_1src"_h: + case "pacib_64p_dp_1src"_h: + case "pacdb_64p_dp_1src"_h: + case "autib_64p_dp_1src"_h: + case "autdb_64p_dp_1src"_h: + form = "'Xd, 'Xns"; + break; + case "paciza_64z_dp_1src"_h: + case "pacdza_64z_dp_1src"_h: + case "autiza_64z_dp_1src"_h: + case "autdza_64z_dp_1src"_h: + case "pacizb_64z_dp_1src"_h: + case "pacdzb_64z_dp_1src"_h: + case "autizb_64z_dp_1src"_h: + case "autdzb_64z_dp_1src"_h: + case "xpacd_64z_dp_1src"_h: + case "xpaci_64z_dp_1src"_h: form = "'Xd"; break; - case REV32_x: - mnemonic = "rev32"; - break; - default: - VIXL_UNREACHABLE(); } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitDataProcessing2Source(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + std::string mnemonic = mnemonic_; const char *form = "'Rd, 'Rn, 'Rm"; - const char *form_wwx = "'Wd, 'Wn, 'Xm"; - switch (instr->Mask(DataProcessing2SourceMask)) { -#define FORMAT(A, B) \ - case A##_w: \ - case A##_x: \ - mnemonic = B; \ - break; - FORMAT(UDIV, "udiv"); - FORMAT(SDIV, "sdiv"); - FORMAT(LSLV, "lsl"); - FORMAT(LSRV, "lsr"); - FORMAT(ASRV, "asr"); - FORMAT(RORV, "ror"); -#undef FORMAT - case PACGA: - mnemonic = "pacga"; + switch (form_hash_) { + case "asrv_32_dp_2src"_h: + case "asrv_64_dp_2src"_h: + case "lslv_32_dp_2src"_h: + case "lslv_64_dp_2src"_h: + case "lsrv_32_dp_2src"_h: + case "lsrv_64_dp_2src"_h: + case "rorv_32_dp_2src"_h: + case "rorv_64_dp_2src"_h: + // Drop the last 'v' character. 
+ VIXL_ASSERT(mnemonic[3] == 'v'); + mnemonic.pop_back(); + break; + case "pacga_64p_dp_2src"_h: form = "'Xd, 'Xn, 'Xms"; break; - case CRC32B: - mnemonic = "crc32b"; - break; - case CRC32H: - mnemonic = "crc32h"; - break; - case CRC32W: - mnemonic = "crc32w"; - break; - case CRC32X: - mnemonic = "crc32x"; - form = form_wwx; - break; - case CRC32CB: - mnemonic = "crc32cb"; - break; - case CRC32CH: - mnemonic = "crc32ch"; + case "crc32x_64c_dp_2src"_h: + case "crc32cx_64c_dp_2src"_h: + form = "'Wd, 'Wn, 'Xm"; break; - case CRC32CW: - mnemonic = "crc32cw"; - break; - case CRC32CX: - mnemonic = "crc32cx"; - form = form_wwx; - break; - default: - form = "(DataProcessing2Source)"; } - Format(instr, mnemonic, form); + Format(instr, mnemonic.c_str(), form); } @@ -932,44 +1354,16 @@ void Disassembler::VisitDataProcessing3Source(const Instruction *instr) { void Disassembler::VisitCompareBranch(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'Rt, 'TImmCmpa"; - - switch (instr->Mask(CompareBranchMask)) { - case CBZ_w: - case CBZ_x: - mnemonic = "cbz"; - break; - case CBNZ_w: - case CBNZ_x: - mnemonic = "cbnz"; - break; - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Rt, 'TImmCmpa"); } void Disassembler::VisitTestBranch(const Instruction *instr) { - const char *mnemonic = ""; // If the top bit of the immediate is clear, the tested register is // disassembled as Wt, otherwise Xt. As the top bit of the immediate is // encoded in bit 31 of the instruction, we can reuse the Rt form, which // uses bit 31 (normally "sf") to choose the register size. 
- const char *form = "'Rt, 'It, 'TImmTest"; - - switch (instr->Mask(TestBranchMask)) { - case TBZ: - mnemonic = "tbz"; - break; - case TBNZ: - mnemonic = "tbnz"; - break; - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Rt, 'It, 'TImmTest"); } @@ -1014,138 +1408,94 @@ void Disassembler::VisitMoveWideImmediate(const Instruction *instr) { } -#define LOAD_STORE_LIST(V) \ - V(STRB_w, "strb", "'Wt") \ - V(STRH_w, "strh", "'Wt") \ - V(STR_w, "str", "'Wt") \ - V(STR_x, "str", "'Xt") \ - V(LDRB_w, "ldrb", "'Wt") \ - V(LDRH_w, "ldrh", "'Wt") \ - V(LDR_w, "ldr", "'Wt") \ - V(LDR_x, "ldr", "'Xt") \ - V(LDRSB_x, "ldrsb", "'Xt") \ - V(LDRSH_x, "ldrsh", "'Xt") \ - V(LDRSW_x, "ldrsw", "'Xt") \ - V(LDRSB_w, "ldrsb", "'Wt") \ - V(LDRSH_w, "ldrsh", "'Wt") \ - V(STR_b, "str", "'Bt") \ - V(STR_h, "str", "'Ht") \ - V(STR_s, "str", "'St") \ - V(STR_d, "str", "'Dt") \ - V(LDR_b, "ldr", "'Bt") \ - V(LDR_h, "ldr", "'Ht") \ - V(LDR_s, "ldr", "'St") \ - V(LDR_d, "ldr", "'Dt") \ - V(STR_q, "str", "'Qt") \ - V(LDR_q, "ldr", "'Qt") +#define LOAD_STORE_LIST(V) \ + V(STRB_w, "'Wt") \ + V(STRH_w, "'Wt") \ + V(STR_w, "'Wt") \ + V(STR_x, "'Xt") \ + V(LDRB_w, "'Wt") \ + V(LDRH_w, "'Wt") \ + V(LDR_w, "'Wt") \ + V(LDR_x, "'Xt") \ + V(LDRSB_x, "'Xt") \ + V(LDRSH_x, "'Xt") \ + V(LDRSW_x, "'Xt") \ + V(LDRSB_w, "'Wt") \ + V(LDRSH_w, "'Wt") \ + V(STR_b, "'Bt") \ + V(STR_h, "'Ht") \ + V(STR_s, "'St") \ + V(STR_d, "'Dt") \ + V(LDR_b, "'Bt") \ + V(LDR_h, "'Ht") \ + V(LDR_s, "'St") \ + V(LDR_d, "'Dt") \ + V(STR_q, "'Qt") \ + V(LDR_q, "'Qt") void Disassembler::VisitLoadStorePreIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(LoadStorePreIndex)"; + const char *suffix = ", ['Xns'ILSi]!"; switch (instr->Mask(LoadStorePreIndexMask)) { -#define LS_PREINDEX(A, B, C) \ - case A##_pre: \ - mnemonic = B; \ - form = C ", ['Xns'ILSi]!"; \ +#define LS_PREINDEX(A, B) \ + case A##_pre: \ + form = B; \ break; 
LOAD_STORE_LIST(LS_PREINDEX) #undef LS_PREINDEX } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitLoadStorePostIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(LoadStorePostIndex)"; + const char *suffix = ", ['Xns]'ILSi"; switch (instr->Mask(LoadStorePostIndexMask)) { -#define LS_POSTINDEX(A, B, C) \ - case A##_post: \ - mnemonic = B; \ - form = C ", ['Xns]'ILSi"; \ +#define LS_POSTINDEX(A, B) \ + case A##_post: \ + form = B; \ break; LOAD_STORE_LIST(LS_POSTINDEX) #undef LS_POSTINDEX } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(LoadStoreUnsignedOffset)"; + const char *suffix = ", ['Xns'ILU]"; switch (instr->Mask(LoadStoreUnsignedOffsetMask)) { -#define LS_UNSIGNEDOFFSET(A, B, C) \ - case A##_unsigned: \ - mnemonic = B; \ - form = C ", ['Xns'ILU]"; \ +#define LS_UNSIGNEDOFFSET(A, B) \ + case A##_unsigned: \ + form = B; \ break; LOAD_STORE_LIST(LS_UNSIGNEDOFFSET) #undef LS_UNSIGNEDOFFSET case PRFM_unsigned: - mnemonic = "prfm"; - form = "'prefOp, ['Xns'ILU]"; + form = "'prefOp"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitLoadStoreRCpcUnscaledOffset(const Instruction *instr) { - const char *mnemonic; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Wt, ['Xns'ILS]"; const char *form_x = "'Xt, ['Xns'ILS]"; - switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) { - case STLURB: - mnemonic = "stlurb"; - break; - case LDAPURB: - mnemonic = "ldapurb"; - break; - case LDAPURSB_w: - mnemonic = "ldapursb"; - break; - case LDAPURSB_x: - mnemonic = "ldapursb"; - form = form_x; - break; - case STLURH: - mnemonic = "stlurh"; - break; - case LDAPURH: - mnemonic = "ldapurh"; - break; - case LDAPURSH_w: - mnemonic = 
"ldapursh"; - break; - case LDAPURSH_x: - mnemonic = "ldapursh"; - form = form_x; - break; - case STLUR_w: - mnemonic = "stlur"; - break; - case LDAPUR_w: - mnemonic = "ldapur"; - break; - case LDAPURSW: - mnemonic = "ldapursw"; + switch (form_hash_) { + case "ldapursb_64_ldapstl_unscaled"_h: + case "ldapursh_64_ldapstl_unscaled"_h: + case "ldapursw_64_ldapstl_unscaled"_h: + case "ldapur_64_ldapstl_unscaled"_h: + case "stlur_64_ldapstl_unscaled"_h: form = form_x; break; - case STLUR_x: - mnemonic = "stlur"; - form = form_x; - break; - case LDAPUR_x: - mnemonic = "ldapur"; - form = form_x; - break; - default: - mnemonic = "unimplemented"; - form = "(LoadStoreRCpcUnscaledOffset)"; } Format(instr, mnemonic, form); @@ -1153,365 +1503,233 @@ void Disassembler::VisitLoadStoreRCpcUnscaledOffset(const Instruction *instr) { void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(LoadStoreRegisterOffset)"; + const char *suffix = ", ['Xns, 'Offsetreg]"; switch (instr->Mask(LoadStoreRegisterOffsetMask)) { -#define LS_REGISTEROFFSET(A, B, C) \ - case A##_reg: \ - mnemonic = B; \ - form = C ", ['Xns, 'Offsetreg]"; \ +#define LS_REGISTEROFFSET(A, B) \ + case A##_reg: \ + form = B; \ break; LOAD_STORE_LIST(LS_REGISTEROFFSET) #undef LS_REGISTEROFFSET case PRFM_reg: - mnemonic = "prfm"; - form = "'prefOp, ['Xns, 'Offsetreg]"; + form = "'prefOp"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Wt, ['Xns'ILS]"; - const char *form_x = "'Xt, ['Xns'ILS]"; - const char *form_b = "'Bt, ['Xns'ILS]"; - const char *form_h = "'Ht, ['Xns'ILS]"; - const char *form_s = "'St, ['Xns'ILS]"; - const char *form_d = "'Dt, ['Xns'ILS]"; - const char *form_q = "'Qt, ['Xns'ILS]"; - const char *form_prefetch = "'prefOp, ['Xns'ILS]"; + const char 
*form = "'Wt"; + const char *suffix = ", ['Xns'ILS]"; - switch (instr->Mask(LoadStoreUnscaledOffsetMask)) { - case STURB_w: - mnemonic = "sturb"; - break; - case STURH_w: - mnemonic = "sturh"; - break; - case STUR_w: - mnemonic = "stur"; - break; - case STUR_x: - mnemonic = "stur"; - form = form_x; - break; - case STUR_b: - mnemonic = "stur"; - form = form_b; - break; - case STUR_h: - mnemonic = "stur"; - form = form_h; - break; - case STUR_s: - mnemonic = "stur"; - form = form_s; - break; - case STUR_d: - mnemonic = "stur"; - form = form_d; - break; - case STUR_q: - mnemonic = "stur"; - form = form_q; - break; - case LDURB_w: - mnemonic = "ldurb"; - break; - case LDURH_w: - mnemonic = "ldurh"; - break; - case LDUR_w: - mnemonic = "ldur"; - break; - case LDUR_x: - mnemonic = "ldur"; - form = form_x; - break; - case LDUR_b: - mnemonic = "ldur"; - form = form_b; - break; - case LDUR_h: - mnemonic = "ldur"; - form = form_h; - break; - case LDUR_s: - mnemonic = "ldur"; - form = form_s; + switch (form_hash_) { + case "ldur_64_ldst_unscaled"_h: + case "ldursb_64_ldst_unscaled"_h: + case "ldursh_64_ldst_unscaled"_h: + case "ldursw_64_ldst_unscaled"_h: + case "stur_64_ldst_unscaled"_h: + form = "'Xt"; break; - case LDUR_d: - mnemonic = "ldur"; - form = form_d; + case "ldur_b_ldst_unscaled"_h: + case "stur_b_ldst_unscaled"_h: + form = "'Bt"; break; - case LDUR_q: - mnemonic = "ldur"; - form = form_q; + case "ldur_h_ldst_unscaled"_h: + case "stur_h_ldst_unscaled"_h: + form = "'Ht"; break; - case LDURSB_x: - form = form_x; - VIXL_FALLTHROUGH(); - case LDURSB_w: - mnemonic = "ldursb"; + case "ldur_s_ldst_unscaled"_h: + case "stur_s_ldst_unscaled"_h: + form = "'St"; break; - case LDURSH_x: - form = form_x; - VIXL_FALLTHROUGH(); - case LDURSH_w: - mnemonic = "ldursh"; + case "ldur_d_ldst_unscaled"_h: + case "stur_d_ldst_unscaled"_h: + form = "'Dt"; break; - case LDURSW_x: - mnemonic = "ldursw"; - form = form_x; + case "ldur_q_ldst_unscaled"_h: + case "stur_q_ldst_unscaled"_h: + 
form = "'Qt"; break; - case PRFUM: - mnemonic = "prfum"; - form = form_prefetch; + case "prfum_p_ldst_unscaled"_h: + form = "'prefOp"; break; - default: - form = "(LoadStoreUnscaledOffset)"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitLoadLiteral(const Instruction *instr) { - const char *mnemonic = "ldr"; - const char *form = "(LoadLiteral)"; + const char *form = "'Wt"; + const char *suffix = ", 'ILLiteral 'LValue"; - switch (instr->Mask(LoadLiteralMask)) { - case LDR_w_lit: - form = "'Wt, 'ILLiteral 'LValue"; - break; - case LDR_x_lit: - form = "'Xt, 'ILLiteral 'LValue"; + switch (form_hash_) { + case "ldr_64_loadlit"_h: + case "ldrsw_64_loadlit"_h: + form = "'Xt"; break; - case LDR_s_lit: - form = "'St, 'ILLiteral 'LValue"; - break; - case LDR_d_lit: - form = "'Dt, 'ILLiteral 'LValue"; + case "ldr_s_loadlit"_h: + form = "'St"; break; - case LDR_q_lit: - form = "'Qt, 'ILLiteral 'LValue"; + case "ldr_d_loadlit"_h: + form = "'Dt"; break; - case LDRSW_x_lit: { - mnemonic = "ldrsw"; - form = "'Xt, 'ILLiteral 'LValue"; + case "ldr_q_loadlit"_h: + form = "'Qt"; break; - } - case PRFM_lit: { - mnemonic = "prfm"; - form = "'prefOp, 'ILLiteral 'LValue"; + case "prfm_p_loadlit"_h: + form = "'prefOp"; break; - } - default: - mnemonic = "unimplemented"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } -#define LOAD_STORE_PAIR_LIST(V) \ - V(STP_w, "stp", "'Wt, 'Wt2", "2") \ - V(LDP_w, "ldp", "'Wt, 'Wt2", "2") \ - V(LDPSW_x, "ldpsw", "'Xt, 'Xt2", "2") \ - V(STP_x, "stp", "'Xt, 'Xt2", "3") \ - V(LDP_x, "ldp", "'Xt, 'Xt2", "3") \ - V(STP_s, "stp", "'St, 'St2", "2") \ - V(LDP_s, "ldp", "'St, 'St2", "2") \ - V(STP_d, "stp", "'Dt, 'Dt2", "3") \ - V(LDP_d, "ldp", "'Dt, 'Dt2", "3") \ - V(LDP_q, "ldp", "'Qt, 'Qt2", "4") \ - V(STP_q, "stp", "'Qt, 'Qt2", "4") +#define LOAD_STORE_PAIR_LIST(V) \ + V(STP_w, "'Wt, 'Wt2", "2") \ + V(LDP_w, "'Wt, 'Wt2", "2") \ + V(LDPSW_x, "'Xt, 'Xt2", "2") 
\ + V(STP_x, "'Xt, 'Xt2", "3") \ + V(LDP_x, "'Xt, 'Xt2", "3") \ + V(STP_s, "'St, 'St2", "2") \ + V(LDP_s, "'St, 'St2", "2") \ + V(STP_d, "'Dt, 'Dt2", "3") \ + V(LDP_d, "'Dt, 'Dt2", "3") \ + V(LDP_q, "'Qt, 'Qt2", "4") \ + V(STP_q, "'Qt, 'Qt2", "4") void Disassembler::VisitLoadStorePairPostIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(LoadStorePairPostIndex)"; switch (instr->Mask(LoadStorePairPostIndexMask)) { -#define LSP_POSTINDEX(A, B, C, D) \ +#define LSP_POSTINDEX(A, B, C) \ case A##_post: \ - mnemonic = B; \ - form = C ", ['Xns]'ILP" D "i"; \ + form = B ", ['Xns]'ILP" C "i"; \ break; LOAD_STORE_PAIR_LIST(LSP_POSTINDEX) #undef LSP_POSTINDEX } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitLoadStorePairPreIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(LoadStorePairPreIndex)"; switch (instr->Mask(LoadStorePairPreIndexMask)) { -#define LSP_PREINDEX(A, B, C, D) \ +#define LSP_PREINDEX(A, B, C) \ case A##_pre: \ - mnemonic = B; \ - form = C ", ['Xns'ILP" D "i]!"; \ + form = B ", ['Xns'ILP" C "i]!"; \ break; LOAD_STORE_PAIR_LIST(LSP_PREINDEX) #undef LSP_PREINDEX } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitLoadStorePairOffset(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(LoadStorePairOffset)"; switch (instr->Mask(LoadStorePairOffsetMask)) { -#define LSP_OFFSET(A, B, C, D) \ +#define LSP_OFFSET(A, B, C) \ case A##_off: \ - mnemonic = B; \ - form = C ", ['Xns'ILP" D "]"; \ + form = B ", ['Xns'ILP" C "]"; \ break; LOAD_STORE_PAIR_LIST(LSP_OFFSET) #undef LSP_OFFSET } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitLoadStorePairNonTemporal(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form; + const char *form = "'Wt, 'Wt2, 
['Xns'ILP2]"; - switch (instr->Mask(LoadStorePairNonTemporalMask)) { - case STNP_w: - mnemonic = "stnp"; - form = "'Wt, 'Wt2, ['Xns'ILP2]"; - break; - case LDNP_w: - mnemonic = "ldnp"; - form = "'Wt, 'Wt2, ['Xns'ILP2]"; - break; - case STNP_x: - mnemonic = "stnp"; + switch (form_hash_) { + case "ldnp_64_ldstnapair_offs"_h: + case "stnp_64_ldstnapair_offs"_h: form = "'Xt, 'Xt2, ['Xns'ILP3]"; break; - case LDNP_x: - mnemonic = "ldnp"; - form = "'Xt, 'Xt2, ['Xns'ILP3]"; - break; - case STNP_s: - mnemonic = "stnp"; - form = "'St, 'St2, ['Xns'ILP2]"; - break; - case LDNP_s: - mnemonic = "ldnp"; + case "ldnp_s_ldstnapair_offs"_h: + case "stnp_s_ldstnapair_offs"_h: form = "'St, 'St2, ['Xns'ILP2]"; break; - case STNP_d: - mnemonic = "stnp"; - form = "'Dt, 'Dt2, ['Xns'ILP3]"; - break; - case LDNP_d: - mnemonic = "ldnp"; + case "ldnp_d_ldstnapair_offs"_h: + case "stnp_d_ldstnapair_offs"_h: form = "'Dt, 'Dt2, ['Xns'ILP3]"; break; - case STNP_q: - mnemonic = "stnp"; + case "ldnp_q_ldstnapair_offs"_h: + case "stnp_q_ldstnapair_offs"_h: form = "'Qt, 'Qt2, ['Xns'ILP4]"; break; - case LDNP_q: - mnemonic = "ldnp"; - form = "'Qt, 'Qt2, ['Xns'ILP4]"; - break; - default: - form = "(LoadStorePairNonTemporal)"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } // clang-format off -#define LOAD_STORE_EXCLUSIVE_LIST(V) \ - V(STXRB_w, "stxrb", "'Ws, 'Wt") \ - V(STXRH_w, "stxrh", "'Ws, 'Wt") \ - V(STXR_w, "stxr", "'Ws, 'Wt") \ - V(STXR_x, "stxr", "'Ws, 'Xt") \ - V(LDXRB_w, "ldxrb", "'Wt") \ - V(LDXRH_w, "ldxrh", "'Wt") \ - V(LDXR_w, "ldxr", "'Wt") \ - V(LDXR_x, "ldxr", "'Xt") \ - V(STXP_w, "stxp", "'Ws, 'Wt, 'Wt2") \ - V(STXP_x, "stxp", "'Ws, 'Xt, 'Xt2") \ - V(LDXP_w, "ldxp", "'Wt, 'Wt2") \ - V(LDXP_x, "ldxp", "'Xt, 'Xt2") \ - V(STLXRB_w, "stlxrb", "'Ws, 'Wt") \ - V(STLXRH_w, "stlxrh", "'Ws, 'Wt") \ - V(STLXR_w, "stlxr", "'Ws, 'Wt") \ - V(STLXR_x, "stlxr", "'Ws, 'Xt") \ - V(LDAXRB_w, "ldaxrb", "'Wt") \ - V(LDAXRH_w, "ldaxrh", "'Wt") \ - V(LDAXR_w, "ldaxr", 
"'Wt") \ - V(LDAXR_x, "ldaxr", "'Xt") \ - V(STLXP_w, "stlxp", "'Ws, 'Wt, 'Wt2") \ - V(STLXP_x, "stlxp", "'Ws, 'Xt, 'Xt2") \ - V(LDAXP_w, "ldaxp", "'Wt, 'Wt2") \ - V(LDAXP_x, "ldaxp", "'Xt, 'Xt2") \ - V(STLRB_w, "stlrb", "'Wt") \ - V(STLRH_w, "stlrh", "'Wt") \ - V(STLR_w, "stlr", "'Wt") \ - V(STLR_x, "stlr", "'Xt") \ - V(LDARB_w, "ldarb", "'Wt") \ - V(LDARH_w, "ldarh", "'Wt") \ - V(LDAR_w, "ldar", "'Wt") \ - V(LDAR_x, "ldar", "'Xt") \ - V(STLLRB, "stllrb", "'Wt") \ - V(STLLRH, "stllrh", "'Wt") \ - V(STLLR_w, "stllr", "'Wt") \ - V(STLLR_x, "stllr", "'Xt") \ - V(LDLARB, "ldlarb", "'Wt") \ - V(LDLARH, "ldlarh", "'Wt") \ - V(LDLAR_w, "ldlar", "'Wt") \ - V(LDLAR_x, "ldlar", "'Xt") \ - V(CAS_w, "cas", "'Ws, 'Wt") \ - V(CAS_x, "cas", "'Xs, 'Xt") \ - V(CASA_w, "casa", "'Ws, 'Wt") \ - V(CASA_x, "casa", "'Xs, 'Xt") \ - V(CASL_w, "casl", "'Ws, 'Wt") \ - V(CASL_x, "casl", "'Xs, 'Xt") \ - V(CASAL_w, "casal", "'Ws, 'Wt") \ - V(CASAL_x, "casal", "'Xs, 'Xt") \ - V(CASB, "casb", "'Ws, 'Wt") \ - V(CASAB, "casab", "'Ws, 'Wt") \ - V(CASLB, "caslb", "'Ws, 'Wt") \ - V(CASALB, "casalb", "'Ws, 'Wt") \ - V(CASH, "cash", "'Ws, 'Wt") \ - V(CASAH, "casah", "'Ws, 'Wt") \ - V(CASLH, "caslh", "'Ws, 'Wt") \ - V(CASALH, "casalh", "'Ws, 'Wt") \ - V(CASP_w, "casp", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASP_x, "casp", "'Xs, 'Xs+, 'Xt, 'Xt+") \ - V(CASPA_w, "caspa", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASPA_x, "caspa", "'Xs, 'Xs+, 'Xt, 'Xt+") \ - V(CASPL_w, "caspl", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASPL_x, "caspl", "'Xs, 'Xs+, 'Xt, 'Xt+") \ - V(CASPAL_w, "caspal", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASPAL_x, "caspal", "'Xs, 'Xs+, 'Xt, 'Xt+") +#define LOAD_STORE_EXCLUSIVE_LIST(V) \ + V(STXRB_w, "'Ws, 'Wt") \ + V(STXRH_w, "'Ws, 'Wt") \ + V(STXR_w, "'Ws, 'Wt") \ + V(STXR_x, "'Ws, 'Xt") \ + V(LDXR_x, "'Xt") \ + V(STXP_w, "'Ws, 'Wt, 'Wt2") \ + V(STXP_x, "'Ws, 'Xt, 'Xt2") \ + V(LDXP_w, "'Wt, 'Wt2") \ + V(LDXP_x, "'Xt, 'Xt2") \ + V(STLXRB_w, "'Ws, 'Wt") \ + V(STLXRH_w, "'Ws, 'Wt") \ + V(STLXR_w, "'Ws, 'Wt") \ + V(STLXR_x, "'Ws, 
'Xt") \ + V(LDAXR_x, "'Xt") \ + V(STLXP_w, "'Ws, 'Wt, 'Wt2") \ + V(STLXP_x, "'Ws, 'Xt, 'Xt2") \ + V(LDAXP_w, "'Wt, 'Wt2") \ + V(LDAXP_x, "'Xt, 'Xt2") \ + V(STLR_x, "'Xt") \ + V(LDAR_x, "'Xt") \ + V(STLLR_x, "'Xt") \ + V(LDLAR_x, "'Xt") \ + V(CAS_w, "'Ws, 'Wt") \ + V(CAS_x, "'Xs, 'Xt") \ + V(CASA_w, "'Ws, 'Wt") \ + V(CASA_x, "'Xs, 'Xt") \ + V(CASL_w, "'Ws, 'Wt") \ + V(CASL_x, "'Xs, 'Xt") \ + V(CASAL_w, "'Ws, 'Wt") \ + V(CASAL_x, "'Xs, 'Xt") \ + V(CASB, "'Ws, 'Wt") \ + V(CASAB, "'Ws, 'Wt") \ + V(CASLB, "'Ws, 'Wt") \ + V(CASALB, "'Ws, 'Wt") \ + V(CASH, "'Ws, 'Wt") \ + V(CASAH, "'Ws, 'Wt") \ + V(CASLH, "'Ws, 'Wt") \ + V(CASALH, "'Ws, 'Wt") \ + V(CASP_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASP_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPA_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPA_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPL_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPL_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPAL_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPAL_x, "'Xs, 'Xs+, 'Xt, 'Xt+") // clang-format on void Disassembler::VisitLoadStoreExclusive(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form; + const char *form = "'Wt"; + const char *suffix = ", ['Xns]"; switch (instr->Mask(LoadStoreExclusiveMask)) { -#define LSX(A, B, C) \ - case A: \ - mnemonic = B; \ - form = C ", ['Xns]"; \ +#define LSX(A, B) \ + case A: \ + form = B; \ break; LOAD_STORE_EXCLUSIVE_LIST(LSX) #undef LSX - default: - form = "(LoadStoreExclusive)"; } switch (instr->Mask(LoadStoreExclusiveMask)) { @@ -1524,767 +1742,264 @@ void Disassembler::VisitLoadStoreExclusive(const Instruction *instr) { case CASPAL_w: case CASPAL_x: if ((instr->GetRs() % 2 == 1) || (instr->GetRt() % 2 == 1)) { - mnemonic = "unallocated"; - form = "(LoadStoreExclusive)"; + VisitUnallocated(instr); + return; } break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitLoadStorePAC(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = 
"(LoadStorePAC)"; - - switch (instr->Mask(LoadStorePACMask)) { - case LDRAA: - mnemonic = "ldraa"; - form = "'Xt, ['Xns'ILA]"; - break; - case LDRAB: - mnemonic = "ldrab"; - form = "'Xt, ['Xns'ILA]"; - break; - case LDRAA_pre: - mnemonic = "ldraa"; - form = "'Xt, ['Xns'ILA]!"; - break; - case LDRAB_pre: - mnemonic = "ldrab"; - form = "'Xt, ['Xns'ILA]!"; + const char *form = "'Xt, ['Xns'ILA]"; + const char *suffix = ""; + switch (form_hash_) { + case "ldraa_64w_ldst_pac"_h: + case "ldrab_64w_ldst_pac"_h: + suffix = "!"; break; } - - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } -#define ATOMIC_MEMORY_SIMPLE_LIST(V) \ - V(LDADD, "add") \ - V(LDCLR, "clr") \ - V(LDEOR, "eor") \ - V(LDSET, "set") \ - V(LDSMAX, "smax") \ - V(LDSMIN, "smin") \ - V(LDUMAX, "umax") \ - V(LDUMIN, "umin") - void Disassembler::VisitAtomicMemory(const Instruction *instr) { - const int kMaxAtomicOpMnemonicLength = 16; - const char *mnemonic; - const char *form = "'Ws, 'Wt, ['Xns]"; - - switch (instr->Mask(AtomicMemoryMask)) { -#define AMS(A, MN) \ - case A##B: \ - mnemonic = MN "b"; \ - break; \ - case A##AB: \ - mnemonic = MN "ab"; \ - break; \ - case A##LB: \ - mnemonic = MN "lb"; \ - break; \ - case A##ALB: \ - mnemonic = MN "alb"; \ - break; \ - case A##H: \ - mnemonic = MN "h"; \ - break; \ - case A##AH: \ - mnemonic = MN "ah"; \ - break; \ - case A##LH: \ - mnemonic = MN "lh"; \ - break; \ - case A##ALH: \ - mnemonic = MN "alh"; \ - break; \ - case A##_w: \ - mnemonic = MN; \ - break; \ - case A##A_w: \ - mnemonic = MN "a"; \ - break; \ - case A##L_w: \ - mnemonic = MN "l"; \ - break; \ - case A##AL_w: \ - mnemonic = MN "al"; \ - break; \ - case A##_x: \ - mnemonic = MN; \ - form = "'Xs, 'Xt, ['Xns]"; \ - break; \ - case A##A_x: \ - mnemonic = MN "a"; \ - form = "'Xs, 'Xt, ['Xns]"; \ - break; \ - case A##L_x: \ - mnemonic = MN "l"; \ - form = "'Xs, 'Xt, ['Xns]"; \ - break; \ - case A##AL_x: \ - mnemonic = MN "al"; \ - form = "'Xs, 'Xt, ['Xns]"; \ - 
break; - ATOMIC_MEMORY_SIMPLE_LIST(AMS) + bool is_x = (instr->ExtractBits(31, 30) == 3); + const char *form = is_x ? "'Xs, 'Xt" : "'Ws, 'Wt"; + const char *suffix = ", ['Xns]"; - // SWP has the same semantics as ldadd etc but without the store aliases. - AMS(SWP, "swp") -#undef AMS + std::string mnemonic = mnemonic_; - case LDAPRB: - mnemonic = "ldaprb"; - form = "'Wt, ['Xns]"; - break; - case LDAPRH: - mnemonic = "ldaprh"; - form = "'Wt, ['Xns]"; + switch (form_hash_) { + case "ldaprb_32l_memop"_h: + case "ldaprh_32l_memop"_h: + case "ldapr_32l_memop"_h: + form = "'Wt"; break; - case LDAPR_w: - mnemonic = "ldapr"; - form = "'Wt, ['Xns]"; - break; - case LDAPR_x: - mnemonic = "ldapr"; - form = "'Xt, ['Xns]"; + case "ldapr_64l_memop"_h: + form = "'Xt"; break; default: - mnemonic = "unimplemented"; - form = "(AtomicMemory)"; - } - - const char *prefix = ""; - switch (instr->Mask(AtomicMemoryMask)) { -#define AMS(A, MN) \ - case A##AB: \ - case A##ALB: \ - case A##AH: \ - case A##ALH: \ - case A##A_w: \ - case A##AL_w: \ - case A##A_x: \ - case A##AL_x: \ - prefix = "ld"; \ - break; \ - case A##B: \ - case A##LB: \ - case A##H: \ - case A##LH: \ - case A##_w: \ - case A##L_w: { \ - prefix = "ld"; \ - unsigned rt = instr->GetRt(); \ - if (Register(rt, 32).IsZero()) { \ - prefix = "st"; \ - form = "'Ws, ['Xns]"; \ - } \ - break; \ - } \ - case A##_x: \ - case A##L_x: { \ - prefix = "ld"; \ - unsigned rt = instr->GetRt(); \ - if (Register(rt, 64).IsZero()) { \ - prefix = "st"; \ - form = "'Xs, ['Xns]"; \ - } \ - break; \ - } - ATOMIC_MEMORY_SIMPLE_LIST(AMS) -#undef AMS - } - - char buffer[kMaxAtomicOpMnemonicLength]; - if (strlen(prefix) > 0) { - snprintf(buffer, kMaxAtomicOpMnemonicLength, "%s%s", prefix, mnemonic); - mnemonic = buffer; + // Zero register implies a store instruction. + if (instr->GetRt() == kZeroRegCode) { + mnemonic.replace(0, 2, "st"); + form = is_x ? 
"'Xs" : "'Ws"; + } } - - Format(instr, mnemonic, form); + Format(instr, mnemonic.c_str(), form, suffix); } void Disassembler::VisitFPCompare(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "'Fn, 'Fm"; - const char *form_zero = "'Fn, #0.0"; - - switch (instr->Mask(FPCompareMask)) { - case FCMP_h_zero: - case FCMP_s_zero: - case FCMP_d_zero: - form = form_zero; - VIXL_FALLTHROUGH(); - case FCMP_h: - case FCMP_s: - case FCMP_d: - mnemonic = "fcmp"; - break; - case FCMPE_h_zero: - case FCMPE_s_zero: - case FCMPE_d_zero: - form = form_zero; - VIXL_FALLTHROUGH(); - case FCMPE_h: - case FCMPE_s: - case FCMPE_d: - mnemonic = "fcmpe"; - break; - default: - form = "(FPCompare)"; + switch (form_hash_) { + case "fcmpe_dz_floatcmp"_h: + case "fcmpe_hz_floatcmp"_h: + case "fcmpe_sz_floatcmp"_h: + case "fcmp_dz_floatcmp"_h: + case "fcmp_hz_floatcmp"_h: + case "fcmp_sz_floatcmp"_h: + form = "'Fn, #0.0"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitFPConditionalCompare(const Instruction *instr) { - const char *mnemonic = "unmplemented"; - const char *form = "'Fn, 'Fm, 'INzcv, 'Cond"; - - switch (instr->Mask(FPConditionalCompareMask)) { - case FCCMP_h: - case FCCMP_s: - case FCCMP_d: - mnemonic = "fccmp"; - break; - case FCCMPE_h: - case FCCMPE_s: - case FCCMPE_d: - mnemonic = "fccmpe"; - break; - default: - form = "(FPConditionalCompare)"; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Fn, 'Fm, 'INzcv, 'Cond"); } void Disassembler::VisitFPConditionalSelect(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'Fd, 'Fn, 'Fm, 'Cond"; - - switch (instr->Mask(FPConditionalSelectMask)) { - case FCSEL_h: - case FCSEL_s: - case FCSEL_d: - mnemonic = "fcsel"; - break; - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm, 'Cond"); } void 
Disassembler::VisitFPDataProcessing1Source(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "'Fd, 'Fn"; - - switch (instr->Mask(FPDataProcessing1SourceMask)) { -#define FORMAT(A, B) \ - case A##_h: \ - case A##_s: \ - case A##_d: \ - mnemonic = B; \ - break; - FORMAT(FMOV, "fmov"); - FORMAT(FABS, "fabs"); - FORMAT(FNEG, "fneg"); - FORMAT(FSQRT, "fsqrt"); - FORMAT(FRINTN, "frintn"); - FORMAT(FRINTP, "frintp"); - FORMAT(FRINTM, "frintm"); - FORMAT(FRINTZ, "frintz"); - FORMAT(FRINTA, "frinta"); - FORMAT(FRINTX, "frintx"); - FORMAT(FRINTI, "frinti"); -#undef FORMAT -#define FORMAT(A, B) \ - case A##_s: \ - case A##_d: \ - mnemonic = B; \ - break; - FORMAT(FRINT32X, "frint32x"); - FORMAT(FRINT32Z, "frint32z"); - FORMAT(FRINT64X, "frint64x"); - FORMAT(FRINT64Z, "frint64z"); -#undef FORMAT - case FCVT_ds: - mnemonic = "fcvt"; + switch (form_hash_) { + case "fcvt_ds_floatdp1"_h: form = "'Dd, 'Sn"; break; - case FCVT_sd: - mnemonic = "fcvt"; + case "fcvt_sd_floatdp1"_h: form = "'Sd, 'Dn"; break; - case FCVT_hs: - mnemonic = "fcvt"; + case "fcvt_hs_floatdp1"_h: form = "'Hd, 'Sn"; break; - case FCVT_sh: - mnemonic = "fcvt"; + case "fcvt_sh_floatdp1"_h: form = "'Sd, 'Hn"; break; - case FCVT_dh: - mnemonic = "fcvt"; + case "fcvt_dh_floatdp1"_h: form = "'Dd, 'Hn"; break; - case FCVT_hd: - mnemonic = "fcvt"; + case "fcvt_hd_floatdp1"_h: form = "'Hd, 'Dn"; break; - default: - form = "(FPDataProcessing1Source)"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitFPDataProcessing2Source(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'Fd, 'Fn, 'Fm"; - - switch (instr->Mask(FPDataProcessing2SourceMask)) { -#define FORMAT(A, B) \ - case A##_h: \ - case A##_s: \ - case A##_d: \ - mnemonic = B; \ - break; - FORMAT(FADD, "fadd"); - FORMAT(FSUB, "fsub"); - FORMAT(FMUL, "fmul"); - FORMAT(FDIV, "fdiv"); - FORMAT(FMAX, "fmax"); - FORMAT(FMIN, "fmin"); - FORMAT(FMAXNM, 
"fmaxnm"); - FORMAT(FMINNM, "fminnm"); - FORMAT(FNMUL, "fnmul"); -#undef FORMAT - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm"); } void Disassembler::VisitFPDataProcessing3Source(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'Fd, 'Fn, 'Fm, 'Fa"; - - switch (instr->Mask(FPDataProcessing3SourceMask)) { -#define FORMAT(A, B) \ - case A##_h: \ - case A##_s: \ - case A##_d: \ - mnemonic = B; \ - break; - FORMAT(FMADD, "fmadd"); - FORMAT(FMSUB, "fmsub"); - FORMAT(FNMADD, "fnmadd"); - FORMAT(FNMSUB, "fnmsub"); -#undef FORMAT - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm, 'Fa"); } void Disassembler::VisitFPImmediate(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "(FPImmediate)"; - switch (instr->Mask(FPImmediateMask)) { - case FMOV_h_imm: - mnemonic = "fmov"; - form = "'Hd, 'IFP"; + const char *form = "'Hd"; + const char *suffix = ", 'IFP"; + switch (form_hash_) { + case "fmov_s_floatimm"_h: + form = "'Sd"; break; - case FMOV_s_imm: - mnemonic = "fmov"; - form = "'Sd, 'IFP"; + case "fmov_d_floatimm"_h: + form = "'Dd"; break; - case FMOV_d_imm: - mnemonic = "fmov"; - form = "'Dd, 'IFP"; - break; - default: - VIXL_UNREACHABLE(); } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitFPIntegerConvert(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(FPIntegerConvert)"; - const char *form_rf = "'Rd, 'Fn"; - const char *form_fr = "'Fd, 'Rn"; - - switch (instr->Mask(FPIntegerConvertMask)) { - case FMOV_wh: - case FMOV_xh: - case FMOV_ws: - case FMOV_xd: - mnemonic = "fmov"; - form = form_rf; - break; - case FMOV_hw: - case FMOV_hx: - case FMOV_sw: - case FMOV_dx: - mnemonic = "fmov"; - form = form_fr; - break; - case FMOV_d1_x: - mnemonic = "fmov"; + const char *form = 
"'Rd, 'Fn"; + switch (form_hash_) { + case "fmov_h32_float2int"_h: + case "fmov_h64_float2int"_h: + case "fmov_s32_float2int"_h: + case "fmov_d64_float2int"_h: + case "scvtf_d32_float2int"_h: + case "scvtf_d64_float2int"_h: + case "scvtf_h32_float2int"_h: + case "scvtf_h64_float2int"_h: + case "scvtf_s32_float2int"_h: + case "scvtf_s64_float2int"_h: + case "ucvtf_d32_float2int"_h: + case "ucvtf_d64_float2int"_h: + case "ucvtf_h32_float2int"_h: + case "ucvtf_h64_float2int"_h: + case "ucvtf_s32_float2int"_h: + case "ucvtf_s64_float2int"_h: + form = "'Fd, 'Rn"; + break; + case "fmov_v64i_float2int"_h: form = "'Vd.D[1], 'Rn"; break; - case FMOV_x_d1: - mnemonic = "fmov"; + case "fmov_64vx_float2int"_h: form = "'Rd, 'Vn.D[1]"; break; - case FCVTAS_wh: - case FCVTAS_xh: - case FCVTAS_ws: - case FCVTAS_xs: - case FCVTAS_wd: - case FCVTAS_xd: - mnemonic = "fcvtas"; - form = form_rf; - break; - case FCVTAU_wh: - case FCVTAU_xh: - case FCVTAU_ws: - case FCVTAU_xs: - case FCVTAU_wd: - case FCVTAU_xd: - mnemonic = "fcvtau"; - form = form_rf; - break; - case FCVTMS_wh: - case FCVTMS_xh: - case FCVTMS_ws: - case FCVTMS_xs: - case FCVTMS_wd: - case FCVTMS_xd: - mnemonic = "fcvtms"; - form = form_rf; - break; - case FCVTMU_wh: - case FCVTMU_xh: - case FCVTMU_ws: - case FCVTMU_xs: - case FCVTMU_wd: - case FCVTMU_xd: - mnemonic = "fcvtmu"; - form = form_rf; - break; - case FCVTNS_wh: - case FCVTNS_xh: - case FCVTNS_ws: - case FCVTNS_xs: - case FCVTNS_wd: - case FCVTNS_xd: - mnemonic = "fcvtns"; - form = form_rf; - break; - case FCVTNU_wh: - case FCVTNU_xh: - case FCVTNU_ws: - case FCVTNU_xs: - case FCVTNU_wd: - case FCVTNU_xd: - mnemonic = "fcvtnu"; - form = form_rf; - break; - case FCVTZU_wh: - case FCVTZU_xh: - case FCVTZU_ws: - case FCVTZU_xs: - case FCVTZU_wd: - case FCVTZU_xd: - mnemonic = "fcvtzu"; - form = form_rf; - break; - case FCVTZS_wh: - case FCVTZS_xh: - case FCVTZS_ws: - case FCVTZS_xs: - case FCVTZS_wd: - case FCVTZS_xd: - mnemonic = "fcvtzs"; - form = form_rf; - 
break; - case FCVTPU_wh: - case FCVTPU_xh: - case FCVTPU_xs: - case FCVTPU_wd: - case FCVTPU_ws: - case FCVTPU_xd: - mnemonic = "fcvtpu"; - form = form_rf; - break; - case FCVTPS_wh: - case FCVTPS_xh: - case FCVTPS_ws: - case FCVTPS_xs: - case FCVTPS_wd: - case FCVTPS_xd: - mnemonic = "fcvtps"; - form = form_rf; - break; - case SCVTF_hw: - case SCVTF_hx: - case SCVTF_sw: - case SCVTF_sx: - case SCVTF_dw: - case SCVTF_dx: - mnemonic = "scvtf"; - form = form_fr; - break; - case UCVTF_hw: - case UCVTF_hx: - case UCVTF_sw: - case UCVTF_sx: - case UCVTF_dw: - case UCVTF_dx: - mnemonic = "ucvtf"; - form = form_fr; - break; - case FJCVTZS: - mnemonic = "fjcvtzs"; - form = form_rf; - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitFPFixedPointConvert(const Instruction *instr) { - const char *mnemonic = ""; - const char *form = "'Rd, 'Fn, 'IFPFBits"; - const char *form_fr = "'Fd, 'Rn, 'IFPFBits"; - - switch (instr->Mask(FPFixedPointConvertMask)) { - case FCVTZS_wh_fixed: - case FCVTZS_xh_fixed: - case FCVTZS_ws_fixed: - case FCVTZS_xs_fixed: - case FCVTZS_wd_fixed: - case FCVTZS_xd_fixed: - mnemonic = "fcvtzs"; - break; - case FCVTZU_wh_fixed: - case FCVTZU_xh_fixed: - case FCVTZU_ws_fixed: - case FCVTZU_xs_fixed: - case FCVTZU_wd_fixed: - case FCVTZU_xd_fixed: - mnemonic = "fcvtzu"; - break; - case SCVTF_hw_fixed: - case SCVTF_hx_fixed: - case SCVTF_sw_fixed: - case SCVTF_sx_fixed: - case SCVTF_dw_fixed: - case SCVTF_dx_fixed: - mnemonic = "scvtf"; - form = form_fr; - break; - case UCVTF_hw_fixed: - case UCVTF_hx_fixed: - case UCVTF_sw_fixed: - case UCVTF_sx_fixed: - case UCVTF_dw_fixed: - case UCVTF_dx_fixed: - mnemonic = "ucvtf"; - form = form_fr; + const char *form = "'Rd, 'Fn"; + const char *suffix = ", 'IFPFBits"; + + switch (form_hash_) { + case "scvtf_d32_float2fix"_h: + case "scvtf_d64_float2fix"_h: + case "scvtf_h32_float2fix"_h: + case "scvtf_h64_float2fix"_h: + case "scvtf_s32_float2fix"_h: + case 
"scvtf_s64_float2fix"_h: + case "ucvtf_d32_float2fix"_h: + case "ucvtf_d64_float2fix"_h: + case "ucvtf_h32_float2fix"_h: + case "ucvtf_h64_float2fix"_h: + case "ucvtf_s32_float2fix"_h: + case "ucvtf_s64_float2fix"_h: + form = "'Fd, 'Rn"; break; - default: - VIXL_UNREACHABLE(); } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } -// clang-format off -#define PAUTH_SYSTEM_MNEMONICS(V) \ - V(PACIA1716, "pacia1716") \ - V(PACIB1716, "pacib1716") \ - V(AUTIA1716, "autia1716") \ - V(AUTIB1716, "autib1716") \ - V(PACIAZ, "paciaz") \ - V(PACIASP, "paciasp") \ - V(PACIBZ, "pacibz") \ - V(PACIBSP, "pacibsp") \ - V(AUTIAZ, "autiaz") \ - V(AUTIASP, "autiasp") \ - V(AUTIBZ, "autibz") \ - V(AUTIBSP, "autibsp") -// clang-format on +void Disassembler::DisassembleNoArgs(const Instruction *instr) { + Format(instr, mnemonic_.c_str(), ""); +} void Disassembler::VisitSystem(const Instruction *instr) { - // Some system instructions hijack their Op and Cp fields to represent a - // range of immediates instead of indicating a different instruction. This - // makes the decoding tricky. 
- const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "(System)"; - if (instr->GetInstructionBits() == XPACLRI) { - mnemonic = "xpaclri"; - form = NULL; - } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) { - switch (instr->Mask(SystemPStateMask)) { - case CFINV: - mnemonic = "cfinv"; - form = NULL; - break; - case AXFLAG: - mnemonic = "axflag"; - form = NULL; - break; - case XAFLAG: - mnemonic = "xaflag"; - form = NULL; - break; - } - } else if (instr->Mask(SystemPAuthFMask) == SystemPAuthFixed) { - switch (instr->Mask(SystemPAuthMask)) { -#define PAUTH_CASE(NAME, MN) \ - case NAME: \ - mnemonic = MN; \ - form = NULL; \ - break; + const char *suffix = NULL; - PAUTH_SYSTEM_MNEMONICS(PAUTH_CASE) -#undef PAUTH_CASE - } - } else if (instr->Mask(SystemExclusiveMonitorFMask) == - SystemExclusiveMonitorFixed) { - switch (instr->Mask(SystemExclusiveMonitorMask)) { - case CLREX: { - mnemonic = "clrex"; - form = (instr->GetCRm() == 0xf) ? NULL : "'IX"; - break; - } - } - } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) { - switch (instr->Mask(SystemSysRegMask)) { - case MRS: { - mnemonic = "mrs"; - form = "'Xt, 'IY"; - break; - } - case MSR: { - mnemonic = "msr"; - form = "'IY, 'Xt"; - break; - } - } - } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) { - form = NULL; - switch (instr->GetImmHint()) { - case NOP: - mnemonic = "nop"; - break; - case YIELD: - mnemonic = "yield"; - break; - case WFE: - mnemonic = "wfe"; - break; - case WFI: - mnemonic = "wfi"; - break; - case SEV: - mnemonic = "sev"; - break; - case SEVL: - mnemonic = "sevl"; - break; - case ESB: - mnemonic = "esb"; - break; - case CSDB: - mnemonic = "csdb"; - break; - case BTI: - mnemonic = "bti"; - break; - case BTI_c: - mnemonic = "bti c"; - break; - case BTI_j: - mnemonic = "bti j"; - break; - case BTI_jc: - mnemonic = "bti jc"; - break; - default: - // Fall back to 'hint #<imm7>'. 
- form = "'IH"; - mnemonic = "hint"; - break; - } - } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) { - switch (instr->Mask(MemBarrierMask)) { - case DMB: { - mnemonic = "dmb"; - form = "'M"; - break; - } - case DSB: { - mnemonic = "dsb"; - form = "'M"; - break; - } - case ISB: { - mnemonic = "isb"; - form = NULL; - break; + switch (form_hash_) { + case "clrex_bn_barriers"_h: + form = (instr->GetCRm() == 0xf) ? "" : "'IX"; + break; + case "mrs_rs_systemmove"_h: + form = "'Xt, 'IY"; + break; + case "msr_si_pstate"_h: + case "msr_sr_systemmove"_h: + form = "'IY, 'Xt"; + break; + case "bti_hb_hints"_h: + switch (instr->ExtractBits(7, 6)) { + case 0: + form = ""; + break; + case 1: + form = "c"; + break; + case 2: + form = "j"; + break; + case 3: + form = "jc"; + break; } - } - } else if (instr->Mask(SystemSysFMask) == SystemSysFixed) { - switch (instr->GetSysOp()) { - case IVAU: - mnemonic = "ic"; - form = "ivau, 'Xt"; - break; - case CVAC: - mnemonic = "dc"; - form = "cvac, 'Xt"; - break; - case CVAU: - mnemonic = "dc"; - form = "cvau, 'Xt"; - break; - case CVAP: - mnemonic = "dc"; - form = "cvap, 'Xt"; - break; - case CVADP: - mnemonic = "dc"; - form = "cvadp, 'Xt"; - break; - case CIVAC: - mnemonic = "dc"; - form = "civac, 'Xt"; - break; - case ZVA: - mnemonic = "dc"; - form = "zva, 'Xt"; - break; - default: - mnemonic = "sys"; - if (instr->GetRt() == 31) { + break; + case "hint_hm_hints"_h: + form = "'IH"; + break; + case "dmb_bo_barriers"_h: + case "dsb_bo_barriers"_h: + form = "'M"; + break; + case "sys_cr_systeminstrs"_h: + mnemonic = "dc"; + suffix = ", 'Xt"; + switch (instr->GetSysOp()) { + case IVAU: + mnemonic = "ic"; + form = "ivau"; + break; + case CVAC: + form = "cvac"; + break; + case CVAU: + form = "cvau"; + break; + case CVAP: + form = "cvap"; + break; + case CVADP: + form = "cvadp"; + break; + case CIVAC: + form = "civac"; + break; + case ZVA: + form = "zva"; + break; + default: + mnemonic = "sys"; form = "'G1, 'Kn, 'Km, 'G2"; - } else { 
- form = "'G1, 'Kn, 'Km, 'G2, 'Xt"; - } - break; - } + if (instr->GetRt() == 31) { + suffix = NULL; + } + break; + } } - Format(instr, mnemonic, form); + Format(instr, mnemonic, form, suffix); } @@ -2341,1098 +2056,455 @@ void Disassembler::VisitCryptoAES(const Instruction *instr) { VisitUnimplemented(instr); } - -void Disassembler::VisitNEON2RegMisc(const Instruction *instr) { - const char *mnemonic = "unimplemented"; +void Disassembler::DisassembleNEON2RegAddlp(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Vd.%s, 'Vn.%s"; - const char *form_cmp_zero = "'Vd.%s, 'Vn.%s, #0"; - const char *form_fcmp_zero = "'Vd.%s, 'Vn.%s, #0.0"; - NEONFormatDecoder nfd(instr); static const NEONFormatMap map_lp_ta = {{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}}; + NEONFormatDecoder nfd(instr); + nfd.SetFormatMap(0, &map_lp_ta); + Format(instr, mnemonic, nfd.Substitute(form)); +} +void Disassembler::DisassembleNEON2RegCompare(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, #0"; + NEONFormatDecoder nfd(instr); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegFPCompare(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, #0.0"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPFormatMap()); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegFPConvert(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; static const NEONFormatMap map_cvt_ta = {{22}, {NF_4S, NF_2D}}; static const NEONFormatMap map_cvt_tb = {{22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S}}; + NEONFormatDecoder nfd(instr, &map_cvt_tb, &map_cvt_ta); - if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) { - // These instructions all use a two bit size field, except NOT and RBIT, - // which use the field to 
encode the operation. - switch (instr->Mask(NEON2RegMiscMask)) { - case NEON_REV64: - mnemonic = "rev64"; - break; - case NEON_REV32: - mnemonic = "rev32"; - break; - case NEON_REV16: - mnemonic = "rev16"; - break; - case NEON_SADDLP: - mnemonic = "saddlp"; - nfd.SetFormatMap(0, &map_lp_ta); - break; - case NEON_UADDLP: - mnemonic = "uaddlp"; - nfd.SetFormatMap(0, &map_lp_ta); - break; - case NEON_SUQADD: - mnemonic = "suqadd"; - break; - case NEON_USQADD: - mnemonic = "usqadd"; - break; - case NEON_CLS: - mnemonic = "cls"; - break; - case NEON_CLZ: - mnemonic = "clz"; - break; - case NEON_CNT: - mnemonic = "cnt"; - break; - case NEON_SADALP: - mnemonic = "sadalp"; - nfd.SetFormatMap(0, &map_lp_ta); - break; - case NEON_UADALP: - mnemonic = "uadalp"; - nfd.SetFormatMap(0, &map_lp_ta); - break; - case NEON_SQABS: - mnemonic = "sqabs"; - break; - case NEON_SQNEG: - mnemonic = "sqneg"; - break; - case NEON_CMGT_zero: - mnemonic = "cmgt"; - form = form_cmp_zero; - break; - case NEON_CMGE_zero: - mnemonic = "cmge"; - form = form_cmp_zero; - break; - case NEON_CMEQ_zero: - mnemonic = "cmeq"; - form = form_cmp_zero; - break; - case NEON_CMLE_zero: - mnemonic = "cmle"; - form = form_cmp_zero; - break; - case NEON_CMLT_zero: - mnemonic = "cmlt"; - form = form_cmp_zero; + VectorFormat vform_dst = nfd.GetVectorFormat(0); + switch (form_hash_) { + case "fcvtl_asimdmisc_l"_h: + nfd.SetFormatMaps(&map_cvt_ta, &map_cvt_tb); + break; + case "fcvtxn_asimdmisc_n"_h: + if ((vform_dst != kFormat2S) && (vform_dst != kFormat4S)) { + mnemonic = NULL; + } + break; + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegFP(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPFormatMap()); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegLogical(const Instruction *instr) { + const char 
*mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + if (form_hash_ == "not_asimdmisc_r"_h) { + mnemonic = "mvn"; + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegExtract(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + const char *suffix = NULL; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::IntegerFormatMap(), + NEONFormatDecoder::LongIntegerFormatMap()); + + if (form_hash_ == "shll_asimdmisc_s"_h) { + nfd.SetFormatMaps(nfd.LongIntegerFormatMap(), nfd.IntegerFormatMap()); + switch (instr->GetNEONSize()) { + case 0: + suffix = ", #8"; break; - case NEON_ABS: - mnemonic = "abs"; + case 1: + suffix = ", #16"; break; - case NEON_NEG: - mnemonic = "neg"; - break; - case NEON_RBIT_NOT: - switch (instr->GetFPType()) { - case 0: - mnemonic = "mvn"; - break; - case 1: - mnemonic = "rbit"; - break; - default: - form = "(NEON2RegMisc)"; - } - nfd.SetFormatMaps(nfd.LogicalFormatMap()); + case 2: + suffix = ", #32"; break; } - } else { - // These instructions all use a one bit size field, except XTN, SQXTUN, - // SHLL, SQXTN and UQXTN, which use a two bit size field. - nfd.SetFormatMaps(nfd.FPFormatMap()); - switch (instr->Mask(NEON2RegMiscFPMask)) { - case NEON_FABS: - mnemonic = "fabs"; - break; - case NEON_FNEG: - mnemonic = "fneg"; - break; - case NEON_FCVTN: - mnemonic = instr->Mask(NEON_Q) ? "fcvtn2" : "fcvtn"; - nfd.SetFormatMap(0, &map_cvt_tb); - nfd.SetFormatMap(1, &map_cvt_ta); - break; - case NEON_FCVTXN: - mnemonic = instr->Mask(NEON_Q) ? "fcvtxn2" : "fcvtxn"; - nfd.SetFormatMap(0, &map_cvt_tb); - nfd.SetFormatMap(1, &map_cvt_ta); - break; - case NEON_FCVTL: - mnemonic = instr->Mask(NEON_Q) ? 
"fcvtl2" : "fcvtl"; - nfd.SetFormatMap(0, &map_cvt_ta); - nfd.SetFormatMap(1, &map_cvt_tb); - break; - case NEON_FRINT32X: - mnemonic = "frint32x"; - break; - case NEON_FRINT32Z: - mnemonic = "frint32z"; - break; - case NEON_FRINT64X: - mnemonic = "frint64x"; - break; - case NEON_FRINT64Z: - mnemonic = "frint64z"; - break; - case NEON_FRINTN: - mnemonic = "frintn"; - break; - case NEON_FRINTA: - mnemonic = "frinta"; - break; - case NEON_FRINTP: - mnemonic = "frintp"; - break; - case NEON_FRINTM: - mnemonic = "frintm"; - break; - case NEON_FRINTX: - mnemonic = "frintx"; - break; - case NEON_FRINTZ: - mnemonic = "frintz"; - break; - case NEON_FRINTI: - mnemonic = "frinti"; - break; - case NEON_FCVTNS: - mnemonic = "fcvtns"; - break; - case NEON_FCVTNU: - mnemonic = "fcvtnu"; - break; - case NEON_FCVTPS: - mnemonic = "fcvtps"; - break; - case NEON_FCVTPU: - mnemonic = "fcvtpu"; - break; - case NEON_FCVTMS: - mnemonic = "fcvtms"; - break; - case NEON_FCVTMU: - mnemonic = "fcvtmu"; - break; - case NEON_FCVTZS: - mnemonic = "fcvtzs"; - break; - case NEON_FCVTZU: - mnemonic = "fcvtzu"; - break; - case NEON_FCVTAS: - mnemonic = "fcvtas"; - break; - case NEON_FCVTAU: - mnemonic = "fcvtau"; - break; - case NEON_FSQRT: - mnemonic = "fsqrt"; - break; - case NEON_SCVTF: - mnemonic = "scvtf"; - break; - case NEON_UCVTF: - mnemonic = "ucvtf"; - break; - case NEON_URSQRTE: - mnemonic = "ursqrte"; - break; - case NEON_URECPE: - mnemonic = "urecpe"; - break; - case NEON_FRSQRTE: - mnemonic = "frsqrte"; - break; - case NEON_FRECPE: - mnemonic = "frecpe"; - break; - case NEON_FCMGT_zero: - mnemonic = "fcmgt"; - form = form_fcmp_zero; - break; - case NEON_FCMGE_zero: - mnemonic = "fcmge"; - form = form_fcmp_zero; - break; - case NEON_FCMEQ_zero: - mnemonic = "fcmeq"; - form = form_fcmp_zero; - break; - case NEON_FCMLE_zero: - mnemonic = "fcmle"; - form = form_fcmp_zero; + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form), suffix); +} + +void 
Disassembler::VisitNEON2RegMisc(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + NEONFormatDecoder nfd(instr); + + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if (vform_dst != kFormatUndefined) { + uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst); + switch (form_hash_) { + case "cnt_asimdmisc_r"_h: + case "rev16_asimdmisc_r"_h: + if (ls_dst != kBRegSize) { + mnemonic = NULL; + } break; - case NEON_FCMLT_zero: - mnemonic = "fcmlt"; - form = form_fcmp_zero; + case "rev32_asimdmisc_r"_h: + if ((ls_dst == kDRegSize) || (ls_dst == kSRegSize)) { + mnemonic = NULL; + } break; - default: - if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) && - (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) { - nfd.SetFormatMap(0, nfd.IntegerFormatMap()); - nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); - - switch (instr->Mask(NEON2RegMiscMask)) { - case NEON_XTN: - mnemonic = "xtn"; - break; - case NEON_SQXTN: - mnemonic = "sqxtn"; - break; - case NEON_UQXTN: - mnemonic = "uqxtn"; - break; - case NEON_SQXTUN: - mnemonic = "sqxtun"; - break; - case NEON_SHLL: - mnemonic = "shll"; - nfd.SetFormatMap(0, nfd.LongIntegerFormatMap()); - nfd.SetFormatMap(1, nfd.IntegerFormatMap()); - switch (instr->GetNEONSize()) { - case 0: - form = "'Vd.%s, 'Vn.%s, #8"; - break; - case 1: - form = "'Vd.%s, 'Vn.%s, #16"; - break; - case 2: - form = "'Vd.%s, 'Vn.%s, #32"; - break; - default: - Format(instr, "unallocated", "(NEON2RegMisc)"); - return; - } - } - Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); - return; - } else { - form = "(NEON2RegMisc)"; + case "urecpe_asimdmisc_r"_h: + case "ursqrte_asimdmisc_r"_h: + // For urecpe and ursqrte, only S-sized elements are supported. The MSB + // of the size field is always set by the instruction (0b1x) so we need + // only check and discard D-sized elements here. 
+ VIXL_ASSERT((ls_dst == kSRegSize) || (ls_dst == kDRegSize)); + VIXL_FALLTHROUGH(); + case "clz_asimdmisc_r"_h: + case "cls_asimdmisc_r"_h: + case "rev64_asimdmisc_r"_h: + if (ls_dst == kDRegSize) { + mnemonic = NULL; } + break; } } + Format(instr, mnemonic, nfd.Substitute(form)); } void Disassembler::VisitNEON2RegMiscFP16(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Vd.%s, 'Vn.%s"; - const char *form_cmp = "'Vd.%s, 'Vn.%s, #0.0"; + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.'?30:84h, 'Vn.'?30:84h"; + const char *suffix = NULL; - static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}}; - NEONFormatDecoder nfd(instr, &map_half); + switch (form_hash_) { + case "fcmeq_asimdmiscfp16_fz"_h: + case "fcmge_asimdmiscfp16_fz"_h: + case "fcmgt_asimdmiscfp16_fz"_h: + case "fcmle_asimdmiscfp16_fz"_h: + case "fcmlt_asimdmiscfp16_fz"_h: + suffix = ", #0.0"; + } + Format(instr, mnemonic, form, suffix); +} - switch (instr->Mask(NEON2RegMiscFP16Mask)) { -// clang-format off -#define FORMAT(A, B) \ - case NEON_##A##_H: \ - mnemonic = B; \ - break; - FORMAT(FABS, "fabs") - FORMAT(FCVTAS, "fcvtas") - FORMAT(FCVTAU, "fcvtau") - FORMAT(FCVTMS, "fcvtms") - FORMAT(FCVTMU, "fcvtmu") - FORMAT(FCVTNS, "fcvtns") - FORMAT(FCVTNU, "fcvtnu") - FORMAT(FCVTPS, "fcvtps") - FORMAT(FCVTPU, "fcvtpu") - FORMAT(FCVTZS, "fcvtzs") - FORMAT(FCVTZU, "fcvtzu") - FORMAT(FNEG, "fneg") - FORMAT(FRECPE, "frecpe") - FORMAT(FRINTA, "frinta") - FORMAT(FRINTI, "frinti") - FORMAT(FRINTM, "frintm") - FORMAT(FRINTN, "frintn") - FORMAT(FRINTP, "frintp") - FORMAT(FRINTX, "frintx") - FORMAT(FRINTZ, "frintz") - FORMAT(FRSQRTE, "frsqrte") - FORMAT(FSQRT, "fsqrt") - FORMAT(SCVTF, "scvtf") - FORMAT(UCVTF, "ucvtf") -// clang-format on -#undef FORMAT +void Disassembler::DisassembleNEON3SameLogical(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + NEONFormatDecoder nfd(instr, 
NEONFormatDecoder::LogicalFormatMap()); - case NEON_FCMEQ_H_zero: - mnemonic = "fcmeq"; - form = form_cmp; - break; - case NEON_FCMGT_H_zero: - mnemonic = "fcmgt"; - form = form_cmp; - break; - case NEON_FCMGE_H_zero: - mnemonic = "fcmge"; - form = form_cmp; - break; - case NEON_FCMLT_H_zero: - mnemonic = "fcmlt"; - form = form_cmp; - break; - case NEON_FCMLE_H_zero: - mnemonic = "fcmle"; - form = form_cmp; + switch (form_hash_) { + case "orr_asimdsame_only"_h: + if (instr->GetRm() == instr->GetRn()) { + mnemonic = "mov"; + form = "'Vd.%s, 'Vn.%s"; + } break; - default: - form = "(NEON2RegMiscFP16)"; + case "pmul_asimdsame_only"_h: + if (instr->GetNEONSize() != 0) { + mnemonic = NULL; + } } Format(instr, mnemonic, nfd.Substitute(form)); } +void Disassembler::DisassembleNEON3SameFHM(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Vd.'?30:42s, 'Vn.'?30:42h, 'Vm.'?30:42h"); +} + +void Disassembler::DisassembleNEON3SameNoD(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + static const NEONFormatMap map = + {{23, 22, 30}, + {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_UNDEF}}; + NEONFormatDecoder nfd(instr, &map); + Format(instr, mnemonic, nfd.Substitute(form)); +} void Disassembler::VisitNEON3Same(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; NEONFormatDecoder nfd(instr); - if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) { - switch (instr->Mask(NEON3SameLogicalMask)) { - case NEON_AND: - mnemonic = "and"; - break; - case NEON_ORR: - mnemonic = "orr"; - if (instr->GetRm() == instr->GetRn()) { - mnemonic = "mov"; - form = "'Vd.%s, 'Vn.%s"; + if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) { + nfd.SetFormatMaps(nfd.FPFormatMap()); + } + + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if (vform_dst != kFormatUndefined) { + uint32_t 
ls_dst = LaneSizeInBitsFromFormat(vform_dst); + switch (form_hash_) { + case "sqdmulh_asimdsame_only"_h: + case "sqrdmulh_asimdsame_only"_h: + if ((ls_dst == kBRegSize) || (ls_dst == kDRegSize)) { + mnemonic = NULL; } break; - case NEON_ORN: - mnemonic = "orn"; - break; - case NEON_EOR: - mnemonic = "eor"; - break; - case NEON_BIC: - mnemonic = "bic"; - break; - case NEON_BIF: - mnemonic = "bif"; - break; - case NEON_BIT: - mnemonic = "bit"; - break; - case NEON_BSL: - mnemonic = "bsl"; - break; - default: - form = "(NEON3Same)"; - } - nfd.SetFormatMaps(nfd.LogicalFormatMap()); - } else { - static const char kUnknown[] = "unallocated"; - static const char *mnemonics[] = {"shadd", - "uhadd", - "shadd", - "uhadd", - "sqadd", - "uqadd", - "sqadd", - "uqadd", - "srhadd", - "urhadd", - "srhadd", - "urhadd", - // Handled by logical cases above. - NULL, - NULL, - NULL, - NULL, - "shsub", - "uhsub", - "shsub", - "uhsub", - "sqsub", - "uqsub", - "sqsub", - "uqsub", - "cmgt", - "cmhi", - "cmgt", - "cmhi", - "cmge", - "cmhs", - "cmge", - "cmhs", - "sshl", - "ushl", - "sshl", - "ushl", - "sqshl", - "uqshl", - "sqshl", - "uqshl", - "srshl", - "urshl", - "srshl", - "urshl", - "sqrshl", - "uqrshl", - "sqrshl", - "uqrshl", - "smax", - "umax", - "smax", - "umax", - "smin", - "umin", - "smin", - "umin", - "sabd", - "uabd", - "sabd", - "uabd", - "saba", - "uaba", - "saba", - "uaba", - "add", - "sub", - "add", - "sub", - "cmtst", - "cmeq", - "cmtst", - "cmeq", - "mla", - "mls", - "mla", - "mls", - "mul", - "pmul", - "mul", - "pmul", - "smaxp", - "umaxp", - "smaxp", - "umaxp", - "sminp", - "uminp", - "sminp", - "uminp", - "sqdmulh", - "sqrdmulh", - "sqdmulh", - "sqrdmulh", - "addp", - kUnknown, - "addp", - kUnknown, - "fmaxnm", - "fmaxnmp", - "fminnm", - "fminnmp", - "fmla", - kUnknown, // FMLAL2 or unallocated - "fmls", - kUnknown, // FMLSL2 or unallocated - "fadd", - "faddp", - "fsub", - "fabd", - "fmulx", - "fmul", - kUnknown, - kUnknown, - "fcmeq", - "fcmge", - kUnknown, - "fcmgt", 
- kUnknown, // FMLAL or unallocated - "facge", - kUnknown, // FMLSL or unallocated - "facgt", - "fmax", - "fmaxp", - "fmin", - "fminp", - "frecps", - "fdiv", - "frsqrts", - kUnknown}; - - // Operation is determined by the opcode bits (15-11), the top bit of - // size (23) and the U bit (29). - unsigned index = (instr->ExtractBits(15, 11) << 2) | - (instr->ExtractBit(23) << 1) | instr->ExtractBit(29); - VIXL_ASSERT(index < ArrayLength(mnemonics)); - mnemonic = mnemonics[index]; - // Assert that index is not one of the previously handled logical - // instructions. - VIXL_ASSERT(mnemonic != NULL); - - if (mnemonic == kUnknown) { - // Catch special cases where we need to check more bits than we have in - // the table index. Anything not matched here is unallocated. - - const char *fhm_form = (instr->Mask(NEON_Q) == 0) - ? "'Vd.2s, 'Vn.2h, 'Vm.2h" - : "'Vd.4s, 'Vn.4h, 'Vm.4h"; - switch (instr->Mask(NEON3SameFHMMask)) { - case NEON_FMLAL: - mnemonic = "fmlal"; - form = fhm_form; - break; - case NEON_FMLAL2: - mnemonic = "fmlal2"; - form = fhm_form; - break; - case NEON_FMLSL: - mnemonic = "fmlsl"; - form = fhm_form; - break; - case NEON_FMLSL2: - mnemonic = "fmlsl2"; - form = fhm_form; - break; - default: - VIXL_ASSERT(strcmp(mnemonic, "unallocated") == 0); - form = "(NEON3Same)"; - break; - } - } - - if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) { - nfd.SetFormatMaps(nfd.FPFormatMap()); } } Format(instr, mnemonic, nfd.Substitute(form)); } void Disassembler::VisitNEON3SameFP16(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; - NEONFormatDecoder nfd(instr); nfd.SetFormatMaps(nfd.FP16FormatMap()); - - switch (instr->Mask(NEON3SameFP16Mask)) { -#define FORMAT(A, B) \ - case NEON_##A##_H: \ - mnemonic = B; \ - break; - FORMAT(FMAXNM, "fmaxnm"); - FORMAT(FMLA, "fmla"); - FORMAT(FADD, "fadd"); - FORMAT(FMULX, "fmulx"); - FORMAT(FCMEQ, "fcmeq"); - FORMAT(FMAX, 
"fmax"); - FORMAT(FRECPS, "frecps"); - FORMAT(FMINNM, "fminnm"); - FORMAT(FMLS, "fmls"); - FORMAT(FSUB, "fsub"); - FORMAT(FMIN, "fmin"); - FORMAT(FRSQRTS, "frsqrts"); - FORMAT(FMAXNMP, "fmaxnmp"); - FORMAT(FADDP, "faddp"); - FORMAT(FMUL, "fmul"); - FORMAT(FCMGE, "fcmge"); - FORMAT(FACGE, "facge"); - FORMAT(FMAXP, "fmaxp"); - FORMAT(FDIV, "fdiv"); - FORMAT(FMINNMP, "fminnmp"); - FORMAT(FABD, "fabd"); - FORMAT(FCMGT, "fcmgt"); - FORMAT(FACGT, "facgt"); - FORMAT(FMINP, "fminp"); -#undef FORMAT - default: - form = "(NEON3SameFP16)"; - } - Format(instr, mnemonic, nfd.Substitute(form)); } void Disassembler::VisitNEON3SameExtra(const Instruction *instr) { static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}}; - const char *mnemonic = "unallocated"; - const char *form = "(NEON3SameExtra)"; + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + const char *suffix = NULL; NEONFormatDecoder nfd(instr); - if (instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) { - mnemonic = "fcmla"; - form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVFCNM"; - } else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) { - mnemonic = "fcadd"; - form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVFCNA"; - } else { - form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; - switch (instr->Mask(NEON3SameExtraMask)) { - case NEON_SDOT: - mnemonic = "sdot"; - nfd.SetFormatMap(1, &map_usdot); - nfd.SetFormatMap(2, &map_usdot); - break; - case NEON_SQRDMLAH: - mnemonic = "sqrdmlah"; - break; - case NEON_UDOT: - mnemonic = "udot"; - nfd.SetFormatMap(1, &map_usdot); - nfd.SetFormatMap(2, &map_usdot); - break; - case NEON_SQRDMLSH: - mnemonic = "sqrdmlsh"; - break; - } + switch (form_hash_) { + case "fcmla_asimdsame2_c"_h: + suffix = ", #'u1211*90"; + break; + case "fcadd_asimdsame2_c"_h: + // Bit 10 is always set, so this gives 90 * 1 or 3. 
+ suffix = ", #'u1212:1010*90"; + break; + case "sdot_asimdsame2_d"_h: + case "udot_asimdsame2_d"_h: + case "usdot_asimdsame2_d"_h: + nfd.SetFormatMap(1, &map_usdot); + nfd.SetFormatMap(2, &map_usdot); + break; + default: + // sqrdml[as]h - nothing to do. + break; } - Format(instr, mnemonic, nfd.Substitute(form)); + Format(instr, mnemonic, nfd.Substitute(form), suffix); } void Disassembler::VisitNEON3Different(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; NEONFormatDecoder nfd(instr); nfd.SetFormatMap(0, nfd.LongIntegerFormatMap()); - // Ignore the Q bit. Appending a "2" suffix is handled later. - switch (instr->Mask(NEON3DifferentMask) & ~NEON_Q) { - case NEON_PMULL: - mnemonic = "pmull"; - break; - case NEON_SABAL: - mnemonic = "sabal"; - break; - case NEON_SABDL: - mnemonic = "sabdl"; - break; - case NEON_SADDL: - mnemonic = "saddl"; - break; - case NEON_SMLAL: - mnemonic = "smlal"; - break; - case NEON_SMLSL: - mnemonic = "smlsl"; - break; - case NEON_SMULL: - mnemonic = "smull"; - break; - case NEON_SSUBL: - mnemonic = "ssubl"; - break; - case NEON_SQDMLAL: - mnemonic = "sqdmlal"; - break; - case NEON_SQDMLSL: - mnemonic = "sqdmlsl"; - break; - case NEON_SQDMULL: - mnemonic = "sqdmull"; - break; - case NEON_UABAL: - mnemonic = "uabal"; - break; - case NEON_UABDL: - mnemonic = "uabdl"; - break; - case NEON_UADDL: - mnemonic = "uaddl"; - break; - case NEON_UMLAL: - mnemonic = "umlal"; - break; - case NEON_UMLSL: - mnemonic = "umlsl"; - break; - case NEON_UMULL: - mnemonic = "umull"; - break; - case NEON_USUBL: - mnemonic = "usubl"; - break; - case NEON_SADDW: - mnemonic = "saddw"; - nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); - break; - case NEON_SSUBW: - mnemonic = "ssubw"; - nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); - break; - case NEON_UADDW: - mnemonic = "uaddw"; + switch (form_hash_) { + case "saddw_asimddiff_w"_h: + case 
"ssubw_asimddiff_w"_h: + case "uaddw_asimddiff_w"_h: + case "usubw_asimddiff_w"_h: nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); break; - case NEON_USUBW: - mnemonic = "usubw"; - nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); - break; - case NEON_ADDHN: - mnemonic = "addhn"; - nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); - nfd.SetFormatMap(0, nfd.IntegerFormatMap()); - break; - case NEON_RADDHN: - mnemonic = "raddhn"; + case "addhn_asimddiff_n"_h: + case "raddhn_asimddiff_n"_h: + case "rsubhn_asimddiff_n"_h: + case "subhn_asimddiff_n"_h: nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); nfd.SetFormatMap(0, nfd.IntegerFormatMap()); break; - case NEON_RSUBHN: - mnemonic = "rsubhn"; - nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); - nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + case "pmull_asimddiff_l"_h: + if (nfd.GetVectorFormat(0) != kFormat8H) { + mnemonic = NULL; + } break; - case NEON_SUBHN: - mnemonic = "subhn"; - nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); - nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + case "sqdmlal_asimddiff_l"_h: + case "sqdmlsl_asimddiff_l"_h: + case "sqdmull_asimddiff_l"_h: + if (nfd.GetVectorFormat(0) == kFormat8H) { + mnemonic = NULL; + } break; - default: - form = "(NEON3Different)"; } Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); } +void Disassembler::DisassembleNEONFPAcrossLanes(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Sd, 'Vn.4s"; + if ((instr->GetNEONQ() == 0) || (instr->ExtractBit(22) == 1)) { + mnemonic = NULL; + } + Format(instr, mnemonic, form); +} + +void Disassembler::DisassembleNEONFP16AcrossLanes(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Hd, 'Vn.'?30:84h"); +} void Disassembler::VisitNEONAcrossLanes(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "%sd, 'Vn.%s"; - const char *form_half = "'Hd, 'Vn.%s"; - bool half_op = false; - static const 
NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}}; NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap(), NEONFormatDecoder::IntegerFormatMap()); - if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) { - half_op = true; - form = form_half; - nfd.SetFormatMaps(&map_half); - switch (instr->Mask(NEONAcrossLanesFP16Mask)) { - case NEON_FMAXV_H: - mnemonic = "fmaxv"; - break; - case NEON_FMINV_H: - mnemonic = "fminv"; - break; - case NEON_FMAXNMV_H: - mnemonic = "fmaxnmv"; - break; - case NEON_FMINNMV_H: - mnemonic = "fminnmv"; - break; - } - } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) { - nfd.SetFormatMap(0, nfd.FPScalarFormatMap()); - nfd.SetFormatMap(1, nfd.FPFormatMap()); - switch (instr->Mask(NEONAcrossLanesFPMask)) { - case NEON_FMAXV: - mnemonic = "fmaxv"; - break; - case NEON_FMINV: - mnemonic = "fminv"; - break; - case NEON_FMAXNMV: - mnemonic = "fmaxnmv"; - break; - case NEON_FMINNMV: - mnemonic = "fminnmv"; - break; - default: - form = "(NEONAcrossLanes)"; - break; - } - } else if (instr->Mask(NEONAcrossLanesFMask) == NEONAcrossLanesFixed) { - switch (instr->Mask(NEONAcrossLanesMask)) { - case NEON_ADDV: - mnemonic = "addv"; - break; - case NEON_SMAXV: - mnemonic = "smaxv"; - break; - case NEON_SMINV: - mnemonic = "sminv"; - break; - case NEON_UMAXV: - mnemonic = "umaxv"; - break; - case NEON_UMINV: - mnemonic = "uminv"; - break; - case NEON_SADDLV: - mnemonic = "saddlv"; - nfd.SetFormatMap(0, nfd.LongScalarFormatMap()); - break; - case NEON_UADDLV: - mnemonic = "uaddlv"; - nfd.SetFormatMap(0, nfd.LongScalarFormatMap()); - break; - default: - form = "(NEONAcrossLanes)"; - break; - } + switch (form_hash_) { + case "saddlv_asimdall_only"_h: + case "uaddlv_asimdall_only"_h: + nfd.SetFormatMap(0, nfd.LongScalarFormatMap()); } - if (half_op) { - Format(instr, mnemonic, nfd.Substitute(form)); - } else { - Format(instr, - mnemonic, - nfd.Substitute(form, - NEONFormatDecoder::kPlaceholder, - 
NEONFormatDecoder::kFormat)); + VectorFormat vform_src = nfd.GetVectorFormat(1); + if ((vform_src == kFormat2S) || (vform_src == kFormat2D)) { + mnemonic = NULL; } -} + Format(instr, + mnemonic, + nfd.Substitute(form, + NEONFormatDecoder::kPlaceholder, + NEONFormatDecoder::kFormat)); +} void Disassembler::VisitNEONByIndexedElement(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - bool l_instr = false; - bool fp_instr = false; - bool cn_instr = false; - bool half_instr = false; - bool fhm_instr = false; // FMLAL{2}, FMLSL{2} - const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]"; - - static const NEONFormatMap map_ta = {{23, 22}, {NF_UNDEF, NF_4S, NF_2D}}; - static const NEONFormatMap map_cn = + static const NEONFormatMap map_v = {{23, 22, 30}, - {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_UNDEF, NF_4S, NF_UNDEF, NF_UNDEF}}; - static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}}; - static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}}; + {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_UNDEF}}; + static const NEONFormatMap map_s = {{23, 22}, + {NF_UNDEF, NF_H, NF_S, NF_UNDEF}}; + NEONFormatDecoder nfd(instr, &map_v, &map_v, &map_s); + Format(instr, mnemonic_.c_str(), nfd.Substitute(form)); +} +void Disassembler::DisassembleNEONMulByElementLong(const Instruction *instr) { + const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]"; + // TODO: Disallow undefined element types for this instruction. 
+ static const NEONFormatMap map_ta = {{23, 22}, {NF_UNDEF, NF_4S, NF_2D}}; NEONFormatDecoder nfd(instr, &map_ta, NEONFormatDecoder::IntegerFormatMap(), NEONFormatDecoder::ScalarFormatMap()); + Format(instr, nfd.Mnemonic(mnemonic_.c_str()), nfd.Substitute(form)); +} - switch (instr->Mask(NEONByIndexedElementMask)) { - case NEON_SMULL_byelement: - mnemonic = "smull"; - l_instr = true; - break; - case NEON_UMULL_byelement: - mnemonic = "umull"; - l_instr = true; - break; - case NEON_SMLAL_byelement: - mnemonic = "smlal"; - l_instr = true; - break; - case NEON_UMLAL_byelement: - mnemonic = "umlal"; - l_instr = true; - break; - case NEON_SMLSL_byelement: - mnemonic = "smlsl"; - l_instr = true; - break; - case NEON_UMLSL_byelement: - mnemonic = "umlsl"; - l_instr = true; - break; - case NEON_SQDMULL_byelement: - mnemonic = "sqdmull"; - l_instr = true; - break; - case NEON_SQDMLAL_byelement: - mnemonic = "sqdmlal"; - l_instr = true; - break; - case NEON_SQDMLSL_byelement: - mnemonic = "sqdmlsl"; - l_instr = true; - break; - case NEON_MUL_byelement: - mnemonic = "mul"; - break; - case NEON_MLA_byelement: - mnemonic = "mla"; - break; - case NEON_MLS_byelement: - mnemonic = "mls"; - break; - case NEON_SQDMULH_byelement: - mnemonic = "sqdmulh"; - break; - case NEON_SQRDMULH_byelement: - mnemonic = "sqrdmulh"; - break; - case NEON_SDOT_byelement: - mnemonic = "sdot"; - form = "'Vd.%s, 'Vn.%s, 'Ve.4b['IVByElemIndex]"; - nfd.SetFormatMap(1, &map_usdot); - break; - case NEON_SQRDMLAH_byelement: - mnemonic = "sqrdmlah"; - break; - case NEON_UDOT_byelement: - mnemonic = "udot"; - form = "'Vd.%s, 'Vn.%s, 'Ve.4b['IVByElemIndex]"; - nfd.SetFormatMap(1, &map_usdot); - break; - case NEON_SQRDMLSH_byelement: - mnemonic = "sqrdmlsh"; - break; - default: { - switch (instr->Mask(NEONByIndexedElementFPLongMask)) { - case NEON_FMLAL_H_byelement: - mnemonic = "fmlal"; - fhm_instr = true; - break; - case NEON_FMLAL2_H_byelement: - mnemonic = "fmlal2"; - fhm_instr = true; - break; - case 
NEON_FMLSL_H_byelement: - mnemonic = "fmlsl"; - fhm_instr = true; - break; - case NEON_FMLSL2_H_byelement: - mnemonic = "fmlsl2"; - fhm_instr = true; - break; - default: - switch (instr->Mask(NEONByIndexedElementFPMask)) { - case NEON_FMUL_byelement: - mnemonic = "fmul"; - fp_instr = true; - break; - case NEON_FMLA_byelement: - mnemonic = "fmla"; - fp_instr = true; - break; - case NEON_FMLS_byelement: - mnemonic = "fmls"; - fp_instr = true; - break; - case NEON_FMULX_byelement: - mnemonic = "fmulx"; - fp_instr = true; - break; - case NEON_FMLA_H_byelement: - mnemonic = "fmla"; - half_instr = true; - break; - case NEON_FMLS_H_byelement: - mnemonic = "fmls"; - half_instr = true; - break; - case NEON_FMUL_H_byelement: - mnemonic = "fmul"; - half_instr = true; - break; - case NEON_FMULX_H_byelement: - mnemonic = "fmulx"; - half_instr = true; - break; - default: - switch (instr->Mask(NEONByIndexedElementFPComplexMask)) { - case NEON_FCMLA_byelement: - mnemonic = "fcmla"; - cn_instr = true; - form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndexRot], 'ILFCNR"; - break; - } - } - } - } - } +void Disassembler::DisassembleNEONDotProdByElement(const Instruction *instr) { + const char *form = instr->ExtractBit(30) ? "'Vd.4s, 'Vn.16" : "'Vd.2s, 'Vn.8"; + const char *suffix = "b, 'Vm.4b['u1111:2121]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} - if (fhm_instr) { - // These are oddballs. Set the format manually. - form = (instr->Mask(NEON_Q) == 0) - ? 
"'Vd.2s, 'Vn.2h, 'Ve.h['IVByElemIndexFHM]" - : "'Vd.4s, 'Vn.4h, 'Ve.h['IVByElemIndexFHM]"; - Format(instr, mnemonic, nfd.Substitute(form)); - } else if (half_instr) { - form = "'Vd.%s, 'Vn.%s, 'Ve.h['IVByElemIndex]"; - nfd.SetFormatMaps(&map_half, &map_half); - Format(instr, mnemonic, nfd.Substitute(form)); - } else if (l_instr) { - Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); - } else if (fp_instr) { - nfd.SetFormatMap(0, nfd.FPFormatMap()); - Format(instr, mnemonic, nfd.Substitute(form)); - } else if (cn_instr) { - nfd.SetFormatMap(0, &map_cn); - nfd.SetFormatMap(1, &map_cn); - Format(instr, mnemonic, nfd.Substitute(form)); - } else { - nfd.SetFormatMap(0, nfd.IntegerFormatMap()); - Format(instr, mnemonic, nfd.Substitute(form)); - } +void Disassembler::DisassembleNEONFPMulByElement(const Instruction *instr) { + const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]"; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::FPFormatMap(), + NEONFormatDecoder::FPFormatMap(), + NEONFormatDecoder::FPScalarFormatMap()); + Format(instr, mnemonic_.c_str(), nfd.Substitute(form)); } +void Disassembler::DisassembleNEONHalfFPMulByElement(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "'Vd.'?30:84h, 'Vn.'?30:84h, " + "'Ve.h['IVByElemIndex]"); +} + +void Disassembler::DisassembleNEONFPMulByElementLong(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "'Vd.'?30:42s, 'Vn.'?30:42h, " + "'Ve.h['IVByElemIndexFHM]"); +} + +void Disassembler::DisassembleNEONComplexMulByElement( + const Instruction *instr) { + const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndexRot], #'u1413*90"; + // TODO: Disallow undefined element types for this instruction. 
+ static const NEONFormatMap map_cn = + {{23, 22, 30}, + {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_UNDEF, NF_4S, NF_UNDEF, NF_UNDEF}}; + NEONFormatDecoder nfd(instr, + &map_cn, + &map_cn, + NEONFormatDecoder::ScalarFormatMap()); + Format(instr, mnemonic_.c_str(), nfd.Substitute(form)); +} void Disassembler::VisitNEONCopy(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "(NEONCopy)"; NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap(), NEONFormatDecoder::TriangularScalarFormatMap()); - if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) { - mnemonic = "mov"; - nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); - form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]"; - } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) { - mnemonic = "mov"; - nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); - if (nfd.GetVectorFormat() == kFormatD) { - form = "'Vd.%s['IVInsIndex1], 'Xn"; - } else { - form = "'Vd.%s['IVInsIndex1], 'Wn"; - } - } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) { - if (instr->Mask(NEON_Q) || ((instr->GetImmNEON5() & 7) == 4)) { + switch (form_hash_) { + case "ins_asimdins_iv_v"_h: mnemonic = "mov"; - } else { - mnemonic = "umov"; - } - nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); - if (nfd.GetVectorFormat() == kFormatD) { - form = "'Xd, 'Vn.%s['IVInsIndex1]"; - } else { - form = "'Wd, 'Vn.%s['IVInsIndex1]"; - } - } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) { - mnemonic = "smov"; - nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); - form = "'R30d, 'Vn.%s['IVInsIndex1]"; - } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) { - mnemonic = "dup"; - form = "'Vd.%s, 'Vn.%s['IVInsIndex1]"; - } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) { - mnemonic = "dup"; - if (nfd.GetVectorFormat() == kFormat2D) { - form = "'Vd.%s, 'Xn"; - } else { - form = "'Vd.%s, 'Wn"; + 
nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]"; + break; + case "ins_asimdins_ir_r"_h: + mnemonic = "mov"; + nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + if (nfd.GetVectorFormat() == kFormatD) { + form = "'Vd.%s['IVInsIndex1], 'Xn"; + } else { + form = "'Vd.%s['IVInsIndex1], 'Wn"; + } + break; + case "umov_asimdins_w_w"_h: + case "umov_asimdins_x_x"_h: + if (instr->Mask(NEON_Q) || ((instr->GetImmNEON5() & 7) == 4)) { + mnemonic = "mov"; + } + nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + if (nfd.GetVectorFormat() == kFormatD) { + form = "'Xd, 'Vn.%s['IVInsIndex1]"; + } else { + form = "'Wd, 'Vn.%s['IVInsIndex1]"; + } + break; + case "smov_asimdins_w_w"_h: + case "smov_asimdins_x_x"_h: { + nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + VectorFormat vform = nfd.GetVectorFormat(); + if ((vform == kFormatD) || + ((vform == kFormatS) && (instr->ExtractBit(30) == 0))) { + mnemonic = NULL; + } + form = "'R30d, 'Vn.%s['IVInsIndex1]"; + break; } + case "dup_asimdins_dv_v"_h: + form = "'Vd.%s, 'Vn.%s['IVInsIndex1]"; + break; + case "dup_asimdins_dr_r"_h: + if (nfd.GetVectorFormat() == kFormat2D) { + form = "'Vd.%s, 'Xn"; + } else { + form = "'Vd.%s, 'Wn"; + } } Format(instr, mnemonic, nfd.Substitute(form)); } void Disassembler::VisitNEONExtract(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(NEONExtract)"; + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract"; NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); - if (instr->Mask(NEONExtractMask) == NEON_EXT) { - mnemonic = "ext"; - form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract"; + if ((instr->GetImmNEONExt() > 7) && (instr->GetNEONQ() == 0)) { + mnemonic = NULL; } Format(instr, mnemonic, nfd.Substitute(form)); } @@ -3961,532 +3033,217 @@ void Disassembler::VisitNEONLoadStoreSingleStructPostIndex( void 
Disassembler::VisitNEONModifiedImmediate(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Vt.%s, 'IVMIImm8, lsl 'IVMIShiftAmt1"; - int half_enc = instr->ExtractBit(11); - int cmode = instr->GetNEONCmode(); - int cmode_3 = (cmode >> 3) & 1; - int cmode_2 = (cmode >> 2) & 1; - int cmode_1 = (cmode >> 1) & 1; - int cmode_0 = cmode & 1; - int q = instr->GetNEONQ(); - int op = instr->GetNEONModImmOp(); - - static const NEONFormatMap map_b = {{30}, {NF_8B, NF_16B}}; static const NEONFormatMap map_h = {{30}, {NF_4H, NF_8H}}; static const NEONFormatMap map_s = {{30}, {NF_2S, NF_4S}}; - NEONFormatDecoder nfd(instr, &map_b); - if (cmode_3 == 0) { - if (cmode_0 == 0) { - mnemonic = (op == 1) ? "mvni" : "movi"; - } else { // cmode<0> == '1'. - mnemonic = (op == 1) ? "bic" : "orr"; - } - nfd.SetFormatMap(0, &map_s); - } else { // cmode<3> == '1'. - if (cmode_2 == 0) { - if (cmode_0 == 0) { - mnemonic = (op == 1) ? "mvni" : "movi"; - } else { // cmode<0> == '1'. - mnemonic = (op == 1) ? "bic" : "orr"; - } + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + + switch (form_hash_) { + case "movi_asimdimm_n_b"_h: + form = "'Vt.%s, 'IVMIImm8"; + break; + case "bic_asimdimm_l_hl"_h: + case "movi_asimdimm_l_hl"_h: + case "mvni_asimdimm_l_hl"_h: + case "orr_asimdimm_l_hl"_h: nfd.SetFormatMap(0, &map_h); - } else { // cmode<2> == '1'. - if (cmode_1 == 0) { - mnemonic = (op == 1) ? "mvni" : "movi"; - form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2"; - nfd.SetFormatMap(0, &map_s); - } else { // cmode<1> == '1'. - if (cmode_0 == 0) { - mnemonic = "movi"; - if (op == 0) { - form = "'Vt.%s, 'IVMIImm8"; - } else { - form = (q == 0) ? 
"'Dd, 'IVMIImm" : "'Vt.2d, 'IVMIImm"; - } - } else { // cmode<0> == '1' - mnemonic = "fmov"; - form = "'Vt.%s, 'IFPNeon"; - if (half_enc == 1) { - nfd.SetFormatMap(0, &map_h); - } else if (op == 0) { - nfd.SetFormatMap(0, &map_s); - } else if (q == 1) { - form = "'Vt.2d, 'IFPNeon"; - } else { - mnemonic = "unallocated"; - form = "(NEONModifiedImmediate)"; - } - } - } - } + break; + case "movi_asimdimm_m_sm"_h: + case "mvni_asimdimm_m_sm"_h: + form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2"; + VIXL_FALLTHROUGH(); + case "bic_asimdimm_l_sl"_h: + case "movi_asimdimm_l_sl"_h: + case "mvni_asimdimm_l_sl"_h: + case "orr_asimdimm_l_sl"_h: + nfd.SetFormatMap(0, &map_s); + break; + case "movi_asimdimm_d_ds"_h: + form = "'Dd, 'IVMIImm"; + break; + case "movi_asimdimm_d2_d"_h: + form = "'Vt.2d, 'IVMIImm"; + break; + case "fmov_asimdimm_h_h"_h: + form = "'Vt.%s, 'IFPNeon"; + nfd.SetFormatMap(0, &map_h); + break; + case "fmov_asimdimm_s_s"_h: + form = "'Vt.%s, 'IFPNeon"; + nfd.SetFormatMap(0, &map_s); + break; + case "fmov_asimdimm_d2_d"_h: + form = "'Vt.2d, 'IFPNeon"; + break; } + Format(instr, mnemonic, nfd.Substitute(form)); } +void Disassembler::DisassembleNEONScalar2RegMiscOnlyD( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Dd, 'Dn"; + const char *suffix = ", #0"; + if (instr->GetNEONSize() != 3) { + mnemonic = NULL; + } + switch (form_hash_) { + case "abs_asisdmisc_r"_h: + case "neg_asisdmisc_r"_h: + suffix = NULL; + } + Format(instr, mnemonic, form, suffix); +} -void Disassembler::VisitNEONScalar2RegMisc(const Instruction *instr) { - const char *mnemonic = "unimplemented"; +void Disassembler::DisassembleNEONFPScalar2RegMisc(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); const char *form = "%sd, %sn"; - const char *form_0 = "%sd, %sn, #0"; - const char *form_fp0 = "%sd, %sn, #0.0"; + const char *suffix = NULL; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap()); + switch 
(form_hash_) { + case "fcmeq_asisdmisc_fz"_h: + case "fcmge_asisdmisc_fz"_h: + case "fcmgt_asisdmisc_fz"_h: + case "fcmle_asisdmisc_fz"_h: + case "fcmlt_asisdmisc_fz"_h: + suffix = ", #0.0"; + break; + case "fcvtxn_asisdmisc_n"_h: + if (nfd.GetVectorFormat(0) == kFormatS) { // Source format. + mnemonic = NULL; + } + form = "'Sd, 'Dn"; + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form), suffix); +} +void Disassembler::VisitNEONScalar2RegMisc(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn"; NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); - - if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) { - // These instructions all use a two bit size field, except NOT and RBIT, - // which use the field to encode the operation. - switch (instr->Mask(NEONScalar2RegMiscMask)) { - case NEON_CMGT_zero_scalar: - mnemonic = "cmgt"; - form = form_0; - break; - case NEON_CMGE_zero_scalar: - mnemonic = "cmge"; - form = form_0; - break; - case NEON_CMLE_zero_scalar: - mnemonic = "cmle"; - form = form_0; - break; - case NEON_CMLT_zero_scalar: - mnemonic = "cmlt"; - form = form_0; - break; - case NEON_CMEQ_zero_scalar: - mnemonic = "cmeq"; - form = form_0; - break; - case NEON_NEG_scalar: - mnemonic = "neg"; - break; - case NEON_SQNEG_scalar: - mnemonic = "sqneg"; - break; - case NEON_ABS_scalar: - mnemonic = "abs"; - break; - case NEON_SQABS_scalar: - mnemonic = "sqabs"; - break; - case NEON_SUQADD_scalar: - mnemonic = "suqadd"; - break; - case NEON_USQADD_scalar: - mnemonic = "usqadd"; - break; - default: - form = "(NEONScalar2RegMisc)"; - } - } else { - // These instructions all use a one bit size field, except SQXTUN, SQXTN - // and UQXTN, which use a two bit size field. 
- nfd.SetFormatMaps(nfd.FPScalarFormatMap()); - switch (instr->Mask(NEONScalar2RegMiscFPMask)) { - case NEON_FRSQRTE_scalar: - mnemonic = "frsqrte"; - break; - case NEON_FRECPE_scalar: - mnemonic = "frecpe"; - break; - case NEON_SCVTF_scalar: - mnemonic = "scvtf"; - break; - case NEON_UCVTF_scalar: - mnemonic = "ucvtf"; - break; - case NEON_FCMGT_zero_scalar: - mnemonic = "fcmgt"; - form = form_fp0; - break; - case NEON_FCMGE_zero_scalar: - mnemonic = "fcmge"; - form = form_fp0; - break; - case NEON_FCMLE_zero_scalar: - mnemonic = "fcmle"; - form = form_fp0; - break; - case NEON_FCMLT_zero_scalar: - mnemonic = "fcmlt"; - form = form_fp0; - break; - case NEON_FCMEQ_zero_scalar: - mnemonic = "fcmeq"; - form = form_fp0; - break; - case NEON_FRECPX_scalar: - mnemonic = "frecpx"; - break; - case NEON_FCVTNS_scalar: - mnemonic = "fcvtns"; - break; - case NEON_FCVTNU_scalar: - mnemonic = "fcvtnu"; - break; - case NEON_FCVTPS_scalar: - mnemonic = "fcvtps"; - break; - case NEON_FCVTPU_scalar: - mnemonic = "fcvtpu"; - break; - case NEON_FCVTMS_scalar: - mnemonic = "fcvtms"; - break; - case NEON_FCVTMU_scalar: - mnemonic = "fcvtmu"; - break; - case NEON_FCVTZS_scalar: - mnemonic = "fcvtzs"; - break; - case NEON_FCVTZU_scalar: - mnemonic = "fcvtzu"; - break; - case NEON_FCVTAS_scalar: - mnemonic = "fcvtas"; - break; - case NEON_FCVTAU_scalar: - mnemonic = "fcvtau"; - break; - case NEON_FCVTXN_scalar: - nfd.SetFormatMap(0, nfd.LongScalarFormatMap()); - mnemonic = "fcvtxn"; - break; - default: - nfd.SetFormatMap(0, nfd.ScalarFormatMap()); - nfd.SetFormatMap(1, nfd.LongScalarFormatMap()); - switch (instr->Mask(NEONScalar2RegMiscMask)) { - case NEON_SQXTN_scalar: - mnemonic = "sqxtn"; - break; - case NEON_UQXTN_scalar: - mnemonic = "uqxtn"; - break; - case NEON_SQXTUN_scalar: - mnemonic = "sqxtun"; - break; - default: - form = "(NEONScalar2RegMisc)"; - } - } + switch (form_hash_) { + case "sqxtn_asisdmisc_n"_h: + case "sqxtun_asisdmisc_n"_h: + case "uqxtn_asisdmisc_n"_h: + 
nfd.SetFormatMap(1, nfd.LongScalarFormatMap()); } Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); } void Disassembler::VisitNEONScalar2RegMiscFP16(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Hd, 'Hn"; - const char *form_fp0 = "'Hd, 'Hn, #0.0"; - - switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) { -#define FORMAT(A, B) \ - case NEON_##A##_H_scalar: \ - mnemonic = B; \ - break; - // clang-format off - FORMAT(FCVTNS, "fcvtns") - FORMAT(FCVTMS, "fcvtms") - FORMAT(FCVTAS, "fcvtas") - FORMAT(SCVTF, "scvtf") - FORMAT(FCVTPS, "fcvtps") - FORMAT(FCVTZS, "fcvtzs") - FORMAT(FRECPE, "frecpe") - FORMAT(FRECPX, "frecpx") - FORMAT(FCVTNU, "fcvtnu") - FORMAT(FCVTMU, "fcvtmu") - FORMAT(FCVTAU, "fcvtau") - FORMAT(UCVTF, "ucvtf") - FORMAT(FCVTPU, "fcvtpu") - FORMAT(FCVTZU, "fcvtzu") - FORMAT(FRSQRTE, "frsqrte") -// clang-format on -#undef FORMAT -#define FORMAT(A, B) \ - case NEON_##A##_H_zero_scalar: \ - mnemonic = B; \ - form = form_fp0; \ - break; - FORMAT(FCMGT, "fcmgt") - FORMAT(FCMEQ, "fcmeq") - FORMAT(FCMLT, "fcmlt") - FORMAT(FCMGE, "fcmge") - FORMAT(FCMLE, "fcmle") -#undef FORMAT + const char *suffix = NULL; - default: - VIXL_UNREACHABLE(); + switch (form_hash_) { + case "fcmeq_asisdmiscfp16_fz"_h: + case "fcmge_asisdmiscfp16_fz"_h: + case "fcmgt_asisdmiscfp16_fz"_h: + case "fcmle_asisdmiscfp16_fz"_h: + case "fcmlt_asisdmiscfp16_fz"_h: + suffix = ", #0.0"; } - Format(instr, mnemonic, form); + Format(instr, mnemonic, form, suffix); } void Disassembler::VisitNEONScalar3Diff(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "%sd, %sn, %sm"; NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap(), NEONFormatDecoder::ScalarFormatMap()); - - switch (instr->Mask(NEONScalar3DiffMask)) { - case NEON_SQDMLAL_scalar: - mnemonic = "sqdmlal"; - break; - case NEON_SQDMLSL_scalar: - mnemonic = 
"sqdmlsl"; - break; - case NEON_SQDMULL_scalar: - mnemonic = "sqdmull"; - break; - default: - form = "(NEONScalar3Diff)"; + if (nfd.GetVectorFormat(0) == kFormatH) { + mnemonic = NULL; } Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); } +void Disassembler::DisassembleNEONFPScalar3Same(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, %sm"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap()); + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); +} + +void Disassembler::DisassembleNEONScalar3SameOnlyD(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Dd, 'Dn, 'Dm"; + if (instr->GetNEONSize() != 3) { + mnemonic = NULL; + } + Format(instr, mnemonic, form); +} void Disassembler::VisitNEONScalar3Same(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "%sd, %sn, %sm"; NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); - - if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) { - nfd.SetFormatMaps(nfd.FPScalarFormatMap()); - switch (instr->Mask(NEONScalar3SameFPMask)) { - case NEON_FACGE_scalar: - mnemonic = "facge"; - break; - case NEON_FACGT_scalar: - mnemonic = "facgt"; - break; - case NEON_FCMEQ_scalar: - mnemonic = "fcmeq"; - break; - case NEON_FCMGE_scalar: - mnemonic = "fcmge"; - break; - case NEON_FCMGT_scalar: - mnemonic = "fcmgt"; - break; - case NEON_FMULX_scalar: - mnemonic = "fmulx"; - break; - case NEON_FRECPS_scalar: - mnemonic = "frecps"; - break; - case NEON_FRSQRTS_scalar: - mnemonic = "frsqrts"; - break; - case NEON_FABD_scalar: - mnemonic = "fabd"; - break; - default: - form = "(NEONScalar3Same)"; - } - } else { - switch (instr->Mask(NEONScalar3SameMask)) { - case NEON_ADD_scalar: - mnemonic = "add"; - break; - case NEON_SUB_scalar: - mnemonic = "sub"; - break; - case NEON_CMEQ_scalar: - mnemonic = "cmeq"; - 
break; - case NEON_CMGE_scalar: - mnemonic = "cmge"; - break; - case NEON_CMGT_scalar: - mnemonic = "cmgt"; - break; - case NEON_CMHI_scalar: - mnemonic = "cmhi"; - break; - case NEON_CMHS_scalar: - mnemonic = "cmhs"; - break; - case NEON_CMTST_scalar: - mnemonic = "cmtst"; - break; - case NEON_UQADD_scalar: - mnemonic = "uqadd"; - break; - case NEON_SQADD_scalar: - mnemonic = "sqadd"; - break; - case NEON_UQSUB_scalar: - mnemonic = "uqsub"; - break; - case NEON_SQSUB_scalar: - mnemonic = "sqsub"; - break; - case NEON_USHL_scalar: - mnemonic = "ushl"; - break; - case NEON_SSHL_scalar: - mnemonic = "sshl"; - break; - case NEON_UQSHL_scalar: - mnemonic = "uqshl"; - break; - case NEON_SQSHL_scalar: - mnemonic = "sqshl"; - break; - case NEON_URSHL_scalar: - mnemonic = "urshl"; - break; - case NEON_SRSHL_scalar: - mnemonic = "srshl"; - break; - case NEON_UQRSHL_scalar: - mnemonic = "uqrshl"; - break; - case NEON_SQRSHL_scalar: - mnemonic = "sqrshl"; - break; - case NEON_SQDMULH_scalar: - mnemonic = "sqdmulh"; - break; - case NEON_SQRDMULH_scalar: - mnemonic = "sqrdmulh"; - break; - default: - form = "(NEONScalar3Same)"; - } + VectorFormat vform = nfd.GetVectorFormat(0); + switch (form_hash_) { + case "srshl_asisdsame_only"_h: + case "urshl_asisdsame_only"_h: + case "sshl_asisdsame_only"_h: + case "ushl_asisdsame_only"_h: + if (vform != kFormatD) { + mnemonic = NULL; + } + break; + case "sqdmulh_asisdsame_only"_h: + case "sqrdmulh_asisdsame_only"_h: + if ((vform == kFormatB) || (vform == kFormatD)) { + mnemonic = NULL; + } } Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); } void Disassembler::VisitNEONScalar3SameFP16(const Instruction *instr) { - const char *mnemonic = NULL; - const char *form = "'Hd, 'Hn, 'Hm"; - - switch (instr->Mask(NEONScalar3SameFP16Mask)) { - case NEON_FABD_H_scalar: - mnemonic = "fabd"; - break; - case NEON_FMULX_H_scalar: - mnemonic = "fmulx"; - break; - case NEON_FCMEQ_H_scalar: - mnemonic = "fcmeq"; - break; - case 
NEON_FCMGE_H_scalar: - mnemonic = "fcmge"; - break; - case NEON_FCMGT_H_scalar: - mnemonic = "fcmgt"; - break; - case NEON_FACGE_H_scalar: - mnemonic = "facge"; - break; - case NEON_FACGT_H_scalar: - mnemonic = "facgt"; - break; - case NEON_FRECPS_H_scalar: - mnemonic = "frecps"; - break; - case NEON_FRSQRTS_H_scalar: - mnemonic = "frsqrts"; - break; - default: - VIXL_UNREACHABLE(); - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Hd, 'Hn, 'Hm"); } void Disassembler::VisitNEONScalar3SameExtra(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "%sd, %sn, %sm"; - NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); + USE(instr); + // Nothing to do - handled by VisitNEONScalar3Same. + VIXL_UNREACHABLE(); +} - switch (instr->Mask(NEONScalar3SameExtraMask)) { - case NEON_SQRDMLAH_scalar: - mnemonic = "sqrdmlah"; - break; - case NEON_SQRDMLSH_scalar: - mnemonic = "sqrdmlsh"; - break; - default: - form = "(NEONScalar3SameExtra)"; +void Disassembler::DisassembleNEONScalarSatMulLongIndex( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]"; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::LongScalarFormatMap(), + NEONFormatDecoder::ScalarFormatMap()); + if (nfd.GetVectorFormat(0) == kFormatH) { + mnemonic = NULL; } - Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); + Format(instr, + mnemonic, + nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat)); } +void Disassembler::DisassembleNEONFPScalarMulIndex(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]"; + static const NEONFormatMap map = {{23, 22}, {NF_H, NF_UNDEF, NF_S, NF_D}}; + NEONFormatDecoder nfd(instr, &map); + Format(instr, + mnemonic, + nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat)); +} void 
Disassembler::VisitNEONScalarByIndexedElement(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]"; - const char *form_half = "'Hd, 'Hn, 'Ve.h['IVByElemIndex]"; NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); - bool long_instr = false; - - switch (instr->Mask(NEONScalarByIndexedElementMask)) { - case NEON_SQDMULL_byelement_scalar: - mnemonic = "sqdmull"; - long_instr = true; - break; - case NEON_SQDMLAL_byelement_scalar: - mnemonic = "sqdmlal"; - long_instr = true; - break; - case NEON_SQDMLSL_byelement_scalar: - mnemonic = "sqdmlsl"; - long_instr = true; - break; - case NEON_SQDMULH_byelement_scalar: - mnemonic = "sqdmulh"; - break; - case NEON_SQRDMULH_byelement_scalar: - mnemonic = "sqrdmulh"; - break; - case NEON_SQRDMLAH_byelement_scalar: - mnemonic = "sqrdmlah"; - break; - case NEON_SQRDMLSH_byelement_scalar: - mnemonic = "sqrdmlsh"; - break; - default: - nfd.SetFormatMap(0, nfd.FPScalarFormatMap()); - switch (instr->Mask(NEONScalarByIndexedElementFPMask)) { - case NEON_FMUL_byelement_scalar: - mnemonic = "fmul"; - break; - case NEON_FMLA_byelement_scalar: - mnemonic = "fmla"; - break; - case NEON_FMLS_byelement_scalar: - mnemonic = "fmls"; - break; - case NEON_FMULX_byelement_scalar: - mnemonic = "fmulx"; - break; - case NEON_FMLA_H_byelement_scalar: - mnemonic = "fmla"; - form = form_half; - break; - case NEON_FMLS_H_byelement_scalar: - mnemonic = "fmls"; - form = form_half; - break; - case NEON_FMUL_H_byelement_scalar: - mnemonic = "fmul"; - form = form_half; - break; - case NEON_FMULX_H_byelement_scalar: - mnemonic = "fmulx"; - form = form_half; - break; - default: - form = "(NEONScalarByIndexedElement)"; - } + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if ((vform_dst == kFormatB) || (vform_dst == kFormatD)) { + mnemonic = NULL; } - - if (long_instr) { - nfd.SetFormatMap(0, nfd.LongScalarFormatMap()); - } - 
Format(instr, mnemonic, nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat)); @@ -4509,379 +3266,174 @@ void Disassembler::VisitNEONScalarCopy(const Instruction *instr) { void Disassembler::VisitNEONScalarPairwise(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "%sd, 'Vn.%s"; - NEONFormatMap map = {{22}, {NF_2S, NF_2D}}; - NEONFormatDecoder nfd(instr, - NEONFormatDecoder::FPScalarPairwiseFormatMap(), - &map); - - switch (instr->Mask(NEONScalarPairwiseMask)) { - case NEON_ADDP_scalar: - // All pairwise operations except ADDP use bit U to differentiate FP16 - // from FP32/FP64 variations. - nfd.SetFormatMap(0, NEONFormatDecoder::FPScalarFormatMap()); - mnemonic = "addp"; - break; - case NEON_FADDP_h_scalar: - form = "%sd, 'Vn.2h"; - VIXL_FALLTHROUGH(); - case NEON_FADDP_scalar: - mnemonic = "faddp"; - break; - case NEON_FMAXP_h_scalar: - form = "%sd, 'Vn.2h"; - VIXL_FALLTHROUGH(); - case NEON_FMAXP_scalar: - mnemonic = "fmaxp"; - break; - case NEON_FMAXNMP_h_scalar: - form = "%sd, 'Vn.2h"; - VIXL_FALLTHROUGH(); - case NEON_FMAXNMP_scalar: - mnemonic = "fmaxnmp"; - break; - case NEON_FMINP_h_scalar: - form = "%sd, 'Vn.2h"; - VIXL_FALLTHROUGH(); - case NEON_FMINP_scalar: - mnemonic = "fminp"; - break; - case NEON_FMINNMP_h_scalar: - form = "%sd, 'Vn.2h"; - VIXL_FALLTHROUGH(); - case NEON_FMINNMP_scalar: - mnemonic = "fminnmp"; - break; - default: - form = "(NEONScalarPairwise)"; + const char *mnemonic = mnemonic_.c_str(); + if (form_hash_ == "addp_asisdpair_only"_h) { + // All pairwise operations except ADDP use bit U to differentiate FP16 + // from FP32/FP64 variations. 
+ if (instr->GetNEONSize() != 3) { + mnemonic = NULL; + } + Format(instr, mnemonic, "'Dd, 'Vn.2d"); + } else { + const char *form = "%sd, 'Vn.2%s"; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::FPScalarPairwiseFormatMap()); + + Format(instr, + mnemonic, + nfd.Substitute(form, + NEONFormatDecoder::kPlaceholder, + NEONFormatDecoder::kFormat)); } - Format(instr, - mnemonic, - nfd.Substitute(form, - NEONFormatDecoder::kPlaceholder, - NEONFormatDecoder::kFormat)); } +void Disassembler::DisassembleNEONScalarShiftImmOnlyD( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Dd, 'Dn, "; + const char *suffix = "'IsR"; -void Disassembler::VisitNEONScalarShiftImmediate(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "%sd, %sn, 'Is1"; - const char *form_2 = "%sd, %sn, 'Is2"; - - static const NEONFormatMap map_shift = {{22, 21, 20, 19}, - {NF_UNDEF, - NF_B, - NF_H, - NF_H, - NF_S, - NF_S, - NF_S, - NF_S, - NF_D, - NF_D, - NF_D, - NF_D, - NF_D, - NF_D, - NF_D, - NF_D}}; - static const NEONFormatMap map_shift_narrow = - {{21, 20, 19}, {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}}; - NEONFormatDecoder nfd(instr, &map_shift); - - if (instr->GetImmNEONImmh()) { // immh has to be non-zero. 
- switch (instr->Mask(NEONScalarShiftImmediateMask)) { - case NEON_FCVTZU_imm_scalar: - mnemonic = "fcvtzu"; - break; - case NEON_FCVTZS_imm_scalar: - mnemonic = "fcvtzs"; - break; - case NEON_SCVTF_imm_scalar: - mnemonic = "scvtf"; - break; - case NEON_UCVTF_imm_scalar: - mnemonic = "ucvtf"; - break; - case NEON_SRI_scalar: - mnemonic = "sri"; - break; - case NEON_SSHR_scalar: - mnemonic = "sshr"; - break; - case NEON_USHR_scalar: - mnemonic = "ushr"; - break; - case NEON_SRSHR_scalar: - mnemonic = "srshr"; - break; - case NEON_URSHR_scalar: - mnemonic = "urshr"; - break; - case NEON_SSRA_scalar: - mnemonic = "ssra"; - break; - case NEON_USRA_scalar: - mnemonic = "usra"; - break; - case NEON_SRSRA_scalar: - mnemonic = "srsra"; - break; - case NEON_URSRA_scalar: - mnemonic = "ursra"; - break; - case NEON_SHL_scalar: - mnemonic = "shl"; - form = form_2; - break; - case NEON_SLI_scalar: - mnemonic = "sli"; - form = form_2; - break; - case NEON_SQSHLU_scalar: - mnemonic = "sqshlu"; - form = form_2; - break; - case NEON_SQSHL_imm_scalar: - mnemonic = "sqshl"; - form = form_2; - break; - case NEON_UQSHL_imm_scalar: - mnemonic = "uqshl"; - form = form_2; - break; - case NEON_UQSHRN_scalar: - mnemonic = "uqshrn"; - nfd.SetFormatMap(1, &map_shift_narrow); - break; - case NEON_UQRSHRN_scalar: - mnemonic = "uqrshrn"; - nfd.SetFormatMap(1, &map_shift_narrow); - break; - case NEON_SQSHRN_scalar: - mnemonic = "sqshrn"; - nfd.SetFormatMap(1, &map_shift_narrow); - break; - case NEON_SQRSHRN_scalar: - mnemonic = "sqrshrn"; - nfd.SetFormatMap(1, &map_shift_narrow); - break; - case NEON_SQSHRUN_scalar: - mnemonic = "sqshrun"; - nfd.SetFormatMap(1, &map_shift_narrow); - break; - case NEON_SQRSHRUN_scalar: - mnemonic = "sqrshrun"; - nfd.SetFormatMap(1, &map_shift_narrow); - break; - default: - form = "(NEONScalarShiftImmediate)"; - } - } else { - form = "(NEONScalarShiftImmediate)"; + if (instr->ExtractBit(22) == 0) { + // Only D registers are supported. 
+ mnemonic = NULL; } + + switch (form_hash_) { + case "shl_asisdshf_r"_h: + case "sli_asisdshf_r"_h: + suffix = "'IsL"; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::DisassembleNEONScalarShiftRightNarrowImm( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, 'IsR"; + static const NEONFormatMap map_dst = + {{22, 21, 20, 19}, {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S}}; + static const NEONFormatMap map_src = + {{22, 21, 20, 19}, {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}}; + NEONFormatDecoder nfd(instr, &map_dst, &map_src); Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); } +void Disassembler::VisitNEONScalarShiftImmediate(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, "; + const char *suffix = "'IsR"; + + // clang-format off + static const NEONFormatMap map = {{22, 21, 20, 19}, + {NF_UNDEF, NF_B, NF_H, NF_H, + NF_S, NF_S, NF_S, NF_S, + NF_D, NF_D, NF_D, NF_D, + NF_D, NF_D, NF_D, NF_D}}; + // clang-format on + NEONFormatDecoder nfd(instr, &map); + switch (form_hash_) { + case "sqshlu_asisdshf_r"_h: + case "sqshl_asisdshf_r"_h: + case "uqshl_asisdshf_r"_h: + suffix = "'IsL"; + break; + default: + if (nfd.GetVectorFormat(0) == kFormatB) { + mnemonic = NULL; + } + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form), suffix); +} -void Disassembler::VisitNEONShiftImmediate(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Vd.%s, 'Vn.%s, 'Is1"; - const char *form_shift_2 = "'Vd.%s, 'Vn.%s, 'Is2"; - const char *form_xtl = "'Vd.%s, 'Vn.%s"; - - // 0001->8H, 001x->4S, 01xx->2D, all others undefined. - static const NEONFormatMap map_shift_ta = - {{22, 21, 20, 19}, - {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}}; - - // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H, - // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined. 
- static const NEONFormatMap map_shift_tb = - {{22, 21, 20, 19, 30}, - {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H, - NF_8H, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, - NF_2S, NF_4S, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, - NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, - NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}}; - - NEONFormatDecoder nfd(instr, &map_shift_tb); - - if (instr->GetImmNEONImmh()) { // immh has to be non-zero. - switch (instr->Mask(NEONShiftImmediateMask)) { - case NEON_SQSHLU: - mnemonic = "sqshlu"; - form = form_shift_2; - break; - case NEON_SQSHL_imm: - mnemonic = "sqshl"; - form = form_shift_2; - break; - case NEON_UQSHL_imm: - mnemonic = "uqshl"; - form = form_shift_2; - break; - case NEON_SHL: - mnemonic = "shl"; - form = form_shift_2; - break; - case NEON_SLI: - mnemonic = "sli"; - form = form_shift_2; - break; - case NEON_SCVTF_imm: - mnemonic = "scvtf"; - break; - case NEON_UCVTF_imm: - mnemonic = "ucvtf"; - break; - case NEON_FCVTZU_imm: - mnemonic = "fcvtzu"; - break; - case NEON_FCVTZS_imm: - mnemonic = "fcvtzs"; - break; - case NEON_SRI: - mnemonic = "sri"; - break; - case NEON_SSHR: - mnemonic = "sshr"; - break; - case NEON_USHR: - mnemonic = "ushr"; - break; - case NEON_SRSHR: - mnemonic = "srshr"; - break; - case NEON_URSHR: - mnemonic = "urshr"; - break; - case NEON_SSRA: - mnemonic = "ssra"; - break; - case NEON_USRA: - mnemonic = "usra"; - break; - case NEON_SRSRA: - mnemonic = "srsra"; - break; - case NEON_URSRA: - mnemonic = "ursra"; - break; - case NEON_SHRN: - mnemonic = instr->Mask(NEON_Q) ? "shrn2" : "shrn"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_RSHRN: - mnemonic = instr->Mask(NEON_Q) ? "rshrn2" : "rshrn"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_UQSHRN: - mnemonic = instr->Mask(NEON_Q) ? "uqshrn2" : "uqshrn"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_UQRSHRN: - mnemonic = instr->Mask(NEON_Q) ? 
"uqrshrn2" : "uqrshrn"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_SQSHRN: - mnemonic = instr->Mask(NEON_Q) ? "sqshrn2" : "sqshrn"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_SQRSHRN: - mnemonic = instr->Mask(NEON_Q) ? "sqrshrn2" : "sqrshrn"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_SQSHRUN: - mnemonic = instr->Mask(NEON_Q) ? "sqshrun2" : "sqshrun"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_SQRSHRUN: - mnemonic = instr->Mask(NEON_Q) ? "sqrshrun2" : "sqrshrun"; - nfd.SetFormatMap(1, &map_shift_ta); - break; - case NEON_SSHLL: - nfd.SetFormatMap(0, &map_shift_ta); - if (instr->GetImmNEONImmb() == 0 && - CountSetBits(instr->GetImmNEONImmh(), 32) == 1) { // sxtl variant. - form = form_xtl; - mnemonic = instr->Mask(NEON_Q) ? "sxtl2" : "sxtl"; - } else { // sshll variant. - form = form_shift_2; - mnemonic = instr->Mask(NEON_Q) ? "sshll2" : "sshll"; - } - break; - case NEON_USHLL: - nfd.SetFormatMap(0, &map_shift_ta); - if (instr->GetImmNEONImmb() == 0 && - CountSetBits(instr->GetImmNEONImmh(), 32) == 1) { // uxtl variant. - form = form_xtl; - mnemonic = instr->Mask(NEON_Q) ? "uxtl2" : "uxtl"; - } else { // ushll variant. - form = form_shift_2; - mnemonic = instr->Mask(NEON_Q) ? "ushll2" : "ushll"; +void Disassembler::DisassembleNEONShiftLeftLongImm(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + const char *suffix = ", 'IsL"; + + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::ShiftLongNarrowImmFormatMap(), + NEONFormatDecoder::ShiftImmFormatMap()); + + if (instr->GetImmNEONImmb() == 0 && + CountSetBits(instr->GetImmNEONImmh(), 32) == 1) { // xtl variant. + VIXL_ASSERT((form_hash_ == "sshll_asimdshf_l"_h) || + (form_hash_ == "ushll_asimdshf_l"_h)); + mnemonic = (form_hash_ == "sshll_asimdshf_l"_h) ? 
"sxtl" : "uxtl"; + suffix = NULL; + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form), suffix); +} + +void Disassembler::DisassembleNEONShiftRightImm(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'IsR"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ShiftImmFormatMap()); + + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if (vform_dst != kFormatUndefined) { + uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst); + switch (form_hash_) { + case "scvtf_asimdshf_c"_h: + case "ucvtf_asimdshf_c"_h: + case "fcvtzs_asimdshf_c"_h: + case "fcvtzu_asimdshf_c"_h: + if (ls_dst == kBRegSize) { + mnemonic = NULL; } break; - default: - form = "(NEONShiftImmediate)"; } - } else { - form = "(NEONShiftImmediate)"; } Format(instr, mnemonic, nfd.Substitute(form)); } +void Disassembler::DisassembleNEONShiftRightNarrowImm( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'IsR"; + + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::ShiftImmFormatMap(), + NEONFormatDecoder::ShiftLongNarrowImmFormatMap()); + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); +} + +void Disassembler::VisitNEONShiftImmediate(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'IsL"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ShiftImmFormatMap()); + Format(instr, mnemonic, nfd.Substitute(form)); +} + void Disassembler::VisitNEONTable(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(NEONTable)"; + const char *mnemonic = mnemonic_.c_str(); const char form_1v[] = "'Vd.%%s, {'Vn.16b}, 'Vm.%%s"; const char form_2v[] = "'Vd.%%s, {'Vn.16b, v%d.16b}, 'Vm.%%s"; const char form_3v[] = "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b}, 'Vm.%%s"; const char form_4v[] = "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b, v%d.16b}, 'Vm.%%s"; - static const NEONFormatMap map_b = {{30}, 
{NF_8B, NF_16B}}; - NEONFormatDecoder nfd(instr, &map_b); + const char *form = form_1v; - switch (instr->Mask(NEONTableMask)) { - case NEON_TBL_1v: - mnemonic = "tbl"; - form = form_1v; - break; - case NEON_TBL_2v: - mnemonic = "tbl"; - form = form_2v; - break; - case NEON_TBL_3v: - mnemonic = "tbl"; - form = form_3v; - break; - case NEON_TBL_4v: - mnemonic = "tbl"; - form = form_4v; - break; - case NEON_TBX_1v: - mnemonic = "tbx"; - form = form_1v; - break; - case NEON_TBX_2v: - mnemonic = "tbx"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + + switch (form_hash_) { + case "tbl_asimdtbl_l2_2"_h: + case "tbx_asimdtbl_l2_2"_h: form = form_2v; break; - case NEON_TBX_3v: - mnemonic = "tbx"; + case "tbl_asimdtbl_l3_3"_h: + case "tbx_asimdtbl_l3_3"_h: form = form_3v; break; - case NEON_TBX_4v: - mnemonic = "tbx"; + case "tbl_asimdtbl_l4_4"_h: + case "tbx_asimdtbl_l4_4"_h: form = form_4v; break; - default: - break; } + VIXL_ASSERT(form != NULL); - char re_form[sizeof(form_4v) + 6]; + char re_form[sizeof(form_4v) + 6]; // 3 * two-digit substitutions => 6 int reg_num = instr->GetRn(); snprintf(re_form, sizeof(re_form), @@ -4895,179 +3447,52 @@ void Disassembler::VisitNEONTable(const Instruction *instr) { void Disassembler::VisitNEONPerm(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; NEONFormatDecoder nfd(instr); + FormatWithDecodedMnemonic(instr, nfd.Substitute("'Vd.%s, 'Vn.%s, 'Vm.%s")); +} - switch (instr->Mask(NEONPermMask)) { - case NEON_TRN1: - mnemonic = "trn1"; - break; - case NEON_TRN2: - mnemonic = "trn2"; - break; - case NEON_UZP1: - mnemonic = "uzp1"; - break; - case NEON_UZP2: - mnemonic = "uzp2"; - break; - case NEON_ZIP1: - mnemonic = "zip1"; - break; - case NEON_ZIP2: - mnemonic = "zip2"; - break; - default: - form = "(NEONPerm)"; - } - Format(instr, mnemonic, nfd.Substitute(form)); +void Disassembler::Disassemble_Vd4S_Vn16B_Vm16B(const Instruction *instr) { + 
FormatWithDecodedMnemonic(instr, "'Vd.4s, 'Vn.16b, 'Vm.16b"); } void Disassembler:: VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]"; - - switch (instr->Mask( - SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) { - case LD1H_z_p_bz_s_x32_scaled: - mnemonic = "ld1h"; - break; - case LD1SH_z_p_bz_s_x32_scaled: - mnemonic = "ld1sh"; - break; - case LDFF1H_z_p_bz_s_x32_scaled: - mnemonic = "ldff1h"; - break; - case LDFF1SH_z_p_bz_s_x32_scaled: - mnemonic = "ldff1sh"; - break; - default: - form = "(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]"); } void Disassembler::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]"; - - switch ( - instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) { - case LD1W_z_p_bz_s_x32_scaled: - mnemonic = "ld1w"; - break; - case LDFF1W_z_p_bz_s_x32_scaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]"); } void Disassembler::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( const Instruction *instr) { - const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]"; - - const char *mnemonic = "unimplemented"; - switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_s_x32_unscaled: - mnemonic = "ld1b"; - break; - case LD1H_z_p_bz_s_x32_unscaled: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bz_s_x32_unscaled: - mnemonic = "ld1sb"; - break; 
- case LD1SH_z_p_bz_s_x32_unscaled: - mnemonic = "ld1sh"; - break; - case LD1W_z_p_bz_s_x32_unscaled: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1b"; - break; - case LDFF1H_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1sb"; - break; - case LDFF1SH_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1sh"; - break; - case LDFF1W_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]"); } void Disassembler::VisitSVE32BitGatherLoad_VectorPlusImm( const Instruction *instr) { const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s]"; - const char *form_imm_b = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]"; + const char *form_imm = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]"; const char *form_imm_h = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*2]"; const char *form_imm_w = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*4]"; - const char *form_imm; - const char *mnemonic = "unimplemented"; - switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) { - case LD1B_z_p_ai_s: - mnemonic = "ld1b"; - form_imm = form_imm_b; - break; - case LD1H_z_p_ai_s: - mnemonic = "ld1h"; - form_imm = form_imm_h; - break; - case LD1SB_z_p_ai_s: - mnemonic = "ld1sb"; - form_imm = form_imm_b; - break; - case LD1SH_z_p_ai_s: - mnemonic = "ld1sh"; - form_imm = form_imm_h; - break; - case LD1W_z_p_ai_s: - mnemonic = "ld1w"; - form_imm = form_imm_w; - break; - case LDFF1B_z_p_ai_s: - mnemonic = "ldff1b"; - form_imm = form_imm_b; - break; - case LDFF1H_z_p_ai_s: - mnemonic = "ldff1h"; + const char *mnemonic = mnemonic_.c_str(); + switch (form_hash_) { + case "ld1h_z_p_ai_s"_h: + case "ld1sh_z_p_ai_s"_h: + case "ldff1h_z_p_ai_s"_h: + case "ldff1sh_z_p_ai_s"_h: form_imm = form_imm_h; break; - case LDFF1SB_z_p_ai_s: - mnemonic = "ldff1sb"; - form_imm = 
form_imm_b; - break; - case LDFF1SH_z_p_ai_s: - mnemonic = "ldff1sh"; - form_imm = form_imm_h; - break; - case LDFF1W_z_p_ai_s: - mnemonic = "ldff1w"; + case "ld1w_z_p_ai_s"_h: + case "ldff1w_z_p_ai_s"_h: form_imm = form_imm_w; break; - default: - form = "(SVE32BitGatherLoad_VectorPlusImm)"; - form_imm = form; - break; } if (instr->ExtractBits(20, 16) != 0) form = form_imm; @@ -5107,70 +3532,21 @@ void Disassembler::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = (instr->ExtractBits(20, 16) != 0) ? "'prefSVEOp, 'Pgl, ['Zn.s, #'u2016]" : "'prefSVEOp, 'Pgl, ['Zn.s]"; - - switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) { - case PRFB_i_p_ai_s: - mnemonic = "prfb"; - break; - case PRFD_i_p_ai_s: - mnemonic = "prfd"; - break; - case PRFH_i_p_ai_s: - mnemonic = "prfh"; - break; - case PRFW_i_p_ai_s: - mnemonic = "prfw"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]"; - - switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) { - case ST1H_z_p_bz_s_x32_scaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_s_x32_scaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]"); } void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]"; - - switch ( - 
instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_s_x32_unscaled: - mnemonic = "st1b"; - break; - case ST1H_z_p_bz_s_x32_unscaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_s_x32_unscaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]"); } void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm( @@ -5203,200 +3579,27 @@ void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm( void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw #'u2423]"; - - switch (instr->Mask( - SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { - case LD1D_z_p_bz_d_x32_scaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_x32_scaled: - mnemonic = "ld1h"; - break; - case LD1SH_z_p_bz_d_x32_scaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_x32_scaled: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bz_d_x32_scaled: - mnemonic = "ld1w"; - break; - case LDFF1D_z_p_bz_d_x32_scaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_x32_scaled: - mnemonic = "ldff1h"; - break; - case LDFF1SH_z_p_bz_d_x32_scaled: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_bz_d_x32_scaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_x32_scaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw " + "#'u2423]"); } void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 
'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]"; - - switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { - case LD1D_z_p_bz_d_64_scaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_64_scaled: - mnemonic = "ld1h"; - break; - case LD1SH_z_p_bz_d_64_scaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_64_scaled: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bz_d_64_scaled: - mnemonic = "ld1w"; - break; - case LDFF1D_z_p_bz_d_64_scaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_64_scaled: - mnemonic = "ldff1h"; - break; - case LDFF1SH_z_p_bz_d_64_scaled: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_bz_d_64_scaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_64_scaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]"); } void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]"; - - switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_d_64_unscaled: - mnemonic = "ld1b"; - break; - case LD1D_z_p_bz_d_64_unscaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_64_unscaled: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bz_d_64_unscaled: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_bz_d_64_unscaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_64_unscaled: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bz_d_64_unscaled: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_bz_d_64_unscaled: - mnemonic = "ldff1b"; - break; - case LDFF1D_z_p_bz_d_64_unscaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_64_unscaled: - mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_bz_d_64_unscaled: - mnemonic = "ldff1sb"; - break; - case 
LDFF1SH_z_p_bz_d_64_unscaled: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_bz_d_64_unscaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_64_unscaled: - mnemonic = "ldff1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]"); } void Disassembler:: VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]"; - - switch (instr->Mask( - SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_d_x32_unscaled: - mnemonic = "ld1b"; - break; - case LD1D_z_p_bz_d_x32_unscaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_x32_unscaled: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bz_d_x32_unscaled: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_bz_d_x32_unscaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_x32_unscaled: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bz_d_x32_unscaled: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1b"; - break; - case LDFF1D_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1sb"; - break; - case LDFF1SH_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]"); } void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm( @@ -5418,409 +3621,164 @@ void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm( } } - const char *mnemonic = 
"unimplemented"; - switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) { - case LD1B_z_p_ai_d: - mnemonic = "ld1b"; - break; - case LD1D_z_p_ai_d: - mnemonic = "ld1d"; - break; - case LD1H_z_p_ai_d: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_ai_d: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_ai_d: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_ai_d: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_ai_d: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_ai_d: - mnemonic = "ldff1b"; - break; - case LDFF1D_z_p_ai_d: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_ai_d: - mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_ai_d: - mnemonic = "ldff1sb"; - break; - case LDFF1SH_z_p_ai_d: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_ai_d: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_ai_d: - mnemonic = "ldff1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)"; + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d"; + const char *suffix = "]"; - switch ( - instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) { - case PRFB_i_p_bz_d_64_scaled: - mnemonic = "prfb"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d]"; - break; - case PRFD_i_p_bz_d_64_scaled: - mnemonic = "prfd"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #3]"; - break; - case PRFH_i_p_bz_d_64_scaled: - mnemonic = "prfh"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #1]"; + switch (form_hash_) { + case "prfh_i_p_bz_d_64_scaled"_h: + suffix = ", lsl #1]"; break; - case PRFW_i_p_bz_d_64_scaled: - mnemonic = "prfw"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #2]"; + case "prfs_i_p_bz_d_64_scaled"_h: + suffix = ", lsl #2]"; break; - default: + case "prfd_i_p_bz_d_64_scaled"_h: + suffix = ", lsl 
#3]"; break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler:: VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw"; - const char *suffix = NULL; + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw "; + const char *suffix = "]"; - switch (instr->Mask( - SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) { - case PRFB_i_p_bz_d_x32_scaled: - mnemonic = "prfb"; - suffix = " ]"; - break; - case PRFD_i_p_bz_d_x32_scaled: - mnemonic = "prfd"; - suffix = " #3]"; + switch (form_hash_) { + case "prfh_i_p_bz_d_x32_scaled"_h: + suffix = "#1]"; break; - case PRFH_i_p_bz_d_x32_scaled: - mnemonic = "prfh"; - suffix = " #1]"; + case "prfs_i_p_bz_d_x32_scaled"_h: + suffix = "#2]"; break; - case PRFW_i_p_bz_d_x32_scaled: - mnemonic = "prfw"; - suffix = " #2]"; - break; - default: - form = "(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets)"; + case "prfd_i_p_bz_d_x32_scaled"_h: + suffix = "#3]"; break; } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVE64BitGatherPrefetch_VectorPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = (instr->ExtractBits(20, 16) != 0) ? 
"'prefSVEOp, 'Pgl, ['Zn.d, #'u2016]" : "'prefSVEOp, 'Pgl, ['Zn.d]"; - switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) { - case PRFB_i_p_ai_d: - mnemonic = "prfb"; - break; - case PRFD_i_p_ai_d: - mnemonic = "prfd"; - break; - case PRFH_i_p_ai_d: - mnemonic = "prfh"; - break; - case PRFW_i_p_ai_d: - mnemonic = "prfw"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]"; - - switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) { - case ST1D_z_p_bz_d_64_scaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_64_scaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_64_scaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]"); } void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]"; - - switch ( - instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_d_64_unscaled: - mnemonic = "st1b"; - break; - case ST1D_z_p_bz_d_64_unscaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_64_unscaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_64_unscaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffset)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]"); } void Disassembler:: VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( const Instruction *instr) { - const char *mnemonic = 
"unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]"; - - switch (instr->Mask( - SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) { - case ST1D_z_p_bz_d_x32_scaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_x32_scaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_x32_scaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]"); } void Disassembler:: VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]"; - - switch (instr->Mask( - SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_d_x32_unscaled: - mnemonic = "st1b"; - break; - case ST1D_z_p_bz_d_x32_unscaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_x32_unscaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_x32_unscaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]"); } void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "{'Zt.d}, 'Pgl, ['Zn.d"; - const char *suffix = NULL; + const char *suffix = "]"; - bool is_zero = instr->ExtractBits(20, 16) == 0; - - switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) { - case ST1B_z_p_ai_d: - mnemonic = "st1b"; - suffix = is_zero ? "]" : ", #'u2016]"; - break; - case ST1D_z_p_ai_d: - mnemonic = "st1d"; - suffix = is_zero ? "]" : ", #'u2016*8]"; - break; - case ST1H_z_p_ai_d: - mnemonic = "st1h"; - suffix = is_zero ? 
"]" : ", #'u2016*2]"; - break; - case ST1W_z_p_ai_d: - mnemonic = "st1w"; - suffix = is_zero ? "]" : ", #'u2016*4]"; - break; - default: - form = "(SVE64BitScatterStore_VectorPlusImm)"; - break; + if (instr->ExtractBits(20, 16) != 0) { + switch (form_hash_) { + case "st1b_z_p_ai_d"_h: + suffix = ", #'u2016]"; + break; + case "st1h_z_p_ai_d"_h: + suffix = ", #'u2016*2]"; + break; + case "st1w_z_p_ai_d"_h: + suffix = ", #'u2016*4]"; + break; + case "st1d_z_p_ai_d"_h: + suffix = ", #'u2016*8]"; + break; + } } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEBitwiseLogicalWithImm_Unpredicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'tl, 'Zd.'tl, 'ITriSvel"; - if (instr->GetSVEImmLogical() == 0) { // The immediate encoded in the instruction is not in the expected format. Format(instr, "unallocated", "(SVEBitwiseImm)"); - return; - } - - switch (instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { - case AND_z_zi: - mnemonic = "and"; - break; - case EOR_z_zi: - mnemonic = "eor"; - break; - case ORR_z_zi: - mnemonic = "orr"; - break; - default: - break; + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'tl, 'Zd.'tl, 'ITriSvel"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEBitwiseLogical_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) { - case AND_z_p_zz: - mnemonic = "and"; - break; - case BIC_z_p_zz: - mnemonic = "bic"; - break; - case EOR_z_p_zz: - mnemonic = "eor"; - break; - case ORR_z_p_zz: - mnemonic = "orr"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); } void Disassembler::VisitSVEBitwiseShiftByImm_Predicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - 
const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, 'ITriSveq"; + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, "; + const char *suffix = NULL; unsigned tsize = (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(9, 8); if (tsize == 0) { + mnemonic = "unimplemented"; form = "(SVEBitwiseShiftByImm_Predicated)"; } else { - switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) { - case ASRD_z_p_zi: - mnemonic = "asrd"; - break; - case ASR_z_p_zi: - mnemonic = "asr"; - break; - case LSL_z_p_zi: - mnemonic = "lsl"; - form = "'Zd.'tszp, p'u1210/m, 'Zd.'tszp, 'ITriSvep"; - break; - case LSR_z_p_zi: - mnemonic = "lsr"; + switch (form_hash_) { + case "lsl_z_p_zi"_h: + case "sqshl_z_p_zi"_h: + case "sqshlu_z_p_zi"_h: + case "uqshl_z_p_zi"_h: + suffix = "'ITriSvep"; + break; + case "asrd_z_p_zi"_h: + case "asr_z_p_zi"_h: + case "lsr_z_p_zi"_h: + case "srshr_z_p_zi"_h: + case "urshr_z_p_zi"_h: + suffix = "'ITriSveq"; break; default: + mnemonic = "unimplemented"; + form = "(SVEBitwiseShiftByImm_Predicated)"; break; } } - Format(instr, mnemonic, form); + Format(instr, mnemonic, form, suffix); } void Disassembler::VisitSVEBitwiseShiftByVector_Predicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { - case ASRR_z_p_zz: - mnemonic = "asrr"; - break; - case ASR_z_p_zz: - mnemonic = "asr"; - break; - case LSLR_z_p_zz: - mnemonic = "lslr"; - break; - case LSL_z_p_zz: - mnemonic = "lsl"; - break; - case LSRR_z_p_zz: - mnemonic = "lsrr"; - break; - case LSR_z_p_zz: - mnemonic = "lsr"; - break; - default: - form = "(SVEBitwiseShiftByVector_Predicated)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); } void Disassembler::VisitSVEBitwiseShiftByWideElements_Predicated( const Instruction *instr) { - const char *mnemonic = 
"unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d"; - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { - form = "(SVEBitwiseShiftByWideElements_Predicated)"; + Format(instr, "unallocated", "(SVEBitwiseShiftByWideElements_Predicated)"); } else { - switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { - case ASR_z_p_zw: - mnemonic = "asr"; - break; - case LSL_z_p_zw: - mnemonic = "lsl"; - break; - case LSR_z_p_zw: - mnemonic = "lsr"; - break; - default: - form = "(SVEBitwiseShiftByWideElements_Predicated)"; - break; - } + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d"); } - Format(instr, mnemonic, form); } static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) { @@ -5929,14 +3887,16 @@ void Disassembler::VisitSVEBroadcastFPImm_Unpredicated( const char *mnemonic = "unimplemented"; const char *form = "(SVEBroadcastFPImm_Unpredicated)"; - switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { - case FDUP_z_i: - // The preferred disassembly for fdup is "fmov". - mnemonic = "fmov"; - form = "'Zd.'t, 'IFPSve"; - break; - default: - break; + if (instr->GetSVEVectorFormat() != kFormatVnB) { + switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { + case FDUP_z_i: + // The preferred disassembly for fdup is "fmov". + mnemonic = "fmov"; + form = "'Zd.'t, 'IFPSve"; + break; + default: + break; + } } Format(instr, mnemonic, form); } @@ -5976,9 +3936,9 @@ void Disassembler::VisitSVEBroadcastIndexElement(const Instruction *instr) { if ((CountSetBits(imm2) + CountSetBits(tsz)) == 1) { // If imm2:tsz has one set bit, the index is zero. This is // disassembled as a mov from a b/h/s/d/q scalar register. 
- form = "'Zd.'tszx, 'tszx'u0905"; + form = "'Zd.'ti, 'ti'u0905"; } else { - form = "'Zd.'tszx, 'Zn.'tszx['IVInsSVEIndex]"; + form = "'Zd.'ti, 'Zn.'ti['IVInsSVEIndex]"; } } break; @@ -6013,304 +3973,145 @@ void Disassembler::VisitSVEBroadcastIntImm_Unpredicated( } void Disassembler::VisitSVECompressActiveElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVECompressActiveElements)"; - - switch (instr->Mask(SVECompressActiveElementsMask)) { - case COMPACT_z_p_z: - // The top bit of size is always set for compact, so 't can only be - // substituted with types S and D. - VIXL_ASSERT(instr->ExtractBit(23) == 1); - mnemonic = "compact"; - form = "'Zd.'t, 'Pgl, 'Zn.'t"; - break; - default: - break; + // The top bit of size is always set for compact, so 't can only be + // substituted with types S and D. + if (instr->ExtractBit(23) == 1) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zn.'t"); + } else { + VisitUnallocated(instr); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEConditionallyBroadcastElementToVector( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) { - case CLASTA_z_p_zz: - mnemonic = "clasta"; - break; - case CLASTB_z_p_zz: - mnemonic = "clastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"); } void Disassembler::VisitSVEConditionallyExtractElementToGeneralRegister( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "'Wd, 'Pgl, 'Wd, 'Zn.'t"; if (instr->GetSVESize() == kDRegSizeInBytesLog2) { form = "'Xd, p'u1210, 'Xd, 'Zn.'t"; } - - switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) { - case CLASTA_r_p_z: - mnemonic = "clasta"; - break; - case CLASTB_r_p_z: - mnemonic = "clastb"; - break; - 
default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEConditionallyExtractElementToSIMDFPScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; - - switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) { - case CLASTA_v_p_z: - mnemonic = "clasta"; - break; - case CLASTB_v_p_z: - mnemonic = "clastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"); } void Disassembler::VisitSVEConditionallyTerminateScalars( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = (instr->ExtractBit(22) == 0) ? "'Wn, 'Wm" : "'Xn, 'Xm"; - - switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { - case CTERMEQ_rr: - mnemonic = "ctermeq"; - break; - case CTERMNE_rr: - mnemonic = "ctermne"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEConstructivePrefix_Unpredicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEConstructivePrefix_Unpredicated)"; - - switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { - case MOVPRFX_z_z: - mnemonic = "movprfx"; - form = "'Zd, 'Zn"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd, 'Zn"); } void Disassembler::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - - bool rm_is_zr = instr->GetRm() == kZeroRegCode; - const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; - const char *suffix = NULL; + const char *suffix = "]"; - switch (instr->Mask(SVEContiguousFirstFaultLoad_ScalarPlusScalarMask)) { - case LDFF1B_z_p_br_u16: - case LDFF1B_z_p_br_u32: - case LDFF1B_z_p_br_u64: - case LDFF1B_z_p_br_u8: 
- mnemonic = "ldff1b"; - suffix = rm_is_zr ? "]" : ", 'Xm]"; - break; - case LDFF1D_z_p_br_u64: - mnemonic = "ldff1d"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #3]"; - break; - case LDFF1H_z_p_br_u16: - case LDFF1H_z_p_br_u32: - case LDFF1H_z_p_br_u64: - mnemonic = "ldff1h"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; - break; - case LDFF1SB_z_p_br_s16: - case LDFF1SB_z_p_br_s32: - case LDFF1SB_z_p_br_s64: - mnemonic = "ldff1sb"; - suffix = rm_is_zr ? "]" : ", 'Xm]"; - break; - case LDFF1SH_z_p_br_s32: - case LDFF1SH_z_p_br_s64: - mnemonic = "ldff1sh"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; - break; - case LDFF1SW_z_p_br_s64: - mnemonic = "ldff1sw"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]"; - break; - case LDFF1W_z_p_br_u32: - case LDFF1W_z_p_br_u64: - mnemonic = "ldff1w"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]"; - break; - default: - form = "(SVEContiguousFirstFaultLoad_ScalarPlusScalar)"; - break; + if (instr->GetRm() != kZeroRegCode) { + switch (form_hash_) { + case "ldff1b_z_p_br_u8"_h: + case "ldff1b_z_p_br_u16"_h: + case "ldff1b_z_p_br_u32"_h: + case "ldff1b_z_p_br_u64"_h: + case "ldff1sb_z_p_br_s16"_h: + case "ldff1sb_z_p_br_s32"_h: + case "ldff1sb_z_p_br_s64"_h: + suffix = ", 'Xm]"; + break; + case "ldff1h_z_p_br_u16"_h: + case "ldff1h_z_p_br_u32"_h: + case "ldff1h_z_p_br_u64"_h: + case "ldff1sh_z_p_br_s32"_h: + case "ldff1sh_z_p_br_s64"_h: + suffix = ", 'Xm, lsl #1]"; + break; + case "ldff1w_z_p_br_u32"_h: + case "ldff1w_z_p_br_u64"_h: + case "ldff1sw_z_p_br_s64"_h: + suffix = ", 'Xm, lsl #2]"; + break; + case "ldff1d_z_p_br_u64"_h: + suffix = ", 'Xm, lsl #3]"; + break; + } } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; const char *suffix = (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; - - switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) { - case LDNF1B_z_p_bi_u16: - case LDNF1B_z_p_bi_u32: - case LDNF1B_z_p_bi_u64: - case LDNF1B_z_p_bi_u8: - mnemonic = "ldnf1b"; - break; - case LDNF1D_z_p_bi_u64: - mnemonic = "ldnf1d"; - break; - case LDNF1H_z_p_bi_u16: - case LDNF1H_z_p_bi_u32: - case LDNF1H_z_p_bi_u64: - mnemonic = "ldnf1h"; - break; - case LDNF1SB_z_p_bi_s16: - case LDNF1SB_z_p_bi_s32: - case LDNF1SB_z_p_bi_s64: - mnemonic = "ldnf1sb"; - break; - case LDNF1SH_z_p_bi_s32: - case LDNF1SH_z_p_bi_s64: - mnemonic = "ldnf1sh"; - break; - case LDNF1SW_z_p_bi_s64: - mnemonic = "ldnf1sw"; - break; - case LDNF1W_z_p_bi_u32: - case LDNF1W_z_p_bi_u64: - mnemonic = "ldnf1w"; - break; - default: - form = "(SVEContiguousNonFaultLoad_ScalarPlusImm)"; - suffix = NULL; - break; - } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusImm)"; - + const char *form = "{'Zt.b}, 'Pgl/z, ['Xns"; const char *suffix = (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; - switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) { - case LDNT1B_z_p_bi_contiguous: - mnemonic = "ldnt1b"; - form = "{'Zt.b}, 'Pgl/z, ['Xns"; - break; - case LDNT1D_z_p_bi_contiguous: - mnemonic = "ldnt1d"; + switch (form_hash_) { + case "ldnt1d_z_p_bi_contiguous"_h: form = "{'Zt.d}, 'Pgl/z, ['Xns"; break; - case LDNT1H_z_p_bi_contiguous: - mnemonic = "ldnt1h"; + case "ldnt1h_z_p_bi_contiguous"_h: form = "{'Zt.h}, 'Pgl/z, ['Xns"; break; - case LDNT1W_z_p_bi_contiguous: - mnemonic = "ldnt1w"; + case "ldnt1w_z_p_bi_contiguous"_h: form = "{'Zt.s}, 'Pgl/z, ['Xns"; break; - default: - suffix = NULL; - break; } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusScalar)"; - - switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) { - case LDNT1B_z_p_br_contiguous: - mnemonic = "ldnt1b"; - form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; - break; - case LDNT1D_z_p_br_contiguous: - mnemonic = "ldnt1d"; + const char *form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; + switch (form_hash_) { + case "ldnt1d_z_p_br_contiguous"_h: form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; break; - case LDNT1H_z_p_br_contiguous: - mnemonic = "ldnt1h"; + case "ldnt1h_z_p_br_contiguous"_h: form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; break; - case LDNT1W_z_p_br_contiguous: - mnemonic = "ldnt1w"; + case "ldnt1w_z_p_br_contiguous"_h: form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusImm)"; - + const char *form 
= "{'Zt.b}, 'Pgl, ['Xns"; const char *suffix = (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; - switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) { - case STNT1B_z_p_bi_contiguous: - mnemonic = "stnt1b"; - form = "{'Zt.b}, 'Pgl, ['Xns"; - break; - case STNT1D_z_p_bi_contiguous: - mnemonic = "stnt1d"; + + switch (form_hash_) { + case "stnt1d_z_p_bi_contiguous"_h: form = "{'Zt.d}, 'Pgl, ['Xns"; break; - case STNT1H_z_p_bi_contiguous: - mnemonic = "stnt1h"; + case "stnt1h_z_p_bi_contiguous"_h: form = "{'Zt.h}, 'Pgl, ['Xns"; break; - case STNT1W_z_p_bi_contiguous: - mnemonic = "stnt1w"; + case "stnt1w_z_p_bi_contiguous"_h: form = "{'Zt.s}, 'Pgl, ['Xns"; break; - default: - suffix = NULL; - break; } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( @@ -6343,28 +4144,10 @@ void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = (instr->ExtractBits(21, 16) != 0) ? "'prefSVEOp, 'Pgl, ['Xns, #'s2116, mul vl]" : "'prefSVEOp, 'Pgl, ['Xns]"; - - switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) { - case PRFB_i_p_bi_s: - mnemonic = "prfb"; - break; - case PRFD_i_p_bi_s: - mnemonic = "prfd"; - break; - case PRFH_i_p_bi_s: - mnemonic = "prfh"; - break; - case PRFW_i_p_bi_s: - mnemonic = "prfw"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar( @@ -6399,71 +4182,34 @@ void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar( void Disassembler::VisitSVEContiguousStore_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - // The 'size' field isn't in the usual place here. 
const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, #'s1916, mul vl]"; if (instr->ExtractBits(19, 16) == 0) { form = "{'Zt.'tls}, 'Pgl, ['Xns]"; } - - switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) { - case ST1B_z_p_bi: - mnemonic = "st1b"; - break; - case ST1D_z_p_bi: - mnemonic = "st1d"; - break; - case ST1H_z_p_bi: - mnemonic = "st1h"; - break; - case ST1W_z_p_bi: - mnemonic = "st1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEContiguousStore_ScalarPlusScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - // The 'size' field isn't in the usual place here. - const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]"; - - switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) { - case ST1B_z_p_br: - mnemonic = "st1b"; - break; - case ST1D_z_p_br: - mnemonic = "st1d"; - break; - case ST1H_z_p_br: - mnemonic = "st1h"; - break; - case ST1W_z_p_br: - mnemonic = "st1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]"); } void Disassembler::VisitSVECopyFPImm_Predicated(const Instruction *instr) { const char *mnemonic = "unimplemented"; const char *form = "(SVECopyFPImm_Predicated)"; - switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { - case FCPY_z_p_i: - // The preferred disassembly for fcpy is "fmov". - mnemonic = "fmov"; - form = "'Zd.'t, 'Pm/m, 'IFPSve"; - break; - default: - break; + if (instr->GetSVEVectorFormat() != kFormatVnB) { + switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: + // The preferred disassembly for fcpy is "fmov". 
+ mnemonic = "fmov"; + form = "'Zd.'t, 'Pm/m, 'IFPSve"; + break; + default: + break; + } } Format(instr, mnemonic, form); } @@ -6526,381 +4272,168 @@ void Disassembler::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( void Disassembler::VisitSVEExtractElementToGeneralRegister( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "'Wd, 'Pgl, 'Zn.'t"; - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { form = "'Xd, p'u1210, 'Zn.'t"; } - - switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) { - case LASTA_r_p_z: - mnemonic = "lasta"; - break; - case LASTB_r_p_z: - mnemonic = "lastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEExtractElementToSIMDFPScalarRegister( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; - - switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) { - case LASTA_v_p_z: - mnemonic = "lasta"; - break; - case LASTB_v_p_z: - mnemonic = "lastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 'Zn.'t"); } void Disassembler::VisitSVEFFRInitialise(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFFRInitialise)"; - - switch (instr->Mask(SVEFFRInitialiseMask)) { - case SETFFR_f: - mnemonic = "setffr"; - form = " "; - break; - default: - break; - } - Format(instr, mnemonic, form); + DisassembleNoArgs(instr); } void Disassembler::VisitSVEFFRWriteFromPredicate(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFFRWriteFromPredicate)"; - - switch (instr->Mask(SVEFFRWriteFromPredicateMask)) { - case WRFFR_f_p: - mnemonic = "wrffr"; - form = "'Pn.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pn.b"); } void 
Disassembler::VisitSVEFPArithmeticWithImm_Predicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form00 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.0"; - const char *form05 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.5"; - const char *form10 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #1.0"; - const char *form20 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #2.0"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, #"; + const char *suffix00 = "0.0"; + const char *suffix05 = "0.5"; + const char *suffix10 = "1.0"; + const char *suffix20 = "2.0"; int i1 = instr->ExtractBit(5); - const char *form = i1 ? form10 : form00; + const char *suffix = i1 ? suffix10 : suffix00; - switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) { - case FADD_z_p_zs: - mnemonic = "fadd"; - form = i1 ? form10 : form05; - break; - case FMAXNM_z_p_zs: - mnemonic = "fmaxnm"; - break; - case FMAX_z_p_zs: - mnemonic = "fmax"; - break; - case FMINNM_z_p_zs: - mnemonic = "fminnm"; - break; - case FMIN_z_p_zs: - mnemonic = "fmin"; - break; - case FMUL_z_p_zs: - mnemonic = "fmul"; - form = i1 ? form20 : form05; - break; - case FSUBR_z_p_zs: - mnemonic = "fsubr"; - form = i1 ? form10 : form05; - break; - case FSUB_z_p_zs: - mnemonic = "fsub"; - form = i1 ? form10 : form05; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + return; + } + + switch (form_hash_) { + case "fadd_z_p_zs"_h: + case "fsubr_z_p_zs"_h: + case "fsub_z_p_zs"_h: + suffix = i1 ? suffix10 : suffix05; break; - default: - form = "(SVEFPArithmeticWithImm_Predicated)"; + case "fmul_z_p_zs"_h: + suffix = i1 ? 
suffix20 : suffix05; break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEFPArithmetic_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { - case FABD_z_p_zz: - mnemonic = "fabd"; - break; - case FADD_z_p_zz: - mnemonic = "fadd"; - break; - case FDIVR_z_p_zz: - mnemonic = "fdivr"; - break; - case FDIV_z_p_zz: - mnemonic = "fdiv"; - break; - case FMAXNM_z_p_zz: - mnemonic = "fmaxnm"; - break; - case FMAX_z_p_zz: - mnemonic = "fmax"; - break; - case FMINNM_z_p_zz: - mnemonic = "fminnm"; - break; - case FMIN_z_p_zz: - mnemonic = "fmin"; - break; - case FMULX_z_p_zz: - mnemonic = "fmulx"; - break; - case FMUL_z_p_zz: - mnemonic = "fmul"; - break; - case FSCALE_z_p_zz: - mnemonic = "fscale"; - break; - case FSUBR_z_p_zz: - mnemonic = "fsubr"; - break; - case FSUB_z_p_zz: - mnemonic = "fsub"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPConvertPrecision(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPConvertPrecision)"; + const char *form = NULL; - switch (instr->Mask(SVEFPConvertPrecisionMask)) { - case FCVT_z_p_z_d2h: - mnemonic = "fcvt"; + switch (form_hash_) { + case "fcvt_z_p_z_d2h"_h: form = "'Zd.h, 'Pgl/m, 'Zn.d"; break; - case FCVT_z_p_z_d2s: - mnemonic = "fcvt"; + case "fcvt_z_p_z_d2s"_h: form = "'Zd.s, 'Pgl/m, 'Zn.d"; break; - case FCVT_z_p_z_h2d: - mnemonic = "fcvt"; + case "fcvt_z_p_z_h2d"_h: form = "'Zd.d, 'Pgl/m, 'Zn.h"; break; - case FCVT_z_p_z_h2s: - mnemonic = "fcvt"; + case "fcvt_z_p_z_h2s"_h: form = "'Zd.s, 'Pgl/m, 'Zn.h"; break; - case FCVT_z_p_z_s2d: - mnemonic = "fcvt"; + case 
"fcvt_z_p_z_s2d"_h: form = "'Zd.d, 'Pgl/m, 'Zn.s"; break; - case FCVT_z_p_z_s2h: - mnemonic = "fcvt"; + case "fcvt_z_p_z_s2h"_h: form = "'Zd.h, 'Pgl/m, 'Zn.s"; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEFPConvertToInt(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPConvertToInt)"; + const char *form = NULL; - switch (instr->Mask(SVEFPConvertToIntMask)) { - case FCVTZS_z_p_z_d2w: - mnemonic = "fcvtzs"; - form = "'Zd.s, 'Pgl/m, 'Zn.d"; - break; - case FCVTZS_z_p_z_d2x: - mnemonic = "fcvtzs"; - form = "'Zd.d, 'Pgl/m, 'Zn.d"; - break; - case FCVTZS_z_p_z_fp162h: - mnemonic = "fcvtzs"; - form = "'Zd.h, 'Pgl/m, 'Zn.h"; - break; - case FCVTZS_z_p_z_fp162w: - mnemonic = "fcvtzs"; - form = "'Zd.s, 'Pgl/m, 'Zn.h"; - break; - case FCVTZS_z_p_z_fp162x: - mnemonic = "fcvtzs"; - form = "'Zd.d, 'Pgl/m, 'Zn.h"; - break; - case FCVTZS_z_p_z_s2w: - mnemonic = "fcvtzs"; - form = "'Zd.s, 'Pgl/m, 'Zn.s"; - break; - case FCVTZS_z_p_z_s2x: - mnemonic = "fcvtzs"; - form = "'Zd.d, 'Pgl/m, 'Zn.s"; - break; - case FCVTZU_z_p_z_d2w: - mnemonic = "fcvtzu"; + switch (form_hash_) { + case "fcvtzs_z_p_z_d2w"_h: + case "fcvtzu_z_p_z_d2w"_h: form = "'Zd.s, 'Pgl/m, 'Zn.d"; break; - case FCVTZU_z_p_z_d2x: - mnemonic = "fcvtzu"; + case "fcvtzs_z_p_z_d2x"_h: + case "fcvtzu_z_p_z_d2x"_h: form = "'Zd.d, 'Pgl/m, 'Zn.d"; break; - case FCVTZU_z_p_z_fp162h: - mnemonic = "fcvtzu"; + case "fcvtzs_z_p_z_fp162h"_h: + case "fcvtzu_z_p_z_fp162h"_h: form = "'Zd.h, 'Pgl/m, 'Zn.h"; break; - case FCVTZU_z_p_z_fp162w: - mnemonic = "fcvtzu"; + case "fcvtzs_z_p_z_fp162w"_h: + case "fcvtzu_z_p_z_fp162w"_h: form = "'Zd.s, 'Pgl/m, 'Zn.h"; break; - case FCVTZU_z_p_z_fp162x: - mnemonic = "fcvtzu"; + case "fcvtzs_z_p_z_fp162x"_h: + case "fcvtzu_z_p_z_fp162x"_h: form = "'Zd.d, 'Pgl/m, 'Zn.h"; break; - case FCVTZU_z_p_z_s2w: - mnemonic = "fcvtzu"; + case "fcvtzs_z_p_z_s2w"_h: + case 
"fcvtzu_z_p_z_s2w"_h: form = "'Zd.s, 'Pgl/m, 'Zn.s"; break; - case FCVTZU_z_p_z_s2x: - mnemonic = "fcvtzu"; + case "fcvtzs_z_p_z_s2x"_h: + case "fcvtzu_z_p_z_s2x"_h: form = "'Zd.d, 'Pgl/m, 'Zn.s"; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEFPExponentialAccelerator(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPExponentialAccelerator)"; - unsigned size = instr->GetSVESize(); - switch (instr->Mask(SVEFPExponentialAcceleratorMask)) { - case FEXPA_z_z: - if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || - (size == kDRegSizeInBytesLog2)) { - mnemonic = "fexpa"; - form = "'Zd.'t, 'Zn.'t"; - } - break; - default: - break; + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t"); + } else { + VisitUnallocated(instr); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPRoundToIntegralValue(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; - - switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { - case FRINTA_z_p_z: - mnemonic = "frinta"; - break; - case FRINTI_z_p_z: - mnemonic = "frinti"; - break; - case FRINTM_z_p_z: - mnemonic = "frintm"; - break; - case FRINTN_z_p_z: - mnemonic = "frintn"; - break; - case FRINTP_z_p_z: - mnemonic = "frintp"; - break; - case FRINTX_z_p_z: - mnemonic = "frintx"; - break; - case FRINTZ_z_p_z: - mnemonic = "frintz"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPTrigMulAddCoefficient(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = 
"(SVEFPTrigMulAddCoefficient)"; - unsigned size = instr->GetSVESize(); - switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { - case FTMAD_z_zzi: - if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || - (size == kDRegSizeInBytesLog2)) { - mnemonic = "ftmad"; - form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816"; - } - break; - default: - break; + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816"); + } else { + VisitUnallocated(instr); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPTrigSelectCoefficient(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPTrigSelectCoefficient)"; - unsigned size = instr->GetSVESize(); - switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) { - case FTSSEL_z_zz: - if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || - (size == kDRegSizeInBytesLog2)) { - mnemonic = "ftssel"; - form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - } - break; - default: - break; + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); + } else { + VisitUnallocated(instr); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPUnaryOp(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; - if (instr->GetSVESize() == kBRegSizeInBytesLog2) { - form = "(SVEFPUnaryOp)"; + VisitUnallocated(instr); } else { - switch (instr->Mask(SVEFPUnaryOpMask)) { - case FRECPX_z_p_z: - mnemonic = "frecpx"; - break; - case FSQRT_z_p_z: - mnemonic = "fsqrt"; - break; - default: - form = "(SVEFPUnaryOp)"; - break; - } + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t"); } - Format(instr, mnemonic, form); } static const char *IncDecFormHelper(const Instruction *instr, @@ -6921,399 
+4454,125 @@ static const char *IncDecFormHelper(const Instruction *instr, void Disassembler::VisitSVEIncDecRegisterByElementCount( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); - - switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) { - case DECB_r_rs: - mnemonic = "decb"; - break; - case DECD_r_rs: - mnemonic = "decd"; - break; - case DECH_r_rs: - mnemonic = "dech"; - break; - case DECW_r_rs: - mnemonic = "decw"; - break; - case INCB_r_rs: - mnemonic = "incb"; - break; - case INCD_r_rs: - mnemonic = "incd"; - break; - case INCH_r_rs: - mnemonic = "inch"; - break; - case INCW_r_rs: - mnemonic = "incw"; - break; - default: - form = "(SVEIncDecRegisterByElementCount)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEIncDecVectorByElementCount( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = IncDecFormHelper(instr, "'Zd.'t, 'Ipc, mul #'u1916+1", "'Zd.'t, 'Ipc", "'Zd.'t"); - - switch (instr->Mask(SVEIncDecVectorByElementCountMask)) { - case DECD_z_zs: - mnemonic = "decd"; - break; - case DECH_z_zs: - mnemonic = "dech"; - break; - case DECW_z_zs: - mnemonic = "decw"; - break; - case INCD_z_zs: - mnemonic = "incd"; - break; - case INCH_z_zs: - mnemonic = "inch"; - break; - case INCW_z_zs: - mnemonic = "incw"; - break; - default: - form = "(SVEIncDecVectorByElementCount)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEInsertGeneralRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEInsertGeneralRegister)"; - - switch (instr->Mask(SVEInsertGeneralRegisterMask)) { - case INSR_z_r: - mnemonic = "insr"; - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { - form = "'Zd.'t, 'Xn"; - } else { - form = "'Zd.'t, 'Wn"; - } - break; 
- default: - break; + const char *form = "'Zd.'t, 'Wn"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Zd.'t, 'Xn"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEInsertSIMDFPScalarRegister( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEInsertSIMDFPScalarRegister)"; - - switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) { - case INSR_z_v: - mnemonic = "insr"; - form = "'Zd.'t, 'Vnv"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Vnv"); } void Disassembler::VisitSVEIntAddSubtractImm_Unpredicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = (instr->ExtractBit(13) == 0) ? "'Zd.'t, 'Zd.'t, #'u1205" : "'Zd.'t, 'Zd.'t, #'u1205, lsl #8"; - - switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { - case ADD_z_zi: - mnemonic = "add"; - break; - case SQADD_z_zi: - mnemonic = "sqadd"; - break; - case SQSUB_z_zi: - mnemonic = "sqsub"; - break; - case SUBR_z_zi: - mnemonic = "subr"; - break; - case SUB_z_zi: - mnemonic = "sub"; - break; - case UQADD_z_zi: - mnemonic = "uqadd"; - break; - case UQSUB_z_zi: - mnemonic = "uqsub"; - break; - default: - form = "(SVEIntAddSubtractImm_Unpredicated)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEIntAddSubtractVectors_Predicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) { - case ADD_z_p_zz: - mnemonic = "add"; - break; - case SUBR_z_p_zz: - mnemonic = "subr"; - break; - case SUB_z_p_zz: - mnemonic = "sub"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); } void 
Disassembler::VisitSVEIntCompareScalarCountAndLimit( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = (instr->ExtractBit(12) == 0) ? "'Pd.'t, 'Wn, 'Wm" : "'Pd.'t, 'Xn, 'Xm"; - - switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { - case WHILELE_p_p_rr: - mnemonic = "whilele"; - break; - case WHILELO_p_p_rr: - mnemonic = "whilelo"; - break; - case WHILELS_p_p_rr: - mnemonic = "whilels"; - break; - case WHILELT_p_p_rr: - mnemonic = "whilelt"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEIntConvertToFP(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIntConvertToFP)"; - - switch (instr->Mask(SVEIntConvertToFPMask)) { - case SCVTF_z_p_z_h2fp16: - mnemonic = "scvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.h"; - break; - case SCVTF_z_p_z_w2d: - mnemonic = "scvtf"; - form = "'Zd.d, 'Pgl/m, 'Zn.s"; - break; - case SCVTF_z_p_z_w2fp16: - mnemonic = "scvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.s"; - break; - case SCVTF_z_p_z_w2s: - mnemonic = "scvtf"; - form = "'Zd.s, 'Pgl/m, 'Zn.s"; - break; - case SCVTF_z_p_z_x2d: - mnemonic = "scvtf"; - form = "'Zd.d, 'Pgl/m, 'Zn.d"; - break; - case SCVTF_z_p_z_x2fp16: - mnemonic = "scvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.d"; - break; - case SCVTF_z_p_z_x2s: - mnemonic = "scvtf"; - form = "'Zd.s, 'Pgl/m, 'Zn.d"; - break; - case UCVTF_z_p_z_h2fp16: - mnemonic = "ucvtf"; + const char *form = NULL; + switch (form_hash_) { + case "scvtf_z_p_z_h2fp16"_h: + case "ucvtf_z_p_z_h2fp16"_h: form = "'Zd.h, 'Pgl/m, 'Zn.h"; break; - case UCVTF_z_p_z_w2d: - mnemonic = "ucvtf"; + case "scvtf_z_p_z_w2d"_h: + case "ucvtf_z_p_z_w2d"_h: form = "'Zd.d, 'Pgl/m, 'Zn.s"; break; - case UCVTF_z_p_z_w2fp16: - mnemonic = "ucvtf"; + case "scvtf_z_p_z_w2fp16"_h: + case "ucvtf_z_p_z_w2fp16"_h: form = "'Zd.h, 'Pgl/m, 'Zn.s"; break; - case UCVTF_z_p_z_w2s: - mnemonic = "ucvtf"; + case "scvtf_z_p_z_w2s"_h: + 
case "ucvtf_z_p_z_w2s"_h: form = "'Zd.s, 'Pgl/m, 'Zn.s"; break; - case UCVTF_z_p_z_x2d: - mnemonic = "ucvtf"; + case "scvtf_z_p_z_x2d"_h: + case "ucvtf_z_p_z_x2d"_h: form = "'Zd.d, 'Pgl/m, 'Zn.d"; break; - case UCVTF_z_p_z_x2fp16: - mnemonic = "ucvtf"; + case "scvtf_z_p_z_x2fp16"_h: + case "ucvtf_z_p_z_x2fp16"_h: form = "'Zd.h, 'Pgl/m, 'Zn.d"; break; - case UCVTF_z_p_z_x2s: - mnemonic = "ucvtf"; + case "scvtf_z_p_z_x2s"_h: + case "ucvtf_z_p_z_x2s"_h: form = "'Zd.s, 'Pgl/m, 'Zn.d"; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEIntDivideVectors_Predicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { - case SDIVR_z_p_zz: - mnemonic = "sdivr"; - break; - case SDIV_z_p_zz: - mnemonic = "sdiv"; - break; - case UDIVR_z_p_zz: - mnemonic = "udivr"; - break; - case UDIV_z_p_zz: - mnemonic = "udiv"; - break; - default: - break; - } - - switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { - case SDIVR_z_p_zz: - case SDIV_z_p_zz: - case UDIVR_z_p_zz: - case UDIV_z_p_zz: - switch (instr->GetSVESize()) { - case kBRegSizeInBytesLog2: - case kHRegSizeInBytesLog2: - mnemonic = "unimplemented"; - form = "(SVEIntBinaryArithmeticPredicated)"; - break; - case kSRegSizeInBytesLog2: - case kDRegSizeInBytesLog2: - // The default form works for these instructions. - break; - default: - // GetSVESize() should never return other values. 
- VIXL_UNREACHABLE(); - break; - } + unsigned size = instr->GetSVESize(); + if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); + } else { + VisitUnallocated(instr); } - - Format(instr, mnemonic, form); } void Disassembler::VisitSVEIntMinMaxDifference_Predicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) { - case SABD_z_p_zz: - mnemonic = "sabd"; - break; - case SMAX_z_p_zz: - mnemonic = "smax"; - break; - case SMIN_z_p_zz: - mnemonic = "smin"; - break; - case UABD_z_p_zz: - mnemonic = "uabd"; - break; - case UMAX_z_p_zz: - mnemonic = "umax"; - break; - case UMIN_z_p_zz: - mnemonic = "umin"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); } void Disassembler::VisitSVEIntMinMaxImm_Unpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zd.'t, #'u1205"; + const char *form = "'Zd.'t, 'Zd.'t, #"; + const char *suffix = "'u1205"; - switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) { - case SMAX_z_zi: - mnemonic = "smax"; - form = "'Zd.'t, 'Zd.'t, #'s1205"; - break; - case SMIN_z_zi: - mnemonic = "smin"; - form = "'Zd.'t, 'Zd.'t, #'s1205"; - break; - case UMAX_z_zi: - mnemonic = "umax"; - break; - case UMIN_z_zi: - mnemonic = "umin"; - break; - default: + switch (form_hash_) { + case "smax_z_zi"_h: + case "smin_z_zi"_h: + suffix = "'s1205"; break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEIntMulImm_Unpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIntMulImm_Unpredicated)"; - - switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) { - case MUL_z_zi: - mnemonic = 
"mul"; - form = "'Zd.'t, 'Zd.'t, #'s1205"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zd.'t, #'s1205"); } void Disassembler::VisitSVEIntMulVectors_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) { - case MUL_z_p_zz: - mnemonic = "mul"; - break; - case SMULH_z_p_zz: - mnemonic = "smulh"; - break; - case UMULH_z_p_zz: - mnemonic = "umulh"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); } void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "(SVELoadAndBroadcastElement)"; const char *suffix_b = ", #'u2116]"; const char *suffix_h = ", #'u2116*2]"; @@ -7321,88 +4580,52 @@ void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) { const char *suffix_d = ", #'u2116*8]"; const char *suffix = NULL; - switch (instr->Mask(SVELoadAndBroadcastElementMask)) { - case LD1RB_z_p_bi_u16: - mnemonic = "ld1rb"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RB_z_p_bi_u32: - mnemonic = "ld1rb"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RB_z_p_bi_u64: - mnemonic = "ld1rb"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RB_z_p_bi_u8: - mnemonic = "ld1rb"; + switch (form_hash_) { + case "ld1rb_z_p_bi_u8"_h: form = "{'Zt.b}, 'Pgl/z, ['Xns"; suffix = suffix_b; break; - case LD1RD_z_p_bi_u64: - mnemonic = "ld1rd"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_d; - break; - case LD1RH_z_p_bi_u16: - mnemonic = "ld1rh"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - case LD1RH_z_p_bi_u32: - mnemonic = "ld1rh"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - 
case LD1RH_z_p_bi_u64: - mnemonic = "ld1rh"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - case LD1RSB_z_p_bi_s16: - mnemonic = "ld1rsb"; + case "ld1rb_z_p_bi_u16"_h: + case "ld1rsb_z_p_bi_s16"_h: form = "{'Zt.h}, 'Pgl/z, ['Xns"; suffix = suffix_b; break; - case LD1RSB_z_p_bi_s32: - mnemonic = "ld1rsb"; + case "ld1rb_z_p_bi_u32"_h: + case "ld1rsb_z_p_bi_s32"_h: form = "{'Zt.s}, 'Pgl/z, ['Xns"; suffix = suffix_b; break; - case LD1RSB_z_p_bi_s64: - mnemonic = "ld1rsb"; + case "ld1rb_z_p_bi_u64"_h: + case "ld1rsb_z_p_bi_s64"_h: form = "{'Zt.d}, 'Pgl/z, ['Xns"; suffix = suffix_b; break; - case LD1RSH_z_p_bi_s32: - mnemonic = "ld1rsh"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; + case "ld1rh_z_p_bi_u16"_h: + form = "{'Zt.h}, 'Pgl/z, ['Xns"; suffix = suffix_h; break; - case LD1RSH_z_p_bi_s64: - mnemonic = "ld1rsh"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; + case "ld1rh_z_p_bi_u32"_h: + case "ld1rsh_z_p_bi_s32"_h: + form = "{'Zt.s}, 'Pgl/z, ['Xns"; suffix = suffix_h; break; - case LD1RSW_z_p_bi_s64: - mnemonic = "ld1rsw"; + case "ld1rh_z_p_bi_u64"_h: + case "ld1rsh_z_p_bi_s64"_h: form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_w; + suffix = suffix_h; break; - case LD1RW_z_p_bi_u32: - mnemonic = "ld1rw"; + case "ld1rw_z_p_bi_u32"_h: form = "{'Zt.s}, 'Pgl/z, ['Xns"; suffix = suffix_w; break; - case LD1RW_z_p_bi_u64: - mnemonic = "ld1rw"; + case "ld1rsw_z_p_bi_s64"_h: + case "ld1rw_z_p_bi_u64"_h: form = "{'Zt.d}, 'Pgl/z, ['Xns"; suffix = suffix_w; break; - default: + case "ld1rd_z_p_bi_u64"_h: + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_d; break; } @@ -7411,401 +4634,141 @@ void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) { suffix = "]"; } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } -void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( +void Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = 
"unimplemented"; - const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusImm)"; + const char *form = "{'Zt.'tmsz}, 'Pgl/z, ['Xns"; + const char *suffix = ", #'s1916*16]"; - const char *suffix = - (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916*16]"; - - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { - case LD1RQB_z_p_bi_u8: - mnemonic = "ld1rqb"; - form = "{'Zt.b}, 'Pgl/z, ['Xns"; - break; - case LD1RQD_z_p_bi_u64: - mnemonic = "ld1rqd"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - break; - case LD1RQH_z_p_bi_u16: - mnemonic = "ld1rqh"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - break; - case LD1RQW_z_p_bi_u32: - mnemonic = "ld1rqw"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - break; - default: - suffix = NULL; + switch (form_hash_) { + case "ld1rob_z_p_bi_u8"_h: + case "ld1rod_z_p_bi_u64"_h: + case "ld1roh_z_p_bi_u16"_h: + case "ld1row_z_p_bi_u32"_h: + suffix = ", #'s1916*32]"; break; } - Format(instr, mnemonic, form, suffix); + if (instr->ExtractBits(19, 16) == 0) suffix = "]"; + + FormatWithDecodedMnemonic(instr, form, suffix); } -void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( +void Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusScalar)"; + const char *form = "{'Zt.'tmsz}, 'Pgl/z, ['Xns, "; + const char *suffix = "'Rm, lsl #'u2423]"; - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { - case LD1RQB_z_p_br_contiguous: - mnemonic = "ld1rqb"; - form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; - break; - case LD1RQD_z_p_br_contiguous: - mnemonic = "ld1rqd"; - form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; - break; - case LD1RQH_z_p_br_contiguous: - mnemonic = "ld1rqh"; - form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; - break; - case LD1RQW_z_p_br_contiguous: - mnemonic = "ld1rqw"; - form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; - break; - default: + switch (form_hash_) { + 
case "ld1rqb_z_p_br_contiguous"_h: + case "ld1rob_z_p_br_contiguous"_h: + suffix = "'Rm]"; break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadMultipleStructures_ScalarPlusImm)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl/z, ['Xns'ISveSvl]"; - - switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) { - case LD2B_z_p_bi_contiguous: - mnemonic = "ld2b"; - form = form_2; - break; - case LD2D_z_p_bi_contiguous: - mnemonic = "ld2d"; - form = form_2; - break; - case LD2H_z_p_bi_contiguous: - mnemonic = "ld2h"; - form = form_2; - break; - case LD2W_z_p_bi_contiguous: - mnemonic = "ld2w"; - form = form_2; - break; - case LD3B_z_p_bi_contiguous: - mnemonic = "ld3b"; - form = form_3; - break; - case LD3D_z_p_bi_contiguous: - mnemonic = "ld3d"; - form = form_3; - break; - case LD3H_z_p_bi_contiguous: - mnemonic = "ld3h"; - form = form_3; - break; - case LD3W_z_p_bi_contiguous: - mnemonic = "ld3w"; + const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl/z, ['Xns'ISveSvl]"; + + switch (form_hash_) { + case "ld3b_z_p_bi_contiguous"_h: + case "ld3d_z_p_bi_contiguous"_h: + case "ld3h_z_p_bi_contiguous"_h: + case "ld3w_z_p_bi_contiguous"_h: form = form_3; break; - case LD4B_z_p_bi_contiguous: - mnemonic = "ld4b"; + case "ld4b_z_p_bi_contiguous"_h: + case "ld4d_z_p_bi_contiguous"_h: + case "ld4h_z_p_bi_contiguous"_h: + case "ld4w_z_p_bi_contiguous"_h: form = form_4; break; - case LD4D_z_p_bi_contiguous: - 
mnemonic = "ld4d"; - form = form_4; - break; - case LD4H_z_p_bi_contiguous: - mnemonic = "ld4h"; - form = form_4; - break; - case LD4W_z_p_bi_contiguous: - mnemonic = "ld4w"; - form = form_4; - break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadMultipleStructures_ScalarPlusScalar)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl/z, ['Xns, 'Xm'NSveS]"; - - switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) { - case LD2B_z_p_br_contiguous: - mnemonic = "ld2b"; - form = form_2; - break; - case LD2D_z_p_br_contiguous: - mnemonic = "ld2d"; - form = form_2; - break; - case LD2H_z_p_br_contiguous: - mnemonic = "ld2h"; - form = form_2; - break; - case LD2W_z_p_br_contiguous: - mnemonic = "ld2w"; - form = form_2; - break; - case LD3B_z_p_br_contiguous: - mnemonic = "ld3b"; - form = form_3; - break; - case LD3D_z_p_br_contiguous: - mnemonic = "ld3d"; - form = form_3; - break; - case LD3H_z_p_br_contiguous: - mnemonic = "ld3h"; + const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl/z, ['Xns, 'Xm'NSveS]"; + + switch (form_hash_) { + case "ld3b_z_p_br_contiguous"_h: + case "ld3d_z_p_br_contiguous"_h: + case "ld3h_z_p_br_contiguous"_h: + case "ld3w_z_p_br_contiguous"_h: form = form_3; break; - case LD3W_z_p_br_contiguous: - mnemonic = "ld3w"; - form = form_3; - break; - case LD4B_z_p_br_contiguous: - mnemonic = "ld4b"; - form = form_4; - break; - case 
LD4D_z_p_br_contiguous: - mnemonic = "ld4d"; - form = form_4; - break; - case LD4H_z_p_br_contiguous: - mnemonic = "ld4h"; - form = form_4; - break; - case LD4W_z_p_br_contiguous: - mnemonic = "ld4w"; + case "ld4b_z_p_br_contiguous"_h: + case "ld4d_z_p_br_contiguous"_h: + case "ld4h_z_p_br_contiguous"_h: + case "ld4w_z_p_br_contiguous"_h: form = form_4; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVELoadPredicateRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadPredicateRegister)"; - - switch (instr->Mask(SVELoadPredicateRegisterMask)) { - case LDR_p_bi: - mnemonic = "ldr"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Pd, ['Xns]"; - } else { - form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; + const char *form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVELoadVectorRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadVectorRegister)"; - - switch (instr->Mask(SVELoadVectorRegisterMask)) { - case LDR_z_bi: - mnemonic = "ldr"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Zd, ['Xns]"; - } else { - form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; + const char *form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEPartitionBreakCondition(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.b, p'u1310/'?04:mz, 'Pn.b"; - - switch (instr->Mask(SVEPartitionBreakConditionMask)) { - case BRKAS_p_p_p_z: - mnemonic = "brkas"; - break; - case BRKA_p_p_p: - 
mnemonic = "brka"; - break; - case BRKBS_p_p_p_z: - mnemonic = "brkbs"; - break; - case BRKB_p_p_p: - mnemonic = "brkb"; - break; - default: - form = "(SVEPartitionBreakCondition)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/'?04:mz, 'Pn.b"); } void Disassembler::VisitSVEPermutePredicateElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pn.'t, 'Pm.'t"; - - switch (instr->Mask(SVEPermutePredicateElementsMask)) { - case TRN1_p_pp: - mnemonic = "trn1"; - break; - case TRN2_p_pp: - mnemonic = "trn2"; - break; - case UZP1_p_pp: - mnemonic = "uzp1"; - break; - case UZP2_p_pp: - mnemonic = "uzp2"; - break; - case ZIP1_p_pp: - mnemonic = "zip1"; - break; - case ZIP2_p_pp: - mnemonic = "zip2"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn.'t, 'Pm.'t"); } void Disassembler::VisitSVEPredicateFirstActive(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateFirstActive)"; - - switch (instr->Mask(SVEPredicateFirstActiveMask)) { - case PFIRST_p_p_p: - mnemonic = "pfirst"; - form = "'Pd.b, 'Pn, 'Pd.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.b, 'Pn, 'Pd.b"); } void Disassembler::VisitSVEPredicateReadFromFFR_Unpredicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateReadFromFFR_Unpredicated)"; - - switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) { - case RDFFR_p_f: - mnemonic = "rdffr"; - form = "'Pd.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.b"); } void Disassembler::VisitSVEPredicateTest(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateTest)"; - - switch (instr->Mask(SVEPredicateTestMask)) { - 
case PTEST_p_p: - mnemonic = "ptest"; - form = "p'u1310, 'Pn.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "p'u1310, 'Pn.b"); } void Disassembler::VisitSVEPredicateZero(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateZero)"; - - switch (instr->Mask(SVEPredicateZeroMask)) { - case PFALSE_p: - mnemonic = "pfalse"; - form = "'Pd.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.b"); } void Disassembler::VisitSVEPropagateBreakToNextPartition( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b"; - - switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) { - case BRKNS_p_p_pp: - mnemonic = "brkns"; - break; - case BRKN_p_p_pp: - mnemonic = "brkn"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b"); } void Disassembler::VisitSVEReversePredicateElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEReversePredicateElements)"; - - switch (instr->Mask(SVEReversePredicateElementsMask)) { - case REV_p_p: - mnemonic = "rev"; - form = "'Pd.'t, 'Pn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn.'t"); } void Disassembler::VisitSVEReverseVectorElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEReverseVectorElements)"; - - switch (instr->Mask(SVEReverseVectorElementsMask)) { - case REV_z_z: - mnemonic = "rev"; - form = "'Zd.'t, 'Zn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t"); } void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) { @@ -7847,7 +4810,6 @@ void 
Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) { void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = IncDecFormHelper(instr, "'R20d, 'Ipc, mul #'u1916+1", "'R20d, 'Ipc", @@ -7857,399 +4819,113 @@ void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount( "'Xd, 'Wd, 'Ipc", "'Xd, 'Wd"); - switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) { - case SQDECB_r_rs_sx: - mnemonic = "sqdecb"; - form = form_sx; - break; - case SQDECD_r_rs_sx: - mnemonic = "sqdecd"; - form = form_sx; - break; - case SQDECH_r_rs_sx: - mnemonic = "sqdech"; + switch (form_hash_) { + case "sqdecb_r_rs_sx"_h: + case "sqdecd_r_rs_sx"_h: + case "sqdech_r_rs_sx"_h: + case "sqdecw_r_rs_sx"_h: + case "sqincb_r_rs_sx"_h: + case "sqincd_r_rs_sx"_h: + case "sqinch_r_rs_sx"_h: + case "sqincw_r_rs_sx"_h: form = form_sx; break; - case SQDECW_r_rs_sx: - mnemonic = "sqdecw"; - form = form_sx; - break; - case SQINCB_r_rs_sx: - mnemonic = "sqincb"; - form = form_sx; - break; - case SQINCD_r_rs_sx: - mnemonic = "sqincd"; - form = form_sx; - break; - case SQINCH_r_rs_sx: - mnemonic = "sqinch"; - form = form_sx; - break; - case SQINCW_r_rs_sx: - mnemonic = "sqincw"; - form = form_sx; - break; - case SQDECB_r_rs_x: - mnemonic = "sqdecb"; - break; - case SQDECD_r_rs_x: - mnemonic = "sqdecd"; - break; - case SQDECH_r_rs_x: - mnemonic = "sqdech"; - break; - case SQDECW_r_rs_x: - mnemonic = "sqdecw"; - break; - case SQINCB_r_rs_x: - mnemonic = "sqincb"; - break; - case SQINCD_r_rs_x: - mnemonic = "sqincd"; - break; - case SQINCH_r_rs_x: - mnemonic = "sqinch"; - break; - case SQINCW_r_rs_x: - mnemonic = "sqincw"; - break; - case UQDECB_r_rs_uw: - case UQDECB_r_rs_x: - mnemonic = "uqdecb"; - break; - case UQDECD_r_rs_uw: - case UQDECD_r_rs_x: - mnemonic = "uqdecd"; - break; - case UQDECH_r_rs_uw: - case UQDECH_r_rs_x: - mnemonic = "uqdech"; - break; - case UQDECW_r_rs_uw: 
- case UQDECW_r_rs_x: - mnemonic = "uqdecw"; - break; - case UQINCB_r_rs_uw: - case UQINCB_r_rs_x: - mnemonic = "uqincb"; - break; - case UQINCD_r_rs_uw: - case UQINCD_r_rs_x: - mnemonic = "uqincd"; - break; - case UQINCH_r_rs_uw: - case UQINCH_r_rs_x: - mnemonic = "uqinch"; - break; - case UQINCW_r_rs_uw: - case UQINCW_r_rs_x: - mnemonic = "uqincw"; - break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVESaturatingIncDecVectorByElementCount( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = IncDecFormHelper(instr, "'Zd.'t, 'Ipc, mul #'u1916+1", "'Zd.'t, 'Ipc", "'Zd.'t"); - - switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) { - case SQDECD_z_zs: - mnemonic = "sqdecd"; - break; - case SQDECH_z_zs: - mnemonic = "sqdech"; - break; - case SQDECW_z_zs: - mnemonic = "sqdecw"; - break; - case SQINCD_z_zs: - mnemonic = "sqincd"; - break; - case SQINCH_z_zs: - mnemonic = "sqinch"; - break; - case SQINCW_z_zs: - mnemonic = "sqincw"; - break; - case UQDECD_z_zs: - mnemonic = "uqdecd"; - break; - case UQDECH_z_zs: - mnemonic = "uqdech"; - break; - case UQDECW_z_zs: - mnemonic = "uqdecw"; - break; - case UQINCD_z_zs: - mnemonic = "uqincd"; - break; - case UQINCH_z_zs: - mnemonic = "uqinch"; - break; - case UQINCW_z_zs: - mnemonic = "uqincw"; - break; - default: - form = "(SVEElementCount)"; - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStoreMultipleStructures_ScalarPlusImm)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl, ['Xns'ISveSvl]"; - - 
switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) { - case ST2B_z_p_bi_contiguous: - mnemonic = "st2b"; - form = form_2; - break; - case ST2H_z_p_bi_contiguous: - mnemonic = "st2h"; - form = form_2; - break; - case ST2W_z_p_bi_contiguous: - mnemonic = "st2w"; - form = form_2; - break; - case ST2D_z_p_bi_contiguous: - mnemonic = "st2d"; - form = form_2; - break; - case ST3B_z_p_bi_contiguous: - mnemonic = "st3b"; - form = form_3; - break; - case ST3H_z_p_bi_contiguous: - mnemonic = "st3h"; + const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl, ['Xns'ISveSvl]"; + + switch (form_hash_) { + case "st3b_z_p_bi_contiguous"_h: + case "st3h_z_p_bi_contiguous"_h: + case "st3w_z_p_bi_contiguous"_h: + case "st3d_z_p_bi_contiguous"_h: form = form_3; break; - case ST3W_z_p_bi_contiguous: - mnemonic = "st3w"; - form = form_3; - break; - case ST3D_z_p_bi_contiguous: - mnemonic = "st3d"; - form = form_3; - break; - case ST4B_z_p_bi_contiguous: - mnemonic = "st4b"; - form = form_4; - break; - case ST4H_z_p_bi_contiguous: - mnemonic = "st4h"; + case "st4b_z_p_bi_contiguous"_h: + case "st4h_z_p_bi_contiguous"_h: + case "st4w_z_p_bi_contiguous"_h: + case "st4d_z_p_bi_contiguous"_h: form = form_4; break; - case ST4W_z_p_bi_contiguous: - mnemonic = "st4w"; - form = form_4; - break; - case ST4D_z_p_bi_contiguous: - mnemonic = "st4d"; - form = form_4; - break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStoreMultipleStructures_ScalarPlusScalar)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, 
['Xns, 'Xm'NSveS]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl, ['Xns, 'Xm'NSveS]"; - - switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) { - case ST2B_z_p_br_contiguous: - mnemonic = "st2b"; - form = form_2; - break; - case ST2D_z_p_br_contiguous: - mnemonic = "st2d"; - form = form_2; - break; - case ST2H_z_p_br_contiguous: - mnemonic = "st2h"; - form = form_2; - break; - case ST2W_z_p_br_contiguous: - mnemonic = "st2w"; - form = form_2; - break; - case ST3B_z_p_br_contiguous: - mnemonic = "st3b"; + const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl, ['Xns, 'Xm'NSveS]"; + + switch (form_hash_) { + case "st3b_z_p_br_contiguous"_h: + case "st3d_z_p_br_contiguous"_h: + case "st3h_z_p_br_contiguous"_h: + case "st3w_z_p_br_contiguous"_h: form = form_3; break; - case ST3D_z_p_br_contiguous: - mnemonic = "st3d"; - form = form_3; - break; - case ST3H_z_p_br_contiguous: - mnemonic = "st3h"; - form = form_3; - break; - case ST3W_z_p_br_contiguous: - mnemonic = "st3w"; - form = form_3; - break; - case ST4B_z_p_br_contiguous: - mnemonic = "st4b"; + case "st4b_z_p_br_contiguous"_h: + case "st4d_z_p_br_contiguous"_h: + case "st4h_z_p_br_contiguous"_h: + case "st4w_z_p_br_contiguous"_h: form = form_4; break; - case ST4D_z_p_br_contiguous: - mnemonic = "st4d"; - form = form_4; - break; - case ST4H_z_p_br_contiguous: - mnemonic = "st4h"; - form = form_4; - break; - case ST4W_z_p_br_contiguous: - mnemonic = "st4w"; - form = form_4; - break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEStorePredicateRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStorePredicateRegister)"; - - switch 
(instr->Mask(SVEStorePredicateRegisterMask)) { - case STR_p_bi: - mnemonic = "str"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Pd, ['Xns]"; - } else { - form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; + const char *form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEStoreVectorRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStoreVectorRegister)"; - - switch (instr->Mask(SVEStoreVectorRegisterMask)) { - case STR_z_bi: - mnemonic = "str"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Zd, ['Xns]"; - } else { - form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; + const char *form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVETableLookup(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVETableLookup)"; - - switch (instr->Mask(SVETableLookupMask)) { - case TBL_z_zz_1: - mnemonic = "tbl"; - form = "'Zd.'t, {'Zn.'t}, 'Zm.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, {'Zn.'t}, 'Zm.'t"); } void Disassembler::VisitSVEUnpackPredicateElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.h, 'Pn.b"; - - switch (instr->Mask(SVEUnpackPredicateElementsMask)) { - case PUNPKHI_p_p: - mnemonic = "punpkhi"; - break; - case PUNPKLO_p_p: - mnemonic = "punpklo"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.h, 'Pn.b"); } void Disassembler::VisitSVEUnpackVectorElements(const Instruction *instr) { - const char *mnemonic = 
"unimplemented"; - const char *form = "'Zd.'t, 'Zn.'th"; - if (instr->GetSVESize() == 0) { // The lowest lane size of the destination vector is H-sized lane. - Format(instr, "unallocated", "(SVEUnpackVectorElements)"); - return; - } - - switch (instr->Mask(SVEUnpackVectorElementsMask)) { - case SUNPKHI_z_z: - mnemonic = "sunpkhi"; - break; - case SUNPKLO_z_z: - mnemonic = "sunpklo"; - break; - case UUNPKHI_z_z: - mnemonic = "uunpkhi"; - break; - case UUNPKLO_z_z: - mnemonic = "uunpklo"; - break; - default: - break; + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'th"); } - Format(instr, mnemonic, form); } -void Disassembler::VisitSVEVectorSplice_Destructive(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEVectorSplice_Destructive)"; - - switch (instr->Mask(SVEVectorSplice_DestructiveMask)) { - case SPLICE_z_p_zz_des: - mnemonic = "splice"; - form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); +void Disassembler::VisitSVEVectorSplice(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"); } void Disassembler::VisitSVEAddressGeneration(const Instruction *instr) { @@ -8314,43 +4990,38 @@ void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) { (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19); unsigned lane_size = instr->GetSVESize(); - switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { - case ASR_z_zi: - if (tsize != 0) { - // The tsz field must not be zero. 
- mnemonic = "asr"; - form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; - } - break; - case ASR_z_zw: - if (lane_size <= kSRegSizeInBytesLog2) { - mnemonic = "asr"; - form = "'Zd.'t, 'Zn.'t, 'Zm.d"; - } - break; - case LSL_z_zi: + const char *suffix = NULL; + const char *form_i = "'Zd.'tszs, 'Zn.'tszs, "; + + switch (form_hash_) { + case "asr_z_zi"_h: + case "lsr_z_zi"_h: + case "sri_z_zzi"_h: + case "srsra_z_zi"_h: + case "ssra_z_zi"_h: + case "ursra_z_zi"_h: + case "usra_z_zi"_h: if (tsize != 0) { // The tsz field must not be zero. - mnemonic = "lsl"; - form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSver"; - } - break; - case LSL_z_zw: - if (lane_size <= kSRegSizeInBytesLog2) { - mnemonic = "lsl"; - form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + mnemonic = mnemonic_.c_str(); + form = form_i; + suffix = "'ITriSves"; } break; - case LSR_z_zi: + case "lsl_z_zi"_h: + case "sli_z_zzi"_h: if (tsize != 0) { // The tsz field must not be zero. - mnemonic = "lsr"; - form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; + mnemonic = mnemonic_.c_str(); + form = form_i; + suffix = "'ITriSver"; } break; - case LSR_z_zw: + case "asr_z_zw"_h: + case "lsl_z_zw"_h: + case "lsr_z_zw"_h: if (lane_size <= kSRegSizeInBytesLog2) { - mnemonic = "lsr"; + mnemonic = mnemonic_.c_str(); form = "'Zd.'t, 'Zn.'t, 'Zm.d"; } break; @@ -8358,842 +5029,300 @@ void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) { break; } - Format(instr, mnemonic, form); + Format(instr, mnemonic, form, suffix); } void Disassembler::VisitSVEElementCount(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); - - switch (instr->Mask(SVEElementCountMask)) { - case CNTB_r_s: - mnemonic = "cntb"; - break; - case CNTD_r_s: - mnemonic = "cntd"; - break; - case CNTH_r_s: - mnemonic = "cnth"; - break; - case CNTW_r_s: - mnemonic = "cntw"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, 
form); } void Disassembler::VisitSVEFPAccumulatingReduction(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPAccumulatingReduction)"; - - switch (instr->Mask(SVEFPAccumulatingReductionMask)) { - case FADDA_v_p_z: - mnemonic = "fadda"; - form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPArithmeticUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { - case FADD_z_zz: - mnemonic = "fadd"; - break; - case FMUL_z_zz: - mnemonic = "fmul"; - break; - case FRECPS_z_zz: - mnemonic = "frecps"; - break; - case FRSQRTS_z_zz: - mnemonic = "frsqrts"; - break; - case FSUB_z_zz: - mnemonic = "fsub"; - break; - case FTSMUL_z_zz: - mnemonic = "ftsmul"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPCompareVectors(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEFPCompareVectorsMask)) { - case FACGE_p_p_zz: - mnemonic = "facge"; - break; - case FACGT_p_p_zz: - mnemonic = "facgt"; - break; - case FCMEQ_p_p_zz: - mnemonic = "fcmeq"; - break; - case FCMGE_p_p_zz: - mnemonic = "fcmge"; - break; - case FCMGT_p_p_zz: - mnemonic = "fcmgt"; - break; - case FCMNE_p_p_zz: - mnemonic = "fcmne"; - break; - case FCMUO_p_p_zz: - mnemonic = "fcmuo"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + 
FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPCompareWithZero(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0"; - - switch (instr->Mask(SVEFPCompareWithZeroMask)) { - case FCMEQ_p_p_z0: - mnemonic = "fcmeq"; - break; - case FCMGE_p_p_z0: - mnemonic = "fcmge"; - break; - case FCMGT_p_p_z0: - mnemonic = "fcmgt"; - break; - case FCMLE_p_p_z0: - mnemonic = "fcmle"; - break; - case FCMLT_p_p_z0: - mnemonic = "fcmlt"; - break; - case FCMNE_p_p_z0: - mnemonic = "fcmne"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPComplexAddition(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPComplexAddition)"; - - switch (instr->Mask(SVEFPComplexAdditionMask)) { - case FCADD_z_p_zz: - mnemonic = "fcadd"; - if (instr->ExtractBit(16) == 0) { - form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #90"; - } else { - form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #270"; - } - break; - default: - break; + // Bit 15 is always set, so this gives 90 * 1 or 3. 
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #'u1615*90"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, form); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPComplexMulAdd(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPComplexMulAdd)"; - const char *suffix = NULL; - - const char *fcmla_constants[] = {"0", "90", "180", "270"}; - - switch (instr->Mask(SVEFPComplexMulAddMask)) { - case FCMLA_z_p_zzz: - mnemonic = "fcmla"; - form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #"; - suffix = fcmla_constants[instr->ExtractBits(14, 13)]; - break; - default: - break; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #'u1413*90"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, form); } - Format(instr, mnemonic, form, suffix); } void Disassembler::VisitSVEFPComplexMulAddIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPComplexMulAddIndex)"; - - const char *fcmla_constants[] = {"0", "90", "180", "270"}; - const char *suffix = fcmla_constants[instr->ExtractBits(11, 10)]; - - switch (instr->Mask(SVEFPComplexMulAddIndexMask)) { - case FCMLA_z_zzzi_h: - mnemonic = "fcmla"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #"; - break; - case FCMLA_z_zzzi_s: - mnemonic = "fcmla"; - form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #"; - break; - default: - suffix = NULL; + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2019]"; + const char *suffix = ", #'u1110*90"; + switch (form_hash_) { + case "fcmla_z_zzzi_s"_h: + form = "'Zd.s, 'Zn.s, z'u1916.s['u2020]"; break; } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEFPFastReduction(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; - - 
switch (instr->Mask(SVEFPFastReductionMask)) { - case FADDV_v_p_z: - mnemonic = "faddv"; - break; - case FMAXNMV_v_p_z: - mnemonic = "fmaxnmv"; - break; - case FMAXV_v_p_z: - mnemonic = "fmaxv"; - break; - case FMINNMV_v_p_z: - mnemonic = "fminnmv"; - break; - case FMINV_v_p_z: - mnemonic = "fminv"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 'Zn.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPMulIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPMulIndex)"; - - switch (instr->Mask(SVEFPMulIndexMask)) { - case FMUL_z_zzi_d: - mnemonic = "fmul"; + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + switch (form_hash_) { + case "fmul_z_zzi_d"_h: form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; break; - case FMUL_z_zzi_h: - case FMUL_z_zzi_h_i3h: - mnemonic = "fmul"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; - break; - case FMUL_z_zzi_s: - mnemonic = "fmul"; + case "fmul_z_zzi_s"_h: form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEFPMulAdd(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEFPMulAddMask)) { - case FMAD_z_p_zzz: - mnemonic = "fmad"; - break; - case FMLA_z_p_zzz: - mnemonic = "fmla"; - break; - case FMLS_z_p_zzz: - mnemonic = "fmls"; - break; - case FMSB_z_p_zzz: - mnemonic = "fmsb"; - break; - case FNMAD_z_p_zzz: - mnemonic = "fnmad"; - break; - case FNMLA_z_p_zzz: - mnemonic = "fnmla"; - break; - case FNMLS_z_p_zzz: - mnemonic = "fnmls"; - break; - case FNMSB_z_p_zzz: - mnemonic = "fnmsb"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + 
FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEFPMulAddIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPMulAddIndex)"; - - switch (instr->Mask(SVEFPMulAddIndexMask)) { - case FMLA_z_zzzi_d: - mnemonic = "fmla"; - form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; - break; - case FMLA_z_zzzi_s: - mnemonic = "fmla"; + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + switch (form_hash_) { + case "fmla_z_zzzi_s"_h: + case "fmls_z_zzzi_s"_h: form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; break; - case FMLS_z_zzzi_d: - mnemonic = "fmls"; + case "fmla_z_zzzi_d"_h: + case "fmls_z_zzzi_d"_h: form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; break; - case FMLS_z_zzzi_s: - mnemonic = "fmls"; - form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; - break; - case FMLA_z_zzzi_h: - case FMLA_z_zzzi_h_i3h: - mnemonic = "fmla"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; - break; - case FMLS_z_zzzi_h: - case FMLS_z_zzzi_h_i3h: - mnemonic = "fmls"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; - break; - default: - break; } - - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEFPUnaryOpUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { - case FRECPE_z_z: - mnemonic = "frecpe"; - break; - case FRSQRTE_z_z: - mnemonic = "frsqrte"; - break; - default: - break; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t"); } - Format(instr, mnemonic, form); } void Disassembler::VisitSVEIncDecByPredicateCount(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIncDecByPredicateCount)"; - - switch (instr->Mask(SVEIncDecByPredicateCountMask)) { - case DECP_r_p_r: - 
case DECP_z_p_z: - mnemonic = "decp"; - break; - case INCP_r_p_r: - case INCP_z_p_z: - mnemonic = "incp"; - break; - case SQDECP_r_p_r_sx: - case SQDECP_r_p_r_x: - case SQDECP_z_p_z: - mnemonic = "sqdecp"; - break; - case SQINCP_r_p_r_sx: - case SQINCP_r_p_r_x: - case SQINCP_z_p_z: - mnemonic = "sqincp"; - break; - case UQDECP_r_p_r_uw: - case UQDECP_r_p_r_x: - case UQDECP_z_p_z: - mnemonic = "uqdecp"; - break; - case UQINCP_r_p_r_uw: - case UQINCP_r_p_r_x: - case UQINCP_z_p_z: - mnemonic = "uqincp"; - break; - default: - break; - } - - switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + const char *form = "'Zd.'t, 'Pn"; + switch (form_hash_) { // <Xdn>, <Pg>.<T> - case DECP_r_p_r: - case INCP_r_p_r: + case "decp_r_p_r"_h: + case "incp_r_p_r"_h: form = "'Xd, 'Pn.'t"; break; - // <Zdn>.<T>, <Pg> - case DECP_z_p_z: - case INCP_z_p_z: - case SQDECP_z_p_z: - case SQINCP_z_p_z: - case UQDECP_z_p_z: - case UQINCP_z_p_z: - form = "'Zd.'t, 'Pn"; - break; // <Xdn>, <Pg>.<T>, <Wdn> - case SQDECP_r_p_r_sx: - case SQINCP_r_p_r_sx: + case "sqdecp_r_p_r_sx"_h: + case "sqincp_r_p_r_sx"_h: form = "'Xd, 'Pn.'t, 'Wd"; break; // <Xdn>, <Pg>.<T> - case SQDECP_r_p_r_x: - case SQINCP_r_p_r_x: - case UQDECP_r_p_r_x: - case UQINCP_r_p_r_x: + case "sqdecp_r_p_r_x"_h: + case "sqincp_r_p_r_x"_h: + case "uqdecp_r_p_r_x"_h: + case "uqincp_r_p_r_x"_h: form = "'Xd, 'Pn.'t"; break; // <Wdn>, <Pg>.<T> - case UQDECP_r_p_r_uw: - case UQINCP_r_p_r_uw: + case "uqdecp_r_p_r_uw"_h: + case "uqincp_r_p_r_uw"_h: form = "'Wd, 'Pn.'t"; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEIndexGeneration(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIndexGeneration)"; - + const char *form = "'Zd.'t, #'s0905, #'s2016"; bool w_inputs = static_cast<unsigned>(instr->GetSVESize()) <= kWRegSizeInBytesLog2; - switch (instr->Mask(SVEIndexGenerationMask)) { - case INDEX_z_ii: - mnemonic 
= "index"; - form = "'Zd.'t, #'s0905, #'s2016"; - break; - case INDEX_z_ir: - mnemonic = "index"; + switch (form_hash_) { + case "index_z_ir"_h: form = w_inputs ? "'Zd.'t, #'s0905, 'Wm" : "'Zd.'t, #'s0905, 'Xm"; break; - case INDEX_z_ri: - mnemonic = "index"; + case "index_z_ri"_h: form = w_inputs ? "'Zd.'t, 'Wn, #'s2016" : "'Zd.'t, 'Xn, #'s2016"; break; - case INDEX_z_rr: - mnemonic = "index"; + case "index_z_rr"_h: form = w_inputs ? "'Zd.'t, 'Wn, 'Wm" : "'Zd.'t, 'Xn, 'Xm"; break; - default: - break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEIntArithmeticUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) { - case ADD_z_zz: - mnemonic = "add"; - break; - case SQADD_z_zz: - mnemonic = "sqadd"; - break; - case SQSUB_z_zz: - mnemonic = "sqsub"; - break; - case SUB_z_zz: - mnemonic = "sub"; - break; - case UQADD_z_zz: - mnemonic = "uqadd"; - break; - case UQSUB_z_zz: - mnemonic = "uqsub"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); } void Disassembler::VisitSVEIntCompareSignedImm(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016"; - - switch (instr->Mask(SVEIntCompareSignedImmMask)) { - case CMPEQ_p_p_zi: - mnemonic = "cmpeq"; - break; - case CMPGE_p_p_zi: - mnemonic = "cmpge"; - break; - case CMPGT_p_p_zi: - mnemonic = "cmpgt"; - break; - case CMPLE_p_p_zi: - mnemonic = "cmple"; - break; - case CMPLT_p_p_zi: - mnemonic = "cmplt"; - break; - case CMPNE_p_p_zi: - mnemonic = "cmpne"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016"); } void Disassembler::VisitSVEIntCompareUnsignedImm(const Instruction *instr) { - const char *mnemonic 
= "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014"; - - switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { - case CMPHI_p_p_zi: - mnemonic = "cmphi"; - break; - case CMPHS_p_p_zi: - mnemonic = "cmphs"; - break; - case CMPLO_p_p_zi: - mnemonic = "cmplo"; - break; - case CMPLS_p_p_zi: - mnemonic = "cmpls"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014"); } void Disassembler::VisitSVEIntCompareVectors(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.d"; - - switch (instr->Mask(SVEIntCompareVectorsMask)) { - case CMPEQ_p_p_zw: - mnemonic = "cmpeq"; - break; - case CMPEQ_p_p_zz: - mnemonic = "cmpeq"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPGE_p_p_zw: - mnemonic = "cmpge"; - break; - case CMPGE_p_p_zz: - mnemonic = "cmpge"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPGT_p_p_zw: - mnemonic = "cmpgt"; - break; - case CMPGT_p_p_zz: - mnemonic = "cmpgt"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPHI_p_p_zw: - mnemonic = "cmphi"; - break; - case CMPHI_p_p_zz: - mnemonic = "cmphi"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPHS_p_p_zw: - mnemonic = "cmphs"; - break; - case CMPHS_p_p_zz: - mnemonic = "cmphs"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPLE_p_p_zw: - mnemonic = "cmple"; - break; - case CMPLO_p_p_zw: - mnemonic = "cmplo"; - break; - case CMPLS_p_p_zw: - mnemonic = "cmpls"; - break; - case CMPLT_p_p_zw: - mnemonic = "cmplt"; - break; - case CMPNE_p_p_zw: - mnemonic = "cmpne"; - break; - case CMPNE_p_p_zz: - mnemonic = "cmpne"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - default: + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm."; + const char *suffix = "d"; + switch (form_hash_) { + case "cmpeq_p_p_zz"_h: + case "cmpge_p_p_zz"_h: + case "cmpgt_p_p_zz"_h: + case "cmphi_p_p_zz"_h: + 
case "cmphs_p_p_zz"_h: + case "cmpne_p_p_zz"_h: + suffix = "'t"; break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEIntMulAddPredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIntMulAddPredicated)"; - - switch (instr->Mask(SVEIntMulAddPredicatedMask)) { - case MAD_z_p_zzz: - mnemonic = "mad"; - form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; - break; - case MLA_z_p_zzz: - mnemonic = "mla"; - form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; - break; - case MLS_z_p_zzz: - mnemonic = "mls"; - form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; - break; - case MSB_z_p_zzz: - mnemonic = "msb"; - form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; - break; - default: + const char *form = "'Zd.'t, 'Pgl/m, "; + const char *suffix = "'Zn.'t, 'Zm.'t"; + switch (form_hash_) { + case "mad_z_p_zzz"_h: + case "msb_z_p_zzz"_h: + suffix = "'Zm.'t, 'Zn.'t"; break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEIntMulAddUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIntMulAddUnpredicated)"; - if (static_cast<unsigned>(instr->GetSVESize()) >= kSRegSizeInBytesLog2) { - form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq"; - switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { - case SDOT_z_zzz: - mnemonic = "sdot"; - break; - case UDOT_z_zzz: - mnemonic = "udot"; - break; - default: - break; - } + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'tq, 'Zm.'tq"); + } else { + VisitUnallocated(instr); } - - Format(instr, mnemonic, form); } void Disassembler::VisitSVEMovprfx(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEMovprfx)"; - - if (instr->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z) { - mnemonic = "movprfx"; - form = "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t"; - } - - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/'?16:mz, 
'Zn.'t"); } void Disassembler::VisitSVEIntReduction(const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "'Vdv, 'Pgl, 'Zn.'t"; - - if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) { - switch (instr->Mask(SVEIntReductionLogicalMask)) { - case ANDV_r_p_z: - mnemonic = "andv"; - break; - case EORV_r_p_z: - mnemonic = "eorv"; - break; - case ORV_r_p_z: - mnemonic = "orv"; - break; - default: - break; - } - } else { - switch (instr->Mask(SVEIntReductionMask)) { - case SADDV_r_p_z: - mnemonic = "saddv"; - form = "'Dd, 'Pgl, 'Zn.'t"; - break; - case SMAXV_r_p_z: - mnemonic = "smaxv"; - break; - case SMINV_r_p_z: - mnemonic = "sminv"; - break; - case UADDV_r_p_z: - mnemonic = "uaddv"; - form = "'Dd, 'Pgl, 'Zn.'t"; - break; - case UMAXV_r_p_z: - mnemonic = "umaxv"; - break; - case UMINV_r_p_z: - mnemonic = "uminv"; - break; - default: - break; - } + switch (form_hash_) { + case "saddv_r_p_z"_h: + case "uaddv_r_p_z"_h: + form = "'Dd, 'Pgl, 'Zn.'t"; + break; } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEIntUnaryArithmeticPredicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; - - switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) { - case ABS_z_p_z: - mnemonic = "abs"; - break; - case CLS_z_p_z: - mnemonic = "cls"; - break; - case CLZ_z_p_z: - mnemonic = "clz"; - break; - case CNOT_z_p_z: - mnemonic = "cnot"; - break; - case CNT_z_p_z: - mnemonic = "cnt"; - break; - case FABS_z_p_z: - mnemonic = "fabs"; - break; - case FNEG_z_p_z: - mnemonic = "fneg"; - break; - case NEG_z_p_z: - mnemonic = "neg"; - break; - case NOT_z_p_z: - mnemonic = "not"; - break; - case SXTB_z_p_z: - mnemonic = "sxtb"; - break; - case SXTH_z_p_z: - mnemonic = "sxth"; - break; - case SXTW_z_p_z: - mnemonic = "sxtw"; - break; - case UXTB_z_p_z: - mnemonic = "uxtb"; - break; - case UXTH_z_p_z: - mnemonic = 
"uxth"; - break; - case UXTW_z_p_z: - mnemonic = "uxtw"; - break; - default: + VectorFormat vform = instr->GetSVEVectorFormat(); + + switch (form_hash_) { + case "sxtw_z_p_z"_h: + case "uxtw_z_p_z"_h: + if (vform == kFormatVnS) { + VisitUnallocated(instr); + return; + } + VIXL_FALLTHROUGH(); + case "sxth_z_p_z"_h: + case "uxth_z_p_z"_h: + if (vform == kFormatVnH) { + VisitUnallocated(instr); + return; + } + VIXL_FALLTHROUGH(); + case "sxtb_z_p_z"_h: + case "uxtb_z_p_z"_h: + case "fabs_z_p_z"_h: + case "fneg_z_p_z"_h: + if (vform == kFormatVnB) { + VisitUnallocated(instr); + return; + } break; } - Format(instr, mnemonic, form); + + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t"); } void Disassembler::VisitSVEMulIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEMulIndex)"; + const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; - switch (instr->Mask(SVEMulIndexMask)) { - case SDOT_z_zzzi_d: - mnemonic = "sdot"; - form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; - break; - case SDOT_z_zzzi_s: - mnemonic = "sdot"; - form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; - break; - case UDOT_z_zzzi_d: - mnemonic = "udot"; + switch (form_hash_) { + case "sdot_z_zzzi_d"_h: + case "udot_z_zzzi_d"_h: form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; break; - case UDOT_z_zzzi_s: - mnemonic = "udot"; - form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; - break; - default: - break; } - Format(instr, mnemonic, form); + + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEPermuteVectorExtract(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPermuteVectorExtract)"; - - switch (instr->Mask(SVEPermuteVectorExtractMask)) { - case EXT_z_zi_des: - mnemonic = "ext"; - form = "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210"); } void 
Disassembler::VisitSVEPermuteVectorInterleaving(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEPermuteVectorInterleavingMask)) { - case TRN1_z_zz: - mnemonic = "trn1"; - break; - case TRN2_z_zz: - mnemonic = "trn2"; - break; - case UZP1_z_zz: - mnemonic = "uzp1"; - break; - case UZP2_z_zz: - mnemonic = "uzp2"; - break; - case ZIP1_z_zz: - mnemonic = "zip1"; - break; - case ZIP2_z_zz: - mnemonic = "zip2"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); } void Disassembler::VisitSVEPredicateCount(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateCount)"; - - switch (instr->Mask(SVEPredicateCountMask)) { - case CNTP_r_p_p: - mnemonic = "cntp"; - form = "'Xd, p'u1310, 'Pn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Xd, p'u1310, 'Pn.'t"); } void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) { - const char *mnemonic = "unimplemented"; + const char *mnemonic = mnemonic_.c_str(); const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; int pd = instr->GetPd(); @@ -9201,304 +5330,127 @@ void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) { int pm = instr->GetPm(); int pg = instr->ExtractBits(13, 10); - switch (instr->Mask(SVEPredicateLogicalMask)) { - case ANDS_p_p_pp_z: - mnemonic = "ands"; + switch (form_hash_) { + case "ands_p_p_pp_z"_h: if (pn == pm) { mnemonic = "movs"; form = "'Pd.b, p'u1310/z, 'Pn.b"; } break; - case AND_p_p_pp_z: - mnemonic = "and"; + case "and_p_p_pp_z"_h: if (pn == pm) { mnemonic = "mov"; form = "'Pd.b, p'u1310/z, 'Pn.b"; } break; - case BICS_p_p_pp_z: - mnemonic = "bics"; - break; - case BIC_p_p_pp_z: - mnemonic = "bic"; - break; - case EORS_p_p_pp_z: - mnemonic = "eors"; + case "eors_p_p_pp_z"_h: if (pm == pg) { mnemonic = 
"nots"; form = "'Pd.b, 'Pm/z, 'Pn.b"; } break; - case EOR_p_p_pp_z: - mnemonic = "eor"; + case "eor_p_p_pp_z"_h: if (pm == pg) { mnemonic = "not"; form = "'Pd.b, 'Pm/z, 'Pn.b"; } break; - case NANDS_p_p_pp_z: - mnemonic = "nands"; - break; - case NAND_p_p_pp_z: - mnemonic = "nand"; - break; - case NORS_p_p_pp_z: - mnemonic = "nors"; - break; - case NOR_p_p_pp_z: - mnemonic = "nor"; - break; - case ORNS_p_p_pp_z: - mnemonic = "orns"; - break; - case ORN_p_p_pp_z: - mnemonic = "orn"; - break; - case ORRS_p_p_pp_z: - mnemonic = "orrs"; + case "orrs_p_p_pp_z"_h: if ((pn == pm) && (pn == pg)) { mnemonic = "movs"; form = "'Pd.b, 'Pn.b"; } break; - case ORR_p_p_pp_z: - mnemonic = "orr"; + case "orr_p_p_pp_z"_h: if ((pn == pm) && (pn == pg)) { mnemonic = "mov"; form = "'Pd.b, 'Pn.b"; } break; - case SEL_p_p_pp: + case "sel_p_p_pp"_h: if (pd == pm) { mnemonic = "mov"; form = "'Pd.b, p'u1310/m, 'Pn.b"; } else { - mnemonic = "sel"; form = "'Pd.b, p'u1310, 'Pn.b, 'Pm.b"; } break; - default: - form = "(SVEPredicateLogical)"; - break; } Format(instr, mnemonic, form); } void Disassembler::VisitSVEPredicateInitialize(const Instruction *instr) { - // This group only contains PTRUE{S}, and there are no unallocated encodings. - VIXL_STATIC_ASSERT( - SVEPredicateInitializeMask == - (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit)); - VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) || - (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s)); - - const char *mnemonic = instr->ExtractBit(16) ? "ptrues" : "ptrue"; const char *form = "'Pd.'t, 'Ipc"; // Omit the pattern if it is the default ('ALL'). if (instr->ExtractBits(9, 5) == SVE_ALL) form = "'Pd.'t"; - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, form); } void Disassembler::VisitSVEPredicateNextActive(const Instruction *instr) { - // This group only contains PNEXT, and there are no unallocated encodings. 
- VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask); - VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p); - - Format(instr, "pnext", "'Pd.'t, 'Pn, 'Pd.'t"); + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn, 'Pd.'t"); } void Disassembler::VisitSVEPredicateReadFromFFR_Predicated( const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateReadFromFFR_Predicated)"; - switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) { - case RDFFR_p_p_f: - case RDFFRS_p_p_f: - mnemonic = instr->ExtractBit(22) ? "rdffrs" : "rdffr"; - form = "'Pd.b, 'Pn/z"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.b, 'Pn/z"); } void Disassembler::VisitSVEPropagateBreak(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; - - switch (instr->Mask(SVEPropagateBreakMask)) { - case BRKPAS_p_p_pp: - mnemonic = "brkpas"; - break; - case BRKPA_p_p_pp: - mnemonic = "brkpa"; - break; - case BRKPBS_p_p_pp: - mnemonic = "brkpbs"; - break; - case BRKPB_p_p_pp: - mnemonic = "brkpb"; - break; - default: - break; - } - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"); } void Disassembler::VisitSVEStackFrameAdjustment(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Xds, 'Xms, #'s1005"; - - switch (instr->Mask(SVEStackFrameAdjustmentMask)) { - case ADDPL_r_ri: - mnemonic = "addpl"; - break; - case ADDVL_r_ri: - mnemonic = "addvl"; - break; - default: - form = "(SVEStackFrameAdjustment)"; - break; - } - - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Xds, 'Xms, #'s1005"); } void Disassembler::VisitSVEStackFrameSize(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStackFrameSize)"; - - switch 
(instr->Mask(SVEStackFrameSizeMask)) { - case RDVL_r_i: - mnemonic = "rdvl"; - form = "'Xd, #'s1005"; - break; - default: - break; - } - - Format(instr, mnemonic, form); + FormatWithDecodedMnemonic(instr, "'Xd, #'s1005"); } void Disassembler::VisitSVEVectorSelect(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEVectorSelect)"; + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t"; - switch (instr->Mask(SVEVectorSelectMask)) { - case SEL_z_p_zz: - if (instr->GetRd() == instr->GetRm()) { - mnemonic = "mov"; - form = "'Zd.'t, p'u1310/m, 'Zn.'t"; - } else { - mnemonic = "sel"; - form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t"; - } - break; - default: - break; + if (instr->GetRd() == instr->GetRm()) { + mnemonic = "mov"; + form = "'Zd.'t, p'u1310/m, 'Zn.'t"; } + Format(instr, mnemonic, form); } void Disassembler::VisitSVEContiguousLoad_ScalarPlusImm( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; const char *suffix = (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; - - switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) { - case LD1B_z_p_bi_u16: - case LD1B_z_p_bi_u32: - case LD1B_z_p_bi_u64: - case LD1B_z_p_bi_u8: - mnemonic = "ld1b"; - break; - case LD1D_z_p_bi_u64: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bi_u16: - case LD1H_z_p_bi_u32: - case LD1H_z_p_bi_u64: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bi_s16: - case LD1SB_z_p_bi_s32: - case LD1SB_z_p_bi_s64: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_bi_s32: - case LD1SH_z_p_bi_s64: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bi_s64: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bi_u32: - case LD1W_z_p_bi_u64: - mnemonic = "ld1w"; - break; - default: - form = "(SVEContiguousLoad_ScalarPlusImm)"; - suffix = NULL; - break; - } - - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitSVEContiguousLoad_ScalarPlusScalar( const Instruction *instr) { - const char *mnemonic = "unimplemented"; const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns, 'Xm"; - const char *suffix = NULL; - - switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { - case LD1B_z_p_br_u16: - case LD1B_z_p_br_u32: - case LD1B_z_p_br_u64: - case LD1B_z_p_br_u8: - mnemonic = "ld1b"; - suffix = "]"; - break; - case LD1D_z_p_br_u64: - mnemonic = "ld1d"; - suffix = ", lsl #'u2423]"; - break; - case LD1H_z_p_br_u16: - case LD1H_z_p_br_u32: - case LD1H_z_p_br_u64: - mnemonic = "ld1h"; + const char *suffix = "]"; + + switch (form_hash_) { + case "ld1h_z_p_br_u16"_h: + case "ld1h_z_p_br_u32"_h: + case "ld1h_z_p_br_u64"_h: + case "ld1w_z_p_br_u32"_h: + case "ld1w_z_p_br_u64"_h: + case "ld1d_z_p_br_u64"_h: suffix = ", lsl #'u2423]"; break; - case LD1SB_z_p_br_s16: - case LD1SB_z_p_br_s32: - case LD1SB_z_p_br_s64: - mnemonic = "ld1sb"; - suffix = "]"; - break; - case LD1SH_z_p_br_s32: - case LD1SH_z_p_br_s64: - mnemonic = "ld1sh"; + case "ld1sh_z_p_br_s32"_h: + case "ld1sh_z_p_br_s64"_h: suffix = ", lsl 
#1]"; break; - case LD1SW_z_p_br_s64: - mnemonic = "ld1sw"; + case "ld1sw_z_p_br_s64"_h: suffix = ", lsl #2]"; break; - case LD1W_z_p_br_u32: - case LD1W_z_p_br_u64: - mnemonic = "ld1w"; - suffix = ", lsl #'u2423]"; - break; - default: - form = "(SVEContiguousLoad_ScalarPlusScalar)"; - suffix = NULL; - break; } - Format(instr, mnemonic, form, suffix); + FormatWithDecodedMnemonic(instr, form, suffix); } void Disassembler::VisitReserved(const Instruction *instr) { @@ -9507,7 +5459,6 @@ void Disassembler::VisitReserved(const Instruction *instr) { Format(instr, "udf", "'IUdf"); } - void Disassembler::VisitUnimplemented(const Instruction *instr) { Format(instr, "unimplemented", "(Unimplemented)"); } @@ -9517,6 +5468,403 @@ void Disassembler::VisitUnallocated(const Instruction *instr) { Format(instr, "unallocated", "(Unallocated)"); } +void Disassembler::Visit(Metadata *metadata, const Instruction *instr) { + VIXL_ASSERT(metadata->count("form") > 0); + const std::string &form = (*metadata)["form"]; + form_hash_ = Hash(form.c_str()); + const FormToVisitorFnMap *fv = Disassembler::GetFormToVisitorFnMap(); + FormToVisitorFnMap::const_iterator it = fv->find(form_hash_); + if (it == fv->end()) { + VisitUnimplemented(instr); + } else { + SetMnemonicFromForm(form); + (it->second)(this, instr); + } +} + +void Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + VectorFormat vform = instr->GetSVEVectorFormat(); + + if ((vform == kFormatVnS) || (vform == kFormatVnD)) { + Format(instr, "unimplemented", "(PdT_PgZ_ZnT_ZmT)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction *instr) { + const char *form = "'Zd.b, {'Zn.b, 'Zn2.b}, #'u2016:1210"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdB_ZnB_ZmB(const Instruction *instr) { + const char *form = "'Zd.b, 'Zn.b, 'Zm.b"; + if 
(instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdB_ZnB_ZmB)"); + } +} + +void Disassembler::Disassemble_ZdD_PgM_ZnS(const Instruction *instr) { + const char *form = "'Zd.d, 'Pgl/m, 'Zn.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdD_ZnD_ZmD(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.d, 'Zm.d"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdD_ZnD_ZmD_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdD_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.s, z'u1916.s['u2020:1111]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdH_PgM_ZnS(const Instruction *instr) { + const char *form = "'Zd.h, 'Pgl/m, 'Zn.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdH_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_PgM_ZnD(const Instruction *instr) { + const char *form = "'Zd.s, 'Pgl/m, 'Zn.d"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_PgM_ZnH(const Instruction *instr) { + const char *form = "'Zd.s, 'Pgl/m, 'Zn.h"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_PgM_ZnS(const Instruction *instr) { + const char *form = "'Zd.s, 'Pgl/m, 'Zn.s"; + if (instr->GetSVEVectorFormat() == kFormatVnS) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdS_PgM_ZnS)"); + } +} + +void Disassembler::Disassemble_ZdS_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.h, z'u1816.h['u2019:1111]"; + Format(instr, mnemonic_.c_str(), form); +} + +void 
Disassembler::Disassemble_ZdS_ZnS_ZmS(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.s, 'Zm.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEFlogb(const Instruction *instr) { + const char *form = "'Zd.'tf, 'Pgl/m, 'Zn.'tf"; + if (instr->GetSVEVectorFormat(17) == kFormatVnB) { + Format(instr, "unimplemented", "(SVEFlogb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdT_PgM_ZnT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + VectorFormat vform = instr->GetSVEVectorFormat(); + if ((vform == kFormatVnS) || (vform == kFormatVnD)) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdT_PgZ_ZnT_ZmT)"); + } +} + +void Disassembler::Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl, {'Zn.'t, 'Zn2.'t}"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, {'Zn.'t, 'Zn2.'t}, 'Zm.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_ZnT_ZmTb(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'th"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(ZdT_ZnT_ZmTb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdT_ZnTb(const 
Instruction *instr) { + const char *form = "'Zd.'tszs, 'Zn.'tszd"; + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int shift_dist = shift_and_lane_size.first; + int lane_size = shift_and_lane_size.second; + // Convert shift_dist from a right to left shift. Valid xtn instructions + // must have a left shift_dist equivalent of zero. + shift_dist = (8 << lane_size) - shift_dist; + if ((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) && + (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)) && + (shift_dist == 0)) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdT_ZnTb)"); + } +} + +void Disassembler::Disassemble_ZdT_ZnTb_ZmTb(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + // TODO: This is correct for saddlbt, ssublbt, subltb, which don't have + // b-lane sized form, and for pmull[b|t] as feature `SVEPmull128` isn't + // supported, but may need changes for other instructions reaching here. 
+ Format(instr, "unimplemented", "(ZdT_ZnTb_ZmTb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::DisassembleSVEAddSubHigh(const Instruction *instr) { + const char *form = "'Zd.'th, 'Zn.'t, 'Zm.'t"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(SVEAddSubHigh)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::DisassembleSVEShiftLeftImm(const Instruction *instr) { + const char *form = "'Zd.'tszd, 'Zn.'tszs, 'ITriSver"; + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + if ((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) && + (lane_size <= static_cast<int>(kSRegSizeInBytesLog2))) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(SVEShiftLeftImm)"); + } +} + +void Disassembler::DisassembleSVEShiftRightImm(const Instruction *instr) { + const char *form = "'Zd.'tszs, 'Zn.'tszd, 'ITriSves"; + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + if ((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) && + (lane_size <= static_cast<int>(kSRegSizeInBytesLog2))) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(SVEShiftRightImm)"); + } +} + +void Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const( + const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.h, z'u1916.h['u2020], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.s, z'u1916.s['u2020:1111]"; + Format(instr, 
mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const( + const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnB_ZmB(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.b, 'Zm.b"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const( + const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnH_ZmH(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.h, 'Zm.h"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.h, z'u1816.h['u2019:1111]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const( + const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_PgM_ZnTb(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'th"; + + if (instr->GetSVESize() == 0) { + // The lowest lane size of the destination vector is H-sized lane. 
+ Format(instr, "unimplemented", "(Disassemble_ZdaT_PgM_ZnTb)"); + return; + } + + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEAddSubCarry(const Instruction *instr) { + const char *form = "'Zd.'?22:ds, 'Zn.'?22:ds, 'Zm.'?22:ds"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_ZnT_ZmT_const(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t, #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_ZnTb_ZmTb(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(ZdaT_ZnTb_ZmTb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq, #'u1110*90"; + VectorFormat vform = instr->GetSVEVectorFormat(); + + if ((vform == kFormatVnB) || (vform == kFormatVnH)) { + Format(instr, "unimplemented", "(ZdaT_ZnTb_ZmTb_const)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdnB_ZdnB(const Instruction *instr) { + const char *form = "'Zd.b, 'Zd.b"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdnB_ZdnB_ZmB(const Instruction *instr) { + const char *form = "'Zd.b, 'Zd.b, 'Zn.b"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEBitwiseTernary(const Instruction *instr) { + const char *form = "'Zd.d, 'Zd.d, 'Zm.d, 'Zn.d"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdnS_ZdnS_ZmS(const Instruction *instr) { + const char *form = "'Zd.s, 'Zd.s, 'Zn.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void 
Disassembler::DisassembleSVEFPPair(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(SVEFPPair)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEComplexIntAddition(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #"; + const char *suffix = (instr->ExtractBit(10) == 0) ? "90" : "270"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction *instr) { + const char *form = "'Zd.'tszs, 'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; + unsigned tsize = + (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19); + + if (tsize == 0) { + Format(instr, "unimplemented", "(ZdnT_ZdnT_ZmT_const)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d"; + const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZtD_Pg_ZnD_Xm(const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl, ['Zn.d"; + const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s"; + const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZtS_Pg_ZnS_Xm(const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl, ['Zn.s"; + const char *suffix = instr->GetRm() == 31 ? 
"]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} void Disassembler::ProcessOutput(const Instruction * /*instr*/) { // The base disasm does nothing more than disassembling into a buffer. @@ -9635,22 +5983,31 @@ void Disassembler::Format(const Instruction *instr, const char *mnemonic, const char *format0, const char *format1) { - VIXL_ASSERT(mnemonic != NULL); - ResetOutput(); - Substitute(instr, mnemonic); - if (format0 != NULL) { - VIXL_ASSERT(buffer_pos_ < buffer_size_); - buffer_[buffer_pos_++] = ' '; - Substitute(instr, format0); - if (format1 != NULL) { - Substitute(instr, format1); + if ((mnemonic == NULL) || (format0 == NULL)) { + VisitUnallocated(instr); + } else { + ResetOutput(); + Substitute(instr, mnemonic); + if (format0[0] != 0) { // Not a zero-length string. + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_++] = ' '; + Substitute(instr, format0); + // TODO: consider using a zero-length string here, too. + if (format1 != NULL) { + Substitute(instr, format1); + } } + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_] = 0; + ProcessOutput(instr); } - VIXL_ASSERT(buffer_pos_ < buffer_size_); - buffer_[buffer_pos_] = 0; - ProcessOutput(instr); } +void Disassembler::FormatWithDecodedMnemonic(const Instruction *instr, + const char *format0, + const char *format1) { + Format(instr, mnemonic_.c_str(), format0, format1); +} void Disassembler::Substitute(const Instruction *instr, const char *string) { char chr = *string++; @@ -9759,8 +6116,8 @@ std::pair<unsigned, unsigned> Disassembler::GetRegNumForField( case '2': case '3': case '4': - if ((reg_prefix == 'V') || (reg_prefix == 'Z')) { // Vt2/3/4, Zt2/3/4 - VIXL_ASSERT(field[0] == 't'); + if ((reg_prefix == 'V') || (reg_prefix == 'Z')) { // t2/3/4, n2/3/4 + VIXL_ASSERT((field[0] == 't') || (field[0] == 'n')); reg_num = (reg_num + field[1] - '1') % 32; field_len++; } else { @@ -10000,10 +6357,6 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, 
} return 3; } - case 'F': { // ILF(CNR) - Immediate Rotation Value for Complex Numbers - AppendToOutput("#%" PRId32, instr->GetImmRotFcmlaSca() * 90); - return strlen("ILFCNR"); - } case 'A': { // ILA - Immediate Load with pointer authentication. if (instr->GetImmLSPAC() != 0) { AppendToOutput(", #%" PRId32, instr->GetImmLSPAC()); @@ -10084,7 +6437,7 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, return 8; } case 'r': { - // SVE unpredicated shift immediate encoding, lsl. + // SVE unpredicated shift immediate encoding, left shifts. std::pair<int, int> shift_and_lane_size = instr->GetSVEImmShiftAndLaneSizeLog2( /* is_predicated = */ false); @@ -10093,7 +6446,7 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, return 8; } case 's': { - // SVE unpredicated shift immediate encoding, asr and lsr. + // SVE unpredicated shift immediate encoding, right shifts. std::pair<int, int> shift_and_lane_size = instr->GetSVEImmShiftAndLaneSizeLog2( /* is_predicated = */ false); @@ -10146,13 +6499,13 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } case 's': { // Is - Shift (immediate). switch (format[2]) { - case '1': { // Is1 - SSHR. + case 'R': { // IsR - right shifts. int shift = 16 << HighestSetBitPosition(instr->GetImmNEONImmh()); shift -= instr->GetImmNEONImmhImmb(); AppendToOutput("#%d", shift); return 3; } - case '2': { // Is2 - SLI. + case 'L': { // IsL - left shifts. int shift = instr->GetImmNEONImmhImmb(); shift -= 8 << HighestSetBitPosition(instr->GetImmNEONImmh()); AppendToOutput("#%d", shift); @@ -10174,43 +6527,35 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } case 'V': { // Immediate Vector. switch (format[2]) { - case 'F': { - switch (format[5]) { - // Convert 'rot' bit encodings into equivalent angle rotation - case 'A': - AppendToOutput("#%" PRId32, - instr->GetImmRotFcadd() == 1 ? 
270 : 90); - break; - case 'M': - AppendToOutput("#%" PRId32, instr->GetImmRotFcmlaVec() * 90); - break; - } - return strlen("IVFCN") + 1; - } case 'E': { // IVExtract. AppendToOutput("#%" PRId32, instr->GetImmNEONExt()); return 9; } case 'B': { // IVByElemIndex. int ret = strlen("IVByElemIndex"); - int vm_index = (instr->GetNEONH() << 1) | instr->GetNEONL(); + uint32_t vm_index = instr->GetNEONH() << 2; + vm_index |= instr->GetNEONL() << 1; + vm_index |= instr->GetNEONM(); + static const char *format_rot = "IVByElemIndexRot"; static const char *format_fhm = "IVByElemIndexFHM"; - bool is_fhm = strncmp(format, format_fhm, strlen(format_fhm)) == 0; if (strncmp(format, format_rot, strlen(format_rot)) == 0) { // FCMLA uses 'H' bit index when SIZE is 2, else H:L + VIXL_ASSERT((instr->GetNEONSize() == 1) || + (instr->GetNEONSize() == 2)); + vm_index >>= instr->GetNEONSize(); + ret = static_cast<int>(strlen(format_rot)); + } else if (strncmp(format, format_fhm, strlen(format_fhm)) == 0) { + // Nothing to do - FMLAL and FMLSL use H:L:M. + ret = static_cast<int>(strlen(format_fhm)); + } else { if (instr->GetNEONSize() == 2) { - vm_index = instr->GetNEONH(); + // S-sized elements use H:L. + vm_index >>= 1; + } else if (instr->GetNEONSize() == 3) { + // D-sized elements use H. + vm_index >>= 2; } - ret = static_cast<int>(strlen(format_rot)); - } else if (is_fhm || (instr->GetNEONSize() == 0)) { - // Half-precision FP ops use H:L:M bit index - // Widening operations with H-sized operands also use H:L:M. - vm_index = (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | - instr->GetNEONM(); - if (is_fhm) ret = static_cast<int>(strlen(format_fhm)); - } else if (instr->GetNEONSize() == 1) { - vm_index = (vm_index << 1) | instr->GetNEONM(); } AppendToOutput("%d", vm_index); return ret; @@ -10660,10 +7005,10 @@ int Disassembler::SubstitutePrefetchField(const Instruction *instr, int placeholder_length = is_sve ? 
9 : 6; static const char *stream_options[] = {"keep", "strm"}; - auto get_hints = [](bool is_sve) -> std::vector<std::string> { + auto get_hints = [](bool want_sve_hint) -> std::vector<std::string> { static const std::vector<std::string> sve_hints = {"ld", "st"}; static const std::vector<std::string> core_hints = {"ld", "li", "st"}; - return (is_sve) ? sve_hints : core_hints; + return (want_sve_hint) ? sve_hints : core_hints; }; std::vector<std::string> hints = get_hints(is_sve); @@ -10807,12 +7152,13 @@ int Disassembler::SubstituteSVESize(const Instruction *instr, VIXL_ASSERT(format[0] == 't'); static const char sizes[] = {'b', 'h', 's', 'd', 'q'}; - // TODO: only the most common case for <size> is supported at the moment, - // and even then, the RESERVED values are handled as if they're not - // reserved. unsigned size_in_bytes_log2 = instr->GetSVESize(); int placeholder_length = 1; switch (format[1]) { + case 'f': // 'tf - FP size encoded in <18:17> + placeholder_length++; + size_in_bytes_log2 = instr->ExtractBits(18, 17); + break; case 'l': placeholder_length++; if (format[2] == 's') { @@ -10835,27 +7181,25 @@ int Disassembler::SubstituteSVESize(const Instruction *instr, placeholder_length += 3; size_in_bytes_log2 = instr->ExtractBits(24, 23); break; + case 'i': { // 'ti: indices. + std::pair<int, int> index_and_lane_size = + instr->GetSVEPermuteIndexAndLaneSizeLog2(); + placeholder_length++; + size_in_bytes_log2 = index_and_lane_size.second; + break; + } case 's': if (format[2] == 'z') { - VIXL_ASSERT((format[3] == 'x') || (format[3] == 's') || - (format[3] == 'p')); - if (format[3] == 'x') { - // 'tszx: Indexes. - std::pair<int, int> index_and_lane_size = - instr->GetSVEPermuteIndexAndLaneSizeLog2(); - size_in_bytes_log2 = index_and_lane_size.second; - } else if (format[3] == 'p') { - // 'tszp: Predicated shifts. 
- std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true); - size_in_bytes_log2 = shift_and_lane_size.second; - } else { - // 'tszs: Unpredicated shifts. - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); - size_in_bytes_log2 = shift_and_lane_size.second; + VIXL_ASSERT((format[3] == 'p') || (format[3] == 's') || + (format[3] == 'd')); + bool is_predicated = (format[3] == 'p'); + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(is_predicated); + size_in_bytes_log2 = shift_and_lane_size.second; + if (format[3] == 'd') { // Double size lanes. + size_in_bytes_log2++; } - placeholder_length += 3; // skip `sz[x|s]` + placeholder_length += 3; // skip "sz(p|s|d)" } break; case 'h': diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h index 176fb790..aa5348dc 100644 --- a/src/aarch64/disasm-aarch64.h +++ b/src/aarch64/disasm-aarch64.h @@ -27,6 +27,8 @@ #ifndef VIXL_AARCH64_DISASM_AARCH64_H #define VIXL_AARCH64_DISASM_AARCH64_H +#include <functional> +#include <unordered_map> #include <utility> #include "../globals-vixl.h" @@ -34,6 +36,7 @@ #include "cpu-features-auditor-aarch64.h" #include "decoder-aarch64.h" +#include "decoder-visitor-map-aarch64.h" #include "instructions-aarch64.h" #include "operands-aarch64.h" @@ -47,11 +50,9 @@ class Disassembler : public DecoderVisitor { virtual ~Disassembler(); char* GetOutput(); -// Declare all Visitor functions. -#define DECLARE(A) \ - virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE; - VISITOR_LIST(DECLARE) -#undef DECLARE + // Declare all Visitor functions. 
+ virtual void Visit(Metadata* metadata, + const Instruction* instr) VIXL_OVERRIDE; protected: virtual void ProcessOutput(const Instruction* instr); @@ -112,10 +113,126 @@ class Disassembler : public DecoderVisitor { int64_t CodeRelativeAddress(const void* instr); private: +#define DECLARE(A) virtual void Visit##A(const Instruction* instr); + VISITOR_LIST(DECLARE) +#undef DECLARE + + using FormToVisitorFnMap = std::unordered_map< + uint32_t, + std::function<void(Disassembler*, const Instruction*)>>; + static const FormToVisitorFnMap* GetFormToVisitorFnMap(); + + std::string mnemonic_; + uint32_t form_hash_; + + void SetMnemonicFromForm(const std::string& form) { + if (form != "unallocated") { + VIXL_ASSERT(form.find_first_of('_') != std::string::npos); + mnemonic_ = form.substr(0, form.find_first_of('_')); + } + } + + void Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction* instr); + void Disassemble_ZdB_ZnB_ZmB(const Instruction* instr); + void Disassemble_ZdD_PgM_ZnS(const Instruction* instr); + void Disassemble_ZdD_ZnD_ZmD(const Instruction* instr); + void Disassemble_ZdD_ZnD_ZmD_imm(const Instruction* instr); + void Disassemble_ZdD_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdH_PgM_ZnS(const Instruction* instr); + void Disassemble_ZdH_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdS_PgM_ZnD(const Instruction* instr); + void Disassemble_ZdS_PgM_ZnH(const Instruction* instr); + void Disassemble_ZdS_PgM_ZnS(const Instruction* instr); + void Disassemble_ZdS_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdS_ZnS_ZmS(const Instruction* instr); + void Disassemble_ZdS_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdT_PgM_ZnT(const Instruction* instr); + void Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction* instr); + void Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction* instr); + void 
Disassemble_ZdT_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdT_ZnT_ZmTb(const Instruction* instr); + void Disassemble_ZdT_ZnTb(const Instruction* instr); + void Disassemble_ZdT_ZnTb_ZmTb(const Instruction* instr); + void Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction* instr); + void Disassemble_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr); + void Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr); + void Disassemble_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr); + void Disassemble_ZdaS_ZnH_ZmH(const Instruction* instr); + void Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr); + void Disassemble_ZdaT_PgM_ZnTb(const Instruction* instr); + void Disassemble_ZdaT_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdaT_ZnT_ZmT_const(const Instruction* instr); + void Disassemble_ZdaT_ZnT_const(const Instruction* instr); + void Disassemble_ZdaT_ZnTb_ZmTb(const Instruction* instr); + void Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction* instr); + void Disassemble_ZdnB_ZdnB(const Instruction* instr); + void Disassemble_ZdnB_ZdnB_ZmB(const Instruction* instr); + void Disassemble_ZdnS_ZdnS_ZmS(const Instruction* instr); + void Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr); + void Disassemble_ZdnT_PgM_ZdnT_const(const Instruction* instr); + void Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction* instr); + void Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction* instr); + void Disassemble_ZtD_Pg_ZnD_Xm(const Instruction* instr); + void Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction* instr); + void Disassemble_ZtS_Pg_ZnS_Xm(const Instruction* instr); + void Disassemble_ZdaS_ZnB_ZmB(const Instruction* instr); + void Disassemble_Vd4S_Vn16B_Vm16B(const Instruction* instr); + + void 
DisassembleSVEShiftLeftImm(const Instruction* instr); + void DisassembleSVEShiftRightImm(const Instruction* instr); + void DisassembleSVEAddSubCarry(const Instruction* instr); + void DisassembleSVEAddSubHigh(const Instruction* instr); + void DisassembleSVEComplexIntAddition(const Instruction* instr); + void DisassembleSVEBitwiseTernary(const Instruction* instr); + void DisassembleSVEFlogb(const Instruction* instr); + void DisassembleSVEFPPair(const Instruction* instr); + + void DisassembleNoArgs(const Instruction* instr); + + void DisassembleNEONMulByElementLong(const Instruction* instr); + void DisassembleNEONDotProdByElement(const Instruction* instr); + void DisassembleNEONFPMulByElement(const Instruction* instr); + void DisassembleNEONHalfFPMulByElement(const Instruction* instr); + void DisassembleNEONFPMulByElementLong(const Instruction* instr); + void DisassembleNEONComplexMulByElement(const Instruction* instr); + void DisassembleNEON2RegLogical(const Instruction* instr); + void DisassembleNEON2RegExtract(const Instruction* instr); + void DisassembleNEON2RegAddlp(const Instruction* instr); + void DisassembleNEON2RegCompare(const Instruction* instr); + void DisassembleNEON2RegFPCompare(const Instruction* instr); + void DisassembleNEON2RegFPConvert(const Instruction* instr); + void DisassembleNEON2RegFP(const Instruction* instr); + void DisassembleNEON3SameLogical(const Instruction* instr); + void DisassembleNEON3SameFHM(const Instruction* instr); + void DisassembleNEON3SameNoD(const Instruction* instr); + void DisassembleNEONShiftLeftLongImm(const Instruction* instr); + void DisassembleNEONShiftRightImm(const Instruction* instr); + void DisassembleNEONShiftRightNarrowImm(const Instruction* instr); + void DisassembleNEONScalarSatMulLongIndex(const Instruction* instr); + void DisassembleNEONFPScalarMulIndex(const Instruction* instr); + void DisassembleNEONFPScalar3Same(const Instruction* instr); + void DisassembleNEONScalar3SameOnlyD(const Instruction* instr); + 
void DisassembleNEONFPAcrossLanes(const Instruction* instr); + void DisassembleNEONFP16AcrossLanes(const Instruction* instr); + void DisassembleNEONScalarShiftImmOnlyD(const Instruction* instr); + void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr); + void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr); + void DisassembleNEONFPScalar2RegMisc(const Instruction* instr); + void Format(const Instruction* instr, const char* mnemonic, const char* format0, const char* format1 = NULL); + void FormatWithDecodedMnemonic(const Instruction* instr, + const char* format0, + const char* format1 = NULL); + void Substitute(const Instruction* instr, const char* string); int SubstituteField(const Instruction* instr, const char* format); int SubstituteRegisterField(const Instruction* instr, const char* format); diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc index b3e28384..4718e2d2 100644 --- a/src/aarch64/instructions-aarch64.cc +++ b/src/aarch64/instructions-aarch64.cc @@ -44,7 +44,13 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, return result; } -bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const { +bool Instruction::CanTakeSVEMovprfx(const char* form, + const Instruction* movprfx) const { + return CanTakeSVEMovprfx(Hash(form), movprfx); +} + +bool Instruction::CanTakeSVEMovprfx(uint32_t form_hash, + const Instruction* movprfx) const { bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z; bool movprfx_is_unpredicated = movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z; @@ -58,90 +64,201 @@ bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const { bool pg_matches_low8 = movprfx_pg == GetPgLow8(); bool vform_matches = movprfx_vform == GetSVEVectorFormat(); bool zd_matches = movprfx_zd == GetRd(); - bool zd_matches_zm = movprfx_zd == GetRm(); - bool zd_matches_zn = movprfx_zd == GetRn(); - - switch 
(Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { - case AND_z_zi: - case EOR_z_zi: - case ORR_z_zi: + bool zd_isnt_zn = movprfx_zd != GetRn(); + bool zd_isnt_zm = movprfx_zd != GetRm(); + + switch (form_hash) { + case "cdot_z_zzzi_s"_h: + case "sdot_z_zzzi_s"_h: + case "sudot_z_zzzi_s"_h: + case "udot_z_zzzi_s"_h: + case "usdot_z_zzzi_s"_h: + return (GetRd() != static_cast<int>(ExtractBits(18, 16))) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "cdot_z_zzzi_d"_h: + case "sdot_z_zzzi_d"_h: + case "udot_z_zzzi_d"_h: + return (GetRd() != static_cast<int>(ExtractBits(19, 16))) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "fmlalb_z_zzzi_s"_h: + case "fmlalt_z_zzzi_s"_h: + case "fmlslb_z_zzzi_s"_h: + case "fmlslt_z_zzzi_s"_h: + case "smlalb_z_zzzi_d"_h: + case "smlalb_z_zzzi_s"_h: + case "smlalt_z_zzzi_d"_h: + case "smlalt_z_zzzi_s"_h: + case "smlslb_z_zzzi_d"_h: + case "smlslb_z_zzzi_s"_h: + case "smlslt_z_zzzi_d"_h: + case "smlslt_z_zzzi_s"_h: + case "sqdmlalb_z_zzzi_d"_h: + case "sqdmlalb_z_zzzi_s"_h: + case "sqdmlalt_z_zzzi_d"_h: + case "sqdmlalt_z_zzzi_s"_h: + case "sqdmlslb_z_zzzi_d"_h: + case "sqdmlslb_z_zzzi_s"_h: + case "sqdmlslt_z_zzzi_d"_h: + case "sqdmlslt_z_zzzi_s"_h: + case "umlalb_z_zzzi_d"_h: + case "umlalb_z_zzzi_s"_h: + case "umlalt_z_zzzi_d"_h: + case "umlalt_z_zzzi_s"_h: + case "umlslb_z_zzzi_d"_h: + case "umlslb_z_zzzi_s"_h: + case "umlslt_z_zzzi_d"_h: + case "umlslt_z_zzzi_s"_h: + return (GetRd() != GetSVEMulLongZmAndIndex().first) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "cmla_z_zzzi_h"_h: + case "cmla_z_zzzi_s"_h: + case "fcmla_z_zzzi_h"_h: + case "fcmla_z_zzzi_s"_h: + case "fmla_z_zzzi_d"_h: + case "fmla_z_zzzi_h"_h: + case "fmla_z_zzzi_s"_h: + case "fmls_z_zzzi_d"_h: + case "fmls_z_zzzi_h"_h: + case "fmls_z_zzzi_s"_h: + case "mla_z_zzzi_d"_h: + case "mla_z_zzzi_h"_h: + case "mla_z_zzzi_s"_h: + case "mls_z_zzzi_d"_h: + case "mls_z_zzzi_h"_h: + case "mls_z_zzzi_s"_h: + case 
"sqrdcmlah_z_zzzi_h"_h: + case "sqrdcmlah_z_zzzi_s"_h: + case "sqrdmlah_z_zzzi_d"_h: + case "sqrdmlah_z_zzzi_h"_h: + case "sqrdmlah_z_zzzi_s"_h: + case "sqrdmlsh_z_zzzi_d"_h: + case "sqrdmlsh_z_zzzi_h"_h: + case "sqrdmlsh_z_zzzi_s"_h: + return (GetRd() != GetSVEMulZmAndIndex().first) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "adclb_z_zzz"_h: + case "adclt_z_zzz"_h: + case "bcax_z_zzz"_h: + case "bsl1n_z_zzz"_h: + case "bsl2n_z_zzz"_h: + case "bsl_z_zzz"_h: + case "cdot_z_zzz"_h: + case "cmla_z_zzz"_h: + case "eor3_z_zzz"_h: + case "eorbt_z_zz"_h: + case "eortb_z_zz"_h: + case "fmlalb_z_zzz"_h: + case "fmlalt_z_zzz"_h: + case "fmlslb_z_zzz"_h: + case "fmlslt_z_zzz"_h: + case "nbsl_z_zzz"_h: + case "saba_z_zzz"_h: + case "sabalb_z_zzz"_h: + case "sabalt_z_zzz"_h: + case "sbclb_z_zzz"_h: + case "sbclt_z_zzz"_h: + case "sdot_z_zzz"_h: + case "smlalb_z_zzz"_h: + case "smlalt_z_zzz"_h: + case "smlslb_z_zzz"_h: + case "smlslt_z_zzz"_h: + case "sqdmlalb_z_zzz"_h: + case "sqdmlalbt_z_zzz"_h: + case "sqdmlalt_z_zzz"_h: + case "sqdmlslb_z_zzz"_h: + case "sqdmlslbt_z_zzz"_h: + case "sqdmlslt_z_zzz"_h: + case "sqrdcmlah_z_zzz"_h: + case "sqrdmlah_z_zzz"_h: + case "sqrdmlsh_z_zzz"_h: + case "uaba_z_zzz"_h: + case "uabalb_z_zzz"_h: + case "uabalt_z_zzz"_h: + case "udot_z_zzz"_h: + case "umlalb_z_zzz"_h: + case "umlalt_z_zzz"_h: + case "umlslb_z_zzz"_h: + case "umlslt_z_zzz"_h: + case "usdot_z_zzz_s"_h: + case "fmmla_z_zzz_s"_h: + case "fmmla_z_zzz_d"_h: + case "smmla_z_zzz"_h: + case "ummla_z_zzz"_h: + case "usmmla_z_zzz"_h: + return movprfx_is_unpredicated && zd_isnt_zm && zd_isnt_zn && zd_matches; + + case "addp_z_p_zz"_h: + case "cadd_z_zz"_h: + case "clasta_z_p_zz"_h: + case "clastb_z_p_zz"_h: + case "decd_z_zs"_h: + case "dech_z_zs"_h: + case "decw_z_zs"_h: + case "faddp_z_p_zz"_h: + case "fmaxnmp_z_p_zz"_h: + case "fmaxp_z_p_zz"_h: + case "fminnmp_z_p_zz"_h: + case "fminp_z_p_zz"_h: + case "ftmad_z_zzi"_h: + case "incd_z_zs"_h: + case "inch_z_zs"_h: 
+ case "incw_z_zs"_h: + case "insr_z_v"_h: + case "smaxp_z_p_zz"_h: + case "sminp_z_p_zz"_h: + case "splice_z_p_zz_con"_h: + case "splice_z_p_zz_des"_h: + case "sqcadd_z_zz"_h: + case "sqdecd_z_zs"_h: + case "sqdech_z_zs"_h: + case "sqdecw_z_zs"_h: + case "sqincd_z_zs"_h: + case "sqinch_z_zs"_h: + case "sqincw_z_zs"_h: + case "srsra_z_zi"_h: + case "ssra_z_zi"_h: + case "umaxp_z_p_zz"_h: + case "uminp_z_p_zz"_h: + case "uqdecd_z_zs"_h: + case "uqdech_z_zs"_h: + case "uqdecw_z_zs"_h: + case "uqincd_z_zs"_h: + case "uqinch_z_zs"_h: + case "uqincw_z_zs"_h: + case "ursra_z_zi"_h: + case "usra_z_zi"_h: + case "xar_z_zzi"_h: + return movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "add_z_zi"_h: + case "and_z_zi"_h: + case "decp_z_p_z"_h: + case "eor_z_zi"_h: + case "incp_z_p_z"_h: + case "insr_z_r"_h: + case "mul_z_zi"_h: + case "orr_z_zi"_h: + case "smax_z_zi"_h: + case "smin_z_zi"_h: + case "sqadd_z_zi"_h: + case "sqdecp_z_p_z"_h: + case "sqincp_z_p_z"_h: + case "sqsub_z_zi"_h: + case "sub_z_zi"_h: + case "subr_z_zi"_h: + case "umax_z_zi"_h: + case "umin_z_zi"_h: + case "uqadd_z_zi"_h: + case "uqdecp_z_p_z"_h: + case "uqincp_z_p_z"_h: + case "uqsub_z_zi"_h: return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEBitwiseLogical_PredicatedMask)) { - case AND_z_p_zz: - case BIC_z_p_zz: - case EOR_z_p_zz: - case ORR_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEBitwiseShiftByImm_PredicatedMask)) { - case ASRD_z_p_zi: - case ASR_z_p_zi: - case LSL_z_p_zi: - case LSR_z_p_zi: - if (movprfx_is_predicated) { - if (!pg_matches_low8) return false; - unsigned tsz = ExtractBits<0x00c00300>(); - VectorFormat instr_vform = - SVEFormatFromLaneSizeInBytesLog2(HighestSetBitPosition(tsz)); - if (movprfx_vform != instr_vform) return false; - } - return zd_matches; - } - switch (Mask(SVEBitwiseShiftByVector_PredicatedMask)) { - case ASRR_z_p_zz: - case ASR_z_p_zz: - case LSLR_z_p_zz: - case LSL_z_p_zz: - case LSRR_z_p_zz: - case LSR_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { - case ASR_z_p_zw: - case LSL_z_p_zw: - case LSR_z_p_zw: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEConditionallyBroadcastElementToVectorMask)) { - case CLASTA_z_p_zz: - case CLASTB_z_p_zz: - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVECopyFPImm_PredicatedMask)) { - case FCPY_z_p_i: - if (movprfx_is_predicated) { - if (!vform_matches) return false; - if (movprfx_pg != GetRx<19, 16>()) return false; - } - return zd_matches; - } - switch (Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { - case CPY_z_p_r: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches; - } - switch (Mask(SVECopyIntImm_PredicatedMask)) { - case CPY_z_p_i: + + case "cpy_z_p_i"_h: if (movprfx_is_predicated) { if (!vform_matches) return false; if (movprfx_pg != GetRx<19, 16>()) return false; @@ -149,397 +266,219 @@ bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const { // Only the merging form can take movprfx. if (ExtractBit(14) == 0) return false; return zd_matches; + + case "fcpy_z_p_i"_h: + return (movprfx_is_unpredicated || + ((movprfx_pg == GetRx<19, 16>()) && vform_matches)) && + zd_matches; + + case "flogb_z_p_z"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == GetSVEVectorFormat(17)) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "asr_z_p_zi"_h: + case "asrd_z_p_zi"_h: + case "lsl_z_p_zi"_h: + case "lsr_z_p_zi"_h: + case "sqshl_z_p_zi"_h: + case "sqshlu_z_p_zi"_h: + case "srshr_z_p_zi"_h: + case "uqshl_z_p_zi"_h: + case "urshr_z_p_zi"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == + SVEFormatFromLaneSizeInBytesLog2( + GetSVEImmShiftAndLaneSizeLog2(true).second)) && + pg_matches_low8)) && + zd_matches; + + case "fcvt_z_p_z_d2h"_h: + case "fcvt_z_p_z_d2s"_h: + case "fcvt_z_p_z_h2d"_h: + case "fcvt_z_p_z_s2d"_h: + case "fcvtx_z_p_z_d2s"_h: + case "fcvtzs_z_p_z_d2w"_h: + case "fcvtzs_z_p_z_d2x"_h: + case "fcvtzs_z_p_z_fp162x"_h: + case "fcvtzs_z_p_z_s2x"_h: + case "fcvtzu_z_p_z_d2w"_h: + case "fcvtzu_z_p_z_d2x"_h: + case "fcvtzu_z_p_z_fp162x"_h: + case "fcvtzu_z_p_z_s2x"_h: + case 
"scvtf_z_p_z_w2d"_h: + case "scvtf_z_p_z_x2d"_h: + case "scvtf_z_p_z_x2fp16"_h: + case "scvtf_z_p_z_x2s"_h: + case "ucvtf_z_p_z_w2d"_h: + case "ucvtf_z_p_z_x2d"_h: + case "ucvtf_z_p_z_x2fp16"_h: + case "ucvtf_z_p_z_x2s"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == kFormatVnD) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "fcvtzs_z_p_z_fp162h"_h: + case "fcvtzu_z_p_z_fp162h"_h: + case "scvtf_z_p_z_h2fp16"_h: + case "ucvtf_z_p_z_h2fp16"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == kFormatVnH) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "fcvt_z_p_z_h2s"_h: + case "fcvt_z_p_z_s2h"_h: + case "fcvtzs_z_p_z_fp162w"_h: + case "fcvtzs_z_p_z_s2w"_h: + case "fcvtzu_z_p_z_fp162w"_h: + case "fcvtzu_z_p_z_s2w"_h: + case "scvtf_z_p_z_w2fp16"_h: + case "scvtf_z_p_z_w2s"_h: + case "ucvtf_z_p_z_w2fp16"_h: + case "ucvtf_z_p_z_w2s"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == kFormatVnS) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "fcmla_z_p_zzz"_h: + case "fmad_z_p_zzz"_h: + case "fmla_z_p_zzz"_h: + case "fmls_z_p_zzz"_h: + case "fmsb_z_p_zzz"_h: + case "fnmad_z_p_zzz"_h: + case "fnmla_z_p_zzz"_h: + case "fnmls_z_p_zzz"_h: + case "fnmsb_z_p_zzz"_h: + case "mad_z_p_zzz"_h: + case "mla_z_p_zzz"_h: + case "mls_z_p_zzz"_h: + case "msb_z_p_zzz"_h: + return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) && + zd_isnt_zm && zd_isnt_zn && zd_matches; + + case "abs_z_p_z"_h: + case "add_z_p_zz"_h: + case "and_z_p_zz"_h: + case "asr_z_p_zw"_h: + case "asr_z_p_zz"_h: + case "asrr_z_p_zz"_h: + case "bic_z_p_zz"_h: + case "cls_z_p_z"_h: + case "clz_z_p_z"_h: + case "cnot_z_p_z"_h: + case "cnt_z_p_z"_h: + case "cpy_z_p_v"_h: + case "eor_z_p_zz"_h: + case "fabd_z_p_zz"_h: + case "fabs_z_p_z"_h: + case "fadd_z_p_zz"_h: + case "fcadd_z_p_zz"_h: + case "fdiv_z_p_zz"_h: + case "fdivr_z_p_zz"_h: + case "fmax_z_p_zz"_h: + case "fmaxnm_z_p_zz"_h: + case "fmin_z_p_zz"_h: + case 
"fminnm_z_p_zz"_h: + case "fmul_z_p_zz"_h: + case "fmulx_z_p_zz"_h: + case "fneg_z_p_z"_h: + case "frecpx_z_p_z"_h: + case "frinta_z_p_z"_h: + case "frinti_z_p_z"_h: + case "frintm_z_p_z"_h: + case "frintn_z_p_z"_h: + case "frintp_z_p_z"_h: + case "frintx_z_p_z"_h: + case "frintz_z_p_z"_h: + case "fscale_z_p_zz"_h: + case "fsqrt_z_p_z"_h: + case "fsub_z_p_zz"_h: + case "fsubr_z_p_zz"_h: + case "lsl_z_p_zw"_h: + case "lsl_z_p_zz"_h: + case "lslr_z_p_zz"_h: + case "lsr_z_p_zw"_h: + case "lsr_z_p_zz"_h: + case "lsrr_z_p_zz"_h: + case "mul_z_p_zz"_h: + case "neg_z_p_z"_h: + case "not_z_p_z"_h: + case "orr_z_p_zz"_h: + case "rbit_z_p_z"_h: + case "revb_z_z"_h: + case "revh_z_z"_h: + case "revw_z_z"_h: + case "sabd_z_p_zz"_h: + case "sadalp_z_p_z"_h: + case "sdiv_z_p_zz"_h: + case "sdivr_z_p_zz"_h: + case "shadd_z_p_zz"_h: + case "shsub_z_p_zz"_h: + case "shsubr_z_p_zz"_h: + case "smax_z_p_zz"_h: + case "smin_z_p_zz"_h: + case "smulh_z_p_zz"_h: + case "sqabs_z_p_z"_h: + case "sqadd_z_p_zz"_h: + case "sqneg_z_p_z"_h: + case "sqrshl_z_p_zz"_h: + case "sqrshlr_z_p_zz"_h: + case "sqshl_z_p_zz"_h: + case "sqshlr_z_p_zz"_h: + case "sqsub_z_p_zz"_h: + case "sqsubr_z_p_zz"_h: + case "srhadd_z_p_zz"_h: + case "srshl_z_p_zz"_h: + case "srshlr_z_p_zz"_h: + case "sub_z_p_zz"_h: + case "subr_z_p_zz"_h: + case "suqadd_z_p_zz"_h: + case "sxtb_z_p_z"_h: + case "sxth_z_p_z"_h: + case "sxtw_z_p_z"_h: + case "uabd_z_p_zz"_h: + case "uadalp_z_p_z"_h: + case "udiv_z_p_zz"_h: + case "udivr_z_p_zz"_h: + case "uhadd_z_p_zz"_h: + case "uhsub_z_p_zz"_h: + case "uhsubr_z_p_zz"_h: + case "umax_z_p_zz"_h: + case "umin_z_p_zz"_h: + case "umulh_z_p_zz"_h: + case "uqadd_z_p_zz"_h: + case "uqrshl_z_p_zz"_h: + case "uqrshlr_z_p_zz"_h: + case "uqshl_z_p_zz"_h: + case "uqshlr_z_p_zz"_h: + case "uqsub_z_p_zz"_h: + case "uqsubr_z_p_zz"_h: + case "urecpe_z_p_z"_h: + case "urhadd_z_p_zz"_h: + case "urshl_z_p_zz"_h: + case "urshlr_z_p_zz"_h: + case "ursqrte_z_p_z"_h: + case "usqadd_z_p_zz"_h: + case 
"uxtb_z_p_z"_h: + case "uxth_z_p_z"_h: + case "uxtw_z_p_z"_h: + return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) && + zd_isnt_zn && zd_matches; + + case "cpy_z_p_r"_h: + case "fadd_z_p_zs"_h: + case "fmax_z_p_zs"_h: + case "fmaxnm_z_p_zs"_h: + case "fmin_z_p_zs"_h: + case "fminnm_z_p_zs"_h: + case "fmul_z_p_zs"_h: + case "fsub_z_p_zs"_h: + case "fsubr_z_p_zs"_h: + return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) && + zd_matches; + default: + return false; } - switch (Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { - case CPY_z_p_v: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPArithmeticWithImm_PredicatedMask)) { - case FADD_z_p_zs: - case FMAXNM_z_p_zs: - case FMAX_z_p_zs: - case FMINNM_z_p_zs: - case FMIN_z_p_zs: - case FMUL_z_p_zs: - case FSUBR_z_p_zs: - case FSUB_z_p_zs: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches; - } - switch (Mask(SVEFPArithmetic_PredicatedMask)) { - case FABD_z_p_zz: - case FADD_z_p_zz: - case FDIVR_z_p_zz: - case FDIV_z_p_zz: - case FMAXNM_z_p_zz: - case FMAX_z_p_zz: - case FMINNM_z_p_zz: - case FMIN_z_p_zz: - case FMULX_z_p_zz: - case FMUL_z_p_zz: - case FSCALE_z_p_zz: - case FSUBR_z_p_zz: - case FSUB_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEFPComplexAdditionMask)) { - case FCADD_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEFPComplexMulAddIndexMask)) { - case FCMLA_z_zzzi_h: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<18, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - case FCMLA_z_zzzi_s: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<19, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPComplexMulAddMask)) { - case FCMLA_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zm && !zd_matches_zn; - } - switch (Mask(SVEFPConvertPrecisionMask)) { - case FCVT_z_p_z_d2h: - case FCVT_z_p_z_d2s: - case FCVT_z_p_z_h2d: - case FCVT_z_p_z_h2s: - case FCVT_z_p_z_s2d: - case FCVT_z_p_z_s2h: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPConvertToIntMask)) { - case FCVTZS_z_p_z_d2w: - case FCVTZS_z_p_z_d2x: - case FCVTZS_z_p_z_fp162h: - case FCVTZS_z_p_z_fp162w: - case FCVTZS_z_p_z_fp162x: - case FCVTZS_z_p_z_s2w: - case FCVTZS_z_p_z_s2x: - case FCVTZU_z_p_z_d2w: - case FCVTZU_z_p_z_d2x: - case FCVTZU_z_p_z_fp162h: - case FCVTZU_z_p_z_fp162w: - case FCVTZU_z_p_z_fp162x: - case FCVTZU_z_p_z_s2w: - case FCVTZU_z_p_z_s2x: - if (movprfx_is_predicated) { - if (!pg_matches_low8) return false; - // The movprfx element size must match the instruction's maximum encoded - // element size. We have to partially decode the opc and opc2 fields to - // find this. 
- unsigned opc = ExtractBits(23, 22); - unsigned opc2 = ExtractBits(18, 17); - VectorFormat instr_vform = - SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); - if (movprfx_vform != instr_vform) return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPMulAddIndexMask)) { - case FMLA_z_zzzi_h: - case FMLA_z_zzzi_h_i3h: - case FMLA_z_zzzi_s: - case FMLS_z_zzzi_h: - case FMLS_z_zzzi_h_i3h: - case FMLS_z_zzzi_s: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<18, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - case FMLA_z_zzzi_d: - case FMLS_z_zzzi_d: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<19, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPMulAddMask)) { - case FMAD_z_p_zzz: - case FMSB_z_p_zzz: - case FNMAD_z_p_zzz: - case FNMSB_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<20, 16>()) return false; - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - case FMLA_z_p_zzz: - case FMLS_z_p_zzz: - case FNMLA_z_p_zzz: - case FNMLS_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zm && !zd_matches_zn; - } - switch (Mask(SVEFPRoundToIntegralValueMask)) { - case FRINTA_z_p_z: - case FRINTI_z_p_z: - case FRINTM_z_p_z: - case FRINTN_z_p_z: - case FRINTP_z_p_z: - case FRINTX_z_p_z: - case FRINTZ_z_p_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPTrigMulAddCoefficientMask)) { - case FTMAD_z_zzi: - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEFPUnaryOpMask)) { - case FRECPX_z_p_z: - case FSQRT_z_p_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEIncDecByPredicateCountMask)) { - case DECP_z_p_z: - case INCP_z_p_z: - case SQDECP_z_p_z: - case SQINCP_z_p_z: - case UQDECP_z_p_z: - case UQINCP_z_p_z: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIncDecVectorByElementCountMask)) { - case DECD_z_zs: - case DECH_z_zs: - case DECW_z_zs: - case INCD_z_zs: - case INCH_z_zs: - case INCW_z_zs: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEInsertGeneralRegisterMask)) { - case INSR_z_r: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEInsertSIMDFPScalarRegisterMask)) { - case INSR_z_v: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { - case ADD_z_zi: - case SQADD_z_zi: - case SQSUB_z_zi: - case SUBR_z_zi: - case SUB_z_zi: - case UQADD_z_zi: - case UQSUB_z_zi: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntAddSubtractVectors_PredicatedMask)) { - case ADD_z_p_zz: - case SUBR_z_p_zz: - case SUB_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntConvertToFPMask)) { - case SCVTF_z_p_z_h2fp16: - case SCVTF_z_p_z_w2d: - case SCVTF_z_p_z_w2fp16: - case SCVTF_z_p_z_w2s: - case SCVTF_z_p_z_x2d: - case SCVTF_z_p_z_x2fp16: - case SCVTF_z_p_z_x2s: - case UCVTF_z_p_z_h2fp16: - case UCVTF_z_p_z_w2d: - case UCVTF_z_p_z_w2fp16: - case UCVTF_z_p_z_w2s: - case UCVTF_z_p_z_x2d: - case UCVTF_z_p_z_x2fp16: - case UCVTF_z_p_z_x2s: - if (movprfx_is_predicated) { - if (!pg_matches_low8) return false; - // The movprfx element size must match the instruction's maximum encoded - // element size. We have to partially decode the opc and opc2 fields to - // find this. - unsigned opc = ExtractBits(23, 22); - unsigned opc2 = ExtractBits(18, 17); - VectorFormat instr_vform = - SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); - if (movprfx_vform != instr_vform) return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEIntDivideVectors_PredicatedMask)) { - case SDIVR_z_p_zz: - case SDIV_z_p_zz: - case UDIVR_z_p_zz: - case UDIV_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntMinMaxDifference_PredicatedMask)) { - case SABD_z_p_zz: - case SMAX_z_p_zz: - case SMIN_z_p_zz: - case UABD_z_p_zz: - case UMAX_z_p_zz: - case UMIN_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntMinMaxImm_UnpredicatedMask)) { - case SMAX_z_zi: - case SMIN_z_zi: - case UMAX_z_zi: - case UMIN_z_zi: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntMulAddPredicatedMask)) { - case MAD_z_p_zzz: - case MSB_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches && !zd_matches_zm; - case MLA_z_p_zzz: - case MLS_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zm && !zd_matches_zn; - } - switch (Mask(SVEIntMulAddUnpredicatedMask)) { - case SDOT_z_zzz: - case UDOT_z_zzz: - return movprfx_is_unpredicated && zd_matches && !zd_matches_zm && - !zd_matches_zn; - } - switch (Mask(SVEIntMulImm_UnpredicatedMask)) { - case MUL_z_zi: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntMulVectors_PredicatedMask)) { - case MUL_z_p_zz: - case SMULH_z_p_zz: - case UMULH_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntUnaryArithmeticPredicatedMask)) { - case ABS_z_p_z: - case CLS_z_p_z: - case CLZ_z_p_z: - case CNOT_z_p_z: - case CNT_z_p_z: - case FABS_z_p_z: - case FNEG_z_p_z: - case NEG_z_p_z: - case NOT_z_p_z: - case SXTB_z_p_z: - case SXTH_z_p_z: - case SXTW_z_p_z: - case UXTB_z_p_z: - case UXTH_z_p_z: - case UXTW_z_p_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEMulIndexMask)) { - case SDOT_z_zzzi_s: - case UDOT_z_zzzi_s: - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<18, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - case SDOT_z_zzzi_d: - case UDOT_z_zzzi_d: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<19, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - } - switch (Mask(SVEPermuteVectorExtractMask)) { - case EXT_z_zi_des: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEReverseWithinElementsMask)) { - case RBIT_z_p_z: - case REVB_z_z: - case REVH_z_z: - case REVW_z_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVESaturatingIncDecVectorByElementCountMask)) { - case SQDECD_z_zs: - case SQDECH_z_zs: - case SQDECW_z_zs: - case SQINCD_z_zs: - case SQINCH_z_zs: - case SQINCW_z_zs: - case UQDECD_z_zs: - case UQDECH_z_zs: - case UQDECW_z_zs: - case UQINCD_z_zs: - case UQINCH_z_zs: - case UQINCW_z_zs: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEVectorSplice_DestructiveMask)) { - case SPLICE_z_p_zz_des: - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - return false; } // NOLINT(readability/fn_size) bool Instruction::IsLoad() const { @@ -610,6 +549,58 @@ std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const { return std::make_pair(index, lane_size_in_byte_log_2); } +// Get the register and index for SVE indexed multiplies encoded in the forms: +// .h : Zm = <18:16>, index = <22><20:19> +// .s : Zm = <18:16>, index = <20:19> +// .d : Zm = <19:16>, index = <20> +std::pair<int, int> Instruction::GetSVEMulZmAndIndex() const { + int reg_code = GetRmLow16(); + int index = ExtractBits(20, 19); + + // For .h, index uses bit zero of the size field, so kFormatVnB below implies + // half-word lane, with most-significant bit of the index zero. + switch (GetSVEVectorFormat()) { + case kFormatVnD: + index >>= 1; // Only bit 20 in the index for D lanes. + break; + case kFormatVnH: + index += 4; // Bit 22 is the top bit of index. + VIXL_FALLTHROUGH(); + case kFormatVnB: + case kFormatVnS: + reg_code &= 7; // Three bits used for the register. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + return std::make_pair(reg_code, index); +} + +// Get the register and index for SVE indexed long multiplies encoded in the +// forms: +// .h : Zm = <18:16>, index = <20:19><11> +// .s : Zm = <19:16>, index = <20><11> +std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const { + int reg_code = GetRmLow16(); + int index = ExtractBit(11); + + // For long multiplies, the SVE size field <23:22> encodes the destination + // element size. The source element size is half the width. 
+ switch (GetSVEVectorFormat()) { + case kFormatVnS: + reg_code &= 7; + index |= ExtractBits(20, 19) << 1; + break; + case kFormatVnD: + index |= ExtractBit(20) << 1; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + return std::make_pair(reg_code, index); +} // Logical immediates can't encode zero, so a return value of zero is used to // indicate a failure case. Specifically, where the constraints on imm_s are @@ -717,6 +708,12 @@ int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const { } } +int Instruction::GetSVEExtractImmediate() const { + const int imm8h_mask = 0x001F0000; + const int imm8l_mask = 0x00001C00; + return ExtractBits<imm8h_mask | imm8l_mask>(); +} + uint64_t Instruction::DecodeImmBitMask(int32_t n, int32_t imm_s, int32_t imm_r, @@ -1025,7 +1022,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) { return kFormatVnH; case kFormatVnD: return kFormatVnS; - break; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -1034,8 +1030,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) { VectorFormat VectorFormatDoubleWidth(VectorFormat vform) { - VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S || - vform == kFormatB || vform == kFormatH || vform == kFormatS); switch (vform) { case kFormat8B: return kFormat8H; @@ -1049,6 +1043,12 @@ VectorFormat VectorFormatDoubleWidth(VectorFormat vform) { return kFormatS; case kFormatS: return kFormatD; + case kFormatVnB: + return kFormatVnH; + case kFormatVnH: + return kFormatVnS; + case kFormatVnS: + return kFormatVnD; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -1162,6 +1162,7 @@ bool IsSVEFormat(VectorFormat vform) { case kFormatVnS: case kFormatVnD: case kFormatVnQ: + case kFormatVnO: return true; default: return false; @@ -1283,6 +1284,8 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) { return 64; case kFormatVnQ: return 128; + case kFormatVnO: + return 256; default: VIXL_UNREACHABLE(); return 0; diff --git 
a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h index 5f56ae16..d92e6ee2 100644 --- a/src/aarch64/instructions-aarch64.h +++ b/src/aarch64/instructions-aarch64.h @@ -199,8 +199,11 @@ enum VectorFormat { // An artificial value, used to distinguish from NEON format category. kFormatSVE = 0x0000fffd, - // An artificial value. Q lane size isn't encoded in the usual size field. - kFormatSVEQ = 0x000f0000, + // Artificial values. Q and O lane sizes aren't encoded in the usual size + // field. + kFormatSVEQ = 0x00080000, + kFormatSVEO = 0x00040000, + // Vector element width of SVE register with the unknown lane count since // the vector length is implementation dependent. kFormatVnB = SVE_B | kFormatSVE, @@ -208,6 +211,7 @@ enum VectorFormat { kFormatVnS = SVE_S | kFormatSVE, kFormatVnD = SVE_D | kFormatSVE, kFormatVnQ = kFormatSVEQ | kFormatSVE, + kFormatVnO = kFormatSVEO | kFormatSVE, // An artificial value, used by simulator trace tests and a few oddball // instructions (such as FMLAL). @@ -267,11 +271,21 @@ class Instruction { return Compress(M); } + uint32_t ExtractBitsAbsent() const { + VIXL_UNREACHABLE(); + return 0; + } + template <uint32_t M, uint32_t V> uint32_t IsMaskedValue() const { return (Mask(M) == V) ? 
1 : 0; } + uint32_t IsMaskedValueAbsent() const { + VIXL_UNREACHABLE(); + return 0; + } + int32_t ExtractSignedBits(int msb, int lsb) const { int32_t bits = *(reinterpret_cast<const int32_t*>(this)); return ExtractSignedBitfield32(msb, lsb, bits); @@ -300,8 +314,13 @@ class Instruction { return this->ExtractBits(msb, lsb); } - VectorFormat GetSVEVectorFormat() const { - switch (Mask(SVESizeFieldMask)) { + VectorFormat GetSVEVectorFormat(int field_lsb = 22) const { + VIXL_ASSERT((field_lsb >= 0) && (field_lsb <= 30)); + uint32_t instr = ExtractUnsignedBitfield32(field_lsb + 1, + field_lsb, + GetInstructionBits()) + << 22; + switch (instr & SVESizeFieldMask) { case SVE_B: return kFormatVnB; case SVE_H: @@ -349,12 +368,18 @@ class Instruction { std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const; + std::pair<int, int> GetSVEMulZmAndIndex() const; + std::pair<int, int> GetSVEMulLongZmAndIndex() const; + std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const; + int GetSVEExtractImmediate() const; + int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const; int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const; + unsigned GetImmNEONabcdefgh() const; VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) { return GetImmNEONabcdefgh(); @@ -453,7 +478,8 @@ class Instruction { } // True if `this` is valid immediately after the provided movprfx instruction. 
- bool CanTakeSVEMovprfx(Instruction const* movprfx) const; + bool CanTakeSVEMovprfx(uint32_t form_hash, Instruction const* movprfx) const; + bool CanTakeSVEMovprfx(const char* form, Instruction const* movprfx) const; bool IsLoad() const; bool IsStore() const; @@ -789,18 +815,26 @@ class NEONFormatDecoder { SubstitutionMode mode0 = kFormat, SubstitutionMode mode1 = kFormat, SubstitutionMode mode2 = kFormat) { + const char* subst0 = GetSubstitute(0, mode0); + const char* subst1 = GetSubstitute(1, mode1); + const char* subst2 = GetSubstitute(2, mode2); + + if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL)) { + return NULL; + } + snprintf(form_buffer_, sizeof(form_buffer_), string, - GetSubstitute(0, mode0), - GetSubstitute(1, mode1), - GetSubstitute(2, mode2)); + subst0, + subst1, + subst2); return form_buffer_; } - // Append a "2" to a mnemonic string based of the state of the Q bit. + // Append a "2" to a mnemonic string based on the state of the Q bit. const char* Mnemonic(const char* mnemonic) { - if ((instrbits_ & NEON_Q) != 0) { + if ((mnemonic != NULL) && (instrbits_ & NEON_Q) != 0) { snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic); return mne_buffer_; } @@ -895,6 +929,33 @@ class NEONFormatDecoder { return ↦ } + // The shift immediate map uses between two and five bits to encode the NEON + // vector format: + // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H, + // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined. 
+ static const NEONFormatMap* ShiftImmFormatMap() { + static const NEONFormatMap map = {{22, 21, 20, 19, 30}, + {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, + NF_4H, NF_8H, NF_4H, NF_8H, + NF_2S, NF_4S, NF_2S, NF_4S, + NF_2S, NF_4S, NF_2S, NF_4S, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}}; + return ↦ + } + + // The shift long/narrow immediate map uses between two and four bits to + // encode the NEON vector format: + // 0001->8H, 001x->4S, 01xx->2D, all others undefined. + static const NEONFormatMap* ShiftLongNarrowImmFormatMap() { + static const NEONFormatMap map = + {{22, 21, 20, 19}, + {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}}; + return ↦ + } + // The scalar format map uses two bits (size<1:0>) to encode the NEON scalar // formats: NF_B, NF_H, NF_S, NF_D. static const NEONFormatMap* ScalarFormatMap() { @@ -968,7 +1029,7 @@ class NEONFormatDecoder { static const char* NEONFormatAsString(NEONFormat format) { // clang-format off static const char* formats[] = { - "undefined", + NULL, "8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d", "b", "h", "s", "d" }; @@ -983,9 +1044,9 @@ class NEONFormatDecoder { (format == NF_D) || (format == NF_UNDEF)); // clang-format off static const char* formats[] = { - "undefined", - "undefined", "undefined", "undefined", "undefined", - "undefined", "undefined", "undefined", "undefined", + NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, "'B", "'H", "'S", "'D" }; // clang-format on diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc index cb82f715..a77e7f28 100644 --- a/src/aarch64/logic-aarch64.cc +++ b/src/aarch64/logic-aarch64.cc @@ -625,6 +625,9 @@ LogicVRegister Simulator::addp(VectorFormat vform, uzp1(vform, temp1, src1, src2); uzp2(vform, temp2, src1, src2); add(vform, dst, temp1, temp2); + if (IsSVEFormat(vform)) { + interleave_top_bottom(vform, dst, dst); + } return dst; } @@ -723,7 +726,7 
@@ LogicVRegister Simulator::smulh(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { - int64_t dst_val; + int64_t dst_val = 0xbadbeef; int64_t val1 = src1.Int(vform, i); int64_t val2 = src2.Int(vform, i); switch (LaneSizeInBitsFromFormat(vform)) { @@ -740,7 +743,6 @@ LogicVRegister Simulator::smulh(VectorFormat vform, dst_val = internal::MultiplyHigh<64>(val1, val2); break; default: - dst_val = 0xbadbeef; VIXL_UNREACHABLE(); break; } @@ -755,7 +757,7 @@ LogicVRegister Simulator::umulh(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t dst_val; + uint64_t dst_val = 0xbadbeef; uint64_t val1 = src1.Uint(vform, i); uint64_t val2 = src2.Uint(vform, i); switch (LaneSizeInBitsFromFormat(vform)) { @@ -772,7 +774,6 @@ LogicVRegister Simulator::umulh(VectorFormat vform, dst_val = internal::MultiplyHigh<64>(val1, val2); break; default: - dst_val = 0xbadbeef; VIXL_UNREACHABLE(); break; } @@ -803,151 +804,6 @@ LogicVRegister Simulator::mls(VectorFormat vform, return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index)); } - -LogicVRegister Simulator::smull(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::smull2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::umull(VectorFormat vform, - LogicVRegister dst, - const 
LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::umull2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::smlal(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::smlal2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::umlal(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::umlal2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - 
-LogicVRegister Simulator::smlsl(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::smlsl2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::umlsl(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - -LogicVRegister Simulator::umlsl2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -959,19 +815,6 @@ LogicVRegister Simulator::sqdmull(VectorFormat vform, return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); } - -LogicVRegister Simulator::sqdmull2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - 
LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -983,19 +826,6 @@ LogicVRegister Simulator::sqdmlal(VectorFormat vform, return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); } - -LogicVRegister Simulator::sqdmlal2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -1007,19 +837,6 @@ LogicVRegister Simulator::sqdmlsl(VectorFormat vform, return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); } - -LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - VectorFormat indexform = - VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); - return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); -} - - LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -1042,23 +859,6 @@ LogicVRegister Simulator::sqrdmulh(VectorFormat vform, } -LogicVRegister Simulator::sdot(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - // NEON indexed `dot` allows the index value exceed the register size. - // Promote the format to Q-sized vector format before the duplication. - dup_elements_to_segments(IsSVEFormat(vform) ? 
vform - : VectorFormatFillQ(vform), - temp, - src2, - index); - return sdot(vform, dst, src1, temp); -} - - LogicVRegister Simulator::sqrdmlah(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -1070,23 +870,6 @@ LogicVRegister Simulator::sqrdmlah(VectorFormat vform, } -LogicVRegister Simulator::udot(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index) { - SimVRegister temp; - // NEON indexed `dot` allows the index value exceed the register size. - // Promote the format to Q-sized vector format before the duplication. - dup_elements_to_segments(IsSVEFormat(vform) ? vform - : VectorFormatFillQ(vform), - temp, - src2, - index); - return udot(vform, dst, src1, temp); -} - - LogicVRegister Simulator::sqrdmlsh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -1098,12 +881,16 @@ LogicVRegister Simulator::sqrdmlsh(VectorFormat vform, } -uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const { - uint16_t result = 0; - uint16_t extended_op2 = op2; - for (int i = 0; i < 8; ++i) { +uint64_t Simulator::PolynomialMult(uint64_t op1, + uint64_t op2, + int lane_size_in_bits) const { + VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize); + VIXL_ASSERT(IsUintN(lane_size_in_bits, op1)); + VIXL_ASSERT(IsUintN(lane_size_in_bits, op2)); + uint64_t result = 0; + for (int i = 0; i < lane_size_in_bits; ++i) { if ((op1 >> i) & 1) { - result = result ^ (extended_op2 << i); + result = result ^ (op2 << i); } } return result; @@ -1118,7 +905,9 @@ LogicVRegister Simulator::pmul(VectorFormat vform, for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, - PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); + PolynomialMult(src1.Uint(vform, i), + src2.Uint(vform, i), + LaneSizeInBitsFromFormat(vform))); } return dst; } @@ -1128,14 +917,17 @@ LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, 
const LogicVRegister& src2) { - VectorFormat vform_src = VectorFormatHalfWidth(vform); dst.ClearForWrite(vform); + + VectorFormat vform_src = VectorFormatHalfWidth(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i), - src2.Uint(vform_src, i))); + src2.Uint(vform_src, i), + LaneSizeInBitsFromFormat(vform_src))); } + return dst; } @@ -1151,7 +943,8 @@ LogicVRegister Simulator::pmull2(VectorFormat vform, dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i), - src2.Uint(vform_src, lane_count + i))); + src2.Uint(vform_src, lane_count + i), + LaneSizeInBitsFromFormat(vform_src))); } return dst; } @@ -1327,12 +1120,13 @@ LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& src_mask, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t operand1 = src2.Uint(vform, i); - uint64_t operand2 = dst.Uint(vform, i); + uint64_t operand2 = src_mask.Uint(vform, i); uint64_t operand3 = src1.Uint(vform, i); uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); dst.SetUint(vform, i, result); @@ -1383,11 +1177,11 @@ LogicVRegister Simulator::sminmaxp(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2, bool max) { - int lanes = LaneCountFromFormat(vform); - int64_t result[kMaxLanesPerVector]; + unsigned lanes = LaneCountFromFormat(vform); + int64_t result[kZRegMaxSizeInBytes]; const LogicVRegister* src = &src1; - for (int j = 0; j < 2; j++) { - for (int i = 0; i < lanes; i += 2) { + for (unsigned j = 0; j < 2; j++) { + for (unsigned i = 0; i < lanes; i += 2) { int64_t first_val = src->Int(vform, i); int64_t second_val = src->Int(vform, i + 1); int64_t dst_val; @@ -1396,12 +1190,15 @@ LogicVRegister Simulator::sminmaxp(VectorFormat vform, } else { dst_val = (first_val < 
second_val) ? first_val : second_val; } - VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); + VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result)); result[(i >> 1) + (j * lanes / 2)] = dst_val; } src = &src2; } dst.SetIntArray(vform, result); + if (IsSVEFormat(vform)) { + interleave_top_bottom(vform, dst, dst); + } return dst; } @@ -1586,11 +1383,11 @@ LogicVRegister Simulator::uminmaxp(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2, bool max) { - int lanes = LaneCountFromFormat(vform); - uint64_t result[kMaxLanesPerVector]; + unsigned lanes = LaneCountFromFormat(vform); + uint64_t result[kZRegMaxSizeInBytes]; const LogicVRegister* src = &src1; - for (int j = 0; j < 2; j++) { - for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { + for (unsigned j = 0; j < 2; j++) { + for (unsigned i = 0; i < lanes; i += 2) { uint64_t first_val = src->Uint(vform, i); uint64_t second_val = src->Uint(vform, i + 1); uint64_t dst_val; @@ -1599,12 +1396,15 @@ LogicVRegister Simulator::uminmaxp(VectorFormat vform, } else { dst_val = (first_val < second_val) ? 
first_val : second_val; } - VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); + VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result)); result[(i >> 1) + (j * lanes / 2)] = dst_val; } src = &src2; } dst.SetUintArray(vform, result); + if (IsSVEFormat(vform)) { + interleave_top_bottom(vform, dst, dst); + } return dst; } @@ -1811,9 +1611,8 @@ LogicVRegister Simulator::splice(VectorFormat vform, result[i] = src2.Uint(vform, i - dst_idx); } - for (int i = 0; i < lane_count; i++) { - dst.SetUint(vform, i, result[i]); - } + dst.SetUintArray(vform, result); + return dst; } @@ -2060,14 +1859,33 @@ LogicVRegister Simulator::cnt(VectorFormat vform, return dst; } +static int64_t CalculateSignedShiftDistance(int64_t shift_val, + int esize, + bool shift_in_ls_byte) { + if (shift_in_ls_byte) { + // Neon uses the least-significant byte of the lane as the shift distance. + shift_val = ExtractSignedBitfield64(7, 0, shift_val); + } else { + // SVE uses a saturated shift distance in the range + // -(esize + 1) ... (esize + 1). + if (shift_val > (esize + 1)) shift_val = esize + 1; + if (shift_val < -(esize + 1)) shift_val = -(esize + 1); + } + return shift_val; +} LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool shift_in_ls_byte) { dst.ClearForWrite(vform); + int esize = LaneSizeInBitsFromFormat(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - int8_t shift_val = src2.Int(vform, i); + int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i), + esize, + shift_in_ls_byte); + int64_t lj_src_val = src1.IntLeftJustified(vform, i); // Set signed saturation state. 
@@ -2124,10 +1942,15 @@ LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool shift_in_ls_byte) { dst.ClearForWrite(vform); + int esize = LaneSizeInBitsFromFormat(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - int8_t shift_val = src2.Int(vform, i); + int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i), + esize, + shift_in_ls_byte); + uint64_t lj_src_val = src1.UintLeftJustified(vform, i); // Set saturation state. @@ -2159,6 +1982,27 @@ LogicVRegister Simulator::ushl(VectorFormat vform, return dst; } +LogicVRegister Simulator::sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + // Saturate to sidestep the min-int problem. + neg(vform, temp, src2).SignedSaturate(vform); + sshl(vform, dst, src1, temp, false); + return dst; +} + +LogicVRegister Simulator::ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + // Saturate to sidestep the min-int problem. 
+ neg(vform, temp, src2).SignedSaturate(vform); + ushl(vform, dst, src1, temp, false); + return dst; +} LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst, @@ -2178,11 +2022,12 @@ LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src) { + const LogicVRegister& src1, + const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - int64_t sa = dst.IntLeftJustified(vform, i); - uint64_t ub = src.UintLeftJustified(vform, i); + int64_t sa = src1.IntLeftJustified(vform, i); + uint64_t ub = src2.UintLeftJustified(vform, i); uint64_t ur = sa + ub; int64_t sr; @@ -2190,7 +2035,7 @@ LogicVRegister Simulator::suqadd(VectorFormat vform, if (sr < sa) { // Test for signed positive saturation. dst.SetInt(vform, i, MaxIntFromFormat(vform)); } else { - dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); + dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i)); } } return dst; @@ -2199,11 +2044,12 @@ LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src) { + const LogicVRegister& src1, + const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t ua = dst.UintLeftJustified(vform, i); - int64_t sb = src.IntLeftJustified(vform, i); + uint64_t ua = src1.UintLeftJustified(vform, i); + int64_t sb = src2.IntLeftJustified(vform, i); uint64_t ur = ua + sb; if ((sb > 0) && (ur <= ua)) { @@ -2211,7 +2057,7 @@ LogicVRegister Simulator::usqadd(VectorFormat vform, } else if ((sb < 0) && (ur >= ua)) { dst.SetUint(vform, i, 0); // Negative saturation. 
} else { - dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); + dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i)); } } return dst; @@ -2341,55 +2187,15 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform, const LogicVRegister& src, bool src_is_signed) { bool upperhalf = false; - VectorFormat srcform = kFormatUndefined; - int64_t ssrc[8]; - uint64_t usrc[8]; - - switch (dstform) { - case kFormat8B: - upperhalf = false; - srcform = kFormat8H; - break; - case kFormat16B: - upperhalf = true; - srcform = kFormat8H; - break; - case kFormat4H: - upperhalf = false; - srcform = kFormat4S; - break; - case kFormat8H: - upperhalf = true; - srcform = kFormat4S; - break; - case kFormat2S: - upperhalf = false; - srcform = kFormat2D; - break; - case kFormat4S: - upperhalf = true; - srcform = kFormat2D; - break; - case kFormatB: - upperhalf = false; - srcform = kFormatH; - break; - case kFormatH: - upperhalf = false; - srcform = kFormatS; - break; - case kFormatS: - upperhalf = false; - srcform = kFormatD; - break; - default: - VIXL_UNIMPLEMENTED(); + VectorFormat srcform = dstform; + if ((dstform == kFormat16B) || (dstform == kFormat8H) || + (dstform == kFormat4S)) { + upperhalf = true; + srcform = VectorFormatHalfLanes(srcform); } + srcform = VectorFormatDoubleWidth(srcform); - for (int i = 0; i < LaneCountFromFormat(srcform); i++) { - ssrc[i] = src.Int(srcform, i); - usrc[i] = src.Uint(srcform, i); - } + LogicVRegister src_copy = src; int offset; if (upperhalf) { @@ -2400,31 +2206,34 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform, } for (int i = 0; i < LaneCountFromFormat(srcform); i++) { + int64_t ssrc = src_copy.Int(srcform, i); + uint64_t usrc = src_copy.Uint(srcform, i); + // Test for signed saturation - if (ssrc[i] > MaxIntFromFormat(dstform)) { + if (ssrc > MaxIntFromFormat(dstform)) { dst.SetSignedSat(offset + i, true); - } else if (ssrc[i] < MinIntFromFormat(dstform)) { + } else if (ssrc < MinIntFromFormat(dstform)) 
{ dst.SetSignedSat(offset + i, false); } // Test for unsigned saturation if (src_is_signed) { - if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { + if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) { dst.SetUnsignedSat(offset + i, true); - } else if (ssrc[i] < 0) { + } else if (ssrc < 0) { dst.SetUnsignedSat(offset + i, false); } } else { - if (usrc[i] > MaxUintFromFormat(dstform)) { + if (usrc > MaxUintFromFormat(dstform)) { dst.SetUnsignedSat(offset + i, true); } } int64_t result; if (src_is_signed) { - result = ssrc[i] & MaxUintFromFormat(dstform); + result = ssrc & MaxUintFromFormat(dstform); } else { - result = usrc[i] & MaxUintFromFormat(dstform); + result = usrc & MaxUintFromFormat(dstform); } if (dst_is_signed) { @@ -2565,7 +2374,7 @@ LogicVRegister Simulator::rev_byte(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int rev_size) { - uint64_t result[kZRegMaxSizeInBytes]; + uint64_t result[kZRegMaxSizeInBytes] = {}; int lane_count = LaneCountFromFormat(vform); int lane_size = LaneSizeInBytesFromFormat(vform); int lanes_per_loop = rev_size / lane_size; @@ -2602,17 +2411,15 @@ LogicVRegister Simulator::rev64(VectorFormat vform, return rev_byte(vform, dst, src, 8); } - LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, bool is_signed, bool do_accumulate) { VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32); - VIXL_ASSERT(LaneCountFromFormat(vform) <= 8); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize); - uint64_t result[8]; + uint64_t result[kZRegMaxSizeInBytes]; int lane_count = LaneCountFromFormat(vform); for (int i = 0; i < lane_count; i++) { if (is_signed) { @@ -2662,13 +2469,24 @@ LogicVRegister Simulator::uadalp(VectorFormat vform, return addlp(vform, dst, src, false, true); } +LogicVRegister Simulator::ror(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + 
int rotation) { + int width = LaneSizeInBitsFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = src.Uint(vform, i); + dst.SetUint(vform, i, RotateRight(value, rotation, width)); + } + return dst; +} LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { - uint8_t result[kZRegMaxSizeInBytes]; + uint8_t result[kZRegMaxSizeInBytes] = {}; int lane_count = LaneCountFromFormat(vform); for (int i = 0; i < lane_count - index; ++i) { result[i] = src1.Uint(vform, i + index); @@ -2683,6 +2501,17 @@ LogicVRegister Simulator::ext(VectorFormat vform, return dst; } +LogicVRegister Simulator::rotate_elements_right(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int index) { + if (index < 0) index += LaneCountFromFormat(vform); + VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform))); + index *= LaneSizeInBytesFromFormat(vform); + return ext(kFormatVnB, dst, src, src, index); +} + + template <typename T> LogicVRegister Simulator::fadda(VectorFormat vform, LogicVRegister acc, @@ -2869,21 +2698,198 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, return dst; } +LogicVRegister Simulator::cadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot, + bool saturate) { + SimVRegister src1_r, src1_i; + SimVRegister src2_r, src2_i; + SimVRegister zero; + zero.Clear(); + uzp1(vform, src1_r, src1, zero); + uzp2(vform, src1_i, src1, zero); + uzp1(vform, src2_r, src2, zero); + uzp2(vform, src2_i, src2, zero); + + if (rot == 90) { + if (saturate) { + sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform); + add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform); + } else { + sub(vform, src1_r, src1_r, src2_i); + add(vform, src1_i, src1_i, src2_r); + } + } else { + VIXL_ASSERT(rot == 270); + if (saturate) { + add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform); + 
sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform); + } else { + add(vform, src1_r, src1_r, src2_i); + sub(vform, src1_i, src1_i, src2_r); + } + } + + zip1(vform, dst, src1_r, src1_i); + return dst; +} + +LogicVRegister Simulator::cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot) { + SimVRegister src1_a; + SimVRegister src2_a, src2_b; + SimVRegister srca_i, srca_r; + SimVRegister zero, temp; + zero.Clear(); + + if ((rot == 0) || (rot == 180)) { + uzp1(vform, src1_a, src1, zero); + uzp1(vform, src2_a, src2, zero); + uzp2(vform, src2_b, src2, zero); + } else { + uzp2(vform, src1_a, src1, zero); + uzp2(vform, src2_a, src2, zero); + uzp1(vform, src2_b, src2, zero); + } + + uzp1(vform, srca_r, srca, zero); + uzp2(vform, srca_i, srca, zero); + + bool sub_r = (rot == 90) || (rot == 180); + bool sub_i = (rot == 180) || (rot == 270); + + mul(vform, temp, src1_a, src2_a); + if (sub_r) { + sub(vform, srca_r, srca_r, temp); + } else { + add(vform, srca_r, srca_r, temp); + } + + mul(vform, temp, src1_a, src2_b); + if (sub_i) { + sub(vform, srca_i, srca_i, temp); + } else { + add(vform, srca_i, srca_i, temp); + } + + zip1(vform, dst, srca_r, srca_i); + return dst; +} + +LogicVRegister Simulator::cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot) { + SimVRegister temp; + dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index); + return cmla(vform, dst, srca, src1, temp, rot); +} + +LogicVRegister Simulator::bgrp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_bext) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = src1.Uint(vform, i); + uint64_t mask = src2.Uint(vform, i); + int high_pos = 0; + int low_pos = 0; + uint64_t result_high = 0; + uint64_t result_low 
= 0; + for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) { + if ((mask & 1) == 0) { + result_high |= (value & 1) << high_pos; + high_pos++; + } else { + result_low |= (value & 1) << low_pos; + low_pos++; + } + mask >>= 1; + value >>= 1; + } + + if (!do_bext) { + result_low |= result_high << low_pos; + } + + dst.SetUint(vform, i, result_low); + } + return dst; +} + +LogicVRegister Simulator::bdep(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = src1.Uint(vform, i); + uint64_t mask = src2.Uint(vform, i); + uint64_t result = 0; + for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) { + if ((mask & 1) == 1) { + result |= (value & 1) << j; + value >>= 1; + } + mask >>= 1; + } + dst.SetUint(vform, i, result); + } + return dst; +} + +LogicVRegister Simulator::histogram(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_segmented) { + int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform); + uint64_t result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t count = 0; + uint64_t value = src1.Uint(vform, i); + + int segment = do_segmented ? (i / elements_per_segment) : 0; + int segment_offset = segment * elements_per_segment; + int hist_limit = do_segmented ? 
elements_per_segment : (i + 1); + for (int j = 0; j < hist_limit; j++) { + if (pg.IsActive(vform, j) && + (value == src2.Uint(vform, j + segment_offset))) { + count++; + } + } + result[i] = count; + } + dst.SetUintArray(vform, result); + return dst; +} LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index) { - if (vform == kFormatVnQ) { - // When duplicating a 128-bit value, split it into two 64-bit parts, and - // then copy the two to their slots on destination register. - uint64_t low = src.Uint(kFormatVnD, src_index * 2); - uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1); - dst.ClearForWrite(vform); - for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD); - d_lane += 2) { - dst.SetUint(kFormatVnD, d_lane, low); - dst.SetUint(kFormatVnD, d_lane + 1, high); + if ((vform == kFormatVnQ) || (vform == kFormatVnO)) { + // When duplicating an element larger than 64 bits, split the element into + // 64-bit parts, and duplicate the parts across the destination. + uint64_t d[4]; + int count = (vform == kFormatVnQ) ? 
2 : 4; + for (int i = 0; i < count; i++) { + d[i] = src.Uint(kFormatVnD, (src_index * count) + i); + } + dst.Clear(); + for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) { + dst.SetUint(kFormatVnD, i, d[i % count]); } } else { int lane_count = LaneCountFromFormat(vform); @@ -2920,6 +2926,16 @@ LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform, return dst; } +LogicVRegister Simulator::dup_elements_to_segments( + VectorFormat vform, + LogicVRegister dst, + const std::pair<int, int>& src_and_index) { + return dup_elements_to_segments(vform, + dst, + ReadVRegister(src_and_index.first), + src_and_index.second); +} + LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst, uint64_t imm) { @@ -3008,7 +3024,6 @@ LogicVRegister Simulator::mov_merging(VectorFormat vform, return sel(vform, dst, pg, src, dst); } - LogicVRegister Simulator::mov_zeroing(VectorFormat vform, LogicVRegister dst, const SimPRegister& pg, @@ -3018,6 +3033,16 @@ LogicVRegister Simulator::mov_zeroing(VectorFormat vform, return sel(vform, dst, pg, src, zero); } +LogicVRegister Simulator::mov_alternating(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int start_at) { + VIXL_ASSERT((start_at == 0) || (start_at == 1)); + for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) { + dst.SetUint(vform, i, src.Uint(vform, i)); + } + return dst; +} LogicPRegister Simulator::mov_merging(LogicPRegister dst, const LogicPRegister& pg, @@ -3025,7 +3050,6 @@ LogicPRegister Simulator::mov_merging(LogicPRegister dst, return sel(dst, pg, src, dst); } - LogicPRegister Simulator::mov_zeroing(LogicPRegister dst, const LogicPRegister& pg, const LogicPRegister& src) { @@ -3033,7 +3057,6 @@ LogicPRegister Simulator::mov_zeroing(LogicPRegister dst, return sel(dst, pg, src, pfalse(all_false)); } - LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, uint64_t imm) { @@ -3077,12 +3100,15 @@ LogicVRegister 
Simulator::orr(VectorFormat vform, LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src) { + const LogicVRegister& src, + bool is_2) { VectorFormat vform_half = VectorFormatHalfWidth(vform); + int lane_count = LaneCountFromFormat(vform); + int src_offset = is_2 ? lane_count : 0; dst.ClearForWrite(vform); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.SetUint(vform, i, src.Uint(vform_half, i)); + for (int i = 0; i < lane_count; i++) { + dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i)); } return dst; } @@ -3090,12 +3116,15 @@ LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src) { + const LogicVRegister& src, + bool is_2) { VectorFormat vform_half = VectorFormatHalfWidth(vform); + int lane_count = LaneCountFromFormat(vform); + int src_offset = is_2 ? lane_count : 0; dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.SetInt(vform, i, src.Int(vform_half, i)); + dst.SetInt(vform, i, src.Int(vform_half, src_offset + i)); } return dst; } @@ -3104,28 +3133,14 @@ LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - VectorFormat vform_half = VectorFormatHalfWidth(vform); - int lane_count = LaneCountFromFormat(vform); - - dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; i++) { - dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); - } - return dst; + return uxtl(vform, dst, src, /* is_2 = */ true); } LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - VectorFormat vform_half = VectorFormatHalfWidth(vform); - int lane_count = LaneCountFromFormat(vform); - - dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; i++) { - dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); - } - return dst; + 
return sxtl(vform, dst, src, /* is_2 = */ true); } @@ -3209,23 +3224,6 @@ LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& tab, - const LogicVRegister& ind) { - VIXL_ASSERT(IsSVEFormat(vform)); - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count; i++) { - uint64_t index = ind.Uint(vform, i); - uint64_t value = (index >= static_cast<uint64_t>(lane_count)) - ? 0 - : tab.Uint(vform, static_cast<int>(index)); - dst.SetUint(vform, i, value); - } - return dst; -} - - -LogicVRegister Simulator::Table(VectorFormat vform, - LogicVRegister dst, const LogicVRegister& ind, bool zero_out_of_bounds, const LogicVRegister* tab1, @@ -3233,24 +3231,30 @@ LogicVRegister Simulator::Table(VectorFormat vform, const LogicVRegister* tab3, const LogicVRegister* tab4) { VIXL_ASSERT(tab1 != NULL); - const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; - uint64_t result[kMaxLanesPerVector]; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i); - } - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t j = ind.Uint(vform, i); - int tab_idx = static_cast<int>(j >> 4); - int j_idx = static_cast<int>(j & 15); - if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { - result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); - } + int lane_count = LaneCountFromFormat(vform); + VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16)); + uint64_t table[kZRegMaxSizeInBytes * 2]; + uint64_t result[kZRegMaxSizeInBytes]; + + // For Neon, the table source registers are always 16B, and Neon allows only + // 8B or 16B vform for the destination, so infer the table format from the + // destination. + VectorFormat vform_tab = (vform == kFormat8B) ? 
kFormat16B : vform; + + uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]); + if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]); + if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]); + if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]); + + for (int i = 0; i < lane_count; i++) { + uint64_t index = ind.Uint(vform, i); + result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i); + if (index < tab_size) result[i] = table[index]; } dst.SetUintArray(vform, result); return dst; } - LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, @@ -3738,10 +3742,11 @@ LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp1, temp2; - uxtl(vform, temp1, src1); - uxtl(vform, temp2, src2); + uxtl(vform, temp1, src1, is_2); + uxtl(vform, temp2, src2, is_2); mul(vform, dst, temp1, temp2); return dst; } @@ -3751,21 +3756,18 @@ LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp1, temp2; - uxtl2(vform, temp1, src1); - uxtl2(vform, temp2, src2); - mul(vform, dst, temp1, temp2); - return dst; + return umull(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp1, temp2; - sxtl(vform, temp1, src1); - sxtl(vform, temp2, src2); + sxtl(vform, temp1, src1, is_2); + sxtl(vform, temp2, src2, is_2); mul(vform, dst, temp1, temp2); return dst; } @@ -3775,21 +3777,18 @@ LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister 
temp1, temp2; - sxtl2(vform, temp1, src1); - sxtl2(vform, temp2, src2); - mul(vform, dst, temp1, temp2); - return dst; + return smull(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp1, temp2; - uxtl(vform, temp1, src1); - uxtl(vform, temp2, src2); + uxtl(vform, temp1, src1, is_2); + uxtl(vform, temp2, src2, is_2); mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3799,21 +3798,18 @@ LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp1, temp2; - uxtl2(vform, temp1, src1); - uxtl2(vform, temp2, src2); - mls(vform, dst, dst, temp1, temp2); - return dst; + return umlsl(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp1, temp2; - sxtl(vform, temp1, src1); - sxtl(vform, temp2, src2); + sxtl(vform, temp1, src1, is_2); + sxtl(vform, temp2, src2, is_2); mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3823,21 +3819,18 @@ LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp1, temp2; - sxtl2(vform, temp1, src1); - sxtl2(vform, temp2, src2); - mls(vform, dst, dst, temp1, temp2); - return dst; + return smlsl(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp1, temp2; - uxtl(vform, temp1, src1); - uxtl(vform, temp2, src2); + uxtl(vform, temp1, src1, is_2); + uxtl(vform, temp2, src2, is_2); mla(vform, dst, dst, 
temp1, temp2); return dst; } @@ -3847,21 +3840,18 @@ LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp1, temp2; - uxtl2(vform, temp1, src1); - uxtl2(vform, temp2, src2); - mla(vform, dst, dst, temp1, temp2); - return dst; + return umlal(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp1, temp2; - sxtl(vform, temp1, src1); - sxtl(vform, temp2, src2); + sxtl(vform, temp1, src1, is_2); + sxtl(vform, temp2, src2, is_2); mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3871,20 +3861,17 @@ LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp1, temp2; - sxtl2(vform, temp1, src1); - sxtl2(vform, temp2, src2); - mla(vform, dst, dst, temp1, temp2); - return dst; + return smlal(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp; - LogicVRegister product = sqdmull(vform, temp, src1, src2); + LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2); return add(vform, dst, dst, product).SignedSaturate(vform); } @@ -3893,18 +3880,17 @@ LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp; - LogicVRegister product = sqdmull2(vform, temp, src1, src2); - return add(vform, dst, dst, product).SignedSaturate(vform); + return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& 
src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp; - LogicVRegister product = sqdmull(vform, temp, src1, src2); + LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2); return sub(vform, dst, dst, product).SignedSaturate(vform); } @@ -3913,18 +3899,17 @@ LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp; - LogicVRegister product = sqdmull2(vform, temp, src1, src2); - return sub(vform, dst, dst, product).SignedSaturate(vform); + return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true); } LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2) { + const LogicVRegister& src2, + bool is_2) { SimVRegister temp; - LogicVRegister product = smull(vform, temp, src1, src2); + LogicVRegister product = smull(vform, temp, src1, src2, is_2); return add(vform, dst, product, product).SignedSaturate(vform); } @@ -3933,38 +3918,49 @@ LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - SimVRegister temp; - LogicVRegister product = smull2(vform, temp, src1, src2); - return add(vform, dst, product, product).SignedSaturate(vform); + return sqdmull(vform, dst, src1, src2, /* is_2 = */ true); } - LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool round) { - // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. - // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) - // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. - int esize = LaneSizeInBitsFromFormat(vform); - int round_const = round ? (1 << (esize - 2)) : 0; - int64_t product; + SimVRegister temp_lo, temp_hi; + + // Compute low and high multiplication results. 
+ mul(vform, temp_lo, src1, src2); + smulh(vform, temp_hi, src1, src2); + + // Double by shifting high half, and adding in most-significant bit of low + // half. + shl(vform, temp_hi, temp_hi, 1); + usra(vform, temp_hi, temp_lo, esize - 1); + + if (round) { + // Add the second (due to doubling) most-significant bit of the low half + // into the result. + shl(vform, temp_lo, temp_lo, 1); + usra(vform, temp_hi, temp_lo, esize - 1); + } + + SimPRegister not_sat; + LogicPRegister ptemp(not_sat); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - product = src1.Int(vform, i) * src2.Int(vform, i); - product += round_const; - product = product >> (esize - 1); - - if (product > MaxIntFromFormat(vform)) { - product = MaxIntFromFormat(vform); - } else if (product < MinIntFromFormat(vform)) { - product = MinIntFromFormat(vform); + // Saturation only occurs when src1 = src2 = minimum representable value. + // Check this as a special case. + ptemp.SetActive(vform, i, true); + if ((src1.Int(vform, i) == MinIntFromFormat(vform)) && + (src2.Int(vform, i) == MinIntFromFormat(vform))) { + ptemp.SetActive(vform, i, false); } - dst.SetInt(vform, i, product); + dst.SetInt(vform, i, MaxIntFromFormat(vform)); } + + mov_merging(vform, dst, not_sat, temp_hi); return dst; } @@ -3973,7 +3969,8 @@ LogicVRegister Simulator::dot(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - bool is_signed) { + bool is_src1_signed, + bool is_src2_signed) { VectorFormat quarter_vform = VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); @@ -3983,11 +3980,14 @@ LogicVRegister Simulator::dot(VectorFormat vform, int64_t element1, element2; for (int i = 0; i < 4; i++) { int index = 4 * e + i; - if (is_signed) { + if (is_src1_signed) { element1 = src1.Int(quarter_vform, index); - element2 = src2.Int(quarter_vform, index); } else { element1 = src1.Uint(quarter_vform, index); + } + if (is_src2_signed) { + element2 
= src2.Int(quarter_vform, index); + } else { element2 = src2.Uint(quarter_vform, index); } result += element1 * element2; @@ -4002,7 +4002,7 @@ LogicVRegister Simulator::sdot(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - return dot(vform, dst, src1, src2, true); + return dot(vform, dst, src1, src2, true, true); } @@ -4010,9 +4010,148 @@ LogicVRegister Simulator::udot(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - return dot(vform, dst, src1, src2, false); + return dot(vform, dst, src1, src2, false, false); } +LogicVRegister Simulator::usdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return dot(vform, dst, src1, src2, false, true); +} + +LogicVRegister Simulator::cdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& acc, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot) { + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VectorFormat quarter_vform = + VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); + + int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1; + int sel_b = 1 - sel_a; + int sub_i = ((rot == 90) || (rot == 180)) ? 
1 : -1; + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t result = acc.Int(vform, i); + for (int j = 0; j < 2; j++) { + int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0); + int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1); + int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a); + int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b); + result += (r1 * r2) + (sub_i * i1 * i2); + } + dst.SetInt(vform, i, result); + } + return dst; +} + +LogicVRegister Simulator::sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot) { + SimVRegister src1_a, src1_b; + SimVRegister src2_a, src2_b; + SimVRegister srca_i, srca_r; + SimVRegister zero, temp; + zero.Clear(); + + if ((rot == 0) || (rot == 180)) { + uzp1(vform, src1_a, src1, zero); + uzp1(vform, src2_a, src2, zero); + uzp2(vform, src2_b, src2, zero); + } else { + uzp2(vform, src1_a, src1, zero); + uzp2(vform, src2_a, src2, zero); + uzp1(vform, src2_b, src2, zero); + } + + uzp1(vform, srca_r, srca, zero); + uzp2(vform, srca_i, srca, zero); + + bool sub_r = (rot == 90) || (rot == 180); + bool sub_i = (rot == 180) || (rot == 270); + + const bool round = true; + sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r); + sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i); + zip1(vform, dst, srca_r, srca_i); + return dst; +} + +LogicVRegister Simulator::sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot) { + SimVRegister temp; + dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index); + return sqrdcmlah(vform, dst, srca, src1, temp, rot); +} + +LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round, + bool sub_op) { + // 2 * INT_64_MIN * 
INT_64_MIN causes INT_128 to overflow. + // To avoid this, we use: + // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1) + // which is same as: + // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize. + + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + int esize = kDRegSize; + vixl_uint128_t round_const, accum; + round_const.first = 0; + if (round) { + round_const.second = UINT64_C(1) << (esize - 2); + } else { + round_const.second = 0; + } + + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Shift the whole value left by `esize - 1` bits. + accum.first = dst.Int(vform, i) >> 1; + accum.second = dst.Int(vform, i) << (esize - 1); + + vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i)); + + if (sub_op) { + product = Neg128(product); + } + accum = Add128(accum, product); + + // Perform rounding. + accum = Add128(accum, round_const); + + // Arithmetic shift the whole value right by `esize - 1` bits. + accum.second = (accum.first << 1) | (accum.second >> (esize - 1)); + accum.first = -(accum.first >> (esize - 1)); + + // Perform saturation. + bool is_pos = (accum.first == 0) ? true : false; + if (is_pos && + (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) { + accum.second = MaxIntFromFormat(vform); + } else if (!is_pos && (accum.second < + static_cast<uint64_t>(MinIntFromFormat(vform)))) { + accum.second = MinIntFromFormat(vform); + } + + dst.SetInt(vform, i, accum.second); + } + + return dst; +} LogicVRegister Simulator::sqrdmlash(VectorFormat vform, LogicVRegister dst, @@ -4026,6 +4165,10 @@ LogicVRegister Simulator::sqrdmlash(VectorFormat vform, // which is same as: // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize. + if (vform == kFormatVnD) { + return sqrdmlash_d(vform, dst, src1, src2, round, sub_op); + } + int esize = LaneSizeInBitsFromFormat(vform); int round_const = round ? 
(1 << (esize - 2)) : 0; int64_t accum; @@ -4170,7 +4313,7 @@ LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; + uint64_t result[kZRegMaxSizeInBytes] = {}; int lane_count = LaneCountFromFormat(vform); int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { @@ -4190,7 +4333,7 @@ LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; + uint64_t result[kZRegMaxSizeInBytes] = {}; int lane_count = LaneCountFromFormat(vform); int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { @@ -4210,7 +4353,7 @@ LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; + uint64_t result[kZRegMaxSizeInBytes] = {}; int lane_count = LaneCountFromFormat(vform); int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { @@ -4230,7 +4373,7 @@ LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; + uint64_t result[kZRegMaxSizeInBytes] = {}; int lane_count = LaneCountFromFormat(vform); int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { @@ -4283,6 +4426,27 @@ LogicVRegister Simulator::uzp2(VectorFormat vform, return dst; } +LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + // Interleave the top and bottom half of a vector, ie. for a vector: + // + // [ ... | F | D | B | ... | E | C | A ] + // + // where B is the first element in the top half of the vector, produce a + // result vector: + // + // [ ... | ... 
| F | E | D | C | B | A ] + + uint64_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i += 2) { + result[i] = src.Uint(vform, i / 2); + result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2)); + } + dst.SetUintArray(vform, result); + return dst; +} template <typename T> T Simulator::FPNeg(T op) { @@ -4322,6 +4486,11 @@ T Simulator::FPSub(T op1, T op2) { } } +template <typename T> +T Simulator::FPMulNaNs(T op1, T op2) { + T result = FPProcessNaNs(op1, op2); + return IsNaN(result) ? result : FPMul(op1, op2); +} template <typename T> T Simulator::FPMul(T op1, T op2) { @@ -4755,7 +4924,9 @@ int32_t Simulator::FPToInt32(double value, FPRounding rmode) { int64_t Simulator::FPToInt64(double value, FPRounding rmode) { value = FPRoundInt(value, rmode); - if (value >= kXMaxInt) { + // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues + // as a result of kMaxInt not being representable as a double. + if (value >= 9223372036854775808.) { return kXMaxInt; } else if (value < kXMinInt) { return kXMinInt; @@ -4788,7 +4959,9 @@ uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { value = FPRoundInt(value, rmode); - if (value >= kXMaxUInt) { + // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues + // as a result of kMaxUInt not being representable as a double. + if (value >= 18446744073709551616.) 
{ return kXMaxUInt; } else if (value < 0.0) { return 0; @@ -5346,6 +5519,9 @@ LogicVRegister Simulator::fsqrt(VectorFormat vform, uzp1(vform, temp1, src1, src2); \ uzp2(vform, temp2, src1, src2); \ FN(vform, dst, temp1, temp2); \ + if (IsSVEFormat(vform)) { \ + interleave_top_bottom(vform, dst, dst); \ + } \ return dst; \ } \ \ @@ -5628,14 +5804,15 @@ LogicVRegister Simulator::frint(VectorFormat vform, return dst; } -LogicVRegister Simulator::fcvt(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, +LogicVRegister Simulator::fcvt(VectorFormat dst_vform, + VectorFormat src_vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src) { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform); + unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform); + VectorFormat vform = SVEFormatFromLaneSizeInBits( + std::max(dst_data_size_in_bits, src_data_size_in_bits)); for (int i = 0; i < LaneCountFromFormat(vform); i++) { if (!pg.IsActive(vform, i)) continue; @@ -5810,16 +5987,20 @@ LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { + SimVRegister tmp; + LogicVRegister srctmp = mov(kFormat2D, tmp, src); + dst.ClearForWrite(vform); if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetFloat(i, - Float16ToRawbits( - FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()))); + Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i), + FPTieEven, + ReadDN()))); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN())); + dst.SetFloat(i, 
FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN())); } } return dst; @@ -5850,10 +6031,19 @@ LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { + SimVRegister tmp; + LogicVRegister srctmp = mov(kFormat2D, tmp, src); + int input_lane_count = LaneCountFromFormat(vform); + if (IsSVEFormat(vform)) { + mov(kFormatVnB, tmp, src); + input_lane_count /= 2; + } + dst.ClearForWrite(vform); VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN())); + + for (int i = 0; i < input_lane_count; i++) { + dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN())); } return dst; } @@ -5874,14 +6064,14 @@ LogicVRegister Simulator::fcvtxn2(VectorFormat vform, // Based on reference C function recip_sqrt_estimate from ARM ARM. double Simulator::recip_sqrt_estimate(double a) { - int q0, q1, s; + int quot0, quot1, s; double r; if (a < 0.5) { - q0 = static_cast<int>(a * 512.0); - r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); + quot0 = static_cast<int>(a * 512.0); + r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0); } else { - q1 = static_cast<int>(a * 256.0); - r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); + quot1 = static_cast<int>(a * 256.0); + r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0); } s = static_cast<int>(256.0 * r + 0.5); return static_cast<double>(s) / 256.0; @@ -6049,7 +6239,6 @@ T Simulator::FPRecipEstimate(T op, FPRounding rounding) { } else { uint64_t fraction; int exp, result_exp; - uint32_t sign; if (IsFloat16<T>()) { sign = Float16Sign(op); @@ -6285,6 +6474,52 @@ LogicVRegister Simulator::frecpx(VectorFormat vform, return dst; } +LogicVRegister Simulator::flogb(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { + double op = 0.0; + switch (vform) { + case kFormatVnH: + op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN); + break; + case kFormatVnS: + op = src.Float<float>(i); + break; + case kFormatVnD: + op = src.Float<double>(i); + break; + default: + VIXL_UNREACHABLE(); + } + + switch (std::fpclassify(op)) { + case FP_INFINITE: + dst.SetInt(vform, i, MaxIntFromFormat(vform)); + break; + case FP_NAN: + case FP_ZERO: + dst.SetInt(vform, i, MinIntFromFormat(vform)); + break; + case FP_SUBNORMAL: { + // DoubleMantissa returns the mantissa of its input, leaving 12 zero + // bits where the sign and exponent would be. We subtract 12 to + // find the number of leading zero bits in the mantissa itself. + int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12; + // Log2 of a subnormal is the lowest exponent a normal number can + // represent, together with the zeros in the mantissa. + dst.SetInt(vform, i, -1023 - mant_zero_count); + break; + } + case FP_NORMAL: + // Log2 of a normal number is the exponent minus the bias. + dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023); + break; + } + } + return dst; +} + LogicVRegister Simulator::ftsmul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -6543,8 +6778,8 @@ LogicVRegister Simulator::fscale(VectorFormat vform, const LogicVRegister& src2) { T two = T(2.0); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - T s1 = src1.Float<T>(i); - if (!IsNaN(s1)) { + T src1_val = src1.Float<T>(i); + if (!IsNaN(src1_val)) { int64_t scale = src2.Int(vform, i); // TODO: this is a low-performance implementation, but it's simple and // less likely to be buggy. Consider replacing it with something faster. @@ -6553,19 +6788,19 @@ LogicVRegister Simulator::fscale(VectorFormat vform, // point iterating further. scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048); - // Compute s1 * 2 ^ scale. 
If scale is positive, multiply by two and + // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and // decrement scale until it's zero. while (scale-- > 0) { - s1 = FPMul(s1, two); + src1_val = FPMul(src1_val, two); } // If scale is negative, divide by two and increment scale until it's // zero. Initially, scale is (src2 - 1), so we pre-increment. while (++scale < 0) { - s1 = FPDiv(s1, two); + src1_val = FPDiv(src1_val, two); } } - dst.SetFloat<T>(i, s1); + dst.SetFloat<T>(i, src1_val); } return dst; } @@ -6595,6 +6830,7 @@ LogicVRegister Simulator::scvtf(VectorFormat vform, int fbits) { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { if (!pg.IsActive(vform, i)) continue; @@ -6653,6 +6889,7 @@ LogicVRegister Simulator::ucvtf(VectorFormat vform, int fbits) { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { if (!pg.IsActive(vform, i)) continue; @@ -6883,7 +7120,7 @@ LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper( for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t op1 = zn.Uint(vform, i); uint64_t op2 = zm.Uint(vform, i); - uint64_t result; + uint64_t result = 0; switch (logical_op) { case AND: result = op1 & op2; @@ -6898,7 +7135,6 @@ LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper( result = op1 | op2; break; default: - result = 0; VIXL_UNIMPLEMENTED(); } zd.SetUint(vform, i, result); @@ -6914,7 +7150,7 @@ LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, for (int i = 0; i < pn.GetChunkCount(); i++) { LogicPRegister::ChunkType op1 = pn.GetChunk(i); LogicPRegister::ChunkType op2 = pm.GetChunk(i); - LogicPRegister::ChunkType result; + 
LogicPRegister::ChunkType result = 0; switch (op) { case ANDS_p_p_pp_z: case AND_p_p_pp_z: @@ -6945,7 +7181,6 @@ LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, result = op1 | op2; break; default: - result = 0; VIXL_UNIMPLEMENTED(); } pd.SetChunk(i, result); @@ -6960,7 +7195,7 @@ LogicVRegister Simulator::SVEBitwiseImmHelper( uint64_t imm) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t op1 = zd.Uint(vform, i); - uint64_t result; + uint64_t result = 0; switch (op) { case AND_z_zi: result = op1 & imm; @@ -6972,7 +7207,6 @@ LogicVRegister Simulator::SVEBitwiseImmHelper( result = op1 | imm; break; default: - result = 0; VIXL_UNIMPLEMENTED(); } zd.SetUint(vform, i, result); @@ -7363,10 +7597,39 @@ int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform, case SVE_ALL: return all; } - // Unnamed cases archicturally return 0. + // Unnamed cases architecturally return 0. return 0; } +LogicPRegister Simulator::match(VectorFormat vform, + LogicPRegister dst, + const LogicVRegister& haystack, + const LogicVRegister& needles, + bool negate_match) { + SimVRegister ztemp; + SimPRegister ptemp; + + pfalse(dst); + int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform); + for (int i = 0; i < lanes_per_segment; i++) { + dup_elements_to_segments(vform, ztemp, needles, i); + SVEIntCompareVectorsHelper(eq, + vform, + ptemp, + GetPTrue(), + haystack, + ztemp, + false, + LeaveFlags); + SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp); + } + if (negate_match) { + ptrue(vform, ptemp, SVE_ALL); + SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp); + } + return dst; +} + uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const { if (IsContiguous()) { return base_ + (lane * GetRegCount()) * GetMsizeInBytes(); @@ -7412,6 +7675,160 @@ uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const { return base_ + (offset << vector_shift_); } +LogicVRegister Simulator::pack_odd_elements(VectorFormat 
vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister zero; + zero.Clear(); + return uzp2(vform, dst, src, zero); +} + +LogicVRegister Simulator::pack_even_elements(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister zero; + zero.Clear(); + return uzp1(vform, dst, src, zero); +} + +LogicVRegister Simulator::adcl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool top) { + unsigned reg_size = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize)); + + for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { + uint64_t left = src1.Uint(vform, i + (top ? 1 : 0)); + uint64_t right = dst.Uint(vform, i); + unsigned carry_in = src2.Uint(vform, i + 1) & 1; + std::pair<uint64_t, uint8_t> val_and_flags = + AddWithCarry(reg_size, left, right, carry_in); + + // Set even lanes to the result of the addition. + dst.SetUint(vform, i, val_and_flags.first); + + // Set odd lanes to the carry flag from the addition. + uint64_t carry_out = (val_and_flags.second >> 1) & 1; + dst.SetUint(vform, i + 1, carry_out); + } + return dst; +} + +// Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add +// the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst. 
+// +// Matrices of the form: +// +// src1 = ( a b c d e f g h ) src2 = ( A B ) +// ( i j k l m n o p ) ( C D ) +// ( E F ) +// ( G H ) +// ( I J ) +// ( K L ) +// ( M N ) +// ( O P ) +// +// Are stored in the input vector registers as: +// +// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 +// src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ] +// src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ] +// +LogicVRegister Simulator::matmul(VectorFormat vform_dst, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool src1_signed, + bool src2_signed) { + // Two destination forms are supported: Q register containing four S-sized + // elements (4S) and Z register containing n S-sized elements (VnS). + VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS)); + VectorFormat vform_src = kFormatVnB; + int b_per_segment = kQRegSize / kBRegSize; + int s_per_segment = kQRegSize / kSRegSize; + int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {}; + int segment_count = LaneCountFromFormat(vform_dst) / 4; + for (int seg = 0; seg < segment_count; seg++) { + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + int dstidx = (2 * i) + j + (seg * s_per_segment); + int64_t sum = srcdst.Int(vform_dst, dstidx); + for (int k = 0; k < 8; k++) { + int idx1 = (8 * i) + k + (seg * b_per_segment); + int idx2 = (8 * j) + k + (seg * b_per_segment); + int64_t e1 = src1_signed ? src1.Int(vform_src, idx1) + : src1.Uint(vform_src, idx1); + int64_t e2 = src2_signed ? src2.Int(vform_src, idx2) + : src2.Uint(vform_src, idx2); + sum += e1 * e2; + } + result[dstidx] = sum; + } + } + } + srcdst.SetIntArray(vform_dst, result); + return srcdst; +} + +// Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2 +// result to the matrix in srcdst, and write back to srcdst. 
+// +// Matrices of the form: +// +// src1 = ( a b ) src2 = ( A B ) +// ( c d ) ( C D ) +// +// Are stored in the input vector registers as: +// +// 3 2 1 0 +// src1 = [ d | c | b | a ] +// src2 = [ D | B | C | A ] +// +template <typename T> +LogicVRegister Simulator::fmatmul(VectorFormat vform, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + T result[kZRegMaxSizeInBytes / sizeof(T)]; + int T_per_segment = 4; + int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T)); + for (int seg = 0; seg < segment_count; seg++) { + int segoff = seg * T_per_segment; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff), + src2.Float<T>(2 * j + 0 + segoff)); + T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff), + src2.Float<T>(2 * j + 1 + segoff)); + T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0); + result[2 * i + j + segoff] = FPAdd(sum, prod1); + } + } + } + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Elements outside a multiple of 4T are set to zero. This happens only + // for double precision operations, when the VL is a multiple of 128 bits, + // but not a mutiple of 256 bits. + T value = (i < (T_per_segment * segment_count)) ? 
result[i] : 0; + srcdst.SetFloat<T>(vform, i, value); + } + return srcdst; +} + +LogicVRegister Simulator::fmatmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fmatmul<float>(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fmatmul<double>(vform, dst, src1, src2); + } + return dst; +} } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc index 56c6eaf6..e18f846a 100644 --- a/src/aarch64/macro-assembler-aarch64.cc +++ b/src/aarch64/macro-assembler-aarch64.cc @@ -133,14 +133,14 @@ void LiteralPool::Emit(EmitOption option) { masm_->SetAllowMacroInstructions(false); #endif if (option == kBranchRequired) { - ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize); + ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize); masm_->b(&end_of_pool); } { // Marker indicating the size of the literal pool in 32-bit words. VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0); - ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize); + ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize); masm_->ldr(xzr, static_cast<int>(pool_size / kWRegSizeInBytes)); } @@ -510,44 +510,6 @@ int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm, } -bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm, - const Register& dst, - uint64_t imm) { - bool emit_code = masm != NULL; - unsigned n, imm_s, imm_r; - int reg_size = dst.GetSizeInBits(); - - if (IsImmMovz(imm, reg_size) && !dst.IsSP()) { - // Immediate can be represented in a move zero instruction. Movz can't write - // to the stack pointer. - if (emit_code) { - masm->movz(dst, imm); - } - return true; - } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) { - // Immediate can be represented in a move negative instruction. 
Movn can't - // write to the stack pointer. - if (emit_code) { - masm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask)); - } - return true; - } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) { - // Immediate can be represented in a logical orr instruction. - VIXL_ASSERT(!dst.IsZero()); - if (emit_code) { - masm->LogicalImmediate(dst, - AppropriateZeroRegFor(dst), - n, - imm_s, - imm_r, - ORR); - } - return true; - } - return false; -} - - void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) { VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) && ((bit == -1) || (type >= kBranchTypeFirstUsingBit))); @@ -1135,11 +1097,15 @@ void MacroAssembler::Movi(const VRegister& vd, void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) { // TODO: Move 128-bit values in a more efficient way. VIXL_ASSERT(vd.Is128Bits()); - UseScratchRegisterScope temps(this); Movi(vd.V2D(), lo); - Register temp = temps.AcquireX(); - Mov(temp, hi); - Ins(vd.V2D(), 1, temp); + if (hi != lo) { + UseScratchRegisterScope temps(this); + // TODO: Figure out if using a temporary V register to materialise the + // immediate is better. 
+ Register temp = temps.AcquireX(); + Mov(temp, hi); + Ins(vd.V2D(), 1, temp); + } } @@ -1946,6 +1912,9 @@ LS_MACRO_LIST(DEFINE_FUNCTION) void MacroAssembler::LoadStoreMacro(const CPURegister& rt, const MemOperand& addr, LoadStoreOp op) { + VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePostIndex() || + addr.IsImmediatePreIndex() || addr.IsRegisterOffset()); + // Worst case is ldr/str pre/post index: // * 1 instruction for ldr/str // * up to 4 instructions to materialise the constant @@ -1966,11 +1935,11 @@ void MacroAssembler::LoadStoreMacro(const CPURegister& rt, Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister()); Mov(temp, addr.GetOffset()); LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op); - } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) { + } else if (addr.IsImmediatePostIndex() && !IsImmLSUnscaled(offset)) { // Post-index beyond unscaled addressing range. LoadStore(rt, MemOperand(addr.GetBaseRegister()), op); Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset)); - } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) { + } else if (addr.IsImmediatePreIndex() && !IsImmLSUnscaled(offset)) { // Pre-index beyond unscaled addressing range. 
Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset)); LoadStore(rt, MemOperand(addr.GetBaseRegister()), op); @@ -2018,11 +1987,11 @@ void MacroAssembler::LoadStorePairMacro(const CPURegister& rt, Register temp = temps.AcquireSameSizeAs(base); Add(temp, base, offset); LoadStorePair(rt, rt2, MemOperand(temp), op); - } else if (addr.IsPostIndex()) { + } else if (addr.IsImmediatePostIndex()) { LoadStorePair(rt, rt2, MemOperand(base), op); Add(base, base, offset); } else { - VIXL_ASSERT(addr.IsPreIndex()); + VIXL_ASSERT(addr.IsImmediatePreIndex()); Add(base, base, offset); LoadStorePair(rt, rt2, MemOperand(base), op); } diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h index 8becddbb..b1e9ec5c 100644 --- a/src/aarch64/macro-assembler-aarch64.h +++ b/src/aarch64/macro-assembler-aarch64.h @@ -716,9 +716,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { static int MoveImmediateHelper(MacroAssembler* masm, const Register& rd, uint64_t imm); - static bool OneInstrMoveImmediateHelper(MacroAssembler* masm, - const Register& dst, - uint64_t imm); // Logical macros. 
@@ -2819,7 +2816,11 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { V(uzp1, Uzp1) \ V(uzp2, Uzp2) \ V(zip1, Zip1) \ - V(zip2, Zip2) + V(zip2, Zip2) \ + V(smmla, Smmla) \ + V(ummla, Ummla) \ + V(usmmla, Usmmla) \ + V(usdot, Usdot) #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ void MASM(const VRegister& vd, const VRegister& vn, const VRegister& vm) { \ @@ -2971,7 +2972,10 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { V(umlal, Umlal) \ V(umlal2, Umlal2) \ V(umlsl, Umlsl) \ - V(umlsl2, Umlsl2) + V(umlsl2, Umlsl2) \ + V(sudot, Sudot) \ + V(usdot, Usdot) + #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ void MASM(const VRegister& vd, \ @@ -3056,13 +3060,19 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { V(mul, Mul) \ V(orr, Orr) \ V(sabd, Sabd) \ + V(shadd, Shadd) \ V(smax, Smax) \ - V(smulh, Smulh) \ V(smin, Smin) \ + V(smulh, Smulh) \ + V(sqadd, Sqadd) \ + V(srhadd, Srhadd) \ V(uabd, Uabd) \ + V(uhadd, Uhadd) \ V(umax, Umax) \ V(umin, Umin) \ - V(umulh, Umulh) + V(umulh, Umulh) \ + V(uqadd, Uqadd) \ + V(urhadd, Urhadd) #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ void MASM(const ZRegister& zd, \ @@ -4103,7 +4113,11 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { void Ext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm, - unsigned offset); + unsigned offset) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ext(zd, zn, zm, offset); + } void Fabd(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn, @@ -4245,15 +4259,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { SingleEmissionCheckScope guard(this); fcmgt(pd, pg, zn, zm); } - void Fcmla(const ZRegister& zda, + void Fcmla(const ZRegister& zd, const PRegisterM& pg, + const ZRegister& za, const ZRegister& zn, const ZRegister& zm, - int rot) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zda, pg, 
zda); - fcmla(zda, pg, zn, zm, rot); - } + int rot); void Fcmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm, @@ -4862,6 +4873,18 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { void Ld1rqw(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr); + void Ld1rob(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rod(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1roh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1row(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); void Ld1rsb(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { @@ -6366,6 +6389,1103 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { zip2(zd, zn, zm); } + // SVE2 + void Adclb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Adclt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + addhnb(zd, zn, zm); + } + void Addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + addhnt(zd, zn, zm); + } + void Addp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Bcax(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bdep(zd, zn, zm); + } + void Bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope 
guard(this); + bext(zd, zn, zm); + } + void Bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bgrp(zd, zn, zm); + } + void Bsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Bsl1n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Bsl2n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + void Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Cmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + void Cmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Eor3(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eorbt(zd, zn, zm); + } + void Eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eortb(zd, zn, zm); + } + void Faddp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtlt(zd, pg, zn); + } + void Fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + 
SingleEmissionCheckScope guard(this); + fcvtnt(zd, pg, zn); + } + void Fcvtx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(zn.IsLaneSizeD()); + MovprfxHelperScope guard(this, zd.VnD(), pg, zd.VnD()); + fcvtx(zd, pg.Merging(), zn); + } + void Fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtxnt(zd, pg, zn); + } + void Flogb(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + flogb(zd, pg.Merging(), zn); + } + void Fmaxnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fmaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fminnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Fmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Fmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Fmlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& 
zm, + int index); + void Histcnt(const ZRegister& zd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + histcnt(zd, pg, zn, zm); + } + void Histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + histseg(zd, zn, zm); + } + void Ldnt1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnt1sb(zt, pg, addr); + } + void Ldnt1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnt1sh(zt, pg, addr); + } + void Ldnt1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnt1sw(zt, pg, addr); + } + void Match(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + match(pd, pg, zn, zm); + } + void Mla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Mls(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mul(zd, zn, zm, index); + } + void Mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mul(zd, zn, zm); + } + void Nbsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void 
Nmatch(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nmatch(pd, pg, zn, zm); + } + void Pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + pmul(zd, zn, zm); + } + void Pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + pmullb(zd, zn, zm); + } + void Pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + pmullt(zd, zn, zm); + } + void Raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + raddhnb(zd, zn, zm); + } + void Raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + raddhnt(zd, zn, zm); + } + void Rshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rshrnb(zd, zn, shift); + } + void Rshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rshrnt(zd, zn, shift); + } + void Rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rsubhnb(zd, zn, zm); + } + void Rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rsubhnt(zd, zn, zm); + } + void Saba(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); 
+ void Sabalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sabalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sabdlb(zd, zn, zm); + } + void Sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sabdlt(zd, zn, zm); + } + void Sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sadalp(zda, pg, zn); + } + void Saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddlb(zd, zn, zm); + } + void Saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddlbt(zd, zn, zm); + } + void Saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddlt(zd, zn, zm); + } + void Saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddwb(zd, zn, zm); + } + void Saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddwt(zd, zn, zm); + } + void Sbclb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sbclt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Shrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + 
VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + shrnb(zd, zn, shift); + } + void Shrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + shrnt(zd, zn, shift); + } + void Shsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sli(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sli(zd, zn, shift); + } + void Smaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Smlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smulh(zd, zn, zm); + } + void Smullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + 
SingleEmissionCheckScope guard(this); + smullb(zd, zn, zm, index); + } + void Smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smullb(zd, zn, zm); + } + void Smullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smullt(zd, zn, zm, index); + } + void Smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smullt(zd, zn, zm); + } + void Sqabs(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + sqabs(zd, pg.Merging(), zn); + } + void Sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Sqdmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlalbt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlslbt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlslt(const ZRegister& 
zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmulh(zd, zn, zm, index); + } + void Sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmulh(zd, zn, zm); + } + void Sqdmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullb(zd, zn, zm, index); + } + void Sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullb(zd, zn, zm); + } + void Sqdmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullt(zd, zn, zm, index); + } + void Sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullt(zd, zn, zm); + } + void Sqneg(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + sqneg(zd, pg.Merging(), zn); + } + void Sqrdcmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + void Sqrdcmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Sqrdmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqrdmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqrdmlsh(const ZRegister& 
zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqrdmlsh(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrdmulh(zd, zn, zm, index); + } + void Sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrdmulh(zd, zn, zm); + } + void Sqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrnb(zd, zn, shift); + } + void Sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrnt(zd, zn, shift); + } + void Sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrunb(zd, zn, shift); + } + void Sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrunt(zd, zn, shift); + } + void Sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + sqshl(zd, pg, zd, shift); + } + void Sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sqshlu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + sqshlu(zd, pg, zd, shift); + } + void 
Sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrnb(zd, zn, shift); + } + void Sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrnt(zd, zn, shift); + } + void Sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrunb(zd, zn, shift); + } + void Sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrunt(zd, zn, shift); + } + void Sqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sqxtnb(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtnb(zd, zn); + } + void Sqxtnt(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtnt(zd, zn); + } + void Sqxtunb(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtunb(zd, zn); + } + void Sqxtunt(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtunt(zd, zn); + } + void Sri(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sri(zd, zn, shift); + } + void Srshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Srshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + srshr(zd, pg, zd, shift); + } + void Srsra(const 
ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Sshllb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sshllb(zd, zn, shift); + } + void Sshllt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sshllt(zd, zn, shift); + } + void Ssra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssublb(zd, zn, zm); + } + void Ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssublbt(zd, zn, zm); + } + void Ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssublt(zd, zn, zm); + } + void Ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssubltb(zd, zn, zm); + } + void Ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssubwb(zd, zn, zm); + } + void Ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssubwt(zd, zn, zm); + } + void Subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + subhnb(zd, zn, zm); + } + void Subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope 
guard(this); + subhnt(zd, zn, zm); + } + void Suqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Tbl(const ZRegister& zd, + const ZRegister& zn1, + const ZRegister& zn2, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbl(zd, zn1, zn2, zm); + } + void Tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbx(zd, zn, zm); + } + void Uaba(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Uabalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Uabalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uabdlb(zd, zn, zm); + } + void Uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uabdlt(zd, zn, zm); + } + void Uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uadalp(zda, pg, zn); + } + void Uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddlb(zd, zn, zm); + } + void Uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddlt(zd, zn, zm); + } + void Uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddwb(zd, zn, zm); + } + void Uaddwt(const 
ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddwt(zd, zn, zm); + } + void Uhsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Umaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Umlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umulh(zd, zn, zm); + } + void Umullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umullb(zd, zn, zm, index); + } + void Umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umullb(zd, zn, zm); + } + void Umullt(const ZRegister& zd, + const 
ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umullt(zd, zn, zm, index); + } + void Umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umullt(zd, zn, zm); + } + void Uqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqrshrnb(zd, zn, shift); + } + void Uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqrshrnt(zd, zn, shift); + } + void Uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + uqshl(zd, pg, zd, shift); + } + void Uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqshrnb(zd, zn, shift); + } + void Uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqshrnt(zd, zn, shift); + } + void Uqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uqxtnb(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqxtnb(zd, zn); + } + void Uqxtnt(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqxtnt(zd, zn); + } + void Urecpe(const ZRegister& zd, const PRegister& pg, 
const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + urecpe(zd, pg.Merging(), zn); + } + void Urshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Urshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + urshr(zd, pg, zd, shift); + } + void Ursqrte(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + ursqrte(zd, pg.Merging(), zn); + } + void Ursra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Ushllb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ushllb(zd, zn, shift); + } + void Ushllt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ushllt(zd, zn, shift); + } + void Usqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Usra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + usublb(zd, zn, zm); + } + void Usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + usublt(zd, zn, zm); + } + void Usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + usubwb(zd, zn, zm); + } + void Usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + 
VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + usubwt(zd, zn, zm); + } + void Whilege(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilege(pd, rn, rm); + } + void Whilegt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilegt(pd, rn, rm); + } + void Whilehi(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilehi(pd, rn, rm); + } + void Whilehs(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilehs(pd, rn, rm); + } + void Whilerw(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilerw(pd, rn, rm); + } + void Whilewr(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilewr(pd, rn, rm); + } + void Xar(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm)) { + SingleEmissionCheckScope guard(this); + xar(zd, zm, zn, shift); + } else { + MovprfxHelperScope guard(this, zd, zn); + xar(zd, zd, zm, shift); + } + } + void Fmmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Ummla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Usmmla(const ZRegister& zd, + const 
ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Usdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Usdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sudot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + template <typename T> Literal<T>* CreateLiteralDestroyedWithPool(T value) { return new Literal<T>(value, @@ -6783,7 +7903,7 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // SVE_MUL_VL). The ratio log2 of VL to memory access size is passed as // vl_divisor_log2; pass -1 to indicate no dependency. template <typename Tg, typename Tf> - void SVELoadStoreScalarImmHelper( + void SVELoadStoreNTBroadcastQOHelper( const ZRegister& zt, const Tg& pg, const SVEMemOperand& addr, @@ -6816,25 +7936,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { int imm, int shift); - typedef void (Assembler::*IntArithFn)(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm); + typedef void (Assembler::*Int3ArithFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm); + + typedef void (Assembler::*Int4ArithFn)(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); - typedef void (Assembler::*IntWideImmFn)(const ZRegister& zd, - const ZRegister& zn, - int imm); + typedef void (Assembler::*IntArithImmFn)(const ZRegister& zd, + const ZRegister& zn, + int imm); - typedef void (Assembler::*IntArithIndexFn)(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - int index); + typedef void (Assembler::*ZZZImmFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm); typedef void (MacroAssembler::*SVEArithPredicatedFn)(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn, const ZRegister& zm); - void IntWideImmHelper(IntWideImmFn imm_fn, + 
void IntWideImmHelper(IntArithImmFn imm_fn, SVEArithPredicatedFn reg_fn, const ZRegister& zd, const ZRegister& zn, @@ -6860,18 +7985,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { const ZRegister& zn, IntegerOperand imm); - void SVESdotUdotHelper(IntArithFn fn, + void AbsoluteDifferenceAccumulate(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void FourRegDestructiveHelper(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void FourRegDestructiveHelper(Int4ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void SVEDotIndexHelper(ZZZImmFn fn, const ZRegister& zd, const ZRegister& za, const ZRegister& zn, - const ZRegister& zm); - - void SVESdotUdotIndexHelper(IntArithIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); + const ZRegister& zm, + int index); // For noncommutative arithmetic operations. void NoncommutativeArithmeticHelper(const ZRegister& zd, @@ -6915,12 +8052,24 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { const ZRegister& zm, int index); - void FPMulAddIndexHelper(SVEMulAddIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); + void FourRegOneImmDestructiveHelper(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int imm); + + void ShiftRightAccumulate(IntArithImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int imm); + + void ComplexAddition(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); // Tell whether any of the macro instruction can be used. 
When false the // MacroAssembler will assert if a method which can emit a variable number diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc index b107f132..6bf56076 100644 --- a/src/aarch64/macro-assembler-sve-aarch64.cc +++ b/src/aarch64/macro-assembler-sve-aarch64.cc @@ -89,7 +89,7 @@ bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option, return false; } -void MacroAssembler::IntWideImmHelper(IntWideImmFn imm_fn, +void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn, SVEArithPredicatedFn reg_macro, const ZRegister& zd, const ZRegister& zn, @@ -130,7 +130,7 @@ void MacroAssembler::Mul(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); - IntWideImmFn imm_fn = &Assembler::mul; + IntArithImmFn imm_fn = &Assembler::mul; SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); } @@ -140,7 +140,7 @@ void MacroAssembler::Smin(const ZRegister& zd, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInSignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::smin; + IntArithImmFn imm_fn = &Assembler::smin; SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); } @@ -150,7 +150,7 @@ void MacroAssembler::Smax(const ZRegister& zd, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInSignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::smax; + IntArithImmFn imm_fn = &Assembler::smax; SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); } @@ -160,7 +160,7 @@ void MacroAssembler::Umax(const ZRegister& zd, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::umax; + IntArithImmFn imm_fn = &Assembler::umax; SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax; 
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); } @@ -170,7 +170,7 @@ void MacroAssembler::Umin(const ZRegister& zd, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::umin; + IntArithImmFn imm_fn = &Assembler::umin; SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); } @@ -562,80 +562,143 @@ void MacroAssembler::FPCommutativeArithmeticHelper( } } -void MacroAssembler::Asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::asr), - static_cast<SVEArithPredicatedFn>( - &Assembler::asrr)); -} - -void MacroAssembler::Lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::lsl), - static_cast<SVEArithPredicatedFn>( - &Assembler::lslr)); -} - -void MacroAssembler::Lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::lsr), - static_cast<SVEArithPredicatedFn>( - &Assembler::lsrr)); -} - -void MacroAssembler::Fdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fdiv), - static_cast<SVEArithPredicatedFn>( - &Assembler::fdivr)); -} - -void MacroAssembler::Fsub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - 
VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fsub), - static_cast<SVEArithPredicatedFn>( - &Assembler::fsubr)); -} +// Instructions of the form "inst zda, zn, zm, #num", where they are +// non-commutative and no reversed form is provided. +#define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \ + V(Cmla, cmla) \ + V(Sqrdcmlah, sqrdcmlah) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int imm) { \ + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ + UseScratchRegisterScope temps(this); \ + VIXL_ASSERT(AreSameLaneSize(zn, zm)); \ + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); \ + Mov(ztmp, zd.Aliases(zn) ? zn : zm); \ + MovprfxHelperScope guard(this, zd, za); \ + ASMFN(zd, \ + (zd.Aliases(zn) ? ztmp : zn), \ + (zd.Aliases(zm) ? ztmp : zm), \ + imm); \ + } else { \ + MovprfxHelperScope guard(this, zd, za); \ + ASMFN(zd, zn, zm, imm); \ + } \ + } +VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +// Instructions of the form "inst zda, zn, zm, #num, #num", where they are +// non-commutative and no reversed form is provided. +#define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \ + V(Cmla, cmla) \ + V(Sqrdcmlah, sqrdcmlah) + +// This doesn't handle zm when it's out of the range that can be encoded in +// instruction. The range depends on element size: z0-z7 for H, z0-15 for S. 
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int index, \ + int rot) { \ + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ + UseScratchRegisterScope temps(this); \ + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); \ + { \ + MovprfxHelperScope guard(this, ztmp, za); \ + ASMFN(ztmp, zn, zm, index, rot); \ + } \ + Mov(zd, ztmp); \ + } else { \ + MovprfxHelperScope guard(this, zd, za); \ + ASMFN(zd, zn, zm, index, rot); \ + } \ + } +VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +// Instructions of the form "inst zda, pg, zda, zn", where they are +// non-commutative and no reversed form is provided. +#define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \ + V(Addp, addp) \ + V(Faddp, faddp) \ + V(Fmaxnmp, fmaxnmp) \ + V(Fminnmp, fminnmp) \ + V(Fmaxp, fmaxp) \ + V(Fminp, fminp) \ + V(Fscale, fscale) \ + V(Smaxp, smaxp) \ + V(Sminp, sminp) \ + V(Suqadd, suqadd) \ + V(Umaxp, umaxp) \ + V(Uminp, uminp) \ + V(Usqadd, usqadd) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const PRegisterM& pg, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + if (zd.Aliases(zm) && !zd.Aliases(zn)) { \ + UseScratchRegisterScope temps(this); \ + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \ + Mov(scratch, zm); \ + MovprfxHelperScope guard(this, zd, pg, zn); \ + ASMFN(zd, pg, zd, scratch); \ + } else { \ + MovprfxHelperScope guard(this, zd, pg, zn); \ + ASMFN(zd, pg, zd, zm); \ + } \ + } +VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +// Instructions of the form "inst zda, pg, zda, zn", where they are +// non-commutative and a reversed form is provided. 
+#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \ + V(Asr, asr) \ + V(Fdiv, fdiv) \ + V(Fsub, fsub) \ + V(Lsl, lsl) \ + V(Lsr, lsr) \ + V(Sdiv, sdiv) \ + V(Shsub, shsub) \ + V(Sqrshl, sqrshl) \ + V(Sqshl, sqshl) \ + V(Sqsub, sqsub) \ + V(Srshl, srshl) \ + V(Sub, sub) \ + V(Udiv, udiv) \ + V(Uhsub, uhsub) \ + V(Uqrshl, uqrshl) \ + V(Uqshl, uqshl) \ + V(Uqsub, uqsub) \ + V(Urshl, urshl) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const PRegisterM& pg, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + NoncommutativeArithmeticHelper(zd, \ + pg, \ + zn, \ + zm, \ + static_cast<SVEArithPredicatedFn>( \ + &Assembler::ASMFN), \ + static_cast<SVEArithPredicatedFn>( \ + &Assembler::ASMFN##r)); \ + } +VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC void MacroAssembler::Fadd(const ZRegister& zd, const PRegisterM& pg, @@ -828,14 +891,14 @@ void MacroAssembler::Index(const ZRegister& zd, static IndexOperand Prepare(MacroAssembler* masm, UseScratchRegisterScope* temps, const Operand& op, - const ZRegister& zd) { + const ZRegister& zd_inner) { // Look for encodable immediates. 
int imm; if (op.IsImmediate()) { - if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd, &imm)) { + if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) { return IndexOperand(imm); } - Register scratch = temps->AcquireRegisterToHoldLane(zd); + Register scratch = temps->AcquireRegisterToHoldLane(zd_inner); masm->Mov(scratch, op); return IndexOperand(scratch); } else { @@ -1022,21 +1085,6 @@ void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd, VIXL_UNREACHABLE(); } -void MacroAssembler::Sdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::sdiv), - static_cast<SVEArithPredicatedFn>( - &Assembler::sdivr)); -} - void MacroAssembler::Sub(const ZRegister& zd, IntegerOperand imm, const ZRegister& zm) { @@ -1058,36 +1106,6 @@ void MacroAssembler::Sub(const ZRegister& zd, } } -void MacroAssembler::Sub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::sub), - static_cast<SVEArithPredicatedFn>( - &Assembler::subr)); -} - -void MacroAssembler::Udiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::udiv), - static_cast<SVEArithPredicatedFn>( - &Assembler::udivr)); -} - void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr, @@ -1135,7 +1153,7 @@ void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt, } template <typename Tg, typename Tf> -void MacroAssembler::SVELoadStoreScalarImmHelper( +void 
MacroAssembler::SVELoadStoreNTBroadcastQOHelper( const ZRegister& zt, const Tg& pg, const SVEMemOperand& addr, @@ -1157,6 +1175,13 @@ void MacroAssembler::SVELoadStoreScalarImmHelper( return; } + if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && + addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + if (addr.IsEquivalentToScalar()) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); @@ -1473,169 +1498,176 @@ void MacroAssembler::Ldff1sw(const ZRegister& zt, static_cast<SVELoad1Fn>(&Assembler::ldff1sw)); } -void MacroAssembler::Ld1rqb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqb, - 4, - 4, - NO_SVE_OFFSET_MODIFIER, - -1); -} +#define VIXL_SVE_LD1R_LIST(V) \ + V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5) + +#define VIXL_DEFINE_MASM_FUNC(SZ, SH) \ + void MacroAssembler::Ld1r##SZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SVELoadStoreNTBroadcastQOHelper(zt, \ + pg, \ + addr, \ + &MacroAssembler::ld1r##SZ, \ + 4, \ + SH, \ + NO_SVE_OFFSET_MODIFIER, \ + -1); \ + } -void MacroAssembler::Ld1rqd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqd, - 4, - 4, - NO_SVE_OFFSET_MODIFIER, - -1); -} +VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC) -void MacroAssembler::Ld1rqh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqh, - 4, - 4, - NO_SVE_OFFSET_MODIFIER, - -1); -} - -void MacroAssembler::Ld1rqw(const 
ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqw, - 4, - 4, - NO_SVE_OFFSET_MODIFIER, - -1); -} +#undef VIXL_DEFINE_MASM_FUNC +#undef VIXL_SVE_LD1R_LIST void MacroAssembler::Ldnt1b(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1b, - 4, - 0, - SVE_MUL_VL); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1b(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1b, + 4, + 0, + SVE_MUL_VL); + } } void MacroAssembler::Ldnt1d(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1d, - 4, - 0, - SVE_MUL_VL); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1d(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1d, + 4, + 0, + SVE_MUL_VL); + } } void MacroAssembler::Ldnt1h(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1h, - 4, - 0, - SVE_MUL_VL); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1h(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1h, + 4, + 0, + SVE_MUL_VL); + } } void MacroAssembler::Ldnt1w(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1w, - 4, - 0, - SVE_MUL_VL); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1w(zt, 
pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1w, + 4, + 0, + SVE_MUL_VL); + } } void MacroAssembler::Stnt1b(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::stnt1b, - 4, - 0, - SVE_MUL_VL); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + stnt1b(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1b, + 4, + 0, + SVE_MUL_VL); + } } void MacroAssembler::Stnt1d(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::stnt1d, - 4, - 0, - SVE_MUL_VL); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + stnt1d(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1d, + 4, + 0, + SVE_MUL_VL); + } } void MacroAssembler::Stnt1h(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::stnt1h, - 4, - 0, - SVE_MUL_VL); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + stnt1h(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1h, + 4, + 0, + SVE_MUL_VL); + } } void MacroAssembler::Stnt1w(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::stnt1w, - 4, - 0, - SVE_MUL_VL); -} - -void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); 
+ stnt1w(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1w, + 4, + 0, + SVE_MUL_VL); + } +} + +void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { if (zd.Aliases(za)) { // zda = zda + (zn . zm) SingleEmissionCheckScope guard(this); @@ -1660,20 +1692,15 @@ void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn, } } -void MacroAssembler::SVESdotUdotHelper(IntArithFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm) { - if (zd.Aliases(za)) { - // zda = zda + (zn . zm) - SingleEmissionCheckScope guard(this); - (this->*fn)(zd, zn, zm); - - } else if (zd.Aliases(zn) || zd.Aliases(zm)) { - // zdn = za + (zdn . zm) - // zdm = za + (zn . zdm) - // zdnm = za + (zdnm . zdnm) +void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { + // zd = za . zd . zm + // zd = za . zn . zd + // zd = za . zd . zd UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { @@ -1683,61 +1710,256 @@ void MacroAssembler::SVESdotUdotHelper(IntArithFn fn, Mov(zd, scratch); } else { - // zd = za + (zn . zm) MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zn, zm); } } -void MacroAssembler::Fscale(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { +void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { + // zd = za . zd . zm + // zd = za . zn . zd + // zd = za . zd . 
zd UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); - Mov(scratch, zm); - MovprfxHelperScope guard(this, zd, pg, zn); - fscale(zd, pg, zd, scratch); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, scratch, zn, zm); + } + + Mov(zd, scratch); } else { - MovprfxHelperScope guard(this, zd, pg, zn); - fscale(zd, pg, zd, zm); + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zd, zn, zm); + } +} + +void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int imm) { + if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { + // zd = za . zd . zm[i] + // zd = za . zn . zd[i] + // zd = za . zd . zd[i] + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm, imm); + } + + Mov(zd, scratch); + } else { + // zd = za . zn . zm[i] + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm, imm); + } +} + +void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (zn.Aliases(zm)) { + // If zn == zm, the difference is zero. 
+ if (!zd.Aliases(za)) { + Mov(zd, za); + } + } else if (zd.Aliases(za)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm); + } else if (zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Mov(ztmp, zn); + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, ztmp, zm); + } else if (zd.Aliases(zm)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Mov(ztmp, zm); + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, ztmp); + } else { + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm); } } +#define VIXL_SVE_4REG_LIST(V) \ + V(Saba, saba, AbsoluteDifferenceAccumulate) \ + V(Uaba, uaba, AbsoluteDifferenceAccumulate) \ + V(Sabalb, sabalb, AbsoluteDifferenceAccumulate) \ + V(Sabalt, sabalt, AbsoluteDifferenceAccumulate) \ + V(Uabalb, uabalb, AbsoluteDifferenceAccumulate) \ + V(Uabalt, uabalt, AbsoluteDifferenceAccumulate) \ + V(Sdot, sdot, FourRegDestructiveHelper) \ + V(Udot, udot, FourRegDestructiveHelper) \ + V(Adclb, adclb, FourRegDestructiveHelper) \ + V(Adclt, adclt, FourRegDestructiveHelper) \ + V(Sbclb, sbclb, FourRegDestructiveHelper) \ + V(Sbclt, sbclt, FourRegDestructiveHelper) \ + V(Smlalb, smlalb, FourRegDestructiveHelper) \ + V(Smlalt, smlalt, FourRegDestructiveHelper) \ + V(Smlslb, smlslb, FourRegDestructiveHelper) \ + V(Smlslt, smlslt, FourRegDestructiveHelper) \ + V(Umlalb, umlalb, FourRegDestructiveHelper) \ + V(Umlalt, umlalt, FourRegDestructiveHelper) \ + V(Umlslb, umlslb, FourRegDestructiveHelper) \ + V(Umlslt, umlslt, FourRegDestructiveHelper) \ + V(Bcax, bcax, FourRegDestructiveHelper) \ + V(Bsl, bsl, FourRegDestructiveHelper) \ + V(Bsl1n, bsl1n, FourRegDestructiveHelper) \ + V(Bsl2n, bsl2n, FourRegDestructiveHelper) \ + V(Eor3, eor3, FourRegDestructiveHelper) \ + V(Nbsl, nbsl, FourRegDestructiveHelper) \ + V(Fmlalb, fmlalb, FourRegDestructiveHelper) \ + 
V(Fmlalt, fmlalt, FourRegDestructiveHelper) \ + V(Fmlslb, fmlslb, FourRegDestructiveHelper) \ + V(Fmlslt, fmlslt, FourRegDestructiveHelper) \ + V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper) \ + V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \ + V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper) \ + V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper) \ + V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \ + V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper) \ + V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper) \ + V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper) \ + V(Fmmla, fmmla, FourRegDestructiveHelper) \ + V(Smmla, smmla, FourRegDestructiveHelper) \ + V(Ummla, ummla, FourRegDestructiveHelper) \ + V(Usmmla, usmmla, FourRegDestructiveHelper) \ + V(Usdot, usdot, FourRegDestructiveHelper) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + HELPER(&Assembler::ASMFN, zd, za, zn, zm); \ + } +VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +#define VIXL_SVE_4REG_1IMM_LIST(V) \ + V(Fmla, fmla, FourRegOneImmDestructiveHelper) \ + V(Fmls, fmls, FourRegOneImmDestructiveHelper) \ + V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper) \ + V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper) \ + V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper) \ + V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper) \ + V(Mla, mla, FourRegOneImmDestructiveHelper) \ + V(Mls, mls, FourRegOneImmDestructiveHelper) \ + V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \ + V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \ + V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \ + V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \ + V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \ + V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \ + V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \ + 
V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \ + V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \ + V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \ + V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \ + V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \ + V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \ + V(Umlslt, umlslt, FourRegOneImmDestructiveHelper) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int imm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm); \ + } +VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + void MacroAssembler::Sdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, - const ZRegister& zm) { + const ZRegister& zm, + int index) { VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotHelper(&Assembler::sdot, zd, za, zn, zm); + SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); } -void MacroAssembler::Sdot(const ZRegister& zd, +void MacroAssembler::Udot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int index) { VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); + SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); } -void MacroAssembler::Udot(const ZRegister& zd, +void MacroAssembler::Sudot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index); +} + +void MacroAssembler::Usdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index); +} + +void MacroAssembler::Cdot(const ZRegister& 
zd, const ZRegister& za, const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotHelper(&Assembler::udot, zd, za, zn, zm); + const ZRegister& zm, + int index, + int rot) { + // This doesn't handle zm when it's out of the range that can be encoded in + // instruction. The range depends on element size: z0-z7 for B, z0-15 for H. + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, ztmp, za); + cdot(ztmp, zn, zm, index, rot); + } + Mov(zd, ztmp); + } else { + MovprfxHelperScope guard(this, zd, za); + cdot(zd, zn, zm, index, rot); + } } -void MacroAssembler::Udot(const ZRegister& zd, +void MacroAssembler::Cdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, - int index) { - VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); + int rot) { + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { + UseScratchRegisterScope temps(this); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); + Mov(ztmp, zd.Aliases(zn) ? zn : zm); + MovprfxHelperScope guard(this, zd, za); + cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? 
ztmp : zm), rot); + } else { + MovprfxHelperScope guard(this, zd, za); + cdot(zd, zn, zm, rot); + } } void MacroAssembler::FPMulAddHelper(const ZRegister& zd, @@ -1792,35 +2014,6 @@ void MacroAssembler::FPMulAddHelper(const ZRegister& zd, } } -void MacroAssembler::FPMulAddIndexHelper(SVEMulAddIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - if (zd.Aliases(za)) { - // zda = zda + (zn * zm[i]) - SingleEmissionCheckScope guard(this); - (this->*fn)(zd, zn, zm, index); - - } else if (zd.Aliases(zn) || zd.Aliases(zm)) { - // zdn = za + (zdn * zm[i]) - // zdm = za + (zn * zdm[i]) - // zdnm = za + (zdnm * zdnm[i]) - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, za); - (this->*fn)(scratch, zn, zm, index); - } - Mov(zd, scratch); - } else { - // zd = za + (zn * zm[i]) - MovprfxHelperScope guard(this, zd, za); - (this->*fn)(zd, zn, zm, index); - } -} - void MacroAssembler::Fmla(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, @@ -1838,15 +2031,6 @@ void MacroAssembler::Fmla(const ZRegister& zd, nan_option); } -void MacroAssembler::Fmla(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddIndexHelper(&Assembler::fmla, zd, za, zn, zm, index); -} - void MacroAssembler::Fmls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, @@ -1864,15 +2048,6 @@ void MacroAssembler::Fmls(const ZRegister& zd, nan_option); } -void MacroAssembler::Fmls(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddIndexHelper(&Assembler::fmls, zd, za, zn, zm, index); -} - void MacroAssembler::Fnmla(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, @@ -1944,25 +2119,24 @@ void 
MacroAssembler::Fcadd(const ZRegister& zd, } } -void MacroAssembler::Ext(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned offset) { +void MacroAssembler::Fcmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot) { VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - // zd = ext(zn, zd, offset) + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); { - MovprfxHelperScope guard(this, scratch, zn); - ext(scratch, scratch, zm, offset); + MovprfxHelperScope guard(this, ztmp, za); + fcmla(ztmp, pg, zn, zm, rot); } - Mov(zd, scratch); + Mov(zd, pg, ztmp); } else { - // zd = ext(zn, zm, offset) - // zd = ext(zd, zd, offset) - MovprfxHelperScope guard(this, zd, zn); - ext(zd, zd, zm, offset); + MovprfxHelperScope guard(this, zd, pg, za); + fcmla(zd, pg, zn, zm, rot); } } @@ -1971,7 +2145,10 @@ void MacroAssembler::Splice(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { + if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) { + SingleEmissionCheckScope guard(this); + splice(zd, pg, zn, zm); + } else if (zd.Aliases(zm) && !zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { @@ -2023,5 +2200,87 @@ void MacroAssembler::Clastb(const ZRegister& zd, } } +void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + if (!zd.Aliases(za) && zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); + Mov(ztmp, zn); + { + 
MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, ztmp, shift); + } + } else { + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, shift); + } +} + +void MacroAssembler::Srsra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift); +} + +void MacroAssembler::Ssra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift); +} + +void MacroAssembler::Ursra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift); +} + +void MacroAssembler::Usra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift); +} + +void MacroAssembler::ComplexAddition(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + if (!zd.Aliases(zn) && zd.Aliases(zm)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm); + Mov(ztmp, zm); + { + MovprfxHelperScope guard(this, zd, zn); + (this->*fn)(zd, zd, ztmp, rot); + } + } else { + MovprfxHelperScope guard(this, zd, zn); + (this->*fn)(zd, zd, zm, rot); + } +} + +void MacroAssembler::Cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + ComplexAddition(&Assembler::cadd, zd, zn, zm, rot); +} + +void MacroAssembler::Sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot); +} + } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/operands-aarch64.cc b/src/aarch64/operands-aarch64.cc index 008179e4..8db129c9 100644 --- a/src/aarch64/operands-aarch64.cc +++ b/src/aarch64/operands-aarch64.cc @@ -360,12 +360,16 @@ bool 
MemOperand::IsRegisterOffset() const { return (addrmode_ == Offset) && !regoffset_.Is(NoReg); } - bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; } - - bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; } +bool MemOperand::IsImmediatePreIndex() const { + return IsPreIndex() && regoffset_.Is(NoReg); +} + +bool MemOperand::IsImmediatePostIndex() const { + return IsPostIndex() && regoffset_.Is(NoReg); +} void MemOperand::AddOffset(int64_t offset) { VIXL_ASSERT(IsImmediateOffset()); @@ -382,6 +386,7 @@ bool SVEMemOperand::IsValid() const { if (IsScalarPlusScalar()) count++; if (IsScalarPlusVector()) count++; if (IsVectorPlusImmediate()) count++; + if (IsVectorPlusScalar()) count++; if (IsVectorPlusVector()) count++; VIXL_ASSERT(count <= 1); } @@ -406,7 +411,7 @@ bool SVEMemOperand::IsValid() const { return IsScalarPlusImmediate() || IsScalarPlusScalar() || IsScalarPlusVector() || IsVectorPlusImmediate() || - IsVectorPlusVector(); + IsVectorPlusScalar() || IsVectorPlusVector(); } diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h index ad03a9ee..08ee4a61 100644 --- a/src/aarch64/operands-aarch64.h +++ b/src/aarch64/operands-aarch64.h @@ -434,9 +434,14 @@ class MemOperand { bool IsImmediateOffset() const; // True for register-offset (but not indexed) MemOperands. bool IsRegisterOffset() const; - + // True for immediate or register pre-indexed MemOperands. bool IsPreIndex() const; + // True for immediate or register post-indexed MemOperands. bool IsPostIndex() const; + // True for immediate pre-indexed MemOperands, [reg, #imm]! 
+ bool IsImmediatePreIndex() const; + // True for immediate post-indexed MemOperands, [reg], #imm + bool IsImmediatePostIndex() const; void AddOffset(int64_t offset); @@ -545,6 +550,17 @@ class SVEMemOperand { VIXL_ASSERT(IsValid()); } + // "vector-plus-scalar", like [z0.d, x0] + SVEMemOperand(ZRegister base, Register offset) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(IsVectorPlusScalar()); + } + // "vector-plus-vector", like [z0.d, z1.d, UXTW] template <typename M = SVEOffsetModifier> SVEMemOperand(ZRegister base, @@ -603,6 +619,11 @@ class SVEMemOperand { regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER); } + bool IsVectorPlusScalar() const { + return base_.IsZRegister() && regoffset_.IsX() && + (base_.IsLaneSizeS() || base_.IsLaneSizeD()); + } + bool IsVectorPlusVector() const { return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) && AreSameFormat(base_, regoffset_) && diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index 04f1165d..d183dc35 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -67,9 +67,352 @@ SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) { } +const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() { + static const FormToVisitorFnMap form_to_visitor = { + DEFAULT_FORM_TO_VISITOR_MAP(Simulator), + SIM_AUD_VISITOR_MAP(Simulator), + {"smlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"smlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"smull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"sqdmlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"sqdmlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"sqdmull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"umlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + 
{"umlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"umull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong}, + {"fcmla_asimdelem_c_h"_h, &Simulator::SimulateNEONComplexMulByElement}, + {"fcmla_asimdelem_c_s"_h, &Simulator::SimulateNEONComplexMulByElement}, + {"fmlal2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong}, + {"fmlal_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong}, + {"fmlsl2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong}, + {"fmlsl_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong}, + {"fmla_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement}, + {"fmls_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement}, + {"fmulx_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement}, + {"fmul_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement}, + {"fmla_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement}, + {"fmls_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement}, + {"fmulx_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement}, + {"fmul_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement}, + {"sdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement}, + {"udot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement}, + {"adclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry}, + {"adclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry}, + {"addhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"addhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"addp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair}, + {"bcax_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary}, + {"bdep_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"bext_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"bgrp_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"bsl1n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary}, + {"bsl2n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary}, + {"bsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary}, + {"cadd_z_zz"_h, 
&Simulator::Simulate_ZdnT_ZdnT_ZmT_const}, + {"cdot_z_zzz"_h, &Simulator::SimulateSVEComplexDotProduct}, + {"cdot_z_zzzi_d"_h, &Simulator::SimulateSVEComplexDotProduct}, + {"cdot_z_zzzi_s"_h, &Simulator::SimulateSVEComplexDotProduct}, + {"cmla_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd}, + {"cmla_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd}, + {"cmla_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd}, + {"eor3_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary}, + {"eorbt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"eortb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"ext_z_zi_con"_h, &Simulator::Simulate_ZdB_Zn1B_Zn2B_imm}, + {"faddp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT}, + {"fcvtlt_z_p_z_h2s"_h, &Simulator::SimulateSVEFPConvertLong}, + {"fcvtlt_z_p_z_s2d"_h, &Simulator::SimulateSVEFPConvertLong}, + {"fcvtnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD}, + {"fcvtnt_z_p_z_s2h"_h, &Simulator::Simulate_ZdH_PgM_ZnS}, + {"fcvtx_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD}, + {"fcvtxnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD}, + {"flogb_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT}, + {"fmaxnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT}, + {"fmaxp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT}, + {"fminnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT}, + {"fminp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT}, + {"fmlalb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH}, + {"fmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"fmlalt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH}, + {"fmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"fmlslb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH}, + {"fmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"fmlslt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH}, + {"fmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"histcnt_z_p_zz"_h, &Simulator::Simulate_ZdT_PgZ_ZnT_ZmT}, + {"histseg_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB}, + 
{"ldnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm}, + {"ldnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm}, + {"ldnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm}, + {"ldnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm}, + {"ldnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sb_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm}, + {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sh_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm}, + {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sw_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm}, + {"ldnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm}, + {"ldnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm}, + {"match_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT}, + {"mla_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex}, + {"mla_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex}, + {"mla_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex}, + {"mls_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex}, + {"mls_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex}, + {"mls_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex}, + {"mul_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"mul_z_zzi_d"_h, &Simulator::SimulateSVEMulIndex}, + {"mul_z_zzi_h"_h, &Simulator::SimulateSVEMulIndex}, + {"mul_z_zzi_s"_h, &Simulator::SimulateSVEMulIndex}, + {"nbsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary}, + {"nmatch_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT}, + {"pmul_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB}, + {"pmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"pmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"raddhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"raddhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"rshrnb_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"rshrnt_z_zi"_h, 
&Simulator::SimulateSVENarrow}, + {"rsubhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"rsubhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"saba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT}, + {"sabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"sabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"sabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"sabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"sadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb}, + {"saddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"saddlbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"saddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"saddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"saddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"sbclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry}, + {"sbclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry}, + {"shadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"shrnb_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"shrnt_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"shsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"shsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"sli_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const}, + {"smaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair}, + {"sminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair}, + {"smlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"smlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"smlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"smlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlslb_z_zzzi_s"_h, 
&Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"smlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"smullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"smullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"smullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"splice_z_p_zz_con"_h, &Simulator::VisitSVEVectorSplice}, + {"sqabs_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT}, + {"sqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"sqcadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const}, + {"sqdmlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"sqdmlalb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, + {"sqdmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"sqdmlalbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"sqdmlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"sqdmlalt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, + {"sqdmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"sqdmlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"sqdmlslb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, + {"sqdmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"sqdmlslbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"sqdmlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"sqdmlslt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, + {"sqdmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"sqdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"sqdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex}, + 
{"sqdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex}, + {"sqdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex}, + {"sqdmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"sqdmullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"sqdmullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"sqdmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"sqdmullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"sqdmullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"sqneg_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT}, + {"sqrdcmlah_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd}, + {"sqrdcmlah_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd}, + {"sqrdcmlah_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd}, + {"sqrdmlah_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmlah_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmlah_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmlah_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmlsh_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmlsh_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmlsh_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmlsh_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh}, + {"sqrdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"sqrdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex}, + {"sqrdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex}, + {"sqrdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex}, + {"sqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"sqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"sqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"sqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"sqrshrunb_z_zi"_h, &Simulator::SimulateSVENarrow}, + 
{"sqrshrunt_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"sqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const}, + {"sqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"sqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"sqshlu_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const}, + {"sqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"sqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"sqshrunb_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"sqshrunt_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"sqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"sqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"sqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow}, + {"sqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow}, + {"sqxtunb_z_zz"_h, &Simulator::SimulateSVENarrow}, + {"sqxtunt_z_zz"_h, &Simulator::SimulateSVENarrow}, + {"srhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"sri_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const}, + {"srshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"srshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"srshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const}, + {"srsra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const}, + {"sshllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm}, + {"sshllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm}, + {"ssra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const}, + {"ssublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"ssublbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"ssublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"ssubltb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"ssubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"ssubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"stnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm}, + {"stnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm}, + 
{"stnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm}, + {"stnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm}, + {"stnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm}, + {"stnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm}, + {"stnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm}, + {"subhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"subhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh}, + {"suqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"tbl_z_zz_2"_h, &Simulator::VisitSVETableLookup}, + {"tbx_z_zz"_h, &Simulator::VisitSVETableLookup}, + {"uaba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT}, + {"uabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"uabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"uabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"uabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"uadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb}, + {"uaddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"uaddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"uaddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"uaddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"uhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"uhsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"uhsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"umaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair}, + {"uminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair}, + {"umlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"umlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"umlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + 
{"umlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"umlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, + {"umlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT}, + {"umullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"umullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec}, + {"umullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"uqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"uqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"uqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"uqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"uqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"uqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const}, + {"uqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"uqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"uqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"uqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow}, + {"uqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"uqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"uqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow}, + {"uqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow}, + {"urecpe_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS}, + {"urhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub}, + {"urshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"urshlr_z_p_zz"_h, 
&Simulator::VisitSVEBitwiseShiftByVector_Predicated}, + {"urshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const}, + {"ursqrte_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS}, + {"ursra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const}, + {"ushllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm}, + {"ushllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm}, + {"usqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic}, + {"usra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const}, + {"usublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"usublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong}, + {"usubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"usubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb}, + {"whilege_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit}, + {"whilegt_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit}, + {"whilehi_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit}, + {"whilehs_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit}, + {"whilerw_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm}, + {"whilewr_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm}, + {"xar_z_zzi"_h, &Simulator::SimulateSVEExclusiveOrRotate}, + {"smmla_z_zzz"_h, &Simulator::SimulateMatrixMul}, + {"ummla_z_zzz"_h, &Simulator::SimulateMatrixMul}, + {"usmmla_z_zzz"_h, &Simulator::SimulateMatrixMul}, + {"smmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul}, + {"ummla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul}, + {"usmmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul}, + {"fmmla_z_zzz_s"_h, &Simulator::SimulateSVEFPMatrixMul}, + {"fmmla_z_zzz_d"_h, &Simulator::SimulateSVEFPMatrixMul}, + {"ld1row_z_p_bi_u32"_h, + &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1row_z_p_br_contiguous"_h, + &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1rod_z_p_bi_u64"_h, + &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1rod_z_p_br_contiguous"_h, + 
&Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1rob_z_p_bi_u8"_h, + &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1rob_z_p_br_contiguous"_h, + &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1roh_z_p_bi_u16"_h, + &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1roh_z_p_br_contiguous"_h, + &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"usdot_z_zzz_s"_h, &Simulator::VisitSVEIntMulAddUnpredicated}, + {"sudot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex}, + {"usdot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex}, + {"usdot_asimdsame2_d"_h, &Simulator::VisitNEON3SameExtra}, + {"sudot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement}, + {"usdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement}, + }; + return &form_to_visitor; +} + Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack) : memory_(std::move(stack)), - movprfx_(NULL), + last_instr_(NULL), cpu_features_auditor_(decoder, CPUFeatures::All()) { // Ensure that shift operations act as the simulator expects. 
VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1); @@ -440,13 +783,29 @@ void Simulator::SetTraceParameters(int parameters) { } } - // Helpers --------------------------------------------------------------------- uint64_t Simulator::AddWithCarry(unsigned reg_size, bool set_flags, uint64_t left, uint64_t right, int carry_in) { + std::pair<uint64_t, uint8_t> result_and_flags = + AddWithCarry(reg_size, left, right, carry_in); + if (set_flags) { + uint8_t flags = result_and_flags.second; + ReadNzcv().SetN((flags >> 3) & 1); + ReadNzcv().SetZ((flags >> 2) & 1); + ReadNzcv().SetC((flags >> 1) & 1); + ReadNzcv().SetV((flags >> 0) & 1); + LogSystemRegister(NZCV); + } + return result_and_flags.first; +} + +std::pair<uint64_t, uint8_t> Simulator::AddWithCarry(unsigned reg_size, + uint64_t left, + uint64_t right, + int carry_in) { VIXL_ASSERT((carry_in == 0) || (carry_in == 1)); VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize)); @@ -458,28 +817,74 @@ uint64_t Simulator::AddWithCarry(unsigned reg_size, right &= reg_mask; uint64_t result = (left + right + carry_in) & reg_mask; - if (set_flags) { - ReadNzcv().SetN(CalcNFlag(result, reg_size)); - ReadNzcv().SetZ(CalcZFlag(result)); + // NZCV bits, ordered N in bit 3 to V in bit 0. + uint8_t nzcv = CalcNFlag(result, reg_size) ? 8 : 0; + nzcv |= CalcZFlag(result) ? 4 : 0; - // Compute the C flag by comparing the result to the max unsigned integer. - uint64_t max_uint_2op = max_uint - carry_in; - bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right); - ReadNzcv().SetC(C ? 1 : 0); + // Compute the C flag by comparing the result to the max unsigned integer. + uint64_t max_uint_2op = max_uint - carry_in; + bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right); + nzcv |= C ? 2 : 0; - // Overflow iff the sign bit is the same for the two inputs and different - // for the result. 
- uint64_t left_sign = left & sign_mask; - uint64_t right_sign = right & sign_mask; - uint64_t result_sign = result & sign_mask; - bool V = (left_sign == right_sign) && (left_sign != result_sign); - ReadNzcv().SetV(V ? 1 : 0); + // Overflow iff the sign bit is the same for the two inputs and different + // for the result. + uint64_t left_sign = left & sign_mask; + uint64_t right_sign = right & sign_mask; + uint64_t result_sign = result & sign_mask; + bool V = (left_sign == right_sign) && (left_sign != result_sign); + nzcv |= V ? 1 : 0; - LogSystemRegister(NZCV); - } - return result; + return std::make_pair(result, nzcv); +} + +using vixl_uint128_t = std::pair<uint64_t, uint64_t>; + +vixl_uint128_t Simulator::Add128(vixl_uint128_t x, vixl_uint128_t y) { + std::pair<uint64_t, uint8_t> sum_lo = + AddWithCarry(kXRegSize, x.second, y.second, 0); + int carry_in = (sum_lo.second & 0x2) >> 1; // C flag in NZCV result. + std::pair<uint64_t, uint8_t> sum_hi = + AddWithCarry(kXRegSize, x.first, y.first, carry_in); + return std::make_pair(sum_hi.first, sum_lo.first); +} + +vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) { + // Negate the integer value. Throw an assertion when the input is INT128_MIN. 
+ VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0)); + x.first = ~x.first; + x.second = ~x.second; + return Add128(x, {0, 1}); } +vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) { + bool neg_result = false; + if ((x >> 63) == 1) { + x = -x; + neg_result = !neg_result; + } + if ((y >> 63) == 1) { + y = -y; + neg_result = !neg_result; + } + + uint64_t x_lo = x & 0xffffffff; + uint64_t x_hi = x >> 32; + uint64_t y_lo = y & 0xffffffff; + uint64_t y_hi = y >> 32; + + uint64_t t1 = x_lo * y_hi; + uint64_t t2 = x_hi * y_lo; + vixl_uint128_t a = std::make_pair(0, x_lo * y_lo); + vixl_uint128_t b = std::make_pair(t1 >> 32, t1 << 32); + vixl_uint128_t c = std::make_pair(t2 >> 32, t2 << 32); + vixl_uint128_t d = std::make_pair(x_hi * y_hi, 0); + + vixl_uint128_t result = Add128(a, b); + result = Add128(result, c); + result = Add128(result, d); + return neg_result ? std::make_pair(-result.first - 1, -result.second) + : result; +} int64_t Simulator::ShiftOperand(unsigned reg_size, uint64_t uvalue, @@ -1569,6 +1974,1564 @@ void Simulator::PrintTakenBranch(const Instruction* target) { // Visitors--------------------------------------------------------------------- +void Simulator::Visit(Metadata* metadata, const Instruction* instr) { + VIXL_ASSERT(metadata->count("form") > 0); + std::string form = (*metadata)["form"]; + form_hash_ = Hash(form.c_str()); + const FormToVisitorFnMap* fv = Simulator::GetFormToVisitorFnMap(); + FormToVisitorFnMap::const_iterator it = fv->find(form_hash_); + if (it == fv->end()) { + VisitUnimplemented(instr); + } else { + (it->second)(this, instr); + } +} + +void Simulator::Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (form_hash_) { + case "match_p_p_zz"_h: 
+ match(vform, pd, zn, zm, /* negate_match = */ false); + break; + case "nmatch_p_p_zz"_h: + match(vform, pd, zn, zm, /* negate_match = */ true); + break; + default: + VIXL_UNIMPLEMENTED(); + } + mov_zeroing(pd, pg, pd); + PredTest(vform, pg, pd); +} + +void Simulator::Simulate_PdT_Xn_Xm(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + uint64_t src1 = ReadXRegister(instr->GetRn()); + uint64_t src2 = ReadXRegister(instr->GetRm()); + + uint64_t absdiff = (src1 > src2) ? (src1 - src2) : (src2 - src1); + absdiff >>= LaneSizeInBytesLog2FromFormat(vform); + + bool no_conflict = false; + switch (form_hash_) { + case "whilerw_p_rr"_h: + no_conflict = (absdiff == 0); + break; + case "whilewr_p_rr"_h: + no_conflict = (absdiff == 0) || (src2 <= src1); + break; + default: + VIXL_UNIMPLEMENTED(); + } + + LogicPRegister dst(pd); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetActive(vform, + i, + no_conflict || (static_cast<uint64_t>(i) < absdiff)); + } + + PredTest(vform, GetPTrue(), pd); +} + +void Simulator::Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr) { + VIXL_ASSERT(form_hash_ == "ext_z_zi_con"_h); + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters); + + int index = instr->GetSVEExtractImmediate(); + int vl = GetVectorLengthInBytes(); + index = (index >= vl) ? 
0 : index; + + ext(kFormatVnB, zd, zn, zn2, index); +} + +void Simulator::Simulate_ZdB_ZnB_ZmB(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (form_hash_) { + case "histseg_z_zz"_h: + if (instr->GetSVEVectorFormat() == kFormatVnB) { + histogram(kFormatVnB, + zd, + GetPTrue(), + zn, + zm, + /* do_segmented = */ true); + } else { + VIXL_UNIMPLEMENTED(); + } + break; + case "pmul_z_zz"_h: + pmul(kFormatVnB, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVEMulIndex(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + // The encoding for B and H-sized lanes are redefined to encode the most + // significant bit of index for H-sized lanes. B-sized lanes are not + // supported. + if (vform == kFormatVnB) vform = kFormatVnH; + + VIXL_ASSERT((form_hash_ == "mul_z_zzi_d"_h) || + (form_hash_ == "mul_z_zzi_h"_h) || + (form_hash_ == "mul_z_zzi_s"_h)); + + SimVRegister temp; + dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex()); + mul(vform, zd, zn, temp); +} + +void Simulator::SimulateSVEMlaMlsIndex(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + // The encoding for B and H-sized lanes are redefined to encode the most + // significant bit of index for H-sized lanes. B-sized lanes are not + // supported. 
+ if (vform == kFormatVnB) vform = kFormatVnH; + + VIXL_ASSERT( + (form_hash_ == "mla_z_zzzi_d"_h) || (form_hash_ == "mla_z_zzzi_h"_h) || + (form_hash_ == "mla_z_zzzi_s"_h) || (form_hash_ == "mls_z_zzzi_d"_h) || + (form_hash_ == "mls_z_zzzi_h"_h) || (form_hash_ == "mls_z_zzzi_s"_h)); + + SimVRegister temp; + dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex()); + if (instr->ExtractBit(10) == 0) { + mla(vform, zda, zda, zn, temp); + } else { + mls(vform, zda, zda, zn, temp); + } +} + +void Simulator::SimulateSVESaturatingMulHighIndex(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + // The encoding for B and H-sized lanes are redefined to encode the most + // significant bit of index for H-sized lanes. B-sized lanes are not + // supported. + if (vform == kFormatVnB) { + vform = kFormatVnH; + } + + SimVRegister temp; + dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex()); + switch (form_hash_) { + case "sqdmulh_z_zzi_h"_h: + case "sqdmulh_z_zzi_s"_h: + case "sqdmulh_z_zzi_d"_h: + sqdmulh(vform, zd, zn, temp); + break; + case "sqrdmulh_z_zzi_h"_h: + case "sqrdmulh_z_zzi_s"_h: + case "sqrdmulh_z_zzi_d"_h: + sqrdmulh(vform, zd, zn, temp); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + SimVRegister temp, zm_idx, zn_b, zn_t; + // Instead of calling the indexed form of the instruction logic, we call the + // vector form, which can reuse existing function logic without modification. + // Select the specified elements based on the index input and than pack them + // to the corresponding position. 
+ VectorFormat vform_half = VectorFormatHalfWidth(vform); + dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex()); + pack_even_elements(vform_half, zm_idx, temp); + + pack_even_elements(vform_half, zn_b, zn); + pack_odd_elements(vform_half, zn_t, zn); + + switch (form_hash_) { + case "smullb_z_zzi_s"_h: + case "smullb_z_zzi_d"_h: + smull(vform, zd, zn_b, zm_idx); + break; + case "smullt_z_zzi_s"_h: + case "smullt_z_zzi_d"_h: + smull(vform, zd, zn_t, zm_idx); + break; + case "sqdmullb_z_zzi_d"_h: + sqdmull(vform, zd, zn_b, zm_idx); + break; + case "sqdmullt_z_zzi_d"_h: + sqdmull(vform, zd, zn_t, zm_idx); + break; + case "umullb_z_zzi_s"_h: + case "umullb_z_zzi_d"_h: + umull(vform, zd, zn_b, zm_idx); + break; + case "umullt_z_zzi_s"_h: + case "umullt_z_zzi_d"_h: + umull(vform, zd, zn_t, zm_idx); + break; + case "sqdmullb_z_zzi_s"_h: + sqdmull(vform, zd, zn_b, zm_idx); + break; + case "sqdmullt_z_zzi_s"_h: + sqdmull(vform, zd, zn_t, zm_idx); + break; + case "smlalb_z_zzzi_s"_h: + case "smlalb_z_zzzi_d"_h: + smlal(vform, zd, zn_b, zm_idx); + break; + case "smlalt_z_zzzi_s"_h: + case "smlalt_z_zzzi_d"_h: + smlal(vform, zd, zn_t, zm_idx); + break; + case "smlslb_z_zzzi_s"_h: + case "smlslb_z_zzzi_d"_h: + smlsl(vform, zd, zn_b, zm_idx); + break; + case "smlslt_z_zzzi_s"_h: + case "smlslt_z_zzzi_d"_h: + smlsl(vform, zd, zn_t, zm_idx); + break; + case "umlalb_z_zzzi_s"_h: + case "umlalb_z_zzzi_d"_h: + umlal(vform, zd, zn_b, zm_idx); + break; + case "umlalt_z_zzzi_s"_h: + case "umlalt_z_zzzi_d"_h: + umlal(vform, zd, zn_t, zm_idx); + break; + case "umlslb_z_zzzi_s"_h: + case "umlslb_z_zzzi_d"_h: + umlsl(vform, zd, zn_b, zm_idx); + break; + case "umlslt_z_zzzi_s"_h: + case "umlslt_z_zzzi_d"_h: + umlsl(vform, zd, zn_t, zm_idx); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdH_PgM_ZnS(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + 
SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result, zd_b; + + pack_even_elements(kFormatVnH, zd_b, zd); + + switch (form_hash_) { + case "fcvtnt_z_p_z_s2h"_h: + fcvt(kFormatVnH, kFormatVnS, result, pg, zn); + pack_even_elements(kFormatVnH, result, result); + zip1(kFormatVnH, result, zd_b, result); + break; + default: + VIXL_UNIMPLEMENTED(); + } + mov_merging(kFormatVnS, zd, pg, result); +} + +void Simulator::Simulate_ZdS_PgM_ZnD(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result, zero, zd_b; + + zero.Clear(); + pack_even_elements(kFormatVnS, zd_b, zd); + + switch (form_hash_) { + case "fcvtnt_z_p_z_d2s"_h: + fcvt(kFormatVnS, kFormatVnD, result, pg, zn); + pack_even_elements(kFormatVnS, result, result); + zip1(kFormatVnS, result, zd_b, result); + break; + case "fcvtx_z_p_z_d2s"_h: + fcvtxn(kFormatVnS, result, zn); + zip1(kFormatVnS, result, result, zero); + break; + case "fcvtxnt_z_p_z_d2s"_h: + fcvtxn(kFormatVnS, result, zn); + zip1(kFormatVnS, result, zd_b, result); + break; + default: + VIXL_UNIMPLEMENTED(); + } + mov_merging(kFormatVnD, zd, pg, result); +} + +void Simulator::SimulateSVEFPConvertLong(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result; + + switch (form_hash_) { + case "fcvtlt_z_p_z_h2s"_h: + ext(kFormatVnB, result, zn, zn, kHRegSizeInBytes); + fcvt(kFormatVnS, kFormatVnH, zd, pg, result); + break; + case "fcvtlt_z_p_z_s2d"_h: + ext(kFormatVnB, result, zn, zn, kSRegSizeInBytes); + fcvt(kFormatVnD, kFormatVnS, zd, pg, result); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = 
ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result; + + if (vform != kFormatVnS) { + VIXL_UNIMPLEMENTED(); + } + + switch (form_hash_) { + case "urecpe_z_p_z"_h: + urecpe(vform, result, zn); + break; + case "ursqrte_z_p_z"_h: + ursqrte(vform, result, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + } + mov_merging(vform, zd, pg, result); +} + +void Simulator::Simulate_ZdT_PgM_ZnT(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result; + + switch (form_hash_) { + case "flogb_z_p_z"_h: + vform = instr->GetSVEVectorFormat(17); + flogb(vform, result, zn); + break; + case "sqabs_z_p_z"_h: + abs(vform, result, zn).SignedSaturate(vform); + break; + case "sqneg_z_p_z"_h: + neg(vform, result, zn).SignedSaturate(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + } + mov_merging(vform, zd, pg, result); +} + +void Simulator::Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result; + + VIXL_ASSERT(form_hash_ == "histcnt_z_p_zz"_h); + if ((vform == kFormatVnS) || (vform == kFormatVnD)) { + histogram(vform, result, pg, zn, zm); + mov_zeroing(vform, zd, pg, result); + } else { + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + 
SimVRegister result; + bool do_bext = false; + + switch (form_hash_) { + case "bdep_z_zz"_h: + bdep(vform, zd, zn, zm); + break; + case "bext_z_zz"_h: + do_bext = true; + VIXL_FALLTHROUGH(); + case "bgrp_z_zz"_h: + bgrp(vform, zd, zn, zm, do_bext); + break; + case "eorbt_z_zz"_h: + rotate_elements_right(vform, result, zm, 1); + SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result); + mov_alternating(vform, zd, result, 0); + break; + case "eortb_z_zz"_h: + rotate_elements_right(vform, result, zm, -1); + SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result); + mov_alternating(vform, zd, result, 1); + break; + case "mul_z_zz"_h: + mul(vform, zd, zn, zm); + break; + case "smulh_z_zz"_h: + smulh(vform, zd, zn, zm); + break; + case "sqdmulh_z_zz"_h: + sqdmulh(vform, zd, zn, zm); + break; + case "sqrdmulh_z_zz"_h: + sqrdmulh(vform, zd, zn, zm); + break; + case "umulh_z_zz"_h: + umulh(vform, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdT_ZnT_ZmTb(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + SimVRegister zm_b, zm_t; + VectorFormat vform_half = VectorFormatHalfWidth(vform); + pack_even_elements(vform_half, zm_b, zm); + pack_odd_elements(vform_half, zm_t, zm); + + switch (form_hash_) { + case "saddwb_z_zz"_h: + saddw(vform, zd, zn, zm_b); + break; + case "saddwt_z_zz"_h: + saddw(vform, zd, zn, zm_t); + break; + case "ssubwb_z_zz"_h: + ssubw(vform, zd, zn, zm_b); + break; + case "ssubwt_z_zz"_h: + ssubw(vform, zd, zn, zm_t); + break; + case "uaddwb_z_zz"_h: + uaddw(vform, zd, zn, zm_b); + break; + case "uaddwt_z_zz"_h: + uaddw(vform, zd, zn, zm_t); + break; + case "usubwb_z_zz"_h: + usubw(vform, zd, zn, zm_b); + break; + case "usubwt_z_zz"_h: + usubw(vform, zd, zn, zm_t); + break; + default: + 
VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdT_ZnT_const(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + VIXL_ASSERT((lane_size >= 0) && + (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2)); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); + int shift_dist = shift_and_lane_size.first; + + switch (form_hash_) { + case "sli_z_zzi"_h: + // Shift distance is computed differently for left shifts. Convert the + // result. + shift_dist = (8 << lane_size) - shift_dist; + sli(vform, zd, zn, shift_dist); + break; + case "sri_z_zzi"_h: + sri(vform, zd, zn, shift_dist); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVENarrow(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result; + + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + VIXL_ASSERT((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) && + (lane_size <= static_cast<int>(kSRegSizeInBytesLog2))); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); + int right_shift_dist = shift_and_lane_size.first; + bool top = false; + + switch (form_hash_) { + case "sqxtnt_z_zz"_h: + top = true; + VIXL_FALLTHROUGH(); + case "sqxtnb_z_zz"_h: + sqxtn(vform, result, zn); + break; + case "sqxtunt_z_zz"_h: + top = true; + VIXL_FALLTHROUGH(); + case "sqxtunb_z_zz"_h: + sqxtun(vform, result, zn); + break; + case "uqxtnt_z_zz"_h: + top = true; + VIXL_FALLTHROUGH(); + case "uqxtnb_z_zz"_h: + uqxtn(vform, result, zn); + break; + case "rshrnt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case 
"rshrnb_z_zi"_h: + rshrn(vform, result, zn, right_shift_dist); + break; + case "shrnt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case "shrnb_z_zi"_h: + shrn(vform, result, zn, right_shift_dist); + break; + case "sqrshrnt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case "sqrshrnb_z_zi"_h: + sqrshrn(vform, result, zn, right_shift_dist); + break; + case "sqrshrunt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case "sqrshrunb_z_zi"_h: + sqrshrun(vform, result, zn, right_shift_dist); + break; + case "sqshrnt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case "sqshrnb_z_zi"_h: + sqshrn(vform, result, zn, right_shift_dist); + break; + case "sqshrunt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case "sqshrunb_z_zi"_h: + sqshrun(vform, result, zn, right_shift_dist); + break; + case "uqrshrnt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case "uqrshrnb_z_zi"_h: + uqrshrn(vform, result, zn, right_shift_dist); + break; + case "uqshrnt_z_zi"_h: + top = true; + VIXL_FALLTHROUGH(); + case "uqshrnb_z_zi"_h: + uqshrn(vform, result, zn, right_shift_dist); + break; + default: + VIXL_UNIMPLEMENTED(); + } + + if (top) { + // Keep even elements, replace odd elements with the results. + xtn(vform, zd, zd); + zip1(vform, zd, zd, result); + } else { + // Zero odd elements, replace even elements with the results. + SimVRegister zero; + zero.Clear(); + zip1(vform, zd, result, zero); + } +} + +void Simulator::SimulateSVEInterleavedArithLong(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister temp, zn_b, zm_b, zn_t, zm_t; + + // Construct temporary registers containing the even (bottom) and odd (top) + // elements. 
+ VectorFormat vform_half = VectorFormatHalfWidth(vform); + pack_even_elements(vform_half, zn_b, zn); + pack_even_elements(vform_half, zm_b, zm); + pack_odd_elements(vform_half, zn_t, zn); + pack_odd_elements(vform_half, zm_t, zm); + + switch (form_hash_) { + case "sabdlb_z_zz"_h: + sabdl(vform, zd, zn_b, zm_b); + break; + case "sabdlt_z_zz"_h: + sabdl(vform, zd, zn_t, zm_t); + break; + case "saddlb_z_zz"_h: + saddl(vform, zd, zn_b, zm_b); + break; + case "saddlbt_z_zz"_h: + saddl(vform, zd, zn_b, zm_t); + break; + case "saddlt_z_zz"_h: + saddl(vform, zd, zn_t, zm_t); + break; + case "ssublb_z_zz"_h: + ssubl(vform, zd, zn_b, zm_b); + break; + case "ssublbt_z_zz"_h: + ssubl(vform, zd, zn_b, zm_t); + break; + case "ssublt_z_zz"_h: + ssubl(vform, zd, zn_t, zm_t); + break; + case "ssubltb_z_zz"_h: + ssubl(vform, zd, zn_t, zm_b); + break; + case "uabdlb_z_zz"_h: + uabdl(vform, zd, zn_b, zm_b); + break; + case "uabdlt_z_zz"_h: + uabdl(vform, zd, zn_t, zm_t); + break; + case "uaddlb_z_zz"_h: + uaddl(vform, zd, zn_b, zm_b); + break; + case "uaddlt_z_zz"_h: + uaddl(vform, zd, zn_t, zm_t); + break; + case "usublb_z_zz"_h: + usubl(vform, zd, zn_b, zm_b); + break; + case "usublt_z_zz"_h: + usubl(vform, zd, zn_t, zm_t); + break; + case "sabalb_z_zzz"_h: + sabal(vform, zd, zn_b, zm_b); + break; + case "sabalt_z_zzz"_h: + sabal(vform, zd, zn_t, zm_t); + break; + case "uabalb_z_zzz"_h: + uabal(vform, zd, zn_b, zm_b); + break; + case "uabalt_z_zzz"_h: + uabal(vform, zd, zn_t, zm_t); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister temp, zn_b, zm_b, zn_t, zm_t; + VectorFormat vform_half = VectorFormatHalfWidth(vform); + pack_even_elements(vform_half, zn_b, zn); + 
pack_even_elements(vform_half, zm_b, zm); + pack_odd_elements(vform_half, zn_t, zn); + pack_odd_elements(vform_half, zm_t, zm); + + switch (form_hash_) { + case "pmullb_z_zz"_h: + // '00' is reserved for Q-sized lane. + if (vform == kFormatVnB) { + VIXL_UNIMPLEMENTED(); + } + pmull(vform, zd, zn_b, zm_b); + break; + case "pmullt_z_zz"_h: + // '00' is reserved for Q-sized lane. + if (vform == kFormatVnB) { + VIXL_UNIMPLEMENTED(); + } + pmull(vform, zd, zn_t, zm_t); + break; + case "smullb_z_zz"_h: + smull(vform, zd, zn_b, zm_b); + break; + case "smullt_z_zz"_h: + smull(vform, zd, zn_t, zm_t); + break; + case "sqdmullb_z_zz"_h: + sqdmull(vform, zd, zn_b, zm_b); + break; + case "sqdmullt_z_zz"_h: + sqdmull(vform, zd, zn_t, zm_t); + break; + case "umullb_z_zz"_h: + umull(vform, zd, zn_b, zm_b); + break; + case "umullt_z_zz"_h: + umull(vform, zd, zn_t, zm_t); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVEAddSubHigh(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result; + bool top = false; + + VectorFormat vform_src = instr->GetSVEVectorFormat(); + if (vform_src == kFormatVnB) { + VIXL_UNIMPLEMENTED(); + } + VectorFormat vform = VectorFormatHalfWidth(vform_src); + + switch (form_hash_) { + case "addhnt_z_zz"_h: + top = true; + VIXL_FALLTHROUGH(); + case "addhnb_z_zz"_h: + addhn(vform, result, zn, zm); + break; + case "raddhnt_z_zz"_h: + top = true; + VIXL_FALLTHROUGH(); + case "raddhnb_z_zz"_h: + raddhn(vform, result, zn, zm); + break; + case "rsubhnt_z_zz"_h: + top = true; + VIXL_FALLTHROUGH(); + case "rsubhnb_z_zz"_h: + rsubhn(vform, result, zn, zm); + break; + case "subhnt_z_zz"_h: + top = true; + VIXL_FALLTHROUGH(); + case "subhnb_z_zz"_h: + subhn(vform, result, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + } + + if (top) { + // Keep even elements, replace odd elements with 
the results. + xtn(vform, zd, zd); + zip1(vform, zd, zd, result); + } else { + // Zero odd elements, replace even elements with the results. + SimVRegister zero; + zero.Clear(); + zip1(vform, zd, result, zero); + } +} + +void Simulator::SimulateSVEShiftLeftImm(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister zn_b, zn_t; + + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + VIXL_ASSERT((lane_size >= 0) && + (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2)); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size + 1); + int right_shift_dist = shift_and_lane_size.first; + int left_shift_dist = (8 << lane_size) - right_shift_dist; + + // Construct temporary registers containing the even (bottom) and odd (top) + // elements. + VectorFormat vform_half = VectorFormatHalfWidth(vform); + pack_even_elements(vform_half, zn_b, zn); + pack_odd_elements(vform_half, zn_t, zn); + + switch (form_hash_) { + case "sshllb_z_zi"_h: + sshll(vform, zd, zn_b, left_shift_dist); + break; + case "sshllt_z_zi"_h: + sshll(vform, zd, zn_t, left_shift_dist); + break; + case "ushllb_z_zi"_h: + ushll(vform, zd, zn_b, left_shift_dist); + break; + case "ushllt_z_zi"_h: + ushll(vform, zd, zn_t, left_shift_dist); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVESaturatingMulAddHigh(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + unsigned zm_code = instr->GetRm(); + int index = -1; + bool is_mla = false; + + switch (form_hash_) { + case "sqrdmlah_z_zzz"_h: + is_mla = true; + VIXL_FALLTHROUGH(); + case "sqrdmlsh_z_zzz"_h: + // Nothing to do. 
+ break; + case "sqrdmlah_z_zzzi_h"_h: + is_mla = true; + VIXL_FALLTHROUGH(); + case "sqrdmlsh_z_zzzi_h"_h: + vform = kFormatVnH; + index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19); + zm_code = instr->ExtractBits(18, 16); + break; + case "sqrdmlah_z_zzzi_s"_h: + is_mla = true; + VIXL_FALLTHROUGH(); + case "sqrdmlsh_z_zzzi_s"_h: + vform = kFormatVnS; + index = instr->ExtractBits(20, 19); + zm_code = instr->ExtractBits(18, 16); + break; + case "sqrdmlah_z_zzzi_d"_h: + is_mla = true; + VIXL_FALLTHROUGH(); + case "sqrdmlsh_z_zzzi_d"_h: + vform = kFormatVnD; + index = instr->ExtractBit(20); + zm_code = instr->ExtractBits(19, 16); + break; + default: + VIXL_UNIMPLEMENTED(); + } + + SimVRegister& zm = ReadVRegister(zm_code); + SimVRegister zm_idx; + if (index >= 0) { + dup_elements_to_segments(vform, zm_idx, zm, index); + } + + if (is_mla) { + sqrdmlah(vform, zda, zn, (index >= 0) ? zm_idx : zm); + } else { + sqrdmlsh(vform, zda, zn, (index >= 0) ? zm_idx : zm); + } +} + +void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) { + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16)); + + SimVRegister temp, zm_idx, zn_b, zn_t; + Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11); + dup_elements_to_segments(kFormatVnS, temp, zm, index); + pack_even_elements(kFormatVnS, zm_idx, temp); + pack_even_elements(kFormatVnS, zn_b, zn); + pack_odd_elements(kFormatVnS, zn_t, zn); + + switch (form_hash_) { + case "sqdmlalb_z_zzzi_d"_h: + sqdmlal(kFormatVnD, zda, zn_b, zm_idx); + break; + case "sqdmlalt_z_zzzi_d"_h: + sqdmlal(kFormatVnD, zda, zn_t, zm_idx); + break; + case "sqdmlslb_z_zzzi_d"_h: + sqdmlsl(kFormatVnD, zda, zn_b, zm_idx); + break; + case "sqdmlslt_z_zzzi_d"_h: + sqdmlsl(kFormatVnD, zda, zn_t, zm_idx); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) 
{ + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + SimVRegister temp, zn_b, zm_b, zn_t, zm_t; + pack_even_elements(kFormatVnH, zn_b, zn); + pack_even_elements(kFormatVnH, zm_b, zm); + pack_odd_elements(kFormatVnH, zn_t, zn); + pack_odd_elements(kFormatVnH, zm_t, zm); + + switch (form_hash_) { + case "fmlalb_z_zzz"_h: + fmlal(kFormatVnS, zda, zn_b, zm_b); + break; + case "fmlalt_z_zzz"_h: + fmlal(kFormatVnS, zda, zn_t, zm_t); + break; + case "fmlslb_z_zzz"_h: + fmlsl(kFormatVnS, zda, zn_b, zm_b); + break; + case "fmlslt_z_zzz"_h: + fmlsl(kFormatVnS, zda, zn_t, zm_t); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) { + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16)); + + SimVRegister temp, zm_idx, zn_b, zn_t; + Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11); + dup_elements_to_segments(kFormatVnH, temp, zm, index); + pack_even_elements(kFormatVnH, zm_idx, temp); + pack_even_elements(kFormatVnH, zn_b, zn); + pack_odd_elements(kFormatVnH, zn_t, zn); + + switch (form_hash_) { + case "fmlalb_z_zzzi_s"_h: + fmlal(kFormatVnS, zda, zn_b, zm_idx); + break; + case "fmlalt_z_zzzi_s"_h: + fmlal(kFormatVnS, zda, zn_t, zm_idx); + break; + case "fmlslb_z_zzzi_s"_h: + fmlsl(kFormatVnS, zda, zn_b, zm_idx); + break; + case "fmlslt_z_zzzi_s"_h: + fmlsl(kFormatVnS, zda, zn_t, zm_idx); + break; + case "sqdmlalb_z_zzzi_s"_h: + sqdmlal(kFormatVnS, zda, zn_b, zm_idx); + break; + case "sqdmlalt_z_zzzi_s"_h: + sqdmlal(kFormatVnS, zda, zn_t, zm_idx); + break; + case "sqdmlslb_z_zzzi_s"_h: + sqdmlsl(kFormatVnS, zda, zn_b, zm_idx); + break; + case "sqdmlslt_z_zzzi_s"_h: + sqdmlsl(kFormatVnS, zda, zn_t, zm_idx); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void 
Simulator::Simulate_ZdaT_PgM_ZnTb(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister result; + + switch (form_hash_) { + case "sadalp_z_p_z"_h: + sadalp(vform, result, zn); + break; + case "uadalp_z_p_z"_h: + uadalp(vform, result, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + } + mov_merging(vform, zda, pg, result); +} + +void Simulator::SimulateSVEAddSubCarry(const Instruction* instr) { + VectorFormat vform = (instr->ExtractBit(22) == 0) ? kFormatVnS : kFormatVnD; + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + SimVRegister not_zn; + not_(vform, not_zn, zn); + + switch (form_hash_) { + case "adclb_z_zzz"_h: + adcl(vform, zda, zn, zm, /* top = */ false); + break; + case "adclt_z_zzz"_h: + adcl(vform, zda, zn, zm, /* top = */ true); + break; + case "sbclb_z_zzz"_h: + adcl(vform, zda, not_zn, zm, /* top = */ false); + break; + case "sbclt_z_zzz"_h: + adcl(vform, zda, not_zn, zm, /* top = */ true); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdaT_ZnT_ZmT(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (form_hash_) { + case "saba_z_zzz"_h: + saba(vform, zda, zn, zm); + break; + case "uaba_z_zzz"_h: + uaba(vform, zda, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVEComplexIntMulAdd(const Instruction* instr) { + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + int rot = instr->ExtractBits(11, 10) * 90; + // vform and zm are 
only valid for the vector form of instruction. + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + // Inputs for indexed form of instruction. + SimVRegister& zm_h = ReadVRegister(instr->ExtractBits(18, 16)); + SimVRegister& zm_s = ReadVRegister(instr->ExtractBits(19, 16)); + int idx_h = instr->ExtractBits(20, 19); + int idx_s = instr->ExtractBit(20); + + switch (form_hash_) { + case "cmla_z_zzz"_h: + cmla(vform, zda, zda, zn, zm, rot); + break; + case "cmla_z_zzzi_h"_h: + cmla(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot); + break; + case "cmla_z_zzzi_s"_h: + cmla(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot); + break; + case "sqrdcmlah_z_zzz"_h: + sqrdcmlah(vform, zda, zda, zn, zm, rot); + break; + case "sqrdcmlah_z_zzzi_h"_h: + sqrdcmlah(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot); + break; + case "sqrdcmlah_z_zzzi_s"_h: + sqrdcmlah(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdaT_ZnT_const(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + VIXL_ASSERT((lane_size >= 0) && + (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2)); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); + int shift_dist = shift_and_lane_size.first; + + switch (form_hash_) { + case "srsra_z_zi"_h: + srsra(vform, zd, zn, shift_dist); + break; + case "ssra_z_zi"_h: + ssra(vform, zd, zn, shift_dist); + break; + case "ursra_z_zi"_h: + ursra(vform, zd, zn, shift_dist); + break; + case "usra_z_zi"_h: + usra(vform, zd, zn, shift_dist); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) { + VectorFormat vform = 
instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + SimVRegister zero, zn_b, zm_b, zn_t, zm_t; + zero.Clear(); + + VectorFormat vform_half = VectorFormatHalfWidth(vform); + uzp1(vform_half, zn_b, zn, zero); + uzp1(vform_half, zm_b, zm, zero); + uzp2(vform_half, zn_t, zn, zero); + uzp2(vform_half, zm_t, zm, zero); + + switch (form_hash_) { + case "smlalb_z_zzz"_h: + smlal(vform, zda, zn_b, zm_b); + break; + case "smlalt_z_zzz"_h: + smlal(vform, zda, zn_t, zm_t); + break; + case "smlslb_z_zzz"_h: + smlsl(vform, zda, zn_b, zm_b); + break; + case "smlslt_z_zzz"_h: + smlsl(vform, zda, zn_t, zm_t); + break; + case "sqdmlalb_z_zzz"_h: + sqdmlal(vform, zda, zn_b, zm_b); + break; + case "sqdmlalbt_z_zzz"_h: + sqdmlal(vform, zda, zn_b, zm_t); + break; + case "sqdmlalt_z_zzz"_h: + sqdmlal(vform, zda, zn_t, zm_t); + break; + case "sqdmlslb_z_zzz"_h: + sqdmlsl(vform, zda, zn_b, zm_b); + break; + case "sqdmlslbt_z_zzz"_h: + sqdmlsl(vform, zda, zn_b, zm_t); + break; + case "sqdmlslt_z_zzz"_h: + sqdmlsl(vform, zda, zn_t, zm_t); + break; + case "umlalb_z_zzz"_h: + umlal(vform, zda, zn_b, zm_b); + break; + case "umlalt_z_zzz"_h: + umlal(vform, zda, zn_t, zm_t); + break; + case "umlslb_z_zzz"_h: + umlsl(vform, zda, zn_b, zm_b); + break; + case "umlslt_z_zzz"_h: + umlsl(vform, zda, zn_t, zm_t); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + int rot = instr->ExtractBits(11, 10) * 90; + unsigned zm_code = instr->GetRm(); + int index = -1; + + switch (form_hash_) { + case "cdot_z_zzz"_h: + // Nothing to do. 
+ break; + case "cdot_z_zzzi_s"_h: + index = zm_code >> 3; + zm_code &= 0x7; + break; + case "cdot_z_zzzi_d"_h: + index = zm_code >> 4; + zm_code &= 0xf; + break; + default: + VIXL_UNIMPLEMENTED(); + } + + SimVRegister temp; + SimVRegister& zm = ReadVRegister(zm_code); + if (index >= 0) dup_elements_to_segments(vform, temp, zm, index); + cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot); +} + +void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) { + VectorFormat vform = kFormatVnD; + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zk = ReadVRegister(instr->GetRn()); + SimVRegister temp; + + switch (form_hash_) { + case "bcax_z_zzz"_h: + bic(vform, temp, zm, zk); + eor(vform, zdn, temp, zdn); + break; + case "bsl1n_z_zzz"_h: + not_(vform, temp, zdn); + bsl(vform, zdn, zk, temp, zm); + break; + case "bsl2n_z_zzz"_h: + not_(vform, temp, zm); + bsl(vform, zdn, zk, zdn, temp); + break; + case "bsl_z_zzz"_h: + bsl(vform, zdn, zk, zdn, zm); + break; + case "eor3_z_zzz"_h: + eor(vform, temp, zdn, zm); + eor(vform, zdn, temp, zk); + break; + case "nbsl_z_zzz"_h: + bsl(vform, zdn, zk, zdn, zm); + not_(vform, zdn, zdn); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::SimulateSVEHalvingAddSub(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimVRegister result; + + switch (form_hash_) { + case "shadd_z_p_zz"_h: + add(vform, result, zdn, zm).Halve(vform); + break; + case "shsub_z_p_zz"_h: + sub(vform, result, zdn, zm).Halve(vform); + break; + case "shsubr_z_p_zz"_h: + sub(vform, result, zm, zdn).Halve(vform); + break; + case "srhadd_z_p_zz"_h: + add(vform, result, zdn, zm).Halve(vform).Round(vform); + break; + case "uhadd_z_p_zz"_h: + add(vform, result, zdn, 
zm).Uhalve(vform); + break; + case "uhsub_z_p_zz"_h: + sub(vform, result, zdn, zm).Uhalve(vform); + break; + case "uhsubr_z_p_zz"_h: + sub(vform, result, zm, zdn).Uhalve(vform); + break; + case "urhadd_z_p_zz"_h: + add(vform, result, zdn, zm).Uhalve(vform).Round(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::SimulateSVESaturatingArithmetic(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (form_hash_) { + case "sqadd_z_p_zz"_h: + add(vform, result, zdn, zm).SignedSaturate(vform); + break; + case "sqsub_z_p_zz"_h: + sub(vform, result, zdn, zm).SignedSaturate(vform); + break; + case "sqsubr_z_p_zz"_h: + sub(vform, result, zm, zdn).SignedSaturate(vform); + break; + case "suqadd_z_p_zz"_h: + suqadd(vform, result, zdn, zm); + break; + case "uqadd_z_p_zz"_h: + add(vform, result, zdn, zm).UnsignedSaturate(vform); + break; + case "uqsub_z_p_zz"_h: + sub(vform, result, zdn, zm).UnsignedSaturate(vform); + break; + case "uqsubr_z_p_zz"_h: + sub(vform, result, zm, zdn).UnsignedSaturate(vform); + break; + case "usqadd_z_p_zz"_h: + usqadd(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::SimulateSVEIntArithPair(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimVRegister result; + + switch (form_hash_) { + case "addp_z_p_zz"_h: + addp(vform, result, zdn, zm); + break; + case "smaxp_z_p_zz"_h: + smaxp(vform, result, zdn, zm); + break; + case "sminp_z_p_zz"_h: + sminp(vform, result, zdn, zm); + 
break; + case "umaxp_z_p_zz"_h: + umaxp(vform, result, zdn, zm); + break; + case "uminp_z_p_zz"_h: + uminp(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimVRegister result; + + switch (form_hash_) { + case "faddp_z_p_zz"_h: + faddp(vform, result, zdn, zm); + break; + case "fmaxnmp_z_p_zz"_h: + fmaxnmp(vform, result, zdn, zm); + break; + case "fmaxp_z_p_zz"_h: + fmaxp(vform, result, zdn, zm); + break; + case "fminnmp_z_p_zz"_h: + fminnmp(vform, result, zdn, zm); + break; + case "fminp_z_p_zz"_h: + fminp(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true); + unsigned lane_size = shift_and_lane_size.second; + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); + int right_shift_dist = shift_and_lane_size.first; + int left_shift_dist = (8 << lane_size) - right_shift_dist; + SimVRegister result; + + switch (form_hash_) { + case "sqshl_z_p_zi"_h: + sqshl(vform, result, zdn, left_shift_dist); + break; + case "sqshlu_z_p_zi"_h: + sqshlu(vform, result, zdn, left_shift_dist); + break; + case "srshr_z_p_zi"_h: + sshr(vform, result, zdn, right_shift_dist).Round(vform); + break; + case "uqshl_z_p_zi"_h: + uqshl(vform, result, zdn, left_shift_dist); + break; + case "urshr_z_p_zi"_h: + ushr(vform, result, zdn, right_shift_dist).Round(vform); + break; 
+ default: + VIXL_UNIMPLEMENTED(); + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::SimulateSVEExclusiveOrRotate(const Instruction* instr) { + VIXL_ASSERT(form_hash_ == "xar_z_zzi"_h); + + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + unsigned lane_size = shift_and_lane_size.second; + VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); + int shift_dist = shift_and_lane_size.first; + eor(vform, zdn, zdn, zm); + ror(vform, zdn, zdn, shift_dist); +} + +void Simulator::Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + int rot = (instr->ExtractBit(10) == 0) ? 90 : 270; + + switch (form_hash_) { + case "cadd_z_zz"_h: + cadd(vform, zdn, zdn, zm, rot); + break; + case "sqcadd_z_zz"_h: + cadd(vform, zdn, zdn, zm, rot, /* saturate = */ true); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + uint64_t xm = ReadXRegister(instr->GetRm()); + + LogicSVEAddressVector addr(xm, &zn, kFormatVnD); + int msize = -1; + bool is_signed = false; + + switch (form_hash_) { + case "ldnt1b_z_p_ar_d_64_unscaled"_h: + msize = 0; + break; + case "ldnt1d_z_p_ar_d_64_unscaled"_h: + msize = 3; + break; + case "ldnt1h_z_p_ar_d_64_unscaled"_h: + msize = 1; + break; + case "ldnt1sb_z_p_ar_d_64_unscaled"_h: + msize = 0; + is_signed = true; + break; + case "ldnt1sh_z_p_ar_d_64_unscaled"_h: + msize = 1; + is_signed = true; + break; + case "ldnt1sw_z_p_ar_d_64_unscaled"_h: + msize = 2; + is_signed = true; + break; + 
case "ldnt1w_z_p_ar_d_64_unscaled"_h: + msize = 2; + break; + default: + VIXL_UNIMPLEMENTED(); + } + addr.SetMsizeInBytesLog2(msize); + SVEStructuredLoadHelper(kFormatVnD, pg, instr->GetRt(), addr, is_signed); +} + +void Simulator::Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + uint64_t xm = ReadXRegister(instr->GetRm()); + + LogicSVEAddressVector addr(xm, &zn, kFormatVnD); + VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_d_64_unscaled"_h) || + (form_hash_ == "stnt1d_z_p_ar_d_64_unscaled"_h) || + (form_hash_ == "stnt1h_z_p_ar_d_64_unscaled"_h) || + (form_hash_ == "stnt1w_z_p_ar_d_64_unscaled"_h)); + + addr.SetMsizeInBytesLog2( + instr->GetSVEMsizeFromDtype(/* is_signed = */ false)); + SVEStructuredStoreHelper(kFormatVnD, pg, instr->GetRt(), addr); +} + +void Simulator::Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + uint64_t xm = ReadXRegister(instr->GetRm()); + + LogicSVEAddressVector addr(xm, &zn, kFormatVnS); + int msize = -1; + bool is_signed = false; + + switch (form_hash_) { + case "ldnt1b_z_p_ar_s_x32_unscaled"_h: + msize = 0; + break; + case "ldnt1h_z_p_ar_s_x32_unscaled"_h: + msize = 1; + break; + case "ldnt1sb_z_p_ar_s_x32_unscaled"_h: + msize = 0; + is_signed = true; + break; + case "ldnt1sh_z_p_ar_s_x32_unscaled"_h: + msize = 1; + is_signed = true; + break; + case "ldnt1w_z_p_ar_s_x32_unscaled"_h: + msize = 2; + break; + default: + VIXL_UNIMPLEMENTED(); + } + addr.SetMsizeInBytesLog2(msize); + SVEStructuredLoadHelper(kFormatVnS, pg, instr->GetRt(), addr, is_signed); +} + +void Simulator::Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + uint64_t xm = ReadXRegister(instr->GetRm()); + + LogicSVEAddressVector addr(xm, &zn, 
kFormatVnS); + VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_s_x32_unscaled"_h) || + (form_hash_ == "stnt1h_z_p_ar_s_x32_unscaled"_h) || + (form_hash_ == "stnt1w_z_p_ar_s_x32_unscaled"_h)); + + addr.SetMsizeInBytesLog2( + instr->GetSVEMsizeFromDtype(/* is_signed = */ false)); + SVEStructuredStoreHelper(kFormatVnS, pg, instr->GetRt(), addr); +} + void Simulator::VisitReserved(const Instruction* instr) { // UDF is the only instruction in this group, and the Decoder is precise here. VIXL_ASSERT(instr->Mask(ReservedMask) == UDF); @@ -2848,11 +4811,17 @@ void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) { __sync_synchronize(); } - MemWrite<T>(address, result); WriteRegister<T>(rt, data, NoRegLog); - PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); - LogRead(rt, format, address); + unsigned register_size = element_size; + if (element_size < kXRegSizeInBytes) { + register_size = kWRegSizeInBytes; + } + PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size); + LogExtendingRead(rt, format, element_size, address); + + MemWrite<T>(address, result); + format = GetPrintRegisterFormatForSize(element_size); LogWrite(rs, format, address); } @@ -3162,42 +5131,71 @@ void Simulator::VisitConditionalSelect(const Instruction* instr) { } -// clang-format off -#define PAUTH_MODES(V) \ - V(IA, ReadXRegister(src), kPACKeyIA, kInstructionPointer) \ - V(IB, ReadXRegister(src), kPACKeyIB, kInstructionPointer) \ - V(IZA, 0x00000000, kPACKeyIA, kInstructionPointer) \ - V(IZB, 0x00000000, kPACKeyIB, kInstructionPointer) \ - V(DA, ReadXRegister(src), kPACKeyDA, kDataPointer) \ - V(DB, ReadXRegister(src), kPACKeyDB, kDataPointer) \ - V(DZA, 0x00000000, kPACKeyDA, kDataPointer) \ - V(DZB, 0x00000000, kPACKeyDB, kDataPointer) -// clang-format on +#define PAUTH_MODES_REGISTER_CONTEXT(V) \ + V(IA, kPACKeyIA, kInstructionPointer) \ + V(IB, kPACKeyIB, kInstructionPointer) \ + V(DA, kPACKeyDA, kDataPointer) \ + V(DB, kPACKeyDB, kDataPointer) + 
+#define PAUTH_MODES_ZERO_CONTEXT(V) \ + V(IZA, kPACKeyIA, kInstructionPointer) \ + V(IZB, kPACKeyIB, kInstructionPointer) \ + V(DZA, kPACKeyDA, kDataPointer) \ + V(DZB, kPACKeyDB, kDataPointer) void Simulator::VisitDataProcessing1Source(const Instruction* instr) { unsigned dst = instr->GetRd(); unsigned src = instr->GetRn(); switch (instr->Mask(DataProcessing1SourceMask)) { -#define DEFINE_PAUTH_FUNCS(SUFFIX, MOD, KEY, D) \ +#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D) \ case PAC##SUFFIX: { \ + uint64_t mod = ReadXRegister(src); \ uint64_t ptr = ReadXRegister(dst); \ - WriteXRegister(dst, AddPAC(ptr, MOD, KEY, D)); \ + WriteXRegister(dst, AddPAC(ptr, mod, KEY, D)); \ break; \ } \ case AUT##SUFFIX: { \ + uint64_t mod = ReadXRegister(src); \ uint64_t ptr = ReadXRegister(dst); \ - WriteXRegister(dst, AuthPAC(ptr, MOD, KEY, D)); \ + WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \ break; \ } - PAUTH_MODES(DEFINE_PAUTH_FUNCS) + PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS) +#undef DEFINE_PAUTH_FUNCS + +#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D) \ + case PAC##SUFFIX: { \ + if (src != kZeroRegCode) { \ + VIXL_UNIMPLEMENTED(); \ + } \ + uint64_t ptr = ReadXRegister(dst); \ + WriteXRegister(dst, AddPAC(ptr, 0x0, KEY, D)); \ + break; \ + } \ + case AUT##SUFFIX: { \ + if (src != kZeroRegCode) { \ + VIXL_UNIMPLEMENTED(); \ + } \ + uint64_t ptr = ReadXRegister(dst); \ + WriteXRegister(dst, AuthPAC(ptr, 0x0, KEY, D)); \ + break; \ + } + + PAUTH_MODES_ZERO_CONTEXT(DEFINE_PAUTH_FUNCS) #undef DEFINE_PAUTH_FUNCS case XPACI: + if (src != kZeroRegCode) { + VIXL_UNIMPLEMENTED(); + } WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer)); break; case XPACD: + if (src != kZeroRegCode) { + VIXL_UNIMPLEMENTED(); + } WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer)); break; case RBIT_w: @@ -3471,6 +5469,15 @@ void Simulator::VisitBitfield(const Instruction* instr) { int64_t reg_mask = instr->GetSixtyFourBits() ? 
kXRegMask : kWRegMask; int R = instr->GetImmR(); int S = instr->GetImmS(); + + if (instr->GetSixtyFourBits() != instr->GetBitN()) { + VisitUnallocated(instr); + } + + if ((instr->GetSixtyFourBits() == 0) && ((S > 31) || (R > 31))) { + VisitUnallocated(instr); + } + int diff = S - R; uint64_t mask; if (diff >= 0) { @@ -4701,10 +6708,10 @@ void Simulator::VisitNEON2RegMisc(const Instruction* instr) { rev16(vf, rd, rn); break; case NEON_SUQADD: - suqadd(vf, rd, rn); + suqadd(vf, rd, rd, rn); break; case NEON_USQADD: - usqadd(vf, rd, rn); + usqadd(vf, rd, rd, rn); break; case NEON_CLS: cls(vf, rd, rn); @@ -5086,7 +7093,7 @@ void Simulator::VisitNEON3Same(const Instruction* instr) { bit(vf, rd, rn, rm); break; case NEON_BSL: - bsl(vf, rd, rn, rm); + bsl(vf, rd, rd, rn, rm); break; default: VIXL_UNIMPLEMENTED(); @@ -5394,30 +7401,31 @@ void Simulator::VisitNEON3SameExtra(const Instruction* instr) { SimVRegister& rm = ReadVRegister(instr->GetRm()); int rot = 0; VectorFormat vf = nfd.GetVectorFormat(); - if (instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) { - rot = instr->GetImmRotFcmlaVec(); - fcmla(vf, rd, rn, rm, rd, rot); - } else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) { - rot = instr->GetImmRotFcadd(); - fcadd(vf, rd, rn, rm, rot); - } else { - switch (instr->Mask(NEON3SameExtraMask)) { - case NEON_SDOT: - sdot(vf, rd, rn, rm); - break; - case NEON_SQRDMLAH: - sqrdmlah(vf, rd, rn, rm); - break; - case NEON_UDOT: - udot(vf, rd, rn, rm); - break; - case NEON_SQRDMLSH: - sqrdmlsh(vf, rd, rn, rm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } + + switch (form_hash_) { + case "fcmla_asimdsame2_c"_h: + rot = instr->GetImmRotFcmlaVec(); + fcmla(vf, rd, rn, rm, rd, rot); + break; + case "fcadd_asimdsame2_c"_h: + rot = instr->GetImmRotFcadd(); + fcadd(vf, rd, rn, rm, rot); + break; + case "sdot_asimdsame2_d"_h: + sdot(vf, rd, rn, rm); + break; + case "udot_asimdsame2_d"_h: + udot(vf, rd, rn, rm); + break; + case "usdot_asimdsame2_d"_h: + 
usdot(vf, rd, rn, rm); + break; + case "sqrdmlah_asimdsame2_only"_h: + sqrdmlah(vf, rd, rn, rm); + break; + case "sqrdmlsh_asimdsame2_only"_h: + sqrdmlsh(vf, rd, rn, rm); + break; } } @@ -5671,206 +7679,225 @@ void Simulator::VisitNEONAcrossLanes(const Instruction* instr) { } } - -void Simulator::VisitNEONByIndexedElement(const Instruction* instr) { +void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) { NEONFormatDecoder nfd(instr); - static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}}; - VectorFormat vf_r = nfd.GetVectorFormat(); - VectorFormat vf_half = nfd.GetVectorFormat(&map_half); VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap()); SimVRegister& rd = ReadVRegister(instr->GetRd()); SimVRegister& rn = ReadVRegister(instr->GetRn()); - ByElementOp Op = NULL; - int rm_reg = instr->GetRm(); - int rm_low_reg = instr->GetRmLow16(); int index = (instr->GetNEONH() << 1) | instr->GetNEONL(); - int index_hlm = (index << 1) | instr->GetNEONM(); - - switch (instr->Mask(NEONByIndexedElementFPLongMask)) { - // These are oddballs and are best handled as special cases. - // - Rm is encoded with only 4 bits (and must be in the lower 16 registers). - // - The index is always H:L:M. 
- case NEON_FMLAL_H_byelement: - fmlal(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm); - return; - case NEON_FMLAL2_H_byelement: - fmlal2(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm); - return; - case NEON_FMLSL_H_byelement: - fmlsl(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm); - return; - case NEON_FMLSL2_H_byelement: - fmlsl2(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm); - return; - } - if (instr->GetNEONSize() == 1) { - rm_reg = rm_low_reg; - index = index_hlm; + rm_reg = instr->GetRmLow16(); + index = (index << 1) | instr->GetNEONM(); } + SimVRegister& rm = ReadVRegister(rm_reg); - switch (instr->Mask(NEONByIndexedElementMask)) { - case NEON_MUL_byelement: - Op = &Simulator::mul; - vf = vf_r; + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf)); + dup_element(indexform, temp, rm, index); + + bool is_2 = instr->Mask(NEON_Q) ? true : false; + + switch (form_hash_) { + case "smull_asimdelem_l"_h: + smull(vf, rd, rn, temp, is_2); break; - case NEON_MLA_byelement: - Op = &Simulator::mla; - vf = vf_r; + case "umull_asimdelem_l"_h: + umull(vf, rd, rn, temp, is_2); break; - case NEON_MLS_byelement: - Op = &Simulator::mls; - vf = vf_r; + case "smlal_asimdelem_l"_h: + smlal(vf, rd, rn, temp, is_2); break; - case NEON_SQDMULH_byelement: - Op = &Simulator::sqdmulh; - vf = vf_r; + case "umlal_asimdelem_l"_h: + umlal(vf, rd, rn, temp, is_2); break; - case NEON_SQRDMULH_byelement: - Op = &Simulator::sqrdmulh; - vf = vf_r; + case "smlsl_asimdelem_l"_h: + smlsl(vf, rd, rn, temp, is_2); break; - case NEON_SDOT_byelement: - Op = &Simulator::sdot; - vf = vf_r; + case "umlsl_asimdelem_l"_h: + umlsl(vf, rd, rn, temp, is_2); break; - case NEON_SQRDMLAH_byelement: - Op = &Simulator::sqrdmlah; - vf = vf_r; + case "sqdmull_asimdelem_l"_h: + sqdmull(vf, rd, rn, temp, is_2); break; - case NEON_UDOT_byelement: - Op = &Simulator::udot; - vf = vf_r; + case "sqdmlal_asimdelem_l"_h: + sqdmlal(vf, rd, rn, temp, 
is_2); break; - case NEON_SQRDMLSH_byelement: - Op = &Simulator::sqrdmlsh; - vf = vf_r; + case "sqdmlsl_asimdelem_l"_h: + sqdmlsl(vf, rd, rn, temp, is_2); break; - case NEON_SMULL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::smull2; - } else { - Op = &Simulator::smull; - } + default: + VIXL_UNREACHABLE(); + } +} + +void Simulator::SimulateNEONFPMulByElementLong(const Instruction* instr) { + VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S; + SimVRegister& rd = ReadVRegister(instr->GetRd()); + SimVRegister& rn = ReadVRegister(instr->GetRn()); + SimVRegister& rm = ReadVRegister(instr->GetRmLow16()); + + int index = + (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM(); + + switch (form_hash_) { + case "fmlal_asimdelem_lh"_h: + fmlal(vform, rd, rn, rm, index); break; - case NEON_UMULL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::umull2; - } else { - Op = &Simulator::umull; - } + case "fmlal2_asimdelem_lh"_h: + fmlal2(vform, rd, rn, rm, index); break; - case NEON_SMLAL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::smlal2; - } else { - Op = &Simulator::smlal; - } + case "fmlsl_asimdelem_lh"_h: + fmlsl(vform, rd, rn, rm, index); break; - case NEON_UMLAL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::umlal2; - } else { - Op = &Simulator::umlal; - } + case "fmlsl2_asimdelem_lh"_h: + fmlsl2(vform, rd, rn, rm, index); break; - case NEON_SMLSL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::smlsl2; - } else { - Op = &Simulator::smlsl; - } + default: + VIXL_UNREACHABLE(); + } +} + +void Simulator::SimulateNEONFPMulByElement(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + static const NEONFormatMap map = + {{23, 22, 30}, + {NF_4H, NF_8H, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S, NF_UNDEF, NF_2D}}; + VectorFormat vform = nfd.GetVectorFormat(&map); + + SimVRegister& rd = ReadVRegister(instr->GetRd()); + SimVRegister& rn = ReadVRegister(instr->GetRn()); + + int rm_reg = 
instr->GetRm(); + int index = + (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM(); + + if ((vform == kFormat4H) || (vform == kFormat8H)) { + rm_reg &= 0xf; + } else if ((vform == kFormat2S) || (vform == kFormat4S)) { + index >>= 1; + } else { + VIXL_ASSERT(vform == kFormat2D); + VIXL_ASSERT(instr->GetNEONL() == 0); + index >>= 2; + } + + SimVRegister& rm = ReadVRegister(rm_reg); + + switch (form_hash_) { + case "fmul_asimdelem_rh_h"_h: + case "fmul_asimdelem_r_sd"_h: + fmul(vform, rd, rn, rm, index); break; - case NEON_UMLSL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::umlsl2; - } else { - Op = &Simulator::umlsl; - } + case "fmla_asimdelem_rh_h"_h: + case "fmla_asimdelem_r_sd"_h: + fmla(vform, rd, rn, rm, index); break; - case NEON_SQDMULL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::sqdmull2; - } else { - Op = &Simulator::sqdmull; - } + case "fmls_asimdelem_rh_h"_h: + case "fmls_asimdelem_r_sd"_h: + fmls(vform, rd, rn, rm, index); break; - case NEON_SQDMLAL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::sqdmlal2; - } else { - Op = &Simulator::sqdmlal; - } + case "fmulx_asimdelem_rh_h"_h: + case "fmulx_asimdelem_r_sd"_h: + fmulx(vform, rd, rn, rm, index); break; - case NEON_SQDMLSL_byelement: - if (instr->Mask(NEON_Q)) { - Op = &Simulator::sqdmlsl2; - } else { - Op = &Simulator::sqdmlsl; - } + default: + VIXL_UNREACHABLE(); + } +} + +void Simulator::SimulateNEONComplexMulByElement(const Instruction* instr) { + VectorFormat vform = instr->GetNEONQ() ? 
kFormat8H : kFormat4H; + SimVRegister& rd = ReadVRegister(instr->GetRd()); + SimVRegister& rn = ReadVRegister(instr->GetRn()); + SimVRegister& rm = ReadVRegister(instr->GetRm()); + int index = (instr->GetNEONH() << 1) | instr->GetNEONL(); + + switch (form_hash_) { + case "fcmla_asimdelem_c_s"_h: + vform = kFormat4S; + index >>= 1; + VIXL_FALLTHROUGH(); + case "fcmla_asimdelem_c_h"_h: + fcmla(vform, rd, rn, rm, index, instr->GetImmRotFcmlaSca()); break; default: - index = instr->GetNEONH(); - if (instr->GetFPType() == 0) { - rm_reg &= 0xf; - index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM(); - } else if ((instr->GetFPType() & 1) == 0) { - index = (index << 1) | instr->GetNEONL(); - } + VIXL_UNREACHABLE(); + } +} - vf = nfd.GetVectorFormat(nfd.FPFormatMap()); +void Simulator::SimulateNEONDotProdByElement(const Instruction* instr) { + VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S; - switch (instr->Mask(NEONByIndexedElementFPMask)) { - case NEON_FMUL_H_byelement: - vf = vf_half; - VIXL_FALLTHROUGH(); - case NEON_FMUL_byelement: - Op = &Simulator::fmul; - break; - case NEON_FMLA_H_byelement: - vf = vf_half; - VIXL_FALLTHROUGH(); - case NEON_FMLA_byelement: - Op = &Simulator::fmla; - break; - case NEON_FMLS_H_byelement: - vf = vf_half; - VIXL_FALLTHROUGH(); - case NEON_FMLS_byelement: - Op = &Simulator::fmls; - break; - case NEON_FMULX_H_byelement: - vf = vf_half; - VIXL_FALLTHROUGH(); - case NEON_FMULX_byelement: - Op = &Simulator::fmulx; - break; - default: - if (instr->GetNEONSize() == 2) { - index = instr->GetNEONH(); - } else { - index = (instr->GetNEONH() << 1) | instr->GetNEONL(); - } - switch (instr->Mask(NEONByIndexedElementFPComplexMask)) { - case NEON_FCMLA_byelement: - vf = vf_r; - fcmla(vf, - rd, - rn, - ReadVRegister(instr->GetRm()), - index, - instr->GetImmRotFcmlaSca()); - return; - default: - VIXL_UNIMPLEMENTED(); - } - } + SimVRegister& rd = ReadVRegister(instr->GetRd()); + SimVRegister& rn = 
ReadVRegister(instr->GetRn()); + SimVRegister& rm = ReadVRegister(instr->GetRm()); + int index = (instr->GetNEONH() << 1) | instr->GetNEONL(); + + SimVRegister temp; + // NEON indexed `dot` allows the index value exceed the register size. + // Promote the format to Q-sized vector format before the duplication. + dup_elements_to_segments(VectorFormatFillQ(vform), temp, rm, index); + + switch (form_hash_) { + case "sdot_asimdelem_d"_h: + sdot(vform, rd, rn, temp); + break; + case "udot_asimdelem_d"_h: + udot(vform, rd, rn, temp); + break; + case "sudot_asimdelem_d"_h: + usdot(vform, rd, temp, rn); + break; + case "usdot_asimdelem_d"_h: + usdot(vform, rd, rn, temp); + break; } +} - (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index); +void Simulator::VisitNEONByIndexedElement(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + VectorFormat vform = nfd.GetVectorFormat(); + + SimVRegister& rd = ReadVRegister(instr->GetRd()); + SimVRegister& rn = ReadVRegister(instr->GetRn()); + + int rm_reg = instr->GetRm(); + int index = (instr->GetNEONH() << 1) | instr->GetNEONL(); + + if ((vform == kFormat4H) || (vform == kFormat8H)) { + rm_reg &= 0xf; + index = (index << 1) | instr->GetNEONM(); + } + + SimVRegister& rm = ReadVRegister(rm_reg); + + switch (form_hash_) { + case "mul_asimdelem_r"_h: + mul(vform, rd, rn, rm, index); + break; + case "mla_asimdelem_r"_h: + mla(vform, rd, rn, rm, index); + break; + case "mls_asimdelem_r"_h: + mls(vform, rd, rn, rm, index); + break; + case "sqdmulh_asimdelem_r"_h: + sqdmulh(vform, rd, rn, rm, index); + break; + case "sqrdmulh_asimdelem_r"_h: + sqrdmulh(vform, rd, rn, rm, index); + break; + case "sqrdmlah_asimdelem_r"_h: + sqrdmlah(vform, rd, rn, rm, index); + break; + case "sqrdmlsh_asimdelem_r"_h: + sqrdmlsh(vform, rd, rn, rm, index); + break; + } } @@ -5882,11 +7909,11 @@ void Simulator::VisitNEONCopy(const Instruction* instr) { SimVRegister& rn = ReadVRegister(instr->GetRn()); int imm5 = instr->GetImmNEON5(); int tz = 
CountTrailingZeros(imm5, 32); - int reg_index = imm5 >> (tz + 1); + int reg_index = ExtractSignedBitfield32(31, tz + 1, imm5); if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) { int imm4 = instr->GetImmNEON4(); - int rn_index = imm4 >> tz; + int rn_index = ExtractSignedBitfield32(31, tz, imm4); ins_element(vf, rd, reg_index, rn, rn_index); } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) { ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn())); @@ -6485,10 +8512,10 @@ void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) { neg(vf, rd, rn).SignedSaturate(vf); break; case NEON_SUQADD_scalar: - suqadd(vf, rd, rn); + suqadd(vf, rd, rd, rn); break; case NEON_USQADD_scalar: - usqadd(vf, rd, rn); + usqadd(vf, rd, rd, rn); break; default: VIXL_UNIMPLEMENTED(); @@ -6943,7 +8970,7 @@ void Simulator::VisitNEONScalarCopy(const Instruction* instr) { if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) { int imm5 = instr->GetImmNEON5(); int tz = CountTrailingZeros(imm5, 32); - int rn_index = imm5 >> (tz + 1); + int rn_index = ExtractSignedBitfield32(31, tz + 1, imm5); dup_element(vf, rd, rn, rn_index); } else { VIXL_UNIMPLEMENTED(); @@ -7415,7 +9442,7 @@ void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) { SimVRegister& zm = ReadVRegister(instr->GetRm()); Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask); - LogicalOp logical_op; + LogicalOp logical_op = LogicalOpMask; switch (op) { case AND_z_zz: logical_op = AND; @@ -7430,7 +9457,6 @@ void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) { logical_op = ORR; break; default: - logical_op = LogicalOpMask; VIXL_UNIMPLEMENTED(); break; } @@ -7492,46 +9518,78 @@ void Simulator::VisitSVEBitwiseShiftByVector_Predicated( SimVRegister& zdn = ReadVRegister(instr->GetRd()); SimVRegister& zm = ReadVRegister(instr->GetRn()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - SimVRegister 
shiftand; // Vector to be shifted. - SimVRegister shiftor; // Vector shift amount. - Shift shift_op = ASR; - mov(vform, shiftand, zdn); - mov(vform, shiftor, zm); + // SVE uses the whole (saturated) lane for the shift amount. + bool shift_in_ls_byte = false; - switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { - case ASRR_z_p_zz: - mov(vform, shiftand, zm); - mov(vform, shiftor, zdn); - VIXL_FALLTHROUGH(); - case ASR_z_p_zz: + switch (form_hash_) { + case "asrr_z_p_zz"_h: + sshr(vform, result, zm, zdn); break; - case LSLR_z_p_zz: - mov(vform, shiftand, zm); - mov(vform, shiftor, zdn); - VIXL_FALLTHROUGH(); - case LSL_z_p_zz: - shift_op = LSL; + case "asr_z_p_zz"_h: + sshr(vform, result, zdn, zm); break; - case LSRR_z_p_zz: - mov(vform, shiftand, zm); - mov(vform, shiftor, zdn); - VIXL_FALLTHROUGH(); - case LSR_z_p_zz: - shift_op = LSR; + case "lslr_z_p_zz"_h: + sshl(vform, result, zm, zdn, shift_in_ls_byte); + break; + case "lsl_z_p_zz"_h: + sshl(vform, result, zdn, zm, shift_in_ls_byte); + break; + case "lsrr_z_p_zz"_h: + ushr(vform, result, zm, zdn); + break; + case "lsr_z_p_zz"_h: + ushr(vform, result, zdn, zm); + break; + case "sqrshl_z_p_zz"_h: + sshl(vform, result, zdn, zm, shift_in_ls_byte) + .Round(vform) + .SignedSaturate(vform); + break; + case "sqrshlr_z_p_zz"_h: + sshl(vform, result, zm, zdn, shift_in_ls_byte) + .Round(vform) + .SignedSaturate(vform); + break; + case "sqshl_z_p_zz"_h: + sshl(vform, result, zdn, zm, shift_in_ls_byte).SignedSaturate(vform); + break; + case "sqshlr_z_p_zz"_h: + sshl(vform, result, zm, zdn, shift_in_ls_byte).SignedSaturate(vform); + break; + case "srshl_z_p_zz"_h: + sshl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform); + break; + case "srshlr_z_p_zz"_h: + sshl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform); + break; + case "uqrshl_z_p_zz"_h: + ushl(vform, result, zdn, zm, shift_in_ls_byte) + .Round(vform) + .UnsignedSaturate(vform); + break; + case "uqrshlr_z_p_zz"_h: + ushl(vform, result, zm, 
zdn, shift_in_ls_byte) + .Round(vform) + .UnsignedSaturate(vform); + break; + case "uqshl_z_p_zz"_h: + ushl(vform, result, zdn, zm, shift_in_ls_byte).UnsignedSaturate(vform); + break; + case "uqshlr_z_p_zz"_h: + ushl(vform, result, zm, zdn, shift_in_ls_byte).UnsignedSaturate(vform); + break; + case "urshl_z_p_zz"_h: + ushl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform); + break; + case "urshlr_z_p_zz"_h: + ushl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform); break; default: VIXL_UNIMPLEMENTED(); break; } - SVEBitwiseShiftHelper(shift_op, - vform, - result, - shiftand, - shiftor, - /* is_wide_elements = */ false); mov_merging(vform, zdn, pg, result); } @@ -7571,7 +9629,7 @@ void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) { SimVRegister& zd = ReadVRegister(instr->GetRd()); SimVRegister& zn = ReadVRegister(instr->GetRn()); - Shift shift_op; + Shift shift_op = NO_SHIFT; switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { case ASR_z_zi: case ASR_z_zw: @@ -7586,7 +9644,6 @@ void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) { shift_op = LSR; break; default: - shift_op = NO_SHIFT; VIXL_UNIMPLEMENTED(); break; } @@ -7846,6 +9903,8 @@ void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) { SimVRegister& zm = ReadVRegister(instr->GetRn()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + switch (instr->Mask(SVEFPAccumulatingReductionMask)) { case FADDA_v_p_z: fadda(vform, vdn, pg, zm); @@ -7862,8 +9921,9 @@ void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) { SimVRegister& zm = ReadVRegister(instr->GetRn()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + SimVRegister result; switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { case FABD_z_p_zz: fabd(vform, result, zdn, zm); @@ -7968,6 +10028,8 @@ void 
Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) { SimVRegister& zd = ReadVRegister(instr->GetRd()); SimVRegister& zm = ReadVRegister(instr->GetRn()); + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { case FTMAD_z_zzi: ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16)); @@ -7984,6 +10046,8 @@ void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) { SimVRegister& zn = ReadVRegister(instr->GetRn()); SimVRegister& zm = ReadVRegister(instr->GetRm()); + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { case FADD_z_zz: fadd(vform, zd, zn, zm); @@ -8017,6 +10081,8 @@ void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) { VectorFormat vform = instr->GetSVEVectorFormat(); SimVRegister result; + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + switch (instr->Mask(SVEFPCompareVectorsMask)) { case FACGE_p_p_zz: fabscmp(vform, result, zn, zm, ge); @@ -8053,8 +10119,10 @@ void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) { SimVRegister& zn = ReadVRegister(instr->GetRn()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister result; + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + + SimVRegister result; SimVRegister zeros; dup_immediate(kFormatVnD, zeros, 0); @@ -8184,6 +10252,8 @@ void Simulator::VisitSVEFPFastReduction(const Instruction* instr) { uint64_t inactive_value = 0; FastReduceFn fn = nullptr; + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + switch (instr->Mask(SVEFPFastReductionMask)) { case FADDV_v_p_z: fn = &Simulator::faddv; @@ -8217,24 +10287,17 @@ void Simulator::VisitSVEFPFastReduction(const Instruction* instr) { void Simulator::VisitSVEFPMulIndex(const Instruction* instr) { VectorFormat vform = kFormatUndefined; - unsigned zm_code = instr->GetRm() & 0xf; - unsigned index = instr->ExtractBits(20, 19); switch 
(instr->Mask(SVEFPMulIndexMask)) { case FMUL_z_zzi_d: vform = kFormatVnD; - index >>= 1; // Only bit 20 is the index for D lanes. break; case FMUL_z_zzi_h_i3h: - index += 4; // Bit 22 (i3h) is the top bit of index. - VIXL_FALLTHROUGH(); case FMUL_z_zzi_h: vform = kFormatVnH; - zm_code &= 7; // Three bits used for zm. break; case FMUL_z_zzi_s: vform = kFormatVnS; - zm_code &= 7; // Three bits used for zm. break; default: VIXL_UNIMPLEMENTED(); @@ -8245,17 +10308,18 @@ void Simulator::VisitSVEFPMulIndex(const Instruction* instr) { SimVRegister& zn = ReadVRegister(instr->GetRn()); SimVRegister temp; - dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); + dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex()); fmul(vform, zd, zn, temp); } void Simulator::VisitSVEFPMulAdd(const Instruction* instr) { VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); SimVRegister result; + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + if (instr->ExtractBit(15) == 0) { // Floating-point multiply-accumulate writing addend. SimVRegister& zm = ReadVRegister(instr->GetRm()); @@ -8319,30 +10383,21 @@ void Simulator::VisitSVEFPMulAdd(const Instruction* instr) { void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) { VectorFormat vform = kFormatUndefined; - unsigned zm_code = 0xffffffff; - unsigned index = 0xffffffff; switch (instr->Mask(SVEFPMulAddIndexMask)) { case FMLA_z_zzzi_d: case FMLS_z_zzzi_d: vform = kFormatVnD; - zm_code = instr->GetRmLow16(); - // Only bit 20 is the index for D lanes. - index = instr->ExtractBit(20); break; case FMLA_z_zzzi_s: case FMLS_z_zzzi_s: vform = kFormatVnS; - zm_code = instr->GetRm() & 0x7; // Three bits used for zm. 
- index = instr->ExtractBits(20, 19); break; case FMLA_z_zzzi_h: case FMLS_z_zzzi_h: case FMLA_z_zzzi_h_i3h: case FMLS_z_zzzi_h_i3h: vform = kFormatVnH; - zm_code = instr->GetRm() & 0x7; // Three bits used for zm. - index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19); break; default: VIXL_UNIMPLEMENTED(); @@ -8353,7 +10408,7 @@ void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) { SimVRegister& zn = ReadVRegister(instr->GetRn()); SimVRegister temp; - dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); + dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex()); if (instr->ExtractBit(10) == 1) { fmls(vform, zd, zd, zn, temp); } else { @@ -8425,44 +10480,40 @@ void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) { SimVRegister& zd = ReadVRegister(instr->GetRd()); SimVRegister& zn = ReadVRegister(instr->GetRn()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - int dst_data_size; - int src_data_size; + VectorFormat dst_data_size = kFormatUndefined; + VectorFormat src_data_size = kFormatUndefined; switch (instr->Mask(SVEFPConvertPrecisionMask)) { case FCVT_z_p_z_d2h: - dst_data_size = kHRegSize; - src_data_size = kDRegSize; + dst_data_size = kFormatVnH; + src_data_size = kFormatVnD; break; case FCVT_z_p_z_d2s: - dst_data_size = kSRegSize; - src_data_size = kDRegSize; + dst_data_size = kFormatVnS; + src_data_size = kFormatVnD; break; case FCVT_z_p_z_h2d: - dst_data_size = kDRegSize; - src_data_size = kHRegSize; + dst_data_size = kFormatVnD; + src_data_size = kFormatVnH; break; case FCVT_z_p_z_h2s: - dst_data_size = kSRegSize; - src_data_size = kHRegSize; + dst_data_size = kFormatVnS; + src_data_size = kFormatVnH; break; case FCVT_z_p_z_s2d: - dst_data_size = kDRegSize; - src_data_size = kSRegSize; + dst_data_size = kFormatVnD; + src_data_size = kFormatVnS; break; case FCVT_z_p_z_s2h: - dst_data_size = kHRegSize; - src_data_size = kSRegSize; + dst_data_size = kFormatVnH; + src_data_size = 
kFormatVnS; break; default: VIXL_UNIMPLEMENTED(); - dst_data_size = 0; - src_data_size = 0; break; } - VectorFormat vform = - SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); - fcvt(vform, dst_data_size, src_data_size, zd, pg, zn); + fcvt(dst_data_size, src_data_size, zd, pg, zn); } void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) { @@ -8494,6 +10545,8 @@ void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) { FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); bool exact_exception = false; + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { case FRINTA_z_p_z: fpcr_rounding = FPTieAway; @@ -8592,6 +10645,8 @@ void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) { SimVRegister& zn = ReadVRegister(instr->GetRn()); FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { case FRECPE_z_z: frecpe(vform, zd, zn, fpcr_rounding); @@ -8973,33 +11028,52 @@ void Simulator::VisitSVEIntCompareScalarCountAndLimit( int64_t ssrc2 = is_64_bit ? 
ReadXRegister(rm_code) : ReadWRegister(rm_code); uint64_t usrc2 = ssrc2 & mask; + bool reverse = (form_hash_ == "whilege_p_p_rr"_h) || + (form_hash_ == "whilegt_p_p_rr"_h) || + (form_hash_ == "whilehi_p_p_rr"_h) || + (form_hash_ == "whilehs_p_p_rr"_h); + + int lane_count = LaneCountFromFormat(vform); bool last = true; - for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + for (int i = 0; i < lane_count; i++) { usrc1 &= mask; int64_t ssrc1 = ExtractSignedBitfield64(rsize - 1, 0, usrc1); bool cond = false; - switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { - case WHILELE_p_p_rr: + switch (form_hash_) { + case "whilele_p_p_rr"_h: cond = ssrc1 <= ssrc2; break; - case WHILELO_p_p_rr: + case "whilelo_p_p_rr"_h: cond = usrc1 < usrc2; break; - case WHILELS_p_p_rr: + case "whilels_p_p_rr"_h: cond = usrc1 <= usrc2; break; - case WHILELT_p_p_rr: + case "whilelt_p_p_rr"_h: cond = ssrc1 < ssrc2; break; + case "whilege_p_p_rr"_h: + cond = ssrc1 >= ssrc2; + break; + case "whilegt_p_p_rr"_h: + cond = ssrc1 > ssrc2; + break; + case "whilehi_p_p_rr"_h: + cond = usrc1 > usrc2; + break; + case "whilehs_p_p_rr"_h: + cond = usrc1 >= usrc2; + break; default: VIXL_UNIMPLEMENTED(); break; } last = last && cond; LogicPRegister dst(pd); + int lane = reverse ? ((lane_count - 1) - i) : i; dst.SetActive(vform, lane, last); - usrc1++; + usrc1 += reverse ? -1 : 1; } PredTest(vform, GetPTrue(), pd); @@ -9013,7 +11087,7 @@ void Simulator::VisitSVEConditionallyTerminateScalars( bool is_64_bit = instr->ExtractBit(22) == 1; uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code); uint64_t src2 = is_64_bit ? 
ReadXRegister(rm_code) : ReadWRegister(rm_code); - bool term; + bool term = false; switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { case CTERMEQ_rr: term = src1 == src2; @@ -9022,7 +11096,6 @@ void Simulator::VisitSVEConditionallyTerminateScalars( term = src1 != src2; break; default: - term = false; VIXL_UNIMPLEMENTED(); break; } @@ -9033,7 +11106,7 @@ void Simulator::VisitSVEConditionallyTerminateScalars( void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) { bool commute_inputs = false; - Condition cond; + Condition cond = al; switch (instr->Mask(SVEIntCompareSignedImmMask)) { case CMPEQ_p_p_zi: cond = eq; @@ -9056,7 +11129,6 @@ void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) { cond = ne; break; default: - cond = al; VIXL_UNIMPLEMENTED(); break; } @@ -9078,7 +11150,7 @@ void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) { void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) { bool commute_inputs = false; - Condition cond; + Condition cond = al; switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { case CMPHI_p_p_zi: cond = hi; @@ -9095,7 +11167,6 @@ void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) { commute_inputs = true; break; default: - cond = al; VIXL_UNIMPLEMENTED(); break; } @@ -9229,8 +11300,6 @@ void Simulator::VisitSVEConstructivePrefix_Unpredicated( switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { case MOVPRFX_z_z: mov(kFormatVnD, zd, zn); // The lane size is arbitrary. - // Record the movprfx, so the next ExecuteInstruction() can check it. 
- movprfx_ = instr; break; default: VIXL_UNIMPLEMENTED(); @@ -9274,13 +11343,16 @@ void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) { SimVRegister& zn = ReadVRegister(instr->GetRn()); SimVRegister& zm = ReadVRegister(instr->GetRm()); - switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { - case SDOT_z_zzz: + switch (form_hash_) { + case "sdot_z_zzz"_h: sdot(vform, zda, zn, zm); break; - case UDOT_z_zzz: + case "udot_z_zzz"_h: udot(vform, zda, zn, zm); break; + case "usdot_z_zzz_s"_h: + usdot(vform, zda, zn, zm); + break; default: VIXL_UNIMPLEMENTED(); break; @@ -9300,9 +11372,6 @@ void Simulator::VisitSVEMovprfx(const Instruction* instr) { } else { mov_zeroing(vform, zd, pg, zn); } - - // Record the movprfx, so the next ExecuteInstruction() can check it. - movprfx_ = instr; break; default: VIXL_UNIMPLEMENTED(); @@ -9419,6 +11488,8 @@ void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) { SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16)); SimVRegister& zd = ReadVRegister(instr->GetRd()); + if (vform == kFormatVnB) VIXL_UNIMPLEMENTED(); + SimVRegister result; switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { case FCPY_z_p_i: { @@ -10116,69 +12187,59 @@ void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( /* is_signed = */ false); } -void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( +void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm( const Instruction* instr) { SimVRegister& zt = ReadVRegister(instr->GetRt()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + uint64_t dwords = 2; + VectorFormat vform_dst = kFormatVnQ; + if ((form_hash_ == "ld1rob_z_p_bi_u8"_h) || + (form_hash_ == "ld1roh_z_p_bi_u16"_h) || + (form_hash_ == "ld1row_z_p_bi_u32"_h) || + (form_hash_ == "ld1rod_z_p_bi_u64"_h)) { + dwords = 4; + vform_dst = kFormatVnO; + } + uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); - uint64_t offset = instr->ExtractSignedBits(19, 16) * 16; + uint64_t 
offset = + instr->ExtractSignedBits(19, 16) * dwords * kDRegSizeInBytes; + int msz = instr->ExtractBits(24, 23); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); - VectorFormat vform = kFormatUndefined; - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { - case LD1RQB_z_p_bi_u8: - vform = kFormatVnB; - break; - case LD1RQD_z_p_bi_u64: - vform = kFormatVnD; - break; - case LD1RQH_z_p_bi_u16: - vform = kFormatVnH; - break; - case LD1RQW_z_p_bi_u32: - vform = kFormatVnS; - break; - default: - addr = offset = 0; - break; + for (unsigned i = 0; i < dwords; i++) { + ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes)); } - ld1(kFormat16B, zt, addr + offset); mov_zeroing(vform, zt, pg, zt); - dup_element(kFormatVnQ, zt, zt, 0); + dup_element(vform_dst, zt, zt, 0); } -void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( +void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar( const Instruction* instr) { SimVRegister& zt = ReadVRegister(instr->GetRt()); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + uint64_t bytes = 16; + VectorFormat vform_dst = kFormatVnQ; + if ((form_hash_ == "ld1rob_z_p_br_contiguous"_h) || + (form_hash_ == "ld1roh_z_p_br_contiguous"_h) || + (form_hash_ == "ld1row_z_p_br_contiguous"_h) || + (form_hash_ == "ld1rod_z_p_br_contiguous"_h)) { + bytes = 32; + vform_dst = kFormatVnO; + } + uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); uint64_t offset = ReadXRegister(instr->GetRm()); - - VectorFormat vform = kFormatUndefined; - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { - case LD1RQB_z_p_br_contiguous: - vform = kFormatVnB; - break; - case LD1RQD_z_p_br_contiguous: - vform = kFormatVnD; - offset <<= 3; - break; - case LD1RQH_z_p_br_contiguous: - vform = kFormatVnH; - offset <<= 1; - break; - case LD1RQW_z_p_br_contiguous: - vform = kFormatVnS; - offset <<= 2; - break; - default: - addr = offset = 0; - break; + int msz = 
instr->ExtractBits(24, 23); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); + offset <<= msz; + for (unsigned i = 0; i < bytes; i++) { + ld1(kFormatVnB, zt, i, addr + offset + i); } - ld1(kFormat16B, zt, addr + offset); mov_zeroing(vform, zt, pg, zt); - dup_element(kFormatVnQ, zt, zt, 0); + dup_element(vform_dst, zt, zt, 0); } void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm( @@ -10726,35 +12787,78 @@ void Simulator::VisitSVEMulIndex(const Instruction* instr) { VectorFormat vform = instr->GetSVEVectorFormat(); SimVRegister& zda = ReadVRegister(instr->GetRd()); SimVRegister& zn = ReadVRegister(instr->GetRn()); + std::pair<int, int> zm_and_index = instr->GetSVEMulZmAndIndex(); + SimVRegister zm = ReadVRegister(zm_and_index.first); + int index = zm_and_index.second; + + SimVRegister temp; + dup_elements_to_segments(vform, temp, zm, index); + + switch (form_hash_) { + case "sdot_z_zzzi_d"_h: + case "sdot_z_zzzi_s"_h: + sdot(vform, zda, zn, temp); + break; + case "udot_z_zzzi_d"_h: + case "udot_z_zzzi_s"_h: + udot(vform, zda, zn, temp); + break; + case "sudot_z_zzzi_s"_h: + usdot(vform, zda, temp, zn); + break; + case "usdot_z_zzzi_s"_h: + usdot(vform, zda, zn, temp); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} - switch (instr->Mask(SVEMulIndexMask)) { - case SDOT_z_zzzi_d: - sdot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(19, 16)), - instr->ExtractBit(20)); +void Simulator::SimulateMatrixMul(const Instruction* instr) { + VectorFormat vform = kFormatVnS; + SimVRegister& dn = ReadVRegister(instr->GetRd()); + SimVRegister& n = ReadVRegister(instr->GetRn()); + SimVRegister& m = ReadVRegister(instr->GetRm()); + + bool n_signed = false; + bool m_signed = false; + switch (form_hash_) { + case "smmla_asimdsame2_g"_h: + vform = kFormat4S; + VIXL_FALLTHROUGH(); + case "smmla_z_zzz"_h: + n_signed = m_signed = true; break; - case SDOT_z_zzzi_s: - sdot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(18, 16)), - 
instr->ExtractBits(20, 19)); + case "ummla_asimdsame2_g"_h: + vform = kFormat4S; + VIXL_FALLTHROUGH(); + case "ummla_z_zzz"_h: + // Nothing to do. break; - case UDOT_z_zzzi_d: - udot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(19, 16)), - instr->ExtractBit(20)); + case "usmmla_asimdsame2_g"_h: + vform = kFormat4S; + VIXL_FALLTHROUGH(); + case "usmmla_z_zzz"_h: + m_signed = true; break; - case UDOT_z_zzzi_s: - udot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(18, 16)), - instr->ExtractBits(20, 19)); + default: + VIXL_UNIMPLEMENTED(); + break; + } + matmul(vform, dn, n, m, n_signed, m_signed); +} + +void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (form_hash_) { + case "fmmla_z_zzz_s"_h: + case "fmmla_z_zzz_d"_h: + fmatmul(vform, zdn, zn, zm); break; default: VIXL_UNIMPLEMENTED(); @@ -10896,9 +13000,7 @@ void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) { // Second source register "Zm" is encoded where "Zn" would usually be. SimVRegister& zm = ReadVRegister(instr->GetRn()); - const int imm8h_mask = 0x001F0000; - const int imm8l_mask = 0x00001C00; - int index = instr->ExtractBits<imm8h_mask | imm8l_mask>(); + int index = instr->GetSVEExtractImmediate(); int vl = GetVectorLengthInBytes(); index = (index >= vl) ? 
0 : index; @@ -11199,15 +13301,19 @@ void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) { mov_merging(chunk_form, zd, pg, result); } -void Simulator::VisitSVEVectorSplice_Destructive(const Instruction* instr) { +void Simulator::VisitSVEVectorSplice(const Instruction* instr) { VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters); SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - switch (instr->Mask(SVEVectorSplice_DestructiveMask)) { - case SPLICE_z_p_zz_des: - splice(vform, zdn, pg, zdn, zm); + switch (form_hash_) { + case "splice_z_p_zz_des"_h: + splice(vform, zd, pg, zd, zn); + break; + case "splice_z_p_zz_con"_h: + splice(vform, zd, pg, zn, zn2); break; default: VIXL_UNIMPLEMENTED(); @@ -11315,15 +13421,24 @@ void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) { } void Simulator::VisitSVETableLookup(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); SimVRegister& zd = ReadVRegister(instr->GetRd()); - switch (instr->Mask(SVETableLookupMask)) { - case TBL_z_zz_1: - Table(instr->GetSVEVectorFormat(), - zd, - ReadVRegister(instr->GetRn()), - ReadVRegister(instr->GetRm())); - return; + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (form_hash_) { + case "tbl_z_zz_1"_h: + tbl(vform, zd, zn, zm); + break; + case "tbl_z_zz_2"_h: + tbl(vform, zd, zn, zn2, zm); + break; + case "tbx_z_zz"_h: + tbx(vform, zd, zn, zm); + break; default: + VIXL_UNIMPLEMENTED(); break; } } diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 44fb0cdb..1fdbb6f6 
100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -28,6 +28,7 @@ #define VIXL_AARCH64_SIMULATOR_AARCH64_H_ #include <memory> +#include <unordered_map> #include <vector> #include "../globals-vixl.h" @@ -555,6 +556,13 @@ class LogicVRegister { return element; } + int UintArray(VectorFormat vform, uint64_t* dst) const { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst[i] = Uint(vform, i); + } + return LaneCountFromFormat(vform); + } + uint64_t UintLeftJustified(VectorFormat vform, int index) const { return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform)); } @@ -638,6 +646,8 @@ class LogicVRegister { register_.Insert(index, value); } + void Clear() { register_.Clear(); } + // When setting a result in a register larger than the result itself, the top // bits of the register must be cleared. void ClearForWrite(VectorFormat vform) const { @@ -1131,11 +1141,6 @@ class Simulator : public DecoderVisitor { VIXL_ASSERT(IsWordAligned(pc_)); pc_modified_ = false; - if (movprfx_ != NULL) { - VIXL_CHECK(pc_->CanTakeSVEMovprfx(movprfx_)); - movprfx_ = NULL; - } - // On guarded pages, if BType is not zero, take an exception on any // instruction other than BTI, PACI[AB]SP, HLT or BRK. if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) { @@ -1150,6 +1155,9 @@ class Simulator : public DecoderVisitor { } } + bool last_instr_was_movprfx = + (form_hash_ == "movprfx_z_z"_h) || (form_hash_ == "movprfx_z_p_z"_h); + // decoder_->Decode(...) triggers at least the following visitors: // 1. The CPUFeaturesAuditor (`cpu_features_auditor_`). // 2. The PrintDisassembler (`print_disasm_`), if enabled. @@ -1157,6 +1165,13 @@ class Simulator : public DecoderVisitor { // User can add additional visitors at any point, but the Simulator requires // that the ordering above is preserved. 
decoder_->Decode(pc_); + + if (last_instr_was_movprfx) { + VIXL_ASSERT(last_instr_ != NULL); + VIXL_CHECK(pc_->CanTakeSVEMovprfx(form_hash_, last_instr_)); + } + + last_instr_ = ReadPc(); IncrementPc(); LogAllWrittenRegisters(); UpdateBType(); @@ -1164,18 +1179,75 @@ class Simulator : public DecoderVisitor { VIXL_CHECK(cpu_features_auditor_.InstructionIsAvailable()); } -// Declare all Visitor functions. -#define DECLARE(A) \ - virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE; + virtual void Visit(Metadata* metadata, + const Instruction* instr) VIXL_OVERRIDE; + +#define DECLARE(A) virtual void Visit##A(const Instruction* instr); VISITOR_LIST_THAT_RETURN(DECLARE) #undef DECLARE - - #define DECLARE(A) \ - VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE; + VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr); VISITOR_LIST_THAT_DONT_RETURN(DECLARE) #undef DECLARE + void Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Simulate_PdT_Xn_Xm(const Instruction* instr); + void Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr); + void Simulate_ZdB_ZnB_ZmB(const Instruction* instr); + void Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr); + void Simulate_ZdH_PgM_ZnS(const Instruction* instr); + void Simulate_ZdH_ZnH_ZmH_imm(const Instruction* instr); + void Simulate_ZdS_PgM_ZnD(const Instruction* instr); + void Simulate_ZdS_PgM_ZnS(const Instruction* instr); + void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr); + void Simulate_ZdT_PgM_ZnT(const Instruction* instr); + void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Simulate_ZdT_ZnT_ZmT(const Instruction* instr); + void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr); + void Simulate_ZdT_ZnT_const(const Instruction* instr); + void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr); + void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr); + void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr); + void Simulate_ZdaS_ZnH_ZmH_imm(const 
Instruction* instr); + void Simulate_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr); + void Simulate_ZdaT_PgM_ZnTb(const Instruction* instr); + void Simulate_ZdaT_ZnT_ZmT(const Instruction* instr); + void Simulate_ZdaT_ZnT_const(const Instruction* instr); + void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr); + void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr); + void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr); + void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr); + void Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr); + void Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr); + void Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr); + void Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr); + + void SimulateSVEHalvingAddSub(const Instruction* instr); + void SimulateSVESaturatingArithmetic(const Instruction* instr); + void SimulateSVEIntArithPair(const Instruction* instr); + void SimulateSVENarrow(const Instruction* instr); + void SimulateSVEInterleavedArithLong(const Instruction* instr); + void SimulateSVEShiftLeftImm(const Instruction* instr); + void SimulateSVEAddSubCarry(const Instruction* instr); + void SimulateSVEAddSubHigh(const Instruction* instr); + void SimulateSVEIntMulLongVec(const Instruction* instr); + void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr); + void SimulateSVEExclusiveOrRotate(const Instruction* instr); + void SimulateSVEBitwiseTernary(const Instruction* instr); + void SimulateSVEComplexDotProduct(const Instruction* instr); + void SimulateSVEMulIndex(const Instruction* instr); + void SimulateSVEMlaMlsIndex(const Instruction* instr); + void SimulateSVEComplexIntMulAdd(const Instruction* instr); + void SimulateSVESaturatingMulAddHigh(const Instruction* instr); + void SimulateSVESaturatingMulHighIndex(const Instruction* instr); + void SimulateSVEFPConvertLong(const Instruction* instr); + void SimulateMatrixMul(const Instruction* instr); + void SimulateSVEFPMatrixMul(const Instruction* instr); + 
void SimulateNEONMulByElementLong(const Instruction* instr); + void SimulateNEONFPMulByElement(const Instruction* instr); + void SimulateNEONFPMulByElementLong(const Instruction* instr); + void SimulateNEONComplexMulByElement(const Instruction* instr); + void SimulateNEONDotProdByElement(const Instruction* instr); // Integer register accessors. @@ -2790,6 +2862,14 @@ class Simulator : public DecoderVisitor { uint64_t left, uint64_t right, int carry_in = 0); + std::pair<uint64_t, uint8_t> AddWithCarry(unsigned reg_size, + uint64_t left, + uint64_t right, + int carry_in); + using vixl_uint128_t = std::pair<uint64_t, uint64_t>; + vixl_uint128_t Add128(vixl_uint128_t x, vixl_uint128_t y); + vixl_uint128_t Mul64(uint64_t x, uint64_t y); + vixl_uint128_t Neg128(vixl_uint128_t x); void LogicalHelper(const Instruction* instr, int64_t op2); void ConditionalCompareHelper(const Instruction* instr, int64_t op2); void LoadStoreHelper(const Instruction* instr, @@ -2834,7 +2914,9 @@ class Simulator : public DecoderVisitor { int64_t value, Extend extend_type, unsigned left_shift = 0) const; - uint16_t PolynomialMult(uint8_t op1, uint8_t op2) const; + uint64_t PolynomialMult(uint64_t op1, + uint64_t op2, + int lane_size_in_bits) const; void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr); void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr); @@ -3065,66 +3147,6 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - LogicVRegister smull(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister smull2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister umull(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister umull2(VectorFormat vform, - 
LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister smlal(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister smlal2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister umlal(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister umlal2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister smlsl(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister smlsl2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister umlsl(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); - LogicVRegister umlsl2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); LogicVRegister umulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3134,31 +3156,16 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); - LogicVRegister sqdmull2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); LogicVRegister sqdmlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index); - LogicVRegister sqdmlal2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); LogicVRegister sqdmlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index); - LogicVRegister 
sqdmlsl2(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); LogicVRegister sqdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3169,21 +3176,11 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); - LogicVRegister sdot(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); LogicVRegister sqrdmlah(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index); - LogicVRegister udot(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int index); LogicVRegister sqrdmlsh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3233,6 +3230,7 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2); LogicVRegister bsl(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& src_mask, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister cls(VectorFormat vform, @@ -3286,11 +3284,19 @@ class Simulator : public DecoderVisitor { LogicVRegister uadalp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister ror(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rotation); LogicVRegister ext(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index); + LogicVRegister rotate_elements_right(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int index); template <typename T> LogicVRegister fcadd(VectorFormat vform, LogicVRegister dst, @@ -3331,6 +3337,40 @@ class Simulator : public DecoderVisitor { LogicVRegister acc, const LogicPRegister& pg, const LogicVRegister& src); + LogicVRegister cadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int 
rot, + bool saturate = false); + LogicVRegister cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + LogicVRegister cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot); + LogicVRegister bgrp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_bext = false); + LogicVRegister bdep(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister histogram(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_segmented = false); LogicVRegister index(VectorFormat vform, LogicVRegister dst, uint64_t start, @@ -3353,6 +3393,10 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src, int src_index); + LogicVRegister dup_elements_to_segments( + VectorFormat vform, + LogicVRegister dst, + const std::pair<int, int>& src_and_index); LogicVRegister dup_immediate(VectorFormat vform, LogicVRegister dst, uint64_t imm); @@ -3368,6 +3412,10 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const SimPRegister& pg, const LogicVRegister& src); + LogicVRegister mov_alternating(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int start_at); LogicPRegister mov_merging(LogicPRegister dst, const LogicPRegister& pg, const LogicPRegister& src); @@ -3383,10 +3431,20 @@ class Simulator : public DecoderVisitor { LogicVRegister sshl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, - const LogicVRegister& src2); + const LogicVRegister& src2, + bool shift_is_8bit = true); LogicVRegister ushl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, + const LogicVRegister& src2, + bool 
shift_is_8bit = true); + LogicVRegister sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, const LogicVRegister& src2); // Perform a "conditional last" operation. The first part of the pair is true // if any predicate lane is active, false otherwise. The second part takes the @@ -3396,6 +3454,11 @@ class Simulator : public DecoderVisitor { const LogicPRegister& pg, const LogicVRegister& src2, int offset_from_last_active); + LogicPRegister match(VectorFormat vform, + LogicPRegister dst, + const LogicVRegister& haystack, + const LogicVRegister& needles, + bool negate_match); LogicVRegister compact(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, @@ -3465,13 +3528,15 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister uxtl(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src); + const LogicVRegister& src, + bool is_2 = false); LogicVRegister uxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); LogicVRegister sxtl(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src); + const LogicVRegister& src, + bool is_2 = false); LogicVRegister sxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -3507,10 +3572,6 @@ class Simulator : public DecoderVisitor { const LogicVRegister& ind); LogicVRegister Table(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src, - const LogicVRegister& tab); - LogicVRegister Table(VectorFormat vform, - LogicVRegister dst, const LogicVRegister& ind, bool zero_out_of_bounds, const LogicVRegister* tab1, @@ -3750,10 +3811,12 @@ class Simulator : public DecoderVisitor { int shift); LogicVRegister suqadd(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src); + const LogicVRegister& src1, + const LogicVRegister& src2); LogicVRegister usqadd(VectorFormat 
vform, LogicVRegister dst, - const LogicVRegister& src); + const LogicVRegister& src1, + const LogicVRegister& src2); LogicVRegister sqshl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, @@ -3875,7 +3938,8 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - bool is_signed); + bool is_src1_signed, + bool is_src2_signed); LogicVRegister sdot(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3884,12 +3948,41 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + LogicVRegister usdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister cdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& acc, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + LogicVRegister sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + LogicVRegister sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot); LogicVRegister sqrdmlash(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool round = true, bool sub_op = false); + LogicVRegister sqrdmlash_d(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round = true, + bool sub_op = false); LogicVRegister sqrdmlah(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3904,6 +3997,21 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + LogicVRegister matmul(VectorFormat vform_dst, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool 
src1_signed, + bool src2_signed); + template <typename T> + LogicVRegister fmatmul(VectorFormat vform, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmatmul(VectorFormat vform, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2); #define NEON_3VREG_LOGIC_LIST(V) \ V(addhn) \ V(addhn2) \ @@ -3923,23 +4031,14 @@ class Simulator : public DecoderVisitor { V(sabdl2) \ V(uabdl) \ V(uabdl2) \ - V(smull) \ V(smull2) \ - V(umull) \ V(umull2) \ - V(smlal) \ V(smlal2) \ - V(umlal) \ V(umlal2) \ - V(smlsl) \ V(smlsl2) \ - V(umlsl) \ V(umlsl2) \ - V(sqdmlal) \ V(sqdmlal2) \ - V(sqdmlsl) \ V(sqdmlsl2) \ - V(sqdmull) \ V(sqdmull2) #define DEFINE_LOGIC_FUNC(FXN) \ @@ -3950,6 +4049,26 @@ class Simulator : public DecoderVisitor { NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC) #undef DEFINE_LOGIC_FUNC +#define NEON_MULL_LIST(V) \ + V(smull) \ + V(umull) \ + V(smlal) \ + V(umlal) \ + V(smlsl) \ + V(umlsl) \ + V(sqdmlal) \ + V(sqdmlsl) \ + V(sqdmull) + +#define DECLARE_NEON_MULL_OP(FN) \ + LogicVRegister FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2, \ + bool is_2 = false); + NEON_MULL_LIST(DECLARE_NEON_MULL_OP) +#undef DECLARE_NEON_MULL_OP + #define NEON_FP3SAME_LIST(V) \ V(fadd, FPAdd, false) \ V(fsub, FPSub, true) \ @@ -4111,6 +4230,9 @@ class Simulator : public DecoderVisitor { LogicVRegister fexpa(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister flogb(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); template <typename T> LogicVRegister fscale(VectorFormat vform, LogicVRegister dst, @@ -4137,9 +4259,8 @@ class Simulator : public DecoderVisitor { FPRounding rounding_mode, bool inexact_exception = false, FrintMode frint_mode = kFrintToInteger); - LogicVRegister fcvt(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, + LogicVRegister 
fcvt(VectorFormat dst_vform, + VectorFormat src_vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src); @@ -4256,6 +4377,10 @@ class Simulator : public DecoderVisitor { const LogicPRegister& pg, const LogicVRegister& src); + LogicVRegister interleave_top_bottom(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + template <typename T> struct TFPPairOp { typedef T (Simulator::*type)(T a, T b); @@ -4357,6 +4482,9 @@ class Simulator : public DecoderVisitor { T FPMinNM(T a, T b); template <typename T> + T FPMulNaNs(T op1, T op2); + + template <typename T> T FPMul(T op1, T op2); template <typename T> @@ -4491,6 +4619,27 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2, bool is_wide_elements); + // Pack all even- or odd-numbered elements of source vector side by side and + // place in elements of lower half the destination vector, and leave the upper + // half all zero. + // [...| H | G | F | E | D | C | B | A ] + // => [...................| G | E | C | A ] + LogicVRegister pack_even_elements(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + // [...| H | G | F | E | D | C | B | A ] + // => [...................| H | F | D | B ] + LogicVRegister pack_odd_elements(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + LogicVRegister adcl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool top); + template <typename T> LogicVRegister FTMaddHelper(VectorFormat vform, LogicVRegister dst, @@ -4587,9 +4736,9 @@ class Simulator : public DecoderVisitor { bool pc_modified_; const Instruction* pc_; - // If non-NULL, the last instruction was a movprfx, and validity needs to be - // checked. - Instruction const* movprfx_; + // Pointer to the last simulated instruction, used for checking the validity + // of the current instruction with movprfx. 
+ Instruction const* last_instr_; // Branch type register, used for branch target identification. BType btype_; @@ -4613,6 +4762,13 @@ class Simulator : public DecoderVisitor { static const char* preg_names[]; private: + using FormToVisitorFnMap = + std::unordered_map<uint32_t, + std::function<void(Simulator*, const Instruction*)>>; + static const FormToVisitorFnMap* GetFormToVisitorFnMap(); + + uint32_t form_hash_; + static const PACKey kPACKeyIA; static const PACKey kPACKeyIB; static const PACKey kPACKeyDA; diff --git a/src/cpu-features.h b/src/cpu-features.h index 1b0f2c24..ebd05787 100644 --- a/src/cpu-features.h +++ b/src/cpu-features.h @@ -170,7 +170,20 @@ namespace vixl { V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \ V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \ V(kPAuthFPAC, "PAuth FPAC", NULL) \ - V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) + V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) \ + /* Scalable Vector Extension 2. */ \ + V(kSVE2, "SVE2", "sve2") \ + V(kSVESM4, "SVE SM4", "svesm4") \ + V(kSVESHA3, "SVE SHA3", "svesha3") \ + V(kSVEBitPerm, "SVE BitPerm", "svebitperm") \ + V(kSVEAES, "SVE AES", "sveaes") \ + V(kSVEPmull128, "SVE Pmull128", "svepmull") \ + /* Alternate floating-point behavior */ \ + V(kAFP, "AFP", "afp") \ + /* Enhanced Counter Virtualization */ \ + V(kECV, "ECV", "ecv") \ + /* Increased precision of Reciprocal Estimate and Square Root Estimate */ \ + V(kRPRES, "RPRES", "rpres") // clang-format on diff --git a/src/utils-vixl.h b/src/utils-vixl.h index 0ae6dfc0..53876869 100644 --- a/src/utils-vixl.h +++ b/src/utils-vixl.h @@ -1395,6 +1395,25 @@ T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) { return 0; } +// Jenkins one-at-a-time hash, based on +// https://en.wikipedia.org/wiki/Jenkins_hash_function citing +// https://www.drdobbs.com/database/algorithm-alley/184410284. 
+constexpr uint32_t Hash(const char* str, uint32_t hash = 0) { + if (*str == '\0') { + hash += hash << 3; + hash ^= hash >> 11; + hash += hash << 15; + return hash; + } else { + hash += *str; + hash += hash << 10; + hash ^= hash >> 6; + return Hash(str + 1, hash); + } +} + +constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); } + } // namespace vixl #endif // VIXL_UTILS_H diff --git a/test/aarch32/test-assembler-aarch32.cc b/test/aarch32/test-assembler-aarch32.cc index 0be51e15..418bc11f 100644 --- a/test/aarch32/test-assembler-aarch32.cc +++ b/test/aarch32/test-assembler-aarch32.cc @@ -2207,7 +2207,7 @@ TEST(custom_literal_place_shared) { VIXL_CHECK(!after.IsBound()); // Load the entries several times to test that literals can be shared. - for (int i = 0; i < 20; i++) { + for (int j = 0; j < 20; j++) { (masm.*test_case.instruction)(r0, &before); (masm.*test_case.instruction)(r1, &after); } @@ -5160,7 +5160,7 @@ TEST_T32(veneer_and_literal5) { int first_test = 2000; // Test on both sizes of the Adr range which is 4095. - for (int test = 0; test < kTestCount; test++) { + for (int test_num = 0; test_num < kTestCount; test_num++) { const int string_size = 1000; // A lot more than the cbz range. std::string test_string(string_size, 'x'); StringLiteral big_literal(test_string.c_str()); @@ -5168,7 +5168,7 @@ TEST_T32(veneer_and_literal5) { __ Adr(r11, &big_literal); { - int num_nops = first_test + test; + int num_nops = first_test + test_num; ExactAssemblyScope aas(&masm, 2 * num_nops, CodeBufferCheckScope::kMaximumSize); @@ -5177,15 +5177,15 @@ TEST_T32(veneer_and_literal5) { } } - __ Cbz(r1, &labels[test]); + __ Cbz(r1, &labels[test_num]); { ExactAssemblyScope aas(&masm, 4, CodeBufferCheckScope::kMaximumSize); __ add(r1, r1, 3); } - __ Bind(&labels[test]); + __ Bind(&labels[test_num]); // Emit the literal pool if it has not beeen emitted (it's the case for - // the lower values of test). + // the lower values of test_num). 
__ EmitLiteralPool(PoolManager<int32_t>::kBranchRequired); } @@ -6476,61 +6476,65 @@ TEST_T32(assembler_bind_label) { POSITIVE_TEST_FORWARD_REFERENCE_INFO(INST, INFO, ASM) #endif -#define POSITIVE_TEST_FORWARD_REFERENCE_INFO(INST, INFO, ASM) \ - can_encode = masm.INFO; \ - VIXL_CHECK(can_encode); \ - { \ - ExactAssemblyScope scope(&masm, \ - info->size, \ - ExactAssemblyScope::kExactSize); \ - int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ - if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ - pc = AlignDown(pc, 4); \ - } \ - Label label(pc + info->min_offset); \ - masm.ASM; \ - } \ - { \ - ExactAssemblyScope scope(&masm, \ - info->size, \ - ExactAssemblyScope::kExactSize); \ - int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ - if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ - pc = AlignDown(pc, 4); \ - } \ - Label label(pc + info->max_offset); \ - masm.ASM; \ +#define POSITIVE_TEST_FORWARD_REFERENCE_INFO(INST, INFO, ASM) \ + can_encode = masm.INFO; \ + VIXL_CHECK(can_encode); \ + { \ + ExactAssemblyScope scope(&masm, \ + info->size, \ + ExactAssemblyScope::kExactSize); \ + int32_t program_counter = \ + masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ + if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ + program_counter = AlignDown(program_counter, 4); \ + } \ + Label label(program_counter + info->min_offset); \ + masm.ASM; \ + } \ + { \ + ExactAssemblyScope scope(&masm, \ + info->size, \ + ExactAssemblyScope::kExactSize); \ + int32_t program_counter = \ + masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ + if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ + program_counter = AlignDown(program_counter, 4); \ + } \ + Label label(program_counter + info->max_offset); \ + masm.ASM; \ } #ifdef VIXL_NEGATIVE_TESTING -#define NEGATIVE_TEST_FORWARD_REFERENCE_INFO(INST, ASM) \ - try { \ - ExactAssemblyScope scope(&masm, \ - info->size, \ - 
ExactAssemblyScope::kMaximumSize); \ - int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ - if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ - pc = AlignDown(pc, 4); \ - } \ - Label label(pc + info->max_offset + info->alignment); \ - masm.ASM; \ - printf("Negative test for forward reference failed for %s.\n", INST); \ - abort(); \ - } catch (const std::runtime_error&) { \ - } \ - try { \ - ExactAssemblyScope scope(&masm, \ - info->size, \ - ExactAssemblyScope::kMaximumSize); \ - int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ - if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ - pc = AlignDown(pc, 4); \ - } \ - Label label(pc + info->min_offset - info->alignment); \ - masm.ASM; \ - printf("Negative test for forward reference failed for %s.\n", INST); \ - abort(); \ - } catch (const std::runtime_error&) { \ +#define NEGATIVE_TEST_FORWARD_REFERENCE_INFO(INST, ASM) \ + try { \ + ExactAssemblyScope scope(&masm, \ + info->size, \ + ExactAssemblyScope::kMaximumSize); \ + int32_t program_counter = \ + masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ + if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ + program_counter = AlignDown(program_counter, 4); \ + } \ + Label label(program_counter + info->max_offset + info->alignment); \ + masm.ASM; \ + printf("Negative test for forward reference failed for %s.\n", INST); \ + abort(); \ + } catch (const std::runtime_error&) { \ + } \ + try { \ + ExactAssemblyScope scope(&masm, \ + info->size, \ + ExactAssemblyScope::kMaximumSize); \ + int32_t program_counter = \ + masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \ + if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \ + program_counter = AlignDown(program_counter, 4); \ + } \ + Label label(program_counter + info->min_offset - info->alignment); \ + masm.ASM; \ + printf("Negative test for forward reference failed for %s.\n", INST); \ + abort(); \ + } catch (const 
std::runtime_error&) { \ } #else #define NEGATIVE_TEST_FORWARD_REFERENCE_INFO(INST, ASM) diff --git a/test/aarch32/test-disasm-a32.cc b/test/aarch32/test-disasm-a32.cc index efc997ff..c6acac97 100644 --- a/test/aarch32/test-disasm-a32.cc +++ b/test/aarch32/test-disasm-a32.cc @@ -348,8 +348,9 @@ namespace aarch32 { class TestDisassembler : public PrintDisassembler { public: - TestDisassembler(std::ostream& os, uint32_t pc) // NOLINT(runtime/references) - : PrintDisassembler(os, pc) {} + TestDisassembler(std::ostream& os, + uint32_t program_counter) // NOLINT(runtime/references) + : PrintDisassembler(os, program_counter) {} virtual void PrintCodeAddress(uint32_t code_address) VIXL_OVERRIDE { USE(code_address); @@ -2507,38 +2508,44 @@ TEST(macro_assembler_PushRegisterList) { "beq 0x00000006\n" "push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ip}\n"); - COMPARE_A32(Push(RegisterList(sp)), "stmdb sp!, {sp}\n"); + // Narrow form, T1. + COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n"); + // <single_register_list> form, T4 + COMPARE_T32(Pop(RegisterList(r10)), "pop {r10}\n"); - // TODO: Clarify behaviour of MacroAssembler vs Assembler with respect to - // deprecated and unpredictable instructions. The tests reflect the - // current behaviour and will need to be updated. + // It is usually UNPREDICTABLE to push sp. + MUST_FAIL_TEST_BOTH(Push(RegisterList(r0, sp)), + "Unpredictable instruction.\n"); + MUST_FAIL_TEST_T32(Push(RegisterList(sp)), "Unpredictable instruction.\n"); + MUST_FAIL_TEST_T32(Push(sp), "Unpredictable instruction.\n"); + // A32 can push sp if it is the first register in the list. 
+ COMPARE_A32(Push(sp), "stmdb sp!, {sp}\n"); + COMPARE_A32(Push(RegisterList(sp)), "stmdb sp!, {sp}\n"); + COMPARE_A32(Push(RegisterList(sp, lr)), "push {sp,lr}\n"); // Deprecated, but accepted: + SHOULD_FAIL_TEST_A32(Push(pc)); SHOULD_FAIL_TEST_A32(Push(RegisterList(pc))); - // Whereas we don't accept the single-register version: - MUST_FAIL_TEST_BOTH(Push(pc), "Unpredictable instruction.\n"); - - // Accepted, but stores UNKNOWN value for the SP: - SHOULD_FAIL_TEST_A32(Push(RegisterList(r0, sp))); - - // The following use the T1 and A1 encodings for T32 and A32 respectively, and - // hence have different preferred disassembly. - COMPARE_T32(Push(RegisterList(r0)), "push {r0}\n"); - COMPARE_A32(Push(RegisterList(r0)), "stmdb sp!, {r0}\n"); - COMPARE_T32(Push(RegisterList(r7)), "push {r7}\n"); - COMPARE_A32(Push(RegisterList(r7)), "stmdb sp!, {r7}\n"); - COMPARE_T32(Push(RegisterList(lr)), "push {lr}\n"); - COMPARE_A32(Push(RegisterList(lr)), "stmdb sp!, {lr}\n"); - - // T2 and A1 encodings, with the same preferred disassembly: - COMPARE_BOTH(Push(RegisterList(r8)), "stmdb sp!, {r8}\n"); - - // Cannot push the sp and pc in T32 when using a register list. - MUST_FAIL_TEST_T32(Push(RegisterList(sp)), - "Ill-formed 'push' instruction.\n"); - MUST_FAIL_TEST_T32(Push(RegisterList(pc)), + SHOULD_FAIL_TEST_A32(Push(RegisterList(r0, pc))); + + MUST_FAIL_TEST_T32(Push(pc), "Unpredictable instruction.\n"); + MUST_FAIL_TEST_T32(Push(RegisterList(pc)), "Unpredictable instruction.\n"); + // The multiple-register T32 push can't encode PC at all. 
+ MUST_FAIL_TEST_T32(Push(RegisterList(r0, pc)), "Ill-formed 'push' instruction.\n"); + // The following use the PUSH (T1) and PUSH (single register) (A1) encodings + // for T32 and A32 respectively: + COMPARE_BOTH(Push(RegisterList(r0)), "push {r0}\n"); + COMPARE_BOTH(Push(RegisterList(r7)), "push {r7}\n"); + COMPARE_BOTH(Push(RegisterList(lr)), "push {lr}\n"); + + // PUSH (single register), T4 and A1 encodings: + COMPARE_BOTH(Push(RegisterList(r8)), "push {r8}\n"); + + // Pushing zero registers should produce no instructions. + COMPARE_BOTH(Push(RegisterList()), ""); + CLEANUP(); } @@ -2564,29 +2571,33 @@ TEST(macro_assembler_PopRegisterList) { "beq 0x00000006\n" "pop {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ip}\n"); - // TODO: Accepted, but value of SP after the instruction is UNKNOWN: - SHOULD_FAIL_TEST_A32(Pop(RegisterList(sp))); + // Narrow form, T1. + COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n"); + // <single_register_list> form, T4. + COMPARE_T32(Pop(RegisterList(r10)), "pop {r10}\n"); - // Cannot pop the sp in T32 when using a register list. - MUST_FAIL_TEST_T32(Pop(RegisterList(sp)), "Ill-formed 'pop' instruction.\n"); + // It is UNPREDICTABLE to pop sp. + MUST_FAIL_TEST_BOTH(Pop(RegisterList(r0, sp)), + "Unpredictable instruction.\n"); + MUST_FAIL_TEST_BOTH(Pop(RegisterList(sp)), "Unpredictable instruction.\n"); + MUST_FAIL_TEST_BOTH(Pop(sp), "Unpredictable instruction.\n"); - // The following use the T1 and A1 encodings for T32 and A32 respectively, and - // hence have different preferred disassembly. 
- COMPARE_T32(Pop(RegisterList(pc)), "pop {pc}\n"); - COMPARE_A32(Pop(RegisterList(pc)), "ldm sp!, {pc}\n"); - COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n"); - COMPARE_A32(Pop(RegisterList(r0)), "ldm sp!, {r0}\n"); - COMPARE_T32(Pop(RegisterList(r7)), "pop {r7}\n"); - COMPARE_A32(Pop(RegisterList(r7)), "ldm sp!, {r7}\n"); - - // T2 and A1 encodings, with the same preferred disassembly: - COMPARE_BOTH(Pop(RegisterList(r8)), "ldm sp!, {r8}\n"); - COMPARE_BOTH(Pop(RegisterList(lr)), "ldm sp!, {lr}\n"); - - // TODO: Pushing both the lr and pc should not be allowed by the - // MacroAssembler (deprecated for A32, for T32 they shouldn't both - // be in the list). - SHOULD_FAIL_TEST_BOTH(Pop(RegisterList(lr, pc))); + // The following use the POP (T1) and POP (single register) (A1) encodings for + // T32 and A32 respectively: + COMPARE_BOTH(Pop(RegisterList(pc)), "pop {pc}\n"); + COMPARE_BOTH(Pop(RegisterList(r0)), "pop {r0}\n"); + COMPARE_BOTH(Pop(RegisterList(r7)), "pop {r7}\n"); + + // POP (single register), T4 and A1 encodings: + COMPARE_BOTH(Pop(RegisterList(r8)), "pop {r8}\n"); + COMPARE_BOTH(Pop(RegisterList(lr)), "pop {lr}\n"); + + MUST_FAIL_TEST_T32(Pop(RegisterList(lr, pc)), "Unpredictable instruction.\n"); + // Deprecated, but allowed. + COMPARE_A32(Pop(RegisterList(lr, pc)), "pop {lr,pc}\n"); + + // Popping zero registers should produce no instructions. 
+ COMPARE_BOTH(Pop(RegisterList()), ""); CLEANUP(); } diff --git a/test/aarch64/test-api-movprfx-aarch64.cc b/test/aarch64/test-api-movprfx-aarch64.cc index 1c1bceec..535ae0bf 100644 --- a/test/aarch64/test-api-movprfx-aarch64.cc +++ b/test/aarch64/test-api-movprfx-aarch64.cc @@ -41,19 +41,42 @@ namespace vixl { namespace aarch64 { +class InstructionReporter : public DecoderVisitor { + public: + InstructionReporter() : DecoderVisitor(kNonConstVisitor) {} + + void Visit(Metadata* metadata, const Instruction* instr) VIXL_OVERRIDE { + USE(instr); + instr_form_ = (*metadata)["form"]; + } + + std::string MoveForm() { return std::move(instr_form_); } + + private: + std::string instr_form_; +}; + static void CheckAndMaybeDisassembleMovprfxPairs(const CodeBuffer* buffer, bool can_take_movprfx) { const Instruction* pair = buffer->GetStartAddress<Instruction*>(); const Instruction* end = buffer->GetEndAddress<Instruction*>(); bool any_failures = false; PrintDisassembler print_disasm(stdout); + Decoder decoder; + InstructionReporter reporter; + decoder.AppendVisitor(&reporter); + while (pair < end) { const Instruction* movprfx = pair; const Instruction* candidate = pair->GetNextInstruction(); const Instruction* next_pair = candidate->GetNextInstruction(); VIXL_ASSERT(candidate < end); - bool failed = can_take_movprfx != candidate->CanTakeSVEMovprfx(movprfx); + Instr inst = candidate->GetInstructionBits(); + decoder.Decode(reinterpret_cast<Instruction*>(&inst)); + std::string form = reporter.MoveForm(); + bool failed = + can_take_movprfx != candidate->CanTakeSVEMovprfx(form.c_str(), movprfx); any_failures = any_failures || failed; if (failed || Test::disassemble()) { @@ -75,11 +98,11 @@ TEST(movprfx_negative_aliasing) { // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not // alias an input to the prefixed instruction. 
Assembler assm; - assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. - static const size_t kPairCount = 73; + static const size_t kPairCount = 79; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z0.VnB(), p0.Merging(), z9.VnB()); @@ -300,6 +323,24 @@ TEST(movprfx_negative_aliasing) { __ movprfx(z14, z5); __ uxtw(z14.VnD(), p3.Merging(), z14.VnD()); + + __ movprfx(z22, z5); + __ smmla(z22.VnS(), z22.VnB(), z0.VnB()); + + __ movprfx(z1, z5); + __ ummla(z1.VnS(), z10.VnB(), z1.VnB()); + + __ movprfx(z30, z5); + __ usmmla(z30.VnS(), z30.VnB(), z18.VnB()); + + __ movprfx(z4, z5); + __ usdot(z4.VnS(), z3.VnB(), z4.VnB()); + + __ movprfx(z10, z5); + __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0); + + __ movprfx(z1, z5); + __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1); } assm.FinalizeCode(); @@ -310,11 +351,13 @@ TEST(movprfx_negative_aliasing_fp) { // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not // alias an input to the prefixed instruction. Assembler assm; - assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, + CPUFeatures::kSVEF32MM, + CPUFeatures::kSVEF64MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
- static const size_t kPairCount = 78; + static const size_t kPairCount = 80; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z17.VnS(), p1.Zeroing(), z12.VnS()); @@ -550,6 +593,12 @@ TEST(movprfx_negative_aliasing_fp) { __ movprfx(z0.VnD(), p5.Zeroing(), z12.VnD()); __ ucvtf(z0.VnH(), p5.Merging(), z0.VnD()); + + __ movprfx(z30, z5); + __ fmmla(z30.VnS(), z30.VnS(), z18.VnS()); + + __ movprfx(z31, z5); + __ fmmla(z31.VnD(), z31.VnD(), z18.VnD()); } assm.FinalizeCode(); @@ -1035,11 +1084,11 @@ TEST(movprfx_negative_predication) { // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears // before an unpredicated instruction. Assembler assm; - assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. - static const size_t kPairCount = 54; + static const size_t kPairCount = 60; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z27.VnS(), p1.Zeroing(), z12.VnS()); @@ -1203,6 +1252,24 @@ TEST(movprfx_negative_predication) { __ movprfx(z9.VnD(), p0.Zeroing(), z16.VnD()); __ uqsub(z9.VnD(), z9.VnD(), 42); + + __ movprfx(z22.VnS(), p0.Zeroing(), z5.VnS()); + __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); + + __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); + __ ummla(z1.VnS(), z10.VnB(), z2.VnB()); + + __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); + __ usmmla(z30.VnS(), z29.VnB(), z18.VnB()); + + __ movprfx(z4.VnS(), p0.Zeroing(), z5.VnS()); + __ usdot(z4.VnS(), z3.VnB(), z4.VnB()); + + __ movprfx(z10.VnS(), p0.Zeroing(), z5.VnS()); + __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0); + + __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); + __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1); } assm.FinalizeCode(); @@ -1213,11 +1280,13 @@ TEST(movprfx_negative_predication_fp) { // Test that CanTakeSVEMovprfx() is 
false when a predicated movprfx appears // before an unpredicated instruction. Assembler assm; - assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, + CPUFeatures::kSVEF32MM, + CPUFeatures::kSVEF64MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. - static const size_t kPairCount = 9; + static const size_t kPairCount = 11; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z10.VnH(), p3.Zeroing(), z3.VnH()); @@ -1244,9 +1313,15 @@ TEST(movprfx_negative_predication_fp) { __ movprfx(z2.VnS(), p1.Zeroing(), z0.VnS()); __ fmls(z2.VnS(), z9.VnS(), z0.VnS(), 3); - // Note that ftsmul and ftssel _cannot_ take movprfx. + // Note that ftsmul and ftssel cannot take movprfx. __ movprfx(z22.VnD(), p6.Merging(), z16.VnD()); __ ftmad(z22.VnD(), z22.VnD(), z20.VnD(), 2); + + __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); + __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); + + __ movprfx(z31.VnD(), p1.Merging(), z5.VnD()); + __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); } assm.FinalizeCode(); @@ -1255,11 +1330,11 @@ TEST(movprfx_negative_predication_fp) { TEST(movprfx_positive) { Assembler assm; - assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
- static const size_t kPairCount = 117; + static const size_t kPairCount = 123; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z17, z28); @@ -1349,9 +1424,6 @@ TEST(movprfx_positive) { __ movprfx(z15, z18); __ eor(z15.VnH(), z15.VnH(), 4); - __ movprfx(z30, z11); - __ ext(z30.VnB(), z30.VnB(), z11.VnB(), 42); - __ movprfx(z19, z28); __ incd(z19.VnD(), SVE_MUL3); @@ -1613,6 +1685,24 @@ TEST(movprfx_positive) { __ movprfx(z18.VnD(), p7.Merging(), z25.VnD()); __ uxtw(z18.VnD(), p7.Merging(), z25.VnD()); + + __ movprfx(z22, z5); + __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); + + __ movprfx(z1, z5); + __ ummla(z1.VnS(), z10.VnB(), z0.VnB()); + + __ movprfx(z30, z5); + __ usmmla(z30.VnS(), z31.VnB(), z18.VnB()); + + __ movprfx(z4, z5); + __ usdot(z4.VnS(), z3.VnB(), z3.VnB()); + + __ movprfx(z10, z5); + __ usdot(z10.VnS(), z9.VnB(), z0.VnB(), 0); + + __ movprfx(z1, z5); + __ sudot(z1.VnS(), z10.VnB(), z2.VnB(), 1); } assm.FinalizeCode(); @@ -1621,11 +1711,13 @@ TEST(movprfx_positive) { TEST(movprfx_positive_fp) { Assembler assm; - assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, + CPUFeatures::kSVEF32MM, + CPUFeatures::kSVEF64MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
- static const size_t kPairCount = 73; + static const size_t kPairCount = 75; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z18.VnS(), p6.Zeroing(), z20.VnS()); @@ -1848,12 +1940,1775 @@ TEST(movprfx_positive_fp) { __ movprfx(z17.VnD(), p4.Merging(), z22.VnD()); __ ucvtf(z17.VnH(), p4.Merging(), z4.VnD()); + + __ movprfx(z30, z5); + __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); + + __ movprfx(z31, z5); + __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); } +TEST(movprfx_positive_sve2) { + Assembler assm; + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); + { + // We have to use the Assembler directly to generate movprfx, so we need + // to manually reserve space for the code we're about to emit. + static const size_t kPairCount = 145; + CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); + + __ movprfx(z25, z26); + __ adclb(z25.VnS(), z17.VnS(), z24.VnS()); + + __ movprfx(z0, z1); + __ adclt(z0.VnS(), z2.VnS(), z15.VnS()); + + __ movprfx(z3, z4); + __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB()); + + __ movprfx(z6, z7); + __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()); + + __ movprfx(z18, z19); + __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()); + + __ movprfx(z7, z8); + __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()); + + __ movprfx(z21, z22); + __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()); + + __ movprfx(z5, z6); + __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90); + + __ movprfx(z7, z8); + __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0); + + __ movprfx(z7, z8); + __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0); + + __ movprfx(z7, z8); + __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0); + + __ movprfx(z19, z20); + __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0); + + __ movprfx(z19, z20); + __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0); + + __ movprfx(z19, z20); + __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0); + + __ movprfx(z10, 
z11); + __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()); + + __ movprfx(z3, z4); + __ eorbt(z3.VnB(), z10.VnB(), z8.VnB()); + + __ movprfx(z20, z22); + __ eortb(z20.VnB(), z21.VnB(), z15.VnB()); + + __ movprfx(z14, z15); + __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()); + + __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); + __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); + + __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); + __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); + + __ movprfx(z2, z3); + __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()); + + __ movprfx(z22, z23); + __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()); + + __ movprfx(z1, z2); + __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()); + + __ movprfx(z16, z17); + __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()); + + __ movprfx(z16, z17); + __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH()); + + __ movprfx(z16, z17); + __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0); + + __ movprfx(z18, z19); + __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH()); + + __ movprfx(z18, z19); + __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0); + + __ movprfx(z16, z17); + __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH()); + + __ movprfx(z16, z17); + __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0); + + __ movprfx(z3, z4); + __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH()); + + __ movprfx(z3, z4); + __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0); + + __ movprfx(z2, z3); + __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0); + + __ movprfx(z2, z3); + __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0); + + __ movprfx(z2, z3); + __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0); + + __ movprfx(z2, z3); + __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0); + + __ movprfx(z2, z3); + __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0); + + __ movprfx(z2, z3); + __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0); + + __ movprfx(z17, z18); + __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()); + + __ movprfx(z13, z14); + __ saba(z13.VnB(), z2.VnB(), z31.VnB()); + + __ movprfx(z13, z14); + __ sabalb(z13.VnD(), 
z20.VnS(), z26.VnS()); + + __ movprfx(z14, z15); + __ sabalt(z14.VnD(), z19.VnS(), z10.VnS()); + + __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); + __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); + + __ movprfx(z17, z18); + __ sbclb(z17.VnS(), z10.VnS(), z8.VnS()); + + __ movprfx(z20, z21); + __ sbclt(z20.VnS(), z0.VnS(), z13.VnS()); + + __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); + __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); + + __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); + __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); + + __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); + __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); + + __ movprfx(z5, z6); + __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()); + + __ movprfx(z27, z28); + __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()); + + __ movprfx(z1, z2); + __ smlalb(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1, z2); + __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1, z2); + __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z1, z2); + __ smlalt(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1, z2); + __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1, z2); + __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z1, z2); + __ smlslb(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1, z2); + __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1, z2); + __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z1, z2); + __ smlslt(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1, z2); + __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1, z2); + __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); + __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); + + __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); + __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); + + __ movprfx(z20, z21); + __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90); + + __ movprfx(z6, z7); + __ sqdmlalb(z6.VnD(), 
z19.VnS(), z25.VnS()); + + __ movprfx(z6, z7); + __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0); + + __ movprfx(z6, z7); + __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0); + + __ movprfx(z23, z24); + __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()); + + __ movprfx(z11, z12); + __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS()); + + __ movprfx(z11, z12); + __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0); + + __ movprfx(z11, z12); + __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0); + + __ movprfx(z16, z17); + __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()); + + __ movprfx(z16, z17); + __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0); + + __ movprfx(z16, z17); + __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0); + + __ movprfx(z26, z27); + __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()); + + __ movprfx(z21, z22); + __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()); + + __ movprfx(z21, z22); + __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0); + + __ movprfx(z21, z22); + __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0); + + __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); + __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); + + __ movprfx(z31, z0); + __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0); + + __ movprfx(z31, z0); + __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0); + + __ movprfx(z31, z0); + __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0); + + __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); + __ 
sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); + + __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); + __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); + + __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); + __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); + + __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); + __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); + + __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); + __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); + + __ movprfx(z10.VnB(), p1.Merging(), z11.VnB()); + __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); + + __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); + __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); + + __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); + __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); + + __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); + __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); + + __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); + __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()); + + __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); + __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); + + __ movprfx(z12.VnB(), p0.Merging(), z13.VnB()); + __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); + + __ movprfx(z0, z1); + __ srsra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z0, z1); + __ ssra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); + __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); + + __ movprfx(z23, z24); + __ uaba(z23.VnB(), z22.VnB(), z20.VnB()); + + __ movprfx(z11, z12); + __ uabalb(z11.VnD(), z25.VnS(), z12.VnS()); + + __ movprfx(z4, z5); + __ uabalt(z4.VnD(), z2.VnS(), z31.VnS()); + + __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); + __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); + + __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); + __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); + + __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); + __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); + 
+ __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); + __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); + + __ movprfx(z7, z8); + __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()); + + __ movprfx(z10, z11); + __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()); + + __ movprfx(z31, z0); + __ umlalb(z31.VnD(), z9.VnS(), z21.VnS()); + + __ movprfx(z31, z0); + __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0); + + __ movprfx(z31, z0); + __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0); + + __ movprfx(z11, z12); + __ umlalt(z11.VnD(), z5.VnS(), z22.VnS()); + + __ movprfx(z11, z12); + __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0); + + __ movprfx(z11, z12); + __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0); + + __ movprfx(z28, z29); + __ umlslb(z28.VnD(), z13.VnS(), z9.VnS()); + + __ movprfx(z28, z29); + __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0); + + __ movprfx(z28, z29); + __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0); + + __ movprfx(z9, z10); + __ umlslt(z9.VnD(), z12.VnS(), z30.VnS()); + + __ movprfx(z9, z10); + __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0); + + __ movprfx(z9, z10); + __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0); + + __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); + __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), + + __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); + __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); + + __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); + __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); + + __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); + __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); + + __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); + __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); + + __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); + __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); + + __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); + __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); + + __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); + __ uqsubr(z20.VnB(), p0.Merging(), 
z20.VnB(), z6.VnB()); + + __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); + __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); + + __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); + __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); + + __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); + __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); + + __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); + __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); + + __ movprfx(z31.VnB(), p2.Merging(), z0.VnB()); + __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); + + __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); + __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); + + __ movprfx(z0, z1); + __ ursra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); + __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); + + __ movprfx(z0, z1); + __ usra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z16, z17); + __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1); + } + assm.FinalizeCode(); + + CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); +} + +TEST(movprfx_negative_instructions_sve2) { + Assembler assm; + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kSVEBitPerm); + { + // We have to use the Assembler directly to generate movprfx, so we need + // to manually reserve space for the code we're about to emit. 
+ static const size_t kPairCount = 133; + CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); + + __ movprfx(z29, z30); + __ addhnb(z29.VnS(), z19.VnD(), z2.VnD()); + + __ movprfx(z8, z9); + __ addhnt(z8.VnS(), z12.VnD(), z6.VnD()); + + __ movprfx(z18, z19); + __ bdep(z18.VnB(), z10.VnB(), z0.VnB()); + + __ movprfx(z6, z7); + __ bext(z6.VnB(), z2.VnB(), z5.VnB()); + + __ movprfx(z24, z25); + __ bgrp(z24.VnB(), z9.VnB(), z5.VnB()); + + __ movprfx(z1, z2); + __ fcvtlt(z1.VnD(), p1.Merging(), z28.VnS()); + + __ movprfx(z1, z2); + __ fcvtlt(z1.VnS(), p1.Merging(), z28.VnH()); + + __ movprfx(z4, z5); + __ fcvtnt(z4.VnH(), p7.Merging(), z0.VnS()); + + __ movprfx(z4, z5); + __ fcvtnt(z4.VnS(), p7.Merging(), z0.VnD()); + + __ movprfx(z27, z28); + __ fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD()); + + __ movprfx(z24, z25); + __ histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS()); + + __ movprfx(z22, z23); + __ histseg(z22.VnB(), z14.VnB(), z8.VnB()); + + __ movprfx(z21, z22); + __ ldnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), x23)); + + __ movprfx(z21, z22); + __ ldnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23)); + + __ movprfx(z10, z11); + __ ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z23.VnD(), x6)); + + __ movprfx(z30, z31); + __ ldnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x11)); + + __ movprfx(z30, z31); + __ ldnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x11)); + + __ movprfx(z7, z8); + __ ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11)); + + __ movprfx(z7, z8); + __ ldnt1sb(z7.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), x11)); + + __ movprfx(z17, z18); + __ ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19)); + + __ movprfx(z17, z18); + __ ldnt1sh(z17.VnD(), p5.Zeroing(), SVEMemOperand(z31.VnD(), x19)); + + __ movprfx(z3, z4); + __ ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10)); + + __ movprfx(z0, z1); + __ ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), 
x1)); + + __ movprfx(z0, z1); + __ ldnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1)); + + __ movprfx(z18, z19); + __ match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB()); + + __ movprfx(z15, z16); + __ mul(z15.VnB(), z15.VnB(), z15.VnB()); + + __ movprfx(z15, z16); + __ mul(z15.VnH(), z15.VnH(), z1.VnH(), 0); + + __ movprfx(z15, z16); + __ mul(z15.VnS(), z15.VnS(), z1.VnS(), 0); + + __ movprfx(z15, z16); + __ mul(z15.VnD(), z15.VnD(), z1.VnD(), 0); + + __ movprfx(z20, z21); + __ nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB()); + + __ movprfx(z0, z1); + __ pmul(z0.VnB(), z5.VnB(), z5.VnB()); + + __ movprfx(z12, z13); + __ pmullb(z12.VnD(), z21.VnS(), z12.VnS()); + + __ movprfx(z31, z0); + __ pmullt(z31.VnD(), z30.VnS(), z26.VnS()); + + __ movprfx(z0, z1); + __ raddhnb(z0.VnS(), z11.VnD(), z10.VnD()); + + __ movprfx(z23, z24); + __ raddhnt(z23.VnS(), z27.VnD(), z9.VnD()); + + __ movprfx(z5, z6); + __ rshrnb(z5.VnB(), z1.VnH(), 1); + + __ movprfx(z5, z6); + __ rshrnt(z5.VnB(), z1.VnH(), 8); + + __ movprfx(z30, z31); + __ rsubhnb(z30.VnS(), z29.VnD(), z11.VnD()); + + __ movprfx(z25, z26); + __ rsubhnt(z25.VnS(), z7.VnD(), z18.VnD()); + + __ movprfx(z2, z3); + __ sabdlb(z2.VnD(), z21.VnS(), z3.VnS()); + + __ movprfx(z25, z26); + __ sabdlt(z25.VnD(), z23.VnS(), z17.VnS()); + + __ movprfx(z24, z25); + __ saddlb(z24.VnD(), z30.VnS(), z16.VnS()); + + __ movprfx(z15, z16); + __ saddlbt(z15.VnD(), z6.VnS(), z18.VnS()); + + __ movprfx(z21, z22); + __ saddlt(z21.VnD(), z29.VnS(), z31.VnS()); + + __ movprfx(z12, z13); + __ saddwb(z12.VnD(), z8.VnD(), z8.VnS()); + + __ movprfx(z24, z25); + __ saddwt(z24.VnD(), z0.VnD(), z3.VnS()); + + __ movprfx(z7, z8); + __ shrnb(z7.VnB(), z4.VnH(), 1); + + __ movprfx(z21, z22); + __ shrnt(z21.VnB(), z29.VnH(), 1); + + __ movprfx(z29, z30); + __ sli(z29.VnB(), z7.VnB(), 0); + + __ movprfx(z23, z24); + __ smulh(z23.VnB(), z23.VnB(), z3.VnB()); + + __ movprfx(z10, z11); + __ smullb(z10.VnD(), z4.VnS(), z4.VnS()); + + __ 
movprfx(z10, z11); + __ smullb(z10.VnS(), z4.VnH(), z4.VnH(), 0); + + __ movprfx(z10, z11); + __ smullb(z10.VnD(), z4.VnS(), z4.VnS(), 0); + + __ movprfx(z31, z0); + __ smullt(z31.VnD(), z26.VnS(), z5.VnS()); + + __ movprfx(z31, z0); + __ smullt(z31.VnS(), z26.VnH(), z5.VnH(), 0); + + __ movprfx(z31, z0); + __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0); + + __ movprfx(z18, z19); + __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB()); + + __ movprfx(z18, z19); + __ sqdmulh(z18.VnH(), z25.VnH(), z1.VnH(), 0); + + __ movprfx(z18, z19); + __ sqdmulh(z18.VnS(), z25.VnS(), z1.VnS(), 0); + + __ movprfx(z18, z19); + __ sqdmulh(z18.VnD(), z25.VnD(), z1.VnD(), 0); + + __ movprfx(z1, z2); + __ sqdmullb(z1.VnD(), z31.VnS(), z21.VnS()); + + __ movprfx(z1, z2); + __ sqdmullb(z1.VnS(), z31.VnH(), z1.VnH(), 0); + + __ movprfx(z1, z2); + __ sqdmullb(z1.VnD(), z31.VnS(), z1.VnS(), 0); + + __ movprfx(z2, z3); + __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS()); + + __ movprfx(z2, z3); + __ sqdmullt(z2.VnS(), z1.VnH(), z5.VnH(), 0); + + __ movprfx(z2, z3); + __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS(), 0); + + __ movprfx(z21, z22); + __ sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB()); + + __ movprfx(z21, z22); + __ sqrdmulh(z21.VnH(), z21.VnH(), z2.VnH(), 0); + + __ movprfx(z21, z22); + __ sqrdmulh(z21.VnS(), z21.VnS(), z2.VnS(), 0); + + __ movprfx(z21, z22); + __ sqrdmulh(z21.VnD(), z21.VnD(), z2.VnD(), 0); + + __ movprfx(z1, z2); + __ sqrshrnb(z1.VnB(), z1.VnH(), 1); + + __ movprfx(z24, z25); + __ sqrshrnt(z24.VnB(), z19.VnH(), 8); + + __ movprfx(z23, z24); + __ sqrshrunb(z23.VnB(), z28.VnH(), 1); + + __ movprfx(z9, z10); + __ sqrshrunt(z9.VnB(), z15.VnH(), 8); + + __ movprfx(z25, z26); + __ sqshrnb(z25.VnB(), z1.VnH(), 1); + + __ movprfx(z0, z1); + __ sqshrnt(z0.VnB(), z25.VnH(), 8); + + __ movprfx(z25, z26); + __ sqshrunb(z25.VnB(), z10.VnH(), 1); + + __ movprfx(z20, z21); + __ sqshrunt(z20.VnB(), z3.VnH(), 8); + + __ movprfx(z2, z3); + __ sqxtnb(z2.VnB(), z0.VnH()); + + __ movprfx(z31, z0); + __ 
sqxtnt(z31.VnB(), z18.VnH()); + + __ movprfx(z28, z29); + __ sqxtunb(z28.VnB(), z6.VnH()); + + __ movprfx(z14, z15); + __ sqxtunt(z14.VnB(), z31.VnH()); + + __ movprfx(z6, z7); + __ sri(z6.VnB(), z9.VnB(), 1); + + __ movprfx(z2, z3); + __ sshllb(z2.VnH(), z20.VnB(), 0); + + __ movprfx(z27, z28); + __ sshllt(z27.VnH(), z8.VnB(), 0); + + __ movprfx(z4, z5); + __ ssublb(z4.VnD(), z23.VnS(), z7.VnS()); + + __ movprfx(z6, z7); + __ ssublbt(z6.VnD(), z28.VnS(), z12.VnS()); + + __ movprfx(z12, z13); + __ ssublt(z12.VnD(), z13.VnS(), z6.VnS()); + + __ movprfx(z11, z12); + __ ssubltb(z11.VnD(), z18.VnS(), z19.VnS()); + + __ movprfx(z7, z8); + __ ssubwb(z7.VnD(), z28.VnD(), z11.VnS()); + + __ movprfx(z29, z30); + __ ssubwt(z29.VnD(), z25.VnD(), z20.VnS()); + + __ movprfx(z21, z22); + __ stnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z1.VnS(), x23)); + + __ movprfx(z21, z22); + __ stnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23)); + + __ movprfx(z10, z11); + __ stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x23)); + + __ movprfx(z30, z31); + __ stnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x6)); + + __ movprfx(z30, z31); + __ stnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x6)); + + __ movprfx(z0, z1); + __ stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1)); + + __ movprfx(z0, z1); + __ stnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1)); + + __ movprfx(z31, z0); + __ subhnb(z31.VnS(), z31.VnD(), z7.VnD()); + + __ movprfx(z31, z0); + __ subhnt(z31.VnS(), z22.VnD(), z27.VnD()); + + __ movprfx(z24, z25); + __ tbl(z24.VnB(), z29.VnB(), z30.VnB(), z0.VnB()); + + __ movprfx(z22, z23); + __ tbx(z22.VnB(), z15.VnB(), z19.VnB()); + + __ movprfx(z1, z2); + __ uabdlb(z1.VnD(), z26.VnS(), z12.VnS()); + + __ movprfx(z25, z26); + __ uabdlt(z25.VnD(), z29.VnS(), z14.VnS()); + + __ movprfx(z3, z4); + __ uaddlb(z3.VnD(), z5.VnS(), z2.VnS()); + + __ movprfx(z15, z16); + __ uaddlt(z15.VnD(), z28.VnS(), z20.VnS()); + + __ movprfx(z31, 
z0); + __ uaddwb(z31.VnD(), z8.VnD(), z25.VnS()); + + __ movprfx(z17, z18); + __ uaddwt(z17.VnD(), z15.VnD(), z2.VnS()); + + __ movprfx(z12, z13); + __ umulh(z12.VnB(), z12.VnB(), z17.VnB()); + + __ movprfx(z12, z13); + __ umullb(z12.VnD(), z5.VnS(), z2.VnS()); + + __ movprfx(z12, z13); + __ umullb(z12.VnS(), z5.VnH(), z2.VnH(), 0); + + __ movprfx(z12, z13); + __ umullb(z12.VnD(), z5.VnS(), z2.VnS(), 0); + + __ movprfx(z24, z25); + __ umullt(z24.VnD(), z6.VnS(), z6.VnS()); + + __ movprfx(z24, z25); + __ umullt(z24.VnS(), z6.VnH(), z1.VnH(), 0); + + __ movprfx(z24, z25); + __ umullt(z24.VnD(), z6.VnS(), z1.VnS(), 0); + + __ movprfx(z30, z31); + __ uqrshrnb(z30.VnB(), z25.VnH(), 1); + + __ movprfx(z3, z4); + __ uqrshrnt(z3.VnB(), z25.VnH(), 8); + + __ movprfx(z17, z18); + __ uqshrnb(z17.VnB(), z4.VnH(), 1); + + __ movprfx(z28, z29); + __ uqshrnt(z28.VnB(), z18.VnH(), 8); + + __ movprfx(z28, z29); + __ uqxtnb(z28.VnB(), z4.VnH()); + + __ movprfx(z19, z20); + __ uqxtnt(z19.VnB(), z7.VnH()); + + __ movprfx(z8, z9); + __ ushllb(z8.VnH(), z31.VnB(), 0); + + __ movprfx(z3, z4); + __ ushllt(z3.VnH(), z21.VnB(), 0); + + __ movprfx(z25, z26); + __ usublb(z25.VnD(), z9.VnS(), z17.VnS()); + + __ movprfx(z5, z6); + __ usublt(z5.VnD(), z11.VnS(), z15.VnS()); + + __ movprfx(z10, z11); + __ usubwb(z10.VnD(), z13.VnD(), z20.VnS()); + + __ movprfx(z15, z16); + __ usubwt(z15.VnD(), z8.VnD(), z23.VnS()); + + __ movprfx(z20, z21); + __ whilege(p0.VnB(), w20, w29); + + __ movprfx(z24, z25); + __ whilegt(p11.VnB(), w24, w3); + + __ movprfx(z20, z21); + __ whilehi(p2.VnB(), x20, x8); + + __ movprfx(z22, z23); + __ whilehs(p4.VnB(), w22, w9); + + __ movprfx(z25, z26); + __ whilerw(p7.VnB(), x25, x27); + + __ movprfx(z14, z15); + __ whilewr(p8.VnB(), x14, x14); + } + assm.FinalizeCode(); + + CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); +} + +TEST(movprfx_negative_predication_sve2) { + Assembler assm; + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); + 
{ + // We have to use the Assembler directly to generate movprfx, so we need + // to manually reserve space for the code we're about to emit. + static const size_t kPairCount = 140; + CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); + + __ movprfx(z25.VnS(), p0.Zeroing(), z26.VnS()); + __ adclb(z25.VnS(), z17.VnS(), z24.VnS()); + + __ movprfx(z0.VnS(), p0.Zeroing(), z1.VnS()); + __ adclt(z0.VnS(), z2.VnS(), z15.VnS()); + + __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); + __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()); + + __ movprfx(z18.VnD(), p0.Zeroing(), z19.VnD()); + __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()); + + __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD()); + __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()); + + __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); + __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()); + + __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB()); + __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90); + + __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS()); + __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0); + + __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS()); + __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0); + + __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD()); + __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0); + + __ movprfx(z19.VnB(), p0.Zeroing(), z20.VnB()); + __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0); + + __ movprfx(z19.VnS(), p0.Zeroing(), z20.VnS()); + __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0); + + __ movprfx(z19.VnH(), p0.Zeroing(), z20.VnH()); + __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0); + + __ movprfx(z10.VnD(), p0.Zeroing(), z11.VnD()); + __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()); + + __ movprfx(z3.VnB(), p0.Zeroing(), z4.VnB()); + __ eorbt(z3.VnB(), z10.VnB(), z8.VnB()); + + __ movprfx(z20.VnB(), p0.Zeroing(), z22.VnB()); + __ eortb(z20.VnB(), z21.VnB(), z15.VnB()); + + __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD()); + __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()); + + __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); 
+ __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()); + + __ movprfx(z22.VnD(), p0.Zeroing(), z23.VnD()); + __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()); + + __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); + __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()); + + __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); + __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH()); + + __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); + __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0); + + __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS()); + __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH()); + + __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS()); + __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0); + + __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); + __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH()); + + __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); + __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0); + + __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS()); + __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH()); + + __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS()); + __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0); + + __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH()); + __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0); + + __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS()); + __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0); + + __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); + __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0); + + __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH()); + __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0); + + __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS()); + __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0); + + __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); + __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0); + + __ movprfx(z17.VnD(), p0.Zeroing(), z18.VnD()); + __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()); + + __ movprfx(z13.VnB(), p0.Zeroing(), z14.VnB()); + __ saba(z13.VnB(), z2.VnB(), z31.VnB()); + + __ movprfx(z13.VnD(), p0.Zeroing(), z14.VnD()); + __ 
sabalb(z13.VnD(), z20.VnS(), z26.VnS()); + + __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD()); + __ sabalt(z14.VnD(), z19.VnS(), z10.VnS()); + + __ movprfx(z17.VnS(), p0.Zeroing(), z18.VnS()); + __ sbclb(z17.VnS(), z10.VnS(), z8.VnS()); + + __ movprfx(z20.VnS(), p0.Zeroing(), z21.VnS()); + __ sbclt(z20.VnS(), z0.VnS(), z13.VnS()); + + __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB()); + __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()); + + __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB()); + __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlalb(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); + __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlalt(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); + __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlslb(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); + __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlslt(z1.VnD(), z3.VnS(), z23.VnS()); + + __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); + __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0); + + __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); + __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0); + + __ movprfx(z20.VnB(), p0.Zeroing(), z21.VnB()); + __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90); + + __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); + __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()); + + __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); + __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0); + + __ 
movprfx(z6.VnS(), p0.Zeroing(), z7.VnS()); + __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0); + + __ movprfx(z23.VnD(), p0.Zeroing(), z24.VnD()); + __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()); + + __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); + __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS()); + + __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); + __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0); + + __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); + __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0); + + __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); + __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()); + + __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); + __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0); + + __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); + __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0); + + __ movprfx(z26.VnD(), p0.Zeroing(), z27.VnD()); + __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()); + + __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); + __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()); + + __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); + __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0); + + __ movprfx(z21.VnS(), p0.Zeroing(), z22.VnS()); + __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0); + + __ movprfx(z31.VnB(), p0.Zeroing(), z0.VnB()); + __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0); + + __ movprfx(z31.VnH(), p0.Zeroing(), z0.VnH()); + __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0); + + __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS()); + __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0); + + __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB()); + __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()); + + __ movprfx(z27.VnH(), p0.Zeroing(), z28.VnH()); + __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0); + + __ movprfx(z27.VnS(), p0.Zeroing(), z28.VnS()); + __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0); + + __ movprfx(z27.VnD(), p0.Zeroing(), z28.VnD()); + __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0); + + __ movprfx(z11.VnB(), p0.Zeroing(), z12.VnB()); + __ sqrdmlsh(z11.VnB(), 
z16.VnB(), z31.VnB()); + + __ movprfx(z11.VnH(), p0.Zeroing(), z12.VnH()); + __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0); + + __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); + __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0); + + __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); + __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0); + + __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); + __ srsra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); + __ ssra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z23.VnB(), p0.Zeroing(), z24.VnB()); + __ uaba(z23.VnB(), z22.VnB(), z20.VnB()); + + __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); + __ uabalb(z11.VnD(), z25.VnS(), z12.VnS()); + + __ movprfx(z4.VnD(), p0.Zeroing(), z5.VnD()); + __ uabalt(z4.VnD(), z2.VnS(), z31.VnS()); + + __ movprfx(z7.VnB(), p0.Zeroing(), z8.VnB()); + __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()); + + __ movprfx(z10.VnB(), p0.Zeroing(), z11.VnB()); + __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()); + + __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD()); + __ umlalb(z31.VnD(), z9.VnS(), z21.VnS()); + + __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD()); + __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0); + + __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS()); + __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0); + + __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); + __ umlalt(z11.VnD(), z5.VnS(), z22.VnS()); + + __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); + __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0); + + __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); + __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0); + + __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD()); + __ umlslb(z28.VnD(), z13.VnS(), z9.VnS()); + + __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD()); + __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0); + + __ movprfx(z28.VnS(), p0.Zeroing(), z29.VnS()); + __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0); + + __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD()); + __ umlslt(z9.VnD(), z12.VnS(), z30.VnS()); + + __ 
movprfx(z9.VnD(), p0.Zeroing(), z10.VnD()); + __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0); + + __ movprfx(z9.VnS(), p0.Zeroing(), z10.VnS()); + __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0); + + __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); + __ ursra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); + __ usra(z0.VnB(), z8.VnB(), 1); + + __ movprfx(z16.VnB(), p0.Zeroing(), z17.VnB()); + __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1); + } + assm.FinalizeCode(); + + CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); +} + +TEST(movprfx_negative_aliasing_sve2) { + Assembler assm; + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); + { + // We have to use the Assembler directly to generate movprfx, so we need + // to manually reserve space for the code we're about to emit. + static const size_t kPairCount = 140; + CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); + + __ movprfx(z25, z26); + __ adclb(z25.VnS(), z17.VnS(), z25.VnS()); + + __ movprfx(z0, z1); + __ adclt(z0.VnS(), z2.VnS(), z0.VnS()); + + __ movprfx(z3, z4); + __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB()); + + __ movprfx(z6, z7); + __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z6.VnD()); + + __ movprfx(z18, z19); + __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z18.VnD()); + + __ movprfx(z7, z8); + __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z7.VnD()); + + __ movprfx(z21, z22); + __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z21.VnD()); + + __ movprfx(z5, z6); + __ cadd(z5.VnB(), z5.VnB(), z5.VnB(), 90); + + __ movprfx(z7, z8); + __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0); + + __ movprfx(z7, z8); + __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0, 0); + + __ movprfx(z7, z8); + __ cdot(z7.VnD(), z7.VnH(), z0.VnH(), 0, 0); + + __ movprfx(z19, z20); + __ cmla(z19.VnB(), z19.VnB(), z2.VnB(), 0); + + __ movprfx(z19, z20); + __ cmla(z19.VnS(), z19.VnS(), z2.VnS(), 0, 0); + + __ movprfx(z1, z20); + __ cmla(z1.VnH(), z7.VnH(), z1.VnH(), 0, 0); + + __ movprfx(z10, z11); + __ 
eor3(z10.VnD(), z10.VnD(), z10.VnD(), z23.VnD()); + + __ movprfx(z3, z4); + __ eorbt(z3.VnB(), z10.VnB(), z3.VnB()); + + __ movprfx(z20, z22); + __ eortb(z20.VnB(), z21.VnB(), z20.VnB()); + + __ movprfx(z14, z15); + __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z14.VnD()); + + __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); + __ fcvtx(z14.VnS(), p4.Merging(), z14.VnD()); + + __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); + __ flogb(z15.VnH(), p0.Merging(), z15.VnH()); + + __ movprfx(z2, z3); + __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z2.VnD()); + + __ movprfx(z22, z23); + __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z22.VnD()); + + __ movprfx(z1, z2); + __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z1.VnD()); + + __ movprfx(z16, z17); + __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z16.VnD()); + + __ movprfx(z16, z17); + __ fmlalb(z16.VnS(), z18.VnH(), z16.VnH()); + + __ movprfx(z16, z17); + __ fmlalb(z16.VnS(), z16.VnH(), z2.VnH(), 0); + + __ movprfx(z18, z19); + __ fmlalt(z18.VnS(), z13.VnH(), z18.VnH()); + + __ movprfx(z18, z19); + __ fmlalt(z18.VnS(), z18.VnH(), z5.VnH(), 0); + + __ movprfx(z16, z17); + __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH()); + + __ movprfx(z16, z17); + __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH(), 0); + + __ movprfx(z3, z4); + __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH()); + + __ movprfx(z3, z4); + __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH(), 0); + + __ movprfx(z2, z3); + __ mla(z2.VnH(), z0.VnH(), z2.VnH(), 0); + + __ movprfx(z2, z3); + __ mla(z2.VnS(), z0.VnS(), z2.VnS(), 0); + + __ movprfx(z2, z3); + __ mla(z2.VnD(), z0.VnD(), z2.VnD(), 0); + + __ movprfx(z2, z3); + __ mls(z2.VnH(), z0.VnH(), z2.VnH(), 0); + + __ movprfx(z2, z3); + __ mls(z2.VnS(), z0.VnS(), z2.VnS(), 0); + + __ movprfx(z2, z3); + __ mls(z2.VnD(), z0.VnD(), z2.VnD(), 0); + + __ movprfx(z17, z18); + __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z17.VnD()); + + __ movprfx(z13, z14); + __ saba(z13.VnB(), z2.VnB(), z13.VnB()); + + __ movprfx(z13, z14); + __ sabalb(z13.VnD(), z13.VnS(), 
z26.VnS()); + + __ movprfx(z14, z15); + __ sabalt(z14.VnD(), z14.VnS(), z10.VnS()); + + __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); + __ sadalp(z19.VnD(), p5.Merging(), z19.VnS()); + + __ movprfx(z17, z18); + __ sbclb(z17.VnS(), z17.VnS(), z8.VnS()); + + __ movprfx(z20, z21); + __ sbclt(z20.VnS(), z20.VnS(), z13.VnS()); + + __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); + __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z20.VnB()); + + __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); + __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z21.VnB()); + + __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); + __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z1.VnB()); + + __ movprfx(z5, z6); + __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z5.VnB()); + + __ movprfx(z27, z28); + __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z27.VnB()); + + __ movprfx(z1, z2); + __ smlalb(z1.VnD(), z3.VnS(), z1.VnS()); + + __ movprfx(z1, z2); + __ smlalb(z1.VnD(), z3.VnS(), z1.VnS(), 0); + + __ movprfx(z1, z2); + __ smlalb(z1.VnS(), z1.VnH(), z2.VnH(), 0); + + __ movprfx(z1, z2); + __ smlalt(z1.VnD(), z1.VnS(), z23.VnS()); + + __ movprfx(z1, z2); + __ smlalt(z1.VnD(), z3.VnS(), z1.VnS(), 0); + + __ movprfx(z1, z2); + __ smlalt(z1.VnS(), z1.VnH(), z2.VnH(), 0); + + __ movprfx(z1, z2); + __ smlslb(z1.VnD(), z1.VnS(), z23.VnS()); + + __ movprfx(z1, z2); + __ smlslb(z1.VnD(), z3.VnS(), z1.VnS(), 0); + + __ movprfx(z1, z2); + __ smlslb(z1.VnS(), z3.VnH(), z1.VnH(), 0); + + __ movprfx(z1, z2); + __ smlslt(z1.VnD(), z1.VnS(), z23.VnS()); + + __ movprfx(z1, z2); + __ smlslt(z1.VnD(), z3.VnS(), z1.VnS(), 0); + + __ movprfx(z1, z2); + __ smlslt(z1.VnS(), z1.VnH(), z2.VnH(), 0); + + __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); + __ sqabs(z29.VnB(), p1.Merging(), z29.VnB()); + + __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); + __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()); + + __ movprfx(z20, z21); + __ sqcadd(z20.VnB(), z20.VnB(), z20.VnB(), 90); + + __ movprfx(z6, z7); + __ sqdmlalb(z6.VnD(), z6.VnS(), 
z25.VnS()); + + __ movprfx(z6, z7); + __ sqdmlalb(z6.VnD(), z6.VnS(), z2.VnS(), 0); + + __ movprfx(z6, z7); + __ sqdmlalb(z6.VnS(), z6.VnH(), z2.VnH(), 0); + + __ movprfx(z23, z24); + __ sqdmlalbt(z23.VnD(), z23.VnS(), z26.VnS()); + + __ movprfx(z11, z12); + __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS()); + + __ movprfx(z11, z12); + __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS(), 0); + + __ movprfx(z1, z12); + __ sqdmlalt(z1.VnS(), z0.VnH(), z1.VnH(), 0); + + __ movprfx(z16, z17); + __ sqdmlslb(z16.VnD(), z26.VnS(), z16.VnS()); + + __ movprfx(z16, z17); + __ sqdmlslb(z16.VnD(), z16.VnS(), z2.VnS(), 0); + + __ movprfx(z16, z17); + __ sqdmlslb(z16.VnS(), z16.VnH(), z2.VnH(), 0); + + __ movprfx(z26, z27); + __ sqdmlslbt(z26.VnD(), z26.VnS(), z4.VnS()); + + __ movprfx(z21, z22); + __ sqdmlslt(z21.VnD(), z23.VnS(), z21.VnS()); + + __ movprfx(z21, z22); + __ sqdmlslt(z21.VnD(), z21.VnS(), z0.VnS(), 0); + + __ movprfx(z1, z22); + __ sqdmlslt(z21.VnS(), z23.VnH(), z1.VnH(), 0); + + __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); + __ sqneg(z21.VnB(), p0.Merging(), z21.VnB()); + + __ movprfx(z31, z0); + __ sqrdcmlah(z31.VnB(), z15.VnB(), z31.VnB(), 0); + + __ movprfx(z31, z0); + __ sqrdcmlah(z31.VnH(), z31.VnH(), z2.VnH(), 0, 0); + + __ movprfx(z31, z0); + __ sqrdcmlah(z31.VnS(), z31.VnS(), z2.VnS(), 0, 0); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnB(), z27.VnB(), z19.VnB()); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnH(), z27.VnH(), z1.VnH(), 0); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnS(), z27.VnS(), z1.VnS(), 0); + + __ movprfx(z27, z28); + __ sqrdmlah(z27.VnD(), z27.VnD(), z1.VnD(), 0); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnB(), z16.VnB(), z11.VnB()); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnH(), z11.VnH(), z1.VnH(), 0); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnS(), z11.VnS(), z1.VnS(), 0); + + __ movprfx(z11, z12); + __ sqrdmlsh(z11.VnD(), z11.VnD(), z1.VnD(), 0); + + __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); + __ sqrshl(z31.VnB(), 
p5.Merging(), z31.VnB(), z31.VnB()); + + __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); + __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z25.VnB()); + + __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); + __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z0.VnB()); + + __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); + __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z7.VnB()); + + __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); + __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); + + __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); + __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); + + __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); + __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z23.VnB()); + + __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); + __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z31.VnB()); + + __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); + __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); + + __ movprfx(z0, z1); + __ srsra(z0.VnB(), z0.VnB(), 1); + + __ movprfx(z0, z1); + __ ssra(z0.VnB(), z0.VnB(), 1); + + __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); + __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z26.VnB()); + + __ movprfx(z23, z24); + __ uaba(z23.VnB(), z22.VnB(), z23.VnB()); + + __ movprfx(z11, z12); + __ uabalb(z11.VnD(), z25.VnS(), z11.VnS()); + + __ movprfx(z4, z5); + __ uabalt(z4.VnD(), z4.VnS(), z31.VnS()); + + __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); + __ uadalp(z20.VnD(), p4.Merging(), z20.VnS()); + + __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); + __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z21.VnB()); + + __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); + __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z1.VnB()); + + __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); + __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z18.VnB()); + + __ movprfx(z7, z8); + __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z7.VnB()); + + __ movprfx(z10, z11); + __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z10.VnB()); + + __ movprfx(z31, z0); + __ 
umlalb(z31.VnD(), z9.VnS(), z31.VnS()); + + __ movprfx(z31, z0); + __ umlalb(z31.VnD(), z31.VnS(), z1.VnS(), 0); + + __ movprfx(z31, z0); + __ umlalb(z31.VnS(), z31.VnH(), z1.VnH(), 0); + + __ movprfx(z11, z12); + __ umlalt(z11.VnD(), z11.VnS(), z22.VnS()); + + __ movprfx(z11, z12); + __ umlalt(z11.VnD(), z11.VnS(), z2.VnS(), 0); + + __ movprfx(z1, z12); + __ umlalt(z1.VnS(), z5.VnH(), z1.VnH(), 0); + + __ movprfx(z28, z29); + __ umlslb(z28.VnD(), z28.VnS(), z9.VnS()); + + __ movprfx(z28, z29); + __ umlslb(z28.VnD(), z28.VnS(), z1.VnS(), 0); + + __ movprfx(z28, z29); + __ umlslb(z28.VnS(), z28.VnH(), z1.VnH(), 0); + + __ movprfx(z9, z10); + __ umlslt(z9.VnD(), z9.VnS(), z30.VnS()); + + __ movprfx(z9, z10); + __ umlslt(z9.VnD(), z9.VnS(), z0.VnS(), 0); + + __ movprfx(z9, z10); + __ umlslt(z9.VnS(), z9.VnH(), z0.VnH(), 0); + + __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); + __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z24.VnB()), + + __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); + __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z20.VnB()); + + __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); + __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z8.VnB()); + + __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); + __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z29.VnB()); + + __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); + __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB()); + + __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); + __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB()); + + __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); + __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB()); + + __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); + __ urecpe(z25.VnS(), p7.Merging(), z25.VnS()); + + __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); + __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z29.VnB()); + + __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); + __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z15.VnB()); + + __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); + __ 
urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z27.VnB()); + + __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); + __ ursqrte(z4.VnS(), p3.Merging(), z4.VnS()); + + __ movprfx(z0, z1); + __ ursra(z0.VnB(), z0.VnB(), 1); + + __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); + __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z25.VnB()); + + __ movprfx(z0, z1); + __ usra(z0.VnB(), z0.VnB(), 1); + + __ movprfx(z16, z17); + __ xar(z16.VnB(), z16.VnB(), z16.VnB(), 1); + } + assm.FinalizeCode(); + + CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); +} + +TEST(movprfx_negative_lane_size_sve2) { + Assembler assm; + assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); + { + // We have to use the Assembler directly to generate movprfx, so we need + // to manually reserve space for the code we're about to emit. + static const size_t kPairCount = 140; + CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); + + __ movprfx(z14.VnS(), p4.Merging(), z15.VnS()); + __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); + + __ movprfx(z15.VnS(), p0.Merging(), z16.VnS()); + __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); + + __ movprfx(z19.VnB(), p5.Merging(), z20.VnB()); + __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); + + __ movprfx(z20.VnH(), p3.Merging(), z21.VnH()); + __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); + + __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); + __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); + + __ movprfx(z1.VnS(), p0.Merging(), z2.VnS()); + __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); + + __ movprfx(z29.VnD(), p1.Merging(), z30.VnD()); + __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); + + __ movprfx(z28.VnH(), p0.Merging(), z29.VnH()); + __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); + + __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); + __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); + + __ movprfx(z31.VnS(), p5.Merging(), z0.VnS()); + __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); + + __ 
movprfx(z25.VnD(), p6.Merging(), z26.VnD()); + __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); + + __ movprfx(z0.VnH(), p5.Merging(), z1.VnH()); + __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); + + __ movprfx(z0.VnS(), p5.Merging(), z1.VnS()); + __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); + + __ movprfx(z7.VnD(), p3.Merging(), z8.VnD()); + __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); + + __ movprfx(z10.VnH(), p1.Merging(), z11.VnH()); + __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); + + __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); + __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); + + __ movprfx(z16.VnS(), p7.Merging(), z17.VnS()); + __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); + + __ movprfx(z23.VnD(), p4.Merging(), z24.VnD()); + __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); + + __ movprfx(z31.VnH(), p7.Merging(), z0.VnH()); + __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()); + + __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); + __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); + + __ movprfx(z12.VnH(), p0.Merging(), z13.VnH()); + __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); + + __ movprfx(z26.VnH(), p2.Merging(), z27.VnH()); + __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); + + __ movprfx(z20.VnB(), p4.Merging(), z21.VnB()); + __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); + + __ movprfx(z21.VnH(), p2.Merging(), z22.VnH()); + __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); + + __ movprfx(z1.VnH(), p4.Merging(), z2.VnH()); + __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); + + __ movprfx(z18.VnH(), p0.Merging(), z19.VnH()); + __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); + + __ movprfx(z24.VnH(), p7.Merging(), z25.VnH()); + __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), + + __ movprfx(z20.VnS(), p1.Merging(), z21.VnS()); + __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); + + __ movprfx(z8.VnS(), p5.Merging(), z9.VnS()); + __ 
uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); + + __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); + __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); + + __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); + __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); + + __ movprfx(z12.VnS(), p1.Merging(), z13.VnS()); + __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); + + __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); + __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); + + __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); + __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); + + __ movprfx(z25.VnB(), p7.Merging(), z26.VnB()); + __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); + + __ movprfx(z29.VnD(), p4.Merging(), z30.VnD()); + __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); + + __ movprfx(z15.VnD(), p2.Merging(), z16.VnD()); + __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); + + __ movprfx(z27.VnD(), p1.Merging(), z28.VnD()); + __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); + + __ movprfx(z31.VnD(), p2.Merging(), z0.VnD()); + __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); + + __ movprfx(z4.VnH(), p3.Merging(), z5.VnH()); + __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); + + __ movprfx(z25.VnD(), p4.Merging(), z26.VnD()); + __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); + } + assm.FinalizeCode(); + + CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); +} } // namespace aarch64 } // namespace vixl diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc index e23bd8ce..4ca1a56e 100644 --- a/test/aarch64/test-assembler-aarch64.cc +++ b/test/aarch64/test-assembler-aarch64.cc @@ -10664,10 +10664,10 @@ TEST(atomic_memory_swp) { TEST(ldaprb_ldaprh_ldapr) { - uint64_t data0[] = {0x1010101010101010, 0}; - uint64_t data1[] = {0x1010101010101010, 0}; - uint64_t data2[] = {0x1010101010101010, 0}; - uint64_t data3[] = {0x1010101010101010, 0}; + uint64_t data0[] = 
{0x1010101010101010, 0x1010101010101010}; + uint64_t data1[] = {0x1010101010101010, 0x1010101010101010}; + uint64_t data2[] = {0x1010101010101010, 0x1010101010101010}; + uint64_t data3[] = {0x1010101010101010, 0x1010101010101010}; uint64_t* data0_aligned = AlignUp(data0, kXRegSizeInBytes * 2); uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2); diff --git a/test/aarch64/test-assembler-aarch64.h b/test/aarch64/test-assembler-aarch64.h index 31e926dd..c3f3264e 100644 --- a/test/aarch64/test-assembler-aarch64.h +++ b/test/aarch64/test-assembler-aarch64.h @@ -164,7 +164,7 @@ namespace aarch64 { { \ /* We expect the test to use all of the features it requested, plus the */ \ /* features that the instructure code requires. */ \ - CPUFeatures const& expected = \ + CPUFeatures const& expected_features = \ simulator.GetCPUFeatures()->With(CPUFeatures::kNEON); \ CPUFeatures const& seen = simulator.GetSeenFeatures(); \ /* This gives three broad categories of features that we care about: */ \ @@ -172,13 +172,13 @@ namespace aarch64 { /* 2. Things seen, but not expected. The simulator catches these. */ \ /* 3. Things expected, but not seen. We check these here. */ \ /* In a valid, passing test, categories 2 and 3 should be empty. */ \ - if (seen != expected) { \ + if (seen != expected_features) { \ /* The Simulator should have caught anything in category 2 already. */ \ - VIXL_ASSERT(expected.Has(seen)); \ + VIXL_ASSERT(expected_features.Has(seen)); \ /* Anything left is category 3: things expected, but not seen. This */ \ /* is not necessarily a bug in VIXL itself, but indicates that the */ \ /* test is less strict than it could be. 
*/ \ - CPUFeatures missing = expected.Without(seen); \ + CPUFeatures missing = expected_features.Without(seen); \ VIXL_ASSERT(missing.Count() > 0); \ std::cout << "Error: expected to see CPUFeatures { " << missing \ << " }\n"; \ @@ -265,15 +265,15 @@ namespace aarch64 { if (Test::disassemble()) { \ PrintDisassembler disasm(stdout); \ CodeBuffer* buffer = masm.GetBuffer(); \ - Instruction* start = buffer->GetOffsetAddress<Instruction*>( \ + Instruction* test_start = buffer->GetOffsetAddress<Instruction*>( \ offset_after_infrastructure_start); \ - Instruction* end = buffer->GetOffsetAddress<Instruction*>( \ + Instruction* test_end = buffer->GetOffsetAddress<Instruction*>( \ offset_before_infrastructure_end); \ \ if (Test::disassemble_infrastructure()) { \ Instruction* infra_start = buffer->GetStartAddress<Instruction*>(); \ printf("# Infrastructure code (prologue)\n"); \ - disasm.DisassembleBuffer(infra_start, start); \ + disasm.DisassembleBuffer(infra_start, test_start); \ printf("# Test code\n"); \ } else { \ printf( \ @@ -281,12 +281,12 @@ namespace aarch64 { "Use --disassemble to see it.\n"); \ } \ \ - disasm.DisassembleBuffer(start, end); \ + disasm.DisassembleBuffer(test_start, test_end); \ \ if (Test::disassemble_infrastructure()) { \ printf("# Infrastructure code (epilogue)\n"); \ Instruction* infra_end = buffer->GetEndAddress<Instruction*>(); \ - disasm.DisassembleBuffer(end, infra_end); \ + disasm.DisassembleBuffer(test_end, infra_end); \ } \ } diff --git a/test/aarch64/test-assembler-fp-aarch64.cc b/test/aarch64/test-assembler-fp-aarch64.cc index b9a581e7..4ae9ec7e 100644 --- a/test/aarch64/test-assembler-fp-aarch64.cc +++ b/test/aarch64/test-assembler-fp-aarch64.cc @@ -905,95 +905,209 @@ TEST(fmadd_fmsub_float) { TEST(fmadd_fmsub_double_nans) { // Make sure that NaN propagation works correctly. 
- double s1 = RawbitsToDouble(0x7ff5555511111111); - double s2 = RawbitsToDouble(0x7ff5555522222222); - double sa = RawbitsToDouble(0x7ff55555aaaaaaaa); - double q1 = RawbitsToDouble(0x7ffaaaaa11111111); - double q2 = RawbitsToDouble(0x7ffaaaaa22222222); - double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa); - VIXL_ASSERT(IsSignallingNaN(s1)); - VIXL_ASSERT(IsSignallingNaN(s2)); - VIXL_ASSERT(IsSignallingNaN(sa)); - VIXL_ASSERT(IsQuietNaN(q1)); - VIXL_ASSERT(IsQuietNaN(q2)); - VIXL_ASSERT(IsQuietNaN(qa)); + double sig1 = RawbitsToDouble(0x7ff5555511111111); + double sig2 = RawbitsToDouble(0x7ff5555522222222); + double siga = RawbitsToDouble(0x7ff55555aaaaaaaa); + double qui1 = RawbitsToDouble(0x7ffaaaaa11111111); + double qui2 = RawbitsToDouble(0x7ffaaaaa22222222); + double quia = RawbitsToDouble(0x7ffaaaaaaaaaaaaa); + VIXL_ASSERT(IsSignallingNaN(sig1)); + VIXL_ASSERT(IsSignallingNaN(sig2)); + VIXL_ASSERT(IsSignallingNaN(siga)); + VIXL_ASSERT(IsQuietNaN(qui1)); + VIXL_ASSERT(IsQuietNaN(qui2)); + VIXL_ASSERT(IsQuietNaN(quia)); // The input NaNs after passing through ProcessNaN. 
- double s1_proc = RawbitsToDouble(0x7ffd555511111111); - double s2_proc = RawbitsToDouble(0x7ffd555522222222); - double sa_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa); - double q1_proc = q1; - double q2_proc = q2; - double qa_proc = qa; - VIXL_ASSERT(IsQuietNaN(s1_proc)); - VIXL_ASSERT(IsQuietNaN(s2_proc)); - VIXL_ASSERT(IsQuietNaN(sa_proc)); - VIXL_ASSERT(IsQuietNaN(q1_proc)); - VIXL_ASSERT(IsQuietNaN(q2_proc)); - VIXL_ASSERT(IsQuietNaN(qa_proc)); + double sig1_proc = RawbitsToDouble(0x7ffd555511111111); + double sig2_proc = RawbitsToDouble(0x7ffd555522222222); + double siga_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa); + double qui1_proc = qui1; + double qui2_proc = qui2; + double quia_proc = quia; + VIXL_ASSERT(IsQuietNaN(sig1_proc)); + VIXL_ASSERT(IsQuietNaN(sig2_proc)); + VIXL_ASSERT(IsQuietNaN(siga_proc)); + VIXL_ASSERT(IsQuietNaN(qui1_proc)); + VIXL_ASSERT(IsQuietNaN(qui2_proc)); + VIXL_ASSERT(IsQuietNaN(quia_proc)); // Negated NaNs as it would be done on ARMv8 hardware. - double s1_proc_neg = RawbitsToDouble(0xfffd555511111111); - double sa_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa); - double q1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111); - double qa_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa); - VIXL_ASSERT(IsQuietNaN(s1_proc_neg)); - VIXL_ASSERT(IsQuietNaN(sa_proc_neg)); - VIXL_ASSERT(IsQuietNaN(q1_proc_neg)); - VIXL_ASSERT(IsQuietNaN(qa_proc_neg)); + double sig1_proc_neg = RawbitsToDouble(0xfffd555511111111); + double siga_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa); + double qui1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111); + double quia_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa); + VIXL_ASSERT(IsQuietNaN(sig1_proc_neg)); + VIXL_ASSERT(IsQuietNaN(siga_proc_neg)); + VIXL_ASSERT(IsQuietNaN(qui1_proc_neg)); + VIXL_ASSERT(IsQuietNaN(quia_proc_neg)); // Quiet NaNs are propagated. 
- FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc); - FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc); - FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); - FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc); - FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); - FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); - FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); + FmaddFmsubHelper(qui1, + 0, + 0, + qui1_proc, + qui1_proc_neg, + qui1_proc_neg, + qui1_proc); + FmaddFmsubHelper(0, qui2, 0, qui2_proc, qui2_proc, qui2_proc, qui2_proc); + FmaddFmsubHelper(0, + 0, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); + FmaddFmsubHelper(qui1, + qui2, + 0, + qui1_proc, + qui1_proc_neg, + qui1_proc_neg, + qui1_proc); + FmaddFmsubHelper(0, + qui2, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); + FmaddFmsubHelper(qui1, + 0, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); + FmaddFmsubHelper(qui1, + qui2, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); // Signalling NaNs are propagated, and made quiet. 
- FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc); - FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); + FmaddFmsubHelper(sig1, + 0, + 0, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(0, sig2, 0, sig2_proc, sig2_proc, sig2_proc, sig2_proc); + FmaddFmsubHelper(0, + 0, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + 0, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(0, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + 0, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); // Signalling NaNs take precedence over quiet NaNs. 
- FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc); - FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); + FmaddFmsubHelper(sig1, + qui2, + quia, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(qui1, + sig2, + quia, + sig2_proc, + sig2_proc, + sig2_proc, + sig2_proc); + FmaddFmsubHelper(qui1, + qui2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + quia, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(qui1, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + qui2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a. FmaddFmsubHelper(0, kFP64PositiveInfinity, - qa, + quia, kFP64DefaultNaN, kFP64DefaultNaN, kFP64DefaultNaN, kFP64DefaultNaN); FmaddFmsubHelper(kFP64PositiveInfinity, 0, - qa, + quia, kFP64DefaultNaN, kFP64DefaultNaN, kFP64DefaultNaN, kFP64DefaultNaN); FmaddFmsubHelper(0, kFP64NegativeInfinity, - qa, + quia, kFP64DefaultNaN, kFP64DefaultNaN, kFP64DefaultNaN, kFP64DefaultNaN); FmaddFmsubHelper(kFP64NegativeInfinity, 0, - qa, + quia, kFP64DefaultNaN, kFP64DefaultNaN, kFP64DefaultNaN, @@ -1003,95 +1117,209 @@ TEST(fmadd_fmsub_double_nans) { TEST(fmadd_fmsub_float_nans) { // Make sure that NaN propagation works correctly. 
- float s1 = RawbitsToFloat(0x7f951111); - float s2 = RawbitsToFloat(0x7f952222); - float sa = RawbitsToFloat(0x7f95aaaa); - float q1 = RawbitsToFloat(0x7fea1111); - float q2 = RawbitsToFloat(0x7fea2222); - float qa = RawbitsToFloat(0x7feaaaaa); - VIXL_ASSERT(IsSignallingNaN(s1)); - VIXL_ASSERT(IsSignallingNaN(s2)); - VIXL_ASSERT(IsSignallingNaN(sa)); - VIXL_ASSERT(IsQuietNaN(q1)); - VIXL_ASSERT(IsQuietNaN(q2)); - VIXL_ASSERT(IsQuietNaN(qa)); + float sig1 = RawbitsToFloat(0x7f951111); + float sig2 = RawbitsToFloat(0x7f952222); + float siga = RawbitsToFloat(0x7f95aaaa); + float qui1 = RawbitsToFloat(0x7fea1111); + float qui2 = RawbitsToFloat(0x7fea2222); + float quia = RawbitsToFloat(0x7feaaaaa); + VIXL_ASSERT(IsSignallingNaN(sig1)); + VIXL_ASSERT(IsSignallingNaN(sig2)); + VIXL_ASSERT(IsSignallingNaN(siga)); + VIXL_ASSERT(IsQuietNaN(qui1)); + VIXL_ASSERT(IsQuietNaN(qui2)); + VIXL_ASSERT(IsQuietNaN(quia)); // The input NaNs after passing through ProcessNaN. - float s1_proc = RawbitsToFloat(0x7fd51111); - float s2_proc = RawbitsToFloat(0x7fd52222); - float sa_proc = RawbitsToFloat(0x7fd5aaaa); - float q1_proc = q1; - float q2_proc = q2; - float qa_proc = qa; - VIXL_ASSERT(IsQuietNaN(s1_proc)); - VIXL_ASSERT(IsQuietNaN(s2_proc)); - VIXL_ASSERT(IsQuietNaN(sa_proc)); - VIXL_ASSERT(IsQuietNaN(q1_proc)); - VIXL_ASSERT(IsQuietNaN(q2_proc)); - VIXL_ASSERT(IsQuietNaN(qa_proc)); + float sig1_proc = RawbitsToFloat(0x7fd51111); + float sig2_proc = RawbitsToFloat(0x7fd52222); + float siga_proc = RawbitsToFloat(0x7fd5aaaa); + float qui1_proc = qui1; + float qui2_proc = qui2; + float quia_proc = quia; + VIXL_ASSERT(IsQuietNaN(sig1_proc)); + VIXL_ASSERT(IsQuietNaN(sig2_proc)); + VIXL_ASSERT(IsQuietNaN(siga_proc)); + VIXL_ASSERT(IsQuietNaN(qui1_proc)); + VIXL_ASSERT(IsQuietNaN(qui2_proc)); + VIXL_ASSERT(IsQuietNaN(quia_proc)); // Negated NaNs as it would be done on ARMv8 hardware. 
- float s1_proc_neg = RawbitsToFloat(0xffd51111); - float sa_proc_neg = RawbitsToFloat(0xffd5aaaa); - float q1_proc_neg = RawbitsToFloat(0xffea1111); - float qa_proc_neg = RawbitsToFloat(0xffeaaaaa); - VIXL_ASSERT(IsQuietNaN(s1_proc_neg)); - VIXL_ASSERT(IsQuietNaN(sa_proc_neg)); - VIXL_ASSERT(IsQuietNaN(q1_proc_neg)); - VIXL_ASSERT(IsQuietNaN(qa_proc_neg)); + float sig1_proc_neg = RawbitsToFloat(0xffd51111); + float siga_proc_neg = RawbitsToFloat(0xffd5aaaa); + float qui1_proc_neg = RawbitsToFloat(0xffea1111); + float quia_proc_neg = RawbitsToFloat(0xffeaaaaa); + VIXL_ASSERT(IsQuietNaN(sig1_proc_neg)); + VIXL_ASSERT(IsQuietNaN(siga_proc_neg)); + VIXL_ASSERT(IsQuietNaN(qui1_proc_neg)); + VIXL_ASSERT(IsQuietNaN(quia_proc_neg)); // Quiet NaNs are propagated. - FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc); - FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc); - FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); - FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc); - FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); - FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); - FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); + FmaddFmsubHelper(qui1, + 0, + 0, + qui1_proc, + qui1_proc_neg, + qui1_proc_neg, + qui1_proc); + FmaddFmsubHelper(0, qui2, 0, qui2_proc, qui2_proc, qui2_proc, qui2_proc); + FmaddFmsubHelper(0, + 0, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); + FmaddFmsubHelper(qui1, + qui2, + 0, + qui1_proc, + qui1_proc_neg, + qui1_proc_neg, + qui1_proc); + FmaddFmsubHelper(0, + qui2, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); + FmaddFmsubHelper(qui1, + 0, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); + FmaddFmsubHelper(qui1, + qui2, + quia, + quia_proc, + quia_proc, + quia_proc_neg, + quia_proc_neg); // Signalling NaNs are propagated, and made quiet. 
- FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc); - FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); + FmaddFmsubHelper(sig1, + 0, + 0, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(0, sig2, 0, sig2_proc, sig2_proc, sig2_proc, sig2_proc); + FmaddFmsubHelper(0, + 0, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + 0, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(0, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + 0, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); // Signalling NaNs take precedence over quiet NaNs. 
- FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc); - FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); - FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); - FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); + FmaddFmsubHelper(sig1, + qui2, + quia, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(qui1, + sig2, + quia, + sig2_proc, + sig2_proc, + sig2_proc, + sig2_proc); + FmaddFmsubHelper(qui1, + qui2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + quia, + sig1_proc, + sig1_proc_neg, + sig1_proc_neg, + sig1_proc); + FmaddFmsubHelper(qui1, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + qui2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); + FmaddFmsubHelper(sig1, + sig2, + siga, + siga_proc, + siga_proc, + siga_proc_neg, + siga_proc_neg); // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a. 
FmaddFmsubHelper(0, kFP32PositiveInfinity, - qa, + quia, kFP32DefaultNaN, kFP32DefaultNaN, kFP32DefaultNaN, kFP32DefaultNaN); FmaddFmsubHelper(kFP32PositiveInfinity, 0, - qa, + quia, kFP32DefaultNaN, kFP32DefaultNaN, kFP32DefaultNaN, kFP32DefaultNaN); FmaddFmsubHelper(0, kFP32NegativeInfinity, - qa, + quia, kFP32DefaultNaN, kFP32DefaultNaN, kFP32DefaultNaN, kFP32DefaultNaN); FmaddFmsubHelper(kFP32NegativeInfinity, 0, - qa, + quia, kFP32DefaultNaN, kFP32DefaultNaN, kFP32DefaultNaN, diff --git a/test/aarch64/test-assembler-neon-aarch64.cc b/test/aarch64/test-assembler-neon-aarch64.cc index d2e04052..b1cf5163 100644 --- a/test/aarch64/test-assembler-neon-aarch64.cc +++ b/test/aarch64/test-assembler-neon-aarch64.cc @@ -3896,6 +3896,30 @@ TEST(neon_fcvtn) { } } +TEST(neon_fcvtn_fcvtxn_regression_test) { + SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP); + + START(); + __ Movi(v0.V2D(), 0x3ff0000000000000, 0xbff0000000000000); + __ Movi(v1.V2D(), 0x3f800000bf800000, 0x40000000c0000000); + __ Movi(v2.V2D(), 0x3ff0000000000000, 0xbff0000000000000); + + __ Fcvtn(v16.V2S(), v0.V2D()); + __ Fcvtn(v17.V4H(), v1.V4S()); + __ Fcvtn(v0.V2S(), v0.V2D()); + __ Fcvtn(v1.V4H(), v1.V4S()); + __ Fcvtxn(v2.V2S(), v2.V2D()); + END(); + + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q16); + ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q17); + ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q0); + ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q1); + ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q2); + } +} TEST(neon_fcvtxn) { SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP); @@ -10818,6 +10842,1371 @@ TEST(neon_tbl) { } } +TEST(neon_usdot) { + SETUP_WITH_FEATURES(CPUFeatures::kNEON, + CPUFeatures::kDotProduct, + CPUFeatures::kI8MM); + + START(); + __ Movi(v0.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); + __ Movi(v1.V2D(), 0x7f7f7f7f7f7f7f7f, 0x7f7f7f7f7f7f7f7f); + __ Movi(v2.V2D(), 
0x8080808080808080, 0x8080808080808080); + __ Movi(v3.V2D(), 0, 0); + __ Mov(q4, q3); + __ Mov(q5, q3); + __ Mov(q6, q3); + __ Mov(q7, q3); + __ Mov(q8, q3); + __ Mov(q9, q3); + __ Mov(q10, q3); + __ Mov(q11, q3); + + // Test Usdot against Udot/Sdot over the range of inputs where they should be + // equal. + __ Usdot(v3.V2S(), v0.V8B(), v1.V8B()); + __ Udot(v4.V2S(), v0.V8B(), v1.V8B()); + __ Cmeq(v3.V4S(), v3.V4S(), v4.V4S()); + __ Usdot(v5.V4S(), v0.V16B(), v1.V16B()); + __ Udot(v6.V4S(), v0.V16B(), v1.V16B()); + __ Cmeq(v5.V4S(), v5.V4S(), v6.V4S()); + + __ Usdot(v7.V2S(), v1.V8B(), v2.V8B()); + __ Sdot(v8.V2S(), v1.V8B(), v2.V8B()); + __ Cmeq(v7.V4S(), v7.V4S(), v8.V4S()); + __ Usdot(v9.V4S(), v1.V16B(), v2.V16B()); + __ Sdot(v10.V4S(), v1.V16B(), v2.V16B()); + __ Cmeq(v9.V4S(), v9.V4S(), v10.V4S()); + + // Construct values which, when interpreted correctly as signed/unsigned, + // should give a zero result for dot product. + __ Mov(w0, 0x8101ff40); // [-127, 1, -1, 64] as signed bytes. + __ Mov(w1, 0x02fe8002); // [2, 254, 128, 2] as unsigned bytes. + __ Dup(v0.V4S(), w0); + __ Dup(v1.V4S(), w1); + __ Usdot(v11.V4S(), v1.V16B(), v0.V16B()); + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_128(-1, -1, q3); + ASSERT_EQUAL_128(-1, -1, q5); + ASSERT_EQUAL_128(-1, -1, q7); + ASSERT_EQUAL_128(-1, -1, q9); + ASSERT_EQUAL_128(0, 0, q11); + } +} + +TEST(neon_usdot_element) { + SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kI8MM); + + START(); + __ Movi(v0.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); + __ Movi(v1.V2D(), 0x4242424242424242, 0x5555aaaaaaaa5555); + + // Test element Usdot against vector variant. 
+ __ Dup(v2.V4S(), v1.V4S(), 0); + __ Dup(v3.V4S(), v1.V4S(), 1); + __ Dup(v4.V4S(), v1.V4S(), 3); + + __ Mov(q10, q1); + __ Usdot(v10.V2S(), v0.V8B(), v2.V8B()); + __ Mov(q11, q1); + __ Usdot(v11.V2S(), v0.V8B(), v1.S4B(), 0); + __ Cmeq(v11.V4S(), v11.V4S(), v10.V4S()); + + __ Mov(q12, q1); + __ Usdot(v12.V4S(), v0.V16B(), v3.V16B()); + __ Mov(q13, q1); + __ Usdot(v13.V4S(), v0.V16B(), v1.S4B(), 1); + __ Cmeq(v13.V4S(), v13.V4S(), v12.V4S()); + + __ Mov(q14, q1); + __ Usdot(v14.V4S(), v4.V16B(), v0.V16B()); + __ Mov(q15, q1); + __ Sudot(v15.V4S(), v0.V16B(), v1.S4B(), 3); + __ Cmeq(v15.V4S(), v15.V4S(), v14.V4S()); + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_128(-1, -1, q11); + ASSERT_EQUAL_128(-1, -1, q13); + ASSERT_EQUAL_128(-1, -1, q15); + } +} + +TEST(zero_high_b) { + SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM); + START(); + + __ Mov(x0, 0x55aa42ffaa42ff55); + __ Mov(x1, 4); + __ Movi(q30.V16B(), 0); + + // Iterate over the SISD instructions using different input values on each + // loop. 
+ Label loop; + __ Bind(&loop); + + __ Dup(q0.V16B(), w0); + __ Ror(x0, x0, 8); + __ Dup(q1.V16B(), w0); + __ Ror(x0, x0, 8); + __ Dup(q2.V16B(), w0); + __ Ror(x0, x0, 8); + + { + ExactAssemblyScope scope(&masm, 81 * kInstructionSize); + __ movi(q9.V16B(), 0x55); + __ dci(0x5e010409); // mov b9, v0.b[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e207809); // sqabs b9, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e200c29); // sqadd b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e207809); // sqneg b9, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e008429); // sqrdmlah b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e008c29); // sqrdmlsh b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e205c29); // sqrshl b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f089c09); // sqrshrn b9, h0, #8 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f088c09); // sqrshrun b9, h0, #8 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e204c29); // sqshl b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f087409); // sqshl b9, b0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f086409); // sqshlu b9, b0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f089409); // sqshrn b9, h0, #8 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f088409); // sqshrun b9, h0, #8 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e202c29); // sqsub b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + 
__ movi(q9.V16B(), 0x55); + __ dci(0x5e214809); // sqxtn b9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e212809); // sqxtun b9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e203809); // suqadd b9, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e200c29); // uqadd b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e205c29); // uqrshl b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f089c09); // uqrshrn b9, h0, #8 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e204c29); // uqshl b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f087409); // uqshl b9, b0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f089409); // uqshrn b9, h0, #8 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e202c29); // uqsub b9, b1, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e214809); // uqxtn b9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e203809); // usqadd b9, b0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + } + __ Sub(x1, x1, 1); + __ Cbnz(x1, &loop); + + __ Ins(q30.V16B(), 0, wzr); + + END(); + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_128(0, 0, q30); + } +} + +TEST(zero_high_h) { + SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kNEON, + CPUFeatures::kFP, + CPUFeatures::kNEONHalf, + CPUFeatures::kRDM); + START(); + + __ Mov(x0, 0x55aa42ffaa42ff55); + __ Mov(x1, 4); + __ Movi(q30.V16B(), 0); + + // Iterate over the SISD instructions using different input values on each + // loop. 
+ Label loop; + __ Bind(&loop); + + __ Dup(q0.V8H(), w0); + __ Ror(x0, x0, 8); + __ Dup(q1.V8H(), w0); + __ Ror(x0, x0, 8); + __ Dup(q2.V8H(), w0); + __ Ror(x0, x0, 8); + + { + ExactAssemblyScope scope(&masm, 225 * kInstructionSize); + __ movi(q9.V16B(), 0x55); + __ dci(0x5e020409); // mov h9, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ec01429); // fabd h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e402c29); // facge h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ec02c29); // facgt h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e30d809); // faddp h9, v0.2h + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef8d809); // fcmeq h9, h0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e402429); // fcmeq h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ef8c809); // fcmge h9, h0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e402429); // fcmge h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef8c809); // fcmgt h9, h0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ec02429); // fcmgt h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ef8d809); // fcmle h9, h0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef8e809); // fcmlt h9, h0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e79c809); // fcvtas h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e79c809); // fcvtau h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ 
movi(q9.V16B(), 0x55); + __ dci(0x5e79b809); // fcvtms h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e79b809); // fcvtmu h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e79a809); // fcvtns h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e79a809); // fcvtnu h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef9a809); // fcvtps h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ef9a809); // fcvtpu h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef9b809); // fcvtzs h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f10fc09); // fcvtzs h9, h0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ef9b809); // fcvtzu h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f10fc09); // fcvtzu h9, h0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e30c809); // fmaxnmp h9, v0.2h + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e30f809); // fmaxp h9, v0.2h + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5eb0c809); // fminnmp h9, v0.2h + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5eb0f809); // fminp h9, v0.2h + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f001029); // fmla h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f005029); // fmls h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f009029); // fmul h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 
0x55); + __ dci(0x7f009029); // fmulx h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e401c29); // fmulx h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef9d809); // frecpe h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e403c29); // frecps h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef9f809); // frecpx h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ef9d809); // frsqrte h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ec03c29); // frsqrts h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e79d809); // scvtf h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f10e409); // scvtf h9, h0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e607809); // sqabs h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e600c29); // sqadd h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f40c029); // sqdmulh h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e60b429); // sqdmulh h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e607809); // sqneg h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f40d029); // sqrdmlah h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e408429); // sqrdmlah h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f40f029); // sqrdmlsh h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ 
movi(q9.V16B(), 0x55); + __ dci(0x7e408c29); // sqrdmlsh h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f40d029); // sqrdmulh h9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e60b429); // sqrdmulh h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e605c29); // sqrshl h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f109c09); // sqrshrn h9, s0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f108c09); // sqrshrun h9, s0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e604c29); // sqshl h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f107409); // sqshl h9, h0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f106409); // sqshlu h9, h0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f109409); // sqshrn h9, s0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f108409); // sqshrun h9, s0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e602c29); // sqsub h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e614809); // sqxtn h9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e612809); // sqxtun h9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e603809); // suqadd h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e79d809); // ucvtf h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f10e409); // ucvtf h9, h0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + 
+ __ movi(q9.V16B(), 0x55); + __ dci(0x7e600c29); // uqadd h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e605c29); // uqrshl h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f109c09); // uqrshrn h9, s0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e604c29); // uqshl h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f107409); // uqshl h9, h0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f109409); // uqshrn h9, s0, #16 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e602c29); // uqsub h9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e614809); // uqxtn h9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e603809); // usqadd h9, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + } + __ Sub(x1, x1, 1); + __ Cbnz(x1, &loop); + + __ Ins(q30.V8H(), 0, wzr); + + END(); + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_128(0, 0, q30); + } +} + +TEST(zero_high_s) { + SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kNEON, + CPUFeatures::kFP, + CPUFeatures::kRDM); + START(); + + __ Mov(x0, 0x55aa42ffaa42ff55); + __ Mov(x1, 4); + __ Movi(q30.V16B(), 0); + + // Iterate over the SISD instructions using different input values on each + // loop. 
+ Label loop; + __ Bind(&loop); + + __ Dup(q0.V4S(), w0); + __ Ror(x0, x0, 8); + __ Dup(q1.V4S(), w0); + __ Ror(x0, x0, 8); + __ Dup(q2.V4S(), w0); + __ Ror(x0, x0, 8); + + { + ExactAssemblyScope scope(&masm, 246 * kInstructionSize); + __ movi(q9.V16B(), 0x55); + __ dci(0x5e040409); // mov s9, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea0d429); // fabd s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e20ec29); // facge s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea0ec29); // facgt s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e30d809); // faddp s9, v0.2s + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea0d809); // fcmeq s9, s0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e20e429); // fcmeq s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea0c809); // fcmge s9, s0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e20e429); // fcmge s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea0c809); // fcmgt s9, s0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea0e429); // fcmgt s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea0d809); // fcmle s9, s0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea0e809); // fcmlt s9, s0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e21c809); // fcvtas s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e21c809); // fcvtau s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ 
movi(q9.V16B(), 0x55); + __ dci(0x5e21b809); // fcvtms s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e21b809); // fcvtmu s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e21a809); // fcvtns s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e21a809); // fcvtnu s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea1a809); // fcvtps s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea1a809); // fcvtpu s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e616809); // fcvtxn s9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea1b809); // fcvtzs s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f20fc09); // fcvtzs s9, s0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea1b809); // fcvtzu s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f20fc09); // fcvtzu s9, s0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e30c809); // fmaxnmp s9, v0.2s + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e30f809); // fmaxp s9, v0.2s + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7eb0c809); // fminnmp s9, v0.2s + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7eb0f809); // fminp s9, v0.2s + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f801029); // fmla s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f805029); // fmls s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + 
__ dci(0x5f809029); // fmul s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f809029); // fmulx s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e20dc29); // fmulx s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea1d809); // frecpe s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e20fc29); // frecps s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea1f809); // frecpx s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea1d809); // frsqrte s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea0fc29); // frsqrts s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e21d809); // scvtf s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f20e409); // scvtf s9, s0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea07809); // sqabs s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea00c29); // sqadd s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e609029); // sqdmlal s9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f403029); // sqdmlal s9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e60b029); // sqdmlsl s9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f407029); // sqdmlsl s9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f80c029); // sqdmulh s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ 
movi(q9.V16B(), 0x55); + __ dci(0x5ea0b429); // sqdmulh s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e60d029); // sqdmull s9, h1, h0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f40b029); // sqdmull s9, h1, v0.h[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea07809); // sqneg s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f80d029); // sqrdmlah s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e808429); // sqrdmlah s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f80f029); // sqrdmlsh s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e808c29); // sqrdmlsh s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f80d029); // sqrdmulh s9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea0b429); // sqrdmulh s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea05c29); // sqrshl s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f209c09); // sqrshrn s9, d0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f208c09); // sqrshrun s9, d0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea04c29); // sqshl s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f207409); // sqshl s9, s0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f206409); // sqshlu s9, s0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f209409); // sqshrn s9, d0, #32 + __ 
orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f208409); // sqshrun s9, d0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea02c29); // sqsub s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea14809); // sqxtn s9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea12809); // sqxtun s9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea03809); // suqadd s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e21d809); // ucvtf s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f20e409); // ucvtf s9, s0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea00c29); // uqadd s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea05c29); // uqrshl s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f209c09); // uqrshrn s9, d0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea04c29); // uqshl s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f207409); // uqshl s9, s0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f209409); // uqshrn s9, d0, #32 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea02c29); // uqsub s9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea14809); // uqxtn s9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ea03809); // usqadd s9, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + } + __ Sub(x1, x1, 1); + __ Cbnz(x1, &loop); + + __ Ins(q30.V4S(), 0, wzr); + + END(); + if 
(CAN_RUN()) { + RUN(); + ASSERT_EQUAL_128(0, 0, q30); + } +} + +TEST(zero_high_d) { + SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kNEON, + CPUFeatures::kFP, + CPUFeatures::kRDM); + START(); + + __ Mov(x0, 0x55aa42ffaa42ff55); + __ Mov(x1, 4); + __ Movi(q30.V16B(), 0); + + // Iterate over the SISD instructions using different input values on each + // loop. + Label loop; + __ Bind(&loop); + + __ Dup(q0.V2D(), x0); + __ Ror(x0, x0, 8); + __ Dup(q1.V2D(), x0); + __ Ror(x0, x0, 8); + __ Dup(q2.V2D(), x0); + __ Ror(x0, x0, 8); + + { + ExactAssemblyScope scope(&masm, 291 * kInstructionSize); + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee0b809); // abs d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee08429); // add d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ef1b809); // addp d9, v0.2d + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee09809); // cmeq d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee08c29); // cmeq d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee08809); // cmge d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee03c29); // cmge d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee08809); // cmgt d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee03429); // cmgt d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee03429); // cmhi d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee03c29); // cmhs d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee09809); // cmle d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ 
movi(q9.V16B(), 0x55); + __ dci(0x5ee0a809); // cmlt d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee08c29); // cmtst d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e080409); // mov d9, v0.d[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee0d429); // fabd d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e60ec29); // facge d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee0ec29); // facgt d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e70d809); // faddp d9, v0.2d + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee0d809); // fcmeq d9, d0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e60e429); // fcmeq d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee0c809); // fcmge d9, d0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e60e429); // fcmge d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee0c809); // fcmgt d9, d0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee0e429); // fcmgt d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee0d809); // fcmle d9, d0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee0e809); // fcmlt d9, d0, #0.0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e61c809); // fcvtas d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e61c809); // fcvtau d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ 
movi(q9.V16B(), 0x55); + __ dci(0x5e61b809); // fcvtms d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e61b809); // fcvtmu d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e61a809); // fcvtns d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e61a809); // fcvtnu d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee1a809); // fcvtps d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee1a809); // fcvtpu d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee1b809); // fcvtzs d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f40fc09); // fcvtzs d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee1b809); // fcvtzu d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f40fc09); // fcvtzu d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e70c809); // fmaxnmp d9, v0.2d + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e70f809); // fmaxp d9, v0.2d + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ef0c809); // fminnmp d9, v0.2d + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ef0f809); // fminp d9, v0.2d + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5fc01029); // fmla d9, d1, v0.d[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5fc05029); // fmls d9, d1, v0.d[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5fc09029); // fmul d9, d1, v0.d[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 
0x55); + __ dci(0x7fc09029); // fmulx d9, d1, v0.d[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e60dc29); // fmulx d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee1d809); // frecpe d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e60fc29); // frecps d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee1f809); // frecpx d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee1d809); // frsqrte d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee0fc29); // frsqrts d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee0b809); // neg d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5e61d809); // scvtf d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f40e409); // scvtf d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f405409); // shl d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f405409); // sli d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee07809); // sqabs d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee00c29); // sqadd d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea09029); // sqdmlal d9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f803029); // sqdmlal d9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea0b029); // sqdmlsl d9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ 
dci(0x5f807029); // sqdmlsl d9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ea0d029); // sqdmull d9, s1, s0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f80b029); // sqdmull d9, s1, v0.s[0] + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee07809); // sqneg d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ec08429); // sqrdmlah d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ec08c29); // sqrdmlsh d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee05c29); // sqrshl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee04c29); // sqshl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f407409); // sqshl d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f406409); // sqshlu d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee02c29); // sqsub d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f404409); // sri d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee05429); // srshl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f402409); // srshr d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f403409); // srsra d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee04429); // sshl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5f400409); // sshr d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 
0x55); + __ dci(0x5f401409); // ssra d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee08429); // sub d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x5ee03809); // suqadd d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7e61d809); // ucvtf d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f40e409); // ucvtf d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee00c29); // uqadd d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee05c29); // uqrshl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee04c29); // uqshl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f407409); // uqshl d9, d0, #0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee02c29); // uqsub d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee05429); // urshl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f402409); // urshr d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f403409); // ursra d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee04429); // ushl d9, d1, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f400409); // ushr d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7ee03809); // usqadd d9, d0 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + + __ movi(q9.V16B(), 0x55); + __ dci(0x7f401409); // usra d9, d0, #64 + __ orr(q30.V16B(), q30.V16B(), q9.V16B()); + } + __ Sub(x1, x1, 1); + __ Cbnz(x1, 
&loop); + + __ Ins(q30.V2D(), 0, xzr); + + END(); + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_128(0, 0, q30); + } +} } // namespace aarch64 } // namespace vixl diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc index 61583143..053d5c82 100644 --- a/test/aarch64/test-assembler-sve-aarch64.cc +++ b/test/aarch64/test-assembler-sve-aarch64.cc @@ -44,76 +44,11 @@ #include "aarch64/simulator-aarch64.h" #include "test-assembler-aarch64.h" +#define TEST_SVE(name) TEST_SVE_INNER("ASM", name) + namespace vixl { namespace aarch64 { -Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) { - // We never free this memory, but we need it to live for as long as the static - // linked list of tests, and this is the easiest way to do it. - Test* test = new Test(name, fn); - test->set_sve_vl_in_bits(vl); - return test; -} - -// The TEST_SVE macro works just like the usual TEST macro, but the resulting -// function receives a `const Test& config` argument, to allow it to query the -// vector length. -#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 -// On the Simulator, run SVE tests with several vector lengths, including the -// extreme values and an intermediate value that isn't a power of two. - -#define TEST_SVE(name) \ - void Test##name(Test* config); \ - Test* test_##name##_list[] = \ - {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name), \ - MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name), \ - MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \ - void Test##name(Test* config) - -#define SVE_SETUP_WITH_FEATURES(...) \ - SETUP_WITH_FEATURES(__VA_ARGS__); \ - simulator.SetVectorLengthInBits(config->sve_vl_in_bits()) - -#else -// Otherwise, just use whatever the hardware provides. -static const int kSVEVectorLengthInBits = - CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE) - ? 
CPU::ReadSVEVectorLengthInBits() - : kZRegMinSize; - -#define TEST_SVE(name) \ - void Test##name(Test* config); \ - Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits, \ - "AARCH64_ASM_" #name "_vlauto", \ - &Test##name); \ - void Test##name(Test* config) - -#define SVE_SETUP_WITH_FEATURES(...) \ - SETUP_WITH_FEATURES(__VA_ARGS__); \ - USE(config) - -#endif - -// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This -// is optimised for call-site clarity, not generated code quality, so it doesn't -// exist in the MacroAssembler itself. -// -// Usage: -// -// int values[] = { 42, 43, 44 }; -// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 } -// -// The rightmost (highest-indexed) array element maps to the lowest-numbered -// lane. -template <typename T, size_t N> -void InsrHelper(MacroAssembler* masm, - const ZRegister& zdn, - const T (&values)[N]) { - for (size_t i = 0; i < N; i++) { - masm->Insr(zdn, values[i]); - } -} - // Conveniently initialise P registers with scalar bit patterns. The destination // lane size is ignored. This is optimised for call-site clarity, not generated // code quality. @@ -3448,8 +3383,10 @@ static void PtrueHelper(Test* config, typedef void ( MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd, int pattern); - AssemblePtrueFn assemble = - (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue; + AssemblePtrueFn assemble = &MacroAssembler::ptrue; + if (s == SetFlags) { + assemble = &MacroAssembler::ptrues; + } ExactAssemblyScope guard(&masm, 12 * kInstructionSize); __ msr(NZCV, x20); @@ -5537,6 +5474,9 @@ TEST_SVE(sve_addpl) { } TEST_SVE(sve_calculate_sve_address) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + // Shadow the `MacroAssembler` type so that the test macros work without // modification. 
typedef CalculateSVEAddressMacroAssembler MacroAssembler; @@ -5646,6 +5586,7 @@ TEST_SVE(sve_calculate_sve_address) { ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28); ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29); } +#pragma GCC diagnostic pop } TEST_SVE(sve_permute_vector_unpredicated) { @@ -10549,18 +10490,22 @@ TEST_SVE(sve_ld1rq) { // Check that all segments match by rotating the vector by one segment, // eoring, and orring across the vector. - __ Ext(z4.VnB(), z0.VnB(), z0.VnB(), 16); + __ Mov(z4, z0); + __ Ext(z4.VnB(), z4.VnB(), z4.VnB(), 16); __ Eor(z4.VnB(), z4.VnB(), z0.VnB()); __ Orv(b4, p0, z4.VnB()); - __ Ext(z5.VnB(), z1.VnB(), z1.VnB(), 16); + __ Mov(z5, z1); + __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 16); __ Eor(z5.VnB(), z5.VnB(), z1.VnB()); __ Orv(b5, p0, z5.VnB()); __ Orr(z4, z4, z5); - __ Ext(z5.VnB(), z2.VnB(), z2.VnB(), 16); + __ Mov(z5, z2); + __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 16); __ Eor(z5.VnB(), z5.VnB(), z2.VnB()); __ Orv(b5, p0, z5.VnB()); __ Orr(z4, z4, z5); - __ Ext(z5.VnB(), z3.VnB(), z3.VnB(), 16); + __ Mov(z5, z3); + __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 16); __ Eor(z5.VnB(), z5.VnB(), z3.VnB()); __ Orv(b5, p0, z5.VnB()); __ Orr(z4, z4, z5); @@ -11681,19 +11626,19 @@ static void SdotUdotHelper(Test* config, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, - bool is_signed, - int index) { - if (is_signed) { - if (index < 0) { + bool is_signed_fn, + int index_fn) { + if (is_signed_fn) { + if (index_fn < 0) { __ Sdot(zd, za, zn, zm); } else { - __ Sdot(zd, za, zn, zm, index); + __ Sdot(zd, za, zn, zm, index_fn); } } else { - if (index < 0) { + if (index_fn < 0) { __ Udot(zd, za, zn, zm); } else { - __ Udot(zd, za, zn, zm, index); + __ Udot(zd, za, zn, zm, index_fn); } } }; @@ -14573,7 +14518,8 @@ TEST_SVE(sve_fcadd) { __ Sel(z2.VnH(), p3, z1.VnH(), z30.VnH()); // 5i + 0 __ Sel(z3.VnH(), p2, z1.VnH(), z30.VnH()); // 0i + 5 __ Sel(z7.VnH(), p3, z7.VnH(), z0.VnH()); // Ai + 10 - __ Ext(z8.VnB(), z7.VnB(), z7.VnB(), 2); + 
__ Mov(z8, z7); + __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 2); __ Sel(z8.VnH(), p2, z8.VnH(), z30.VnH()); // 0i + A // (10i + 10) + rotate(5i + 0, 90) @@ -14615,7 +14561,8 @@ TEST_SVE(sve_fcadd) { __ Sel(z2.VnS(), p3, z1.VnS(), z30.VnS()); __ Sel(z29.VnS(), p2, z1.VnS(), z30.VnS()); __ Sel(z11.VnS(), p3, z11.VnS(), z0.VnS()); - __ Ext(z12.VnB(), z11.VnB(), z11.VnB(), 4); + __ Mov(z12, z11); + __ Ext(z12.VnB(), z12.VnB(), z12.VnB(), 4); __ Sel(z12.VnS(), p2, z12.VnS(), z30.VnS()); __ Fcadd(z8.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90); __ Fcadd(z8.VnS(), p0.Merging(), z8.VnS(), z29.VnS(), 270); @@ -14635,7 +14582,8 @@ TEST_SVE(sve_fcadd) { __ Sel(z2.VnD(), p3, z1.VnD(), z30.VnD()); __ Sel(z28.VnD(), p2, z1.VnD(), z30.VnD()); __ Sel(z15.VnD(), p3, z15.VnD(), z0.VnD()); - __ Ext(z16.VnB(), z15.VnB(), z15.VnB(), 8); + __ Mov(z16, z15); + __ Ext(z16.VnB(), z16.VnB(), z16.VnB(), 8); __ Sel(z16.VnD(), p2, z16.VnD(), z30.VnD()); __ Fcadd(z12.VnD(), p0.Merging(), z0.VnD(), z2.VnD(), 90); __ Fcadd(z12.VnD(), p0.Merging(), z12.VnD(), z28.VnD(), 270); @@ -14701,8 +14649,8 @@ TEST_SVE(sve_fcmla_index) { // Create a reference result from a vector complex multiply. __ Dup(z6.VnH(), 0); - __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 0); - __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 90); + __ Fcmla(z6.VnH(), p0.Merging(), z6.VnH(), z0.VnH(), z2.VnH(), 0); + __ Fcmla(z6.VnH(), p0.Merging(), z6.VnH(), z0.VnH(), z2.VnH(), 90); // Repeated, but for wider elements. 
__ Fdup(z0.VnS(), 42.0); @@ -14726,8 +14674,8 @@ TEST_SVE(sve_fcmla_index) { __ Fcmla(z8.VnS(), z0.VnS(), z3.VnS(), 0, 270); __ Fneg(z8.VnS(), p0.Merging(), z8.VnS()); __ Dup(z9.VnS(), 0); - __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 0); - __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90); + __ Fcmla(z9.VnS(), p0.Merging(), z9.VnS(), z0.VnS(), z2.VnS(), 0); + __ Fcmla(z9.VnS(), p0.Merging(), z9.VnS(), z0.VnS(), z2.VnS(), 90); END(); if (CAN_RUN()) { @@ -14770,8 +14718,8 @@ TEST_SVE(sve_fcmla) { // ... 7 6 5 4 3 2 1 0 <-- element // ... | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | 0 | 0 | <-- value __ Dup(z5.VnH(), 0); - __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 0); - __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 90); + __ Fcmla(z5.VnH(), p3.Merging(), z5.VnH(), z4.VnH(), z3.VnH(), 0); + __ Fcmla(z5.VnH(), p3.Merging(), z5.VnH(), z4.VnH(), z3.VnH(), 90); // Move the odd results to the even result positions. // ... 7 6 5 4 3 2 1 0 <-- element @@ -14783,8 +14731,8 @@ TEST_SVE(sve_fcmla) { // ... 7 6 5 4 3 2 1 0 <-- element // ... | 0 | 0 | -20-A^2 | -8A | 0 | 0 | -20-A^2 | -8A | <-- value __ Dup(z6.VnH(), 0); - __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 180); - __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 270); + __ Fcmla(z6.VnH(), p2.Merging(), z6.VnH(), z4.VnH(), z3.VnH(), 180); + __ Fcmla(z6.VnH(), p2.Merging(), z6.VnH(), z4.VnH(), z3.VnH(), 270); // Negate the even results. The results in z6 should now match the results // computed earlier in z5. 
@@ -14807,12 +14755,12 @@ TEST_SVE(sve_fcmla) { __ Punpklo(p2.VnH(), p2.VnB()); __ Punpklo(p3.VnH(), p3.VnB()); __ Dup(z7.VnS(), 0); - __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 0); - __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 90); + __ Fcmla(z7.VnS(), p3.Merging(), z7.VnS(), z4.VnS(), z3.VnS(), 0); + __ Fcmla(z7.VnS(), p3.Merging(), z7.VnS(), z4.VnS(), z3.VnS(), 90); __ Ext(z7.VnB(), z7.VnB(), z7.VnB(), 8); __ Dup(z8.VnS(), 0); - __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 180); - __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 270); + __ Fcmla(z8.VnS(), p2.Merging(), z8.VnS(), z4.VnS(), z3.VnS(), 180); + __ Fcmla(z8.VnS(), p2.Merging(), z8.VnS(), z4.VnS(), z3.VnS(), 270); __ Fneg(z8.VnS(), p2.Merging(), z8.VnS()); // Double precision computed for even lanes only. @@ -14827,11 +14775,11 @@ TEST_SVE(sve_fcmla) { __ Sel(z4.VnD(), p2, z1.VnD(), z2.VnD()); __ Punpklo(p2.VnH(), p2.VnB()); __ Dup(z9.VnD(), 0); - __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 0); - __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 90); + __ Fcmla(z9.VnD(), p2.Merging(), z9.VnD(), z4.VnD(), z3.VnD(), 0); + __ Fcmla(z9.VnD(), p2.Merging(), z9.VnD(), z4.VnD(), z3.VnD(), 90); __ Dup(z10.VnD(), 0); - __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 180); - __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 270); + __ Fcmla(z10.VnD(), p2.Merging(), z10.VnD(), z4.VnD(), z3.VnD(), 180); + __ Fcmla(z10.VnD(), p2.Merging(), z10.VnD(), z4.VnD(), z3.VnD(), 270); __ Fneg(z10.VnD(), p2.Merging(), z10.VnD()); END(); @@ -18706,5 +18654,1331 @@ TEST_SVE(sve_prefetch_offset) { } } +TEST_SVE(sve2_match_nmatch) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + + START(); + + __ Ptrue(p0.VnB()); + __ Ptrue(p1.VnH()); + __ Ptrue(p2.VnS()); + + // Vector to search is bytes 0 - 7, repeating every eight bytes. 
+ __ Index(z0.VnB(), 0, 1); + __ Dup(z0.VnD(), z0.VnD(), 0); + + // Elements to find are (repeated) bytes 0 - 3 in the first segment, 4 - 7 + // in the second, 8 - 11 in the third, etc. + __ Index(z1.VnB(), 0, 1); + __ Lsr(z1.VnB(), z1.VnB(), 2); + + __ Match(p3.VnB(), p0.Zeroing(), z0.VnB(), z1.VnB()); + __ Match(p4.VnB(), p1.Zeroing(), z0.VnB(), z1.VnB()); + __ Nmatch(p0.VnB(), p0.Zeroing(), z0.VnB(), z1.VnB()); + + __ Uunpklo(z0.VnH(), z0.VnB()); + __ Uunpklo(z1.VnH(), z1.VnB()); + + __ Match(p5.VnH(), p1.Zeroing(), z0.VnH(), z1.VnH()); + __ Match(p6.VnH(), p2.Zeroing(), z0.VnH(), z1.VnH()); + __ Nmatch(p1.VnH(), p1.Zeroing(), z0.VnH(), z1.VnH()); + + END(); + if (CAN_RUN()) { + RUN(); + + int p3_exp[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}; + ASSERT_EQUAL_SVE(p3_exp, p3.VnB()); + int p4_exp[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1}; + ASSERT_EQUAL_SVE(p4_exp, p4.VnB()); + int p0_exp[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0}; + ASSERT_EQUAL_SVE(p0_exp, p0.VnB()); + + int p5_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1}; + ASSERT_EQUAL_SVE(p5_exp, p5.VnB()); + int p6_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + ASSERT_EQUAL_SVE(p6_exp, p6.VnB()); + int p1_exp[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0}; + ASSERT_EQUAL_SVE(p1_exp, p1.VnB()); + } +} + +TEST_SVE(sve2_saba_uaba) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + + START(); + + __ Index(z0.VnB(), 0, 1); + __ Dup(z1.VnB(), 0xff); + __ Dup(z2.VnB(), 1); + __ Uaba(z2.VnB(), z2.VnB(), z0.VnB(), z1.VnB()); + __ Index(z0.VnB(), 0, -1); + + __ Index(z3.VnH(), 0, 1); + __ Index(z4.VnH(), 1, 1); + __ Uaba(z3.VnH(), z3.VnH(), 
z3.VnH(), z4.VnH()); + + __ Index(z5.VnS(), 3, 6); + __ Index(z6.VnS(), 5, 6); + __ Uaba(z5.VnS(), z5.VnS(), z5.VnS(), z6.VnS()); + + __ Index(z7.VnD(), 424, 12); + __ Index(z8.VnD(), 4242, 12); + __ Uaba(z7.VnD(), z7.VnD(), z7.VnD(), z8.VnD()); + + __ Index(z9.VnH(), -1, -1); + __ Dup(z10.VnB(), 0); + __ Saba(z10.VnB(), z10.VnB(), z9.VnB(), z10.VnB()); + __ Index(z11.VnH(), 0x0101, 1); + + __ Index(z12.VnH(), 0, 1); + __ Index(z13.VnH(), 0, -1); + __ Saba(z13.VnH(), z13.VnH(), z12.VnH(), z13.VnH()); + + __ Index(z14.VnS(), 0, 2); + __ Index(z15.VnS(), 0, -2); + __ Saba(z15.VnS(), z15.VnS(), z14.VnS(), z15.VnS()); + + __ Index(z16.VnD(), 0, 42); + __ Index(z17.VnD(), 0, -42); + __ Saba(z17.VnD(), z17.VnD(), z16.VnD(), z17.VnD()); + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_SVE(z0, z2); + ASSERT_EQUAL_SVE(z3, z4); + ASSERT_EQUAL_SVE(z5, z6); + ASSERT_EQUAL_SVE(z7, z8); + + ASSERT_EQUAL_SVE(z10, z11); + ASSERT_EQUAL_SVE(z12, z13); + ASSERT_EQUAL_SVE(z14, z15); + ASSERT_EQUAL_SVE(z16, z17); + } +} + +TEST_SVE(sve2_integer_multiply_long_vector) { + // The test just check Sqdmull[b|t] and Pmull[b|t], as the way how the element + // operating of the other instructions in the group are likewise. 
+ int32_t zn_inputs_s[] = + {1, -2, 3, -4, 5, -6, 7, -8, INT32_MIN, INT32_MAX, INT32_MAX, INT32_MIN}; + + int32_t zm_inputs_s[] = + {1, 2, 3, 4, 5, 6, 7, 8, INT32_MAX, INT32_MIN, INT32_MAX, INT32_MIN}; + int64_t sqdmullb_vec_expected_d[] = + {-8, -32, -72, -128, RawbitsToInt64(0x8000000100000000), INT64_MAX}; + + uint64_t sqdmullt_vec_expected_d[] = + {2, 18, 50, 98, 0x8000000100000000, 0x7ffffffe00000002}; + + uint64_t pmullb_vec_expected_d[] = {0x00000001fffffffc, + 0x00000003fffffff0, + 0x000000020000001c, + 0x00000007ffffffc0, + 0x3fffffff80000000, + 0x4000000000000000}; + + uint64_t pmullt_vec_expected_d[] = {0x05, + 0x11, + 0x15, + 0x3fffffff80000000, + 0x1555555555555555}; + + uint64_t sqdmullb_idx_expected_d[] = {0xfffffffffffffff8, + 0xfffffffffffffff0, + 0xffffffffffffffb8, + 0xffffffffffffffa0, + 0x8000000100000000, + INT64_MAX}; + + uint64_t sqdmullt_idx_expected_d[] = + {8, // 2 * zn[11] * zm[8] = 2 * 4 * 1 + 24, // 2 * zn[9] * zm[8] = 2 * 4 * 3 + 80, // 2 * zn[7] * zm[4] = 2 * 8 * 5 + 112, // 2 * zn[5] * zm[4] = 2 * 8 * 7 + 0x7fffffffffffffff, // 2 * zn[3] * zm[0] + 0x8000000100000000}; // 2 * zn[1] * zm[0] + + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + InsrHelper(&masm, z31.VnS(), zn_inputs_s); + InsrHelper(&masm, z30.VnS(), zm_inputs_s); + + __ Sqdmullb(z1.VnD(), z31.VnS(), z30.VnS()); + __ Sqdmullt(z2.VnD(), z31.VnS(), z30.VnS()); + + __ Pmullb(z3.VnD(), z31.VnS(), z30.VnS()); + __ Pmullt(z4.VnD(), z31.VnS(), z30.VnS()); + + __ Mov(z7, z30); + __ Mov(z8, z31); + __ Sqdmullb(z5.VnD(), z8.VnS(), z7.VnS(), 2); + __ Sqdmullt(z6.VnD(), z8.VnS(), z7.VnS(), 0); + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_SVE(sqdmullb_vec_expected_d, z1.VnD()); + ASSERT_EQUAL_SVE(sqdmullt_vec_expected_d, z2.VnD()); + ASSERT_EQUAL_SVE(pmullb_vec_expected_d, z3.VnD()); + ASSERT_EQUAL_SVE(pmullt_vec_expected_d, z4.VnD()); + ASSERT_EQUAL_SVE(sqdmullb_idx_expected_d, z5.VnD()); + ASSERT_EQUAL_SVE(sqdmullt_idx_expected_d, 
z6.VnD()); + } +} + +TEST_SVE(sve2_integer_multiply_add_long_vector) { + int32_t zn_inputs_s[] = + {1, -2, 3, -4, 5, -6, 7, -8, INT32_MIN, INT32_MAX, INT32_MAX, INT32_MIN}; + + int32_t zm_inputs_s[] = + {1, 2, 3, 4, 5, 6, 7, 8, INT32_MAX, INT32_MIN, INT32_MAX, INT32_MIN}; + + int64_t sqdmlalb_vec_expected_d[] = + {-3, -28, -69, -126, RawbitsToInt64(0x8000000100000001), INT64_MAX}; + + int64_t sqdmlalt_vec_expected_d[] = {-3, + 14, + 47, + 96, + RawbitsToInt64(0x80000000ffffffff), + static_cast<int64_t>( + 0x7ffffffe00000002)}; + + int64_t sqdmlalb_idx_expected_d[] = + {-11, // za.d[5] + 2 * zn.s[10] * zm.s[8] = 5 + 2 * -2 * 4 + -28, // za.d[4] + 2 * zn.s[8] * zm.s[8] = 4 + 2 * -4 * 4 + -93, // za.d[3] + 2 * zn.s[6] * zm.s[4] = 3 + 2 * -6 * 8 + -126, // za.d[2] + 2 * zn.s[4] * zm.s[4] = 2 + 2 * -8 * 8 + RawbitsToInt64(0x8000000100000001), + INT64_MAX}; + + int64_t sqdmlalt_idx_expected_d[] = + {1, // za.d[5] + 2 * zn.s[11] * zm.s[9] = -5 + 2 * 1 * 3 + 14, // za.d[4] + 2 * zn.s[9] * zm.s[9] = -4 + 2 * 3 * 3 + 67, // za.d[3] + 2 * zn.s[7] * zm.s[5] = -3 + 2 * 5 * 7 + 96, // za.d[2] + 2 * zn.s[5] * zm.s[5] = -2 + 2 * 7 * 7 + RawbitsToInt64(0x80000000ffffffff), + static_cast<int64_t>(0x7ffffffe00000002)}; + + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + InsrHelper(&masm, z0.VnS(), zn_inputs_s); + InsrHelper(&masm, z1.VnS(), zm_inputs_s); + __ Index(z2.VnD(), 0, 1); + __ Index(z3.VnD(), 0, -1); + + __ Mov(z31, z2); + __ Sqdmlalb(z31.VnD(), z31.VnD(), z0.VnS(), z1.VnS()); + __ Mov(z30, z3); + __ Sqdmlalt(z30.VnD(), z30.VnD(), z0.VnS(), z1.VnS()); + __ Mov(z29, z31); + __ Sqdmlslb(z29.VnD(), z29.VnD(), z0.VnS(), z1.VnS()); + __ Mov(z28, z30); + __ Sqdmlslt(z28.VnD(), z28.VnD(), z0.VnS(), z1.VnS()); + + __ Sqdmlalb(z27.VnD(), z2.VnD(), z0.VnS(), z1.VnS()); + __ Sqdmlalt(z26.VnD(), z3.VnD(), z0.VnS(), z1.VnS()); + __ Sqdmlslb(z25.VnD(), z27.VnD(), z0.VnS(), z1.VnS()); + __ Sqdmlslt(z24.VnD(), z26.VnD(), z0.VnS(), z1.VnS()); + + __ Mov(z23, 
z2); + __ Sqdmlalb(z23.VnD(), z23.VnD(), z0.VnS(), z1.VnS(), 0); + __ Mov(z22, z3); + __ Sqdmlalt(z22.VnD(), z22.VnD(), z0.VnS(), z1.VnS(), 1); + __ Mov(z21, z23); + __ Sqdmlslb(z21.VnD(), z21.VnD(), z0.VnS(), z1.VnS(), 0); + __ Mov(z20, z22); + __ Sqdmlslt(z20.VnD(), z20.VnD(), z0.VnS(), z1.VnS(), 1); + + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_SVE(sqdmlalb_vec_expected_d, z31.VnD()); + ASSERT_EQUAL_SVE(sqdmlalt_vec_expected_d, z30.VnD()); + ASSERT_EQUAL_SVE(z2, z29); + ASSERT_EQUAL_SVE(z3, z28); + + ASSERT_EQUAL_SVE(z31, z27); + ASSERT_EQUAL_SVE(z30, z26); + ASSERT_EQUAL_SVE(z29, z25); + ASSERT_EQUAL_SVE(z28, z24); + + ASSERT_EQUAL_SVE(sqdmlalb_idx_expected_d, z23.VnD()); + ASSERT_EQUAL_SVE(sqdmlalt_idx_expected_d, z22.VnD()); + ASSERT_EQUAL_SVE(z2, z21); + ASSERT_EQUAL_SVE(z3, z20); + } +} + +TEST_SVE(sve2_ldnt1) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + int data_size = kZRegMaxSizeInBytes * 4; + uint8_t* data = new uint8_t[data_size]; + for (int i = 0; i < data_size; i++) { + data[i] = i & 0xff; + } + + // Set the base half-way through the buffer so we can use negative indices. 
+ __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2])); + __ Index(z30.VnD(), x0, 1); + __ Ptrue(p0.VnB()); + __ Punpklo(p1.VnH(), p0.VnB()); + __ Punpklo(p2.VnH(), p1.VnB()); + __ Punpklo(p3.VnH(), p2.VnB()); + __ Punpklo(p4.VnH(), p3.VnB()); + + __ Mov(x1, 1); + __ Ldnt1b(z0.VnD(), p1.Zeroing(), SVEMemOperand(z30.VnD(), x1)); + __ Ld1b(z1.VnD(), p1.Zeroing(), SVEMemOperand(x1, z30.VnD())); + + __ Mov(x1, -4); + __ Ldnt1h(z2.VnD(), p2.Zeroing(), SVEMemOperand(z30.VnD(), x1)); + __ Ld1h(z3.VnD(), p2.Zeroing(), SVEMemOperand(x1, z30.VnD())); + + __ Mov(x1, 16); + __ Ldnt1w(z4.VnD(), p3.Zeroing(), SVEMemOperand(z30.VnD(), x1)); + __ Ld1w(z5.VnD(), p3.Zeroing(), SVEMemOperand(x1, z30.VnD())); + + __ Mov(x1, -16); + __ Ldnt1d(z6.VnD(), p4.Zeroing(), SVEMemOperand(z30.VnD(), x1)); + __ Ld1d(z7.VnD(), p4.Zeroing(), SVEMemOperand(x1, z30.VnD())); + + __ Mov(x1, 1); + __ Ldnt1sb(z8.VnD(), p0.Zeroing(), SVEMemOperand(z30.VnD(), x1)); + __ Ld1sb(z9.VnD(), p0.Zeroing(), SVEMemOperand(x1, z30.VnD())); + + __ Mov(x1, -4); + __ Ldnt1sh(z10.VnD(), p2.Zeroing(), SVEMemOperand(z30.VnD(), x1)); + __ Ld1sh(z11.VnD(), p2.Zeroing(), SVEMemOperand(x1, z30.VnD())); + + __ Mov(x1, 16); + __ Ldnt1sw(z12.VnD(), p3.Zeroing(), SVEMemOperand(z30.VnD(), x1)); + __ Ld1sw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x1, z30.VnD())); + + END(); + + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_SVE(z0, z1); + ASSERT_EQUAL_SVE(z2, z3); + ASSERT_EQUAL_SVE(z4, z5); + ASSERT_EQUAL_SVE(z6, z7); + ASSERT_EQUAL_SVE(z8, z9); + ASSERT_EQUAL_SVE(z10, z11); + ASSERT_EQUAL_SVE(z12, z13); + } +} + +TEST_SVE(sve2_stnt1) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + int data_size = kZRegMaxSizeInBytes * 4; + uint8_t* data = new uint8_t[data_size]; + + // Set the base half-way through the buffer so we can use negative indices. 
+ __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2])); + __ Ptrue(p0.VnB()); + __ Punpklo(p1.VnH(), p0.VnB()); + __ Punpklo(p2.VnH(), p1.VnB()); + __ Punpklo(p3.VnH(), p2.VnB()); + __ Punpklo(p4.VnH(), p3.VnB()); + __ Dup(z0.VnB(), 0xaa); + __ Dup(z1.VnB(), 0x55); + __ Rdvl(x1, 1); + __ Mov(x3, 0); + + // Put store addresses into z30, and a small offset in x4. + __ Index(z30.VnD(), x0, 1); + __ Mov(x4, 2); + + // Store an entire vector of 0xaa to the buffer, then a smaller scatter store + // of 0x55 using Stnt1b. + __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4)); + __ Stnt1b(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4)); + + // Load the entire vector back from the buffer. + __ Ld1b(z2.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4)); + + // Construct a predicate that reflects the number of bytes stored by Stnt1b, + // based on the current VL, and use Sel to obtain a reference vector for + // comparison. + __ Lsr(x2, x1, 3); + __ Whilelo(p5.VnB(), x3, x2); + __ Sel(z3.VnB(), p5.Merging(), z1.VnB(), z0.VnB()); + + // Repeat for larger element sizes. 
+ __ Mov(x4, -4); + __ Index(z30.VnD(), x0, 2); + __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4)); + __ Stnt1h(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4)); + __ Ld1b(z4.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4)); + __ Lsr(x2, x1, 2); + __ Whilelo(p5.VnB(), x3, x2); + __ Sel(z5.VnB(), p5.Merging(), z1.VnB(), z0.VnB()); + + __ Mov(x4, 16); + __ Index(z30.VnD(), x0, 4); + __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4)); + __ Stnt1w(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4)); + __ Ld1b(z6.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4)); + __ Lsr(x2, x1, 1); + __ Whilelo(p5.VnB(), x3, x2); + __ Sel(z7.VnB(), p5.Merging(), z1.VnB(), z0.VnB()); + + __ Mov(x4, -16); + __ Index(z30.VnD(), x0, 8); + __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4)); + __ Stnt1d(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4)); + __ Ld1b(z8.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4)); + __ Whilelo(p5.VnB(), x3, x1); + __ Sel(z9.VnB(), p5.Merging(), z1.VnB(), z0.VnB()); + END(); + + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_SVE(z2, z3); + ASSERT_EQUAL_SVE(z4, z5); + ASSERT_EQUAL_SVE(z6, z7); + ASSERT_EQUAL_SVE(z8, z9); + } +} + +TEST_SVE(sve2_while_simple) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + + START(); + __ Mov(x0, 1); + __ Mov(x1, 0); + __ Mov(x2, 3); + + __ Whilehi(p0.VnB(), x0, x1); + __ Whilehs(p1.VnB(), x0, x1); + __ Whilehi(p2.VnB(), x2, x1); + __ Whilehs(p3.VnB(), x2, x1); + __ Whilehi(p4.VnB(), x2, x0); + __ Whilehs(p5.VnB(), x2, x0); + + __ Whilegt(p6.VnB(), x0, x1); + __ Whilege(p7.VnB(), x0, x1); + __ Whilegt(p8.VnB(), x2, x1); + __ Whilege(p9.VnB(), x2, x1); + __ Whilegt(p10.VnB(), x2, x0); + __ Whilege(p11.VnB(), x2, x0); + + __ Mov(x4, 0x80000000); + __ Mov(x5, 0x80000001); + __ Whilege(p12.VnB(), w5, w4); + __ Whilegt(p13.VnB(), w5, w4); + + __ Mov(x6, 0x8000000000000000); + __ Mov(x7, 0x8000000000000001); + __ Whilege(p14.VnB(), x7, x6); + __ Whilegt(p15.VnB(), x7, x6); + + for (int i = 0; i < 16; i++) { + __ Rev(PRegister(i).VnB(), PRegister(i).VnB()); + } + + 
END(); + + if (CAN_RUN()) { + RUN(); + int p0_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int p1_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int p2_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1}; + int p3_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int p4_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}; + int p5_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1}; + int p6_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int p7_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}; + int p8_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1}; + int p9_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}; + int p10_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}; + int p11_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1}; + int p12_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int p13_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int p14_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int p15_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + + ASSERT_EQUAL_SVE(p0_exp, p0.VnB()); + ASSERT_EQUAL_SVE(p1_exp, p1.VnB()); + ASSERT_EQUAL_SVE(p2_exp, p2.VnB()); + ASSERT_EQUAL_SVE(p3_exp, p3.VnB()); + ASSERT_EQUAL_SVE(p4_exp, p4.VnB()); + ASSERT_EQUAL_SVE(p5_exp, p5.VnB()); + ASSERT_EQUAL_SVE(p6_exp, p6.VnB()); + ASSERT_EQUAL_SVE(p7_exp, p7.VnB()); + ASSERT_EQUAL_SVE(p8_exp, p8.VnB()); + ASSERT_EQUAL_SVE(p9_exp, p9.VnB()); + ASSERT_EQUAL_SVE(p10_exp, p10.VnB()); + ASSERT_EQUAL_SVE(p11_exp, p11.VnB()); + ASSERT_EQUAL_SVE(p12_exp, p12.VnB()); + ASSERT_EQUAL_SVE(p13_exp, p13.VnB()); + ASSERT_EQUAL_SVE(p14_exp, p14.VnB()); + ASSERT_EQUAL_SVE(p15_exp, p15.VnB()); + } +} + +TEST_SVE(sve2_whilerw_whilewr_simple) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + + START(); + __ Mov(x0, 0); + __ Mov(x1, 1); + __ Mov(x2, 3); + + __ Whilerw(p0.VnB(), x0, x0); + __ Whilerw(p1.VnB(), x0, x1); + __ 
Whilerw(p2.VnB(), x1, x0); + + __ Whilewr(p3.VnB(), x0, x0); + __ Whilewr(p4.VnB(), x0, x1); + __ Whilewr(p5.VnB(), x1, x0); + + __ Whilewr(p6.VnH(), x1, x1); + __ Whilewr(p7.VnH(), x1, x2); + __ Whilewr(p8.VnH(), x2, x1); + + END(); + + if (CAN_RUN()) { + RUN(); + int p0_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + ASSERT_EQUAL_SVE(p0_exp, p0.VnB()); + int p1_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + ASSERT_EQUAL_SVE(p1_exp, p1.VnB()); + int p2_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + ASSERT_EQUAL_SVE(p2_exp, p2.VnB()); + int p3_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + ASSERT_EQUAL_SVE(p3_exp, p3.VnB()); + int p4_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + ASSERT_EQUAL_SVE(p4_exp, p4.VnB()); + int p5_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + ASSERT_EQUAL_SVE(p5_exp, p5.VnB()); + int p6_exp[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + ASSERT_EQUAL_SVE(p6_exp, p6.VnB()); + int p7_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + ASSERT_EQUAL_SVE(p7_exp, p7.VnB()); + int p8_exp[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + ASSERT_EQUAL_SVE(p8_exp, p8.VnB()); + } +} + +TEST_SVE(sve2_sqrdcmlah) { + int32_t zn_inputs[] = {-1, -2, -3, -4, 1, 2, 3, 4}; + int32_t zm_inputs[] = {-1, -2, 3, 4, 1, 2, -3, -4}; + int32_t za_inputs[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int32_t zd_000_expected[] = + {1025, 2050, -6141, -8188, 1029, 2054, -6137, -8184}; + int32_t zd_090_expected[] = + {1025, -510, -6141, 4612, 1029, -506, -6137, 4616}; + int32_t zd_180_expected[] = + {-1023, -2046, 6147, 8196, -1019, -2042, 6151, 8200}; + int32_t zd_270_expected[] = + {-1023, 514, 6147, -4604, -1019, 518, 6151, -4600}; + int32_t zd_0_270_expected[] = + {2049, -1534, 6147, -4604, 2053, -1530, 6151, -4600}; + int32_t zd_3_090_expected[] = + {1025, -510, 3075, -1532, 1029, -506, 3079, -1528}; + + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + 
+ InsrHelper(&masm, z0.VnS(), zn_inputs); + InsrHelper(&masm, z1.VnS(), zm_inputs); + InsrHelper(&masm, z31.VnS(), za_inputs); + + // When the value in operands is small, shift left a random value so that it + // can affect the result in destination. + int shift = 20; + __ Lsl(z0.VnS(), z0.VnS(), shift); + __ Lsl(z1.VnS(), z1.VnS(), shift); + + __ Mov(z10, z31); + __ Sqrdcmlah(z10.VnS(), z10.VnS(), z0.VnS(), z1.VnS(), 0); + + __ Mov(z11, z31); + __ Sqrdcmlah(z11.VnS(), z11.VnS(), z0.VnS(), z1.VnS(), 90); + + __ Mov(z12, z31); + __ Sqrdcmlah(z12.VnS(), z12.VnS(), z0.VnS(), z1.VnS(), 180); + + __ Mov(z13, z31); + __ Sqrdcmlah(z13.VnS(), z13.VnS(), z0.VnS(), z1.VnS(), 270); + + __ Sqrdcmlah(z14.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 0); + __ Sqrdcmlah(z15.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 90); + __ Sqrdcmlah(z16.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 180); + __ Sqrdcmlah(z17.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 270); + + __ Mov(z18, z31); + __ Sqrdcmlah(z18.VnS(), z18.VnS(), z0.VnS(), z1.VnS(), 0, 270); + + __ Mov(z19, z31); + __ Sqrdcmlah(z19.VnS(), z19.VnS(), z0.VnS(), z1.VnS(), 1, 90); + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_SVE(zd_000_expected, z10.VnS()); + ASSERT_EQUAL_SVE(zd_090_expected, z11.VnS()); + ASSERT_EQUAL_SVE(zd_180_expected, z12.VnS()); + ASSERT_EQUAL_SVE(zd_270_expected, z13.VnS()); + + ASSERT_EQUAL_SVE(z14, z10); + ASSERT_EQUAL_SVE(z15, z11); + ASSERT_EQUAL_SVE(z16, z12); + ASSERT_EQUAL_SVE(z17, z13); + + ASSERT_EQUAL_SVE(zd_0_270_expected, z18.VnS()); + ASSERT_EQUAL_SVE(zd_3_090_expected, z19.VnS()); + } +} + +TEST_SVE(sve2_sqrdmlah) { + uint16_t zn_inputs_h[] = {0x7ffe, 0x7ffd, 0x7ffd, 0x7ffd, 0x8000, + 0x7fff, 0x7ffe, 0x7ffe, 0x8001, 0x8000, + 0x7ffd, 0x7ffd, 0x7ffd, 0x5555, 0x5555, + 0x5555, 0x8000, 0x8000, 0xaaaa, 0x8001}; + + uint16_t zm_inputs_h[] = {0x7ffd, 0x7fff, 0x7ffe, 0x7ffd, 0x8001, + 0x7fff, 0x7fff, 0x7ffe, 0x8000, 0x8000, + 0xaaaa, 0x0001, 0x0001, 0xaaaa, 0xaaaa, + 0xcccc, 0x8000, 0x8000, 0x8000, 0x8001}; + + 
uint16_t za_inputs_h[] = {0x1010, 0x1010, 0x1010, 0x1010, 0x1010, + 0x1010, 0x1010, 0x1010, 0x8000, 0x8011, + 0x8006, 0xff7d, 0xfeff, 0xaabc, 0xaabb, + 0x9c72, 0x8000, 0x0000, 0x8000, 0xffff}; + + uint16_t zd_expected_h[] = {0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0xffff, 0x0011, + 0x8000, 0xff7e, 0xff00, 0x8000, 0x8000, + 0x8000, 0x0000, 0x7fff, 0xd556, 0x7ffd}; + + uint32_t zn_inputs_s[] = {0x04000000, + 0x80000000, + 0x04000000, + 0x80000000, + 0x80000000, + 0x80000001, + 0x7fffffff, + 0x80000000, + 0x7ffffffe, + 0x7ffffffd, + 0x7ffffffd, + 0x7ffffffd}; + + uint32_t zm_inputs_s[] = {0x00000020, + 0x80000000, + 0x00000010, + 0x80000000, + 0x7fffffff, + 0x80000000, + 0x80000000, + 0x80000001, + 0x7ffffffd, + 0x7fffffff, + 0x7ffffffe, + 0x7ffffffd}; + + uint32_t za_inputs_s[] = {0x00000000, + 0x00000000, + 0x00000020, + 0x00108000, + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000001, + 0x10101010, + 0x10101010, + 0x10101010, + 0x10101010}; + + uint32_t zd_expected_s[] = {0x00000001, + 0x7fffffff, + 0x00000021, + 0x7fffffff, + 0x80000001, + 0x7fffffff, + 0x80000001, + 0x7fffffff, + 0x7fffffff, + 0x7fffffff, + 0x7fffffff, + 0x7fffffff}; + + uint64_t zn_inputs_d[] = {0x0400000000000000, 0x8000000000000000, + 0x0400000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000001, + 0x7fffffffffffffff, 0x8000000000000000, + 0x7ffffffffffffffe, 0x7ffffffffffffffd, + 0x7ffffffffffffffd, 0x7ffffffffffffffd, + 0xf1299accc9186169, 0xd529d2675ee9da21, + 0x1a10b5d60b92dcf9, 0xfb1d358e0e6455b1, + 0x8eb7721078bdc589, 0x4171509750ded141, + 0x8eb7721078bdc589, 0x4171509750ded141}; + + uint64_t zm_inputs_d[] = {0x0000000000000020, 0x8000000000000000, + 0x0000000000000010, 0x8000000000000000, + 0x7fffffffffffffff, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000001, + 0x7ffffffffffffffd, 0x7fffffffffffffff, + 0x7ffffffffffffffe, 0x7ffffffffffffffd, + 0x30b940efe73f180e, 0x3bc1ff1e52a99b66, + 0x40de5c9793535a5e, 0x24752faf47bdddb6, + 
0x162663016b07e5ae, 0x1de34b56f3d22006, + 0x8eb7721078bdc589, 0x4171509750ded141}; + + uint64_t za_inputs_d[] = {0x0000000000000000, 0x0000000000000000, + 0x0000000000000020, 0x0010108000000000, + 0x0000000000000000, 0x0000000000000001, + 0x0000000000000000, 0x0000000000000001, + 0x1010101010101010, 0x1010101010101010, + 0x1010101010101010, 0x1010101010101010, + 0xb18253371b2c2c77, 0xa70de31e6645eaef, + 0xda817198c0318487, 0x9fd9e6b8e04b42ff, + 0xced1f6b7119ab197, 0x01ae051a85509b0f, + 0x01a211e9352f7927, 0x7667b70a5b13749f}; + + uint64_t zd_expected_d[] = {0x0000000000000001, 0x7fffffffffffffff, + 0x0000000000000021, 0x7fffffffffffffff, + 0x8000000000000001, 0x7fffffffffffffff, + 0x8000000000000001, 0x7fffffffffffffff, + 0x7fffffffffffffff, 0x7fffffffffffffff, + 0x7fffffffffffffff, 0x7fffffffffffffff, + 0xabdc73dea0d72a35, 0x930e3dc877301966, + 0xe7b7145a059f8a9f, 0x9e75a4a9d10cf8af, + 0xbb378528642d2581, 0x10f5e6d693ffddf3, + 0x65e455a46adc091c, 0x7fffffffffffffff}; + + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + InsrHelper(&masm, z0.VnH(), zn_inputs_h); + InsrHelper(&masm, z1.VnH(), zm_inputs_h); + InsrHelper(&masm, z2.VnH(), za_inputs_h); + + __ Sqrdmlah(z2.VnH(), z2.VnH(), z0.VnH(), z1.VnH()); + + InsrHelper(&masm, z3.VnS(), zn_inputs_s); + InsrHelper(&masm, z4.VnS(), zm_inputs_s); + InsrHelper(&masm, z5.VnS(), za_inputs_s); + + __ Sqrdmlah(z5.VnS(), z5.VnS(), z3.VnS(), z4.VnS()); + + InsrHelper(&masm, z6.VnD(), zn_inputs_d); + InsrHelper(&masm, z7.VnD(), zm_inputs_d); + InsrHelper(&masm, z8.VnD(), za_inputs_d); + + __ Sqrdmlah(z8.VnD(), z8.VnD(), z6.VnD(), z7.VnD()); + + END(); + + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_SVE(zd_expected_h, z2.VnH()); + ASSERT_EQUAL_SVE(zd_expected_s, z5.VnS()); + ASSERT_EQUAL_SVE(zd_expected_d, z8.VnD()); + } +} + +TEST_SVE(sve2_cmla) { + int32_t zn_inputs_s[] = {-2, -4, -6, -8, 2, 4, 6, 8}; + int32_t zm_inputs_s[] = {-2, -4, -6, -8, 2, 4, 6, 8}; + int32_t zda_inputs_s[] = {1, 2, 3, 4, 5, 
6, 7, 8}; + int32_t zd_000_expected[] = {9, 18, 51, 68, 13, 22, 55, 72}; + int32_t zd_090_expected[] = {9, -2, 51, -32, 13, 2, 55, -28}; + int32_t zd_180_expected[] = {-7, -14, -45, -60, -3, -10, -41, -56}; + int32_t zd_270_expected[] = {-7, 6, -45, 40, -3, 10, -41, 44}; + + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + InsrHelper(&masm, z31.VnS(), zn_inputs_s); + InsrHelper(&masm, z30.VnS(), zm_inputs_s); + + InsrHelper(&masm, z0.VnS(), zda_inputs_s); + __ Mov(z29, z0); + __ Cmla(z0.VnS(), z0.VnS(), z31.VnS(), z30.VnS(), 0); + + InsrHelper(&masm, z1.VnS(), zda_inputs_s); + __ Mov(z28, z1); + __ Cmla(z1.VnS(), z1.VnS(), z31.VnS(), z30.VnS(), 90); + + InsrHelper(&masm, z2.VnS(), zda_inputs_s); + __ Mov(z27, z2); + __ Cmla(z2.VnS(), z2.VnS(), z31.VnS(), z30.VnS(), 180); + + InsrHelper(&masm, z3.VnS(), zda_inputs_s); + __ Mov(z26, z3); + __ Cmla(z3.VnS(), z3.VnS(), z31.VnS(), z30.VnS(), 270); + + __ Cmla(z4.VnS(), z29.VnS(), z31.VnS(), z30.VnS(), 0); + __ Cmla(z5.VnS(), z28.VnS(), z31.VnS(), z30.VnS(), 90); + __ Cmla(z6.VnS(), z27.VnS(), z31.VnS(), z30.VnS(), 180); + __ Cmla(z7.VnS(), z26.VnS(), z31.VnS(), z30.VnS(), 270); + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_SVE(zd_000_expected, z0.VnS()); + ASSERT_EQUAL_SVE(zd_090_expected, z1.VnS()); + ASSERT_EQUAL_SVE(zd_180_expected, z2.VnS()); + ASSERT_EQUAL_SVE(zd_270_expected, z3.VnS()); + + ASSERT_EQUAL_SVE(z4, z0); + ASSERT_EQUAL_SVE(z5, z1); + ASSERT_EQUAL_SVE(z6, z2); + ASSERT_EQUAL_SVE(z7, z3); + } +} + +TEST_SVE(sve2_integer_saturating_multiply_add_long) { + int32_t zn_bottom_inputs[] = + {-2, -4, -6, -8, INT32_MAX, INT32_MIN, INT32_MIN}; + + int32_t zm_top_inputs[] = {1, 3, 5, 7, INT32_MAX, INT32_MAX, INT32_MIN}; + + int64_t sqdmlalbt_expected[] = {2, + -19, + -56, + -109, + static_cast<int64_t>(0x7ffffffe00000004), + RawbitsToInt64(0x8000000100000001), + INT64_MAX}; + + int64_t sqdmlslbt_expected[] = {-2, + 19, + 56, + 109, + RawbitsToInt64(0x80000001fffffffc), + 
static_cast<int64_t>(0x7ffffffeffffffff), + RawbitsToInt64(0x8000000000000001)}; + + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + InsrHelper(&masm, z31.VnS(), zn_bottom_inputs); + InsrHelper(&masm, z30.VnS(), zm_top_inputs); + + __ Dup(z29.VnD(), 0); + __ Zip1(z31.VnS(), z31.VnS(), z29.VnS()); + __ Zip1(z30.VnS(), z29.VnS(), z30.VnS()); + + // Initialise inputs for za. + __ Index(z1.VnD(), 0, 1); + __ Index(z2.VnD(), 0, -1); + + __ Sqdmlalbt(z1.VnD(), z1.VnD(), z31.VnS(), z30.VnS()); + __ Sqdmlslbt(z2.VnD(), z2.VnD(), z31.VnS(), z30.VnS()); + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_SVE(sqdmlalbt_expected, z1.VnD()); + ASSERT_EQUAL_SVE(sqdmlslbt_expected, z2.VnD()); + } +} + +TEST_SVE(sve2_floating_point_multiply_add_long_vector) { + uint16_t zn_inputs[] = {Float16ToRawbits(Float16(1000)), + Float16ToRawbits(Float16(2000)), + Float16ToRawbits(Float16(0.5)), + Float16ToRawbits(Float16(-0.5)), + Float16ToRawbits(Float16(14)), + Float16ToRawbits(Float16(-14)), + Float16ToRawbits(kFP16PositiveInfinity), + Float16ToRawbits(kFP16NegativeInfinity)}; + + uint16_t zm_inputs[] = {Float16ToRawbits(Float16(10)), + Float16ToRawbits(Float16(-10)), + Float16ToRawbits(Float16(10)), + Float16ToRawbits(Float16(-10)), + Float16ToRawbits(Float16(10)), + Float16ToRawbits(Float16(-10)), + Float16ToRawbits(Float16(10)), + Float16ToRawbits(Float16(-10))}; + + uint32_t za_inputs[] = {FloatToRawbits(1.0f), + FloatToRawbits(-1.0f), + FloatToRawbits(1.0f), + FloatToRawbits(-1.0f)}; + + uint32_t fmlalb_zd_expected[] = {0xc69c3e00, // -19999 + 0x40800000, // 4 + 0x430d0000, // 141 + FloatToRawbits(kFP32PositiveInfinity)}; + + uint32_t fmlalt_zd_expected[] = {0x461c4400, // 10001 + 0x40800000, // 4 + 0x430d0000, // 141 + FloatToRawbits(kFP32PositiveInfinity)}; + + uint32_t fmlslb_zd_expected[] = {0x469c4200, // 20001 + 0xc0c00000, // -6 + 0xc30b0000, // -139 + FloatToRawbits(kFP32NegativeInfinity)}; + + uint32_t fmlslt_zd_expected[] = {0xc61c3c00, 
// -9999 + 0xc0c00000, // -6 + 0xc30b0000, // -139 + FloatToRawbits(kFP32NegativeInfinity)}; + + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + START(); + + InsrHelper(&masm, z31.VnH(), zn_inputs); + InsrHelper(&masm, z30.VnH(), zm_inputs); + InsrHelper(&masm, z29.VnS(), za_inputs); + + __ Mov(z0, z29); + __ Fmlalb(z0.VnS(), z0.VnS(), z31.VnH(), z30.VnH()); + + __ Mov(z1, z29); + __ Fmlalt(z1.VnS(), z1.VnS(), z31.VnH(), z30.VnH()); + + __ Mov(z2, z29); + __ Fmlslb(z2.VnS(), z2.VnS(), z31.VnH(), z30.VnH()); + + __ Mov(z3, z29); + __ Fmlslt(z3.VnS(), z3.VnS(), z31.VnH(), z30.VnH()); + + __ Fmlalb(z4.VnS(), z29.VnS(), z31.VnH(), z30.VnH()); + __ Fmlalt(z5.VnS(), z29.VnS(), z31.VnH(), z30.VnH()); + __ Fmlslb(z6.VnS(), z29.VnS(), z31.VnH(), z30.VnH()); + __ Fmlslt(z7.VnS(), z29.VnS(), z31.VnH(), z30.VnH()); + + END(); + + if (CAN_RUN()) { + RUN(); + + ASSERT_EQUAL_SVE(fmlalb_zd_expected, z0.VnS()); + ASSERT_EQUAL_SVE(fmlalt_zd_expected, z1.VnS()); + ASSERT_EQUAL_SVE(fmlslb_zd_expected, z2.VnS()); + ASSERT_EQUAL_SVE(fmlslt_zd_expected, z3.VnS()); + + ASSERT_EQUAL_SVE(z4, z0); + ASSERT_EQUAL_SVE(z5, z1); + ASSERT_EQUAL_SVE(z6, z2); + ASSERT_EQUAL_SVE(z7, z3); + } +} + +TEST_SVE(sve2_flogb_simple) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2); + + START(); + __ Ptrue(p0.VnB()); + __ Index(z0.VnS(), -4, 1); + __ Mov(z1.VnS(), 0); + __ Mov(z2.VnD(), 0x000fffffffffffff); + __ Mov(z3.VnD(), 0x0010000000000000); + __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS()); + __ Scvtf(z1.VnS(), p0.Merging(), z1.VnS()); + __ Fdiv(z1.VnS(), p0.Merging(), z0.VnS(), z1.VnS()); + __ Flogb(z0.VnS(), p0.Merging(), z0.VnS()); + __ Flogb(z1.VnS(), p0.Merging(), z1.VnS()); + __ Flogb(z2.VnD(), p0.Merging(), z2.VnD()); + __ Flogb(z3.VnD(), p0.Merging(), z3.VnD()); + END(); + + if (CAN_RUN()) { + RUN(); + uint64_t expected_z0[] = {0x0000000200000002, + 0x0000000200000002, + 0x0000000100000001, + 0x0000000080000000, + 0x0000000000000001, + 0x0000000100000002}; + 
ASSERT_EQUAL_SVE(expected_z0, z0.VnD()); + + uint64_t expected_z1[] = {0x7fffffff7fffffff, + 0x7fffffff7fffffff, + 0x7fffffff7fffffff, + 0x7fffffff80000000, + 0x7fffffff7fffffff, + 0x7fffffff7fffffff}; + ASSERT_EQUAL_SVE(expected_z1, z1.VnD()); + + uint64_t expected_z2[] = {0xfffffffffffffc01, + 0xfffffffffffffc01, + 0xfffffffffffffc01, + 0xfffffffffffffc01}; + ASSERT_EQUAL_SVE(expected_z2, z2.VnD()); + + uint64_t expected_z3[] = {0xfffffffffffffc02, + 0xfffffffffffffc02, + 0xfffffffffffffc02, + 0xfffffffffffffc02}; + ASSERT_EQUAL_SVE(expected_z3, z3.VnD()); + } +} + +TEST_SVE(neon_matmul) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVEI8MM, + CPUFeatures::kNEON, + CPUFeatures::kI8MM); + + // Test Neon integer matrix multiply against SVE. + START(); + __ Movi(v0.V2D(), 0xffeeddccbbaa9988, 0x77665544332211); + __ Movi(v1.V2D(), 0xaa5555aa55555555, 0x55aaaa55aaaaaa); + __ Movi(v2.V2D(), 0, 0); + __ Movi(v3.V2D(), 0, 0); + __ Movi(v4.V2D(), 0, 0); + __ Movi(v5.V2D(), 0, 0); + __ Movi(v6.V2D(), 0, 0); + __ Movi(v7.V2D(), 0, 0); + + __ Smmla(v2.V4S(), v0.V16B(), v1.V16B()); + __ Smmla(z3.VnS(), z3.VnS(), z0.VnB(), z1.VnB()); + __ Ummla(v4.V4S(), v0.V16B(), v1.V16B()); + __ Ummla(z5.VnS(), z5.VnS(), z0.VnB(), z1.VnB()); + __ Usmmla(v6.V4S(), v0.V16B(), v1.V16B()); + __ Usmmla(z7.VnS(), z7.VnS(), z0.VnB(), z1.VnB()); + END(); + + if (CAN_RUN()) { + RUN(); + + // The inputs as Z registers are zero beyond the least-significant 128 bits, + // so the Neon and SVE results should be equal for any VL. 
+ ASSERT_EQUAL_SVE(z3, z2); + ASSERT_EQUAL_SVE(z5, z4); + ASSERT_EQUAL_SVE(z7, z6); + } +} + +TEST_SVE(sudot_usdot) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kSVEI8MM); + + START(); + __ Ptrue(p0.VnB()); + __ Index(z0.VnS(), -424242, 77777); + __ Index(z1.VnB(), 127, -1); + __ Sqabs(z1.VnB(), p0.Merging(), z1.VnB()); + __ Index(z2.VnB(), 0, 1); + __ Sqabs(z2.VnB(), p0.Merging(), z2.VnB()); + __ Index(z3.VnB(), -128, 1); + __ Mov(z4.VnD(), 0); + + // Test Usdot against Udot/Sdot over the range of inputs where they should be + // equal. + __ Usdot(z5.VnS(), z0.VnS(), z1.VnB(), z2.VnB()); + __ Udot(z6.VnS(), z0.VnS(), z1.VnB(), z2.VnB()); + __ Usdot(z7.VnS(), z0.VnS(), z1.VnB(), z3.VnB()); + __ Sdot(z8.VnS(), z0.VnS(), z1.VnB(), z3.VnB()); + + // Construct values which, when interpreted correctly as signed/unsigned, + // should give a zero result for dot product. + __ Mov(z10.VnS(), 0x8101ff40); // [-127, 1, -1, 64] as signed bytes. + __ Mov(z11.VnS(), 0x02fe8002); // [2, 254, 128, 2] as unsigned bytes. + __ Usdot(z12.VnS(), z4.VnS(), z11.VnB(), z10.VnB()); + __ Usdot(z13.VnS(), z4.VnS(), z10.VnB(), z11.VnB()); + + // Construct a vector with duplicated values across segments. This allows + // testing indexed dot product against the already tested variant. + __ Mov(z14.VnS(), 1); + __ Mul(z15.VnS(), z14.VnS(), z3.VnS(), 1); + + __ Usdot(z16.VnS(), z0.VnS(), z3.VnB(), z3.VnB(), 1); + __ Usdot(z17.VnS(), z0.VnS(), z3.VnB(), z15.VnB()); + __ Sudot(z18.VnS(), z0.VnS(), z3.VnB(), z3.VnB(), 1); + __ Usdot(z19.VnS(), z0.VnS(), z15.VnB(), z3.VnB()); + END(); + + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_SVE(z6, z5); + ASSERT_EQUAL_SVE(z8, z7); + ASSERT_EQUAL_SVE(z4, z12); + + uint64_t z13_expected[] = {0xffff8200ffff8200, 0xffff8200ffff8200}; + ASSERT_EQUAL_SVE(z13_expected, z13.VnD()); + + ASSERT_EQUAL_SVE(z17, z16); + ASSERT_EQUAL_SVE(z19, z18); + } +} + +// Manually constructed simulator test to avoid creating a VL128 variant. 
+ +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 +void Testsve_fmatmul(Test* config) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM); + + // Only double-precision matrix multiply is tested here. Single-precision is + // tested in the simulator tests using a generated sequence. The (templated) + // code used in the simulator for both cases is the same, which is why the + // tests here don't need to be comprehensive. + START(); + Label vl_too_short; + __ Rdvl(x0, 1); + __ Cmp(x0, 32); + __ B(lt, &vl_too_short); // Skip testing VL128. + + __ Fdup(z0.VnD(), 1.0); + __ Fdup(z1.VnD(), 2.0); + __ Mov(z2.VnD(), 0); + + // Build 2x2 identity matrix in z3. + Label iden_loop; + __ Lsr(x0, x0, 5); + __ Bind(&iden_loop); + __ Insr(z3.VnD(), d0); + __ Insr(z3.VnD(), d2); + __ Insr(z3.VnD(), d2); + __ Insr(z3.VnD(), d0); + __ Sub(x0, x0, 1); + __ Cbnz(x0, &iden_loop); + + __ Fmmla(z1.VnD(), z1.VnD(), z0.VnD(), z0.VnD()); + __ Fmmla(z2.VnD(), z2.VnD(), z1.VnD(), z3.VnD()); + + __ Ptrue(p0.VnB()); + __ Index(z4.VnD(), -8, 3); + __ Scvtf(z4.VnD(), p0.Merging(), z4.VnD()); + __ Mov(z5.VnD(), 0); + __ Fmmla(z4.VnD(), z4.VnD(), z4.VnD(), z4.VnD()); + __ Fmmla(z5.VnD(), z5.VnD(), z4.VnD(), z3.VnD()); + + __ Bind(&vl_too_short); + END(); + + if (CAN_RUN()) { + RUN(); + + int vl = core.GetSVELaneCount(kBRegSize) * 8; + if (vl >= 256) { + ASSERT_EQUAL_SVE(z1, z2); + ASSERT_EQUAL_SVE(z4, z5); + + switch (vl) { + case 256: + case 384: { + // All results are 4.0 (1 * 1 + 2). Results for elements beyond a VL + // that's a multiple of 256 bits should be zero. 
+ uint64_t z1_expected[] = {0x0000000000000000, + 0x0000000000000000, + 0x4010000000000000, + 0x4010000000000000, + 0x4010000000000000, + 0x4010000000000000}; + ASSERT_EQUAL_SVE(z1_expected, z1.VnD()); + + uint64_t z4_expected[] = {0x0000000000000000, + 0x0000000000000000, + 0x4018000000000000, // 6.0 + 0x4022000000000000, // 9.0 + 0x4018000000000000, // 6.0 + 0x4054400000000000}; // 81.0 + ASSERT_EQUAL_SVE(z4_expected, z4.VnD()); + break; + } + case 2048: { + uint64_t z1_expected[] = + {0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, + 0x4010000000000000, 0x4010000000000000}; + ASSERT_EQUAL_SVE(z1_expected, z1.VnD()); + + uint64_t z4_expected[] = { + 0x40cb690000000000, 0x40c9728000000000, 0x40c9710000000000, + 0x40c79e8000000000, 0x40c41f0000000000, 0x40c2708000000000, + 0x40c26f0000000000, 0x40c0e48000000000, 0x40bbea0000000000, + 0x40b91d0000000000, 0x40b91a0000000000, 0x40b6950000000000, + 0x40b1d60000000000, 0x40af320000000000, 0x40af2c0000000000, + 0x40ab420000000000, 0x40a4040000000000, 0x40a0aa0000000000, + 0x40a0a40000000000, 0x409bb40000000000, 0x4091b80000000000, + 0x408a880000000000, 0x408a700000000000, 0x4083c80000000000, + 0x4071a00000000000, 0x4061a00000000000, 0x4061400000000000, + 0x4051400000000000, 0x4018000000000000, 0x4022000000000000, + 0x4018000000000000, 0x4054400000000000, + }; + ASSERT_EQUAL_SVE(z4_expected, z4.VnD()); + break; + } + default: + printf("WARNING: Some tests skipped 
due to unexpected VL.\n"); + break; + } + } + } +} +Test* test_sve_fmatmul_list[] = + {Test::MakeSVETest(256, "AARCH64_ASM_sve_fmatmul_vl256", &Testsve_fmatmul), + Test::MakeSVETest(384, "AARCH64_ASM_sve_fmatmul_vl384", &Testsve_fmatmul), + Test::MakeSVETest(2048, + "AARCH64_ASM_sve_fmatmul_vl2048", + &Testsve_fmatmul)}; + +void Testsve_ld1ro(Test* config) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM); + START(); + + int data_size = (kQRegSizeInBytes + 128) * 4; + uint8_t* data = new uint8_t[data_size]; + for (int i = 0; i < data_size; i++) { + data[i] = i & 0xff; + } + + // Set the base to just past half-way through the buffer so we can use + // negative indices. + __ Mov(x0, reinterpret_cast<uintptr_t>(&data[7 + data_size / 2])); + + __ Index(z0.VnB(), 0, 1); + __ Ptrue(p0.VnB()); + __ Cmplo(p0.VnB(), p0.Zeroing(), z0.VnB(), 4); + __ Pfalse(p1.VnB()); + __ Zip1(p1.VnB(), p0.VnB(), p1.VnB()); + __ Ptrue(p2.VnB()); + + __ Mov(x1, -32); + __ Ld1rob(z0.VnB(), p1.Zeroing(), SVEMemOperand(x0, -32)); + __ Ld1rob(z1.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1)); + + __ Mov(x1, 64 / 2); + __ Ld1roh(z2.VnH(), p2.Zeroing(), SVEMemOperand(x0, 64)); + __ Ld1roh(z3.VnH(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)); + + __ Mov(x1, -96 / 4); + __ Ld1row(z4.VnS(), p2.Zeroing(), SVEMemOperand(x0, -96)); + __ Ld1row(z5.VnS(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)); + + __ Mov(x1, 128 / 8); + __ Ld1rod(z6.VnD(), p2.Zeroing(), SVEMemOperand(x0, 128)); + __ Ld1rod(z7.VnD(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)); + + // Check that all 256-bit segments match by rotating the vector by one + // segment, eoring, and orring across the vector. 
+ __ Dup(z11.VnQ(), z0.VnQ(), 2); + __ Mov(z8, z0); + __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32); + __ Eor(z8.VnB(), z8.VnB(), z0.VnB()); + __ Orv(b9, p2, z8.VnB()); + + __ Mov(z8, z2); + __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32); + __ Eor(z8.VnB(), z8.VnB(), z2.VnB()); + __ Orv(b8, p2, z8.VnB()); + __ Orr(z9, z9, z8); + + __ Mov(z8, z4); + __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32); + __ Eor(z8.VnB(), z8.VnB(), z4.VnB()); + __ Orv(b8, p2, z8.VnB()); + __ Orr(z9, z9, z8); + + __ Mov(z8, z6); + __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32); + __ Eor(z8.VnB(), z8.VnB(), z6.VnB()); + __ Orv(b8, p2, z8.VnB()); + __ Orr(z9, z9, z8); + + END(); + + if (CAN_RUN()) { + RUN(); + + int vl = core.GetSVELaneCount(kBRegSize) * 8; + if (vl >= 256) { + ASSERT_EQUAL_SVE(z0, z1); + ASSERT_EQUAL_SVE(z2, z3); + ASSERT_EQUAL_SVE(z4, z5); + ASSERT_EQUAL_SVE(z6, z7); + + switch (vl) { + case 256: + case 2048: { + // Check the result of the rotate/eor sequence. + uint64_t expected_z9[] = {0, 0}; + ASSERT_EQUAL_SVE(expected_z9, z9.VnD()); + break; + } + case 384: { + // For non-multiple-of-256 VL, the top 128-bits must be zero, which + // breaks the rotate/eor sequence. Check the results explicitly. 
+ uint64_t z0_expected[] = {0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x000d000b00090007}; + uint64_t z2_expected[] = {0x0000000000000000, + 0x0000000000000000, + 0x868584838281807f, + 0x7e7d7c7b7a797877, + 0x767574737271706f, + 0x6e6d6c6b6a696867}; + uint64_t z4_expected[] = {0x0000000000000000, + 0x0000000000000000, + 0xe6e5e4e3e2e1e0df, + 0xdedddcdbdad9d8d7, + 0xd6d5d4d3d2d1d0cf, + 0xcecdcccbcac9c8c7}; + uint64_t z6_expected[] = {0x0000000000000000, + 0x0000000000000000, + 0xc6c5c4c3c2c1c0bf, + 0xbebdbcbbbab9b8b7, + 0xb6b5b4b3b2b1b0af, + 0xaeadacabaaa9a8a7}; + ASSERT_EQUAL_SVE(z0_expected, z0.VnD()); + ASSERT_EQUAL_SVE(z2_expected, z2.VnD()); + ASSERT_EQUAL_SVE(z4_expected, z4.VnD()); + ASSERT_EQUAL_SVE(z6_expected, z6.VnD()); + break; + } + default: + printf("WARNING: Some tests skipped due to unexpected VL.\n"); + break; + } + } + } +} +Test* test_sve_ld1ro_list[] = + {Test::MakeSVETest(256, "AARCH64_ASM_sve_ld1ro_vl256", &Testsve_ld1ro), + Test::MakeSVETest(384, "AARCH64_ASM_sve_ld1ro_vl384", &Testsve_ld1ro), + Test::MakeSVETest(2048, "AARCH64_ASM_sve_ld1ro_vl2048", &Testsve_ld1ro)}; +#endif + } // namespace aarch64 } // namespace vixl diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc index 4c5f5951..553168c8 100644 --- a/test/aarch64/test-disasm-aarch64.cc +++ b/test/aarch64/test-disasm-aarch64.cc @@ -133,6 +133,15 @@ TEST(move_immediate) { COMPARE(movn(x19, 0x5555, 32), "mov x19, #0xffffaaaaffffffff"); COMPARE(movn(x20, 0xaaaa, 48), "mov x20, #0x5555ffffffffffff"); + COMPARE(mov(w14, 0x1234), "mov w14, #0x1234"); + COMPARE(mov(x15, 0xabcd0000), "mov x15, #0xabcd0000"); + COMPARE(mov(x16, 0xaaaa000000000000), "mov x16, #0xaaaa000000000000"); + COMPARE(mov(w17, 0xaaaaffff), "mov w17, #0xaaaaffff"); + COMPARE(mov(x18, 0xffffaaaaffffffff), "mov x18, #0xffffaaaaffffffff"); + COMPARE(mov(x19, 0xffffffffffffffff), "mov x19, #0xffffffffffffffff"); + 
COMPARE(mov(x20, 0xc001c001c001c001), "mov x20, #0xc001c001c001c001"); + COMPARE(mov(sp, 0xfefefefefefefefe), "mov sp, #0xfefefefefefefefe"); + COMPARE(movk(w21, 0), "movk w21, #0x0"); COMPARE(movk(x22, 0, 0), "movk x22, #0x0"); COMPARE(movk(w23, 0, 16), "movk w23, #0x0, lsl #16"); @@ -560,6 +569,19 @@ TEST(bitfield) { CLEANUP(); } +TEST(bitfield_regression_test) { + SETUP(); + + COMPARE(dci(0x533ae450), "unallocated (Unallocated)"); + COMPARE(dci(0x133c464c), "unallocated (Unallocated)"); + COMPARE(dci(0x133c4e6d), "unallocated (Unallocated)"); + COMPARE(dci(0x133c5e45), "unallocated (Unallocated)"); + COMPARE(dci(0x1335853c), "unallocated (Unallocated)"); + COMPARE(dci(0x1335a73d), "unallocated (Unallocated)"); + + CLEANUP(); +} + TEST(crc32b) { SETUP(); @@ -2919,8 +2941,8 @@ TEST(barriers) { COMPARE_MACRO(Dsb(FullSystem, BarrierOther), "dsb sy (0b1100)"); COMPARE_MACRO(Dsb(InnerShareable, BarrierOther), "dsb sy (0b1000)"); - COMPARE_MACRO(Dsb(NonShareable, BarrierOther), "dsb sy (0b0100)"); - COMPARE_MACRO(Dsb(OuterShareable, BarrierOther), "dsb sy (0b0000)"); + COMPARE_MACRO(Dsb(NonShareable, BarrierOther), "pssbb"); + COMPARE_MACRO(Dsb(OuterShareable, BarrierOther), "ssbb"); // ISB COMPARE_MACRO(Isb(), "isb"); @@ -3021,7 +3043,7 @@ TEST(hint) { COMPARE(hint(WFI), "wfi"); COMPARE(hint(SEV), "sev"); COMPARE(hint(SEVL), "sevl"); - COMPARE(hint(6), "hint #6"); + COMPARE(hint(6), "dgh"); COMPARE(hint(ESB), "esb"); COMPARE(hint(CSDB), "csdb"); COMPARE(hint(42), "hint #42"); @@ -3077,5 +3099,634 @@ TEST(udf) { CLEANUP(); } +TEST(architecture_features) { + SETUP(); + + // ARMv8.1 - LOR + COMPARE_PREFIX(dci(0x08800000), "stllrb"); // STLLRB_SL32_ldstexcl + COMPARE_PREFIX(dci(0x08c00000), "ldlarb"); // LDLARB_LR32_ldstexcl + COMPARE_PREFIX(dci(0x48800000), "stllrh"); // STLLRH_SL32_ldstexcl + COMPARE_PREFIX(dci(0x48c00000), "ldlarh"); // LDLARH_LR32_ldstexcl + COMPARE_PREFIX(dci(0x88800000), "stllr"); // STLLR_SL32_ldstexcl + COMPARE_PREFIX(dci(0x88c00000), "ldlar"); // 
LDLAR_LR32_ldstexcl + COMPARE_PREFIX(dci(0xc8800000), "stllr"); // STLLR_SL64_ldstexcl + COMPARE_PREFIX(dci(0xc8c00000), "ldlar"); // LDLAR_LR64_ldstexcl + + // ARMv8.1 - LSE + COMPARE_PREFIX(dci(0x08207c00), "casp"); // CASP_CP32_ldstexcl + COMPARE_PREFIX(dci(0x0820fc00), "caspl"); // CASPL_CP32_ldstexcl + COMPARE_PREFIX(dci(0x08607c00), "caspa"); // CASPA_CP32_ldstexcl + COMPARE_PREFIX(dci(0x0860fc00), "caspal"); // CASPAL_CP32_ldstexcl + COMPARE_PREFIX(dci(0x08a07c00), "casb"); // CASB_C32_ldstexcl + COMPARE_PREFIX(dci(0x08a0fc00), "caslb"); // CASLB_C32_ldstexcl + COMPARE_PREFIX(dci(0x08e07c00), "casab"); // CASAB_C32_ldstexcl + COMPARE_PREFIX(dci(0x08e0fc00), "casalb"); // CASALB_C32_ldstexcl + COMPARE_PREFIX(dci(0x38200000), "ldaddb"); // LDADDB_32_memop + COMPARE_PREFIX(dci(0x38201000), "ldclrb"); // LDCLRB_32_memop + COMPARE_PREFIX(dci(0x38202000), "ldeorb"); // LDEORB_32_memop + COMPARE_PREFIX(dci(0x38203000), "ldsetb"); // LDSETB_32_memop + COMPARE_PREFIX(dci(0x38204000), "ldsmaxb"); // LDSMAXB_32_memop + COMPARE_PREFIX(dci(0x38205000), "ldsminb"); // LDSMINB_32_memop + COMPARE_PREFIX(dci(0x38206000), "ldumaxb"); // LDUMAXB_32_memop + COMPARE_PREFIX(dci(0x38207000), "lduminb"); // LDUMINB_32_memop + COMPARE_PREFIX(dci(0x38208000), "swpb"); // SWPB_32_memop + COMPARE_PREFIX(dci(0x38600000), "ldaddlb"); // LDADDLB_32_memop + COMPARE_PREFIX(dci(0x38601000), "ldclrlb"); // LDCLRLB_32_memop + COMPARE_PREFIX(dci(0x38602000), "ldeorlb"); // LDEORLB_32_memop + COMPARE_PREFIX(dci(0x38603000), "ldsetlb"); // LDSETLB_32_memop + COMPARE_PREFIX(dci(0x38604000), "ldsmaxlb"); // LDSMAXLB_32_memop + COMPARE_PREFIX(dci(0x38605000), "ldsminlb"); // LDSMINLB_32_memop + COMPARE_PREFIX(dci(0x38606000), "ldumaxlb"); // LDUMAXLB_32_memop + COMPARE_PREFIX(dci(0x38607000), "lduminlb"); // LDUMINLB_32_memop + COMPARE_PREFIX(dci(0x38608000), "swplb"); // SWPLB_32_memop + COMPARE_PREFIX(dci(0x38a00000), "ldaddab"); // LDADDAB_32_memop + COMPARE_PREFIX(dci(0x38a01000), "ldclrab"); // 
LDCLRAB_32_memop + COMPARE_PREFIX(dci(0x38a02000), "ldeorab"); // LDEORAB_32_memop + COMPARE_PREFIX(dci(0x38a03000), "ldsetab"); // LDSETAB_32_memop + COMPARE_PREFIX(dci(0x38a04000), "ldsmaxab"); // LDSMAXAB_32_memop + COMPARE_PREFIX(dci(0x38a05000), "ldsminab"); // LDSMINAB_32_memop + COMPARE_PREFIX(dci(0x38a06000), "ldumaxab"); // LDUMAXAB_32_memop + COMPARE_PREFIX(dci(0x38a07000), "lduminab"); // LDUMINAB_32_memop + COMPARE_PREFIX(dci(0x38a08000), "swpab"); // SWPAB_32_memop + COMPARE_PREFIX(dci(0x38e00000), "ldaddalb"); // LDADDALB_32_memop + COMPARE_PREFIX(dci(0x38e01000), "ldclralb"); // LDCLRALB_32_memop + COMPARE_PREFIX(dci(0x38e02000), "ldeoralb"); // LDEORALB_32_memop + COMPARE_PREFIX(dci(0x38e03000), "ldsetalb"); // LDSETALB_32_memop + COMPARE_PREFIX(dci(0x38e04000), "ldsmaxalb"); // LDSMAXALB_32_memop + COMPARE_PREFIX(dci(0x38e05000), "ldsminalb"); // LDSMINALB_32_memop + COMPARE_PREFIX(dci(0x38e06000), "ldumaxalb"); // LDUMAXALB_32_memop + COMPARE_PREFIX(dci(0x38e07000), "lduminalb"); // LDUMINALB_32_memop + COMPARE_PREFIX(dci(0x38e08000), "swpalb"); // SWPALB_32_memop + COMPARE_PREFIX(dci(0x48207c00), "casp"); // CASP_CP64_ldstexcl + COMPARE_PREFIX(dci(0x4820fc00), "caspl"); // CASPL_CP64_ldstexcl + COMPARE_PREFIX(dci(0x48607c00), "caspa"); // CASPA_CP64_ldstexcl + COMPARE_PREFIX(dci(0x4860fc00), "caspal"); // CASPAL_CP64_ldstexcl + COMPARE_PREFIX(dci(0x48a07c00), "cash"); // CASH_C32_ldstexcl + COMPARE_PREFIX(dci(0x48a0fc00), "caslh"); // CASLH_C32_ldstexcl + COMPARE_PREFIX(dci(0x48e07c00), "casah"); // CASAH_C32_ldstexcl + COMPARE_PREFIX(dci(0x48e0fc00), "casalh"); // CASALH_C32_ldstexcl + COMPARE_PREFIX(dci(0x78200000), "ldaddh"); // LDADDH_32_memop + COMPARE_PREFIX(dci(0x78201000), "ldclrh"); // LDCLRH_32_memop + COMPARE_PREFIX(dci(0x78202000), "ldeorh"); // LDEORH_32_memop + COMPARE_PREFIX(dci(0x78203000), "ldseth"); // LDSETH_32_memop + COMPARE_PREFIX(dci(0x78204000), "ldsmaxh"); // LDSMAXH_32_memop + COMPARE_PREFIX(dci(0x78205000), "ldsminh"); 
// LDSMINH_32_memop + COMPARE_PREFIX(dci(0x78206000), "ldumaxh"); // LDUMAXH_32_memop + COMPARE_PREFIX(dci(0x78207000), "lduminh"); // LDUMINH_32_memop + COMPARE_PREFIX(dci(0x78208000), "swph"); // SWPH_32_memop + COMPARE_PREFIX(dci(0x78600000), "ldaddlh"); // LDADDLH_32_memop + COMPARE_PREFIX(dci(0x78601000), "ldclrlh"); // LDCLRLH_32_memop + COMPARE_PREFIX(dci(0x78602000), "ldeorlh"); // LDEORLH_32_memop + COMPARE_PREFIX(dci(0x78603000), "ldsetlh"); // LDSETLH_32_memop + COMPARE_PREFIX(dci(0x78604000), "ldsmaxlh"); // LDSMAXLH_32_memop + COMPARE_PREFIX(dci(0x78605000), "ldsminlh"); // LDSMINLH_32_memop + COMPARE_PREFIX(dci(0x78606000), "ldumaxlh"); // LDUMAXLH_32_memop + COMPARE_PREFIX(dci(0x78607000), "lduminlh"); // LDUMINLH_32_memop + COMPARE_PREFIX(dci(0x78608000), "swplh"); // SWPLH_32_memop + COMPARE_PREFIX(dci(0x78a00000), "ldaddah"); // LDADDAH_32_memop + COMPARE_PREFIX(dci(0x78a01000), "ldclrah"); // LDCLRAH_32_memop + COMPARE_PREFIX(dci(0x78a02000), "ldeorah"); // LDEORAH_32_memop + COMPARE_PREFIX(dci(0x78a03000), "ldsetah"); // LDSETAH_32_memop + COMPARE_PREFIX(dci(0x78a04000), "ldsmaxah"); // LDSMAXAH_32_memop + COMPARE_PREFIX(dci(0x78a05000), "ldsminah"); // LDSMINAH_32_memop + COMPARE_PREFIX(dci(0x78a06000), "ldumaxah"); // LDUMAXAH_32_memop + COMPARE_PREFIX(dci(0x78a07000), "lduminah"); // LDUMINAH_32_memop + COMPARE_PREFIX(dci(0x78a08000), "swpah"); // SWPAH_32_memop + COMPARE_PREFIX(dci(0x78e00000), "ldaddalh"); // LDADDALH_32_memop + COMPARE_PREFIX(dci(0x78e01000), "ldclralh"); // LDCLRALH_32_memop + COMPARE_PREFIX(dci(0x78e02000), "ldeoralh"); // LDEORALH_32_memop + COMPARE_PREFIX(dci(0x78e03000), "ldsetalh"); // LDSETALH_32_memop + COMPARE_PREFIX(dci(0x78e04000), "ldsmaxalh"); // LDSMAXALH_32_memop + COMPARE_PREFIX(dci(0x78e05000), "ldsminalh"); // LDSMINALH_32_memop + COMPARE_PREFIX(dci(0x78e06000), "ldumaxalh"); // LDUMAXALH_32_memop + COMPARE_PREFIX(dci(0x78e07000), "lduminalh"); // LDUMINALH_32_memop + COMPARE_PREFIX(dci(0x78e08000), 
"swpalh"); // SWPALH_32_memop + COMPARE_PREFIX(dci(0x88a07c00), "cas"); // CAS_C32_ldstexcl + COMPARE_PREFIX(dci(0x88a0fc00), "casl"); // CASL_C32_ldstexcl + COMPARE_PREFIX(dci(0x88e07c00), "casa"); // CASA_C32_ldstexcl + COMPARE_PREFIX(dci(0x88e0fc00), "casal"); // CASAL_C32_ldstexcl + COMPARE_PREFIX(dci(0xb8200000), "ldadd"); // LDADD_32_memop + COMPARE_PREFIX(dci(0xb8201000), "ldclr"); // LDCLR_32_memop + COMPARE_PREFIX(dci(0xb8202000), "ldeor"); // LDEOR_32_memop + COMPARE_PREFIX(dci(0xb8203000), "ldset"); // LDSET_32_memop + COMPARE_PREFIX(dci(0xb8204000), "ldsmax"); // LDSMAX_32_memop + COMPARE_PREFIX(dci(0xb8205000), "ldsmin"); // LDSMIN_32_memop + COMPARE_PREFIX(dci(0xb8206000), "ldumax"); // LDUMAX_32_memop + COMPARE_PREFIX(dci(0xb8207000), "ldumin"); // LDUMIN_32_memop + COMPARE_PREFIX(dci(0xb8208000), "swp"); // SWP_32_memop + COMPARE_PREFIX(dci(0xb8600000), "ldaddl"); // LDADDL_32_memop + COMPARE_PREFIX(dci(0xb8601000), "ldclrl"); // LDCLRL_32_memop + COMPARE_PREFIX(dci(0xb8602000), "ldeorl"); // LDEORL_32_memop + COMPARE_PREFIX(dci(0xb8603000), "ldsetl"); // LDSETL_32_memop + COMPARE_PREFIX(dci(0xb8604000), "ldsmaxl"); // LDSMAXL_32_memop + COMPARE_PREFIX(dci(0xb8605000), "ldsminl"); // LDSMINL_32_memop + COMPARE_PREFIX(dci(0xb8606000), "ldumaxl"); // LDUMAXL_32_memop + COMPARE_PREFIX(dci(0xb8607000), "lduminl"); // LDUMINL_32_memop + COMPARE_PREFIX(dci(0xb8608000), "swpl"); // SWPL_32_memop + COMPARE_PREFIX(dci(0xb8a00000), "ldadda"); // LDADDA_32_memop + COMPARE_PREFIX(dci(0xb8a01000), "ldclra"); // LDCLRA_32_memop + COMPARE_PREFIX(dci(0xb8a02000), "ldeora"); // LDEORA_32_memop + COMPARE_PREFIX(dci(0xb8a03000), "ldseta"); // LDSETA_32_memop + COMPARE_PREFIX(dci(0xb8a04000), "ldsmaxa"); // LDSMAXA_32_memop + COMPARE_PREFIX(dci(0xb8a05000), "ldsmina"); // LDSMINA_32_memop + COMPARE_PREFIX(dci(0xb8a06000), "ldumaxa"); // LDUMAXA_32_memop + COMPARE_PREFIX(dci(0xb8a07000), "ldumina"); // LDUMINA_32_memop + COMPARE_PREFIX(dci(0xb8a08000), "swpa"); // 
SWPA_32_memop + COMPARE_PREFIX(dci(0xb8e00000), "ldaddal"); // LDADDAL_32_memop + COMPARE_PREFIX(dci(0xb8e01000), "ldclral"); // LDCLRAL_32_memop + COMPARE_PREFIX(dci(0xb8e02000), "ldeoral"); // LDEORAL_32_memop + COMPARE_PREFIX(dci(0xb8e03000), "ldsetal"); // LDSETAL_32_memop + COMPARE_PREFIX(dci(0xb8e04000), "ldsmaxal"); // LDSMAXAL_32_memop + COMPARE_PREFIX(dci(0xb8e05000), "ldsminal"); // LDSMINAL_32_memop + COMPARE_PREFIX(dci(0xb8e06000), "ldumaxal"); // LDUMAXAL_32_memop + COMPARE_PREFIX(dci(0xb8e07000), "lduminal"); // LDUMINAL_32_memop + COMPARE_PREFIX(dci(0xb8e08000), "swpal"); // SWPAL_32_memop + COMPARE_PREFIX(dci(0xc8a07c00), "cas"); // CAS_C64_ldstexcl + COMPARE_PREFIX(dci(0xc8a0fc00), "casl"); // CASL_C64_ldstexcl + COMPARE_PREFIX(dci(0xc8e07c00), "casa"); // CASA_C64_ldstexcl + COMPARE_PREFIX(dci(0xc8e0fc00), "casal"); // CASAL_C64_ldstexcl + COMPARE_PREFIX(dci(0xf8200000), "ldadd"); // LDADD_64_memop + COMPARE_PREFIX(dci(0xf8201000), "ldclr"); // LDCLR_64_memop + COMPARE_PREFIX(dci(0xf8202000), "ldeor"); // LDEOR_64_memop + COMPARE_PREFIX(dci(0xf8203000), "ldset"); // LDSET_64_memop + COMPARE_PREFIX(dci(0xf8204000), "ldsmax"); // LDSMAX_64_memop + COMPARE_PREFIX(dci(0xf8205000), "ldsmin"); // LDSMIN_64_memop + COMPARE_PREFIX(dci(0xf8206000), "ldumax"); // LDUMAX_64_memop + COMPARE_PREFIX(dci(0xf8207000), "ldumin"); // LDUMIN_64_memop + COMPARE_PREFIX(dci(0xf8208000), "swp"); // SWP_64_memop + COMPARE_PREFIX(dci(0xf8600000), "ldaddl"); // LDADDL_64_memop + COMPARE_PREFIX(dci(0xf8601000), "ldclrl"); // LDCLRL_64_memop + COMPARE_PREFIX(dci(0xf8602000), "ldeorl"); // LDEORL_64_memop + COMPARE_PREFIX(dci(0xf8603000), "ldsetl"); // LDSETL_64_memop + COMPARE_PREFIX(dci(0xf8604000), "ldsmaxl"); // LDSMAXL_64_memop + COMPARE_PREFIX(dci(0xf8605000), "ldsminl"); // LDSMINL_64_memop + COMPARE_PREFIX(dci(0xf8606000), "ldumaxl"); // LDUMAXL_64_memop + COMPARE_PREFIX(dci(0xf8607000), "lduminl"); // LDUMINL_64_memop + COMPARE_PREFIX(dci(0xf8608000), "swpl"); // 
SWPL_64_memop + COMPARE_PREFIX(dci(0xf8a00000), "ldadda"); // LDADDA_64_memop + COMPARE_PREFIX(dci(0xf8a01000), "ldclra"); // LDCLRA_64_memop + COMPARE_PREFIX(dci(0xf8a02000), "ldeora"); // LDEORA_64_memop + COMPARE_PREFIX(dci(0xf8a03000), "ldseta"); // LDSETA_64_memop + COMPARE_PREFIX(dci(0xf8a04000), "ldsmaxa"); // LDSMAXA_64_memop + COMPARE_PREFIX(dci(0xf8a05000), "ldsmina"); // LDSMINA_64_memop + COMPARE_PREFIX(dci(0xf8a06000), "ldumaxa"); // LDUMAXA_64_memop + COMPARE_PREFIX(dci(0xf8a07000), "ldumina"); // LDUMINA_64_memop + COMPARE_PREFIX(dci(0xf8a08000), "swpa"); // SWPA_64_memop + COMPARE_PREFIX(dci(0xf8e00000), "ldaddal"); // LDADDAL_64_memop + COMPARE_PREFIX(dci(0xf8e01000), "ldclral"); // LDCLRAL_64_memop + COMPARE_PREFIX(dci(0xf8e02000), "ldeoral"); // LDEORAL_64_memop + COMPARE_PREFIX(dci(0xf8e03000), "ldsetal"); // LDSETAL_64_memop + COMPARE_PREFIX(dci(0xf8e04000), "ldsmaxal"); // LDSMAXAL_64_memop + COMPARE_PREFIX(dci(0xf8e05000), "ldsminal"); // LDSMINAL_64_memop + COMPARE_PREFIX(dci(0xf8e06000), "ldumaxal"); // LDUMAXAL_64_memop + COMPARE_PREFIX(dci(0xf8e07000), "lduminal"); // LDUMINAL_64_memop + COMPARE_PREFIX(dci(0xf8e08000), "swpal"); // SWPAL_64_memop + + // ARMv8.1 - RDM + COMPARE_PREFIX(dci(0x2e008400), "sqrdmlah"); // SQRDMLAH_asimdsame2_only + COMPARE_PREFIX(dci(0x2e008c00), "sqrdmlsh"); // SQRDMLSH_asimdsame2_only + COMPARE_PREFIX(dci(0x2f40d000), "sqrdmlah"); // SQRDMLAH_asimdelem_R + COMPARE_PREFIX(dci(0x2f40f000), "sqrdmlsh"); // SQRDMLSH_asimdelem_R + COMPARE_PREFIX(dci(0x7e008400), "sqrdmlah"); // SQRDMLAH_asisdsame2_only + COMPARE_PREFIX(dci(0x7e008c00), "sqrdmlsh"); // SQRDMLSH_asisdsame2_only + COMPARE_PREFIX(dci(0x7f40d000), "sqrdmlah"); // SQRDMLAH_asisdelem_R + COMPARE_PREFIX(dci(0x7f40f000), "sqrdmlsh"); // SQRDMLSH_asisdelem_R + + // ARMv8.2 - DotProd + COMPARE_PREFIX(dci(0x0e009400), "sdot"); // SDOT_asimdsame2_D + COMPARE_PREFIX(dci(0x0f00e000), "sdot"); // SDOT_asimdelem_D + COMPARE_PREFIX(dci(0x2e009400), "udot"); // 
UDOT_asimdsame2_D + COMPARE_PREFIX(dci(0x2f00e000), "udot"); // UDOT_asimdelem_D + + // ARMv8.2 - FHM + COMPARE_PREFIX(dci(0x0e20ec00), "fmlal"); // FMLAL_asimdsame_F + COMPARE_PREFIX(dci(0x0ea0ec00), "fmlsl"); // FMLSL_asimdsame_F + COMPARE_PREFIX(dci(0x0f800000), "fmlal"); // FMLAL_asimdelem_LH + COMPARE_PREFIX(dci(0x0f804000), "fmlsl"); // FMLSL_asimdelem_LH + COMPARE_PREFIX(dci(0x2e20cc00), "fmlal2"); // FMLAL2_asimdsame_F + COMPARE_PREFIX(dci(0x2ea0cc00), "fmlsl2"); // FMLSL2_asimdsame_F + COMPARE_PREFIX(dci(0x2f808000), "fmlal2"); // FMLAL2_asimdelem_LH + COMPARE_PREFIX(dci(0x2f80c000), "fmlsl2"); // FMLSL2_asimdelem_LH + + // ARMv8.2 - FP16 + COMPARE_PREFIX(dci(0x0e20c400), "fmaxnm"); // FMAXNM_asimdsame_only + COMPARE_PREFIX(dci(0x0e20cc00), "fmla"); // FMLA_asimdsame_only + COMPARE_PREFIX(dci(0x0e20d400), "fadd"); // FADD_asimdsame_only + COMPARE_PREFIX(dci(0x0e20dc00), "fmulx"); // FMULX_asimdsame_only + COMPARE_PREFIX(dci(0x0e20e400), "fcmeq"); // FCMEQ_asimdsame_only + COMPARE_PREFIX(dci(0x0e20f400), "fmax"); // FMAX_asimdsame_only + COMPARE_PREFIX(dci(0x0e20fc00), "frecps"); // FRECPS_asimdsame_only + COMPARE_PREFIX(dci(0x0e218800), "frintn"); // FRINTN_asimdmisc_R + COMPARE_PREFIX(dci(0x0e219800), "frintm"); // FRINTM_asimdmisc_R + COMPARE_PREFIX(dci(0x0e21a800), "fcvtns"); // FCVTNS_asimdmisc_R + COMPARE_PREFIX(dci(0x0e21b800), "fcvtms"); // FCVTMS_asimdmisc_R + COMPARE_PREFIX(dci(0x0e21c800), "fcvtas"); // FCVTAS_asimdmisc_R + COMPARE_PREFIX(dci(0x0e21d800), "scvtf"); // SCVTF_asimdmisc_R + COMPARE_PREFIX(dci(0x0e30c800), "fmaxnmv"); // FMAXNMV_asimdall_only_H + COMPARE_PREFIX(dci(0x0e30f800), "fmaxv"); // FMAXV_asimdall_only_H + COMPARE_PREFIX(dci(0x0e400400), "fmaxnm"); // FMAXNM_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0e400c00), "fmla"); // FMLA_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0e401400), "fadd"); // FADD_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0e401c00), "fmulx"); // FMULX_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0e402400), 
"fcmeq"); // FCMEQ_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0e403400), "fmax"); // FMAX_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0e403c00), "frecps"); // FRECPS_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0e798800), "frintn"); // FRINTN_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0e799800), "frintm"); // FRINTM_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0e79a800), "fcvtns"); // FCVTNS_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0e79b800), "fcvtms"); // FCVTMS_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0e79c800), "fcvtas"); // FCVTAS_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0e79d800), "scvtf"); // SCVTF_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0ea0c400), "fminnm"); // FMINNM_asimdsame_only + COMPARE_PREFIX(dci(0x0ea0cc00), "fmls"); // FMLS_asimdsame_only + COMPARE_PREFIX(dci(0x0ea0d400), "fsub"); // FSUB_asimdsame_only + COMPARE_PREFIX(dci(0x0ea0f400), "fmin"); // FMIN_asimdsame_only + COMPARE_PREFIX(dci(0x0ea0fc00), "frsqrts"); // FRSQRTS_asimdsame_only + COMPARE_PREFIX(dci(0x0ea0c800), "fcmgt"); // FCMGT_asimdmisc_FZ + COMPARE_PREFIX(dci(0x0ea0d800), "fcmeq"); // FCMEQ_asimdmisc_FZ + COMPARE_PREFIX(dci(0x0ea0e800), "fcmlt"); // FCMLT_asimdmisc_FZ + COMPARE_PREFIX(dci(0x0ea0f800), "fabs"); // FABS_asimdmisc_R + COMPARE_PREFIX(dci(0x0ea18800), "frintp"); // FRINTP_asimdmisc_R + COMPARE_PREFIX(dci(0x0ea19800), "frintz"); // FRINTZ_asimdmisc_R + COMPARE_PREFIX(dci(0x0ea1a800), "fcvtps"); // FCVTPS_asimdmisc_R + COMPARE_PREFIX(dci(0x0ea1b800), "fcvtzs"); // FCVTZS_asimdmisc_R + COMPARE_PREFIX(dci(0x0ea1d800), "frecpe"); // FRECPE_asimdmisc_R + COMPARE_PREFIX(dci(0x0eb0c800), "fminnmv"); // FMINNMV_asimdall_only_H + COMPARE_PREFIX(dci(0x0eb0f800), "fminv"); // FMINV_asimdall_only_H + COMPARE_PREFIX(dci(0x0ec00400), "fminnm"); // FMINNM_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0ec00c00), "fmls"); // FMLS_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0ec01400), "fsub"); // FSUB_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0ec03400), "fmin"); // FMIN_asimdsamefp16_only + 
COMPARE_PREFIX(dci(0x0ec03c00), "frsqrts"); // FRSQRTS_asimdsamefp16_only + COMPARE_PREFIX(dci(0x0ef8c800), "fcmgt"); // FCMGT_asimdmiscfp16_FZ + COMPARE_PREFIX(dci(0x0ef8d800), "fcmeq"); // FCMEQ_asimdmiscfp16_FZ + COMPARE_PREFIX(dci(0x0ef8e800), "fcmlt"); // FCMLT_asimdmiscfp16_FZ + COMPARE_PREFIX(dci(0x0ef8f800), "fabs"); // FABS_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0ef98800), "frintp"); // FRINTP_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0ef99800), "frintz"); // FRINTZ_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0ef9a800), "fcvtps"); // FCVTPS_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0ef9b800), "fcvtzs"); // FCVTZS_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0ef9d800), "frecpe"); // FRECPE_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x0f001000), "fmla"); // FMLA_asimdelem_RH_H + COMPARE_PREFIX(dci(0x0f005000), "fmls"); // FMLS_asimdelem_RH_H + COMPARE_PREFIX(dci(0x0f009000), "fmul"); // FMUL_asimdelem_RH_H + COMPARE_PREFIX(dci(0x0f00f400), "fmov"); // FMOV_asimdimm_S_s + COMPARE_PREFIX(dci(0x0f00fc00), "fmov"); // FMOV_asimdimm_H_h + COMPARE_PREFIX(dci(0x0f801000), "fmla"); // FMLA_asimdelem_R_SD + COMPARE_PREFIX(dci(0x0f805000), "fmls"); // FMLS_asimdelem_R_SD + COMPARE_PREFIX(dci(0x0f809000), "fmul"); // FMUL_asimdelem_R_SD + COMPARE_PREFIX(dci(0x2e20c400), "fmaxnmp"); // FMAXNMP_asimdsame_only + COMPARE_PREFIX(dci(0x2e20d400), "faddp"); // FADDP_asimdsame_only + COMPARE_PREFIX(dci(0x2e20dc00), "fmul"); // FMUL_asimdsame_only + COMPARE_PREFIX(dci(0x2e20e400), "fcmge"); // FCMGE_asimdsame_only + COMPARE_PREFIX(dci(0x2e20ec00), "facge"); // FACGE_asimdsame_only + COMPARE_PREFIX(dci(0x2e20f400), "fmaxp"); // FMAXP_asimdsame_only + COMPARE_PREFIX(dci(0x2e20fc00), "fdiv"); // FDIV_asimdsame_only + COMPARE_PREFIX(dci(0x2e218800), "frinta"); // FRINTA_asimdmisc_R + COMPARE_PREFIX(dci(0x2e219800), "frintx"); // FRINTX_asimdmisc_R + COMPARE_PREFIX(dci(0x2e21a800), "fcvtnu"); // FCVTNU_asimdmisc_R + COMPARE_PREFIX(dci(0x2e21b800), "fcvtmu"); // FCVTMU_asimdmisc_R + 
COMPARE_PREFIX(dci(0x2e21c800), "fcvtau"); // FCVTAU_asimdmisc_R + COMPARE_PREFIX(dci(0x2e21d800), "ucvtf"); // UCVTF_asimdmisc_R + COMPARE_PREFIX(dci(0x6e30c800), "fmaxnmv"); // FMAXNMV_asimdall_only_SD + COMPARE_PREFIX(dci(0x6e30f800), "fmaxv"); // FMAXV_asimdall_only_SD + COMPARE_PREFIX(dci(0x2e400400), "fmaxnmp"); // FMAXNMP_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2e401400), "faddp"); // FADDP_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2e401c00), "fmul"); // FMUL_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2e402400), "fcmge"); // FCMGE_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2e402c00), "facge"); // FACGE_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2e403400), "fmaxp"); // FMAXP_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2e403c00), "fdiv"); // FDIV_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2e798800), "frinta"); // FRINTA_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2e799800), "frintx"); // FRINTX_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2e79a800), "fcvtnu"); // FCVTNU_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2e79b800), "fcvtmu"); // FCVTMU_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2e79c800), "fcvtau"); // FCVTAU_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2e79d800), "ucvtf"); // UCVTF_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2ea0c400), "fminnmp"); // FMINNMP_asimdsame_only + COMPARE_PREFIX(dci(0x2ea0d400), "fabd"); // FABD_asimdsame_only + COMPARE_PREFIX(dci(0x2ea0e400), "fcmgt"); // FCMGT_asimdsame_only + COMPARE_PREFIX(dci(0x2ea0ec00), "facgt"); // FACGT_asimdsame_only + COMPARE_PREFIX(dci(0x2ea0f400), "fminp"); // FMINP_asimdsame_only + COMPARE_PREFIX(dci(0x2ea0c800), "fcmge"); // FCMGE_asimdmisc_FZ + COMPARE_PREFIX(dci(0x2ea0d800), "fcmle"); // FCMLE_asimdmisc_FZ + COMPARE_PREFIX(dci(0x2ea0f800), "fneg"); // FNEG_asimdmisc_R + COMPARE_PREFIX(dci(0x2ea19800), "frinti"); // FRINTI_asimdmisc_R + COMPARE_PREFIX(dci(0x2ea1a800), "fcvtpu"); // FCVTPU_asimdmisc_R + COMPARE_PREFIX(dci(0x2ea1b800), "fcvtzu"); // FCVTZU_asimdmisc_R + COMPARE_PREFIX(dci(0x2ea1d800), "frsqrte"); // 
FRSQRTE_asimdmisc_R + COMPARE_PREFIX(dci(0x2ea1f800), "fsqrt"); // FSQRT_asimdmisc_R + COMPARE_PREFIX(dci(0x6eb0c800), "fminnmv"); // FMINNMV_asimdall_only_SD + COMPARE_PREFIX(dci(0x6eb0f800), "fminv"); // FMINV_asimdall_only_SD + COMPARE_PREFIX(dci(0x2ec00400), "fminnmp"); // FMINNMP_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2ec01400), "fabd"); // FABD_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2ec02400), "fcmgt"); // FCMGT_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2ec02c00), "facgt"); // FACGT_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2ec03400), "fminp"); // FMINP_asimdsamefp16_only + COMPARE_PREFIX(dci(0x2ef8c800), "fcmge"); // FCMGE_asimdmiscfp16_FZ + COMPARE_PREFIX(dci(0x2ef8d800), "fcmle"); // FCMLE_asimdmiscfp16_FZ + COMPARE_PREFIX(dci(0x2ef8f800), "fneg"); // FNEG_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2ef99800), "frinti"); // FRINTI_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2ef9a800), "fcvtpu"); // FCVTPU_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2ef9b800), "fcvtzu"); // FCVTZU_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2ef9d800), "frsqrte"); // FRSQRTE_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2ef9f800), "fsqrt"); // FSQRT_asimdmiscfp16_R + COMPARE_PREFIX(dci(0x2f009000), "fmulx"); // FMULX_asimdelem_RH_H + COMPARE_PREFIX(dci(0x2f809000), "fmulx"); // FMULX_asimdelem_R_SD + COMPARE_PREFIX(dci(0x5e20dc00), "fmulx"); // FMULX_asisdsame_only + COMPARE_PREFIX(dci(0x5e20e400), "fcmeq"); // FCMEQ_asisdsame_only + COMPARE_PREFIX(dci(0x5e20fc00), "frecps"); // FRECPS_asisdsame_only + COMPARE_PREFIX(dci(0x5e21a800), "fcvtns"); // FCVTNS_asisdmisc_R + COMPARE_PREFIX(dci(0x5e21b800), "fcvtms"); // FCVTMS_asisdmisc_R + COMPARE_PREFIX(dci(0x5e21c800), "fcvtas"); // FCVTAS_asisdmisc_R + COMPARE_PREFIX(dci(0x5e21d800), "scvtf"); // SCVTF_asisdmisc_R + COMPARE_PREFIX(dci(0x5e30c800), "fmaxnmp"); // FMAXNMP_asisdpair_only_H + COMPARE_PREFIX(dci(0x5e30d800), "faddp"); // FADDP_asisdpair_only_H + COMPARE_PREFIX(dci(0x5e30f800), "fmaxp"); // FMAXP_asisdpair_only_H + 
COMPARE_PREFIX(dci(0x5e401c00), "fmulx"); // FMULX_asisdsamefp16_only + COMPARE_PREFIX(dci(0x5e402400), "fcmeq"); // FCMEQ_asisdsamefp16_only + COMPARE_PREFIX(dci(0x5e403c00), "frecps"); // FRECPS_asisdsamefp16_only + COMPARE_PREFIX(dci(0x5e79a800), "fcvtns"); // FCVTNS_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5e79b800), "fcvtms"); // FCVTMS_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5e79c800), "fcvtas"); // FCVTAS_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5e79d800), "scvtf"); // SCVTF_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5ea0fc00), "frsqrts"); // FRSQRTS_asisdsame_only + COMPARE_PREFIX(dci(0x5ea0c800), "fcmgt"); // FCMGT_asisdmisc_FZ + COMPARE_PREFIX(dci(0x5ea0d800), "fcmeq"); // FCMEQ_asisdmisc_FZ + COMPARE_PREFIX(dci(0x5ea0e800), "fcmlt"); // FCMLT_asisdmisc_FZ + COMPARE_PREFIX(dci(0x5ea1a800), "fcvtps"); // FCVTPS_asisdmisc_R + COMPARE_PREFIX(dci(0x5ea1b800), "fcvtzs"); // FCVTZS_asisdmisc_R + COMPARE_PREFIX(dci(0x5ea1d800), "frecpe"); // FRECPE_asisdmisc_R + COMPARE_PREFIX(dci(0x5ea1f800), "frecpx"); // FRECPX_asisdmisc_R + COMPARE_PREFIX(dci(0x5eb0c800), "fminnmp"); // FMINNMP_asisdpair_only_H + COMPARE_PREFIX(dci(0x5eb0f800), "fminp"); // FMINP_asisdpair_only_H + COMPARE_PREFIX(dci(0x5ec03c00), "frsqrts"); // FRSQRTS_asisdsamefp16_only + COMPARE_PREFIX(dci(0x5ef8c800), "fcmgt"); // FCMGT_asisdmiscfp16_FZ + COMPARE_PREFIX(dci(0x5ef8d800), "fcmeq"); // FCMEQ_asisdmiscfp16_FZ + COMPARE_PREFIX(dci(0x5ef8e800), "fcmlt"); // FCMLT_asisdmiscfp16_FZ + COMPARE_PREFIX(dci(0x5ef9a800), "fcvtps"); // FCVTPS_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5ef9b800), "fcvtzs"); // FCVTZS_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5ef9d800), "frecpe"); // FRECPE_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5ef9f800), "frecpx"); // FRECPX_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x5f001000), "fmla"); // FMLA_asisdelem_RH_H + COMPARE_PREFIX(dci(0x5f005000), "fmls"); // FMLS_asisdelem_RH_H + COMPARE_PREFIX(dci(0x5f009000), "fmul"); // FMUL_asisdelem_RH_H + COMPARE_PREFIX(dci(0x5f801000), "fmla"); // 
FMLA_asisdelem_R_SD + COMPARE_PREFIX(dci(0x5f805000), "fmls"); // FMLS_asisdelem_R_SD + COMPARE_PREFIX(dci(0x5f809000), "fmul"); // FMUL_asisdelem_R_SD + COMPARE_PREFIX(dci(0x6f00f400), "fmov"); // FMOV_asimdimm_D2_d + COMPARE_PREFIX(dci(0x7e20e400), "fcmge"); // FCMGE_asisdsame_only + COMPARE_PREFIX(dci(0x7e20ec00), "facge"); // FACGE_asisdsame_only + COMPARE_PREFIX(dci(0x7e21a800), "fcvtnu"); // FCVTNU_asisdmisc_R + COMPARE_PREFIX(dci(0x7e21b800), "fcvtmu"); // FCVTMU_asisdmisc_R + COMPARE_PREFIX(dci(0x7e21c800), "fcvtau"); // FCVTAU_asisdmisc_R + COMPARE_PREFIX(dci(0x7e21d800), "ucvtf"); // UCVTF_asisdmisc_R + COMPARE_PREFIX(dci(0x7e30c800), "fmaxnmp"); // FMAXNMP_asisdpair_only_SD + COMPARE_PREFIX(dci(0x7e30d800), "faddp"); // FADDP_asisdpair_only_SD + COMPARE_PREFIX(dci(0x7e30f800), "fmaxp"); // FMAXP_asisdpair_only_SD + COMPARE_PREFIX(dci(0x7e402400), "fcmge"); // FCMGE_asisdsamefp16_only + COMPARE_PREFIX(dci(0x7e402c00), "facge"); // FACGE_asisdsamefp16_only + COMPARE_PREFIX(dci(0x7e79a800), "fcvtnu"); // FCVTNU_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x7e79b800), "fcvtmu"); // FCVTMU_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x7e79c800), "fcvtau"); // FCVTAU_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x7e79d800), "ucvtf"); // UCVTF_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x7ea0d400), "fabd"); // FABD_asisdsame_only + COMPARE_PREFIX(dci(0x7ea0e400), "fcmgt"); // FCMGT_asisdsame_only + COMPARE_PREFIX(dci(0x7ea0ec00), "facgt"); // FACGT_asisdsame_only + COMPARE_PREFIX(dci(0x7ea0c800), "fcmge"); // FCMGE_asisdmisc_FZ + COMPARE_PREFIX(dci(0x7ea0d800), "fcmle"); // FCMLE_asisdmisc_FZ + COMPARE_PREFIX(dci(0x7ea1a800), "fcvtpu"); // FCVTPU_asisdmisc_R + COMPARE_PREFIX(dci(0x7ea1b800), "fcvtzu"); // FCVTZU_asisdmisc_R + COMPARE_PREFIX(dci(0x7ea1d800), "frsqrte"); // FRSQRTE_asisdmisc_R + COMPARE_PREFIX(dci(0x7eb0c800), "fminnmp"); // FMINNMP_asisdpair_only_SD + COMPARE_PREFIX(dci(0x7eb0f800), "fminp"); // FMINP_asisdpair_only_SD + COMPARE_PREFIX(dci(0x7ec01400), "fabd"); // 
FABD_asisdsamefp16_only + COMPARE_PREFIX(dci(0x7ec02400), "fcmgt"); // FCMGT_asisdsamefp16_only + COMPARE_PREFIX(dci(0x7ec02c00), "facgt"); // FACGT_asisdsamefp16_only + COMPARE_PREFIX(dci(0x7ef8c800), "fcmge"); // FCMGE_asisdmiscfp16_FZ + COMPARE_PREFIX(dci(0x7ef8d800), "fcmle"); // FCMLE_asisdmiscfp16_FZ + COMPARE_PREFIX(dci(0x7ef9a800), "fcvtpu"); // FCVTPU_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x7ef9b800), "fcvtzu"); // FCVTZU_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x7ef9d800), "frsqrte"); // FRSQRTE_asisdmiscfp16_R + COMPARE_PREFIX(dci(0x7f009000), "fmulx"); // FMULX_asisdelem_RH_H + COMPARE_PREFIX(dci(0x7f809000), "fmulx"); // FMULX_asisdelem_R_SD + + // ARMv8.2 - RAS + COMPARE_PREFIX(dci(0xd503221f), "esb"); // ESB_HI_hints + + // ARMv8.2 - SHA3 + // COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4 + // COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4 + // COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3 + // COMPARE_PREFIX(dci(0xce800000), "xar"); // XAR_VVV2_crypto3_imm6 + + // ARMv8.2 - SHA512 + // COMPARE_PREFIX(dci(0xce608000), "sha512h"); // SHA512H_QQV_cryptosha512_3 + // COMPARE_PREFIX(dci(0xce608400), "sha512h2"); // + // SHA512H2_QQV_cryptosha512_3 + // COMPARE_PREFIX(dci(0xce608800), "sha512su1"); // + // SHA512SU1_VVV2_cryptosha512_3 + // COMPARE_PREFIX(dci(0xcec08000), "sha512su0"); // + // SHA512SU0_VV2_cryptosha512_2 + + // ARMv8.2 - SM3 + // COMPARE_PREFIX(dci(0xce400000), "sm3ss1"); // SM3SS1_VVV4_crypto4 + // COMPARE_PREFIX(dci(0xce408000), "sm3tt1a"); // SM3TT1A_VVV4_crypto3_imm2 + // COMPARE_PREFIX(dci(0xce408400), "sm3tt1b"); // SM3TT1B_VVV4_crypto3_imm2 + // COMPARE_PREFIX(dci(0xce408800), "sm3tt2a"); // SM3TT2A_VVV4_crypto3_imm2 + // COMPARE_PREFIX(dci(0xce408c00), "sm3tt2b"); // SM3TT2B_VVV_crypto3_imm2 + // COMPARE_PREFIX(dci(0xce60c000), "sm3partw1"); // + // SM3PARTW1_VVV4_cryptosha512_3 + // COMPARE_PREFIX(dci(0xce60c400), "sm3partw2"); // + // SM3PARTW2_VVV4_cryptosha512_3 + + // 
ARMv8.2 - SM4 + // COMPARE_PREFIX(dci(0xce60c800), "sm4ekey"); // + // SM4EKEY_VVV4_cryptosha512_3 + // COMPARE_PREFIX(dci(0xcec08400), "sm4e"); // SM4E_VV4_cryptosha512_2 + + // ARMv8.2 - SPE + // COMPARE_PREFIX(dci(0xd503223f), "psb"); // PSB_HC_hints + + // ARMv8.3 - FCMA + COMPARE_PREFIX(dci(0x2e40c400), "fcmla"); // FCMLA_asimdsame2_C + COMPARE_PREFIX(dci(0x2e00e400), "fcadd"); // FCADD_asimdsame2_C + COMPARE_PREFIX(dci(0x2f401000), "fcmla"); // FCMLA_asimdelem_C_H + COMPARE_PREFIX(dci(0x6f801000), "fcmla"); // FCMLA_asimdelem_C_S + + // ARMv8.3 - JSCVT + COMPARE_PREFIX(dci(0x1e7e0000), "fjcvtzs"); // FJCVTZS_32D_float2int + + // ARMv8.3 - LRCPC + COMPARE_PREFIX(dci(0x38a0c000), "ldaprb"); // LDAPRB_32L_memop + COMPARE_PREFIX(dci(0x78a0c000), "ldaprh"); // LDAPRH_32L_memop + COMPARE_PREFIX(dci(0xb8a0c000), "ldapr"); // LDAPR_32L_memop + COMPARE_PREFIX(dci(0xf8a0c000), "ldapr"); // LDAPR_64L_memop + + // ARMv8.3 - PAuth + COMPARE_PREFIX(dci(0x9ac03000), "pacga"); // PACGA_64P_dp_2src + COMPARE_PREFIX(dci(0xd50320ff), "xpaclri"); // XPACLRI_HI_hints + COMPARE_PREFIX(dci(0xd503211f), "pacia1716"); // PACIA1716_HI_hints + COMPARE_PREFIX(dci(0xd503215f), "pacib1716"); // PACIB1716_HI_hints + COMPARE_PREFIX(dci(0xd503219f), "autia1716"); // AUTIA1716_HI_hints + COMPARE_PREFIX(dci(0xd50321df), "autib1716"); // AUTIB1716_HI_hints + COMPARE_PREFIX(dci(0xd503231f), "paciaz"); // PACIAZ_HI_hints + COMPARE_PREFIX(dci(0xd503233f), "paciasp"); // PACIASP_HI_hints + COMPARE_PREFIX(dci(0xd503235f), "pacibz"); // PACIBZ_HI_hints + COMPARE_PREFIX(dci(0xd503237f), "pacibsp"); // PACIBSP_HI_hints + COMPARE_PREFIX(dci(0xd503239f), "autiaz"); // AUTIAZ_HI_hints + COMPARE_PREFIX(dci(0xd50323bf), "autiasp"); // AUTIASP_HI_hints + COMPARE_PREFIX(dci(0xd50323df), "autibz"); // AUTIBZ_HI_hints + COMPARE_PREFIX(dci(0xd50323ff), "autibsp"); // AUTIBSP_HI_hints + COMPARE_PREFIX(dci(0xd61f081f), "braaz"); // BRAAZ_64_branch_reg + COMPARE_PREFIX(dci(0xd61f0c1f), "brabz"); // 
BRABZ_64_branch_reg + COMPARE_PREFIX(dci(0xd63f081f), "blraaz"); // BLRAAZ_64_branch_reg + COMPARE_PREFIX(dci(0xd63f0c1f), "blrabz"); // BLRABZ_64_branch_reg + COMPARE_PREFIX(dci(0xd65f0bff), "retaa"); // RETAA_64E_branch_reg + COMPARE_PREFIX(dci(0xd65f0fff), "retab"); // RETAB_64E_branch_reg + // COMPARE_PREFIX(dci(0xd69f0bff), "eretaa"); // ERETAA_64E_branch_reg + // COMPARE_PREFIX(dci(0xd69f0fff), "eretab"); // ERETAB_64E_branch_reg + COMPARE_PREFIX(dci(0xd71f0800), "braa"); // BRAA_64P_branch_reg + COMPARE_PREFIX(dci(0xd71f0c00), "brab"); // BRAB_64P_branch_reg + COMPARE_PREFIX(dci(0xd73f0800), "blraa"); // BLRAA_64P_branch_reg + COMPARE_PREFIX(dci(0xd73f0c00), "blrab"); // BLRAB_64P_branch_reg + COMPARE_PREFIX(dci(0xdac10000), "pacia"); // PACIA_64P_dp_1src + COMPARE_PREFIX(dci(0xdac10400), "pacib"); // PACIB_64P_dp_1src + COMPARE_PREFIX(dci(0xdac10800), "pacda"); // PACDA_64P_dp_1src + COMPARE_PREFIX(dci(0xdac10c00), "pacdb"); // PACDB_64P_dp_1src + COMPARE_PREFIX(dci(0xdac11000), "autia"); // AUTIA_64P_dp_1src + COMPARE_PREFIX(dci(0xdac11400), "autib"); // AUTIB_64P_dp_1src + COMPARE_PREFIX(dci(0xdac11800), "autda"); // AUTDA_64P_dp_1src + COMPARE_PREFIX(dci(0xdac11c00), "autdb"); // AUTDB_64P_dp_1src + COMPARE_PREFIX(dci(0xdac123e0), "paciza"); // PACIZA_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac127e0), "pacizb"); // PACIZB_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac12be0), "pacdza"); // PACDZA_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac12fe0), "pacdzb"); // PACDZB_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac133e0), "autiza"); // AUTIZA_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac137e0), "autizb"); // AUTIZB_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac13be0), "autdza"); // AUTDZA_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac13fe0), "autdzb"); // AUTDZB_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac143e0), "xpaci"); // XPACI_64Z_dp_1src + COMPARE_PREFIX(dci(0xdac147e0), "xpacd"); // XPACD_64Z_dp_1src + COMPARE_PREFIX(dci(0xf8200400), "ldraa"); // LDRAA_64_ldst_pac + COMPARE_PREFIX(dci(0xf8200c00), 
"ldraa"); // LDRAA_64W_ldst_pac + COMPARE_PREFIX(dci(0xf8a00400), "ldrab"); // LDRAB_64_ldst_pac + COMPARE_PREFIX(dci(0xf8a00c00), "ldrab"); // LDRAB_64W_ldst_pac + + // ARMv8.4 - FlagM + COMPARE_PREFIX(dci(0x3a00080d), "setf8"); // SETF8_only_setf + COMPARE_PREFIX(dci(0x3a00480d), "setf16"); // SETF16_only_setf + COMPARE_PREFIX(dci(0xba000400), "rmif"); // RMIF_only_rmif + COMPARE_PREFIX(dci(0xd500401f), "cfinv"); // CFINV_M_pstate + + // ARMv8.4 - LRCPC2 + COMPARE_PREFIX(dci(0x19000000), "stlurb"); // STLURB_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x19400000), "ldapurb"); // LDAPURB_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x19800000), "ldapursb"); // LDAPURSB_64_ldapstl_unscaled + COMPARE_PREFIX(dci(0x19c00000), "ldapursb"); // LDAPURSB_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x59000000), "stlurh"); // STLURH_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x59400000), "ldapurh"); // LDAPURH_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x59800000), "ldapursh"); // LDAPURSH_64_ldapstl_unscaled + COMPARE_PREFIX(dci(0x59c00000), "ldapursh"); // LDAPURSH_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x99000000), "stlur"); // STLUR_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x99400000), "ldapur"); // LDAPUR_32_ldapstl_unscaled + COMPARE_PREFIX(dci(0x99800000), "ldapursw"); // LDAPURSW_64_ldapstl_unscaled + COMPARE_PREFIX(dci(0xd9000000), "stlur"); // STLUR_64_ldapstl_unscaled + COMPARE_PREFIX(dci(0xd9400000), "ldapur"); // LDAPUR_64_ldapstl_unscaled + + // ARMv8.4 - TRF + // COMPARE_PREFIX(dci(0xd503225f), "tsb"); // TSB_HC_hints + + // ARMv8.5 - BTI + COMPARE_PREFIX(dci(0xd503241f), "bti"); // BTI_HB_hints + + // ARMv8.5 - FRINTTS + COMPARE_PREFIX(dci(0x0e21e800), "frint32z"); // FRINT32Z_asimdmisc_R + COMPARE_PREFIX(dci(0x0e21f800), "frint64z"); // FRINT64Z_asimdmisc_R + COMPARE_PREFIX(dci(0x2e21e800), "frint32x"); // FRINT32X_asimdmisc_R + COMPARE_PREFIX(dci(0x2e21f800), "frint64x"); // FRINT64X_asimdmisc_R + COMPARE_PREFIX(dci(0x1e284000), "frint32z"); // FRINT32Z_S_floatdp1 + 
COMPARE_PREFIX(dci(0x1e28c000), "frint32x"); // FRINT32X_S_floatdp1 + COMPARE_PREFIX(dci(0x1e294000), "frint64z"); // FRINT64Z_S_floatdp1 + COMPARE_PREFIX(dci(0x1e29c000), "frint64x"); // FRINT64X_S_floatdp1 + COMPARE_PREFIX(dci(0x1e684000), "frint32z"); // FRINT32Z_D_floatdp1 + COMPARE_PREFIX(dci(0x1e68c000), "frint32x"); // FRINT32X_D_floatdp1 + COMPARE_PREFIX(dci(0x1e694000), "frint64z"); // FRINT64Z_D_floatdp1 + COMPARE_PREFIX(dci(0x1e69c000), "frint64x"); // FRINT64X_D_floatdp1 + + // ARMv8.5 - FlagM2 + COMPARE_PREFIX(dci(0xd500403f), "xaflag"); // XAFLAG_M_pstate + COMPARE_PREFIX(dci(0xd500405f), "axflag"); // AXFLAG_M_pstate + + // ARMv8.5 - MTE + // COMPARE_PREFIX(dci(0x68800000), "stgp"); // STGP_64_ldstpair_post + // COMPARE_PREFIX(dci(0x69000000), "stgp"); // STGP_64_ldstpair_off + // COMPARE_PREFIX(dci(0x69800000), "stgp"); // STGP_64_ldstpair_pre + // COMPARE_PREFIX(dci(0x91800000), "addg"); // ADDG_64_addsub_immtags + // COMPARE_PREFIX(dci(0x9ac00000), "subp"); // SUBP_64S_dp_2src + // COMPARE_PREFIX(dci(0x9ac01000), "irg"); // IRG_64I_dp_2src + // COMPARE_PREFIX(dci(0x9ac01400), "gmi"); // GMI_64G_dp_2src + // COMPARE_PREFIX(dci(0xbac00000), "subps"); // SUBPS_64S_dp_2src + // COMPARE_PREFIX(dci(0xd1800000), "subg"); // SUBG_64_addsub_immtags + // COMPARE_PREFIX(dci(0xd9200400), "stg"); // STG_64Spost_ldsttags + // COMPARE_PREFIX(dci(0xd9200800), "stg"); // STG_64Soffset_ldsttags + // COMPARE_PREFIX(dci(0xd9200c00), "stg"); // STG_64Spre_ldsttags + // COMPARE_PREFIX(dci(0xd9600000), "ldg"); // LDG_64Loffset_ldsttags + // COMPARE_PREFIX(dci(0xd9600400), "stzg"); // STZG_64Spost_ldsttags + // COMPARE_PREFIX(dci(0xd9600800), "stzg"); // STZG_64Soffset_ldsttags + // COMPARE_PREFIX(dci(0xd9600c00), "stzg"); // STZG_64Spre_ldsttags + // COMPARE_PREFIX(dci(0xd9a00400), "st2g"); // ST2G_64Spost_ldsttags + // COMPARE_PREFIX(dci(0xd9a00800), "st2g"); // ST2G_64Soffset_ldsttags + // COMPARE_PREFIX(dci(0xd9a00c00), "st2g"); // ST2G_64Spre_ldsttags + // 
COMPARE_PREFIX(dci(0xd9e00400), "stz2g"); // STZ2G_64Spost_ldsttags + // COMPARE_PREFIX(dci(0xd9e00800), "stz2g"); // STZ2G_64Soffset_ldsttags + // COMPARE_PREFIX(dci(0xd9e00c00), "stz2g"); // STZ2G_64Spre_ldsttags + + // ARMv8.5 - MTE2 + // COMPARE_PREFIX(dci(0xd9200000), "stzgm"); // STZGM_64bulk_ldsttags + // COMPARE_PREFIX(dci(0xd9a00000), "stgm"); // STGM_64bulk_ldsttags + // COMPARE_PREFIX(dci(0xd9e00000), "ldgm"); // LDGM_64bulk_ldsttags + + // ARMv8.6 - BF16 + // COMPARE_PREFIX(dci(0x0ea16800), "bfcvtn"); // BFCVTN_asimdmisc_4S + // COMPARE_PREFIX(dci(0x0f40f000), "bfdot"); // BFDOT_asimdelem_E + // COMPARE_PREFIX(dci(0x0fc0f000), "bfmlal"); // BFMLAL_asimdelem_F + // COMPARE_PREFIX(dci(0x2e40fc00), "bfdot"); // BFDOT_asimdsame2_D + // COMPARE_PREFIX(dci(0x2ec0fc00), "bfmlal"); // BFMLAL_asimdsame2_F_ + // COMPARE_PREFIX(dci(0x1e634000), "bfcvt"); // BFCVT_BS_floatdp1 + // COMPARE_PREFIX(dci(0x6e40ec00), "bfmmla"); // BFMMLA_asimdsame2_E + + // ARMv8.6 - DGH + // COMPARE_PREFIX(dci(0xd50320df), "dgh"); // DGH_HI_hints + + // ARMv8.6 - I8MM + COMPARE_PREFIX(dci(0x0e809c00), "usdot"); // USDOT_asimdsame2_D + COMPARE_PREFIX(dci(0x0f00f000), "sudot"); // SUDOT_asimdelem_D + COMPARE_PREFIX(dci(0x0f80f000), "usdot"); // USDOT_asimdelem_D + COMPARE_PREFIX(dci(0x4e80a400), "smmla"); // SMMLA_asimdsame2_G + COMPARE_PREFIX(dci(0x4e80ac00), "usmmla"); // USMMLA_asimdsame2_G + COMPARE_PREFIX(dci(0x6e80a400), "ummla"); // UMMLA_asimdsame2_G + + // ARMv8.7 - LS64 + // COMPARE_PREFIX(dci(0xf83f9000), "st64b"); // ST64B_64L_memop + // COMPARE_PREFIX(dci(0xf83fd000), "ld64b"); // LD64B_64L_memop + + // ARMv8.7 - LS64_V + // COMPARE_PREFIX(dci(0xf820a000), "st64bv0"); // ST64BV0_64_memop + // COMPARE_PREFIX(dci(0xf820b000), "st64bv"); // ST64BV_64_memop + + // ARMv8.7 - WFxT + // COMPARE_PREFIX(dci(0xd5031000), "wfet"); // WFET_only_systeminstrswithreg + // COMPARE_PREFIX(dci(0xd5031020), "wfit"); // WFIT_only_systeminstrswithreg + + // TME + // 
COMPARE_PREFIX(dci(0xd4600000), "tcancel"); // TCANCEL_EX_exception + // COMPARE_PREFIX(dci(0xd503307f), "tcommit"); // TCOMMIT_only_barriers + // COMPARE_PREFIX(dci(0xd5233060), "tstart"); // TSTART_BR_systemresult + // COMPARE_PREFIX(dci(0xd5233160), "ttest"); // TTEST_BR_systemresult + + CLEANUP(); +} + } // namespace aarch64 } // namespace vixl diff --git a/test/aarch64/test-disasm-aarch64.h b/test/aarch64/test-disasm-aarch64.h index 9e16bc3d..5c65e8df 100644 --- a/test/aarch64/test-disasm-aarch64.h +++ b/test/aarch64/test-disasm-aarch64.h @@ -54,9 +54,10 @@ do { \ printf("----\n"); \ PrintDisassembler print_disasm(stdout); \ - Instruction* start = masm.GetBuffer()->GetStartAddress<Instruction*>(); \ - Instruction* end = masm.GetBuffer()->GetEndAddress<Instruction*>(); \ - print_disasm.DisassembleBuffer(start, end); \ + Instruction* dis_start = \ + masm.GetBuffer()->GetStartAddress<Instruction*>(); \ + Instruction* dis_end = masm.GetBuffer()->GetEndAddress<Instruction*>(); \ + print_disasm.DisassembleBuffer(dis_start, dis_end); \ } while (0) #define COMPARE(ASM, EXP) \ diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc index a8e91e95..17957f5e 100644 --- a/test/aarch64/test-disasm-neon-aarch64.cc +++ b/test/aarch64/test-disasm-neon-aarch64.cc @@ -1745,6 +1745,11 @@ TEST(neon_3same) { COMPARE_MACRO(Udot(v1.V4S(), v2.V16B(), v3.V16B()), "udot v1.4s, v2.16b, v3.16b"); + COMPARE_MACRO(Usdot(v7.V2S(), v9.V8B(), v30.V8B()), + "usdot v7.2s, v9.8b, v30.8b"); + COMPARE_MACRO(Usdot(v7.V4S(), v9.V16B(), v30.V16B()), + "usdot v7.4s, v9.16b, v30.16b"); + COMPARE_MACRO(And(v6.V8B(), v7.V8B(), v8.V8B()), "and v6.8b, v7.8b, v8.8b"); COMPARE_MACRO(And(v6.V16B(), v7.V16B(), v8.V16B()), "and v6.16b, v7.16b, v8.16b"); @@ -2425,6 +2430,15 @@ TEST(neon_byelement) { COMPARE_MACRO(Fmlsl2(v28.V4S(), v28.V4H(), v7.H(), 0), "fmlsl2 v28.4s, v28.4h, v7.h[0]"); + COMPARE_MACRO(Sudot(v10.V2S(), v21.V8B(), v31.S4B(), 0), + "sudot v10.2s, v21.8b, 
v31.4b[0]"); + COMPARE_MACRO(Sudot(v12.V4S(), v23.V16B(), v16.S4B(), 3), + "sudot v12.4s, v23.16b, v16.4b[3]"); + COMPARE_MACRO(Usdot(v10.V2S(), v21.V8B(), v31.S4B(), 0), + "usdot v10.2s, v21.8b, v31.4b[0]"); + COMPARE_MACRO(Usdot(v12.V4S(), v23.V16B(), v16.S4B(), 3), + "usdot v12.4s, v23.16b, v16.4b[3]"); + CLEANUP(); } @@ -2434,56 +2448,64 @@ TEST(neon_fp_byelement) { COMPARE_MACRO(Fmul(v0.V4H(), v1.V4H(), v2.H(), 0), "fmul v0.4h, v1.4h, v2.h[0]"); - COMPARE_MACRO(Fmul(v2.V8H(), v3.V8H(), v15.H(), 3), - "fmul v2.8h, v3.8h, v15.h[3]"); + COMPARE_MACRO(Fmul(v2.V8H(), v3.V8H(), v15.H(), 7), + "fmul v2.8h, v3.8h, v15.h[7]"); COMPARE_MACRO(Fmul(v0.V2S(), v1.V2S(), v2.S(), 0), "fmul v0.2s, v1.2s, v2.s[0]"); COMPARE_MACRO(Fmul(v2.V4S(), v3.V4S(), v15.S(), 3), "fmul v2.4s, v3.4s, v15.s[3]"); COMPARE_MACRO(Fmul(v0.V2D(), v1.V2D(), v2.D(), 0), "fmul v0.2d, v1.2d, v2.d[0]"); + COMPARE_MACRO(Fmul(v0.V2D(), v1.V2D(), v2.D(), 1), + "fmul v0.2d, v1.2d, v2.d[1]"); COMPARE_MACRO(Fmul(d0, d1, v2.D(), 0), "fmul d0, d1, v2.d[0]"); COMPARE_MACRO(Fmul(s0, s1, v2.S(), 0), "fmul s0, s1, v2.s[0]"); COMPARE_MACRO(Fmul(h0, h1, v2.H(), 0), "fmul h0, h1, v2.h[0]"); COMPARE_MACRO(Fmla(v0.V4H(), v1.V4H(), v2.H(), 0), "fmla v0.4h, v1.4h, v2.h[0]"); - COMPARE_MACRO(Fmla(v2.V8H(), v3.V8H(), v15.H(), 3), - "fmla v2.8h, v3.8h, v15.h[3]"); + COMPARE_MACRO(Fmla(v2.V8H(), v3.V8H(), v15.H(), 7), + "fmla v2.8h, v3.8h, v15.h[7]"); COMPARE_MACRO(Fmla(v0.V2S(), v1.V2S(), v2.S(), 0), "fmla v0.2s, v1.2s, v2.s[0]"); COMPARE_MACRO(Fmla(v2.V4S(), v3.V4S(), v15.S(), 3), "fmla v2.4s, v3.4s, v15.s[3]"); COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v2.D(), 0), "fmla v0.2d, v1.2d, v2.d[0]"); + COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v2.D(), 1), + "fmla v0.2d, v1.2d, v2.d[1]"); COMPARE_MACRO(Fmla(d0, d1, v2.D(), 0), "fmla d0, d1, v2.d[0]"); COMPARE_MACRO(Fmla(s0, s1, v2.S(), 0), "fmla s0, s1, v2.s[0]"); COMPARE_MACRO(Fmla(h0, h1, v2.H(), 0), "fmla h0, h1, v2.h[0]"); COMPARE_MACRO(Fmls(v0.V4H(), v1.V4H(), v2.H(), 0), "fmls 
v0.4h, v1.4h, v2.h[0]"); - COMPARE_MACRO(Fmls(v2.V8H(), v3.V8H(), v15.H(), 3), - "fmls v2.8h, v3.8h, v15.h[3]"); + COMPARE_MACRO(Fmls(v2.V8H(), v3.V8H(), v15.H(), 7), + "fmls v2.8h, v3.8h, v15.h[7]"); COMPARE_MACRO(Fmls(v0.V2S(), v1.V2S(), v2.S(), 0), "fmls v0.2s, v1.2s, v2.s[0]"); COMPARE_MACRO(Fmls(v2.V4S(), v3.V4S(), v15.S(), 3), "fmls v2.4s, v3.4s, v15.s[3]"); COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v2.D(), 0), "fmls v0.2d, v1.2d, v2.d[0]"); + COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v2.D(), 1), + "fmls v0.2d, v1.2d, v2.d[1]"); COMPARE_MACRO(Fmls(d0, d1, v2.D(), 0), "fmls d0, d1, v2.d[0]"); COMPARE_MACRO(Fmls(s0, s1, v2.S(), 0), "fmls s0, s1, v2.s[0]"); COMPARE_MACRO(Fmls(h0, h1, v2.H(), 0), "fmls h0, h1, v2.h[0]"); COMPARE_MACRO(Fmulx(v0.V4H(), v1.V4H(), v2.H(), 0), "fmulx v0.4h, v1.4h, v2.h[0]"); - COMPARE_MACRO(Fmulx(v2.V8H(), v3.V8H(), v15.H(), 3), - "fmulx v2.8h, v3.8h, v15.h[3]"); + COMPARE_MACRO(Fmulx(v2.V8H(), v3.V8H(), v15.H(), 7), + "fmulx v2.8h, v3.8h, v15.h[7]"); COMPARE_MACRO(Fmulx(v0.V2S(), v1.V2S(), v2.S(), 0), "fmulx v0.2s, v1.2s, v2.s[0]"); COMPARE_MACRO(Fmulx(v2.V4S(), v3.V4S(), v8.S(), 3), "fmulx v2.4s, v3.4s, v8.s[3]"); COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v2.D(), 0), "fmulx v0.2d, v1.2d, v2.d[0]"); + COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v2.D(), 1), + "fmulx v0.2d, v1.2d, v2.d[1]"); COMPARE_MACRO(Fmulx(d0, d1, v2.D(), 0), "fmulx d0, d1, v2.d[0]"); COMPARE_MACRO(Fmulx(s0, s1, v2.S(), 0), "fmulx s0, s1, v2.s[0]"); COMPARE_MACRO(Fmulx(h0, h1, v2.H(), 0), "fmulx h0, h1, v2.h[0]"); @@ -3099,6 +3121,13 @@ TEST(neon_modimm) { COMPARE_MACRO(Movi(v1.V2D(), 0xffff0000ffffff), "movi v1.2d, #0xffff0000ffffff"); + COMPARE_MACRO(Movi(v2.V2D(), 0xff00ff00ff00ff, 0xff00ff00ff00ff), + "movi v2.2d, #0xff00ff00ff00ff"); + COMPARE_MACRO(Movi(v3.V2D(), 0xffff, 0xff00ff00ff00ff), + "movi v3.2d, #0xff00ff00ff00ff\n" + "mov x16, #0xffff\n" + "mov v3.d[1], x16"); + COMPARE_MACRO(Fmov(v0.V2S(), 1.0f), "fmov v0.2s, #0x70 (1.0000)"); COMPARE_MACRO(Fmov(v31.V2S(), 
-13.0f), "fmov v31.2s, #0xaa (-13.0000)"); COMPARE_MACRO(Fmov(v0.V4S(), 1.0f), "fmov v0.4s, #0x70 (1.0000)"); @@ -3135,9 +3164,9 @@ TEST(neon_2regmisc) { COMPARE_MACRO(Shll2(v6.V2D(), v4.V4S(), 32), "shll2 v6.2d, v4.4s, #32"); // An unallocated form of shll. - COMPARE(dci(0x2ee13bff), "unallocated (NEON2RegMisc)"); + COMPARE(dci(0x2ee13bff), "unallocated (Unallocated)"); // An unallocated form of shll2. - COMPARE(dci(0x6ee13bff), "unallocated (NEON2RegMisc)"); + COMPARE(dci(0x6ee13bff), "unallocated (Unallocated)"); #define DISASM_INST(M, S) \ COMPARE_MACRO(Cmeq(v0.M, v1.M, 0), "cmeq v0." S ", v1." S ", #0"); @@ -4350,5 +4379,299 @@ TEST(neon_shift_immediate) { CLEANUP(); } +TEST(neon_matmul) { + SETUP(); + + COMPARE_MACRO(Smmla(v0.V4S(), v1.V16B(), v2.V16B()), + "smmla v0.4s, v1.16b, v2.16b"); + COMPARE_MACRO(Ummla(v20.V4S(), v30.V16B(), v31.V16B()), + "ummla v20.4s, v30.16b, v31.16b"); + COMPARE_MACRO(Usmmla(v3.V4S(), v29.V16B(), v13.V16B()), + "usmmla v3.4s, v29.16b, v13.16b"); + + CLEANUP(); +} + +TEST(neon_unallocated_regression_test) { + SETUP(); + + COMPARE_PREFIX(dci(0x5e20b985), "unallocated"); // abs b, b + COMPARE_PREFIX(dci(0x5e60b8e3), "unallocated"); // abs h, h + COMPARE_PREFIX(dci(0x5ea0b8d1), "unallocated"); // abs s, s + COMPARE_PREFIX(dci(0x5e318764), "unallocated"); // add b, b, b + COMPARE_PREFIX(dci(0x5e7f877a), "unallocated"); // add h, h, h + COMPARE_PREFIX(dci(0x5eb8842b), "unallocated"); // add s, s, s + COMPARE_PREFIX(dci(0x5eb1bbb7), "unallocated"); // addp s, v.s + COMPARE_PREFIX(dci(0x4ef1b90c), "unallocated"); // addv d, v.d + COMPARE_PREFIX(dci(0x0ef1babc), "unallocated"); // addv d, v.und + COMPARE_PREFIX(dci(0x4ee04a0f), "unallocated"); // cls v.d, v.d + COMPARE_PREFIX(dci(0x6ee048ef), "unallocated"); // clz v.d, v.d + COMPARE_PREFIX(dci(0x5e2099b4), "unallocated"); // cmeq b, b, # + COMPARE_PREFIX(dci(0x7e3a8c75), "unallocated"); // cmeq b, b, b + COMPARE_PREFIX(dci(0x5e6099a2), "unallocated"); // cmeq h, h, # + 
COMPARE_PREFIX(dci(0x7e7e8ce2), "unallocated"); // cmeq h, h, h + COMPARE_PREFIX(dci(0x5ea09a20), "unallocated"); // cmeq s, s, # + COMPARE_PREFIX(dci(0x7ea98fbd), "unallocated"); // cmeq s, s, s + COMPARE_PREFIX(dci(0x7e208ad0), "unallocated"); // cmge b, b, # + COMPARE_PREFIX(dci(0x5e233f3f), "unallocated"); // cmge b, b, b + COMPARE_PREFIX(dci(0x7e608b4e), "unallocated"); // cmge h, h, # + COMPARE_PREFIX(dci(0x5e643f87), "unallocated"); // cmge h, h, h + COMPARE_PREFIX(dci(0x7ea08b3c), "unallocated"); // cmge s, s, # + COMPARE_PREFIX(dci(0x5ea63e20), "unallocated"); // cmge s, s, s + COMPARE_PREFIX(dci(0x5e208837), "unallocated"); // cmgt b, b, # + COMPARE_PREFIX(dci(0x5e2f3591), "unallocated"); // cmgt b, b, b + COMPARE_PREFIX(dci(0x5e608bde), "unallocated"); // cmgt h, h, # + COMPARE_PREFIX(dci(0x5e7f377b), "unallocated"); // cmgt h, h, h + COMPARE_PREFIX(dci(0x5ea08813), "unallocated"); // cmgt s, s, # + COMPARE_PREFIX(dci(0x5ead3429), "unallocated"); // cmgt s, s, s + COMPARE_PREFIX(dci(0x7e23373d), "unallocated"); // cmhi b, b, b + COMPARE_PREFIX(dci(0x7e7937c1), "unallocated"); // cmhi h, h, h + COMPARE_PREFIX(dci(0x7ea6361e), "unallocated"); // cmhi s, s, s + COMPARE_PREFIX(dci(0x7e3c3e4a), "unallocated"); // cmhs b, b, b + COMPARE_PREFIX(dci(0x7e653cb8), "unallocated"); // cmhs h, h, h + COMPARE_PREFIX(dci(0x7eb03d39), "unallocated"); // cmhs s, s, s + COMPARE_PREFIX(dci(0x7e209894), "unallocated"); // cmle b, b, # + COMPARE_PREFIX(dci(0x7e609882), "unallocated"); // cmle h, h, # + COMPARE_PREFIX(dci(0x7ea09900), "unallocated"); // cmle s, s, # + COMPARE_PREFIX(dci(0x5e20a808), "unallocated"); // cmlt b, b, # + COMPARE_PREFIX(dci(0x5e60ab1f), "unallocated"); // cmlt h, h, # + COMPARE_PREFIX(dci(0x5ea0ab0d), "unallocated"); // cmlt s, s, # + COMPARE_PREFIX(dci(0x5e218cda), "unallocated"); // cmtst b, b, b + COMPARE_PREFIX(dci(0x5e718ec4), "unallocated"); // cmtst h, h, h + COMPARE_PREFIX(dci(0x5eb38ccd), "unallocated"); // cmtst s, s, s + 
COMPARE_PREFIX(dci(0x4ee05863), "unallocated"); // cnt v.d, v.d + COMPARE_PREFIX(dci(0x4e605887), "unallocated"); // cnt v.h, v.h + COMPARE_PREFIX(dci(0x4ea05875), "unallocated"); // cnt v.s, v.s + COMPARE_PREFIX(dci(0x0ee05a13), "unallocated"); // cnt v.und, v.und + COMPARE_PREFIX(dci(0x2e0f419d), "unallocated"); // ext v.b, v.b, v.b, # + COMPARE_PREFIX(dci(0x7e216950), "unallocated"); // fcvtxn h, s + COMPARE_PREFIX(dci(0x6e216950), "unallocated"); // fcvtxn v.h, v.s + COMPARE_PREFIX(dci(0x5f08fc37), "unallocated"); // fcvtzs b, b, # + COMPARE_PREFIX(dci(0x4f0cfcb6), "unallocated"); // fcvtzs v.b, v.b, # + COMPARE_PREFIX(dci(0x7f08fed0), "unallocated"); // fcvtzu b, b, # + COMPARE_PREFIX(dci(0x6f0dfc80), "unallocated"); // fcvtzu v.b, v.b, # + COMPARE_PREFIX(dci(0x6e70c813), "unallocated"); // fmaxnmv d, v.d + COMPARE_PREFIX(dci(0x2e70ca53), "unallocated"); // fmaxnmv d, v.und + COMPARE_PREFIX(dci(0x2e30ca65), "unallocated"); // fmaxnmv s, v.s + COMPARE_PREFIX(dci(0x6e70fbfa), "unallocated"); // fmaxv d, v.d + COMPARE_PREFIX(dci(0x2e70fa81), "unallocated"); // fmaxv d, v.und + COMPARE_PREFIX(dci(0x2e30fb23), "unallocated"); // fmaxv s, v.s + COMPARE_PREFIX(dci(0x6ef0c87f), "unallocated"); // fminnmv d, v.d + COMPARE_PREFIX(dci(0x2ef0ca2f), "unallocated"); // fminnmv d, v.und + COMPARE_PREFIX(dci(0x2eb0ca41), "unallocated"); // fminnmv s, v.s + COMPARE_PREFIX(dci(0x6ef0f8ad), "unallocated"); // fminv d, v.d + COMPARE_PREFIX(dci(0x2ef0faed), "unallocated"); // fminv d, v.und + COMPARE_PREFIX(dci(0x2eb0faff), "unallocated"); // fminv s, v.s + COMPARE_PREFIX(dci(0x0fc61a34), "unallocated"); // fmla v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x0fed5909), "unallocated"); // fmls v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x0fd09a0a), "unallocated"); // fmul v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x2fdf99fc), "unallocated"); // fmulx v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x6f310336), "unallocated"); // mla v.b, v.b, v.b[] + COMPARE_PREFIX(dci(0x4efd978f), 
"unallocated"); // mla v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6fe80bb3), "unallocated"); // mla v.d, v.d, v.d[] + COMPARE_PREFIX(dci(0x2fda0aa2), "unallocated"); // mla v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x6f0f4035), "unallocated"); // mls v.b, v.b, v.b[] + COMPARE_PREFIX(dci(0x6eee95ed), "unallocated"); // mls v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6ffa43fa), "unallocated"); // mls v.d, v.d, v.d[] + COMPARE_PREFIX(dci(0x2ffd4186), "unallocated"); // mls v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x4f2482ac), "unallocated"); // mul v.b, v.b, v.b[] + COMPARE_PREFIX(dci(0x4efc9d87), "unallocated"); // mul v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4fc58321), "unallocated"); // mul v.d, v.d, v.d[] + COMPARE_PREFIX(dci(0x0fef8b9b), "unallocated"); // mul v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x7e20b865), "unallocated"); // neg b, b + COMPARE_PREFIX(dci(0x7e60b853), "unallocated"); // neg h, h + COMPARE_PREFIX(dci(0x7ea0bbfa), "unallocated"); // neg s, s + COMPARE_PREFIX(dci(0x6eea9c50), "unallocated"); // pmul v.d, v.d, v.d + COMPARE_PREFIX(dci(0x2e789e4c), "unallocated"); // pmul v.h, v.h, v.h + COMPARE_PREFIX(dci(0x2ea39e8e), "unallocated"); // pmul v.s, v.s, v.s + COMPARE_PREFIX(dci(0x2efb9dbd), "unallocated"); // pmul v.und, v.und, v.und + COMPARE_PREFIX(dci(0x4eace101), "unallocated"); // pmull v.d, v.s, v.s + COMPARE_PREFIX(dci(0x0e6de3ad), "unallocated"); // pmull v.s, v.h, v.h + COMPARE_PREFIX(dci(0x4ee3e2c0), "unallocated"); // pmull v.und, v.d, v.d + COMPARE_PREFIX(dci(0x0eede060), "unallocated"); // pmull v.und, v.und, v.und + COMPARE_PREFIX(dci(0x6ee00afd), "unallocated"); // rev v.d, v.d + COMPARE_PREFIX(dci(0x4e601975), "unallocated"); // rev v.h, v.h + COMPARE_PREFIX(dci(0x4ea019f3), "unallocated"); // rev v.s, v.s + COMPARE_PREFIX(dci(0x2ee00984), "unallocated"); // rev v.und, v.und + COMPARE_PREFIX(dci(0x4ef07cc9), "unallocated"); // saba v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4ee57554), "unallocated"); // sabd v.d, v.d, v.d + 
COMPARE_PREFIX(dci(0x0eb03927), "unallocated"); // saddlv d, v.s + COMPARE_PREFIX(dci(0x5f0de4b1), "unallocated"); // scvtf b, b, # + COMPARE_PREFIX(dci(0x4f08e468), "unallocated"); // scvtf v.b, v.b, # + COMPARE_PREFIX(dci(0x4eed07a2), "unallocated"); // shadd v.d, v.d, v.d + COMPARE_PREFIX(dci(0x5f0b54ec), "unallocated"); // shl b, b, # + COMPARE_PREFIX(dci(0x5f1f56d7), "unallocated"); // shl h, h, # + COMPARE_PREFIX(dci(0x5f205498), "unallocated"); // shl s, s, # + COMPARE_PREFIX(dci(0x4ef7256c), "unallocated"); // shsub v.d, v.d, v.d + COMPARE_PREFIX(dci(0x7f095521), "unallocated"); // sli b, b, # + COMPARE_PREFIX(dci(0x7f1d579c), "unallocated"); // sli h, h, # + COMPARE_PREFIX(dci(0x7f21578b), "unallocated"); // sli s, s, # + COMPARE_PREFIX(dci(0x4eeb662a), "unallocated"); // smax v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4ef6a53f), "unallocated"); // smaxp v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4ef0aa5e), "unallocated"); // smaxv d, v.d + COMPARE_PREFIX(dci(0x0eb0ab90), "unallocated"); // smaxv s, v.s + COMPARE_PREFIX(dci(0x4eeb6d0c), "unallocated"); // smin v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4ee4ac0a), "unallocated"); // sminp v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4ef1aab8), "unallocated"); // sminv d, v.d + COMPARE_PREFIX(dci(0x0eb1a951), "unallocated"); // sminv s, v.s + COMPARE_PREFIX(dci(0x4fd32bd8), "unallocated"); // smlal v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x0ff32a9e), "unallocated"); // smlal v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x4ffa6aad), "unallocated"); // smlsl v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x0ff56af4), "unallocated"); // smlsl v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x0e182f6f), "unallocated"); // smov w, v.d[] + COMPARE_PREFIX(dci(0x0e042d84), "unallocated"); // smov w, v.s[] + COMPARE_PREFIX(dci(0x4e082c53), "unallocated"); // smov x, v.d[] + COMPARE_PREFIX(dci(0x4fcfa8ed), "unallocated"); // smull v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x0fdba861), "unallocated"); // smull v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x5e2f93e4), 
"unallocated"); // sqdmlal h, b, b + COMPARE_PREFIX(dci(0x5f113b27), "unallocated"); // sqdmlal h, b, v.b[] + COMPARE_PREFIX(dci(0x5fff3b58), "unallocated"); // sqdmlal undd, d, v.d[] + COMPARE_PREFIX(dci(0x0e2491d8), "unallocated"); // sqdmlal v.h, v.b, v.b + COMPARE_PREFIX(dci(0x4fdb3b2a), "unallocated"); // sqdmlal v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x0ffc3a4a), + "unallocated"); // sqdmlal v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x5e3eb3a7), "unallocated"); // sqdmlsl h, b, b + COMPARE_PREFIX(dci(0x5f337121), "unallocated"); // sqdmlsl h, b, v.b[] + COMPARE_PREFIX(dci(0x5fd378ae), "unallocated"); // sqdmlsl undd, d, v.d[] + COMPARE_PREFIX(dci(0x4e3eb3a7), "unallocated"); // sqdmlsl v.h, v.b, v.b + COMPARE_PREFIX(dci(0x4fda78c2), "unallocated"); // sqdmlsl v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x0ff279b3), + "unallocated"); // sqdmlsl v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x5e34b7b7), "unallocated"); // sqdmulh b, b, b + COMPARE_PREFIX(dci(0x5f16c106), "unallocated"); // sqdmulh b, b, v.b[] + COMPARE_PREFIX(dci(0x5ef8b447), "unallocated"); // sqdmulh d, d, d + COMPARE_PREFIX(dci(0x5fc5c113), "unallocated"); // sqdmulh d, d, v.d[] + COMPARE_PREFIX(dci(0x4e33b6cd), "unallocated"); // sqdmulh v.b, v.b, v.b + COMPARE_PREFIX(dci(0x4f3bc21e), "unallocated"); // sqdmulh v.b, v.b, v.b[] + COMPARE_PREFIX(dci(0x4eefb738), "unallocated"); // sqdmulh v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4ff2cb2b), "unallocated"); // sqdmulh v.d, v.d, v.d[] + COMPARE_PREFIX(dci(0x0feacbdd), + "unallocated"); // sqdmulh v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x5e3ed2e8), "unallocated"); // sqdmull h, b, b + COMPARE_PREFIX(dci(0x5f23b842), "unallocated"); // sqdmull h, b, v.b[] + COMPARE_PREFIX(dci(0x5fc8ba56), "unallocated"); // sqdmull undd, d, v.d[] + COMPARE_PREFIX(dci(0x4e38d125), "unallocated"); // sqdmull v.h, v.b, v.b + COMPARE_PREFIX(dci(0x4ff5b8b3), "unallocated"); // sqdmull v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x0fdcbac8), + "unallocated"); // sqdmull v.und, v.und, 
v.d[] + COMPARE_PREFIX(dci(0x7fcdd950), "unallocated"); // sqrdmlah d, d, v.d[] + COMPARE_PREFIX(dci(0x6fd6d80f), "unallocated"); // sqrdmlah v.d, v.d, v.d[] + COMPARE_PREFIX(dci(0x2fecdae5), + "unallocated"); // sqrdmlah v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x7fe0f992), "unallocated"); // sqrdmlsh d, d, v.d[] + COMPARE_PREFIX(dci(0x6ff1f9df), "unallocated"); // sqrdmlsh v.d, v.d, v.d[] + COMPARE_PREFIX(dci(0x2fcdfad1), + "unallocated"); // sqrdmlsh v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x7e23b7fa), "unallocated"); // sqrdmulh b, b, b + COMPARE_PREFIX(dci(0x5f1ad272), "unallocated"); // sqrdmulh b, b, v.b[] + COMPARE_PREFIX(dci(0x7ef8b6e0), "unallocated"); // sqrdmulh d, d, d + COMPARE_PREFIX(dci(0x5fd7d2a7), "unallocated"); // sqrdmulh d, d, v.d[] + COMPARE_PREFIX(dci(0x6e23b7fa), "unallocated"); // sqrdmulh v.b, v.b, v.b + COMPARE_PREFIX(dci(0x4f28d32a), "unallocated"); // sqrdmulh v.b, v.b, v.b[] + COMPARE_PREFIX(dci(0x6ef0b702), "unallocated"); // sqrdmulh v.d, v.d, v.d + COMPARE_PREFIX(dci(0x4feddb3f), "unallocated"); // sqrdmulh v.d, v.d, v.d[] + COMPARE_PREFIX(dci(0x0fdddaf3), + "unallocated"); // sqrdmulh v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x5f679d27), "unallocated"); // sqrshrn d, d, # + COMPARE_PREFIX(dci(0x5f4d9f3b), "unallocated"); // sqrshrn d, h, # + COMPARE_PREFIX(dci(0x5f569dfa), "unallocated"); // sqrshrn d, s, # + COMPARE_PREFIX(dci(0x5f449d53), "unallocated"); // sqrshrn d, undn, # + COMPARE_PREFIX(dci(0x4f609da3), "unallocated"); // sqrshrn v.d, v.und, # + COMPARE_PREFIX(dci(0x7f698cee), "unallocated"); // sqrshrun d, d, # + COMPARE_PREFIX(dci(0x7f498fd8), "unallocated"); // sqrshrun d, h, # + COMPARE_PREFIX(dci(0x7f5d8e9a), "unallocated"); // sqrshrun d, s, # + COMPARE_PREFIX(dci(0x7f478e04), "unallocated"); // sqrshrun d, undn, # + COMPARE_PREFIX(dci(0x6f568c7d), "unallocated"); // sqrshrun v.d, v.und, # + COMPARE_PREFIX(dci(0x5f779488), "unallocated"); // sqshrn d, d, # + COMPARE_PREFIX(dci(0x5f4b9715), "unallocated"); // sqshrn 
d, h, # + COMPARE_PREFIX(dci(0x5f579449), "unallocated"); // sqshrn d, s, # + COMPARE_PREFIX(dci(0x5f4695ac), "unallocated"); // sqshrn d, undn, # + COMPARE_PREFIX(dci(0x4f6096c1), "unallocated"); // sqshrn v.d, v.und, # + COMPARE_PREFIX(dci(0x7f6786d1), "unallocated"); // sqshrun d, d, # + COMPARE_PREFIX(dci(0x7f4884e3), "unallocated"); // sqshrun d, h, # + COMPARE_PREFIX(dci(0x7f5886df), "unallocated"); // sqshrun d, s, # + COMPARE_PREFIX(dci(0x7f448464), "unallocated"); // sqshrun d, undn, # + COMPARE_PREFIX(dci(0x6f5b8674), "unallocated"); // sqshrun v.d, v.und, # + COMPARE_PREFIX(dci(0x4ef2163f), "unallocated"); // srhadd v.d, v.d, v.d + COMPARE_PREFIX(dci(0x7f09475d), "unallocated"); // sri b, b, # + COMPARE_PREFIX(dci(0x7f104771), "unallocated"); // sri h, h, # + COMPARE_PREFIX(dci(0x7f2045b4), "unallocated"); // sri s, s, # + COMPARE_PREFIX(dci(0x5e2656a3), "unallocated"); // srshl b, b, b + COMPARE_PREFIX(dci(0x5e605767), "unallocated"); // srshl h, h, h + COMPARE_PREFIX(dci(0x5eb654c2), "unallocated"); // srshl s, s, s + COMPARE_PREFIX(dci(0x5f0827c2), "unallocated"); // srshr b, b, # + COMPARE_PREFIX(dci(0x5f13249c), "unallocated"); // srshr h, h, # + COMPARE_PREFIX(dci(0x5f3526af), "unallocated"); // srshr s, s, # + COMPARE_PREFIX(dci(0x5f0e34b0), "unallocated"); // srsra b, b, # + COMPARE_PREFIX(dci(0x5f1537ed), "unallocated"); // srsra h, h, # + COMPARE_PREFIX(dci(0x5f3934f2), "unallocated"); // srsra s, s, # + COMPARE_PREFIX(dci(0x5e24470b), "unallocated"); // sshl b, b, b + COMPARE_PREFIX(dci(0x5e624525), "unallocated"); // sshl h, h, h + COMPARE_PREFIX(dci(0x5ea846d6), "unallocated"); // sshl s, s, s + COMPARE_PREFIX(dci(0x5f0a07bc), "unallocated"); // sshr b, b, # + COMPARE_PREFIX(dci(0x5f1d0504), "unallocated"); // sshr h, h, # + COMPARE_PREFIX(dci(0x5f3e059d), "unallocated"); // sshr s, s, # + COMPARE_PREFIX(dci(0x5f0d17ae), "unallocated"); // ssra b, b, # + COMPARE_PREFIX(dci(0x5f1417c2), "unallocated"); // ssra h, h, # + 
COMPARE_PREFIX(dci(0x5f2214c1), "unallocated"); // ssra s, s, # + COMPARE_PREFIX(dci(0x7e3a8503), "unallocated"); // sub b, b, b + COMPARE_PREFIX(dci(0x7e748657), "unallocated"); // sub h, h, h + COMPARE_PREFIX(dci(0x7eaf844c), "unallocated"); // sub s, s, s + COMPARE_PREFIX(dci(0x6efb7c3c), "unallocated"); // uaba v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6ee2749f), "unallocated"); // uabd v.d, v.d, v.d + COMPARE_PREFIX(dci(0x2eb03807), "unallocated"); // uaddlv d, v.s + COMPARE_PREFIX(dci(0x7f08e671), "unallocated"); // ucvtf b, b, # + COMPARE_PREFIX(dci(0x6f0ee59b), "unallocated"); // ucvtf v.b, v.b, # + COMPARE_PREFIX(dci(0x6eef052d), "unallocated"); // uhadd v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6eef2707), "unallocated"); // uhsub v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6ee6675a), "unallocated"); // umax v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6ef2a430), "unallocated"); // umaxp v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6ef0a8ae), "unallocated"); // umaxv d, v.d + COMPARE_PREFIX(dci(0x2eb0aa70), "unallocated"); // umaxv s, v.s + COMPARE_PREFIX(dci(0x6efd6d23), "unallocated"); // umin v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6ee2accf), "unallocated"); // uminp v.d, v.d, v.d + COMPARE_PREFIX(dci(0x6ef1aa28), "unallocated"); // uminv d, v.d + COMPARE_PREFIX(dci(0x2eb1a831), "unallocated"); // uminv s, v.s + COMPARE_PREFIX(dci(0x6ffa2b0b), "unallocated"); // umlal v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x2fdb2acd), "unallocated"); // umlal v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x6fe76bb5), "unallocated"); // umlsl v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x2ff068fb), "unallocated"); // umlsl v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x6fd0a947), "unallocated"); // umull v.und, v.d, v.d[] + COMPARE_PREFIX(dci(0x2fc0a8fb), "unallocated"); // umull v.und, v.und, v.d[] + COMPARE_PREFIX(dci(0x7f6e9c1b), "unallocated"); // uqrshrn d, d, # + COMPARE_PREFIX(dci(0x7f4d9e1b), "unallocated"); // uqrshrn d, h, # + COMPARE_PREFIX(dci(0x7f5e9d48), "unallocated"); // uqrshrn d, s, # + 
COMPARE_PREFIX(dci(0x7f419d2e), "unallocated"); // uqrshrn d, undn, # + COMPARE_PREFIX(dci(0x6f779e93), "unallocated"); // uqrshrn v.d, v.und, # + COMPARE_PREFIX(dci(0x7f649620), "unallocated"); // uqshrn d, d, # + COMPARE_PREFIX(dci(0x7f4a950b), "unallocated"); // uqshrn d, h, # + COMPARE_PREFIX(dci(0x7f55950e), "unallocated"); // uqshrn d, s, # + COMPARE_PREFIX(dci(0x7f4697b5), "unallocated"); // uqshrn d, undn, # + COMPARE_PREFIX(dci(0x6f749463), "unallocated"); // uqshrn v.d, v.und, # + COMPARE_PREFIX(dci(0x4ee1c88d), "unallocated"); // urecpe v.d, v.d + COMPARE_PREFIX(dci(0x6eed17ff), "unallocated"); // urhadd v.d, v.d, v.d + COMPARE_PREFIX(dci(0x7e30549c), "unallocated"); // urshl b, b, b + COMPARE_PREFIX(dci(0x7e6157c1), "unallocated"); // urshl h, h, h + COMPARE_PREFIX(dci(0x7eb65432), "unallocated"); // urshl s, s, s + COMPARE_PREFIX(dci(0x7f0b2637), "unallocated"); // urshr b, b, # + COMPARE_PREFIX(dci(0x7f13240c), "unallocated"); // urshr h, h, # + COMPARE_PREFIX(dci(0x7f232578), "unallocated"); // urshr s, s, # + COMPARE_PREFIX(dci(0x6ee1ca96), "unallocated"); // ursqrte v.d, v.d + COMPARE_PREFIX(dci(0x7f0a375a), "unallocated"); // ursra b, b, # + COMPARE_PREFIX(dci(0x7f12340f), "unallocated"); // ursra h, h, # + COMPARE_PREFIX(dci(0x7f2f3549), "unallocated"); // ursra s, s, # + COMPARE_PREFIX(dci(0x7e2d47d3), "unallocated"); // ushl b, b, b + COMPARE_PREFIX(dci(0x7e694742), "unallocated"); // ushl h, h, h + COMPARE_PREFIX(dci(0x7eab45db), "unallocated"); // ushl s, s, s + COMPARE_PREFIX(dci(0x7f0d0631), "unallocated"); // ushr b, b, # + COMPARE_PREFIX(dci(0x7f1805a4), "unallocated"); // ushr h, h, # + COMPARE_PREFIX(dci(0x7f2e063a), "unallocated"); // ushr s, s, # + COMPARE_PREFIX(dci(0x7f0c15a4), "unallocated"); // usra b, b, # + COMPARE_PREFIX(dci(0x7f1716c7), "unallocated"); // usra h, h, # + COMPARE_PREFIX(dci(0x7f261749), "unallocated"); // usra s, s, # + + CLEANUP(); +} + } // namespace aarch64 } // namespace vixl diff --git 
a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc index 46128d91..933e808c 100644 --- a/test/aarch64/test-disasm-sve-aarch64.cc +++ b/test/aarch64/test-disasm-sve-aarch64.cc @@ -44,24 +44,81 @@ TEST(sve) { SETUP(); // TODO: Replace these tests when the disassembler is more capable. - COMPARE_PREFIX(asrr(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()), - "asrr z0.b, p7/m, z0.b, z1.b"); - COMPARE_PREFIX(fcmeq(p6.VnD(), p7.Zeroing(), z0.VnD(), z1.VnD()), - "fcmeq p6.d, p7/z, z0.d, z1.d"); - COMPARE_PREFIX(mla(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()), - "mla z0.b, p7/m, z0.b, z1.b"); - COMPARE_PREFIX(mla(z1.VnS(), p7.Merging(), z1.VnS(), z0.VnS()), - "mla z1.s, p7/m, z1.s, z0.s"); - COMPARE_PREFIX(asr(z1.VnB(), z0.VnB(), 8), "asr z1.b, z0.b, #8"); - COMPARE_PREFIX(lsl(z0.VnH(), z1.VnH(), 15), "lsl z0.h, z1.h, #15"); - COMPARE_PREFIX(lsr(z1.VnS(), z0.VnS(), 32), "lsr z1.s, z0.s, #32"); - COMPARE_PREFIX(and_(p6.VnB(), p7.Zeroing(), p6.VnB(), p7.VnB()), - "and p6.b, p7/z, p6.b, p7.b"); - COMPARE_PREFIX(rev(p7.VnB(), p6.VnB()), "rev p7.b, p6.b"); - COMPARE_PREFIX(splice(z0.VnH(), p7, z0.VnH(), z1.VnH()), - "splice z0.h, p7, z0.h, z1.h"); - COMPARE_PREFIX(fnmad(z0.VnD(), p6.Merging(), z1.VnD(), z0.VnD()), - "fnmad z0.d, p6/m, z1.d, z0.d"); + COMPARE(asrr(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()), + "asrr z0.b, p7/m, z0.b, z1.b"); + COMPARE(fcmeq(p6.VnD(), p7.Zeroing(), z0.VnD(), z1.VnD()), + "fcmeq p6.d, p7/z, z0.d, z1.d"); + COMPARE(mla(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()), + "mla z0.b, p7/m, z0.b, z1.b"); + COMPARE(mla(z1.VnS(), p7.Merging(), z1.VnS(), z0.VnS()), + "mla z1.s, p7/m, z1.s, z0.s"); + COMPARE(asr(z1.VnB(), z0.VnB(), 8), "asr z1.b, z0.b, #8"); + COMPARE(lsl(z0.VnH(), z1.VnH(), 15), "lsl z0.h, z1.h, #15"); + COMPARE(lsr(z1.VnS(), z0.VnS(), 32), "lsr z1.s, z0.s, #32"); + COMPARE(and_(p6.VnB(), p7.Zeroing(), p6.VnB(), p7.VnB()), + "and p6.b, p7/z, p6.b, p7.b"); + COMPARE(rev(p7.VnB(), p6.VnB()), "rev p7.b, p6.b"); + 
COMPARE(splice(z0.VnH(), p7, z0.VnH(), z1.VnH()), + "splice z0.h, p7, z0.h, z1.h"); + COMPARE(fnmad(z0.VnD(), p6.Merging(), z1.VnD(), z0.VnD()), + "fnmad z0.d, p6/m, z1.d, z0.d"); + + CLEANUP(); +} + +TEST(sve_unallocated_fp_byte_type) { + // Ensure disassembly of FP instructions does not report byte-sized lanes. + + SETUP(); + + COMPARE_PREFIX(dci(0x650003ca), "unallocated"); + COMPARE_PREFIX(dci(0x6500230b), "unallocated"); + COMPARE_PREFIX(dci(0x6500424c), "unallocated"); + COMPARE_PREFIX(dci(0x6500618d), "unallocated"); + COMPARE_PREFIX(dci(0x6500a00f), "unallocated"); + COMPARE_PREFIX(dci(0x6500de91), "unallocated"); + COMPARE_PREFIX(dci(0x6500fdd2), "unallocated"); + COMPARE_PREFIX(dci(0x65011d13), "unallocated"); + COMPARE_PREFIX(dci(0x65015b95), "unallocated"); + COMPARE_PREFIX(dci(0x65017ad6), "unallocated"); + COMPARE_PREFIX(dci(0x65019a17), "unallocated"); + COMPARE_PREFIX(dci(0x6501b958), "unallocated"); + COMPARE_PREFIX(dci(0x6502941f), "unallocated"); + COMPARE_PREFIX(dci(0x6502b360), "unallocated"); + COMPARE_PREFIX(dci(0x6502d2a1), "unallocated"); + COMPARE_PREFIX(dci(0x65038e27), "unallocated"); + COMPARE_PREFIX(dci(0x6503ad68), "unallocated"); + COMPARE_PREFIX(dci(0x65042a6c), "unallocated"); + COMPARE_PREFIX(dci(0x6504882f), "unallocated"); + COMPARE_PREFIX(dci(0x6504a770), "unallocated"); + COMPARE_PREFIX(dci(0x65052474), "unallocated"); + COMPARE_PREFIX(dci(0x65058237), "unallocated"); + COMPARE_PREFIX(dci(0x65063dbd), "unallocated"); + COMPARE_PREFIX(dci(0x65069b80), "unallocated"); + COMPARE_PREFIX(dci(0x6506bac1), "unallocated"); + COMPARE_PREFIX(dci(0x65071884), "unallocated"); + COMPARE_PREFIX(dci(0x650737c5), "unallocated"); + COMPARE_PREFIX(dci(0x65079588), "unallocated"); + COMPARE_PREFIX(dci(0x6507b4c9), "unallocated"); + COMPARE_PREFIX(dci(0x65088f90), "unallocated"); + COMPARE_PREFIX(dci(0x65090c94), "unallocated"); + COMPARE_PREFIX(dci(0x65098998), "unallocated"); + COMPARE_PREFIX(dci(0x650a83a0), "unallocated"); + 
COMPARE_PREFIX(dci(0x650c96f1), "unallocated"); + COMPARE_PREFIX(dci(0x650d90f9), "unallocated"); + COMPARE_PREFIX(dci(0x65113a97), "unallocated"); + COMPARE_PREFIX(dci(0x65183010), "unallocated"); + COMPARE_PREFIX(dci(0x65200050), "unallocated"); + COMPARE_PREFIX(dci(0x65203ed2), "unallocated"); + COMPARE_PREFIX(dci(0x65205e13), "unallocated"); + COMPARE_PREFIX(dci(0x65207d54), "unallocated"); + COMPARE_PREFIX(dci(0x65209c95), "unallocated"); + COMPARE_PREFIX(dci(0x6520bbd6), "unallocated"); + COMPARE_PREFIX(dci(0x6520db17), "unallocated"); + COMPARE_PREFIX(dci(0x6520fa58), "unallocated"); + COMPARE_PREFIX(dci(0x650f31e1), "unallocated"); + COMPARE_PREFIX(dci(0x650e30f7), "unallocated"); + COMPARE_PREFIX(dci(0x6511376e), "unallocated"); CLEANUP(); } @@ -69,49 +126,47 @@ TEST(sve) { TEST(sve_address_generation) { SETUP(); -#if 0 - COMPARE_PREFIX(adr(z19.VnD(), z22.VnD(), z11.VnD()), "adr <Zd>.D, [<Zn>.D, <Zm>.D, SXTW{<amount>}]"); - COMPARE_PREFIX(adr(z30.VnD(), z14.VnD(), z24.VnD()), "adr <Zd>.D, [<Zn>.D, <Zm>.D, UXTW{<amount>}]"); - COMPARE_PREFIX(adr(z8.Vn?(), z16.Vn?(), z16.Vn?()), "adr <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>{, <mod> <amount>}]"); -#endif - COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW)), - "adr z19.d, [z22.d, z11.d, sxtw]"); - COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 1)), - "adr z19.d, [z22.d, z11.d, sxtw #1]"); - COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 2)), - "adr z19.d, [z22.d, z11.d, sxtw #2]"); - COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 3)), - "adr z19.d, [z22.d, z11.d, sxtw #3]"); - COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW)), - "adr z30.d, [z14.d, z16.d, uxtw]"); - COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 1)), - "adr z30.d, [z14.d, z16.d, uxtw #1]"); - COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 2)), - "adr z30.d, [z14.d, z16.d, uxtw #2]"); - 
COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 3)), - "adr z30.d, [z14.d, z16.d, uxtw #3]"); - COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS())), - "adr z8.s, [z16.s, z16.s]"); - COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 1)), - "adr z8.s, [z16.s, z16.s, lsl #1]"); - COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 2)), - "adr z8.s, [z16.s, z16.s, lsl #2]"); - COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 3)), - "adr z8.s, [z16.s, z16.s, lsl #3]"); - COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD())), - "adr z9.d, [z1.d, z16.d]"); - COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 1)), - "adr z9.d, [z1.d, z16.d, lsl #1]"); - COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 2)), - "adr z9.d, [z1.d, z16.d, lsl #2]"); - COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 3)), - "adr z9.d, [z1.d, z16.d, lsl #3]"); + COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW)), + "adr z19.d, [z22.d, z11.d, sxtw]"); + COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 1)), + "adr z19.d, [z22.d, z11.d, sxtw #1]"); + COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 2)), + "adr z19.d, [z22.d, z11.d, sxtw #2]"); + COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 3)), + "adr z19.d, [z22.d, z11.d, sxtw #3]"); + COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW)), + "adr z30.d, [z14.d, z16.d, uxtw]"); + COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 1)), + "adr z30.d, [z14.d, z16.d, uxtw #1]"); + COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 2)), + "adr z30.d, [z14.d, z16.d, uxtw #2]"); + COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 3)), + "adr z30.d, [z14.d, z16.d, uxtw #3]"); + COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS())), + "adr z8.s, [z16.s, z16.s]"); + 
COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 1)), + "adr z8.s, [z16.s, z16.s, lsl #1]"); + COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 2)), + "adr z8.s, [z16.s, z16.s, lsl #2]"); + COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 3)), + "adr z8.s, [z16.s, z16.s, lsl #3]"); + COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD())), + "adr z9.d, [z1.d, z16.d]"); + COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 1)), + "adr z9.d, [z1.d, z16.d, lsl #1]"); + COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 2)), + "adr z9.d, [z1.d, z16.d, lsl #2]"); + COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 3)), + "adr z9.d, [z1.d, z16.d, lsl #3]"); CLEANUP(); } TEST(sve_calculate_sve_address) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + // Shadow the `MacroAssembler` type so that the test macros work without // modification. typedef CalculateSVEAddressMacroAssembler MacroAssembler; @@ -156,49 +211,47 @@ TEST(sve_calculate_sve_address) { "add x22, sp, x3, lsl #2"); CLEANUP(); + +#pragma GCC diagnostic pop } TEST(sve_bitwise_imm) { SETUP(); // The assembler will necessarily encode an immediate in the simplest bitset. - COMPARE_PREFIX(and_(z2.VnD(), z2.VnD(), 0x0000ffff0000ffff), - "and z2.s, z2.s, #0xffff"); - COMPARE_PREFIX(dupm(z15.VnS(), 0x7f007f00), "dupm z15.h, #0x7f00"); - COMPARE_PREFIX(eor(z26.VnH(), z26.VnH(), 0x7ff8), - "eor z26.h, z26.h, #0x7ff8"); - COMPARE_PREFIX(orr(z13.VnB(), z13.VnB(), 0x78), "orr z13.b, z13.b, #0x78"); + COMPARE(and_(z2.VnD(), z2.VnD(), 0x0000ffff0000ffff), + "and z2.s, z2.s, #0xffff"); + COMPARE(dupm(z15.VnS(), 0x7f007f00), "dupm z15.h, #0x7f00"); + COMPARE(eor(z26.VnH(), z26.VnH(), 0x7ff8), "eor z26.h, z26.h, #0x7ff8"); + COMPARE(orr(z13.VnB(), z13.VnB(), 0x78), "orr z13.b, z13.b, #0x78"); // Logical aliases. 
- COMPARE_PREFIX(bic(z21.VnD(), z21.VnD(), 0xffff00000000ffff), - "and z21.d, z21.d, #0xffffffff0000"); - COMPARE_PREFIX(eon(z31.VnS(), z31.VnS(), 0x1ffe), - "eor z31.s, z31.s, #0xffffe001"); - COMPARE_PREFIX(orn(z11.VnH(), z11.VnH(), 0x2), "orr z11.h, z11.h, #0xfffd"); + COMPARE(bic(z21.VnD(), z21.VnD(), 0xffff00000000ffff), + "and z21.d, z21.d, #0xffffffff0000"); + COMPARE(eon(z31.VnS(), z31.VnS(), 0x1ffe), "eor z31.s, z31.s, #0xffffe001"); + COMPARE(orn(z11.VnH(), z11.VnH(), 0x2), "orr z11.h, z11.h, #0xfffd"); // Mov alias for dupm. - COMPARE_PREFIX(mov(z0.VnH(), 0xf00f), "mov z0.h, #0xf00f"); + COMPARE(mov(z0.VnH(), 0xf00f), "mov z0.h, #0xf00f"); COMPARE_MACRO(Mov(z11.VnS(), 0xe0000003), "mov z11.s, #0xe0000003"); COMPARE_MACRO(Mov(z22.VnD(), 0x8000), "dupm z22.d, #0x8000"); // Test dupm versus mov disassembly. - COMPARE_PREFIX(dupm(z0.VnH(), 0xfe), "dupm z0.h, #0xfe"); - COMPARE_PREFIX(dupm(z0.VnH(), 0xff), "dupm z0.h, #0xff"); - COMPARE_PREFIX(dupm(z0.VnH(), 0x1fe), "mov z0.h, #0x1fe"); - COMPARE_PREFIX(dupm(z0.VnH(), 0xfe00), "dupm z0.h, #0xfe00"); - COMPARE_PREFIX(dupm(z0.VnH(), 0xfe01), "mov z0.h, #0xfe01"); - COMPARE_PREFIX(dupm(z0.VnS(), 0xfe00), "dupm z0.s, #0xfe00"); - COMPARE_PREFIX(dupm(z0.VnS(), 0xfe000001), "mov z0.s, #0xfe000001"); - COMPARE_PREFIX(dupm(z0.VnS(), 0xffffff00), "dupm z0.s, #0xffffff00"); - COMPARE_PREFIX(dupm(z0.VnS(), 0xffffff01), "dupm z0.s, #0xffffff01"); - COMPARE_PREFIX(dupm(z0.VnS(), 0xfffffe01), "mov z0.s, #0xfffffe01"); - COMPARE_PREFIX(dupm(z0.VnS(), 0xfff), "mov z0.s, #0xfff"); - COMPARE_PREFIX(dupm(z0.VnD(), 0xffffffffffffff00), - "dupm z0.d, #0xffffffffffffff00"); - COMPARE_PREFIX(dupm(z0.VnD(), 0x7fffffffffffff80), - "mov z0.d, #0x7fffffffffffff80"); - COMPARE_PREFIX(dupm(z0.VnD(), 0x8000), "dupm z0.d, #0x8000"); - COMPARE_PREFIX(dupm(z0.VnD(), 0x10000), "mov z0.d, #0x10000"); + COMPARE(dupm(z0.VnH(), 0xfe), "dupm z0.h, #0xfe"); + COMPARE(dupm(z0.VnH(), 0xff), "dupm z0.h, #0xff"); + COMPARE(dupm(z0.VnH(), 0x1fe), "mov 
z0.h, #0x1fe"); + COMPARE(dupm(z0.VnH(), 0xfe00), "dupm z0.h, #0xfe00"); + COMPARE(dupm(z0.VnH(), 0xfe01), "mov z0.h, #0xfe01"); + COMPARE(dupm(z0.VnS(), 0xfe00), "dupm z0.s, #0xfe00"); + COMPARE(dupm(z0.VnS(), 0xfe000001), "mov z0.s, #0xfe000001"); + COMPARE(dupm(z0.VnS(), 0xffffff00), "dupm z0.s, #0xffffff00"); + COMPARE(dupm(z0.VnS(), 0xffffff01), "dupm z0.s, #0xffffff01"); + COMPARE(dupm(z0.VnS(), 0xfffffe01), "mov z0.s, #0xfffffe01"); + COMPARE(dupm(z0.VnS(), 0xfff), "mov z0.s, #0xfff"); + COMPARE(dupm(z0.VnD(), 0xffffffffffffff00), "dupm z0.d, #0xffffffffffffff00"); + COMPARE(dupm(z0.VnD(), 0x7fffffffffffff80), "mov z0.d, #0x7fffffffffffff80"); + COMPARE(dupm(z0.VnD(), 0x8000), "dupm z0.d, #0x8000"); + COMPARE(dupm(z0.VnD(), 0x10000), "mov z0.d, #0x10000"); CLEANUP(); } @@ -206,15 +259,14 @@ TEST(sve_bitwise_imm) { TEST(sve_bitwise_logical_unpredicated) { SETUP(); - COMPARE_PREFIX(and_(z12.VnD(), z5.VnD(), z29.VnD()), - "and z12.d, z5.d, z29.d"); - COMPARE_PREFIX(bic(z11.VnD(), z15.VnD(), z9.VnD()), "bic z11.d, z15.d, z9.d"); - COMPARE_PREFIX(eor(z9.VnD(), z31.VnD(), z29.VnD()), "eor z9.d, z31.d, z29.d"); - COMPARE_PREFIX(orr(z17.VnD(), z8.VnD(), z19.VnD()), "orr z17.d, z8.d, z19.d"); + COMPARE(and_(z12.VnD(), z5.VnD(), z29.VnD()), "and z12.d, z5.d, z29.d"); + COMPARE(bic(z11.VnD(), z15.VnD(), z9.VnD()), "bic z11.d, z15.d, z9.d"); + COMPARE(eor(z9.VnD(), z31.VnD(), z29.VnD()), "eor z9.d, z31.d, z29.d"); + COMPARE(orr(z17.VnD(), z8.VnD(), z19.VnD()), "orr z17.d, z8.d, z19.d"); // Check mov aliases. 
- COMPARE_PREFIX(orr(z17.VnD(), z8.VnD(), z8.VnD()), "mov z17.d, z8.d"); - COMPARE_PREFIX(mov(z18, z9), "mov z18.d, z9.d"); + COMPARE(orr(z17.VnD(), z8.VnD(), z8.VnD()), "mov z17.d, z8.d"); + COMPARE(mov(z18, z9), "mov z18.d, z9.d"); COMPARE_MACRO(Mov(z19, z10), "mov z19.d, z10.d"); CLEANUP(); @@ -223,73 +275,73 @@ TEST(sve_bitwise_logical_unpredicated) { TEST(sve_bitwise_shift_predicated) { SETUP(); - COMPARE_PREFIX(asrr(z20.VnB(), p3.Merging(), z20.VnB(), z11.VnB()), - "asrr z20.b, p3/m, z20.b, z11.b"); - COMPARE_PREFIX(asrr(z20.VnH(), p3.Merging(), z20.VnH(), z11.VnH()), - "asrr z20.h, p3/m, z20.h, z11.h"); - COMPARE_PREFIX(asrr(z20.VnS(), p3.Merging(), z20.VnS(), z11.VnS()), - "asrr z20.s, p3/m, z20.s, z11.s"); - COMPARE_PREFIX(asrr(z20.VnD(), p3.Merging(), z20.VnD(), z11.VnD()), - "asrr z20.d, p3/m, z20.d, z11.d"); - COMPARE_PREFIX(asr(z26.VnB(), p2.Merging(), z26.VnB(), z17.VnB()), - "asr z26.b, p2/m, z26.b, z17.b"); - COMPARE_PREFIX(asr(z26.VnH(), p2.Merging(), z26.VnH(), z17.VnH()), - "asr z26.h, p2/m, z26.h, z17.h"); - COMPARE_PREFIX(asr(z26.VnS(), p2.Merging(), z26.VnS(), z17.VnS()), - "asr z26.s, p2/m, z26.s, z17.s"); - COMPARE_PREFIX(asr(z26.VnD(), p2.Merging(), z26.VnD(), z17.VnD()), - "asr z26.d, p2/m, z26.d, z17.d"); - COMPARE_PREFIX(lslr(z30.VnB(), p1.Merging(), z30.VnB(), z26.VnB()), - "lslr z30.b, p1/m, z30.b, z26.b"); - COMPARE_PREFIX(lslr(z30.VnH(), p1.Merging(), z30.VnH(), z26.VnH()), - "lslr z30.h, p1/m, z30.h, z26.h"); - COMPARE_PREFIX(lslr(z30.VnS(), p1.Merging(), z30.VnS(), z26.VnS()), - "lslr z30.s, p1/m, z30.s, z26.s"); - COMPARE_PREFIX(lslr(z30.VnD(), p1.Merging(), z30.VnD(), z26.VnD()), - "lslr z30.d, p1/m, z30.d, z26.d"); - COMPARE_PREFIX(lsl(z14.VnB(), p6.Merging(), z14.VnB(), z25.VnB()), - "lsl z14.b, p6/m, z14.b, z25.b"); - COMPARE_PREFIX(lsl(z14.VnH(), p6.Merging(), z14.VnH(), z25.VnH()), - "lsl z14.h, p6/m, z14.h, z25.h"); - COMPARE_PREFIX(lsl(z14.VnS(), p6.Merging(), z14.VnS(), z25.VnS()), - "lsl z14.s, p6/m, z14.s, z25.s"); - 
COMPARE_PREFIX(lsl(z14.VnD(), p6.Merging(), z14.VnD(), z25.VnD()), - "lsl z14.d, p6/m, z14.d, z25.d"); - COMPARE_PREFIX(lsrr(z3.VnB(), p1.Merging(), z3.VnB(), z16.VnB()), - "lsrr z3.b, p1/m, z3.b, z16.b"); - COMPARE_PREFIX(lsrr(z3.VnH(), p1.Merging(), z3.VnH(), z16.VnH()), - "lsrr z3.h, p1/m, z3.h, z16.h"); - COMPARE_PREFIX(lsrr(z3.VnS(), p1.Merging(), z3.VnS(), z16.VnS()), - "lsrr z3.s, p1/m, z3.s, z16.s"); - COMPARE_PREFIX(lsrr(z3.VnD(), p1.Merging(), z3.VnD(), z16.VnD()), - "lsrr z3.d, p1/m, z3.d, z16.d"); - COMPARE_PREFIX(lsr(z29.VnB(), p7.Merging(), z29.VnB(), z13.VnB()), - "lsr z29.b, p7/m, z29.b, z13.b"); - COMPARE_PREFIX(lsr(z29.VnH(), p7.Merging(), z29.VnH(), z13.VnH()), - "lsr z29.h, p7/m, z29.h, z13.h"); - COMPARE_PREFIX(lsr(z29.VnS(), p7.Merging(), z29.VnS(), z13.VnS()), - "lsr z29.s, p7/m, z29.s, z13.s"); - COMPARE_PREFIX(lsr(z29.VnD(), p7.Merging(), z29.VnD(), z13.VnD()), - "lsr z29.d, p7/m, z29.d, z13.d"); - - COMPARE_PREFIX(asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnD()), - "asr z4.b, p0/m, z4.b, z30.d"); - COMPARE_PREFIX(asr(z4.VnH(), p0.Merging(), z4.VnH(), z30.VnD()), - "asr z4.h, p0/m, z4.h, z30.d"); - COMPARE_PREFIX(asr(z4.VnS(), p0.Merging(), z4.VnS(), z30.VnD()), - "asr z4.s, p0/m, z4.s, z30.d"); - COMPARE_PREFIX(lsl(z13.VnB(), p7.Merging(), z13.VnB(), z18.VnD()), - "lsl z13.b, p7/m, z13.b, z18.d"); - COMPARE_PREFIX(lsl(z13.VnH(), p7.Merging(), z13.VnH(), z18.VnD()), - "lsl z13.h, p7/m, z13.h, z18.d"); - COMPARE_PREFIX(lsl(z13.VnS(), p7.Merging(), z13.VnS(), z18.VnD()), - "lsl z13.s, p7/m, z13.s, z18.d"); - COMPARE_PREFIX(lsr(z1.VnB(), p4.Merging(), z1.VnB(), z14.VnD()), - "lsr z1.b, p4/m, z1.b, z14.d"); - COMPARE_PREFIX(lsr(z1.VnH(), p4.Merging(), z1.VnH(), z14.VnD()), - "lsr z1.h, p4/m, z1.h, z14.d"); - COMPARE_PREFIX(lsr(z1.VnS(), p4.Merging(), z1.VnS(), z14.VnD()), - "lsr z1.s, p4/m, z1.s, z14.d"); + COMPARE(asrr(z20.VnB(), p3.Merging(), z20.VnB(), z11.VnB()), + "asrr z20.b, p3/m, z20.b, z11.b"); + COMPARE(asrr(z20.VnH(), p3.Merging(), 
z20.VnH(), z11.VnH()), + "asrr z20.h, p3/m, z20.h, z11.h"); + COMPARE(asrr(z20.VnS(), p3.Merging(), z20.VnS(), z11.VnS()), + "asrr z20.s, p3/m, z20.s, z11.s"); + COMPARE(asrr(z20.VnD(), p3.Merging(), z20.VnD(), z11.VnD()), + "asrr z20.d, p3/m, z20.d, z11.d"); + COMPARE(asr(z26.VnB(), p2.Merging(), z26.VnB(), z17.VnB()), + "asr z26.b, p2/m, z26.b, z17.b"); + COMPARE(asr(z26.VnH(), p2.Merging(), z26.VnH(), z17.VnH()), + "asr z26.h, p2/m, z26.h, z17.h"); + COMPARE(asr(z26.VnS(), p2.Merging(), z26.VnS(), z17.VnS()), + "asr z26.s, p2/m, z26.s, z17.s"); + COMPARE(asr(z26.VnD(), p2.Merging(), z26.VnD(), z17.VnD()), + "asr z26.d, p2/m, z26.d, z17.d"); + COMPARE(lslr(z30.VnB(), p1.Merging(), z30.VnB(), z26.VnB()), + "lslr z30.b, p1/m, z30.b, z26.b"); + COMPARE(lslr(z30.VnH(), p1.Merging(), z30.VnH(), z26.VnH()), + "lslr z30.h, p1/m, z30.h, z26.h"); + COMPARE(lslr(z30.VnS(), p1.Merging(), z30.VnS(), z26.VnS()), + "lslr z30.s, p1/m, z30.s, z26.s"); + COMPARE(lslr(z30.VnD(), p1.Merging(), z30.VnD(), z26.VnD()), + "lslr z30.d, p1/m, z30.d, z26.d"); + COMPARE(lsl(z14.VnB(), p6.Merging(), z14.VnB(), z25.VnB()), + "lsl z14.b, p6/m, z14.b, z25.b"); + COMPARE(lsl(z14.VnH(), p6.Merging(), z14.VnH(), z25.VnH()), + "lsl z14.h, p6/m, z14.h, z25.h"); + COMPARE(lsl(z14.VnS(), p6.Merging(), z14.VnS(), z25.VnS()), + "lsl z14.s, p6/m, z14.s, z25.s"); + COMPARE(lsl(z14.VnD(), p6.Merging(), z14.VnD(), z25.VnD()), + "lsl z14.d, p6/m, z14.d, z25.d"); + COMPARE(lsrr(z3.VnB(), p1.Merging(), z3.VnB(), z16.VnB()), + "lsrr z3.b, p1/m, z3.b, z16.b"); + COMPARE(lsrr(z3.VnH(), p1.Merging(), z3.VnH(), z16.VnH()), + "lsrr z3.h, p1/m, z3.h, z16.h"); + COMPARE(lsrr(z3.VnS(), p1.Merging(), z3.VnS(), z16.VnS()), + "lsrr z3.s, p1/m, z3.s, z16.s"); + COMPARE(lsrr(z3.VnD(), p1.Merging(), z3.VnD(), z16.VnD()), + "lsrr z3.d, p1/m, z3.d, z16.d"); + COMPARE(lsr(z29.VnB(), p7.Merging(), z29.VnB(), z13.VnB()), + "lsr z29.b, p7/m, z29.b, z13.b"); + COMPARE(lsr(z29.VnH(), p7.Merging(), z29.VnH(), z13.VnH()), + "lsr 
z29.h, p7/m, z29.h, z13.h"); + COMPARE(lsr(z29.VnS(), p7.Merging(), z29.VnS(), z13.VnS()), + "lsr z29.s, p7/m, z29.s, z13.s"); + COMPARE(lsr(z29.VnD(), p7.Merging(), z29.VnD(), z13.VnD()), + "lsr z29.d, p7/m, z29.d, z13.d"); + + COMPARE(asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnD()), + "asr z4.b, p0/m, z4.b, z30.d"); + COMPARE(asr(z4.VnH(), p0.Merging(), z4.VnH(), z30.VnD()), + "asr z4.h, p0/m, z4.h, z30.d"); + COMPARE(asr(z4.VnS(), p0.Merging(), z4.VnS(), z30.VnD()), + "asr z4.s, p0/m, z4.s, z30.d"); + COMPARE(lsl(z13.VnB(), p7.Merging(), z13.VnB(), z18.VnD()), + "lsl z13.b, p7/m, z13.b, z18.d"); + COMPARE(lsl(z13.VnH(), p7.Merging(), z13.VnH(), z18.VnD()), + "lsl z13.h, p7/m, z13.h, z18.d"); + COMPARE(lsl(z13.VnS(), p7.Merging(), z13.VnS(), z18.VnD()), + "lsl z13.s, p7/m, z13.s, z18.d"); + COMPARE(lsr(z1.VnB(), p4.Merging(), z1.VnB(), z14.VnD()), + "lsr z1.b, p4/m, z1.b, z14.d"); + COMPARE(lsr(z1.VnH(), p4.Merging(), z1.VnH(), z14.VnD()), + "lsr z1.h, p4/m, z1.h, z14.d"); + COMPARE(lsr(z1.VnS(), p4.Merging(), z1.VnS(), z14.VnD()), + "lsr z1.s, p4/m, z1.s, z14.d"); COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()), "asr z4.b, p0/m, z4.b, z30.b"); @@ -313,38 +365,34 @@ TEST(sve_bitwise_shift_predicated) { "movprfx z4.b, p0/m, z10.b\n" "lsr z4.b, p0/m, z4.b, z14.b"); - COMPARE_PREFIX(asrd(z0.VnB(), p4.Merging(), z0.VnB(), 1), - "asrd z0.b, p4/m, z0.b, #1"); - COMPARE_PREFIX(asrd(z0.VnH(), p4.Merging(), z0.VnH(), 1), - "asrd z0.h, p4/m, z0.h, #1"); - COMPARE_PREFIX(asrd(z0.VnS(), p4.Merging(), z0.VnS(), 1), - "asrd z0.s, p4/m, z0.s, #1"); - COMPARE_PREFIX(asrd(z0.VnD(), p4.Merging(), z0.VnD(), 1), - "asrd z0.d, p4/m, z0.d, #1"); - COMPARE_PREFIX(asr(z8.VnB(), p7.Merging(), z8.VnB(), 3), - "asr z8.b, p7/m, z8.b, #3"); - COMPARE_PREFIX(asr(z8.VnH(), p7.Merging(), z8.VnH(), 3), - "asr z8.h, p7/m, z8.h, #3"); - COMPARE_PREFIX(asr(z8.VnS(), p7.Merging(), z8.VnS(), 3), - "asr z8.s, p7/m, z8.s, #3"); - COMPARE_PREFIX(asr(z8.VnD(), p7.Merging(), z8.VnD(), 3), 
- "asr z8.d, p7/m, z8.d, #3"); - COMPARE_PREFIX(lsl(z29.VnB(), p6.Merging(), z29.VnB(), 0), - "lsl z29.b, p6/m, z29.b, #0"); - COMPARE_PREFIX(lsl(z29.VnH(), p6.Merging(), z29.VnH(), 5), - "lsl z29.h, p6/m, z29.h, #5"); - COMPARE_PREFIX(lsl(z29.VnS(), p6.Merging(), z29.VnS(), 0), - "lsl z29.s, p6/m, z29.s, #0"); - COMPARE_PREFIX(lsl(z29.VnD(), p6.Merging(), z29.VnD(), 63), - "lsl z29.d, p6/m, z29.d, #63"); - COMPARE_PREFIX(lsr(z24.VnB(), p2.Merging(), z24.VnB(), 8), - "lsr z24.b, p2/m, z24.b, #8"); - COMPARE_PREFIX(lsr(z24.VnH(), p2.Merging(), z24.VnH(), 16), - "lsr z24.h, p2/m, z24.h, #16"); - COMPARE_PREFIX(lsr(z24.VnS(), p2.Merging(), z24.VnS(), 32), - "lsr z24.s, p2/m, z24.s, #32"); - COMPARE_PREFIX(lsr(z24.VnD(), p2.Merging(), z24.VnD(), 64), - "lsr z24.d, p2/m, z24.d, #64"); + COMPARE(asrd(z0.VnB(), p4.Merging(), z0.VnB(), 1), + "asrd z0.b, p4/m, z0.b, #1"); + COMPARE(asrd(z0.VnH(), p4.Merging(), z0.VnH(), 1), + "asrd z0.h, p4/m, z0.h, #1"); + COMPARE(asrd(z0.VnS(), p4.Merging(), z0.VnS(), 1), + "asrd z0.s, p4/m, z0.s, #1"); + COMPARE(asrd(z0.VnD(), p4.Merging(), z0.VnD(), 1), + "asrd z0.d, p4/m, z0.d, #1"); + COMPARE(asr(z8.VnB(), p7.Merging(), z8.VnB(), 3), "asr z8.b, p7/m, z8.b, #3"); + COMPARE(asr(z8.VnH(), p7.Merging(), z8.VnH(), 3), "asr z8.h, p7/m, z8.h, #3"); + COMPARE(asr(z8.VnS(), p7.Merging(), z8.VnS(), 3), "asr z8.s, p7/m, z8.s, #3"); + COMPARE(asr(z8.VnD(), p7.Merging(), z8.VnD(), 3), "asr z8.d, p7/m, z8.d, #3"); + COMPARE(lsl(z29.VnB(), p6.Merging(), z29.VnB(), 0), + "lsl z29.b, p6/m, z29.b, #0"); + COMPARE(lsl(z29.VnH(), p6.Merging(), z29.VnH(), 5), + "lsl z29.h, p6/m, z29.h, #5"); + COMPARE(lsl(z29.VnS(), p6.Merging(), z29.VnS(), 0), + "lsl z29.s, p6/m, z29.s, #0"); + COMPARE(lsl(z29.VnD(), p6.Merging(), z29.VnD(), 63), + "lsl z29.d, p6/m, z29.d, #63"); + COMPARE(lsr(z24.VnB(), p2.Merging(), z24.VnB(), 8), + "lsr z24.b, p2/m, z24.b, #8"); + COMPARE(lsr(z24.VnH(), p2.Merging(), z24.VnH(), 16), + "lsr z24.h, p2/m, z24.h, #16"); + 
COMPARE(lsr(z24.VnS(), p2.Merging(), z24.VnS(), 32), + "lsr z24.s, p2/m, z24.s, #32"); + COMPARE(lsr(z24.VnD(), p2.Merging(), z24.VnD(), 64), + "lsr z24.d, p2/m, z24.d, #64"); COMPARE_MACRO(Asrd(z0.VnB(), p4.Merging(), z8.VnB(), 1), "movprfx z0.b, p4/m, z8.b\n" @@ -370,42 +418,39 @@ TEST(sve_bitwise_shift_unpredicated) { // Test asr with reserved tsz field. COMPARE_PREFIX(dci(0x04209345), "unimplemented"); - COMPARE_PREFIX(asr(z4.VnB(), z27.VnB(), 1), "asr z4.b, z27.b, #1"); - COMPARE_PREFIX(asr(z5.VnB(), z26.VnB(), 8), "asr z5.b, z26.b, #8"); - COMPARE_PREFIX(asr(z6.VnH(), z25.VnH(), 1), "asr z6.h, z25.h, #1"); - COMPARE_PREFIX(asr(z7.VnH(), z24.VnH(), 16), "asr z7.h, z24.h, #16"); - COMPARE_PREFIX(asr(z8.VnS(), z23.VnS(), 1), "asr z8.s, z23.s, #1"); - COMPARE_PREFIX(asr(z9.VnS(), z22.VnS(), 32), "asr z9.s, z22.s, #32"); - COMPARE_PREFIX(asr(z10.VnD(), z21.VnD(), 1), "asr z10.d, z21.d, #1"); - COMPARE_PREFIX(asr(z11.VnD(), z20.VnD(), 64), "asr z11.d, z20.d, #64"); - COMPARE_PREFIX(lsr(z4.VnB(), z27.VnB(), 3), "lsr z4.b, z27.b, #3"); - COMPARE_PREFIX(lsr(z5.VnB(), z26.VnB(), 7), "lsr z5.b, z26.b, #7"); - COMPARE_PREFIX(lsr(z6.VnH(), z25.VnH(), 8), "lsr z6.h, z25.h, #8"); - COMPARE_PREFIX(lsr(z7.VnH(), z24.VnH(), 15), "lsr z7.h, z24.h, #15"); - COMPARE_PREFIX(lsr(z8.VnS(), z23.VnS(), 14), "lsr z8.s, z23.s, #1"); - COMPARE_PREFIX(lsr(z9.VnS(), z22.VnS(), 31), "lsr z9.s, z22.s, #31"); - COMPARE_PREFIX(lsr(z10.VnD(), z21.VnD(), 30), "lsr z10.d, z21.d, #30"); - COMPARE_PREFIX(lsr(z11.VnD(), z20.VnD(), 63), "lsr z11.d, z20.d, #63"); - COMPARE_PREFIX(lsl(z4.VnB(), z27.VnB(), 4), "lsl z4.b, z27.b, #4"); - COMPARE_PREFIX(lsl(z5.VnB(), z26.VnB(), 6), "lsl z5.b, z26.b, #6"); - COMPARE_PREFIX(lsl(z6.VnH(), z25.VnH(), 10), "lsl z6.h, z25.h, #10"); - COMPARE_PREFIX(lsl(z7.VnH(), z24.VnH(), 14), "lsl z7.h, z24.h, #14"); - COMPARE_PREFIX(lsl(z8.VnS(), z23.VnS(), 21), "lsl z8.s, z23.s, #21"); - COMPARE_PREFIX(lsl(z9.VnS(), z22.VnS(), 30), "lsl z9.s, z22.s, #30"); - 
COMPARE_PREFIX(lsl(z10.VnD(), z21.VnD(), 44), "lsl z10.d, z21.d, #44"); - COMPARE_PREFIX(lsl(z11.VnD(), z20.VnD(), 62), "lsl z11.d, z20.d, #62"); - COMPARE_PREFIX(asr(z12.VnB(), z8.VnB(), z14.VnD()), "asr z12.b, z8.b, z14.d"); - COMPARE_PREFIX(asr(z14.VnH(), z8.VnH(), z12.VnD()), "asr z14.h, z8.h, z12.d"); - COMPARE_PREFIX(asr(z16.VnS(), z8.VnS(), z10.VnD()), "asr z16.s, z8.s, z10.d"); - COMPARE_PREFIX(lsl(z21.VnB(), z16.VnB(), z15.VnD()), - "lsl z21.b, z16.b, z15.d"); - COMPARE_PREFIX(lsl(z23.VnH(), z16.VnH(), z13.VnD()), - "lsl z23.h, z16.h, z13.d"); - COMPARE_PREFIX(lsl(z25.VnS(), z16.VnS(), z11.VnD()), - "lsl z25.s, z16.s, z11.d"); - COMPARE_PREFIX(lsr(z16.VnB(), z19.VnB(), z2.VnD()), "lsr z16.b, z19.b, z2.d"); - COMPARE_PREFIX(lsr(z18.VnH(), z19.VnH(), z4.VnD()), "lsr z18.h, z19.h, z4.d"); - COMPARE_PREFIX(lsr(z20.VnS(), z19.VnS(), z6.VnD()), "lsr z20.s, z19.s, z6.d"); + COMPARE(asr(z4.VnB(), z27.VnB(), 1), "asr z4.b, z27.b, #1"); + COMPARE(asr(z5.VnB(), z26.VnB(), 8), "asr z5.b, z26.b, #8"); + COMPARE(asr(z6.VnH(), z25.VnH(), 1), "asr z6.h, z25.h, #1"); + COMPARE(asr(z7.VnH(), z24.VnH(), 16), "asr z7.h, z24.h, #16"); + COMPARE(asr(z8.VnS(), z23.VnS(), 1), "asr z8.s, z23.s, #1"); + COMPARE(asr(z9.VnS(), z22.VnS(), 32), "asr z9.s, z22.s, #32"); + COMPARE(asr(z10.VnD(), z21.VnD(), 1), "asr z10.d, z21.d, #1"); + COMPARE(asr(z11.VnD(), z20.VnD(), 64), "asr z11.d, z20.d, #64"); + COMPARE(lsr(z4.VnB(), z27.VnB(), 3), "lsr z4.b, z27.b, #3"); + COMPARE(lsr(z5.VnB(), z26.VnB(), 7), "lsr z5.b, z26.b, #7"); + COMPARE(lsr(z6.VnH(), z25.VnH(), 8), "lsr z6.h, z25.h, #8"); + COMPARE(lsr(z7.VnH(), z24.VnH(), 15), "lsr z7.h, z24.h, #15"); + COMPARE(lsr(z8.VnS(), z23.VnS(), 14), "lsr z8.s, z23.s, #14"); + COMPARE(lsr(z9.VnS(), z22.VnS(), 31), "lsr z9.s, z22.s, #31"); + COMPARE(lsr(z10.VnD(), z21.VnD(), 30), "lsr z10.d, z21.d, #30"); + COMPARE(lsr(z11.VnD(), z20.VnD(), 63), "lsr z11.d, z20.d, #63"); + COMPARE(lsl(z4.VnB(), z27.VnB(), 4), "lsl z4.b, z27.b, #4"); + 
COMPARE(lsl(z5.VnB(), z26.VnB(), 6), "lsl z5.b, z26.b, #6"); + COMPARE(lsl(z6.VnH(), z25.VnH(), 10), "lsl z6.h, z25.h, #10"); + COMPARE(lsl(z7.VnH(), z24.VnH(), 14), "lsl z7.h, z24.h, #14"); + COMPARE(lsl(z8.VnS(), z23.VnS(), 21), "lsl z8.s, z23.s, #21"); + COMPARE(lsl(z9.VnS(), z22.VnS(), 30), "lsl z9.s, z22.s, #30"); + COMPARE(lsl(z10.VnD(), z21.VnD(), 44), "lsl z10.d, z21.d, #44"); + COMPARE(lsl(z11.VnD(), z20.VnD(), 62), "lsl z11.d, z20.d, #62"); + COMPARE(asr(z12.VnB(), z8.VnB(), z14.VnD()), "asr z12.b, z8.b, z14.d"); + COMPARE(asr(z14.VnH(), z8.VnH(), z12.VnD()), "asr z14.h, z8.h, z12.d"); + COMPARE(asr(z16.VnS(), z8.VnS(), z10.VnD()), "asr z16.s, z8.s, z10.d"); + COMPARE(lsl(z21.VnB(), z16.VnB(), z15.VnD()), "lsl z21.b, z16.b, z15.d"); + COMPARE(lsl(z23.VnH(), z16.VnH(), z13.VnD()), "lsl z23.h, z16.h, z13.d"); + COMPARE(lsl(z25.VnS(), z16.VnS(), z11.VnD()), "lsl z25.s, z16.s, z11.d"); + COMPARE(lsr(z16.VnB(), z19.VnB(), z2.VnD()), "lsr z16.b, z19.b, z2.d"); + COMPARE(lsr(z18.VnH(), z19.VnH(), z4.VnD()), "lsr z18.h, z19.h, z4.d"); + COMPARE(lsr(z20.VnS(), z19.VnS(), z6.VnD()), "lsr z20.s, z19.s, z6.d"); CLEANUP(); } @@ -825,9 +870,9 @@ TEST(sve_vector_inc_element_count) { TEST(sve_fp_accumulating_reduction) { SETUP(); - COMPARE_PREFIX(fadda(h10, p2, h10, z0.VnH()), "fadda h10, p2, h10, z0.h"); - COMPARE_PREFIX(fadda(s10, p2, s10, z0.VnS()), "fadda s10, p2, s10, z0.s"); - COMPARE_PREFIX(fadda(d10, p2, d10, z0.VnD()), "fadda d10, p2, d10, z0.d"); + COMPARE(fadda(h10, p2, h10, z0.VnH()), "fadda h10, p2, h10, z0.h"); + COMPARE(fadda(s10, p2, s10, z0.VnS()), "fadda s10, p2, s10, z0.s"); + COMPARE(fadda(d10, p2, d10, z0.VnD()), "fadda d10, p2, d10, z0.d"); CLEANUP(); } @@ -835,49 +880,49 @@ TEST(sve_fp_accumulating_reduction) { TEST(sve_fp_arithmetic_predicated) { SETUP(); - COMPARE_PREFIX(fdiv(z9.VnH(), p4.Merging(), z9.VnH(), z4.VnH()), - "fdiv z9.h, p4/m, z9.h, z4.h"); - COMPARE_PREFIX(fdiv(z19.VnS(), p5.Merging(), z19.VnS(), z14.VnS()), - "fdiv z19.s, p5/m, 
z19.s, z14.s"); - COMPARE_PREFIX(fdiv(z29.VnD(), p6.Merging(), z29.VnD(), z24.VnD()), - "fdiv z29.d, p6/m, z29.d, z24.d"); - COMPARE_PREFIX(fdivr(z21.VnH(), p3.Merging(), z21.VnH(), z11.VnH()), - "fdivr z21.h, p3/m, z21.h, z11.h"); - COMPARE_PREFIX(fdivr(z23.VnS(), p5.Merging(), z23.VnS(), z15.VnS()), - "fdivr z23.s, p5/m, z23.s, z15.s"); - COMPARE_PREFIX(fdivr(z25.VnD(), p7.Merging(), z25.VnD(), z19.VnD()), - "fdivr z25.d, p7/m, z25.d, z19.d"); - COMPARE_PREFIX(fmax(z4.VnH(), p1.Merging(), z4.VnH(), z29.VnH()), - "fmax z4.h, p1/m, z4.h, z29.h"); - COMPARE_PREFIX(fmax(z14.VnS(), p3.Merging(), z14.VnS(), z29.VnS()), - "fmax z14.s, p3/m, z14.s, z29.s"); - COMPARE_PREFIX(fmax(z24.VnD(), p5.Merging(), z24.VnD(), z29.VnD()), - "fmax z24.d, p5/m, z24.d, z29.d"); - COMPARE_PREFIX(fmin(z1.VnH(), p2.Merging(), z1.VnH(), z30.VnH()), - "fmin z1.h, p2/m, z1.h, z30.h"); - COMPARE_PREFIX(fmin(z11.VnS(), p4.Merging(), z11.VnS(), z30.VnS()), - "fmin z11.s, p4/m, z11.s, z30.s"); - COMPARE_PREFIX(fmin(z21.VnD(), p6.Merging(), z21.VnD(), z30.VnD()), - "fmin z21.d, p6/m, z21.d, z30.d"); - - COMPARE_PREFIX(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 0), - "ftmad z21.h, z21.h, z22.h, #0"); - COMPARE_PREFIX(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 2), - "ftmad z21.h, z21.h, z22.h, #2"); - COMPARE_PREFIX(ftmad(z2.VnH(), z2.VnH(), z21.VnH(), 7), - "ftmad z2.h, z2.h, z21.h, #7"); - COMPARE_PREFIX(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 0), - "ftmad z21.s, z21.s, z22.s, #0"); - COMPARE_PREFIX(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 2), - "ftmad z21.s, z21.s, z22.s, #2"); - COMPARE_PREFIX(ftmad(z2.VnS(), z2.VnS(), z21.VnS(), 7), - "ftmad z2.s, z2.s, z21.s, #7"); - COMPARE_PREFIX(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 0), - "ftmad z21.d, z21.d, z22.d, #0"); - COMPARE_PREFIX(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 2), - "ftmad z21.d, z21.d, z22.d, #2"); - COMPARE_PREFIX(ftmad(z2.VnD(), z2.VnD(), z21.VnD(), 7), - "ftmad z2.d, z2.d, z21.d, #7"); + COMPARE(fdiv(z9.VnH(), p4.Merging(), z9.VnH(), z4.VnH()), + 
"fdiv z9.h, p4/m, z9.h, z4.h"); + COMPARE(fdiv(z19.VnS(), p5.Merging(), z19.VnS(), z14.VnS()), + "fdiv z19.s, p5/m, z19.s, z14.s"); + COMPARE(fdiv(z29.VnD(), p6.Merging(), z29.VnD(), z24.VnD()), + "fdiv z29.d, p6/m, z29.d, z24.d"); + COMPARE(fdivr(z21.VnH(), p3.Merging(), z21.VnH(), z11.VnH()), + "fdivr z21.h, p3/m, z21.h, z11.h"); + COMPARE(fdivr(z23.VnS(), p5.Merging(), z23.VnS(), z15.VnS()), + "fdivr z23.s, p5/m, z23.s, z15.s"); + COMPARE(fdivr(z25.VnD(), p7.Merging(), z25.VnD(), z19.VnD()), + "fdivr z25.d, p7/m, z25.d, z19.d"); + COMPARE(fmax(z4.VnH(), p1.Merging(), z4.VnH(), z29.VnH()), + "fmax z4.h, p1/m, z4.h, z29.h"); + COMPARE(fmax(z14.VnS(), p3.Merging(), z14.VnS(), z29.VnS()), + "fmax z14.s, p3/m, z14.s, z29.s"); + COMPARE(fmax(z24.VnD(), p5.Merging(), z24.VnD(), z29.VnD()), + "fmax z24.d, p5/m, z24.d, z29.d"); + COMPARE(fmin(z1.VnH(), p2.Merging(), z1.VnH(), z30.VnH()), + "fmin z1.h, p2/m, z1.h, z30.h"); + COMPARE(fmin(z11.VnS(), p4.Merging(), z11.VnS(), z30.VnS()), + "fmin z11.s, p4/m, z11.s, z30.s"); + COMPARE(fmin(z21.VnD(), p6.Merging(), z21.VnD(), z30.VnD()), + "fmin z21.d, p6/m, z21.d, z30.d"); + + COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 0), + "ftmad z21.h, z21.h, z22.h, #0"); + COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 2), + "ftmad z21.h, z21.h, z22.h, #2"); + COMPARE(ftmad(z2.VnH(), z2.VnH(), z21.VnH(), 7), + "ftmad z2.h, z2.h, z21.h, #7"); + COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 0), + "ftmad z21.s, z21.s, z22.s, #0"); + COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 2), + "ftmad z21.s, z21.s, z22.s, #2"); + COMPARE(ftmad(z2.VnS(), z2.VnS(), z21.VnS(), 7), + "ftmad z2.s, z2.s, z21.s, #7"); + COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 0), + "ftmad z21.d, z21.d, z22.d, #0"); + COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 2), + "ftmad z21.d, z21.d, z22.d, #2"); + COMPARE(ftmad(z2.VnD(), z2.VnD(), z21.VnD(), 7), + "ftmad z2.d, z2.d, z21.d, #7"); COMPARE_MACRO(Ftmad(z3.VnH(), z2.VnH(), z1.VnH(), 1), "movprfx z3, z2\n" @@ 
-887,60 +932,60 @@ TEST(sve_fp_arithmetic_predicated) { "movprfx z6, z4\n" "ftmad z6.s, z6.s, z31.s, #1"); - COMPARE_PREFIX(fabd(z31.VnH(), p7.Merging(), z31.VnH(), z17.VnH()), - "fabd z31.h, p7/m, z31.h, z17.h"); - COMPARE_PREFIX(fabd(z31.VnS(), p7.Merging(), z31.VnS(), z17.VnS()), - "fabd z31.s, p7/m, z31.s, z17.s"); - COMPARE_PREFIX(fabd(z31.VnD(), p7.Merging(), z31.VnD(), z17.VnD()), - "fabd z31.d, p7/m, z31.d, z17.d"); - COMPARE_PREFIX(fadd(z24.VnH(), p2.Merging(), z24.VnH(), z15.VnH()), - "fadd z24.h, p2/m, z24.h, z15.h"); - COMPARE_PREFIX(fadd(z24.VnS(), p2.Merging(), z24.VnS(), z15.VnS()), - "fadd z24.s, p2/m, z24.s, z15.s"); - COMPARE_PREFIX(fadd(z24.VnD(), p2.Merging(), z24.VnD(), z15.VnD()), - "fadd z24.d, p2/m, z24.d, z15.d"); - COMPARE_PREFIX(fmaxnm(z15.VnH(), p4.Merging(), z15.VnH(), z3.VnH()), - "fmaxnm z15.h, p4/m, z15.h, z3.h"); - COMPARE_PREFIX(fmaxnm(z15.VnS(), p4.Merging(), z15.VnS(), z3.VnS()), - "fmaxnm z15.s, p4/m, z15.s, z3.s"); - COMPARE_PREFIX(fmaxnm(z15.VnD(), p4.Merging(), z15.VnD(), z3.VnD()), - "fmaxnm z15.d, p4/m, z15.d, z3.d"); - COMPARE_PREFIX(fminnm(z19.VnH(), p2.Merging(), z19.VnH(), z29.VnH()), - "fminnm z19.h, p2/m, z19.h, z29.h"); - COMPARE_PREFIX(fminnm(z19.VnS(), p2.Merging(), z19.VnS(), z29.VnS()), - "fminnm z19.s, p2/m, z19.s, z29.s"); - COMPARE_PREFIX(fminnm(z19.VnD(), p2.Merging(), z19.VnD(), z29.VnD()), - "fminnm z19.d, p2/m, z19.d, z29.d"); - COMPARE_PREFIX(fmulx(z30.VnH(), p6.Merging(), z30.VnH(), z20.VnH()), - "fmulx z30.h, p6/m, z30.h, z20.h"); - COMPARE_PREFIX(fmulx(z30.VnS(), p6.Merging(), z30.VnS(), z20.VnS()), - "fmulx z30.s, p6/m, z30.s, z20.s"); - COMPARE_PREFIX(fmulx(z30.VnD(), p6.Merging(), z30.VnD(), z20.VnD()), - "fmulx z30.d, p6/m, z30.d, z20.d"); - COMPARE_PREFIX(fmul(z26.VnH(), p2.Merging(), z26.VnH(), z6.VnH()), - "fmul z26.h, p2/m, z26.h, z6.h"); - COMPARE_PREFIX(fmul(z26.VnS(), p2.Merging(), z26.VnS(), z6.VnS()), - "fmul z26.s, p2/m, z26.s, z6.s"); - COMPARE_PREFIX(fmul(z26.VnD(), p2.Merging(), 
z26.VnD(), z6.VnD()), - "fmul z26.d, p2/m, z26.d, z6.d"); - COMPARE_PREFIX(fscale(z8.VnH(), p3.Merging(), z8.VnH(), z6.VnH()), - "fscale z8.h, p3/m, z8.h, z6.h"); - COMPARE_PREFIX(fscale(z8.VnS(), p3.Merging(), z8.VnS(), z6.VnS()), - "fscale z8.s, p3/m, z8.s, z6.s"); - COMPARE_PREFIX(fscale(z8.VnD(), p3.Merging(), z8.VnD(), z6.VnD()), - "fscale z8.d, p3/m, z8.d, z6.d"); - COMPARE_PREFIX(fsubr(z16.VnH(), p5.Merging(), z16.VnH(), z15.VnH()), - "fsubr z16.h, p5/m, z16.h, z15.h"); - COMPARE_PREFIX(fsubr(z16.VnS(), p5.Merging(), z16.VnS(), z15.VnS()), - "fsubr z16.s, p5/m, z16.s, z15.s"); - COMPARE_PREFIX(fsubr(z16.VnD(), p5.Merging(), z16.VnD(), z15.VnD()), - "fsubr z16.d, p5/m, z16.d, z15.d"); - COMPARE_PREFIX(fsub(z16.VnH(), p5.Merging(), z16.VnH(), z26.VnH()), - "fsub z16.h, p5/m, z16.h, z26.h"); - COMPARE_PREFIX(fsub(z16.VnS(), p5.Merging(), z16.VnS(), z26.VnS()), - "fsub z16.s, p5/m, z16.s, z26.s"); - COMPARE_PREFIX(fsub(z16.VnD(), p5.Merging(), z16.VnD(), z26.VnD()), - "fsub z16.d, p5/m, z16.d, z26.d"); + COMPARE(fabd(z31.VnH(), p7.Merging(), z31.VnH(), z17.VnH()), + "fabd z31.h, p7/m, z31.h, z17.h"); + COMPARE(fabd(z31.VnS(), p7.Merging(), z31.VnS(), z17.VnS()), + "fabd z31.s, p7/m, z31.s, z17.s"); + COMPARE(fabd(z31.VnD(), p7.Merging(), z31.VnD(), z17.VnD()), + "fabd z31.d, p7/m, z31.d, z17.d"); + COMPARE(fadd(z24.VnH(), p2.Merging(), z24.VnH(), z15.VnH()), + "fadd z24.h, p2/m, z24.h, z15.h"); + COMPARE(fadd(z24.VnS(), p2.Merging(), z24.VnS(), z15.VnS()), + "fadd z24.s, p2/m, z24.s, z15.s"); + COMPARE(fadd(z24.VnD(), p2.Merging(), z24.VnD(), z15.VnD()), + "fadd z24.d, p2/m, z24.d, z15.d"); + COMPARE(fmaxnm(z15.VnH(), p4.Merging(), z15.VnH(), z3.VnH()), + "fmaxnm z15.h, p4/m, z15.h, z3.h"); + COMPARE(fmaxnm(z15.VnS(), p4.Merging(), z15.VnS(), z3.VnS()), + "fmaxnm z15.s, p4/m, z15.s, z3.s"); + COMPARE(fmaxnm(z15.VnD(), p4.Merging(), z15.VnD(), z3.VnD()), + "fmaxnm z15.d, p4/m, z15.d, z3.d"); + COMPARE(fminnm(z19.VnH(), p2.Merging(), z19.VnH(), z29.VnH()), + 
"fminnm z19.h, p2/m, z19.h, z29.h"); + COMPARE(fminnm(z19.VnS(), p2.Merging(), z19.VnS(), z29.VnS()), + "fminnm z19.s, p2/m, z19.s, z29.s"); + COMPARE(fminnm(z19.VnD(), p2.Merging(), z19.VnD(), z29.VnD()), + "fminnm z19.d, p2/m, z19.d, z29.d"); + COMPARE(fmulx(z30.VnH(), p6.Merging(), z30.VnH(), z20.VnH()), + "fmulx z30.h, p6/m, z30.h, z20.h"); + COMPARE(fmulx(z30.VnS(), p6.Merging(), z30.VnS(), z20.VnS()), + "fmulx z30.s, p6/m, z30.s, z20.s"); + COMPARE(fmulx(z30.VnD(), p6.Merging(), z30.VnD(), z20.VnD()), + "fmulx z30.d, p6/m, z30.d, z20.d"); + COMPARE(fmul(z26.VnH(), p2.Merging(), z26.VnH(), z6.VnH()), + "fmul z26.h, p2/m, z26.h, z6.h"); + COMPARE(fmul(z26.VnS(), p2.Merging(), z26.VnS(), z6.VnS()), + "fmul z26.s, p2/m, z26.s, z6.s"); + COMPARE(fmul(z26.VnD(), p2.Merging(), z26.VnD(), z6.VnD()), + "fmul z26.d, p2/m, z26.d, z6.d"); + COMPARE(fscale(z8.VnH(), p3.Merging(), z8.VnH(), z6.VnH()), + "fscale z8.h, p3/m, z8.h, z6.h"); + COMPARE(fscale(z8.VnS(), p3.Merging(), z8.VnS(), z6.VnS()), + "fscale z8.s, p3/m, z8.s, z6.s"); + COMPARE(fscale(z8.VnD(), p3.Merging(), z8.VnD(), z6.VnD()), + "fscale z8.d, p3/m, z8.d, z6.d"); + COMPARE(fsubr(z16.VnH(), p5.Merging(), z16.VnH(), z15.VnH()), + "fsubr z16.h, p5/m, z16.h, z15.h"); + COMPARE(fsubr(z16.VnS(), p5.Merging(), z16.VnS(), z15.VnS()), + "fsubr z16.s, p5/m, z16.s, z15.s"); + COMPARE(fsubr(z16.VnD(), p5.Merging(), z16.VnD(), z15.VnD()), + "fsubr z16.d, p5/m, z16.d, z15.d"); + COMPARE(fsub(z16.VnH(), p5.Merging(), z16.VnH(), z26.VnH()), + "fsub z16.h, p5/m, z16.h, z26.h"); + COMPARE(fsub(z16.VnS(), p5.Merging(), z16.VnS(), z26.VnS()), + "fsub z16.s, p5/m, z16.s, z26.s"); + COMPARE(fsub(z16.VnD(), p5.Merging(), z16.VnD(), z26.VnD()), + "fsub z16.d, p5/m, z16.d, z26.d"); COMPARE_MACRO(Fsub(z0.VnH(), p0.Merging(), z1.VnH(), z0.VnH()), "fsubr z0.h, p0/m, z0.h, z1.h"); @@ -1005,54 +1050,54 @@ TEST(sve_fp_arithmetic_predicated) { "movprfx z7.s, p7/m, z8.s\n" "fscale z7.s, p7/m, z7.s, z31.s"); - COMPARE_PREFIX(fadd(z18.VnH(), 
p0.Merging(), z18.VnH(), 0.5), - "fadd z18.h, p0/m, z18.h, #0.5"); - COMPARE_PREFIX(fadd(z18.VnS(), p0.Merging(), z18.VnS(), 1.0), - "fadd z18.s, p0/m, z18.s, #1.0"); - COMPARE_PREFIX(fadd(z18.VnD(), p0.Merging(), z18.VnD(), 1.0), - "fadd z18.d, p0/m, z18.d, #1.0"); - COMPARE_PREFIX(fmaxnm(z6.VnH(), p1.Merging(), z6.VnH(), 0.0), - "fmaxnm z6.h, p1/m, z6.h, #0.0"); - COMPARE_PREFIX(fmaxnm(z6.VnS(), p1.Merging(), z6.VnS(), 1.0), - "fmaxnm z6.s, p1/m, z6.s, #1.0"); - COMPARE_PREFIX(fmaxnm(z6.VnD(), p1.Merging(), z6.VnD(), 1.0), - "fmaxnm z6.d, p1/m, z6.d, #1.0"); - COMPARE_PREFIX(fmax(z8.VnH(), p6.Merging(), z8.VnH(), 0.0), - "fmax z8.h, p6/m, z8.h, #0.0"); - COMPARE_PREFIX(fmax(z8.VnS(), p6.Merging(), z8.VnS(), 0.0), - "fmax z8.s, p6/m, z8.s, #0.0"); - COMPARE_PREFIX(fmax(z8.VnD(), p6.Merging(), z8.VnD(), 1.0), - "fmax z8.d, p6/m, z8.d, #1.0"); - COMPARE_PREFIX(fminnm(z26.VnH(), p0.Merging(), z26.VnH(), 1.0), - "fminnm z26.h, p0/m, z26.h, #1.0"); - COMPARE_PREFIX(fminnm(z26.VnS(), p0.Merging(), z26.VnS(), 0.0), - "fminnm z26.s, p0/m, z26.s, #0.0"); - COMPARE_PREFIX(fminnm(z26.VnD(), p0.Merging(), z26.VnD(), 1.0), - "fminnm z26.d, p0/m, z26.d, #1.0"); - COMPARE_PREFIX(fmin(z22.VnH(), p0.Merging(), z22.VnH(), 1.0), - "fmin z22.h, p0/m, z22.h, #1.0"); - COMPARE_PREFIX(fmin(z22.VnS(), p0.Merging(), z22.VnS(), 1.0), - "fmin z22.s, p0/m, z22.s, #1.0"); - COMPARE_PREFIX(fmin(z22.VnD(), p0.Merging(), z22.VnD(), 0.0), - "fmin z22.d, p0/m, z22.d, #0.0"); - COMPARE_PREFIX(fmul(z21.VnH(), p3.Merging(), z21.VnH(), 0.5), - "fmul z21.h, p3/m, z21.h, #0.5"); - COMPARE_PREFIX(fmul(z21.VnS(), p3.Merging(), z21.VnS(), 2.0), - "fmul z21.s, p3/m, z21.s, #2.0"); - COMPARE_PREFIX(fmul(z21.VnD(), p3.Merging(), z21.VnD(), 2.0), - "fmul z21.d, p3/m, z21.d, #2.0"); - COMPARE_PREFIX(fsubr(z21.VnH(), p3.Merging(), z21.VnH(), 1.0), - "fsubr z21.h, p3/m, z21.h, #1.0"); - COMPARE_PREFIX(fsubr(z21.VnS(), p3.Merging(), z21.VnS(), 0.5), - "fsubr z21.s, p3/m, z21.s, #0.5"); - 
COMPARE_PREFIX(fsubr(z21.VnD(), p3.Merging(), z21.VnD(), 1.0), - "fsubr z21.d, p3/m, z21.d, #1.0"); - COMPARE_PREFIX(fsub(z26.VnH(), p4.Merging(), z26.VnH(), 0.5), - "fsub z26.h, p4/m, z26.h, #0.5"); - COMPARE_PREFIX(fsub(z26.VnS(), p4.Merging(), z26.VnS(), 1.0), - "fsub z26.s, p4/m, z26.s, #1.0"); - COMPARE_PREFIX(fsub(z26.VnD(), p4.Merging(), z26.VnD(), 0.5), - "fsub z26.d, p4/m, z26.d, #0.5"); + COMPARE(fadd(z18.VnH(), p0.Merging(), z18.VnH(), 0.5), + "fadd z18.h, p0/m, z18.h, #0.5"); + COMPARE(fadd(z18.VnS(), p0.Merging(), z18.VnS(), 1.0), + "fadd z18.s, p0/m, z18.s, #1.0"); + COMPARE(fadd(z18.VnD(), p0.Merging(), z18.VnD(), 1.0), + "fadd z18.d, p0/m, z18.d, #1.0"); + COMPARE(fmaxnm(z6.VnH(), p1.Merging(), z6.VnH(), 0.0), + "fmaxnm z6.h, p1/m, z6.h, #0.0"); + COMPARE(fmaxnm(z6.VnS(), p1.Merging(), z6.VnS(), 1.0), + "fmaxnm z6.s, p1/m, z6.s, #1.0"); + COMPARE(fmaxnm(z6.VnD(), p1.Merging(), z6.VnD(), 1.0), + "fmaxnm z6.d, p1/m, z6.d, #1.0"); + COMPARE(fmax(z8.VnH(), p6.Merging(), z8.VnH(), 0.0), + "fmax z8.h, p6/m, z8.h, #0.0"); + COMPARE(fmax(z8.VnS(), p6.Merging(), z8.VnS(), 0.0), + "fmax z8.s, p6/m, z8.s, #0.0"); + COMPARE(fmax(z8.VnD(), p6.Merging(), z8.VnD(), 1.0), + "fmax z8.d, p6/m, z8.d, #1.0"); + COMPARE(fminnm(z26.VnH(), p0.Merging(), z26.VnH(), 1.0), + "fminnm z26.h, p0/m, z26.h, #1.0"); + COMPARE(fminnm(z26.VnS(), p0.Merging(), z26.VnS(), 0.0), + "fminnm z26.s, p0/m, z26.s, #0.0"); + COMPARE(fminnm(z26.VnD(), p0.Merging(), z26.VnD(), 1.0), + "fminnm z26.d, p0/m, z26.d, #1.0"); + COMPARE(fmin(z22.VnH(), p0.Merging(), z22.VnH(), 1.0), + "fmin z22.h, p0/m, z22.h, #1.0"); + COMPARE(fmin(z22.VnS(), p0.Merging(), z22.VnS(), 1.0), + "fmin z22.s, p0/m, z22.s, #1.0"); + COMPARE(fmin(z22.VnD(), p0.Merging(), z22.VnD(), 0.0), + "fmin z22.d, p0/m, z22.d, #0.0"); + COMPARE(fmul(z21.VnH(), p3.Merging(), z21.VnH(), 0.5), + "fmul z21.h, p3/m, z21.h, #0.5"); + COMPARE(fmul(z21.VnS(), p3.Merging(), z21.VnS(), 2.0), + "fmul z21.s, p3/m, z21.s, #2.0"); + 
COMPARE(fmul(z21.VnD(), p3.Merging(), z21.VnD(), 2.0), + "fmul z21.d, p3/m, z21.d, #2.0"); + COMPARE(fsubr(z21.VnH(), p3.Merging(), z21.VnH(), 1.0), + "fsubr z21.h, p3/m, z21.h, #1.0"); + COMPARE(fsubr(z21.VnS(), p3.Merging(), z21.VnS(), 0.5), + "fsubr z21.s, p3/m, z21.s, #0.5"); + COMPARE(fsubr(z21.VnD(), p3.Merging(), z21.VnD(), 1.0), + "fsubr z21.d, p3/m, z21.d, #1.0"); + COMPARE(fsub(z26.VnH(), p4.Merging(), z26.VnH(), 0.5), + "fsub z26.h, p4/m, z26.h, #0.5"); + COMPARE(fsub(z26.VnS(), p4.Merging(), z26.VnS(), 1.0), + "fsub z26.s, p4/m, z26.s, #1.0"); + COMPARE(fsub(z26.VnD(), p4.Merging(), z26.VnD(), 0.5), + "fsub z26.d, p4/m, z26.d, #0.5"); COMPARE_MACRO(Fadd(z18.VnH(), p0.Merging(), z8.VnH(), 1.0), "movprfx z18.h, p0/m, z8.h\n" @@ -1082,6 +1127,9 @@ TEST(sve_fp_arithmetic_predicated) { } TEST(sve_fp_arithmetic_predicated_macro_fast_nan_propagation) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + // Shadow the `MacroAssembler` type so that the test macros work without // modification. typedef FastNaNPropagationMacroAssembler MacroAssembler; @@ -1131,9 +1179,14 @@ TEST(sve_fp_arithmetic_predicated_macro_fast_nan_propagation) { "fmin z15.d, p6/m, z15.d, z8.d"); CLEANUP(); + +#pragma GCC diagnostic pop } TEST(sve_fp_arithmetic_predicated_macro_strict_nan_propagation) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + // Shadow the `MacroAssembler` type so that the test macros work without // modification. 
typedef StrictNaNPropagationMacroAssembler MacroAssembler; @@ -1187,47 +1240,37 @@ TEST(sve_fp_arithmetic_predicated_macro_strict_nan_propagation) { "fmin z15.d, p6/m, z15.d, z8.d"); CLEANUP(); + +#pragma GCC diagnostic pop } TEST(sve_fp_arithmetic_unpredicated) { SETUP(); - COMPARE_PREFIX(fadd(z5.VnH(), z24.VnH(), z11.VnH()), - "fadd z5.h, z24.h, z11.h"); - COMPARE_PREFIX(fadd(z15.VnS(), z14.VnS(), z12.VnS()), - "fadd z15.s, z14.s, z12.s"); - COMPARE_PREFIX(fadd(z25.VnD(), z4.VnD(), z13.VnD()), - "fadd z25.d, z4.d, z13.d"); - COMPARE_PREFIX(fmul(z9.VnH(), z24.VnH(), z10.VnH()), - "fmul z9.h, z24.h, z10.h"); - COMPARE_PREFIX(fmul(z19.VnS(), z14.VnS(), z0.VnS()), - "fmul z19.s, z14.s, z0.s"); - COMPARE_PREFIX(fmul(z29.VnD(), z4.VnD(), z20.VnD()), - "fmul z29.d, z4.d, z20.d"); - COMPARE_PREFIX(fsub(z4.VnH(), z14.VnH(), z29.VnH()), - "fsub z4.h, z14.h, z29.h"); - COMPARE_PREFIX(fsub(z14.VnS(), z24.VnS(), z9.VnS()), - "fsub z14.s, z24.s, z9.s"); - COMPARE_PREFIX(fsub(z14.VnD(), z4.VnD(), z19.VnD()), - "fsub z14.d, z4.d, z19.d"); - COMPARE_PREFIX(frecps(z14.VnH(), z29.VnH(), z18.VnH()), - "frecps z14.h, z29.h, z18.h"); - COMPARE_PREFIX(frecps(z14.VnS(), z29.VnS(), z18.VnS()), - "frecps z14.s, z29.s, z18.s"); - COMPARE_PREFIX(frecps(z14.VnD(), z29.VnD(), z18.VnD()), - "frecps z14.d, z29.d, z18.d"); - COMPARE_PREFIX(frsqrts(z5.VnH(), z6.VnH(), z28.VnH()), - "frsqrts z5.h, z6.h, z28.h"); - COMPARE_PREFIX(frsqrts(z5.VnS(), z6.VnS(), z28.VnS()), - "frsqrts z5.s, z6.s, z28.s"); - COMPARE_PREFIX(frsqrts(z5.VnD(), z6.VnD(), z28.VnD()), - "frsqrts z5.d, z6.d, z28.d"); - COMPARE_PREFIX(ftsmul(z21.VnH(), z17.VnH(), z24.VnH()), - "ftsmul z21.h, z17.h, z24.h"); - COMPARE_PREFIX(ftsmul(z21.VnS(), z17.VnS(), z24.VnS()), - "ftsmul z21.s, z17.s, z24.s"); - COMPARE_PREFIX(ftsmul(z21.VnD(), z17.VnD(), z24.VnD()), - "ftsmul z21.d, z17.d, z24.d"); + COMPARE(fadd(z5.VnH(), z24.VnH(), z11.VnH()), "fadd z5.h, z24.h, z11.h"); + COMPARE(fadd(z15.VnS(), z14.VnS(), z12.VnS()), "fadd z15.s, z14.s, 
z12.s"); + COMPARE(fadd(z25.VnD(), z4.VnD(), z13.VnD()), "fadd z25.d, z4.d, z13.d"); + COMPARE(fmul(z9.VnH(), z24.VnH(), z10.VnH()), "fmul z9.h, z24.h, z10.h"); + COMPARE(fmul(z19.VnS(), z14.VnS(), z0.VnS()), "fmul z19.s, z14.s, z0.s"); + COMPARE(fmul(z29.VnD(), z4.VnD(), z20.VnD()), "fmul z29.d, z4.d, z20.d"); + COMPARE(fsub(z4.VnH(), z14.VnH(), z29.VnH()), "fsub z4.h, z14.h, z29.h"); + COMPARE(fsub(z14.VnS(), z24.VnS(), z9.VnS()), "fsub z14.s, z24.s, z9.s"); + COMPARE(fsub(z14.VnD(), z4.VnD(), z19.VnD()), "fsub z14.d, z4.d, z19.d"); + COMPARE(frecps(z14.VnH(), z29.VnH(), z18.VnH()), + "frecps z14.h, z29.h, z18.h"); + COMPARE(frecps(z14.VnS(), z29.VnS(), z18.VnS()), + "frecps z14.s, z29.s, z18.s"); + COMPARE(frecps(z14.VnD(), z29.VnD(), z18.VnD()), + "frecps z14.d, z29.d, z18.d"); + COMPARE(frsqrts(z5.VnH(), z6.VnH(), z28.VnH()), "frsqrts z5.h, z6.h, z28.h"); + COMPARE(frsqrts(z5.VnS(), z6.VnS(), z28.VnS()), "frsqrts z5.s, z6.s, z28.s"); + COMPARE(frsqrts(z5.VnD(), z6.VnD(), z28.VnD()), "frsqrts z5.d, z6.d, z28.d"); + COMPARE(ftsmul(z21.VnH(), z17.VnH(), z24.VnH()), + "ftsmul z21.h, z17.h, z24.h"); + COMPARE(ftsmul(z21.VnS(), z17.VnS(), z24.VnS()), + "ftsmul z21.s, z17.s, z24.s"); + COMPARE(ftsmul(z21.VnD(), z17.VnD(), z24.VnD()), + "ftsmul z21.d, z17.d, z24.d"); CLEANUP(); } @@ -1236,48 +1279,48 @@ TEST(sve_fp_compare_vectors) { SETUP(); - COMPARE_PREFIX(facge(p1.VnH(), p3.Zeroing(), z22.VnH(), z25.VnH()), - "facge p1.h, p3/z, z22.h, z25.h"); - COMPARE_PREFIX(facge(p1.VnS(), p3.Zeroing(), z22.VnS(), z25.VnS()), - "facge p1.s, p3/z, z22.s, z25.s"); - COMPARE_PREFIX(facge(p1.VnD(), p3.Zeroing(), z22.VnD(), z25.VnD()), - "facge p1.d, p3/z, z22.d, z25.d"); - COMPARE_PREFIX(facgt(p8.VnH(), p7.Zeroing(), z25.VnH(), z17.VnH()), - "facgt p8.h, p7/z, z25.h, z17.h"); - COMPARE_PREFIX(facgt(p8.VnS(), p7.Zeroing(), z25.VnS(), z17.VnS()), - "facgt p8.s, p7/z, z25.s, z17.s"); - COMPARE_PREFIX(facgt(p8.VnD(), p7.Zeroing(), z25.VnD(), z17.VnD()), - "facgt p8.d, p7/z, z25.d, 
z17.d"); - COMPARE_PREFIX(fcmeq(p10.VnH(), p2.Zeroing(), z1.VnH(), z17.VnH()), - "fcmeq p10.h, p2/z, z1.h, z17.h"); - COMPARE_PREFIX(fcmeq(p10.VnS(), p2.Zeroing(), z1.VnS(), z17.VnS()), - "fcmeq p10.s, p2/z, z1.s, z17.s"); - COMPARE_PREFIX(fcmeq(p10.VnD(), p2.Zeroing(), z1.VnD(), z17.VnD()), - "fcmeq p10.d, p2/z, z1.d, z17.d"); - COMPARE_PREFIX(fcmge(p0.VnH(), p0.Zeroing(), z1.VnH(), z0.VnH()), - "fcmge p0.h, p0/z, z1.h, z0.h"); - COMPARE_PREFIX(fcmge(p0.VnS(), p0.Zeroing(), z1.VnS(), z0.VnS()), - "fcmge p0.s, p0/z, z1.s, z0.s"); - COMPARE_PREFIX(fcmge(p0.VnD(), p0.Zeroing(), z1.VnD(), z0.VnD()), - "fcmge p0.d, p0/z, z1.d, z0.d"); - COMPARE_PREFIX(fcmgt(p15.VnH(), p5.Zeroing(), z26.VnH(), z5.VnH()), - "fcmgt p15.h, p5/z, z26.h, z5.h"); - COMPARE_PREFIX(fcmgt(p15.VnS(), p5.Zeroing(), z26.VnS(), z5.VnS()), - "fcmgt p15.s, p5/z, z26.s, z5.s"); - COMPARE_PREFIX(fcmgt(p15.VnD(), p5.Zeroing(), z26.VnD(), z5.VnD()), - "fcmgt p15.d, p5/z, z26.d, z5.d"); - COMPARE_PREFIX(fcmne(p2.VnH(), p1.Zeroing(), z9.VnH(), z4.VnH()), - "fcmne p2.h, p1/z, z9.h, z4.h"); - COMPARE_PREFIX(fcmne(p2.VnS(), p1.Zeroing(), z9.VnS(), z4.VnS()), - "fcmne p2.s, p1/z, z9.s, z4.s"); - COMPARE_PREFIX(fcmne(p2.VnD(), p1.Zeroing(), z9.VnD(), z4.VnD()), - "fcmne p2.d, p1/z, z9.d, z4.d"); - COMPARE_PREFIX(fcmuo(p6.VnH(), p4.Zeroing(), z10.VnH(), z21.VnH()), - "fcmuo p6.h, p4/z, z10.h, z21.h"); - COMPARE_PREFIX(fcmuo(p6.VnS(), p4.Zeroing(), z10.VnS(), z21.VnS()), - "fcmuo p6.s, p4/z, z10.s, z21.s"); - COMPARE_PREFIX(fcmuo(p6.VnD(), p4.Zeroing(), z10.VnD(), z21.VnD()), - "fcmuo p6.d, p4/z, z10.d, z21.d"); + COMPARE(facge(p1.VnH(), p3.Zeroing(), z22.VnH(), z25.VnH()), + "facge p1.h, p3/z, z22.h, z25.h"); + COMPARE(facge(p1.VnS(), p3.Zeroing(), z22.VnS(), z25.VnS()), + "facge p1.s, p3/z, z22.s, z25.s"); + COMPARE(facge(p1.VnD(), p3.Zeroing(), z22.VnD(), z25.VnD()), + "facge p1.d, p3/z, z22.d, z25.d"); + COMPARE(facgt(p8.VnH(), p7.Zeroing(), z25.VnH(), z17.VnH()), + "facgt p8.h, p7/z, z25.h, z17.h"); + 
COMPARE(facgt(p8.VnS(), p7.Zeroing(), z25.VnS(), z17.VnS()), + "facgt p8.s, p7/z, z25.s, z17.s"); + COMPARE(facgt(p8.VnD(), p7.Zeroing(), z25.VnD(), z17.VnD()), + "facgt p8.d, p7/z, z25.d, z17.d"); + COMPARE(fcmeq(p10.VnH(), p2.Zeroing(), z1.VnH(), z17.VnH()), + "fcmeq p10.h, p2/z, z1.h, z17.h"); + COMPARE(fcmeq(p10.VnS(), p2.Zeroing(), z1.VnS(), z17.VnS()), + "fcmeq p10.s, p2/z, z1.s, z17.s"); + COMPARE(fcmeq(p10.VnD(), p2.Zeroing(), z1.VnD(), z17.VnD()), + "fcmeq p10.d, p2/z, z1.d, z17.d"); + COMPARE(fcmge(p0.VnH(), p0.Zeroing(), z1.VnH(), z0.VnH()), + "fcmge p0.h, p0/z, z1.h, z0.h"); + COMPARE(fcmge(p0.VnS(), p0.Zeroing(), z1.VnS(), z0.VnS()), + "fcmge p0.s, p0/z, z1.s, z0.s"); + COMPARE(fcmge(p0.VnD(), p0.Zeroing(), z1.VnD(), z0.VnD()), + "fcmge p0.d, p0/z, z1.d, z0.d"); + COMPARE(fcmgt(p15.VnH(), p5.Zeroing(), z26.VnH(), z5.VnH()), + "fcmgt p15.h, p5/z, z26.h, z5.h"); + COMPARE(fcmgt(p15.VnS(), p5.Zeroing(), z26.VnS(), z5.VnS()), + "fcmgt p15.s, p5/z, z26.s, z5.s"); + COMPARE(fcmgt(p15.VnD(), p5.Zeroing(), z26.VnD(), z5.VnD()), + "fcmgt p15.d, p5/z, z26.d, z5.d"); + COMPARE(fcmne(p2.VnH(), p1.Zeroing(), z9.VnH(), z4.VnH()), + "fcmne p2.h, p1/z, z9.h, z4.h"); + COMPARE(fcmne(p2.VnS(), p1.Zeroing(), z9.VnS(), z4.VnS()), + "fcmne p2.s, p1/z, z9.s, z4.s"); + COMPARE(fcmne(p2.VnD(), p1.Zeroing(), z9.VnD(), z4.VnD()), + "fcmne p2.d, p1/z, z9.d, z4.d"); + COMPARE(fcmuo(p6.VnH(), p4.Zeroing(), z10.VnH(), z21.VnH()), + "fcmuo p6.h, p4/z, z10.h, z21.h"); + COMPARE(fcmuo(p6.VnS(), p4.Zeroing(), z10.VnS(), z21.VnS()), + "fcmuo p6.s, p4/z, z10.s, z21.s"); + COMPARE(fcmuo(p6.VnD(), p4.Zeroing(), z10.VnD(), z21.VnD()), + "fcmuo p6.d, p4/z, z10.d, z21.d"); COMPARE_MACRO(Facle(p2.VnH(), p0.Zeroing(), z11.VnH(), z15.VnH()), "facge p2.h, p0/z, z15.h, z11.h"); @@ -1311,42 +1354,42 @@ TEST(sve_fp_compare_vectors) { TEST(sve_fp_compare_with_zero) { SETUP(); - COMPARE_PREFIX(fcmeq(p9.VnH(), p1.Zeroing(), z17.VnH(), 0), - "fcmeq p9.h, p1/z, z17.h, #0.0"); - 
COMPARE_PREFIX(fcmeq(p9.VnS(), p1.Zeroing(), z17.VnS(), 0), - "fcmeq p9.s, p1/z, z17.s, #0.0"); - COMPARE_PREFIX(fcmeq(p9.VnD(), p1.Zeroing(), z17.VnD(), 0), - "fcmeq p9.d, p1/z, z17.d, #0.0"); - COMPARE_PREFIX(fcmge(p13.VnH(), p3.Zeroing(), z13.VnH(), 0), - "fcmge p13.h, p3/z, z13.h, #0.0"); - COMPARE_PREFIX(fcmge(p13.VnS(), p3.Zeroing(), z13.VnS(), 0), - "fcmge p13.s, p3/z, z13.s, #0.0"); - COMPARE_PREFIX(fcmge(p13.VnD(), p3.Zeroing(), z13.VnD(), 0), - "fcmge p13.d, p3/z, z13.d, #0.0"); - COMPARE_PREFIX(fcmgt(p10.VnH(), p2.Zeroing(), z24.VnH(), 0), - "fcmgt p10.h, p2/z, z24.h, #0.0"); - COMPARE_PREFIX(fcmgt(p10.VnS(), p2.Zeroing(), z24.VnS(), 0), - "fcmgt p10.s, p2/z, z24.s, #0.0"); - COMPARE_PREFIX(fcmgt(p10.VnD(), p2.Zeroing(), z24.VnD(), 0), - "fcmgt p10.d, p2/z, z24.d, #0.0"); - COMPARE_PREFIX(fcmle(p4.VnH(), p7.Zeroing(), z1.VnH(), 0), - "fcmle p4.h, p7/z, z1.h, #0.0"); - COMPARE_PREFIX(fcmle(p4.VnS(), p7.Zeroing(), z1.VnS(), 0), - "fcmle p4.s, p7/z, z1.s, #0.0"); - COMPARE_PREFIX(fcmle(p4.VnD(), p7.Zeroing(), z1.VnD(), 0), - "fcmle p4.d, p7/z, z1.d, #0.0"); - COMPARE_PREFIX(fcmlt(p15.VnH(), p7.Zeroing(), z9.VnH(), 0), - "fcmlt p15.h, p7/z, z9.h, #0.0"); - COMPARE_PREFIX(fcmlt(p15.VnS(), p7.Zeroing(), z9.VnS(), 0), - "fcmlt p15.s, p7/z, z9.s, #0.0"); - COMPARE_PREFIX(fcmlt(p15.VnD(), p7.Zeroing(), z9.VnD(), 0), - "fcmlt p15.d, p7/z, z9.d, #0.0"); - COMPARE_PREFIX(fcmne(p14.VnH(), p7.Zeroing(), z28.VnH(), 0), - "fcmne p14.h, p7/z, z28.h, #0.0"); - COMPARE_PREFIX(fcmne(p14.VnS(), p7.Zeroing(), z28.VnS(), 0), - "fcmne p14.s, p7/z, z28.s, #0.0"); - COMPARE_PREFIX(fcmne(p14.VnD(), p7.Zeroing(), z28.VnD(), 0), - "fcmne p14.d, p7/z, z28.d, #0.0"); + COMPARE(fcmeq(p9.VnH(), p1.Zeroing(), z17.VnH(), 0), + "fcmeq p9.h, p1/z, z17.h, #0.0"); + COMPARE(fcmeq(p9.VnS(), p1.Zeroing(), z17.VnS(), 0), + "fcmeq p9.s, p1/z, z17.s, #0.0"); + COMPARE(fcmeq(p9.VnD(), p1.Zeroing(), z17.VnD(), 0), + "fcmeq p9.d, p1/z, z17.d, #0.0"); + COMPARE(fcmge(p13.VnH(), p3.Zeroing(), 
z13.VnH(), 0), + "fcmge p13.h, p3/z, z13.h, #0.0"); + COMPARE(fcmge(p13.VnS(), p3.Zeroing(), z13.VnS(), 0), + "fcmge p13.s, p3/z, z13.s, #0.0"); + COMPARE(fcmge(p13.VnD(), p3.Zeroing(), z13.VnD(), 0), + "fcmge p13.d, p3/z, z13.d, #0.0"); + COMPARE(fcmgt(p10.VnH(), p2.Zeroing(), z24.VnH(), 0), + "fcmgt p10.h, p2/z, z24.h, #0.0"); + COMPARE(fcmgt(p10.VnS(), p2.Zeroing(), z24.VnS(), 0), + "fcmgt p10.s, p2/z, z24.s, #0.0"); + COMPARE(fcmgt(p10.VnD(), p2.Zeroing(), z24.VnD(), 0), + "fcmgt p10.d, p2/z, z24.d, #0.0"); + COMPARE(fcmle(p4.VnH(), p7.Zeroing(), z1.VnH(), 0), + "fcmle p4.h, p7/z, z1.h, #0.0"); + COMPARE(fcmle(p4.VnS(), p7.Zeroing(), z1.VnS(), 0), + "fcmle p4.s, p7/z, z1.s, #0.0"); + COMPARE(fcmle(p4.VnD(), p7.Zeroing(), z1.VnD(), 0), + "fcmle p4.d, p7/z, z1.d, #0.0"); + COMPARE(fcmlt(p15.VnH(), p7.Zeroing(), z9.VnH(), 0), + "fcmlt p15.h, p7/z, z9.h, #0.0"); + COMPARE(fcmlt(p15.VnS(), p7.Zeroing(), z9.VnS(), 0), + "fcmlt p15.s, p7/z, z9.s, #0.0"); + COMPARE(fcmlt(p15.VnD(), p7.Zeroing(), z9.VnD(), 0), + "fcmlt p15.d, p7/z, z9.d, #0.0"); + COMPARE(fcmne(p14.VnH(), p7.Zeroing(), z28.VnH(), 0), + "fcmne p14.h, p7/z, z28.h, #0.0"); + COMPARE(fcmne(p14.VnS(), p7.Zeroing(), z28.VnS(), 0), + "fcmne p14.s, p7/z, z28.s, #0.0"); + COMPARE(fcmne(p14.VnD(), p7.Zeroing(), z28.VnD(), 0), + "fcmne p14.d, p7/z, z28.d, #0.0"); CLEANUP(); } @@ -1354,18 +1397,18 @@ TEST(sve_fp_compare_with_zero) { TEST(sve_fp_complex_addition) { SETUP(); - COMPARE_PREFIX(fcadd(z12.VnH(), p5.Merging(), z12.VnH(), z13.VnH(), 90), - "fcadd z12.h, p5/m, z12.h, z13.h, #90"); - COMPARE_PREFIX(fcadd(z12.VnS(), p5.Merging(), z12.VnS(), z13.VnS(), 90), - "fcadd z12.s, p5/m, z12.s, z13.s, #90"); - COMPARE_PREFIX(fcadd(z12.VnD(), p5.Merging(), z12.VnD(), z13.VnD(), 90), - "fcadd z12.d, p5/m, z12.d, z13.d, #90"); - COMPARE_PREFIX(fcadd(z22.VnH(), p0.Merging(), z22.VnH(), z23.VnH(), 270), - "fcadd z22.h, p0/m, z22.h, z23.h, #270"); - COMPARE_PREFIX(fcadd(z22.VnS(), p0.Merging(), z22.VnS(), z23.VnS(), 270), - 
"fcadd z22.s, p0/m, z22.s, z23.s, #270"); - COMPARE_PREFIX(fcadd(z22.VnD(), p0.Merging(), z22.VnD(), z23.VnD(), 270), - "fcadd z22.d, p0/m, z22.d, z23.d, #270"); + COMPARE(fcadd(z12.VnH(), p5.Merging(), z12.VnH(), z13.VnH(), 90), + "fcadd z12.h, p5/m, z12.h, z13.h, #90"); + COMPARE(fcadd(z12.VnS(), p5.Merging(), z12.VnS(), z13.VnS(), 90), + "fcadd z12.s, p5/m, z12.s, z13.s, #90"); + COMPARE(fcadd(z12.VnD(), p5.Merging(), z12.VnD(), z13.VnD(), 90), + "fcadd z12.d, p5/m, z12.d, z13.d, #90"); + COMPARE(fcadd(z22.VnH(), p0.Merging(), z22.VnH(), z23.VnH(), 270), + "fcadd z22.h, p0/m, z22.h, z23.h, #270"); + COMPARE(fcadd(z22.VnS(), p0.Merging(), z22.VnS(), z23.VnS(), 270), + "fcadd z22.s, p0/m, z22.s, z23.s, #270"); + COMPARE(fcadd(z22.VnD(), p0.Merging(), z22.VnD(), z23.VnD(), 270), + "fcadd z22.d, p0/m, z22.d, z23.d, #270"); COMPARE_MACRO(Fcadd(z12.VnH(), p5.Merging(), z1.VnH(), z13.VnH(), 90), "movprfx z12.h, p5/m, z1.h\n" @@ -1380,19 +1423,92 @@ TEST(sve_fp_complex_addition) { TEST(sve_fp_complex_mul_add) { SETUP(); - COMPARE_PREFIX(fcmla(z19.VnH(), p7.Merging(), z16.VnH(), z0.VnH(), 90), - "fcmla z19.h, p7/m, z16.h, z0.h, #90"); - COMPARE_PREFIX(fcmla(z19.VnS(), p7.Merging(), z16.VnS(), z0.VnS(), 90), - "fcmla z19.s, p7/m, z16.s, z0.s, #90"); - COMPARE_PREFIX(fcmla(z19.VnD(), p7.Merging(), z16.VnD(), z0.VnD(), 90), - "fcmla z19.d, p7/m, z16.d, z0.d, #90"); + COMPARE_MACRO(Fcmla(z19.VnH(), + p7.Merging(), + z19.VnH(), + z16.VnH(), + z0.VnH(), + 90), + "fcmla z19.h, p7/m, z16.h, z0.h, #90"); + COMPARE_MACRO(Fcmla(z19.VnS(), + p7.Merging(), + z19.VnS(), + z16.VnS(), + z0.VnS(), + 90), + "fcmla z19.s, p7/m, z16.s, z0.s, #90"); + COMPARE_MACRO(Fcmla(z19.VnD(), + p7.Merging(), + z19.VnD(), + z16.VnD(), + z0.VnD(), + 90), + "fcmla z19.d, p7/m, z16.d, z0.d, #90"); - COMPARE_PREFIX(fcmla(z20.VnD(), p6.Merging(), z15.VnD(), z1.VnD(), 0), - "fcmla z20.d, p6/m, z15.d, z1.d, #0"); - COMPARE_PREFIX(fcmla(z20.VnD(), p6.Merging(), z15.VnD(), z1.VnD(), 180), - "fcmla z20.d, p6/m, 
z15.d, z1.d, #180"); - COMPARE_PREFIX(fcmla(z20.VnD(), p6.Merging(), z15.VnD(), z1.VnD(), 270), - "fcmla z20.d, p6/m, z15.d, z1.d, #270"); + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z20.VnD(), + z15.VnD(), + z1.VnD(), + 0), + "fcmla z20.d, p6/m, z15.d, z1.d, #0"); + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z20.VnD(), + z15.VnD(), + z1.VnD(), + 180), + "fcmla z20.d, p6/m, z15.d, z1.d, #180"); + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z20.VnD(), + z15.VnD(), + z1.VnD(), + 270), + "fcmla z20.d, p6/m, z15.d, z1.d, #270"); + + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z20.VnD(), + z15.VnD(), + z20.VnD(), + 270), + "fcmla z20.d, p6/m, z15.d, z20.d, #270"); + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z21.VnD(), + z15.VnD(), + z1.VnD(), + 270), + "movprfx z20.d, p6/m, z21.d\n" + "fcmla z20.d, p6/m, z15.d, z1.d, #270"); + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z21.VnD(), + z20.VnD(), + z1.VnD(), + 270), + "movprfx z31, z21\n" + "fcmla z31.d, p6/m, z20.d, z1.d, #270\n" + "mov z20.d, p6/m, z31.d"); + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z21.VnD(), + z15.VnD(), + z20.VnD(), + 270), + "movprfx z31, z21\n" + "fcmla z31.d, p6/m, z15.d, z20.d, #270\n" + "mov z20.d, p6/m, z31.d"); + COMPARE_MACRO(Fcmla(z20.VnD(), + p6.Merging(), + z21.VnD(), + z20.VnD(), + z20.VnD(), + 270), + "movprfx z31, z21\n" + "fcmla z31.d, p6/m, z20.d, z20.d, #270\n" + "mov z20.d, p6/m, z31.d"); CLEANUP(); } @@ -1400,22 +1516,22 @@ TEST(sve_fp_complex_mul_add) { TEST(sve_fp_complex_mul_add_index) { SETUP(); - COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 0), - "fcmla z30.h, z20.h, z3.h[0], #0"); - COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 1, 0), - "fcmla z30.h, z20.h, z3.h[1], #0"); - COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 2, 90), - "fcmla z30.h, z20.h, z3.h[2], #90"); - COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 270), - "fcmla z30.h, z20.h, z3.h[0], #270"); - COMPARE_PREFIX(fcmla(z10.VnS(), 
z20.VnS(), z1.VnS(), 0, 0), - "fcmla z10.s, z20.s, z1.s[0], #0"); - COMPARE_PREFIX(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 0), - "fcmla z10.s, z20.s, z1.s[1], #0"); - COMPARE_PREFIX(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 90), - "fcmla z10.s, z20.s, z1.s[1], #90"); - COMPARE_PREFIX(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 270), - "fcmla z10.s, z20.s, z1.s[0], #270"); + COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 0), + "fcmla z30.h, z20.h, z3.h[0], #0"); + COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 1, 0), + "fcmla z30.h, z20.h, z3.h[1], #0"); + COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 2, 90), + "fcmla z30.h, z20.h, z3.h[2], #90"); + COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 270), + "fcmla z30.h, z20.h, z3.h[0], #270"); + COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 0), + "fcmla z10.s, z20.s, z1.s[0], #0"); + COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 0), + "fcmla z10.s, z20.s, z1.s[1], #0"); + COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 90), + "fcmla z10.s, z20.s, z1.s[1], #90"); + COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 270), + "fcmla z10.s, z20.s, z1.s[0], #270"); CLEANUP(); } @@ -1423,21 +1539,21 @@ TEST(sve_fp_complex_mul_add_index) { TEST(sve_fp_fast_reduction) { SETUP(); - COMPARE_PREFIX(faddv(h26, p6, z16.VnH()), "faddv h26, p6, z16.h"); - COMPARE_PREFIX(faddv(s26, p6, z16.VnS()), "faddv s26, p6, z16.s"); - COMPARE_PREFIX(faddv(d26, p6, z16.VnD()), "faddv d26, p6, z16.d"); - COMPARE_PREFIX(fmaxnmv(h28, p1, z0.VnH()), "fmaxnmv h28, p1, z0.h"); - COMPARE_PREFIX(fmaxnmv(s28, p1, z0.VnS()), "fmaxnmv s28, p1, z0.s"); - COMPARE_PREFIX(fmaxnmv(d28, p1, z0.VnD()), "fmaxnmv d28, p1, z0.d"); - COMPARE_PREFIX(fmaxv(h3, p1, z23.VnH()), "fmaxv h3, p1, z23.h"); - COMPARE_PREFIX(fmaxv(s3, p1, z23.VnS()), "fmaxv s3, p1, z23.s"); - COMPARE_PREFIX(fmaxv(d3, p1, z23.VnD()), "fmaxv d3, p1, z23.d"); - COMPARE_PREFIX(fminnmv(h20, p6, z21.VnH()), "fminnmv h20, p6, z21.h"); - COMPARE_PREFIX(fminnmv(s20, p6, z21.VnS()), "fminnmv s20, p6, 
z21.s"); - COMPARE_PREFIX(fminnmv(d20, p6, z21.VnD()), "fminnmv d20, p6, z21.d"); - COMPARE_PREFIX(fminv(h10, p4, z27.VnH()), "fminv h10, p4, z27.h"); - COMPARE_PREFIX(fminv(s10, p4, z27.VnS()), "fminv s10, p4, z27.s"); - COMPARE_PREFIX(fminv(d10, p4, z27.VnD()), "fminv d10, p4, z27.d"); + COMPARE(faddv(h26, p6, z16.VnH()), "faddv h26, p6, z16.h"); + COMPARE(faddv(s26, p6, z16.VnS()), "faddv s26, p6, z16.s"); + COMPARE(faddv(d26, p6, z16.VnD()), "faddv d26, p6, z16.d"); + COMPARE(fmaxnmv(h28, p1, z0.VnH()), "fmaxnmv h28, p1, z0.h"); + COMPARE(fmaxnmv(s28, p1, z0.VnS()), "fmaxnmv s28, p1, z0.s"); + COMPARE(fmaxnmv(d28, p1, z0.VnD()), "fmaxnmv d28, p1, z0.d"); + COMPARE(fmaxv(h3, p1, z23.VnH()), "fmaxv h3, p1, z23.h"); + COMPARE(fmaxv(s3, p1, z23.VnS()), "fmaxv s3, p1, z23.s"); + COMPARE(fmaxv(d3, p1, z23.VnD()), "fmaxv d3, p1, z23.d"); + COMPARE(fminnmv(h20, p6, z21.VnH()), "fminnmv h20, p6, z21.h"); + COMPARE(fminnmv(s20, p6, z21.VnS()), "fminnmv s20, p6, z21.s"); + COMPARE(fminnmv(d20, p6, z21.VnD()), "fminnmv d20, p6, z21.d"); + COMPARE(fminv(h10, p4, z27.VnH()), "fminv h10, p4, z27.h"); + COMPARE(fminv(s10, p4, z27.VnS()), "fminv s10, p4, z27.s"); + COMPARE(fminv(d10, p4, z27.VnD()), "fminv d10, p4, z27.d"); CLEANUP(); } @@ -1445,59 +1561,62 @@ TEST(sve_fp_fast_reduction) { TEST(sve_fp_mul_add) { SETUP(); - COMPARE_PREFIX(fmad(z31.VnH(), p2.Merging(), z8.VnH(), z1.VnH()), - "fmad z31.h, p2/m, z8.h, z1.h"); - COMPARE_PREFIX(fmad(z31.VnS(), p2.Merging(), z8.VnS(), z1.VnS()), - "fmad z31.s, p2/m, z8.s, z1.s"); - COMPARE_PREFIX(fmad(z31.VnD(), p2.Merging(), z8.VnD(), z1.VnD()), - "fmad z31.d, p2/m, z8.d, z1.d"); - COMPARE_PREFIX(fmla(z26.VnH(), p7.Merging(), z19.VnH(), z16.VnH()), - "fmla z26.h, p7/m, z19.h, z16.h"); - COMPARE_PREFIX(fmla(z26.VnS(), p7.Merging(), z19.VnS(), z16.VnS()), - "fmla z26.s, p7/m, z19.s, z16.s"); - COMPARE_PREFIX(fmla(z26.VnD(), p7.Merging(), z19.VnD(), z16.VnD()), - "fmla z26.d, p7/m, z19.d, z16.d"); - COMPARE_PREFIX(fmls(z20.VnH(), 
p6.Merging(), z28.VnH(), z0.VnH()), - "fmls z20.h, p6/m, z28.h, z0.h"); - COMPARE_PREFIX(fmls(z20.VnS(), p6.Merging(), z28.VnS(), z0.VnS()), - "fmls z20.s, p6/m, z28.s, z0.s"); - COMPARE_PREFIX(fmls(z20.VnD(), p6.Merging(), z28.VnD(), z0.VnD()), - "fmls z20.d, p6/m, z28.d, z0.d"); - COMPARE_PREFIX(fmsb(z3.VnH(), p4.Merging(), z8.VnH(), z22.VnH()), - "fmsb z3.h, p4/m, z8.h, z22.h"); - COMPARE_PREFIX(fmsb(z3.VnS(), p4.Merging(), z8.VnS(), z22.VnS()), - "fmsb z3.s, p4/m, z8.s, z22.s"); - COMPARE_PREFIX(fmsb(z3.VnD(), p4.Merging(), z8.VnD(), z22.VnD()), - "fmsb z3.d, p4/m, z8.d, z22.d"); - COMPARE_PREFIX(fnmad(z0.VnH(), p5.Merging(), z20.VnH(), z17.VnH()), - "fnmad z0.h, p5/m, z20.h, z17.h"); - COMPARE_PREFIX(fnmad(z0.VnS(), p5.Merging(), z20.VnS(), z17.VnS()), - "fnmad z0.s, p5/m, z20.s, z17.s"); - COMPARE_PREFIX(fnmad(z0.VnD(), p5.Merging(), z20.VnD(), z17.VnD()), - "fnmad z0.d, p5/m, z20.d, z17.d"); - COMPARE_PREFIX(fnmla(z31.VnH(), p6.Merging(), z14.VnH(), z8.VnH()), - "fnmla z31.h, p6/m, z14.h, z8.h"); - COMPARE_PREFIX(fnmla(z31.VnS(), p6.Merging(), z14.VnS(), z8.VnS()), - "fnmla z31.s, p6/m, z14.s, z8.s"); - COMPARE_PREFIX(fnmla(z31.VnD(), p6.Merging(), z14.VnD(), z8.VnD()), - "fnmla z31.d, p6/m, z14.d, z8.d"); - COMPARE_PREFIX(fnmls(z2.VnH(), p1.Merging(), z23.VnH(), z15.VnH()), - "fnmls z2.h, p1/m, z23.h, z15.h"); - COMPARE_PREFIX(fnmls(z2.VnS(), p1.Merging(), z23.VnS(), z15.VnS()), - "fnmls z2.s, p1/m, z23.s, z15.s"); - COMPARE_PREFIX(fnmls(z2.VnD(), p1.Merging(), z23.VnD(), z15.VnD()), - "fnmls z2.d, p1/m, z23.d, z15.d"); - COMPARE_PREFIX(fnmsb(z28.VnH(), p3.Merging(), z26.VnH(), z11.VnH()), - "fnmsb z28.h, p3/m, z26.h, z11.h"); - COMPARE_PREFIX(fnmsb(z28.VnS(), p3.Merging(), z26.VnS(), z11.VnS()), - "fnmsb z28.s, p3/m, z26.s, z11.s"); - COMPARE_PREFIX(fnmsb(z28.VnD(), p3.Merging(), z26.VnD(), z11.VnD()), - "fnmsb z28.d, p3/m, z26.d, z11.d"); + COMPARE(fmad(z31.VnH(), p2.Merging(), z8.VnH(), z1.VnH()), + "fmad z31.h, p2/m, z8.h, z1.h"); + 
COMPARE(fmad(z31.VnS(), p2.Merging(), z8.VnS(), z1.VnS()), + "fmad z31.s, p2/m, z8.s, z1.s"); + COMPARE(fmad(z31.VnD(), p2.Merging(), z8.VnD(), z1.VnD()), + "fmad z31.d, p2/m, z8.d, z1.d"); + COMPARE(fmla(z26.VnH(), p7.Merging(), z19.VnH(), z16.VnH()), + "fmla z26.h, p7/m, z19.h, z16.h"); + COMPARE(fmla(z26.VnS(), p7.Merging(), z19.VnS(), z16.VnS()), + "fmla z26.s, p7/m, z19.s, z16.s"); + COMPARE(fmla(z26.VnD(), p7.Merging(), z19.VnD(), z16.VnD()), + "fmla z26.d, p7/m, z19.d, z16.d"); + COMPARE(fmls(z20.VnH(), p6.Merging(), z28.VnH(), z0.VnH()), + "fmls z20.h, p6/m, z28.h, z0.h"); + COMPARE(fmls(z20.VnS(), p6.Merging(), z28.VnS(), z0.VnS()), + "fmls z20.s, p6/m, z28.s, z0.s"); + COMPARE(fmls(z20.VnD(), p6.Merging(), z28.VnD(), z0.VnD()), + "fmls z20.d, p6/m, z28.d, z0.d"); + COMPARE(fmsb(z3.VnH(), p4.Merging(), z8.VnH(), z22.VnH()), + "fmsb z3.h, p4/m, z8.h, z22.h"); + COMPARE(fmsb(z3.VnS(), p4.Merging(), z8.VnS(), z22.VnS()), + "fmsb z3.s, p4/m, z8.s, z22.s"); + COMPARE(fmsb(z3.VnD(), p4.Merging(), z8.VnD(), z22.VnD()), + "fmsb z3.d, p4/m, z8.d, z22.d"); + COMPARE(fnmad(z0.VnH(), p5.Merging(), z20.VnH(), z17.VnH()), + "fnmad z0.h, p5/m, z20.h, z17.h"); + COMPARE(fnmad(z0.VnS(), p5.Merging(), z20.VnS(), z17.VnS()), + "fnmad z0.s, p5/m, z20.s, z17.s"); + COMPARE(fnmad(z0.VnD(), p5.Merging(), z20.VnD(), z17.VnD()), + "fnmad z0.d, p5/m, z20.d, z17.d"); + COMPARE(fnmla(z31.VnH(), p6.Merging(), z14.VnH(), z8.VnH()), + "fnmla z31.h, p6/m, z14.h, z8.h"); + COMPARE(fnmla(z31.VnS(), p6.Merging(), z14.VnS(), z8.VnS()), + "fnmla z31.s, p6/m, z14.s, z8.s"); + COMPARE(fnmla(z31.VnD(), p6.Merging(), z14.VnD(), z8.VnD()), + "fnmla z31.d, p6/m, z14.d, z8.d"); + COMPARE(fnmls(z2.VnH(), p1.Merging(), z23.VnH(), z15.VnH()), + "fnmls z2.h, p1/m, z23.h, z15.h"); + COMPARE(fnmls(z2.VnS(), p1.Merging(), z23.VnS(), z15.VnS()), + "fnmls z2.s, p1/m, z23.s, z15.s"); + COMPARE(fnmls(z2.VnD(), p1.Merging(), z23.VnD(), z15.VnD()), + "fnmls z2.d, p1/m, z23.d, z15.d"); + COMPARE(fnmsb(z28.VnH(), 
p3.Merging(), z26.VnH(), z11.VnH()), + "fnmsb z28.h, p3/m, z26.h, z11.h"); + COMPARE(fnmsb(z28.VnS(), p3.Merging(), z26.VnS(), z11.VnS()), + "fnmsb z28.s, p3/m, z26.s, z11.s"); + COMPARE(fnmsb(z28.VnD(), p3.Merging(), z26.VnD(), z11.VnD()), + "fnmsb z28.d, p3/m, z26.d, z11.d"); CLEANUP(); } TEST(sve_fp_mul_add_macro_strict_nan_propagation) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + // Shadow the `MacroAssembler` type so that the test macros work without // modification. typedef StrictNaNPropagationMacroAssembler MacroAssembler; @@ -1553,9 +1672,14 @@ TEST(sve_fp_mul_add_macro_strict_nan_propagation) { "fnmls z15.d, p0/m, z17.d, z18.d"); CLEANUP(); + +#pragma GCC diagnostic pop } TEST(sve_fp_mul_add_macro_fast_nan_propagation) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + // Shadow the `MacroAssembler` type so that the test macros work without // modification. typedef FastNaNPropagationMacroAssembler MacroAssembler; @@ -1603,56 +1727,46 @@ TEST(sve_fp_mul_add_macro_fast_nan_propagation) { "fnmls z15.d, p0/m, z17.d, z18.d"); CLEANUP(); + +#pragma GCC diagnostic pop } TEST(sve_fp_mul_add_index) { SETUP(); - COMPARE_PREFIX(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 0), - "fmla z25.d, z9.d, z1.d[0]"); - COMPARE_PREFIX(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 1), - "fmla z25.d, z9.d, z1.d[1]"); - - COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 0), - "fmla z13.h, z7.h, z7.h[0]"); - COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 2), - "fmla z13.h, z7.h, z7.h[2]"); - COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 5), - "fmla z13.h, z7.h, z7.h[5]"); - COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 7), - "fmla z13.h, z7.h, z7.h[7]"); - - COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 0), - "fmla z17.s, z27.s, z2.s[0]"); - COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 1), - "fmla z17.s, z27.s, z2.s[1]"); - COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 2), - "fmla z17.s, z27.s, z2.s[2]"); - 
COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 3), - "fmla z17.s, z27.s, z2.s[3]"); - - COMPARE_PREFIX(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 0), - "fmls z28.d, z2.d, z0.d[0]"); - COMPARE_PREFIX(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 1), - "fmls z28.d, z2.d, z0.d[1]"); - - COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 1), - "fmls z30.h, z29.h, z7.h[1]"); - COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 4), - "fmls z30.h, z29.h, z7.h[4]"); - COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 3), - "fmls z30.h, z29.h, z7.h[3]"); - COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 6), - "fmls z30.h, z29.h, z7.h[6]"); - - COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 0), - "fmls z30.s, z1.s, z6.s[0]"); - COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 1), - "fmls z30.s, z1.s, z6.s[1]"); - COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 2), - "fmls z30.s, z1.s, z6.s[2]"); - COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 3), - "fmls z30.s, z1.s, z6.s[3]"); + COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 0), "fmla z25.d, z9.d, z1.d[0]"); + COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 1), "fmla z25.d, z9.d, z1.d[1]"); + + COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 0), "fmla z13.h, z7.h, z7.h[0]"); + COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 2), "fmla z13.h, z7.h, z7.h[2]"); + COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 5), "fmla z13.h, z7.h, z7.h[5]"); + COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 7), "fmla z13.h, z7.h, z7.h[7]"); + + COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 0), + "fmla z17.s, z27.s, z2.s[0]"); + COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 1), + "fmla z17.s, z27.s, z2.s[1]"); + COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 2), + "fmla z17.s, z27.s, z2.s[2]"); + COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 3), + "fmla z17.s, z27.s, z2.s[3]"); + + COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 0), "fmls z28.d, z2.d, z0.d[0]"); + COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 1), "fmls z28.d, z2.d, z0.d[1]"); + + 
COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 1), + "fmls z30.h, z29.h, z7.h[1]"); + COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 4), + "fmls z30.h, z29.h, z7.h[4]"); + COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 3), + "fmls z30.h, z29.h, z7.h[3]"); + COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 6), + "fmls z30.h, z29.h, z7.h[6]"); + + COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 0), "fmls z30.s, z1.s, z6.s[0]"); + COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 1), "fmls z30.s, z1.s, z6.s[1]"); + COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 2), "fmls z30.s, z1.s, z6.s[2]"); + COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 3), "fmls z30.s, z1.s, z6.s[3]"); COMPARE_MACRO(Fmla(z10.VnH(), z11.VnH(), z12.VnH(), z4.VnH(), 7), "movprfx z10, z11\n" @@ -1688,28 +1802,18 @@ TEST(sve_fp_mul_add_index) { TEST(sve_fp_mul_index) { SETUP(); - COMPARE_PREFIX(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 0), - "fmul z12.d, z3.d, z4.d[0]"); - COMPARE_PREFIX(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 1), - "fmul z12.d, z3.d, z4.d[1]"); + COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 0), "fmul z12.d, z3.d, z4.d[0]"); + COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 1), "fmul z12.d, z3.d, z4.d[1]"); - COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 0), - "fmul z22.h, z2.h, z3.h[0]"); - COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 3), - "fmul z22.h, z2.h, z3.h[3]"); - COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 4), - "fmul z22.h, z2.h, z3.h[4]"); - COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 7), - "fmul z22.h, z2.h, z3.h[7]"); + COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 0), "fmul z22.h, z2.h, z3.h[0]"); + COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 3), "fmul z22.h, z2.h, z3.h[3]"); + COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 4), "fmul z22.h, z2.h, z3.h[4]"); + COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 7), "fmul z22.h, z2.h, z3.h[7]"); - COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 0), - "fmul z2.s, z8.s, z7.s[0]"); - COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 1), - "fmul 
z2.s, z8.s, z7.s[1]"); - COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 2), - "fmul z2.s, z8.s, z7.s[2]"); - COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 3), - "fmul z2.s, z8.s, z7.s[3]"); + COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 0), "fmul z2.s, z8.s, z7.s[0]"); + COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 1), "fmul z2.s, z8.s, z7.s[1]"); + COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 2), "fmul z2.s, z8.s, z7.s[2]"); + COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 3), "fmul z2.s, z8.s, z7.s[3]"); CLEANUP(); } @@ -1717,128 +1821,114 @@ TEST(sve_fp_mul_index) { TEST(sve_fp_unary_op_predicated) { SETUP(); - COMPARE_PREFIX(fcvtzs(z29.VnS(), p5.Merging(), z8.VnD()), - "fcvtzs z29.s, p5/m, z8.d"); - COMPARE_PREFIX(fcvtzs(z30.VnD(), p5.Merging(), z8.VnD()), - "fcvtzs z30.d, p5/m, z8.d"); - COMPARE_PREFIX(fcvtzs(z14.VnH(), p1.Merging(), z29.VnH()), - "fcvtzs z14.h, p1/m, z29.h"); - COMPARE_PREFIX(fcvtzs(z11.VnS(), p3.Merging(), z16.VnH()), - "fcvtzs z11.s, p3/m, z16.h"); - COMPARE_PREFIX(fcvtzs(z4.VnD(), p7.Merging(), z4.VnH()), - "fcvtzs z4.d, p7/m, z4.h"); - COMPARE_PREFIX(fcvtzs(z24.VnS(), p1.Merging(), z4.VnS()), - "fcvtzs z24.s, p1/m, z4.s"); - COMPARE_PREFIX(fcvtzs(z25.VnD(), p4.Merging(), z24.VnS()), - "fcvtzs z25.d, p4/m, z24.s"); - COMPARE_PREFIX(fcvtzu(z16.VnS(), p7.Merging(), z14.VnD()), - "fcvtzu z16.s, p7/m, z14.d"); - COMPARE_PREFIX(fcvtzu(z31.VnD(), p1.Merging(), z16.VnD()), - "fcvtzu z31.d, p1/m, z16.d"); - COMPARE_PREFIX(fcvtzu(z12.VnH(), p2.Merging(), z27.VnH()), - "fcvtzu z12.h, p2/m, z27.h"); - COMPARE_PREFIX(fcvtzu(z26.VnS(), p6.Merging(), z29.VnH()), - "fcvtzu z26.s, p6/m, z29.h"); - COMPARE_PREFIX(fcvtzu(z29.VnD(), p5.Merging(), z27.VnH()), - "fcvtzu z29.d, p5/m, z27.h"); - COMPARE_PREFIX(fcvtzu(z13.VnS(), p2.Merging(), z17.VnS()), - "fcvtzu z13.s, p2/m, z17.s"); - COMPARE_PREFIX(fcvtzu(z25.VnD(), p7.Merging(), z28.VnS()), - "fcvtzu z25.d, p7/m, z28.s"); - COMPARE_PREFIX(scvtf(z16.VnH(), p6.Merging(), z5.VnH()), - "scvtf z16.h, p6/m, z5.h"); - 
COMPARE_PREFIX(scvtf(z31.VnD(), p5.Merging(), z26.VnS()), - "scvtf z31.d, p5/m, z26.s"); - COMPARE_PREFIX(scvtf(z0.VnH(), p7.Merging(), z0.VnS()), - "scvtf z0.h, p7/m, z0.s"); - COMPARE_PREFIX(scvtf(z12.VnS(), p7.Merging(), z0.VnS()), - "scvtf z12.s, p7/m, z0.s"); - COMPARE_PREFIX(scvtf(z17.VnD(), p1.Merging(), z17.VnD()), - "scvtf z17.d, p1/m, z17.d"); - COMPARE_PREFIX(scvtf(z2.VnH(), p0.Merging(), z9.VnD()), - "scvtf z2.h, p0/m, z9.d"); - COMPARE_PREFIX(scvtf(z26.VnS(), p5.Merging(), z4.VnD()), - "scvtf z26.s, p5/m, z4.d"); - COMPARE_PREFIX(ucvtf(z27.VnH(), p4.Merging(), z25.VnH()), - "ucvtf z27.h, p4/m, z25.h"); - COMPARE_PREFIX(ucvtf(z3.VnD(), p4.Merging(), z3.VnS()), - "ucvtf z3.d, p4/m, z3.s"); - COMPARE_PREFIX(ucvtf(z24.VnH(), p2.Merging(), z29.VnS()), - "ucvtf z24.h, p2/m, z29.s"); - COMPARE_PREFIX(ucvtf(z29.VnS(), p5.Merging(), z14.VnS()), - "ucvtf z29.s, p5/m, z14.s"); - COMPARE_PREFIX(ucvtf(z7.VnD(), p2.Merging(), z14.VnD()), - "ucvtf z7.d, p2/m, z14.d"); - COMPARE_PREFIX(ucvtf(z20.VnH(), p2.Merging(), z14.VnD()), - "ucvtf z20.h, p2/m, z14.d"); - COMPARE_PREFIX(ucvtf(z26.VnS(), p1.Merging(), z18.VnD()), - "ucvtf z26.s, p1/m, z18.d"); - COMPARE_PREFIX(frinta(z11.VnH(), p0.Merging(), z3.VnH()), - "frinta z11.h, p0/m, z3.h"); - COMPARE_PREFIX(frinta(z11.VnS(), p0.Merging(), z3.VnS()), - "frinta z11.s, p0/m, z3.s"); - COMPARE_PREFIX(frinta(z11.VnD(), p0.Merging(), z3.VnD()), - "frinta z11.d, p0/m, z3.d"); - COMPARE_PREFIX(frinti(z17.VnH(), p0.Merging(), z16.VnH()), - "frinti z17.h, p0/m, z16.h"); - COMPARE_PREFIX(frinti(z17.VnS(), p0.Merging(), z16.VnS()), - "frinti z17.s, p0/m, z16.s"); - COMPARE_PREFIX(frinti(z17.VnD(), p0.Merging(), z16.VnD()), - "frinti z17.d, p0/m, z16.d"); - COMPARE_PREFIX(frintm(z2.VnH(), p7.Merging(), z15.VnH()), - "frintm z2.h, p7/m, z15.h"); - COMPARE_PREFIX(frintm(z2.VnS(), p7.Merging(), z15.VnS()), - "frintm z2.s, p7/m, z15.s"); - COMPARE_PREFIX(frintm(z2.VnD(), p7.Merging(), z15.VnD()), - "frintm z2.d, p7/m, z15.d"); - 
COMPARE_PREFIX(frintn(z14.VnH(), p5.Merging(), z18.VnH()), - "frintn z14.h, p5/m, z18.h"); - COMPARE_PREFIX(frintn(z14.VnS(), p5.Merging(), z18.VnS()), - "frintn z14.s, p5/m, z18.s"); - COMPARE_PREFIX(frintn(z14.VnD(), p5.Merging(), z18.VnD()), - "frintn z14.d, p5/m, z18.d"); - COMPARE_PREFIX(frintp(z20.VnH(), p6.Merging(), z23.VnH()), - "frintp z20.h, p6/m, z23.h"); - COMPARE_PREFIX(frintp(z20.VnS(), p6.Merging(), z23.VnS()), - "frintp z20.s, p6/m, z23.s"); - COMPARE_PREFIX(frintp(z20.VnD(), p6.Merging(), z23.VnD()), - "frintp z20.d, p6/m, z23.d"); - COMPARE_PREFIX(frintx(z2.VnH(), p6.Merging(), z18.VnH()), - "frintx z2.h, p6/m, z18.h"); - COMPARE_PREFIX(frintx(z2.VnS(), p6.Merging(), z18.VnS()), - "frintx z2.s, p6/m, z18.s"); - COMPARE_PREFIX(frintx(z2.VnD(), p6.Merging(), z18.VnD()), - "frintx z2.d, p6/m, z18.d"); - COMPARE_PREFIX(frintz(z26.VnH(), p7.Merging(), z25.VnH()), - "frintz z26.h, p7/m, z25.h"); - COMPARE_PREFIX(frintz(z26.VnS(), p7.Merging(), z25.VnS()), - "frintz z26.s, p7/m, z25.s"); - COMPARE_PREFIX(frintz(z26.VnD(), p7.Merging(), z25.VnD()), - "frintz z26.d, p7/m, z25.d"); - COMPARE_PREFIX(fcvt(z5.VnH(), p2.Merging(), z11.VnD()), - "fcvt z5.h, p2/m, z11.d"); - COMPARE_PREFIX(fcvt(z30.VnS(), p7.Merging(), z0.VnD()), - "fcvt z30.s, p7/m, z0.d"); - COMPARE_PREFIX(fcvt(z10.VnD(), p0.Merging(), z17.VnH()), - "fcvt z10.d, p0/m, z17.h"); - COMPARE_PREFIX(fcvt(z28.VnS(), p3.Merging(), z27.VnH()), - "fcvt z28.s, p3/m, z27.h"); - COMPARE_PREFIX(fcvt(z9.VnD(), p7.Merging(), z0.VnS()), - "fcvt z9.d, p7/m, z0.s"); - COMPARE_PREFIX(fcvt(z27.VnH(), p7.Merging(), z9.VnS()), - "fcvt z27.h, p7/m, z9.s"); - COMPARE_PREFIX(frecpx(z16.VnH(), p1.Merging(), z29.VnH()), - "frecpx z16.h, p1/m, z29.h"); - COMPARE_PREFIX(frecpx(z16.VnS(), p1.Merging(), z29.VnS()), - "frecpx z16.s, p1/m, z29.s"); - COMPARE_PREFIX(frecpx(z16.VnD(), p1.Merging(), z29.VnD()), - "frecpx z16.d, p1/m, z29.d"); - COMPARE_PREFIX(fsqrt(z30.VnH(), p3.Merging(), z13.VnH()), - "fsqrt z30.h, p3/m, 
z13.h"); - COMPARE_PREFIX(fsqrt(z30.VnS(), p3.Merging(), z13.VnS()), - "fsqrt z30.s, p3/m, z13.s"); - COMPARE_PREFIX(fsqrt(z30.VnD(), p3.Merging(), z13.VnD()), - "fsqrt z30.d, p3/m, z13.d"); + COMPARE(fcvtzs(z29.VnS(), p5.Merging(), z8.VnD()), + "fcvtzs z29.s, p5/m, z8.d"); + COMPARE(fcvtzs(z30.VnD(), p5.Merging(), z8.VnD()), + "fcvtzs z30.d, p5/m, z8.d"); + COMPARE(fcvtzs(z14.VnH(), p1.Merging(), z29.VnH()), + "fcvtzs z14.h, p1/m, z29.h"); + COMPARE(fcvtzs(z11.VnS(), p3.Merging(), z16.VnH()), + "fcvtzs z11.s, p3/m, z16.h"); + COMPARE(fcvtzs(z4.VnD(), p7.Merging(), z4.VnH()), "fcvtzs z4.d, p7/m, z4.h"); + COMPARE(fcvtzs(z24.VnS(), p1.Merging(), z4.VnS()), + "fcvtzs z24.s, p1/m, z4.s"); + COMPARE(fcvtzs(z25.VnD(), p4.Merging(), z24.VnS()), + "fcvtzs z25.d, p4/m, z24.s"); + COMPARE(fcvtzu(z16.VnS(), p7.Merging(), z14.VnD()), + "fcvtzu z16.s, p7/m, z14.d"); + COMPARE(fcvtzu(z31.VnD(), p1.Merging(), z16.VnD()), + "fcvtzu z31.d, p1/m, z16.d"); + COMPARE(fcvtzu(z12.VnH(), p2.Merging(), z27.VnH()), + "fcvtzu z12.h, p2/m, z27.h"); + COMPARE(fcvtzu(z26.VnS(), p6.Merging(), z29.VnH()), + "fcvtzu z26.s, p6/m, z29.h"); + COMPARE(fcvtzu(z29.VnD(), p5.Merging(), z27.VnH()), + "fcvtzu z29.d, p5/m, z27.h"); + COMPARE(fcvtzu(z13.VnS(), p2.Merging(), z17.VnS()), + "fcvtzu z13.s, p2/m, z17.s"); + COMPARE(fcvtzu(z25.VnD(), p7.Merging(), z28.VnS()), + "fcvtzu z25.d, p7/m, z28.s"); + COMPARE(scvtf(z16.VnH(), p6.Merging(), z5.VnH()), "scvtf z16.h, p6/m, z5.h"); + COMPARE(scvtf(z31.VnD(), p5.Merging(), z26.VnS()), + "scvtf z31.d, p5/m, z26.s"); + COMPARE(scvtf(z0.VnH(), p7.Merging(), z0.VnS()), "scvtf z0.h, p7/m, z0.s"); + COMPARE(scvtf(z12.VnS(), p7.Merging(), z0.VnS()), "scvtf z12.s, p7/m, z0.s"); + COMPARE(scvtf(z17.VnD(), p1.Merging(), z17.VnD()), + "scvtf z17.d, p1/m, z17.d"); + COMPARE(scvtf(z2.VnH(), p0.Merging(), z9.VnD()), "scvtf z2.h, p0/m, z9.d"); + COMPARE(scvtf(z26.VnS(), p5.Merging(), z4.VnD()), "scvtf z26.s, p5/m, z4.d"); + COMPARE(ucvtf(z27.VnH(), p4.Merging(), z25.VnH()), 
+ "ucvtf z27.h, p4/m, z25.h"); + COMPARE(ucvtf(z3.VnD(), p4.Merging(), z3.VnS()), "ucvtf z3.d, p4/m, z3.s"); + COMPARE(ucvtf(z24.VnH(), p2.Merging(), z29.VnS()), + "ucvtf z24.h, p2/m, z29.s"); + COMPARE(ucvtf(z29.VnS(), p5.Merging(), z14.VnS()), + "ucvtf z29.s, p5/m, z14.s"); + COMPARE(ucvtf(z7.VnD(), p2.Merging(), z14.VnD()), "ucvtf z7.d, p2/m, z14.d"); + COMPARE(ucvtf(z20.VnH(), p2.Merging(), z14.VnD()), + "ucvtf z20.h, p2/m, z14.d"); + COMPARE(ucvtf(z26.VnS(), p1.Merging(), z18.VnD()), + "ucvtf z26.s, p1/m, z18.d"); + COMPARE(frinta(z11.VnH(), p0.Merging(), z3.VnH()), + "frinta z11.h, p0/m, z3.h"); + COMPARE(frinta(z11.VnS(), p0.Merging(), z3.VnS()), + "frinta z11.s, p0/m, z3.s"); + COMPARE(frinta(z11.VnD(), p0.Merging(), z3.VnD()), + "frinta z11.d, p0/m, z3.d"); + COMPARE(frinti(z17.VnH(), p0.Merging(), z16.VnH()), + "frinti z17.h, p0/m, z16.h"); + COMPARE(frinti(z17.VnS(), p0.Merging(), z16.VnS()), + "frinti z17.s, p0/m, z16.s"); + COMPARE(frinti(z17.VnD(), p0.Merging(), z16.VnD()), + "frinti z17.d, p0/m, z16.d"); + COMPARE(frintm(z2.VnH(), p7.Merging(), z15.VnH()), + "frintm z2.h, p7/m, z15.h"); + COMPARE(frintm(z2.VnS(), p7.Merging(), z15.VnS()), + "frintm z2.s, p7/m, z15.s"); + COMPARE(frintm(z2.VnD(), p7.Merging(), z15.VnD()), + "frintm z2.d, p7/m, z15.d"); + COMPARE(frintn(z14.VnH(), p5.Merging(), z18.VnH()), + "frintn z14.h, p5/m, z18.h"); + COMPARE(frintn(z14.VnS(), p5.Merging(), z18.VnS()), + "frintn z14.s, p5/m, z18.s"); + COMPARE(frintn(z14.VnD(), p5.Merging(), z18.VnD()), + "frintn z14.d, p5/m, z18.d"); + COMPARE(frintp(z20.VnH(), p6.Merging(), z23.VnH()), + "frintp z20.h, p6/m, z23.h"); + COMPARE(frintp(z20.VnS(), p6.Merging(), z23.VnS()), + "frintp z20.s, p6/m, z23.s"); + COMPARE(frintp(z20.VnD(), p6.Merging(), z23.VnD()), + "frintp z20.d, p6/m, z23.d"); + COMPARE(frintx(z2.VnH(), p6.Merging(), z18.VnH()), + "frintx z2.h, p6/m, z18.h"); + COMPARE(frintx(z2.VnS(), p6.Merging(), z18.VnS()), + "frintx z2.s, p6/m, z18.s"); + COMPARE(frintx(z2.VnD(), 
p6.Merging(), z18.VnD()), + "frintx z2.d, p6/m, z18.d"); + COMPARE(frintz(z26.VnH(), p7.Merging(), z25.VnH()), + "frintz z26.h, p7/m, z25.h"); + COMPARE(frintz(z26.VnS(), p7.Merging(), z25.VnS()), + "frintz z26.s, p7/m, z25.s"); + COMPARE(frintz(z26.VnD(), p7.Merging(), z25.VnD()), + "frintz z26.d, p7/m, z25.d"); + COMPARE(fcvt(z5.VnH(), p2.Merging(), z11.VnD()), "fcvt z5.h, p2/m, z11.d"); + COMPARE(fcvt(z30.VnS(), p7.Merging(), z0.VnD()), "fcvt z30.s, p7/m, z0.d"); + COMPARE(fcvt(z10.VnD(), p0.Merging(), z17.VnH()), "fcvt z10.d, p0/m, z17.h"); + COMPARE(fcvt(z28.VnS(), p3.Merging(), z27.VnH()), "fcvt z28.s, p3/m, z27.h"); + COMPARE(fcvt(z9.VnD(), p7.Merging(), z0.VnS()), "fcvt z9.d, p7/m, z0.s"); + COMPARE(fcvt(z27.VnH(), p7.Merging(), z9.VnS()), "fcvt z27.h, p7/m, z9.s"); + COMPARE(frecpx(z16.VnH(), p1.Merging(), z29.VnH()), + "frecpx z16.h, p1/m, z29.h"); + COMPARE(frecpx(z16.VnS(), p1.Merging(), z29.VnS()), + "frecpx z16.s, p1/m, z29.s"); + COMPARE(frecpx(z16.VnD(), p1.Merging(), z29.VnD()), + "frecpx z16.d, p1/m, z29.d"); + COMPARE(fsqrt(z30.VnH(), p3.Merging(), z13.VnH()), + "fsqrt z30.h, p3/m, z13.h"); + COMPARE(fsqrt(z30.VnS(), p3.Merging(), z13.VnS()), + "fsqrt z30.s, p3/m, z13.s"); + COMPARE(fsqrt(z30.VnD(), p3.Merging(), z13.VnD()), + "fsqrt z30.d, p3/m, z13.d"); CLEANUP(); } @@ -1910,12 +2000,12 @@ TEST(sve_fp_unary_op_predicated_macro) { TEST(sve_fp_unary_op_unpredicated) { SETUP(); - COMPARE_PREFIX(frecpe(z0.VnH(), z2.VnH()), "frecpe z0.h, z2.h"); - COMPARE_PREFIX(frecpe(z0.VnS(), z2.VnS()), "frecpe z0.s, z2.s"); - COMPARE_PREFIX(frecpe(z0.VnD(), z2.VnD()), "frecpe z0.d, z2.d"); - COMPARE_PREFIX(frsqrte(z27.VnH(), z14.VnH()), "frsqrte z27.h, z14.h"); - COMPARE_PREFIX(frsqrte(z27.VnS(), z14.VnS()), "frsqrte z27.s, z14.s"); - COMPARE_PREFIX(frsqrte(z27.VnD(), z14.VnD()), "frsqrte z27.d, z14.d"); + COMPARE(frecpe(z0.VnH(), z2.VnH()), "frecpe z0.h, z2.h"); + COMPARE(frecpe(z0.VnS(), z2.VnS()), "frecpe z0.s, z2.s"); + COMPARE(frecpe(z0.VnD(), z2.VnD()), 
"frecpe z0.d, z2.d"); + COMPARE(frsqrte(z27.VnH(), z14.VnH()), "frsqrte z27.h, z14.h"); + COMPARE(frsqrte(z27.VnS(), z14.VnS()), "frsqrte z27.s, z14.s"); + COMPARE(frsqrte(z27.VnD(), z14.VnD()), "frsqrte z27.d, z14.d"); CLEANUP(); } @@ -1923,64 +2013,64 @@ TEST(sve_fp_unary_op_unpredicated) { TEST(sve_inc_dec_by_predicate_count) { SETUP(); - COMPARE_PREFIX(decp(x17, p0.VnB()), "decp x17, p0.b"); - COMPARE_PREFIX(decp(x17, p0.VnH()), "decp x17, p0.h"); - COMPARE_PREFIX(decp(x17, p0.VnS()), "decp x17, p0.s"); - COMPARE_PREFIX(decp(x17, p0.VnD()), "decp x17, p0.d"); - COMPARE_PREFIX(decp(z2.VnH(), p11), "decp z2.h, p11"); - COMPARE_PREFIX(decp(z2.VnS(), p11), "decp z2.s, p11"); - COMPARE_PREFIX(decp(z2.VnD(), p11), "decp z2.d, p11"); - COMPARE_PREFIX(incp(x26, p8.VnB()), "incp x26, p8.b"); - COMPARE_PREFIX(incp(x26, p8.VnH()), "incp x26, p8.h"); - COMPARE_PREFIX(incp(x26, p8.VnS()), "incp x26, p8.s"); - COMPARE_PREFIX(incp(x26, p8.VnD()), "incp x26, p8.d"); - COMPARE_PREFIX(incp(z27.VnH(), p9), "incp z27.h, p9"); - COMPARE_PREFIX(incp(z27.VnS(), p9), "incp z27.s, p9"); - COMPARE_PREFIX(incp(z27.VnD(), p9), "incp z27.d, p9"); - COMPARE_PREFIX(sqdecp(x12, p7.VnB(), w12), "sqdecp x12, p7.b, w12"); - COMPARE_PREFIX(sqdecp(x12, p7.VnH(), w12), "sqdecp x12, p7.h, w12"); - COMPARE_PREFIX(sqdecp(x12, p7.VnS(), w12), "sqdecp x12, p7.s, w12"); - COMPARE_PREFIX(sqdecp(x12, p7.VnD(), w12), "sqdecp x12, p7.d, w12"); - COMPARE_PREFIX(sqdecp(x30, p5.VnB()), "sqdecp x30, p5.b"); - COMPARE_PREFIX(sqdecp(x30, p5.VnH()), "sqdecp x30, p5.h"); - COMPARE_PREFIX(sqdecp(x30, p5.VnS()), "sqdecp x30, p5.s"); - COMPARE_PREFIX(sqdecp(x30, p5.VnD()), "sqdecp x30, p5.d"); - COMPARE_PREFIX(sqdecp(z13.VnH(), p1), "sqdecp z13.h, p1"); - COMPARE_PREFIX(sqdecp(z13.VnS(), p1), "sqdecp z13.s, p1"); - COMPARE_PREFIX(sqdecp(z13.VnD(), p1), "sqdecp z13.d, p1"); - COMPARE_PREFIX(sqincp(x26, p5.VnB(), w26), "sqincp x26, p5.b, w26"); - COMPARE_PREFIX(sqincp(x26, p5.VnH(), w26), "sqincp x26, p5.h, w26"); - 
COMPARE_PREFIX(sqincp(x26, p5.VnS(), w26), "sqincp x26, p5.s, w26"); - COMPARE_PREFIX(sqincp(x26, p5.VnD(), w26), "sqincp x26, p5.d, w26"); - COMPARE_PREFIX(sqincp(x5, p15.VnB()), "sqincp x5, p15.b"); - COMPARE_PREFIX(sqincp(x5, p15.VnH()), "sqincp x5, p15.h"); - COMPARE_PREFIX(sqincp(x5, p15.VnS()), "sqincp x5, p15.s"); - COMPARE_PREFIX(sqincp(x5, p15.VnD()), "sqincp x5, p15.d"); - COMPARE_PREFIX(sqincp(z14.VnH(), p4), "sqincp z14.h, p4"); - COMPARE_PREFIX(sqincp(z14.VnS(), p4), "sqincp z14.s, p4"); - COMPARE_PREFIX(sqincp(z14.VnD(), p4), "sqincp z14.d, p4"); - COMPARE_PREFIX(uqdecp(w3, p13.VnB()), "uqdecp w3, p13.b"); - COMPARE_PREFIX(uqdecp(w3, p13.VnH()), "uqdecp w3, p13.h"); - COMPARE_PREFIX(uqdecp(w3, p13.VnS()), "uqdecp w3, p13.s"); - COMPARE_PREFIX(uqdecp(w3, p13.VnD()), "uqdecp w3, p13.d"); - COMPARE_PREFIX(uqdecp(x19, p0.VnB()), "uqdecp x19, p0.b"); - COMPARE_PREFIX(uqdecp(x19, p0.VnH()), "uqdecp x19, p0.h"); - COMPARE_PREFIX(uqdecp(x19, p0.VnS()), "uqdecp x19, p0.s"); - COMPARE_PREFIX(uqdecp(x19, p0.VnD()), "uqdecp x19, p0.d"); - COMPARE_PREFIX(uqdecp(z15.VnH(), p9), "uqdecp z15.h, p9"); - COMPARE_PREFIX(uqdecp(z15.VnS(), p9), "uqdecp z15.s, p9"); - COMPARE_PREFIX(uqdecp(z15.VnD(), p9), "uqdecp z15.d, p9"); - COMPARE_PREFIX(uqincp(w18, p1.VnB()), "uqincp w18, p1.b"); - COMPARE_PREFIX(uqincp(w18, p1.VnH()), "uqincp w18, p1.h"); - COMPARE_PREFIX(uqincp(w18, p1.VnS()), "uqincp w18, p1.s"); - COMPARE_PREFIX(uqincp(w18, p1.VnD()), "uqincp w18, p1.d"); - COMPARE_PREFIX(uqincp(x17, p15.VnB()), "uqincp x17, p15.b"); - COMPARE_PREFIX(uqincp(x17, p15.VnH()), "uqincp x17, p15.h"); - COMPARE_PREFIX(uqincp(x17, p15.VnS()), "uqincp x17, p15.s"); - COMPARE_PREFIX(uqincp(x17, p15.VnD()), "uqincp x17, p15.d"); - COMPARE_PREFIX(uqincp(z4.VnH(), p3), "uqincp z4.h, p3"); - COMPARE_PREFIX(uqincp(z4.VnS(), p3), "uqincp z4.s, p3"); - COMPARE_PREFIX(uqincp(z4.VnD(), p3), "uqincp z4.d, p3"); + COMPARE(decp(x17, p0.VnB()), "decp x17, p0.b"); + COMPARE(decp(x17, p0.VnH()), "decp 
x17, p0.h"); + COMPARE(decp(x17, p0.VnS()), "decp x17, p0.s"); + COMPARE(decp(x17, p0.VnD()), "decp x17, p0.d"); + COMPARE(decp(z2.VnH(), p11), "decp z2.h, p11"); + COMPARE(decp(z2.VnS(), p11), "decp z2.s, p11"); + COMPARE(decp(z2.VnD(), p11), "decp z2.d, p11"); + COMPARE(incp(x26, p8.VnB()), "incp x26, p8.b"); + COMPARE(incp(x26, p8.VnH()), "incp x26, p8.h"); + COMPARE(incp(x26, p8.VnS()), "incp x26, p8.s"); + COMPARE(incp(x26, p8.VnD()), "incp x26, p8.d"); + COMPARE(incp(z27.VnH(), p9), "incp z27.h, p9"); + COMPARE(incp(z27.VnS(), p9), "incp z27.s, p9"); + COMPARE(incp(z27.VnD(), p9), "incp z27.d, p9"); + COMPARE(sqdecp(x12, p7.VnB(), w12), "sqdecp x12, p7.b, w12"); + COMPARE(sqdecp(x12, p7.VnH(), w12), "sqdecp x12, p7.h, w12"); + COMPARE(sqdecp(x12, p7.VnS(), w12), "sqdecp x12, p7.s, w12"); + COMPARE(sqdecp(x12, p7.VnD(), w12), "sqdecp x12, p7.d, w12"); + COMPARE(sqdecp(x30, p5.VnB()), "sqdecp x30, p5.b"); + COMPARE(sqdecp(x30, p5.VnH()), "sqdecp x30, p5.h"); + COMPARE(sqdecp(x30, p5.VnS()), "sqdecp x30, p5.s"); + COMPARE(sqdecp(x30, p5.VnD()), "sqdecp x30, p5.d"); + COMPARE(sqdecp(z13.VnH(), p1), "sqdecp z13.h, p1"); + COMPARE(sqdecp(z13.VnS(), p1), "sqdecp z13.s, p1"); + COMPARE(sqdecp(z13.VnD(), p1), "sqdecp z13.d, p1"); + COMPARE(sqincp(x26, p5.VnB(), w26), "sqincp x26, p5.b, w26"); + COMPARE(sqincp(x26, p5.VnH(), w26), "sqincp x26, p5.h, w26"); + COMPARE(sqincp(x26, p5.VnS(), w26), "sqincp x26, p5.s, w26"); + COMPARE(sqincp(x26, p5.VnD(), w26), "sqincp x26, p5.d, w26"); + COMPARE(sqincp(x5, p15.VnB()), "sqincp x5, p15.b"); + COMPARE(sqincp(x5, p15.VnH()), "sqincp x5, p15.h"); + COMPARE(sqincp(x5, p15.VnS()), "sqincp x5, p15.s"); + COMPARE(sqincp(x5, p15.VnD()), "sqincp x5, p15.d"); + COMPARE(sqincp(z14.VnH(), p4), "sqincp z14.h, p4"); + COMPARE(sqincp(z14.VnS(), p4), "sqincp z14.s, p4"); + COMPARE(sqincp(z14.VnD(), p4), "sqincp z14.d, p4"); + COMPARE(uqdecp(w3, p13.VnB()), "uqdecp w3, p13.b"); + COMPARE(uqdecp(w3, p13.VnH()), "uqdecp w3, p13.h"); + 
COMPARE(uqdecp(w3, p13.VnS()), "uqdecp w3, p13.s"); + COMPARE(uqdecp(w3, p13.VnD()), "uqdecp w3, p13.d"); + COMPARE(uqdecp(x19, p0.VnB()), "uqdecp x19, p0.b"); + COMPARE(uqdecp(x19, p0.VnH()), "uqdecp x19, p0.h"); + COMPARE(uqdecp(x19, p0.VnS()), "uqdecp x19, p0.s"); + COMPARE(uqdecp(x19, p0.VnD()), "uqdecp x19, p0.d"); + COMPARE(uqdecp(z15.VnH(), p9), "uqdecp z15.h, p9"); + COMPARE(uqdecp(z15.VnS(), p9), "uqdecp z15.s, p9"); + COMPARE(uqdecp(z15.VnD(), p9), "uqdecp z15.d, p9"); + COMPARE(uqincp(w18, p1.VnB()), "uqincp w18, p1.b"); + COMPARE(uqincp(w18, p1.VnH()), "uqincp w18, p1.h"); + COMPARE(uqincp(w18, p1.VnS()), "uqincp w18, p1.s"); + COMPARE(uqincp(w18, p1.VnD()), "uqincp w18, p1.d"); + COMPARE(uqincp(x17, p15.VnB()), "uqincp x17, p15.b"); + COMPARE(uqincp(x17, p15.VnH()), "uqincp x17, p15.h"); + COMPARE(uqincp(x17, p15.VnS()), "uqincp x17, p15.s"); + COMPARE(uqincp(x17, p15.VnD()), "uqincp x17, p15.d"); + COMPARE(uqincp(z4.VnH(), p3), "uqincp z4.h, p3"); + COMPARE(uqincp(z4.VnS(), p3), "uqincp z4.s, p3"); + COMPARE(uqincp(z4.VnD(), p3), "uqincp z4.d, p3"); CLEANUP(); } @@ -2038,24 +2128,24 @@ TEST(sve_inc_dec_by_predicate_count_macro) { TEST(sve_index_generation) { SETUP(); - COMPARE_PREFIX(index(z21.VnB(), -16, 15), "index z21.b, #-16, #15"); - COMPARE_PREFIX(index(z22.VnB(), -2, 1), "index z22.b, #-2, #1"); - COMPARE_PREFIX(index(z23.VnH(), -1, 0), "index z23.h, #-1, #0"); - COMPARE_PREFIX(index(z24.VnS(), 0, -1), "index z24.s, #0, #-1"); - COMPARE_PREFIX(index(z25.VnD(), 1, -2), "index z25.d, #1, #-2"); - COMPARE_PREFIX(index(z26.VnB(), 15, -16), "index z26.b, #15, #-16"); - COMPARE_PREFIX(index(z23.VnB(), -16, w8), "index z23.b, #-16, w8"); - COMPARE_PREFIX(index(z24.VnH(), -1, x9), "index z24.h, #-1, w9"); - COMPARE_PREFIX(index(z25.VnS(), 0, w10), "index z25.s, #0, w10"); - COMPARE_PREFIX(index(z26.VnD(), 15, x11), "index z26.d, #15, x11"); - COMPARE_PREFIX(index(z14.VnB(), w15, 15), "index z14.b, w15, #15"); - COMPARE_PREFIX(index(z15.VnH(), x16, 1), 
"index z15.h, w16, #1"); - COMPARE_PREFIX(index(z16.VnS(), w17, 0), "index z16.s, w17, #0"); - COMPARE_PREFIX(index(z17.VnD(), x18, -16), "index z17.d, x18, #-16"); - COMPARE_PREFIX(index(z20.VnB(), w23, w21), "index z20.b, w23, w21"); - COMPARE_PREFIX(index(z21.VnH(), x24, w22), "index z21.h, w24, w22"); - COMPARE_PREFIX(index(z22.VnS(), w25, x23), "index z22.s, w25, w23"); - COMPARE_PREFIX(index(z23.VnD(), x26, x24), "index z23.d, x26, x24"); + COMPARE(index(z21.VnB(), -16, 15), "index z21.b, #-16, #15"); + COMPARE(index(z22.VnB(), -2, 1), "index z22.b, #-2, #1"); + COMPARE(index(z23.VnH(), -1, 0), "index z23.h, #-1, #0"); + COMPARE(index(z24.VnS(), 0, -1), "index z24.s, #0, #-1"); + COMPARE(index(z25.VnD(), 1, -2), "index z25.d, #1, #-2"); + COMPARE(index(z26.VnB(), 15, -16), "index z26.b, #15, #-16"); + COMPARE(index(z23.VnB(), -16, w8), "index z23.b, #-16, w8"); + COMPARE(index(z24.VnH(), -1, x9), "index z24.h, #-1, w9"); + COMPARE(index(z25.VnS(), 0, w10), "index z25.s, #0, w10"); + COMPARE(index(z26.VnD(), 15, x11), "index z26.d, #15, x11"); + COMPARE(index(z14.VnB(), w15, 15), "index z14.b, w15, #15"); + COMPARE(index(z15.VnH(), x16, 1), "index z15.h, w16, #1"); + COMPARE(index(z16.VnS(), w17, 0), "index z16.s, w17, #0"); + COMPARE(index(z17.VnD(), x18, -16), "index z17.d, x18, #-16"); + COMPARE(index(z20.VnB(), w23, w21), "index z20.b, w23, w21"); + COMPARE(index(z21.VnH(), x24, w22), "index z21.h, w24, w22"); + COMPARE(index(z22.VnS(), w25, x23), "index z22.s, w25, w23"); + COMPARE(index(z23.VnD(), x26, x24), "index z23.d, x26, x24"); // Simple pass-through macros. 
COMPARE_MACRO(Index(z21.VnB(), -16, 15), "index z21.b, #-16, #15"); @@ -2099,50 +2189,30 @@ TEST(sve_index_generation) { TEST(sve_int_arithmetic_unpredicated) { SETUP(); - COMPARE_PREFIX(add(z23.VnB(), z30.VnB(), z31.VnB()), - "add z23.b, z30.b, z31.b"); - COMPARE_PREFIX(add(z24.VnH(), z29.VnH(), z30.VnH()), - "add z24.h, z29.h, z30.h"); - COMPARE_PREFIX(add(z25.VnS(), z28.VnS(), z29.VnS()), - "add z25.s, z28.s, z29.s"); - COMPARE_PREFIX(add(z26.VnD(), z27.VnD(), z28.VnD()), - "add z26.d, z27.d, z28.d"); - COMPARE_PREFIX(sqadd(z26.VnB(), z21.VnB(), z1.VnB()), - "sqadd z26.b, z21.b, z1.b"); - COMPARE_PREFIX(sqadd(z25.VnH(), z20.VnH(), z2.VnH()), - "sqadd z25.h, z20.h, z2.h"); - COMPARE_PREFIX(sqadd(z24.VnS(), z19.VnS(), z3.VnS()), - "sqadd z24.s, z19.s, z3.s"); - COMPARE_PREFIX(sqadd(z23.VnD(), z18.VnD(), z4.VnD()), - "sqadd z23.d, z18.d, z4.d"); - COMPARE_PREFIX(sqsub(z1.VnB(), z10.VnB(), z0.VnB()), - "sqsub z1.b, z10.b, z0.b"); - COMPARE_PREFIX(sqsub(z2.VnH(), z11.VnH(), z1.VnH()), - "sqsub z2.h, z11.h, z1.h"); - COMPARE_PREFIX(sqsub(z3.VnS(), z12.VnS(), z2.VnS()), - "sqsub z3.s, z12.s, z2.s"); - COMPARE_PREFIX(sqsub(z4.VnD(), z13.VnD(), z3.VnD()), - "sqsub z4.d, z13.d, z3.d"); - COMPARE_PREFIX(sub(z9.VnB(), z7.VnB(), z25.VnB()), "sub z9.b, z7.b, z25.b"); - COMPARE_PREFIX(sub(z8.VnH(), z8.VnH(), z26.VnH()), "sub z8.h, z8.h, z26.h"); - COMPARE_PREFIX(sub(z7.VnS(), z9.VnS(), z27.VnS()), "sub z7.s, z9.s, z27.s"); - COMPARE_PREFIX(sub(z6.VnD(), z10.VnD(), z28.VnD()), "sub z6.d, z10.d, z28.d"); - COMPARE_PREFIX(uqadd(z13.VnB(), z15.VnB(), z3.VnB()), - "uqadd z13.b, z15.b, z3.b"); - COMPARE_PREFIX(uqadd(z12.VnH(), z16.VnH(), z2.VnH()), - "uqadd z12.h, z16.h, z2.h"); - COMPARE_PREFIX(uqadd(z11.VnS(), z17.VnS(), z1.VnS()), - "uqadd z11.s, z17.s, z1.s"); - COMPARE_PREFIX(uqadd(z10.VnD(), z18.VnD(), z0.VnD()), - "uqadd z10.d, z18.d, z0.d"); - COMPARE_PREFIX(uqsub(z9.VnB(), z13.VnB(), z13.VnB()), - "uqsub z9.b, z13.b, z13.b"); - COMPARE_PREFIX(uqsub(z11.VnH(), z15.VnH(), 
z11.VnH()), - "uqsub z11.h, z15.h, z11.h"); - COMPARE_PREFIX(uqsub(z13.VnS(), z17.VnS(), z13.VnS()), - "uqsub z13.s, z17.s, z13.s"); - COMPARE_PREFIX(uqsub(z15.VnD(), z19.VnD(), z15.VnD()), - "uqsub z15.d, z19.d, z15.d"); + COMPARE(add(z23.VnB(), z30.VnB(), z31.VnB()), "add z23.b, z30.b, z31.b"); + COMPARE(add(z24.VnH(), z29.VnH(), z30.VnH()), "add z24.h, z29.h, z30.h"); + COMPARE(add(z25.VnS(), z28.VnS(), z29.VnS()), "add z25.s, z28.s, z29.s"); + COMPARE(add(z26.VnD(), z27.VnD(), z28.VnD()), "add z26.d, z27.d, z28.d"); + COMPARE(sqadd(z26.VnB(), z21.VnB(), z1.VnB()), "sqadd z26.b, z21.b, z1.b"); + COMPARE(sqadd(z25.VnH(), z20.VnH(), z2.VnH()), "sqadd z25.h, z20.h, z2.h"); + COMPARE(sqadd(z24.VnS(), z19.VnS(), z3.VnS()), "sqadd z24.s, z19.s, z3.s"); + COMPARE(sqadd(z23.VnD(), z18.VnD(), z4.VnD()), "sqadd z23.d, z18.d, z4.d"); + COMPARE(sqsub(z1.VnB(), z10.VnB(), z0.VnB()), "sqsub z1.b, z10.b, z0.b"); + COMPARE(sqsub(z2.VnH(), z11.VnH(), z1.VnH()), "sqsub z2.h, z11.h, z1.h"); + COMPARE(sqsub(z3.VnS(), z12.VnS(), z2.VnS()), "sqsub z3.s, z12.s, z2.s"); + COMPARE(sqsub(z4.VnD(), z13.VnD(), z3.VnD()), "sqsub z4.d, z13.d, z3.d"); + COMPARE(sub(z9.VnB(), z7.VnB(), z25.VnB()), "sub z9.b, z7.b, z25.b"); + COMPARE(sub(z8.VnH(), z8.VnH(), z26.VnH()), "sub z8.h, z8.h, z26.h"); + COMPARE(sub(z7.VnS(), z9.VnS(), z27.VnS()), "sub z7.s, z9.s, z27.s"); + COMPARE(sub(z6.VnD(), z10.VnD(), z28.VnD()), "sub z6.d, z10.d, z28.d"); + COMPARE(uqadd(z13.VnB(), z15.VnB(), z3.VnB()), "uqadd z13.b, z15.b, z3.b"); + COMPARE(uqadd(z12.VnH(), z16.VnH(), z2.VnH()), "uqadd z12.h, z16.h, z2.h"); + COMPARE(uqadd(z11.VnS(), z17.VnS(), z1.VnS()), "uqadd z11.s, z17.s, z1.s"); + COMPARE(uqadd(z10.VnD(), z18.VnD(), z0.VnD()), "uqadd z10.d, z18.d, z0.d"); + COMPARE(uqsub(z9.VnB(), z13.VnB(), z13.VnB()), "uqsub z9.b, z13.b, z13.b"); + COMPARE(uqsub(z11.VnH(), z15.VnH(), z11.VnH()), "uqsub z11.h, z15.h, z11.h"); + COMPARE(uqsub(z13.VnS(), z17.VnS(), z13.VnS()), "uqsub z13.s, z17.s, z13.s"); + 
COMPARE(uqsub(z15.VnD(), z19.VnD(), z15.VnD()), "uqsub z15.d, z19.d, z15.d"); CLEANUP(); } @@ -2150,142 +2220,142 @@ TEST(sve_int_arithmetic_unpredicated) { TEST(sve_int_binary_arithmetic_predicated) { SETUP(); - COMPARE_PREFIX(add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()), - "add z22.b, p4/m, z22.b, z20.b"); - COMPARE_PREFIX(add(z22.VnH(), p4.Merging(), z22.VnH(), z20.VnH()), - "add z22.h, p4/m, z22.h, z20.h"); - COMPARE_PREFIX(add(z22.VnS(), p4.Merging(), z22.VnS(), z20.VnS()), - "add z22.s, p4/m, z22.s, z20.s"); - COMPARE_PREFIX(add(z22.VnD(), p4.Merging(), z22.VnD(), z20.VnD()), - "add z22.d, p4/m, z22.d, z20.d"); - COMPARE_PREFIX(and_(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()), - "and z22.b, p3/m, z22.b, z3.b"); - COMPARE_PREFIX(and_(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()), - "and z22.h, p3/m, z22.h, z3.h"); - COMPARE_PREFIX(and_(z22.VnS(), p3.Merging(), z22.VnS(), z3.VnS()), - "and z22.s, p3/m, z22.s, z3.s"); - COMPARE_PREFIX(and_(z22.VnD(), p3.Merging(), z22.VnD(), z3.VnD()), - "and z22.d, p3/m, z22.d, z3.d"); - COMPARE_PREFIX(bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()), - "bic z17.b, p7/m, z17.b, z10.b"); - COMPARE_PREFIX(bic(z17.VnH(), p7.Merging(), z17.VnH(), z10.VnH()), - "bic z17.h, p7/m, z17.h, z10.h"); - COMPARE_PREFIX(bic(z17.VnS(), p7.Merging(), z17.VnS(), z10.VnS()), - "bic z17.s, p7/m, z17.s, z10.s"); - COMPARE_PREFIX(bic(z17.VnD(), p7.Merging(), z17.VnD(), z10.VnD()), - "bic z17.d, p7/m, z17.d, z10.d"); - COMPARE_PREFIX(eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()), - "eor z23.b, p4/m, z23.b, z15.b"); - COMPARE_PREFIX(eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()), - "eor z23.h, p4/m, z23.h, z15.h"); - COMPARE_PREFIX(eor(z23.VnS(), p4.Merging(), z23.VnS(), z15.VnS()), - "eor z23.s, p4/m, z23.s, z15.s"); - COMPARE_PREFIX(eor(z23.VnD(), p4.Merging(), z23.VnD(), z15.VnD()), - "eor z23.d, p4/m, z23.d, z15.d"); - COMPARE_PREFIX(mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()), - "mul z15.b, p5/m, z15.b, z15.b"); - 
COMPARE_PREFIX(mul(z15.VnH(), p5.Merging(), z15.VnH(), z15.VnH()), - "mul z15.h, p5/m, z15.h, z15.h"); - COMPARE_PREFIX(mul(z15.VnS(), p5.Merging(), z15.VnS(), z15.VnS()), - "mul z15.s, p5/m, z15.s, z15.s"); - COMPARE_PREFIX(mul(z15.VnD(), p5.Merging(), z15.VnD(), z15.VnD()), - "mul z15.d, p5/m, z15.d, z15.d"); - COMPARE_PREFIX(orr(z9.VnB(), p1.Merging(), z9.VnB(), z28.VnB()), - "orr z9.b, p1/m, z9.b, z28.b"); - COMPARE_PREFIX(orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()), - "orr z9.h, p1/m, z9.h, z28.h"); - COMPARE_PREFIX(orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()), - "orr z9.s, p1/m, z9.s, z28.s"); - COMPARE_PREFIX(orr(z9.VnD(), p1.Merging(), z9.VnD(), z28.VnD()), - "orr z9.d, p1/m, z9.d, z28.d"); - COMPARE_PREFIX(sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()), - "sabd z11.b, p6/m, z11.b, z31.b"); - COMPARE_PREFIX(sabd(z11.VnH(), p6.Merging(), z11.VnH(), z31.VnH()), - "sabd z11.h, p6/m, z11.h, z31.h"); - COMPARE_PREFIX(sabd(z11.VnS(), p6.Merging(), z11.VnS(), z31.VnS()), - "sabd z11.s, p6/m, z11.s, z31.s"); - COMPARE_PREFIX(sabd(z11.VnD(), p6.Merging(), z11.VnD(), z31.VnD()), - "sabd z11.d, p6/m, z11.d, z31.d"); - COMPARE_PREFIX(sdivr(z20.VnS(), p5.Merging(), z20.VnS(), z23.VnS()), - "sdivr z20.s, p5/m, z20.s, z23.s"); - COMPARE_PREFIX(sdiv(z15.VnD(), p6.Merging(), z15.VnD(), z8.VnD()), - "sdiv z15.d, p6/m, z15.d, z8.d"); - COMPARE_PREFIX(smax(z30.VnB(), p4.Merging(), z30.VnB(), z30.VnB()), - "smax z30.b, p4/m, z30.b, z30.b"); - COMPARE_PREFIX(smax(z30.VnH(), p4.Merging(), z30.VnH(), z30.VnH()), - "smax z30.h, p4/m, z30.h, z30.h"); - COMPARE_PREFIX(smax(z30.VnS(), p4.Merging(), z30.VnS(), z30.VnS()), - "smax z30.s, p4/m, z30.s, z30.s"); - COMPARE_PREFIX(smax(z30.VnD(), p4.Merging(), z30.VnD(), z30.VnD()), - "smax z30.d, p4/m, z30.d, z30.d"); - COMPARE_PREFIX(smin(z20.VnB(), p7.Merging(), z20.VnB(), z19.VnB()), - "smin z20.b, p7/m, z20.b, z19.b"); - COMPARE_PREFIX(smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()), - "smin z20.h, p7/m, z20.h, 
z19.h"); - COMPARE_PREFIX(smin(z20.VnS(), p7.Merging(), z20.VnS(), z19.VnS()), - "smin z20.s, p7/m, z20.s, z19.s"); - COMPARE_PREFIX(smin(z20.VnD(), p7.Merging(), z20.VnD(), z19.VnD()), - "smin z20.d, p7/m, z20.d, z19.d"); - COMPARE_PREFIX(smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()), - "smulh z23.b, p0/m, z23.b, z3.b"); - COMPARE_PREFIX(smulh(z23.VnH(), p0.Merging(), z23.VnH(), z3.VnH()), - "smulh z23.h, p0/m, z23.h, z3.h"); - COMPARE_PREFIX(smulh(z23.VnS(), p0.Merging(), z23.VnS(), z3.VnS()), - "smulh z23.s, p0/m, z23.s, z3.s"); - COMPARE_PREFIX(smulh(z23.VnD(), p0.Merging(), z23.VnD(), z3.VnD()), - "smulh z23.d, p0/m, z23.d, z3.d"); - COMPARE_PREFIX(subr(z1.VnB(), p6.Merging(), z1.VnB(), z1.VnB()), - "subr z1.b, p6/m, z1.b, z1.b"); - COMPARE_PREFIX(subr(z1.VnH(), p6.Merging(), z1.VnH(), z1.VnH()), - "subr z1.h, p6/m, z1.h, z1.h"); - COMPARE_PREFIX(subr(z1.VnS(), p6.Merging(), z1.VnS(), z1.VnS()), - "subr z1.s, p6/m, z1.s, z1.s"); - COMPARE_PREFIX(subr(z1.VnD(), p6.Merging(), z1.VnD(), z1.VnD()), - "subr z1.d, p6/m, z1.d, z1.d"); - COMPARE_PREFIX(sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()), - "sub z28.b, p2/m, z28.b, z0.b"); - COMPARE_PREFIX(sub(z28.VnH(), p2.Merging(), z28.VnH(), z0.VnH()), - "sub z28.h, p2/m, z28.h, z0.h"); - COMPARE_PREFIX(sub(z28.VnS(), p2.Merging(), z28.VnS(), z0.VnS()), - "sub z28.s, p2/m, z28.s, z0.s"); - COMPARE_PREFIX(sub(z28.VnD(), p2.Merging(), z28.VnD(), z0.VnD()), - "sub z28.d, p2/m, z28.d, z0.d"); - COMPARE_PREFIX(uabd(z14.VnB(), p6.Merging(), z14.VnB(), z22.VnB()), - "uabd z14.b, p6/m, z14.b, z22.b"); - COMPARE_PREFIX(uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()), - "uabd z14.h, p6/m, z14.h, z22.h"); - COMPARE_PREFIX(uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()), - "uabd z14.s, p6/m, z14.s, z22.s"); - COMPARE_PREFIX(uabd(z14.VnD(), p6.Merging(), z14.VnD(), z22.VnD()), - "uabd z14.d, p6/m, z14.d, z22.d"); - COMPARE_PREFIX(udivr(z27.VnS(), p5.Merging(), z27.VnS(), z31.VnS()), - "udivr z27.s, p5/m, z27.s, 
z31.s"); - COMPARE_PREFIX(udiv(z13.VnD(), p4.Merging(), z13.VnD(), z11.VnD()), - "udiv z13.d, p4/m, z13.d, z11.d"); - COMPARE_PREFIX(umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()), - "umax z0.b, p5/m, z0.b, z14.b"); - COMPARE_PREFIX(umax(z0.VnH(), p5.Merging(), z0.VnH(), z14.VnH()), - "umax z0.h, p5/m, z0.h, z14.h"); - COMPARE_PREFIX(umax(z0.VnS(), p5.Merging(), z0.VnS(), z14.VnS()), - "umax z0.s, p5/m, z0.s, z14.s"); - COMPARE_PREFIX(umax(z0.VnD(), p5.Merging(), z0.VnD(), z14.VnD()), - "umax z0.d, p5/m, z0.d, z14.d"); - COMPARE_PREFIX(umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()), - "umin z26.b, p5/m, z26.b, z12.b"); - COMPARE_PREFIX(umin(z26.VnH(), p5.Merging(), z26.VnH(), z12.VnH()), - "umin z26.h, p5/m, z26.h, z12.h"); - COMPARE_PREFIX(umin(z26.VnS(), p5.Merging(), z26.VnS(), z12.VnS()), - "umin z26.s, p5/m, z26.s, z12.s"); - COMPARE_PREFIX(umin(z26.VnD(), p5.Merging(), z26.VnD(), z12.VnD()), - "umin z26.d, p5/m, z26.d, z12.d"); - COMPARE_PREFIX(umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()), - "umulh z12.b, p2/m, z12.b, z17.b"); - COMPARE_PREFIX(umulh(z12.VnH(), p2.Merging(), z12.VnH(), z17.VnH()), - "umulh z12.h, p2/m, z12.h, z17.h"); - COMPARE_PREFIX(umulh(z12.VnS(), p2.Merging(), z12.VnS(), z17.VnS()), - "umulh z12.s, p2/m, z12.s, z17.s"); - COMPARE_PREFIX(umulh(z12.VnD(), p2.Merging(), z12.VnD(), z17.VnD()), - "umulh z12.d, p2/m, z12.d, z17.d"); + COMPARE(add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()), + "add z22.b, p4/m, z22.b, z20.b"); + COMPARE(add(z22.VnH(), p4.Merging(), z22.VnH(), z20.VnH()), + "add z22.h, p4/m, z22.h, z20.h"); + COMPARE(add(z22.VnS(), p4.Merging(), z22.VnS(), z20.VnS()), + "add z22.s, p4/m, z22.s, z20.s"); + COMPARE(add(z22.VnD(), p4.Merging(), z22.VnD(), z20.VnD()), + "add z22.d, p4/m, z22.d, z20.d"); + COMPARE(and_(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()), + "and z22.b, p3/m, z22.b, z3.b"); + COMPARE(and_(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()), + "and z22.h, p3/m, z22.h, z3.h"); + 
COMPARE(and_(z22.VnS(), p3.Merging(), z22.VnS(), z3.VnS()), + "and z22.s, p3/m, z22.s, z3.s"); + COMPARE(and_(z22.VnD(), p3.Merging(), z22.VnD(), z3.VnD()), + "and z22.d, p3/m, z22.d, z3.d"); + COMPARE(bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()), + "bic z17.b, p7/m, z17.b, z10.b"); + COMPARE(bic(z17.VnH(), p7.Merging(), z17.VnH(), z10.VnH()), + "bic z17.h, p7/m, z17.h, z10.h"); + COMPARE(bic(z17.VnS(), p7.Merging(), z17.VnS(), z10.VnS()), + "bic z17.s, p7/m, z17.s, z10.s"); + COMPARE(bic(z17.VnD(), p7.Merging(), z17.VnD(), z10.VnD()), + "bic z17.d, p7/m, z17.d, z10.d"); + COMPARE(eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()), + "eor z23.b, p4/m, z23.b, z15.b"); + COMPARE(eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()), + "eor z23.h, p4/m, z23.h, z15.h"); + COMPARE(eor(z23.VnS(), p4.Merging(), z23.VnS(), z15.VnS()), + "eor z23.s, p4/m, z23.s, z15.s"); + COMPARE(eor(z23.VnD(), p4.Merging(), z23.VnD(), z15.VnD()), + "eor z23.d, p4/m, z23.d, z15.d"); + COMPARE(mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()), + "mul z15.b, p5/m, z15.b, z15.b"); + COMPARE(mul(z15.VnH(), p5.Merging(), z15.VnH(), z15.VnH()), + "mul z15.h, p5/m, z15.h, z15.h"); + COMPARE(mul(z15.VnS(), p5.Merging(), z15.VnS(), z15.VnS()), + "mul z15.s, p5/m, z15.s, z15.s"); + COMPARE(mul(z15.VnD(), p5.Merging(), z15.VnD(), z15.VnD()), + "mul z15.d, p5/m, z15.d, z15.d"); + COMPARE(orr(z9.VnB(), p1.Merging(), z9.VnB(), z28.VnB()), + "orr z9.b, p1/m, z9.b, z28.b"); + COMPARE(orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()), + "orr z9.h, p1/m, z9.h, z28.h"); + COMPARE(orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()), + "orr z9.s, p1/m, z9.s, z28.s"); + COMPARE(orr(z9.VnD(), p1.Merging(), z9.VnD(), z28.VnD()), + "orr z9.d, p1/m, z9.d, z28.d"); + COMPARE(sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()), + "sabd z11.b, p6/m, z11.b, z31.b"); + COMPARE(sabd(z11.VnH(), p6.Merging(), z11.VnH(), z31.VnH()), + "sabd z11.h, p6/m, z11.h, z31.h"); + COMPARE(sabd(z11.VnS(), p6.Merging(), z11.VnS(), 
z31.VnS()), + "sabd z11.s, p6/m, z11.s, z31.s"); + COMPARE(sabd(z11.VnD(), p6.Merging(), z11.VnD(), z31.VnD()), + "sabd z11.d, p6/m, z11.d, z31.d"); + COMPARE(sdivr(z20.VnS(), p5.Merging(), z20.VnS(), z23.VnS()), + "sdivr z20.s, p5/m, z20.s, z23.s"); + COMPARE(sdiv(z15.VnD(), p6.Merging(), z15.VnD(), z8.VnD()), + "sdiv z15.d, p6/m, z15.d, z8.d"); + COMPARE(smax(z30.VnB(), p4.Merging(), z30.VnB(), z30.VnB()), + "smax z30.b, p4/m, z30.b, z30.b"); + COMPARE(smax(z30.VnH(), p4.Merging(), z30.VnH(), z30.VnH()), + "smax z30.h, p4/m, z30.h, z30.h"); + COMPARE(smax(z30.VnS(), p4.Merging(), z30.VnS(), z30.VnS()), + "smax z30.s, p4/m, z30.s, z30.s"); + COMPARE(smax(z30.VnD(), p4.Merging(), z30.VnD(), z30.VnD()), + "smax z30.d, p4/m, z30.d, z30.d"); + COMPARE(smin(z20.VnB(), p7.Merging(), z20.VnB(), z19.VnB()), + "smin z20.b, p7/m, z20.b, z19.b"); + COMPARE(smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()), + "smin z20.h, p7/m, z20.h, z19.h"); + COMPARE(smin(z20.VnS(), p7.Merging(), z20.VnS(), z19.VnS()), + "smin z20.s, p7/m, z20.s, z19.s"); + COMPARE(smin(z20.VnD(), p7.Merging(), z20.VnD(), z19.VnD()), + "smin z20.d, p7/m, z20.d, z19.d"); + COMPARE(smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()), + "smulh z23.b, p0/m, z23.b, z3.b"); + COMPARE(smulh(z23.VnH(), p0.Merging(), z23.VnH(), z3.VnH()), + "smulh z23.h, p0/m, z23.h, z3.h"); + COMPARE(smulh(z23.VnS(), p0.Merging(), z23.VnS(), z3.VnS()), + "smulh z23.s, p0/m, z23.s, z3.s"); + COMPARE(smulh(z23.VnD(), p0.Merging(), z23.VnD(), z3.VnD()), + "smulh z23.d, p0/m, z23.d, z3.d"); + COMPARE(subr(z1.VnB(), p6.Merging(), z1.VnB(), z1.VnB()), + "subr z1.b, p6/m, z1.b, z1.b"); + COMPARE(subr(z1.VnH(), p6.Merging(), z1.VnH(), z1.VnH()), + "subr z1.h, p6/m, z1.h, z1.h"); + COMPARE(subr(z1.VnS(), p6.Merging(), z1.VnS(), z1.VnS()), + "subr z1.s, p6/m, z1.s, z1.s"); + COMPARE(subr(z1.VnD(), p6.Merging(), z1.VnD(), z1.VnD()), + "subr z1.d, p6/m, z1.d, z1.d"); + COMPARE(sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()), + "sub z28.b, 
p2/m, z28.b, z0.b"); + COMPARE(sub(z28.VnH(), p2.Merging(), z28.VnH(), z0.VnH()), + "sub z28.h, p2/m, z28.h, z0.h"); + COMPARE(sub(z28.VnS(), p2.Merging(), z28.VnS(), z0.VnS()), + "sub z28.s, p2/m, z28.s, z0.s"); + COMPARE(sub(z28.VnD(), p2.Merging(), z28.VnD(), z0.VnD()), + "sub z28.d, p2/m, z28.d, z0.d"); + COMPARE(uabd(z14.VnB(), p6.Merging(), z14.VnB(), z22.VnB()), + "uabd z14.b, p6/m, z14.b, z22.b"); + COMPARE(uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()), + "uabd z14.h, p6/m, z14.h, z22.h"); + COMPARE(uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()), + "uabd z14.s, p6/m, z14.s, z22.s"); + COMPARE(uabd(z14.VnD(), p6.Merging(), z14.VnD(), z22.VnD()), + "uabd z14.d, p6/m, z14.d, z22.d"); + COMPARE(udivr(z27.VnS(), p5.Merging(), z27.VnS(), z31.VnS()), + "udivr z27.s, p5/m, z27.s, z31.s"); + COMPARE(udiv(z13.VnD(), p4.Merging(), z13.VnD(), z11.VnD()), + "udiv z13.d, p4/m, z13.d, z11.d"); + COMPARE(umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()), + "umax z0.b, p5/m, z0.b, z14.b"); + COMPARE(umax(z0.VnH(), p5.Merging(), z0.VnH(), z14.VnH()), + "umax z0.h, p5/m, z0.h, z14.h"); + COMPARE(umax(z0.VnS(), p5.Merging(), z0.VnS(), z14.VnS()), + "umax z0.s, p5/m, z0.s, z14.s"); + COMPARE(umax(z0.VnD(), p5.Merging(), z0.VnD(), z14.VnD()), + "umax z0.d, p5/m, z0.d, z14.d"); + COMPARE(umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()), + "umin z26.b, p5/m, z26.b, z12.b"); + COMPARE(umin(z26.VnH(), p5.Merging(), z26.VnH(), z12.VnH()), + "umin z26.h, p5/m, z26.h, z12.h"); + COMPARE(umin(z26.VnS(), p5.Merging(), z26.VnS(), z12.VnS()), + "umin z26.s, p5/m, z26.s, z12.s"); + COMPARE(umin(z26.VnD(), p5.Merging(), z26.VnD(), z12.VnD()), + "umin z26.d, p5/m, z26.d, z12.d"); + COMPARE(umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()), + "umulh z12.b, p2/m, z12.b, z17.b"); + COMPARE(umulh(z12.VnH(), p2.Merging(), z12.VnH(), z17.VnH()), + "umulh z12.h, p2/m, z12.h, z17.h"); + COMPARE(umulh(z12.VnS(), p2.Merging(), z12.VnS(), z17.VnS()), + "umulh z12.s, p2/m, z12.s, 
z17.s"); + COMPARE(umulh(z12.VnD(), p2.Merging(), z12.VnD(), z17.VnD()), + "umulh z12.d, p2/m, z12.d, z17.d"); CLEANUP(); } @@ -2430,25 +2500,25 @@ TEST(sve_int_binary_arithmetic_predicated_macro) { TEST(sve_int_compare_scalars) { SETUP(); - COMPARE_PREFIX(ctermeq(w30, w26), "ctermeq w30, w26"); - COMPARE_PREFIX(ctermne(x21, x18), "ctermne x21, x18"); - COMPARE_PREFIX(whilele(p10.VnB(), x11, x6), "whilele p10.b, x11, x6"); - COMPARE_PREFIX(whilele(p10.VnH(), w11, w6), "whilele p10.h, w11, w6"); - COMPARE_PREFIX(whilele(p10.VnH(), x11, x6), "whilele p10.h, x11, x6"); - COMPARE_PREFIX(whilele(p10.VnS(), w11, w6), "whilele p10.s, w11, w6"); - COMPARE_PREFIX(whilele(p10.VnD(), x11, x6), "whilele p10.d, x11, x6"); - COMPARE_PREFIX(whilelo(p4.VnB(), w3, w25), "whilelo p4.b, w3, w25"); - COMPARE_PREFIX(whilelo(p4.VnH(), x3, x25), "whilelo p4.h, x3, x25"); - COMPARE_PREFIX(whilelo(p4.VnS(), w3, w25), "whilelo p4.s, w3, w25"); - COMPARE_PREFIX(whilelo(p4.VnD(), x3, x25), "whilelo p4.d, x3, x25"); - COMPARE_PREFIX(whilels(p7.VnB(), w15, w15), "whilels p7.b, w15, w15"); - COMPARE_PREFIX(whilels(p7.VnH(), x15, x15), "whilels p7.h, x15, x15"); - COMPARE_PREFIX(whilels(p7.VnS(), w15, w15), "whilels p7.s, w15, w15"); - COMPARE_PREFIX(whilels(p7.VnD(), x15, x15), "whilels p7.d, x15, x15"); - COMPARE_PREFIX(whilelt(p14.VnB(), w11, w14), "whilelt p14.b, w11, w14"); - COMPARE_PREFIX(whilelt(p14.VnH(), x11, x14), "whilelt p14.h, x11, x14"); - COMPARE_PREFIX(whilelt(p14.VnS(), w11, w14), "whilelt p14.s, w11, w14"); - COMPARE_PREFIX(whilelt(p14.VnD(), x11, x14), "whilelt p14.d, x11, x14"); + COMPARE(ctermeq(w30, w26), "ctermeq w30, w26"); + COMPARE(ctermne(x21, x18), "ctermne x21, x18"); + COMPARE(whilele(p10.VnB(), x11, x6), "whilele p10.b, x11, x6"); + COMPARE(whilele(p10.VnH(), w11, w6), "whilele p10.h, w11, w6"); + COMPARE(whilele(p10.VnH(), x11, x6), "whilele p10.h, x11, x6"); + COMPARE(whilele(p10.VnS(), w11, w6), "whilele p10.s, w11, w6"); + COMPARE(whilele(p10.VnD(), x11, x6), 
"whilele p10.d, x11, x6"); + COMPARE(whilelo(p4.VnB(), w3, w25), "whilelo p4.b, w3, w25"); + COMPARE(whilelo(p4.VnH(), x3, x25), "whilelo p4.h, x3, x25"); + COMPARE(whilelo(p4.VnS(), w3, w25), "whilelo p4.s, w3, w25"); + COMPARE(whilelo(p4.VnD(), x3, x25), "whilelo p4.d, x3, x25"); + COMPARE(whilels(p7.VnB(), w15, w15), "whilels p7.b, w15, w15"); + COMPARE(whilels(p7.VnH(), x15, x15), "whilels p7.h, x15, x15"); + COMPARE(whilels(p7.VnS(), w15, w15), "whilels p7.s, w15, w15"); + COMPARE(whilels(p7.VnD(), x15, x15), "whilels p7.d, x15, x15"); + COMPARE(whilelt(p14.VnB(), w11, w14), "whilelt p14.b, w11, w14"); + COMPARE(whilelt(p14.VnH(), x11, x14), "whilelt p14.h, x11, x14"); + COMPARE(whilelt(p14.VnS(), w11, w14), "whilelt p14.s, w11, w14"); + COMPARE(whilelt(p14.VnD(), x11, x14), "whilelt p14.d, x11, x14"); CLEANUP(); } @@ -2456,54 +2526,54 @@ TEST(sve_int_compare_scalars) { TEST(sve_int_compare_signed_imm) { SETUP(); - COMPARE_PREFIX(cmpeq(p0.VnB(), p3.Zeroing(), z1.VnB(), 15), - "cmpeq p0.b, p3/z, z1.b, #15"); - COMPARE_PREFIX(cmpeq(p0.VnH(), p3.Zeroing(), z1.VnH(), 7), - "cmpeq p0.h, p3/z, z1.h, #7"); - COMPARE_PREFIX(cmpeq(p0.VnS(), p3.Zeroing(), z1.VnS(), -3), - "cmpeq p0.s, p3/z, z1.s, #-3"); - COMPARE_PREFIX(cmpeq(p0.VnD(), p3.Zeroing(), z1.VnD(), -14), - "cmpeq p0.d, p3/z, z1.d, #-14"); - COMPARE_PREFIX(cmpge(p9.VnB(), p6.Zeroing(), z12.VnB(), 14), - "cmpge p9.b, p6/z, z12.b, #14"); - COMPARE_PREFIX(cmpge(p9.VnH(), p6.Zeroing(), z12.VnH(), 6), - "cmpge p9.h, p6/z, z12.h, #6"); - COMPARE_PREFIX(cmpge(p9.VnS(), p6.Zeroing(), z12.VnS(), -4), - "cmpge p9.s, p6/z, z12.s, #-4"); - COMPARE_PREFIX(cmpge(p9.VnD(), p6.Zeroing(), z12.VnD(), -13), - "cmpge p9.d, p6/z, z12.d, #-13"); - COMPARE_PREFIX(cmpgt(p15.VnB(), p4.Zeroing(), z23.VnB(), 13), - "cmpgt p15.b, p4/z, z23.b, #13"); - COMPARE_PREFIX(cmpgt(p15.VnH(), p4.Zeroing(), z23.VnH(), 5), - "cmpgt p15.h, p4/z, z23.h, #5"); - COMPARE_PREFIX(cmpgt(p15.VnS(), p4.Zeroing(), z23.VnS(), -12), - "cmpgt p15.s, p4/z, z23.s, 
#-12"); - COMPARE_PREFIX(cmpgt(p15.VnD(), p4.Zeroing(), z23.VnD(), -5), - "cmpgt p15.d, p4/z, z23.d, #-5"); - COMPARE_PREFIX(cmple(p4.VnB(), p3.Zeroing(), z5.VnB(), 12), - "cmple p4.b, p3/z, z5.b, #12"); - COMPARE_PREFIX(cmple(p4.VnH(), p3.Zeroing(), z5.VnH(), 4), - "cmple p4.h, p3/z, z5.h, #4"); - COMPARE_PREFIX(cmple(p4.VnS(), p3.Zeroing(), z5.VnS(), -11), - "cmple p4.s, p3/z, z5.s, #-11"); - COMPARE_PREFIX(cmple(p4.VnD(), p3.Zeroing(), z5.VnD(), -6), - "cmple p4.d, p3/z, z5.d, #-6"); - COMPARE_PREFIX(cmplt(p3.VnB(), p7.Zeroing(), z15.VnB(), 11), - "cmplt p3.b, p7/z, z15.b, #11"); - COMPARE_PREFIX(cmplt(p3.VnH(), p7.Zeroing(), z15.VnH(), 3), - "cmplt p3.h, p7/z, z15.h, #3"); - COMPARE_PREFIX(cmplt(p3.VnS(), p7.Zeroing(), z15.VnS(), -10), - "cmplt p3.s, p7/z, z15.s, #-10"); - COMPARE_PREFIX(cmplt(p3.VnD(), p7.Zeroing(), z15.VnD(), -7), - "cmplt p3.d, p7/z, z15.d, #-7"); - COMPARE_PREFIX(cmpne(p13.VnB(), p5.Zeroing(), z20.VnB(), 10), - "cmpne p13.b, p5/z, z20.b, #10"); - COMPARE_PREFIX(cmpne(p13.VnH(), p5.Zeroing(), z20.VnH(), 2), - "cmpne p13.h, p5/z, z20.h, #2"); - COMPARE_PREFIX(cmpne(p13.VnS(), p5.Zeroing(), z20.VnS(), -9), - "cmpne p13.s, p5/z, z20.s, #-9"); - COMPARE_PREFIX(cmpne(p13.VnD(), p5.Zeroing(), z20.VnD(), -8), - "cmpne p13.d, p5/z, z20.d, #-8"); + COMPARE(cmpeq(p0.VnB(), p3.Zeroing(), z1.VnB(), 15), + "cmpeq p0.b, p3/z, z1.b, #15"); + COMPARE(cmpeq(p0.VnH(), p3.Zeroing(), z1.VnH(), 7), + "cmpeq p0.h, p3/z, z1.h, #7"); + COMPARE(cmpeq(p0.VnS(), p3.Zeroing(), z1.VnS(), -3), + "cmpeq p0.s, p3/z, z1.s, #-3"); + COMPARE(cmpeq(p0.VnD(), p3.Zeroing(), z1.VnD(), -14), + "cmpeq p0.d, p3/z, z1.d, #-14"); + COMPARE(cmpge(p9.VnB(), p6.Zeroing(), z12.VnB(), 14), + "cmpge p9.b, p6/z, z12.b, #14"); + COMPARE(cmpge(p9.VnH(), p6.Zeroing(), z12.VnH(), 6), + "cmpge p9.h, p6/z, z12.h, #6"); + COMPARE(cmpge(p9.VnS(), p6.Zeroing(), z12.VnS(), -4), + "cmpge p9.s, p6/z, z12.s, #-4"); + COMPARE(cmpge(p9.VnD(), p6.Zeroing(), z12.VnD(), -13), + "cmpge p9.d, p6/z, z12.d, 
#-13"); + COMPARE(cmpgt(p15.VnB(), p4.Zeroing(), z23.VnB(), 13), + "cmpgt p15.b, p4/z, z23.b, #13"); + COMPARE(cmpgt(p15.VnH(), p4.Zeroing(), z23.VnH(), 5), + "cmpgt p15.h, p4/z, z23.h, #5"); + COMPARE(cmpgt(p15.VnS(), p4.Zeroing(), z23.VnS(), -12), + "cmpgt p15.s, p4/z, z23.s, #-12"); + COMPARE(cmpgt(p15.VnD(), p4.Zeroing(), z23.VnD(), -5), + "cmpgt p15.d, p4/z, z23.d, #-5"); + COMPARE(cmple(p4.VnB(), p3.Zeroing(), z5.VnB(), 12), + "cmple p4.b, p3/z, z5.b, #12"); + COMPARE(cmple(p4.VnH(), p3.Zeroing(), z5.VnH(), 4), + "cmple p4.h, p3/z, z5.h, #4"); + COMPARE(cmple(p4.VnS(), p3.Zeroing(), z5.VnS(), -11), + "cmple p4.s, p3/z, z5.s, #-11"); + COMPARE(cmple(p4.VnD(), p3.Zeroing(), z5.VnD(), -6), + "cmple p4.d, p3/z, z5.d, #-6"); + COMPARE(cmplt(p3.VnB(), p7.Zeroing(), z15.VnB(), 11), + "cmplt p3.b, p7/z, z15.b, #11"); + COMPARE(cmplt(p3.VnH(), p7.Zeroing(), z15.VnH(), 3), + "cmplt p3.h, p7/z, z15.h, #3"); + COMPARE(cmplt(p3.VnS(), p7.Zeroing(), z15.VnS(), -10), + "cmplt p3.s, p7/z, z15.s, #-10"); + COMPARE(cmplt(p3.VnD(), p7.Zeroing(), z15.VnD(), -7), + "cmplt p3.d, p7/z, z15.d, #-7"); + COMPARE(cmpne(p13.VnB(), p5.Zeroing(), z20.VnB(), 10), + "cmpne p13.b, p5/z, z20.b, #10"); + COMPARE(cmpne(p13.VnH(), p5.Zeroing(), z20.VnH(), 2), + "cmpne p13.h, p5/z, z20.h, #2"); + COMPARE(cmpne(p13.VnS(), p5.Zeroing(), z20.VnS(), -9), + "cmpne p13.s, p5/z, z20.s, #-9"); + COMPARE(cmpne(p13.VnD(), p5.Zeroing(), z20.VnD(), -8), + "cmpne p13.d, p5/z, z20.d, #-8"); CLEANUP(); } @@ -2511,38 +2581,38 @@ TEST(sve_int_compare_signed_imm) { TEST(sve_int_compare_unsigned_imm) { SETUP(); - COMPARE_PREFIX(cmphi(p8.VnB(), p6.Zeroing(), z1.VnB(), 127), - "cmphi p8.b, p6/z, z1.b, #127"); - COMPARE_PREFIX(cmphi(p8.VnH(), p6.Zeroing(), z1.VnH(), 126), - "cmphi p8.h, p6/z, z1.h, #126"); - COMPARE_PREFIX(cmphi(p8.VnS(), p6.Zeroing(), z1.VnS(), 99), - "cmphi p8.s, p6/z, z1.s, #99"); - COMPARE_PREFIX(cmphi(p8.VnD(), p6.Zeroing(), z1.VnD(), 78), - "cmphi p8.d, p6/z, z1.d, #78"); - 
COMPARE_PREFIX(cmphs(p11.VnB(), p2.Zeroing(), z8.VnB(), 67), - "cmphs p11.b, p2/z, z8.b, #67"); - COMPARE_PREFIX(cmphs(p11.VnH(), p2.Zeroing(), z8.VnH(), 63), - "cmphs p11.h, p2/z, z8.h, #63"); - COMPARE_PREFIX(cmphs(p11.VnS(), p2.Zeroing(), z8.VnS(), 51), - "cmphs p11.s, p2/z, z8.s, #51"); - COMPARE_PREFIX(cmphs(p11.VnD(), p2.Zeroing(), z8.VnD(), 40), - "cmphs p11.d, p2/z, z8.d, #40"); - COMPARE_PREFIX(cmplo(p9.VnB(), p4.Zeroing(), z4.VnB(), 32), - "cmplo p9.b, p4/z, z4.b, #32"); - COMPARE_PREFIX(cmplo(p9.VnH(), p4.Zeroing(), z4.VnH(), 22), - "cmplo p9.h, p4/z, z4.h, #22"); - COMPARE_PREFIX(cmplo(p9.VnS(), p4.Zeroing(), z4.VnS(), 15), - "cmplo p9.s, p4/z, z4.s, #15"); - COMPARE_PREFIX(cmplo(p9.VnD(), p4.Zeroing(), z4.VnD(), 11), - "cmplo p9.d, p4/z, z4.d, #11"); - COMPARE_PREFIX(cmpls(p14.VnB(), p5.Zeroing(), z9.VnB(), 7), - "cmpls p14.b, p5/z, z9.b, #7"); - COMPARE_PREFIX(cmpls(p14.VnH(), p5.Zeroing(), z9.VnH(), 4), - "cmpls p14.h, p5/z, z9.h, #4"); - COMPARE_PREFIX(cmpls(p14.VnS(), p5.Zeroing(), z9.VnS(), 3), - "cmpls p14.s, p5/z, z9.s, #3"); - COMPARE_PREFIX(cmpls(p14.VnD(), p5.Zeroing(), z9.VnD(), 1), - "cmpls p14.d, p5/z, z9.d, #1"); + COMPARE(cmphi(p8.VnB(), p6.Zeroing(), z1.VnB(), 127), + "cmphi p8.b, p6/z, z1.b, #127"); + COMPARE(cmphi(p8.VnH(), p6.Zeroing(), z1.VnH(), 126), + "cmphi p8.h, p6/z, z1.h, #126"); + COMPARE(cmphi(p8.VnS(), p6.Zeroing(), z1.VnS(), 99), + "cmphi p8.s, p6/z, z1.s, #99"); + COMPARE(cmphi(p8.VnD(), p6.Zeroing(), z1.VnD(), 78), + "cmphi p8.d, p6/z, z1.d, #78"); + COMPARE(cmphs(p11.VnB(), p2.Zeroing(), z8.VnB(), 67), + "cmphs p11.b, p2/z, z8.b, #67"); + COMPARE(cmphs(p11.VnH(), p2.Zeroing(), z8.VnH(), 63), + "cmphs p11.h, p2/z, z8.h, #63"); + COMPARE(cmphs(p11.VnS(), p2.Zeroing(), z8.VnS(), 51), + "cmphs p11.s, p2/z, z8.s, #51"); + COMPARE(cmphs(p11.VnD(), p2.Zeroing(), z8.VnD(), 40), + "cmphs p11.d, p2/z, z8.d, #40"); + COMPARE(cmplo(p9.VnB(), p4.Zeroing(), z4.VnB(), 32), + "cmplo p9.b, p4/z, z4.b, #32"); + COMPARE(cmplo(p9.VnH(), 
p4.Zeroing(), z4.VnH(), 22), + "cmplo p9.h, p4/z, z4.h, #22"); + COMPARE(cmplo(p9.VnS(), p4.Zeroing(), z4.VnS(), 15), + "cmplo p9.s, p4/z, z4.s, #15"); + COMPARE(cmplo(p9.VnD(), p4.Zeroing(), z4.VnD(), 11), + "cmplo p9.d, p4/z, z4.d, #11"); + COMPARE(cmpls(p14.VnB(), p5.Zeroing(), z9.VnB(), 7), + "cmpls p14.b, p5/z, z9.b, #7"); + COMPARE(cmpls(p14.VnH(), p5.Zeroing(), z9.VnH(), 4), + "cmpls p14.h, p5/z, z9.h, #4"); + COMPARE(cmpls(p14.VnS(), p5.Zeroing(), z9.VnS(), 3), + "cmpls p14.s, p5/z, z9.s, #3"); + COMPARE(cmpls(p14.VnD(), p5.Zeroing(), z9.VnD(), 1), + "cmpls p14.d, p5/z, z9.d, #1"); CLEANUP(); } @@ -2550,146 +2620,146 @@ TEST(sve_int_compare_unsigned_imm) { TEST(sve_int_compare_vectors) { SETUP(); - COMPARE_PREFIX(cmpeq(p13.VnB(), p0.Zeroing(), z26.VnB(), z10.VnD()), - "cmpeq p13.b, p0/z, z26.b, z10.d"); - COMPARE_PREFIX(cmpeq(p13.VnH(), p0.Zeroing(), z26.VnH(), z10.VnD()), - "cmpeq p13.h, p0/z, z26.h, z10.d"); - COMPARE_PREFIX(cmpeq(p13.VnS(), p0.Zeroing(), z26.VnS(), z10.VnD()), - "cmpeq p13.s, p0/z, z26.s, z10.d"); - COMPARE_PREFIX(cmpeq(p14.VnB(), p3.Zeroing(), z18.VnB(), z15.VnB()), - "cmpeq p14.b, p3/z, z18.b, z15.b"); - COMPARE_PREFIX(cmpeq(p14.VnH(), p3.Zeroing(), z18.VnH(), z15.VnH()), - "cmpeq p14.h, p3/z, z18.h, z15.h"); - COMPARE_PREFIX(cmpeq(p14.VnS(), p3.Zeroing(), z18.VnS(), z15.VnS()), - "cmpeq p14.s, p3/z, z18.s, z15.s"); - COMPARE_PREFIX(cmpeq(p14.VnD(), p3.Zeroing(), z18.VnD(), z15.VnD()), - "cmpeq p14.d, p3/z, z18.d, z15.d"); - COMPARE_PREFIX(cmpge(p8.VnB(), p3.Zeroing(), z13.VnB(), z0.VnD()), - "cmpge p8.b, p3/z, z13.b, z0.d"); - COMPARE_PREFIX(cmpge(p8.VnH(), p3.Zeroing(), z13.VnH(), z0.VnD()), - "cmpge p8.h, p3/z, z13.h, z0.d"); - COMPARE_PREFIX(cmpge(p8.VnS(), p3.Zeroing(), z13.VnS(), z0.VnD()), - "cmpge p8.s, p3/z, z13.s, z0.d"); - COMPARE_PREFIX(cmpge(p3.VnB(), p4.Zeroing(), z6.VnB(), z1.VnB()), - "cmpge p3.b, p4/z, z6.b, z1.b"); - COMPARE_PREFIX(cmpge(p3.VnH(), p4.Zeroing(), z6.VnH(), z1.VnH()), - "cmpge p3.h, p4/z, z6.h, z1.h"); - 
COMPARE_PREFIX(cmpge(p3.VnS(), p4.Zeroing(), z6.VnS(), z1.VnS()), - "cmpge p3.s, p4/z, z6.s, z1.s"); - COMPARE_PREFIX(cmpge(p3.VnD(), p4.Zeroing(), z6.VnD(), z1.VnD()), - "cmpge p3.d, p4/z, z6.d, z1.d"); - COMPARE_PREFIX(cmpgt(p4.VnB(), p2.Zeroing(), z24.VnB(), z1.VnD()), - "cmpgt p4.b, p2/z, z24.b, z1.d"); - COMPARE_PREFIX(cmpgt(p4.VnH(), p2.Zeroing(), z24.VnH(), z1.VnD()), - "cmpgt p4.h, p2/z, z24.h, z1.d"); - COMPARE_PREFIX(cmpgt(p4.VnS(), p2.Zeroing(), z24.VnS(), z1.VnD()), - "cmpgt p4.s, p2/z, z24.s, z1.d"); - COMPARE_PREFIX(cmpgt(p10.VnB(), p3.Zeroing(), z23.VnB(), z19.VnB()), - "cmpgt p10.b, p3/z, z23.b, z19.b"); - COMPARE_PREFIX(cmpgt(p10.VnH(), p3.Zeroing(), z23.VnH(), z19.VnH()), - "cmpgt p10.h, p3/z, z23.h, z19.h"); - COMPARE_PREFIX(cmpgt(p10.VnS(), p3.Zeroing(), z23.VnS(), z19.VnS()), - "cmpgt p10.s, p3/z, z23.s, z19.s"); - COMPARE_PREFIX(cmpgt(p10.VnD(), p3.Zeroing(), z23.VnD(), z19.VnD()), - "cmpgt p10.d, p3/z, z23.d, z19.d"); - COMPARE_PREFIX(cmphi(p10.VnB(), p6.Zeroing(), z6.VnB(), z11.VnD()), - "cmphi p10.b, p6/z, z6.b, z11.d"); - COMPARE_PREFIX(cmphi(p10.VnH(), p6.Zeroing(), z6.VnH(), z11.VnD()), - "cmphi p10.h, p6/z, z6.h, z11.d"); - COMPARE_PREFIX(cmphi(p10.VnS(), p6.Zeroing(), z6.VnS(), z11.VnD()), - "cmphi p10.s, p6/z, z6.s, z11.d"); - COMPARE_PREFIX(cmphi(p1.VnB(), p0.Zeroing(), z4.VnB(), z2.VnB()), - "cmphi p1.b, p0/z, z4.b, z2.b"); - COMPARE_PREFIX(cmphi(p1.VnH(), p0.Zeroing(), z4.VnH(), z2.VnH()), - "cmphi p1.h, p0/z, z4.h, z2.h"); - COMPARE_PREFIX(cmphi(p1.VnS(), p0.Zeroing(), z4.VnS(), z2.VnS()), - "cmphi p1.s, p0/z, z4.s, z2.s"); - COMPARE_PREFIX(cmphi(p1.VnD(), p0.Zeroing(), z4.VnD(), z2.VnD()), - "cmphi p1.d, p0/z, z4.d, z2.d"); - COMPARE_PREFIX(cmphs(p10.VnB(), p5.Zeroing(), z22.VnB(), z5.VnD()), - "cmphs p10.b, p5/z, z22.b, z5.d"); - COMPARE_PREFIX(cmphs(p10.VnH(), p5.Zeroing(), z22.VnH(), z5.VnD()), - "cmphs p10.h, p5/z, z22.h, z5.d"); - COMPARE_PREFIX(cmphs(p10.VnS(), p5.Zeroing(), z22.VnS(), z5.VnD()), - "cmphs p10.s, p5/z, 
z22.s, z5.d"); - COMPARE_PREFIX(cmphs(p12.VnB(), p6.Zeroing(), z20.VnB(), z24.VnB()), - "cmphs p12.b, p6/z, z20.b, z24.b"); - COMPARE_PREFIX(cmphs(p12.VnH(), p6.Zeroing(), z20.VnH(), z24.VnH()), - "cmphs p12.h, p6/z, z20.h, z24.h"); - COMPARE_PREFIX(cmphs(p12.VnS(), p6.Zeroing(), z20.VnS(), z24.VnS()), - "cmphs p12.s, p6/z, z20.s, z24.s"); - COMPARE_PREFIX(cmphs(p12.VnD(), p6.Zeroing(), z20.VnD(), z24.VnD()), - "cmphs p12.d, p6/z, z20.d, z24.d"); - COMPARE_PREFIX(cmple(p11.VnB(), p2.Zeroing(), z18.VnB(), z0.VnD()), - "cmple p11.b, p2/z, z18.b, z0.d"); - COMPARE_PREFIX(cmple(p11.VnH(), p2.Zeroing(), z18.VnH(), z0.VnD()), - "cmple p11.h, p2/z, z18.h, z0.d"); - COMPARE_PREFIX(cmple(p11.VnS(), p2.Zeroing(), z18.VnS(), z0.VnD()), - "cmple p11.s, p2/z, z18.s, z0.d"); - COMPARE_PREFIX(cmplo(p12.VnB(), p6.Zeroing(), z21.VnB(), z10.VnD()), - "cmplo p12.b, p6/z, z21.b, z10.d"); - COMPARE_PREFIX(cmplo(p12.VnH(), p6.Zeroing(), z21.VnH(), z10.VnD()), - "cmplo p12.h, p6/z, z21.h, z10.d"); - COMPARE_PREFIX(cmplo(p12.VnS(), p6.Zeroing(), z21.VnS(), z10.VnD()), - "cmplo p12.s, p6/z, z21.s, z10.d"); - COMPARE_PREFIX(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnD()), - "cmpls p8.b, p4/z, z9.b, z15.d"); - COMPARE_PREFIX(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnD()), - "cmpls p8.h, p4/z, z9.h, z15.d"); - COMPARE_PREFIX(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnD()), - "cmpls p8.s, p4/z, z9.s, z15.d"); - COMPARE_PREFIX(cmplt(p6.VnB(), p6.Zeroing(), z4.VnB(), z8.VnD()), - "cmplt p6.b, p6/z, z4.b, z8.d"); - COMPARE_PREFIX(cmplt(p6.VnH(), p6.Zeroing(), z4.VnH(), z8.VnD()), - "cmplt p6.h, p6/z, z4.h, z8.d"); - COMPARE_PREFIX(cmplt(p6.VnS(), p6.Zeroing(), z4.VnS(), z8.VnD()), - "cmplt p6.s, p6/z, z4.s, z8.d"); - COMPARE_PREFIX(cmpne(p1.VnB(), p6.Zeroing(), z31.VnB(), z16.VnD()), - "cmpne p1.b, p6/z, z31.b, z16.d"); - COMPARE_PREFIX(cmpne(p1.VnH(), p6.Zeroing(), z31.VnH(), z16.VnD()), - "cmpne p1.h, p6/z, z31.h, z16.d"); - COMPARE_PREFIX(cmpne(p1.VnS(), p6.Zeroing(), z31.VnS(), 
z16.VnD()), - "cmpne p1.s, p6/z, z31.s, z16.d"); - COMPARE_PREFIX(cmpne(p11.VnB(), p1.Zeroing(), z3.VnB(), z24.VnB()), - "cmpne p11.b, p1/z, z3.b, z24.b"); - COMPARE_PREFIX(cmpne(p11.VnH(), p1.Zeroing(), z3.VnH(), z24.VnH()), - "cmpne p11.h, p1/z, z3.h, z24.h"); - COMPARE_PREFIX(cmpne(p11.VnS(), p1.Zeroing(), z3.VnS(), z24.VnS()), - "cmpne p11.s, p1/z, z3.s, z24.s"); - COMPARE_PREFIX(cmpne(p11.VnD(), p1.Zeroing(), z3.VnD(), z24.VnD()), - "cmpne p11.d, p1/z, z3.d, z24.d"); - COMPARE_PREFIX(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnB()), - "cmphs p8.b, p4/z, z15.b, z9.b"); - COMPARE_PREFIX(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnH()), - "cmphs p8.h, p4/z, z15.h, z9.h"); - COMPARE_PREFIX(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnS()), - "cmphs p8.s, p4/z, z15.s, z9.s"); - COMPARE_PREFIX(cmpls(p8.VnD(), p4.Zeroing(), z9.VnD(), z15.VnD()), - "cmphs p8.d, p4/z, z15.d, z9.d"); - COMPARE_PREFIX(cmplo(p10.VnB(), p3.Zeroing(), z14.VnB(), z20.VnB()), - "cmphi p10.b, p3/z, z20.b, z14.b"); - COMPARE_PREFIX(cmplo(p10.VnH(), p3.Zeroing(), z14.VnH(), z20.VnH()), - "cmphi p10.h, p3/z, z20.h, z14.h"); - COMPARE_PREFIX(cmplo(p10.VnS(), p3.Zeroing(), z14.VnS(), z20.VnS()), - "cmphi p10.s, p3/z, z20.s, z14.s"); - COMPARE_PREFIX(cmplo(p10.VnD(), p3.Zeroing(), z14.VnD(), z20.VnD()), - "cmphi p10.d, p3/z, z20.d, z14.d"); - COMPARE_PREFIX(cmple(p12.VnB(), p2.Zeroing(), z19.VnB(), z25.VnB()), - "cmpge p12.b, p2/z, z25.b, z19.b"); - COMPARE_PREFIX(cmple(p12.VnH(), p2.Zeroing(), z19.VnH(), z25.VnH()), - "cmpge p12.h, p2/z, z25.h, z19.h"); - COMPARE_PREFIX(cmple(p12.VnS(), p2.Zeroing(), z19.VnS(), z25.VnS()), - "cmpge p12.s, p2/z, z25.s, z19.s"); - COMPARE_PREFIX(cmple(p12.VnD(), p2.Zeroing(), z19.VnD(), z25.VnD()), - "cmpge p12.d, p2/z, z25.d, z19.d"); - COMPARE_PREFIX(cmplt(p14.VnB(), p1.Zeroing(), z24.VnB(), z30.VnB()), - "cmpgt p14.b, p1/z, z30.b, z24.b"); - COMPARE_PREFIX(cmplt(p14.VnH(), p1.Zeroing(), z24.VnH(), z30.VnH()), - "cmpgt p14.h, p1/z, z30.h, z24.h"); - 
COMPARE_PREFIX(cmplt(p14.VnS(), p1.Zeroing(), z24.VnS(), z30.VnS()), - "cmpgt p14.s, p1/z, z30.s, z24.s"); - COMPARE_PREFIX(cmplt(p14.VnD(), p1.Zeroing(), z24.VnD(), z30.VnD()), - "cmpgt p14.d, p1/z, z30.d, z24.d"); + COMPARE(cmpeq(p13.VnB(), p0.Zeroing(), z26.VnB(), z10.VnD()), + "cmpeq p13.b, p0/z, z26.b, z10.d"); + COMPARE(cmpeq(p13.VnH(), p0.Zeroing(), z26.VnH(), z10.VnD()), + "cmpeq p13.h, p0/z, z26.h, z10.d"); + COMPARE(cmpeq(p13.VnS(), p0.Zeroing(), z26.VnS(), z10.VnD()), + "cmpeq p13.s, p0/z, z26.s, z10.d"); + COMPARE(cmpeq(p14.VnB(), p3.Zeroing(), z18.VnB(), z15.VnB()), + "cmpeq p14.b, p3/z, z18.b, z15.b"); + COMPARE(cmpeq(p14.VnH(), p3.Zeroing(), z18.VnH(), z15.VnH()), + "cmpeq p14.h, p3/z, z18.h, z15.h"); + COMPARE(cmpeq(p14.VnS(), p3.Zeroing(), z18.VnS(), z15.VnS()), + "cmpeq p14.s, p3/z, z18.s, z15.s"); + COMPARE(cmpeq(p14.VnD(), p3.Zeroing(), z18.VnD(), z15.VnD()), + "cmpeq p14.d, p3/z, z18.d, z15.d"); + COMPARE(cmpge(p8.VnB(), p3.Zeroing(), z13.VnB(), z0.VnD()), + "cmpge p8.b, p3/z, z13.b, z0.d"); + COMPARE(cmpge(p8.VnH(), p3.Zeroing(), z13.VnH(), z0.VnD()), + "cmpge p8.h, p3/z, z13.h, z0.d"); + COMPARE(cmpge(p8.VnS(), p3.Zeroing(), z13.VnS(), z0.VnD()), + "cmpge p8.s, p3/z, z13.s, z0.d"); + COMPARE(cmpge(p3.VnB(), p4.Zeroing(), z6.VnB(), z1.VnB()), + "cmpge p3.b, p4/z, z6.b, z1.b"); + COMPARE(cmpge(p3.VnH(), p4.Zeroing(), z6.VnH(), z1.VnH()), + "cmpge p3.h, p4/z, z6.h, z1.h"); + COMPARE(cmpge(p3.VnS(), p4.Zeroing(), z6.VnS(), z1.VnS()), + "cmpge p3.s, p4/z, z6.s, z1.s"); + COMPARE(cmpge(p3.VnD(), p4.Zeroing(), z6.VnD(), z1.VnD()), + "cmpge p3.d, p4/z, z6.d, z1.d"); + COMPARE(cmpgt(p4.VnB(), p2.Zeroing(), z24.VnB(), z1.VnD()), + "cmpgt p4.b, p2/z, z24.b, z1.d"); + COMPARE(cmpgt(p4.VnH(), p2.Zeroing(), z24.VnH(), z1.VnD()), + "cmpgt p4.h, p2/z, z24.h, z1.d"); + COMPARE(cmpgt(p4.VnS(), p2.Zeroing(), z24.VnS(), z1.VnD()), + "cmpgt p4.s, p2/z, z24.s, z1.d"); + COMPARE(cmpgt(p10.VnB(), p3.Zeroing(), z23.VnB(), z19.VnB()), + "cmpgt p10.b, p3/z, z23.b, 
z19.b"); + COMPARE(cmpgt(p10.VnH(), p3.Zeroing(), z23.VnH(), z19.VnH()), + "cmpgt p10.h, p3/z, z23.h, z19.h"); + COMPARE(cmpgt(p10.VnS(), p3.Zeroing(), z23.VnS(), z19.VnS()), + "cmpgt p10.s, p3/z, z23.s, z19.s"); + COMPARE(cmpgt(p10.VnD(), p3.Zeroing(), z23.VnD(), z19.VnD()), + "cmpgt p10.d, p3/z, z23.d, z19.d"); + COMPARE(cmphi(p10.VnB(), p6.Zeroing(), z6.VnB(), z11.VnD()), + "cmphi p10.b, p6/z, z6.b, z11.d"); + COMPARE(cmphi(p10.VnH(), p6.Zeroing(), z6.VnH(), z11.VnD()), + "cmphi p10.h, p6/z, z6.h, z11.d"); + COMPARE(cmphi(p10.VnS(), p6.Zeroing(), z6.VnS(), z11.VnD()), + "cmphi p10.s, p6/z, z6.s, z11.d"); + COMPARE(cmphi(p1.VnB(), p0.Zeroing(), z4.VnB(), z2.VnB()), + "cmphi p1.b, p0/z, z4.b, z2.b"); + COMPARE(cmphi(p1.VnH(), p0.Zeroing(), z4.VnH(), z2.VnH()), + "cmphi p1.h, p0/z, z4.h, z2.h"); + COMPARE(cmphi(p1.VnS(), p0.Zeroing(), z4.VnS(), z2.VnS()), + "cmphi p1.s, p0/z, z4.s, z2.s"); + COMPARE(cmphi(p1.VnD(), p0.Zeroing(), z4.VnD(), z2.VnD()), + "cmphi p1.d, p0/z, z4.d, z2.d"); + COMPARE(cmphs(p10.VnB(), p5.Zeroing(), z22.VnB(), z5.VnD()), + "cmphs p10.b, p5/z, z22.b, z5.d"); + COMPARE(cmphs(p10.VnH(), p5.Zeroing(), z22.VnH(), z5.VnD()), + "cmphs p10.h, p5/z, z22.h, z5.d"); + COMPARE(cmphs(p10.VnS(), p5.Zeroing(), z22.VnS(), z5.VnD()), + "cmphs p10.s, p5/z, z22.s, z5.d"); + COMPARE(cmphs(p12.VnB(), p6.Zeroing(), z20.VnB(), z24.VnB()), + "cmphs p12.b, p6/z, z20.b, z24.b"); + COMPARE(cmphs(p12.VnH(), p6.Zeroing(), z20.VnH(), z24.VnH()), + "cmphs p12.h, p6/z, z20.h, z24.h"); + COMPARE(cmphs(p12.VnS(), p6.Zeroing(), z20.VnS(), z24.VnS()), + "cmphs p12.s, p6/z, z20.s, z24.s"); + COMPARE(cmphs(p12.VnD(), p6.Zeroing(), z20.VnD(), z24.VnD()), + "cmphs p12.d, p6/z, z20.d, z24.d"); + COMPARE(cmple(p11.VnB(), p2.Zeroing(), z18.VnB(), z0.VnD()), + "cmple p11.b, p2/z, z18.b, z0.d"); + COMPARE(cmple(p11.VnH(), p2.Zeroing(), z18.VnH(), z0.VnD()), + "cmple p11.h, p2/z, z18.h, z0.d"); + COMPARE(cmple(p11.VnS(), p2.Zeroing(), z18.VnS(), z0.VnD()), + "cmple p11.s, p2/z, z18.s, 
z0.d"); + COMPARE(cmplo(p12.VnB(), p6.Zeroing(), z21.VnB(), z10.VnD()), + "cmplo p12.b, p6/z, z21.b, z10.d"); + COMPARE(cmplo(p12.VnH(), p6.Zeroing(), z21.VnH(), z10.VnD()), + "cmplo p12.h, p6/z, z21.h, z10.d"); + COMPARE(cmplo(p12.VnS(), p6.Zeroing(), z21.VnS(), z10.VnD()), + "cmplo p12.s, p6/z, z21.s, z10.d"); + COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnD()), + "cmpls p8.b, p4/z, z9.b, z15.d"); + COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnD()), + "cmpls p8.h, p4/z, z9.h, z15.d"); + COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnD()), + "cmpls p8.s, p4/z, z9.s, z15.d"); + COMPARE(cmplt(p6.VnB(), p6.Zeroing(), z4.VnB(), z8.VnD()), + "cmplt p6.b, p6/z, z4.b, z8.d"); + COMPARE(cmplt(p6.VnH(), p6.Zeroing(), z4.VnH(), z8.VnD()), + "cmplt p6.h, p6/z, z4.h, z8.d"); + COMPARE(cmplt(p6.VnS(), p6.Zeroing(), z4.VnS(), z8.VnD()), + "cmplt p6.s, p6/z, z4.s, z8.d"); + COMPARE(cmpne(p1.VnB(), p6.Zeroing(), z31.VnB(), z16.VnD()), + "cmpne p1.b, p6/z, z31.b, z16.d"); + COMPARE(cmpne(p1.VnH(), p6.Zeroing(), z31.VnH(), z16.VnD()), + "cmpne p1.h, p6/z, z31.h, z16.d"); + COMPARE(cmpne(p1.VnS(), p6.Zeroing(), z31.VnS(), z16.VnD()), + "cmpne p1.s, p6/z, z31.s, z16.d"); + COMPARE(cmpne(p11.VnB(), p1.Zeroing(), z3.VnB(), z24.VnB()), + "cmpne p11.b, p1/z, z3.b, z24.b"); + COMPARE(cmpne(p11.VnH(), p1.Zeroing(), z3.VnH(), z24.VnH()), + "cmpne p11.h, p1/z, z3.h, z24.h"); + COMPARE(cmpne(p11.VnS(), p1.Zeroing(), z3.VnS(), z24.VnS()), + "cmpne p11.s, p1/z, z3.s, z24.s"); + COMPARE(cmpne(p11.VnD(), p1.Zeroing(), z3.VnD(), z24.VnD()), + "cmpne p11.d, p1/z, z3.d, z24.d"); + COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnB()), + "cmphs p8.b, p4/z, z15.b, z9.b"); + COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnH()), + "cmphs p8.h, p4/z, z15.h, z9.h"); + COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnS()), + "cmphs p8.s, p4/z, z15.s, z9.s"); + COMPARE(cmpls(p8.VnD(), p4.Zeroing(), z9.VnD(), z15.VnD()), + "cmphs p8.d, p4/z, z15.d, z9.d"); + 
COMPARE(cmplo(p10.VnB(), p3.Zeroing(), z14.VnB(), z20.VnB()), + "cmphi p10.b, p3/z, z20.b, z14.b"); + COMPARE(cmplo(p10.VnH(), p3.Zeroing(), z14.VnH(), z20.VnH()), + "cmphi p10.h, p3/z, z20.h, z14.h"); + COMPARE(cmplo(p10.VnS(), p3.Zeroing(), z14.VnS(), z20.VnS()), + "cmphi p10.s, p3/z, z20.s, z14.s"); + COMPARE(cmplo(p10.VnD(), p3.Zeroing(), z14.VnD(), z20.VnD()), + "cmphi p10.d, p3/z, z20.d, z14.d"); + COMPARE(cmple(p12.VnB(), p2.Zeroing(), z19.VnB(), z25.VnB()), + "cmpge p12.b, p2/z, z25.b, z19.b"); + COMPARE(cmple(p12.VnH(), p2.Zeroing(), z19.VnH(), z25.VnH()), + "cmpge p12.h, p2/z, z25.h, z19.h"); + COMPARE(cmple(p12.VnS(), p2.Zeroing(), z19.VnS(), z25.VnS()), + "cmpge p12.s, p2/z, z25.s, z19.s"); + COMPARE(cmple(p12.VnD(), p2.Zeroing(), z19.VnD(), z25.VnD()), + "cmpge p12.d, p2/z, z25.d, z19.d"); + COMPARE(cmplt(p14.VnB(), p1.Zeroing(), z24.VnB(), z30.VnB()), + "cmpgt p14.b, p1/z, z30.b, z24.b"); + COMPARE(cmplt(p14.VnH(), p1.Zeroing(), z24.VnH(), z30.VnH()), + "cmpgt p14.h, p1/z, z30.h, z24.h"); + COMPARE(cmplt(p14.VnS(), p1.Zeroing(), z24.VnS(), z30.VnS()), + "cmpgt p14.s, p1/z, z30.s, z24.s"); + COMPARE(cmplt(p14.VnD(), p1.Zeroing(), z24.VnD(), z30.VnD()), + "cmpgt p14.d, p1/z, z30.d, z24.d"); CLEANUP(); } @@ -2697,16 +2767,16 @@ TEST(sve_int_compare_vectors) { TEST(sve_int_misc_unpredicated) { SETUP(); - COMPARE_PREFIX(fexpa(z5.VnH(), z9.VnH()), "fexpa z5.h, z9.h"); - COMPARE_PREFIX(fexpa(z5.VnS(), z9.VnS()), "fexpa z5.s, z9.s"); - COMPARE_PREFIX(fexpa(z5.VnD(), z9.VnD()), "fexpa z5.d, z9.d"); - COMPARE_PREFIX(ftssel(z17.VnH(), z24.VnH(), z14.VnH()), - "ftssel z17.h, z24.h, z14.h"); - COMPARE_PREFIX(ftssel(z17.VnS(), z24.VnS(), z14.VnS()), - "ftssel z17.s, z24.s, z14.s"); - COMPARE_PREFIX(ftssel(z17.VnD(), z24.VnD(), z14.VnD()), - "ftssel z17.d, z24.d, z14.d"); - COMPARE_PREFIX(movprfx(z24, z1), "movprfx z24, z1"); + COMPARE(fexpa(z5.VnH(), z9.VnH()), "fexpa z5.h, z9.h"); + COMPARE(fexpa(z5.VnS(), z9.VnS()), "fexpa z5.s, z9.s"); + COMPARE(fexpa(z5.VnD(), 
z9.VnD()), "fexpa z5.d, z9.d"); + COMPARE(ftssel(z17.VnH(), z24.VnH(), z14.VnH()), + "ftssel z17.h, z24.h, z14.h"); + COMPARE(ftssel(z17.VnS(), z24.VnS(), z14.VnS()), + "ftssel z17.s, z24.s, z14.s"); + COMPARE(ftssel(z17.VnD(), z24.VnD(), z14.VnD()), + "ftssel z17.d, z24.d, z14.d"); + COMPARE(movprfx(z24, z1), "movprfx z24, z1"); CLEANUP(); } @@ -2714,38 +2784,38 @@ TEST(sve_int_misc_unpredicated) { TEST(sve_int_mul_add_predicated) { SETUP(); - COMPARE_PREFIX(mad(z29.VnB(), p6.Merging(), z22.VnB(), z21.VnB()), - "mad z29.b, p6/m, z22.b, z21.b"); - COMPARE_PREFIX(mad(z29.VnH(), p6.Merging(), z22.VnH(), z21.VnH()), - "mad z29.h, p6/m, z22.h, z21.h"); - COMPARE_PREFIX(mad(z29.VnS(), p6.Merging(), z22.VnS(), z21.VnS()), - "mad z29.s, p6/m, z22.s, z21.s"); - COMPARE_PREFIX(mad(z29.VnD(), p6.Merging(), z22.VnD(), z21.VnD()), - "mad z29.d, p6/m, z22.d, z21.d"); - COMPARE_PREFIX(mla(z23.VnB(), p1.Merging(), z21.VnB(), z23.VnB()), - "mla z23.b, p1/m, z21.b, z23.b"); - COMPARE_PREFIX(mla(z23.VnH(), p1.Merging(), z21.VnH(), z23.VnH()), - "mla z23.h, p1/m, z21.h, z23.h"); - COMPARE_PREFIX(mla(z23.VnS(), p1.Merging(), z21.VnS(), z23.VnS()), - "mla z23.s, p1/m, z21.s, z23.s"); - COMPARE_PREFIX(mla(z23.VnD(), p1.Merging(), z21.VnD(), z23.VnD()), - "mla z23.d, p1/m, z21.d, z23.d"); - COMPARE_PREFIX(mls(z4.VnB(), p6.Merging(), z17.VnB(), z28.VnB()), - "mls z4.b, p6/m, z17.b, z28.b"); - COMPARE_PREFIX(mls(z4.VnH(), p6.Merging(), z17.VnH(), z28.VnH()), - "mls z4.h, p6/m, z17.h, z28.h"); - COMPARE_PREFIX(mls(z4.VnS(), p6.Merging(), z17.VnS(), z28.VnS()), - "mls z4.s, p6/m, z17.s, z28.s"); - COMPARE_PREFIX(mls(z4.VnD(), p6.Merging(), z17.VnD(), z28.VnD()), - "mls z4.d, p6/m, z17.d, z28.d"); - COMPARE_PREFIX(msb(z27.VnB(), p7.Merging(), z29.VnB(), z1.VnB()), - "msb z27.b, p7/m, z29.b, z1.b"); - COMPARE_PREFIX(msb(z27.VnH(), p7.Merging(), z29.VnH(), z1.VnH()), - "msb z27.h, p7/m, z29.h, z1.h"); - COMPARE_PREFIX(msb(z27.VnS(), p7.Merging(), z29.VnS(), z1.VnS()), - "msb z27.s, p7/m, z29.s, 
z1.s"); - COMPARE_PREFIX(msb(z27.VnD(), p7.Merging(), z29.VnD(), z1.VnD()), - "msb z27.d, p7/m, z29.d, z1.d"); + COMPARE(mad(z29.VnB(), p6.Merging(), z22.VnB(), z21.VnB()), + "mad z29.b, p6/m, z22.b, z21.b"); + COMPARE(mad(z29.VnH(), p6.Merging(), z22.VnH(), z21.VnH()), + "mad z29.h, p6/m, z22.h, z21.h"); + COMPARE(mad(z29.VnS(), p6.Merging(), z22.VnS(), z21.VnS()), + "mad z29.s, p6/m, z22.s, z21.s"); + COMPARE(mad(z29.VnD(), p6.Merging(), z22.VnD(), z21.VnD()), + "mad z29.d, p6/m, z22.d, z21.d"); + COMPARE(mla(z23.VnB(), p1.Merging(), z21.VnB(), z23.VnB()), + "mla z23.b, p1/m, z21.b, z23.b"); + COMPARE(mla(z23.VnH(), p1.Merging(), z21.VnH(), z23.VnH()), + "mla z23.h, p1/m, z21.h, z23.h"); + COMPARE(mla(z23.VnS(), p1.Merging(), z21.VnS(), z23.VnS()), + "mla z23.s, p1/m, z21.s, z23.s"); + COMPARE(mla(z23.VnD(), p1.Merging(), z21.VnD(), z23.VnD()), + "mla z23.d, p1/m, z21.d, z23.d"); + COMPARE(mls(z4.VnB(), p6.Merging(), z17.VnB(), z28.VnB()), + "mls z4.b, p6/m, z17.b, z28.b"); + COMPARE(mls(z4.VnH(), p6.Merging(), z17.VnH(), z28.VnH()), + "mls z4.h, p6/m, z17.h, z28.h"); + COMPARE(mls(z4.VnS(), p6.Merging(), z17.VnS(), z28.VnS()), + "mls z4.s, p6/m, z17.s, z28.s"); + COMPARE(mls(z4.VnD(), p6.Merging(), z17.VnD(), z28.VnD()), + "mls z4.d, p6/m, z17.d, z28.d"); + COMPARE(msb(z27.VnB(), p7.Merging(), z29.VnB(), z1.VnB()), + "msb z27.b, p7/m, z29.b, z1.b"); + COMPARE(msb(z27.VnH(), p7.Merging(), z29.VnH(), z1.VnH()), + "msb z27.h, p7/m, z29.h, z1.h"); + COMPARE(msb(z27.VnS(), p7.Merging(), z29.VnS(), z1.VnS()), + "msb z27.s, p7/m, z29.s, z1.s"); + COMPARE(msb(z27.VnD(), p7.Merging(), z29.VnD(), z1.VnD()), + "msb z27.d, p7/m, z29.d, z1.d"); CLEANUP(); } @@ -2779,14 +2849,10 @@ TEST(sve_int_mul_add_predicated_macro) { TEST(sve_int_mul_add_unpredicated) { SETUP(); - COMPARE_PREFIX(sdot(z13.VnS(), z12.VnB(), z12.VnB()), - "sdot z13.s, z12.b, z12.b"); - COMPARE_PREFIX(sdot(z18.VnD(), z27.VnH(), z22.VnH()), - "sdot z18.d, z27.h, z22.h"); - COMPARE_PREFIX(udot(z23.VnS(), 
z22.VnB(), z11.VnB()), - "udot z23.s, z22.b, z11.b"); - COMPARE_PREFIX(udot(z21.VnD(), z27.VnH(), z27.VnH()), - "udot z21.d, z27.h, z27.h"); + COMPARE(sdot(z13.VnS(), z12.VnB(), z12.VnB()), "sdot z13.s, z12.b, z12.b"); + COMPARE(sdot(z18.VnD(), z27.VnH(), z22.VnH()), "sdot z18.d, z27.h, z22.h"); + COMPARE(udot(z23.VnS(), z22.VnB(), z11.VnB()), "udot z23.s, z22.b, z11.b"); + COMPARE(udot(z21.VnD(), z27.VnH(), z27.VnH()), "udot z21.d, z27.h, z27.h"); CLEANUP(); } @@ -2831,49 +2897,49 @@ TEST(sve_int_mul_add_unpredicated_macro) { TEST(sve_int_reduction) { SETUP(); - COMPARE_PREFIX(andv(b15, p1, z4.VnB()), "andv b15, p1, z4.b"); - COMPARE_PREFIX(andv(h14, p2, z3.VnH()), "andv h14, p2, z3.h"); - COMPARE_PREFIX(andv(s13, p3, z2.VnS()), "andv s13, p3, z2.s"); - COMPARE_PREFIX(andv(d12, p4, z1.VnD()), "andv d12, p4, z1.d"); - COMPARE_PREFIX(eorv(b12, p0, z30.VnB()), "eorv b12, p0, z30.b"); - COMPARE_PREFIX(eorv(h11, p1, z29.VnH()), "eorv h11, p1, z29.h"); - COMPARE_PREFIX(eorv(s10, p2, z28.VnS()), "eorv s10, p2, z28.s"); - COMPARE_PREFIX(eorv(d9, p3, z27.VnD()), "eorv d9, p3, z27.d"); - COMPARE_PREFIX(movprfx(z30.VnB(), p2.Zeroing(), z23.VnB()), - "movprfx z30.b, p2/z, z23.b"); - COMPARE_PREFIX(movprfx(z10.VnH(), p0.Merging(), z10.VnH()), - "movprfx z10.h, p0/m, z10.h"); - COMPARE_PREFIX(movprfx(z0.VnS(), p2.Zeroing(), z23.VnS()), - "movprfx z0.s, p2/z, z23.s"); - COMPARE_PREFIX(movprfx(z31.VnD(), p7.Merging(), z23.VnD()), - "movprfx z31.d, p7/m, z23.d"); - COMPARE_PREFIX(orv(b4, p0, z16.VnB()), "orv b4, p0, z16.b"); - COMPARE_PREFIX(orv(h6, p2, z18.VnH()), "orv h6, p2, z18.h"); - COMPARE_PREFIX(orv(s8, p4, z20.VnS()), "orv s8, p4, z20.s"); - COMPARE_PREFIX(orv(d10, p6, z22.VnD()), "orv d10, p6, z22.d"); - COMPARE_PREFIX(saddv(d20, p1, z12.VnB()), "saddv d20, p1, z12.b"); - COMPARE_PREFIX(saddv(d22, p3, z15.VnH()), "saddv d22, p3, z15.h"); - COMPARE_PREFIX(saddv(d24, p5, z18.VnS()), "saddv d24, p5, z18.s"); - COMPARE_PREFIX(smaxv(b9, p3, z1.VnB()), "smaxv b9, p3, z1.b"); - 
COMPARE_PREFIX(smaxv(h19, p2, z1.VnH()), "smaxv h19, p2, z1.h"); - COMPARE_PREFIX(smaxv(s29, p1, z1.VnS()), "smaxv s29, p1, z1.s"); - COMPARE_PREFIX(smaxv(d9, p0, z1.VnD()), "smaxv d9, p0, z1.d"); - COMPARE_PREFIX(sminv(b8, p3, z14.VnB()), "sminv b8, p3, z14.b"); - COMPARE_PREFIX(sminv(h18, p2, z4.VnH()), "sminv h18, p2, z4.h"); - COMPARE_PREFIX(sminv(s28, p1, z4.VnS()), "sminv s28, p1, z4.s"); - COMPARE_PREFIX(sminv(d8, p0, z24.VnD()), "sminv d8, p0, z24.d"); - COMPARE_PREFIX(uaddv(d13, p0, z15.VnB()), "uaddv d13, p0, z15.b"); - COMPARE_PREFIX(uaddv(d15, p2, z20.VnH()), "uaddv d15, p2, z20.h"); - COMPARE_PREFIX(uaddv(d17, p4, z25.VnS()), "uaddv d17, p4, z25.s"); - COMPARE_PREFIX(uaddv(d19, p6, z30.VnD()), "uaddv d19, p6, z30.d"); - COMPARE_PREFIX(umaxv(b28, p3, z4.VnB()), "umaxv b28, p3, z4.b"); - COMPARE_PREFIX(umaxv(h18, p6, z2.VnH()), "umaxv h18, p6, z2.h"); - COMPARE_PREFIX(umaxv(s18, p4, z29.VnS()), "umaxv s18, p4, z29.s"); - COMPARE_PREFIX(umaxv(d28, p1, z24.VnD()), "umaxv d28, p1, z24.d"); - COMPARE_PREFIX(uminv(b16, p3, z0.VnB()), "uminv b16, p3, z0.b"); - COMPARE_PREFIX(uminv(h16, p2, z3.VnH()), "uminv h16, p2, z3.h"); - COMPARE_PREFIX(uminv(s16, p1, z5.VnS()), "uminv s16, p1, z5.s"); - COMPARE_PREFIX(uminv(d16, p7, z7.VnD()), "uminv d16, p7, z7.d"); + COMPARE(andv(b15, p1, z4.VnB()), "andv b15, p1, z4.b"); + COMPARE(andv(h14, p2, z3.VnH()), "andv h14, p2, z3.h"); + COMPARE(andv(s13, p3, z2.VnS()), "andv s13, p3, z2.s"); + COMPARE(andv(d12, p4, z1.VnD()), "andv d12, p4, z1.d"); + COMPARE(eorv(b12, p0, z30.VnB()), "eorv b12, p0, z30.b"); + COMPARE(eorv(h11, p1, z29.VnH()), "eorv h11, p1, z29.h"); + COMPARE(eorv(s10, p2, z28.VnS()), "eorv s10, p2, z28.s"); + COMPARE(eorv(d9, p3, z27.VnD()), "eorv d9, p3, z27.d"); + COMPARE(movprfx(z30.VnB(), p2.Zeroing(), z23.VnB()), + "movprfx z30.b, p2/z, z23.b"); + COMPARE(movprfx(z10.VnH(), p0.Merging(), z10.VnH()), + "movprfx z10.h, p0/m, z10.h"); + COMPARE(movprfx(z0.VnS(), p2.Zeroing(), z23.VnS()), + "movprfx z0.s, 
p2/z, z23.s"); + COMPARE(movprfx(z31.VnD(), p7.Merging(), z23.VnD()), + "movprfx z31.d, p7/m, z23.d"); + COMPARE(orv(b4, p0, z16.VnB()), "orv b4, p0, z16.b"); + COMPARE(orv(h6, p2, z18.VnH()), "orv h6, p2, z18.h"); + COMPARE(orv(s8, p4, z20.VnS()), "orv s8, p4, z20.s"); + COMPARE(orv(d10, p6, z22.VnD()), "orv d10, p6, z22.d"); + COMPARE(saddv(d20, p1, z12.VnB()), "saddv d20, p1, z12.b"); + COMPARE(saddv(d22, p3, z15.VnH()), "saddv d22, p3, z15.h"); + COMPARE(saddv(d24, p5, z18.VnS()), "saddv d24, p5, z18.s"); + COMPARE(smaxv(b9, p3, z1.VnB()), "smaxv b9, p3, z1.b"); + COMPARE(smaxv(h19, p2, z1.VnH()), "smaxv h19, p2, z1.h"); + COMPARE(smaxv(s29, p1, z1.VnS()), "smaxv s29, p1, z1.s"); + COMPARE(smaxv(d9, p0, z1.VnD()), "smaxv d9, p0, z1.d"); + COMPARE(sminv(b8, p3, z14.VnB()), "sminv b8, p3, z14.b"); + COMPARE(sminv(h18, p2, z4.VnH()), "sminv h18, p2, z4.h"); + COMPARE(sminv(s28, p1, z4.VnS()), "sminv s28, p1, z4.s"); + COMPARE(sminv(d8, p0, z24.VnD()), "sminv d8, p0, z24.d"); + COMPARE(uaddv(d13, p0, z15.VnB()), "uaddv d13, p0, z15.b"); + COMPARE(uaddv(d15, p2, z20.VnH()), "uaddv d15, p2, z20.h"); + COMPARE(uaddv(d17, p4, z25.VnS()), "uaddv d17, p4, z25.s"); + COMPARE(uaddv(d19, p6, z30.VnD()), "uaddv d19, p6, z30.d"); + COMPARE(umaxv(b28, p3, z4.VnB()), "umaxv b28, p3, z4.b"); + COMPARE(umaxv(h18, p6, z2.VnH()), "umaxv h18, p6, z2.h"); + COMPARE(umaxv(s18, p4, z29.VnS()), "umaxv s18, p4, z29.s"); + COMPARE(umaxv(d28, p1, z24.VnD()), "umaxv d28, p1, z24.d"); + COMPARE(uminv(b16, p3, z0.VnB()), "uminv b16, p3, z0.b"); + COMPARE(uminv(h16, p2, z3.VnH()), "uminv h16, p2, z3.h"); + COMPARE(uminv(s16, p1, z5.VnS()), "uminv s16, p1, z5.s"); + COMPARE(uminv(d16, p7, z7.VnD()), "uminv d16, p7, z7.d"); CLEANUP(); } @@ -2881,97 +2947,70 @@ TEST(sve_int_reduction) { TEST(sve_int_unary_arithmetic_predicated) { SETUP(); - COMPARE_PREFIX(abs(z5.VnB(), p5.Merging(), z31.VnB()), - "abs z5.b, p5/m, z31.b"); - COMPARE_PREFIX(abs(z29.VnH(), p5.Merging(), z17.VnH()), - "abs z29.h, 
p5/m, z17.h"); - COMPARE_PREFIX(abs(z6.VnS(), p4.Merging(), z24.VnS()), - "abs z6.s, p4/m, z24.s"); - COMPARE_PREFIX(abs(z19.VnD(), p3.Merging(), z25.VnD()), - "abs z19.d, p3/m, z25.d"); - COMPARE_PREFIX(cls(z4.VnB(), p0.Merging(), z20.VnB()), - "cls z4.b, p0/m, z20.b"); - COMPARE_PREFIX(cls(z11.VnH(), p0.Merging(), z26.VnH()), - "cls z11.h, p0/m, z26.h"); - COMPARE_PREFIX(cls(z10.VnS(), p1.Merging(), z10.VnS()), - "cls z10.s, p1/m, z10.s"); - COMPARE_PREFIX(cls(z5.VnD(), p1.Merging(), z4.VnD()), "cls z5.d, p1/m, z4.d"); - COMPARE_PREFIX(clz(z18.VnB(), p3.Merging(), z1.VnB()), - "clz z18.b, p3/m, z1.b"); - COMPARE_PREFIX(clz(z13.VnH(), p4.Merging(), z18.VnH()), - "clz z13.h, p4/m, z18.h"); - COMPARE_PREFIX(clz(z15.VnS(), p4.Merging(), z24.VnS()), - "clz z15.s, p4/m, z24.s"); - COMPARE_PREFIX(clz(z29.VnD(), p2.Merging(), z22.VnD()), - "clz z29.d, p2/m, z22.d"); - COMPARE_PREFIX(cnot(z16.VnB(), p6.Merging(), z20.VnB()), - "cnot z16.b, p6/m, z20.b"); - COMPARE_PREFIX(cnot(z10.VnH(), p5.Merging(), z12.VnH()), - "cnot z10.h, p5/m, z12.h"); - COMPARE_PREFIX(cnot(z8.VnS(), p5.Merging(), z21.VnS()), - "cnot z8.s, p5/m, z21.s"); - COMPARE_PREFIX(cnot(z3.VnD(), p3.Merging(), z18.VnD()), - "cnot z3.d, p3/m, z18.d"); - COMPARE_PREFIX(cnt(z29.VnB(), p3.Merging(), z7.VnB()), - "cnt z29.b, p3/m, z7.b"); - COMPARE_PREFIX(cnt(z3.VnH(), p6.Merging(), z31.VnH()), - "cnt z3.h, p6/m, z31.h"); - COMPARE_PREFIX(cnt(z2.VnS(), p4.Merging(), z16.VnS()), - "cnt z2.s, p4/m, z16.s"); - COMPARE_PREFIX(cnt(z0.VnD(), p0.Merging(), z24.VnD()), - "cnt z0.d, p0/m, z24.d"); - COMPARE_PREFIX(fabs(z17.VnH(), p7.Merging(), z15.VnH()), - "fabs z17.h, p7/m, z15.h"); - COMPARE_PREFIX(fabs(z18.VnS(), p0.Merging(), z29.VnS()), - "fabs z18.s, p0/m, z29.s"); - COMPARE_PREFIX(fabs(z17.VnD(), p1.Merging(), z9.VnD()), - "fabs z17.d, p1/m, z9.d"); - COMPARE_PREFIX(fneg(z25.VnH(), p1.Merging(), z28.VnH()), - "fneg z25.h, p1/m, z28.h"); - COMPARE_PREFIX(fneg(z5.VnS(), p1.Merging(), z25.VnS()), - "fneg z5.s, p1/m, 
z25.s"); - COMPARE_PREFIX(fneg(z6.VnD(), p1.Merging(), z17.VnD()), - "fneg z6.d, p1/m, z17.d"); - COMPARE_PREFIX(neg(z25.VnB(), p4.Merging(), z8.VnB()), - "neg z25.b, p4/m, z8.b"); - COMPARE_PREFIX(neg(z30.VnH(), p3.Merging(), z23.VnH()), - "neg z30.h, p3/m, z23.h"); - COMPARE_PREFIX(neg(z7.VnS(), p2.Merging(), z26.VnS()), - "neg z7.s, p2/m, z26.s"); - COMPARE_PREFIX(neg(z21.VnD(), p3.Merging(), z5.VnD()), - "neg z21.d, p3/m, z5.d"); - COMPARE_PREFIX(not_(z24.VnB(), p1.Merging(), z27.VnB()), - "not z24.b, p1/m, z27.b"); - COMPARE_PREFIX(not_(z31.VnH(), p6.Merging(), z19.VnH()), - "not z31.h, p6/m, z19.h"); - COMPARE_PREFIX(not_(z18.VnS(), p5.Merging(), z13.VnS()), - "not z18.s, p5/m, z13.s"); - COMPARE_PREFIX(not_(z12.VnD(), p2.Merging(), z28.VnD()), - "not z12.d, p2/m, z28.d"); - COMPARE_PREFIX(sxtb(z19.VnH(), p7.Merging(), z3.VnH()), - "sxtb z19.h, p7/m, z3.h"); - COMPARE_PREFIX(sxtb(z3.VnS(), p1.Merging(), z17.VnS()), - "sxtb z3.s, p1/m, z17.s"); - COMPARE_PREFIX(sxtb(z27.VnD(), p0.Merging(), z12.VnD()), - "sxtb z27.d, p0/m, z12.d"); - COMPARE_PREFIX(sxth(z6.VnS(), p1.Merging(), z17.VnS()), - "sxth z6.s, p1/m, z17.s"); - COMPARE_PREFIX(sxth(z8.VnD(), p6.Merging(), z2.VnD()), - "sxth z8.d, p6/m, z2.d"); - COMPARE_PREFIX(sxtw(z13.VnD(), p3.Merging(), z27.VnD()), - "sxtw z13.d, p3/m, z27.d"); - COMPARE_PREFIX(uxtb(z23.VnH(), p3.Merging(), z21.VnH()), - "uxtb z23.h, p3/m, z21.h"); - COMPARE_PREFIX(uxtb(z0.VnS(), p2.Merging(), z13.VnS()), - "uxtb z0.s, p2/m, z13.s"); - COMPARE_PREFIX(uxtb(z1.VnD(), p3.Merging(), z13.VnD()), - "uxtb z1.d, p3/m, z13.d"); - COMPARE_PREFIX(uxth(z27.VnS(), p0.Merging(), z29.VnS()), - "uxth z27.s, p0/m, z29.s"); - COMPARE_PREFIX(uxth(z22.VnD(), p4.Merging(), z20.VnD()), - "uxth z22.d, p4/m, z20.d"); - COMPARE_PREFIX(uxtw(z14.VnD(), p1.Merging(), z13.VnD()), - "uxtw z14.d, p1/m, z13.d"); + COMPARE(abs(z5.VnB(), p5.Merging(), z31.VnB()), "abs z5.b, p5/m, z31.b"); + COMPARE(abs(z29.VnH(), p5.Merging(), z17.VnH()), "abs z29.h, p5/m, z17.h"); + 
COMPARE(abs(z6.VnS(), p4.Merging(), z24.VnS()), "abs z6.s, p4/m, z24.s"); + COMPARE(abs(z19.VnD(), p3.Merging(), z25.VnD()), "abs z19.d, p3/m, z25.d"); + COMPARE(cls(z4.VnB(), p0.Merging(), z20.VnB()), "cls z4.b, p0/m, z20.b"); + COMPARE(cls(z11.VnH(), p0.Merging(), z26.VnH()), "cls z11.h, p0/m, z26.h"); + COMPARE(cls(z10.VnS(), p1.Merging(), z10.VnS()), "cls z10.s, p1/m, z10.s"); + COMPARE(cls(z5.VnD(), p1.Merging(), z4.VnD()), "cls z5.d, p1/m, z4.d"); + COMPARE(clz(z18.VnB(), p3.Merging(), z1.VnB()), "clz z18.b, p3/m, z1.b"); + COMPARE(clz(z13.VnH(), p4.Merging(), z18.VnH()), "clz z13.h, p4/m, z18.h"); + COMPARE(clz(z15.VnS(), p4.Merging(), z24.VnS()), "clz z15.s, p4/m, z24.s"); + COMPARE(clz(z29.VnD(), p2.Merging(), z22.VnD()), "clz z29.d, p2/m, z22.d"); + COMPARE(cnot(z16.VnB(), p6.Merging(), z20.VnB()), "cnot z16.b, p6/m, z20.b"); + COMPARE(cnot(z10.VnH(), p5.Merging(), z12.VnH()), "cnot z10.h, p5/m, z12.h"); + COMPARE(cnot(z8.VnS(), p5.Merging(), z21.VnS()), "cnot z8.s, p5/m, z21.s"); + COMPARE(cnot(z3.VnD(), p3.Merging(), z18.VnD()), "cnot z3.d, p3/m, z18.d"); + COMPARE(cnt(z29.VnB(), p3.Merging(), z7.VnB()), "cnt z29.b, p3/m, z7.b"); + COMPARE(cnt(z3.VnH(), p6.Merging(), z31.VnH()), "cnt z3.h, p6/m, z31.h"); + COMPARE(cnt(z2.VnS(), p4.Merging(), z16.VnS()), "cnt z2.s, p4/m, z16.s"); + COMPARE(cnt(z0.VnD(), p0.Merging(), z24.VnD()), "cnt z0.d, p0/m, z24.d"); + COMPARE(fabs(z17.VnH(), p7.Merging(), z15.VnH()), "fabs z17.h, p7/m, z15.h"); + COMPARE(fabs(z18.VnS(), p0.Merging(), z29.VnS()), "fabs z18.s, p0/m, z29.s"); + COMPARE(fabs(z17.VnD(), p1.Merging(), z9.VnD()), "fabs z17.d, p1/m, z9.d"); + COMPARE(fneg(z25.VnH(), p1.Merging(), z28.VnH()), "fneg z25.h, p1/m, z28.h"); + COMPARE(fneg(z5.VnS(), p1.Merging(), z25.VnS()), "fneg z5.s, p1/m, z25.s"); + COMPARE(fneg(z6.VnD(), p1.Merging(), z17.VnD()), "fneg z6.d, p1/m, z17.d"); + COMPARE(neg(z25.VnB(), p4.Merging(), z8.VnB()), "neg z25.b, p4/m, z8.b"); + COMPARE(neg(z30.VnH(), p3.Merging(), z23.VnH()), "neg 
z30.h, p3/m, z23.h"); + COMPARE(neg(z7.VnS(), p2.Merging(), z26.VnS()), "neg z7.s, p2/m, z26.s"); + COMPARE(neg(z21.VnD(), p3.Merging(), z5.VnD()), "neg z21.d, p3/m, z5.d"); + COMPARE(not_(z24.VnB(), p1.Merging(), z27.VnB()), "not z24.b, p1/m, z27.b"); + COMPARE(not_(z31.VnH(), p6.Merging(), z19.VnH()), "not z31.h, p6/m, z19.h"); + COMPARE(not_(z18.VnS(), p5.Merging(), z13.VnS()), "not z18.s, p5/m, z13.s"); + COMPARE(not_(z12.VnD(), p2.Merging(), z28.VnD()), "not z12.d, p2/m, z28.d"); + COMPARE(sxtb(z19.VnH(), p7.Merging(), z3.VnH()), "sxtb z19.h, p7/m, z3.h"); + COMPARE(sxtb(z3.VnS(), p1.Merging(), z17.VnS()), "sxtb z3.s, p1/m, z17.s"); + COMPARE(sxtb(z27.VnD(), p0.Merging(), z12.VnD()), "sxtb z27.d, p0/m, z12.d"); + COMPARE(sxth(z6.VnS(), p1.Merging(), z17.VnS()), "sxth z6.s, p1/m, z17.s"); + COMPARE(sxth(z8.VnD(), p6.Merging(), z2.VnD()), "sxth z8.d, p6/m, z2.d"); + COMPARE(sxtw(z13.VnD(), p3.Merging(), z27.VnD()), "sxtw z13.d, p3/m, z27.d"); + COMPARE(uxtb(z23.VnH(), p3.Merging(), z21.VnH()), "uxtb z23.h, p3/m, z21.h"); + COMPARE(uxtb(z0.VnS(), p2.Merging(), z13.VnS()), "uxtb z0.s, p2/m, z13.s"); + COMPARE(uxtb(z1.VnD(), p3.Merging(), z13.VnD()), "uxtb z1.d, p3/m, z13.d"); + COMPARE(uxth(z27.VnS(), p0.Merging(), z29.VnS()), "uxth z27.s, p0/m, z29.s"); + COMPARE(uxth(z22.VnD(), p4.Merging(), z20.VnD()), "uxth z22.d, p4/m, z20.d"); + COMPARE(uxtw(z14.VnD(), p1.Merging(), z13.VnD()), "uxtw z14.d, p1/m, z13.d"); + + // Check related but undefined encodings. 
+ COMPARE(dci(0x0410a000), "unallocated (Unallocated)"); // sxtb b + COMPARE(dci(0x0412a000), "unallocated (Unallocated)"); // sxth b + COMPARE(dci(0x0452a000), "unallocated (Unallocated)"); // sxth h + COMPARE(dci(0x0414a000), "unallocated (Unallocated)"); // sxtw b + COMPARE(dci(0x0454a000), "unallocated (Unallocated)"); // sxtw h + COMPARE(dci(0x0494a000), "unallocated (Unallocated)"); // sxtw s + + COMPARE(dci(0x0411a000), "unallocated (Unallocated)"); // uxtb b + COMPARE(dci(0x0413a000), "unallocated (Unallocated)"); // uxth b + COMPARE(dci(0x0453a000), "unallocated (Unallocated)"); // uxth h + COMPARE(dci(0x0415a000), "unallocated (Unallocated)"); // uxtw b + COMPARE(dci(0x0455a000), "unallocated (Unallocated)"); // uxtw h + COMPARE(dci(0x0495a000), "unallocated (Unallocated)"); // uxtw s + + COMPARE(dci(0x041ca000), "unallocated (Unallocated)"); // fabs b + COMPARE(dci(0x041da000), "unallocated (Unallocated)"); // fneg b CLEANUP(); } @@ -2994,22 +3033,22 @@ TEST(sve_neg_macro) { TEST(sve_cpy_fcpy_imm) { SETUP(); - COMPARE_PREFIX(cpy(z25.VnB(), p13.Zeroing(), -1), "mov z25.b, p13/z, #-1"); - COMPARE_PREFIX(cpy(z25.VnB(), p13.Merging(), -1), "mov z25.b, p13/m, #-1"); - COMPARE_PREFIX(cpy(z25.VnH(), p13.Merging(), 127), "mov z25.h, p13/m, #127"); - COMPARE_PREFIX(cpy(z25.VnS(), p13.Merging(), 10752), - "mov z25.s, p13/m, #42, lsl #8"); - COMPARE_PREFIX(cpy(z25.VnD(), p13.Merging(), -10752), - "mov z25.d, p13/m, #-42, lsl #8"); - COMPARE_PREFIX(mov(z25.VnD(), p13.Merging(), -10752), - "mov z25.d, p13/m, #-42, lsl #8"); - - COMPARE_PREFIX(fcpy(z20.VnH(), p11.Merging(), 29.0), - "fmov z20.h, p11/m, #0x3d (29.0000)"); - COMPARE_PREFIX(fmov(z20.VnS(), p11.Merging(), -31.0), - "fmov z20.s, p11/m, #0xbf (-31.0000)"); - COMPARE_PREFIX(fcpy(z20.VnD(), p11.Merging(), 1.0), - "fmov z20.d, p11/m, #0x70 (1.0000)"); + COMPARE(cpy(z25.VnB(), p13.Zeroing(), -1), "mov z25.b, p13/z, #-1"); + COMPARE(cpy(z25.VnB(), p13.Merging(), -1), "mov z25.b, p13/m, #-1"); + 
COMPARE(cpy(z25.VnH(), p13.Merging(), 127), "mov z25.h, p13/m, #127"); + COMPARE(cpy(z25.VnS(), p13.Merging(), 10752), + "mov z25.s, p13/m, #42, lsl #8"); + COMPARE(cpy(z25.VnD(), p13.Merging(), -10752), + "mov z25.d, p13/m, #-42, lsl #8"); + COMPARE(mov(z25.VnD(), p13.Merging(), -10752), + "mov z25.d, p13/m, #-42, lsl #8"); + + COMPARE(fcpy(z20.VnH(), p11.Merging(), 29.0), + "fmov z20.h, p11/m, #0x3d (29.0000)"); + COMPARE(fmov(z20.VnS(), p11.Merging(), -31.0), + "fmov z20.s, p11/m, #0xbf (-31.0000)"); + COMPARE(fcpy(z20.VnD(), p11.Merging(), 1.0), + "fmov z20.d, p11/m, #0x70 (1.0000)"); CLEANUP(); } @@ -3018,16 +3057,16 @@ TEST(sve_fmov_zero) { SETUP(); // Predicated `fmov` is an alias for either `fcpy` or `cpy`. - COMPARE_PREFIX(fmov(z13.VnS(), p0.Merging(), 1.0), - "fmov z13.s, p0/m, #0x70 (1.0000)"); - COMPARE_PREFIX(fmov(z13.VnS(), p0.Merging(), 0.0), "mov z13.s, p0/m, #0"); + COMPARE(fmov(z13.VnS(), p0.Merging(), 1.0), + "fmov z13.s, p0/m, #0x70 (1.0000)"); + COMPARE(fmov(z13.VnS(), p0.Merging(), 0.0), "mov z13.s, p0/m, #0"); COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 1.0), "fmov z13.d, p0/m, #0x70 (1.0000)"); COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 0.0), "mov z13.d, p0/m, #0"); // Unpredicated `fmov` is an alias for either `fdup` or `dup`. 
- COMPARE_PREFIX(fmov(z13.VnS(), 1.0), "fmov z13.s, #0x70 (1.0000)"); - COMPARE_PREFIX(fmov(z13.VnS(), 0.0), "mov z13.s, #0"); + COMPARE(fmov(z13.VnS(), 1.0), "fmov z13.s, #0x70 (1.0000)"); + COMPARE(fmov(z13.VnS(), 0.0), "mov z13.s, #0"); COMPARE_MACRO(Fmov(z13.VnD(), 1.0), "fmov z13.d, #0x70 (1.0000)"); COMPARE_MACRO(Fmov(z13.VnD(), 0.0), "mov z13.d, #0"); @@ -3043,92 +3082,90 @@ TEST(sve_fmov_zero) { TEST(sve_int_wide_imm_unpredicated) { SETUP(); - COMPARE_PREFIX(add(z12.VnB(), z12.VnB(), 0), "add z12.b, z12.b, #0"); - COMPARE_PREFIX(add(z13.VnH(), z13.VnH(), 255), "add z13.h, z13.h, #255"); - COMPARE_PREFIX(add(z14.VnS(), z14.VnS(), 256), - "add z14.s, z14.s, #1, lsl #8"); - COMPARE_PREFIX(add(z15.VnD(), z15.VnD(), 255 * 256), - "add z15.d, z15.d, #255, lsl #8"); - - COMPARE_PREFIX(dup(z6.VnB(), -128), "mov z6.b, #-128"); - COMPARE_PREFIX(dup(z7.VnH(), 127), "mov z7.h, #127"); - COMPARE_PREFIX(dup(z8.VnS(), -128 * 256), "mov z8.s, #-128, lsl #8"); - COMPARE_PREFIX(dup(z9.VnD(), 127 * 256), "mov z9.d, #127, lsl #8"); - COMPARE_PREFIX(mov(z8.VnS(), -128 * 256, -1), "mov z8.s, #-128, lsl #8"); - COMPARE_PREFIX(mov(z9.VnD(), 127 * 256, -1), "mov z9.d, #127, lsl #8"); - - COMPARE_PREFIX(sqadd(z7.VnB(), z7.VnB(), 124), "sqadd z7.b, z7.b, #124"); - COMPARE_PREFIX(sqadd(z8.VnH(), z8.VnH(), 131), "sqadd z8.h, z8.h, #131"); - COMPARE_PREFIX(sqadd(z9.VnS(), z9.VnS(), 252 * 256), - "sqadd z9.s, z9.s, #252, lsl #8"); - COMPARE_PREFIX(sqadd(z10.VnD(), z10.VnD(), 20 * 256), - "sqadd z10.d, z10.d, #20, lsl #8"); - - COMPARE_PREFIX(sqsub(z31.VnB(), z31.VnB(), 132), "sqsub z31.b, z31.b, #132"); - COMPARE_PREFIX(sqsub(z30.VnH(), z30.VnH(), 251), "sqsub z30.h, z30.h, #251"); - COMPARE_PREFIX(sqsub(z29.VnS(), z29.VnS(), 21 * 256), - "sqsub z29.s, z29.s, #21, lsl #8"); - COMPARE_PREFIX(sqsub(z28.VnD(), z28.VnD(), 123 * 256), - "sqsub z28.d, z28.d, #123, lsl #8"); - - COMPARE_PREFIX(subr(z20.VnB(), z20.VnB(), 250), "subr z20.b, z20.b, #250"); - COMPARE_PREFIX(subr(z21.VnH(), 
z21.VnH(), 22), "subr z21.h, z21.h, #22"); - COMPARE_PREFIX(subr(z22.VnS(), z22.VnS(), 122 * 256), - "subr z22.s, z22.s, #122, lsl #8"); - COMPARE_PREFIX(subr(z23.VnD(), z23.VnD(), 133 * 256), - "subr z23.d, z23.d, #133, lsl #8"); - - COMPARE_PREFIX(sub(z18.VnB(), z18.VnB(), 23), "sub z18.b, z18.b, #23"); - COMPARE_PREFIX(sub(z19.VnH(), z19.VnH(), 121), "sub z19.h, z19.h, #121"); - COMPARE_PREFIX(sub(z20.VnS(), z20.VnS(), 134 * 256), - "sub z20.s, z20.s, #134, lsl #8"); - COMPARE_PREFIX(sub(z21.VnD(), z21.VnD(), 249 * 256), - "sub z21.d, z21.d, #249, lsl #8"); - - COMPARE_PREFIX(uqadd(z21.VnB(), z21.VnB(), 246), "uqadd z21.b, z21.b, #246"); - COMPARE_PREFIX(uqadd(z22.VnH(), z22.VnH(), 26), "uqadd z22.h, z22.h, #26"); - COMPARE_PREFIX(uqadd(z23.VnS(), z23.VnS(), 118 * 256), - "uqadd z23.s, z23.s, #118, lsl #8"); - COMPARE_PREFIX(uqadd(z24.VnD(), z24.VnD(), 137 * 256), - "uqadd z24.d, z24.d, #137, lsl #8"); - - COMPARE_PREFIX(uqsub(z10.VnB(), z10.VnB(), 27), "uqsub z10.b, z10.b, #27"); - COMPARE_PREFIX(uqsub(z11.VnH(), z11.VnH(), 117), "uqsub z11.h, z11.h, #117"); - COMPARE_PREFIX(uqsub(z12.VnS(), z12.VnS(), 138 * 256), - "uqsub z12.s, z12.s, #138, lsl #8"); - COMPARE_PREFIX(uqsub(z13.VnD(), z13.VnD(), 245 * 256), - "uqsub z13.d, z13.d, #245, lsl #8"); - - COMPARE_PREFIX(fdup(z26.VnH(), Float16(-5.0f)), - "fmov z26.h, #0x94 (-5.0000)"); - COMPARE_PREFIX(fdup(z27.VnS(), -13.0f), "fmov z27.s, #0xaa (-13.0000)"); - COMPARE_PREFIX(fdup(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)"); - COMPARE_PREFIX(fmov(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)"); - - COMPARE_PREFIX(mul(z15.VnB(), z15.VnB(), -128), "mul z15.b, z15.b, #-128"); - COMPARE_PREFIX(mul(z16.VnH(), z16.VnH(), -1), "mul z16.h, z16.h, #-1"); - COMPARE_PREFIX(mul(z17.VnS(), z17.VnS(), 17), "mul z17.s, z17.s, #17"); - COMPARE_PREFIX(mul(z18.VnD(), z18.VnD(), 127), "mul z18.d, z18.d, #127"); - - COMPARE_PREFIX(smax(z7.VnB(), z7.VnB(), -2), "smax z7.b, z7.b, #-2"); - COMPARE_PREFIX(smax(z8.VnH(), z8.VnH(), 18), 
"smax z8.h, z8.h, #18"); - COMPARE_PREFIX(smax(z9.VnS(), z9.VnS(), 126), "smax z9.s, z9.s, #126"); - COMPARE_PREFIX(smax(z10.VnD(), z10.VnD(), -127), "smax z10.d, z10.d, #-127"); - - COMPARE_PREFIX(smin(z5.VnB(), z5.VnB(), 19), "smin z5.b, z5.b, #19"); - COMPARE_PREFIX(smin(z6.VnH(), z6.VnH(), 125), "smin z6.h, z6.h, #125"); - COMPARE_PREFIX(smin(z7.VnS(), z7.VnS(), -126), "smin z7.s, z7.s, #-126"); - COMPARE_PREFIX(smin(z8.VnD(), z8.VnD(), -3), "smin z8.d, z8.d, #-3"); - - COMPARE_PREFIX(umax(z15.VnB(), z15.VnB(), 120), "umax z15.b, z15.b, #120"); - COMPARE_PREFIX(umax(z16.VnH(), z16.VnH(), 135), "umax z16.h, z16.h, #135"); - COMPARE_PREFIX(umax(z17.VnS(), z17.VnS(), 248), "umax z17.s, z17.s, #248"); - COMPARE_PREFIX(umax(z18.VnD(), z18.VnD(), 24), "umax z18.d, z18.d, #24"); - - COMPARE_PREFIX(umin(z22.VnB(), z22.VnB(), 136), "umin z22.b, z22.b, #136"); - COMPARE_PREFIX(umin(z23.VnH(), z23.VnH(), 247), "umin z23.h, z23.h, #247"); - COMPARE_PREFIX(umin(z24.VnS(), z24.VnS(), 25), "umin z24.s, z24.s, #25"); - COMPARE_PREFIX(umin(z25.VnD(), z25.VnD(), 119), "umin z25.d, z25.d, #119"); + COMPARE(add(z12.VnB(), z12.VnB(), 0), "add z12.b, z12.b, #0"); + COMPARE(add(z13.VnH(), z13.VnH(), 255), "add z13.h, z13.h, #255"); + COMPARE(add(z14.VnS(), z14.VnS(), 256), "add z14.s, z14.s, #1, lsl #8"); + COMPARE(add(z15.VnD(), z15.VnD(), 255 * 256), + "add z15.d, z15.d, #255, lsl #8"); + + COMPARE(dup(z6.VnB(), -128), "mov z6.b, #-128"); + COMPARE(dup(z7.VnH(), 127), "mov z7.h, #127"); + COMPARE(dup(z8.VnS(), -128 * 256), "mov z8.s, #-128, lsl #8"); + COMPARE(dup(z9.VnD(), 127 * 256), "mov z9.d, #127, lsl #8"); + COMPARE(mov(z8.VnS(), -128 * 256, -1), "mov z8.s, #-128, lsl #8"); + COMPARE(mov(z9.VnD(), 127 * 256, -1), "mov z9.d, #127, lsl #8"); + + COMPARE(sqadd(z7.VnB(), z7.VnB(), 124), "sqadd z7.b, z7.b, #124"); + COMPARE(sqadd(z8.VnH(), z8.VnH(), 131), "sqadd z8.h, z8.h, #131"); + COMPARE(sqadd(z9.VnS(), z9.VnS(), 252 * 256), + "sqadd z9.s, z9.s, #252, lsl #8"); + 
COMPARE(sqadd(z10.VnD(), z10.VnD(), 20 * 256), + "sqadd z10.d, z10.d, #20, lsl #8"); + + COMPARE(sqsub(z31.VnB(), z31.VnB(), 132), "sqsub z31.b, z31.b, #132"); + COMPARE(sqsub(z30.VnH(), z30.VnH(), 251), "sqsub z30.h, z30.h, #251"); + COMPARE(sqsub(z29.VnS(), z29.VnS(), 21 * 256), + "sqsub z29.s, z29.s, #21, lsl #8"); + COMPARE(sqsub(z28.VnD(), z28.VnD(), 123 * 256), + "sqsub z28.d, z28.d, #123, lsl #8"); + + COMPARE(subr(z20.VnB(), z20.VnB(), 250), "subr z20.b, z20.b, #250"); + COMPARE(subr(z21.VnH(), z21.VnH(), 22), "subr z21.h, z21.h, #22"); + COMPARE(subr(z22.VnS(), z22.VnS(), 122 * 256), + "subr z22.s, z22.s, #122, lsl #8"); + COMPARE(subr(z23.VnD(), z23.VnD(), 133 * 256), + "subr z23.d, z23.d, #133, lsl #8"); + + COMPARE(sub(z18.VnB(), z18.VnB(), 23), "sub z18.b, z18.b, #23"); + COMPARE(sub(z19.VnH(), z19.VnH(), 121), "sub z19.h, z19.h, #121"); + COMPARE(sub(z20.VnS(), z20.VnS(), 134 * 256), + "sub z20.s, z20.s, #134, lsl #8"); + COMPARE(sub(z21.VnD(), z21.VnD(), 249 * 256), + "sub z21.d, z21.d, #249, lsl #8"); + + COMPARE(uqadd(z21.VnB(), z21.VnB(), 246), "uqadd z21.b, z21.b, #246"); + COMPARE(uqadd(z22.VnH(), z22.VnH(), 26), "uqadd z22.h, z22.h, #26"); + COMPARE(uqadd(z23.VnS(), z23.VnS(), 118 * 256), + "uqadd z23.s, z23.s, #118, lsl #8"); + COMPARE(uqadd(z24.VnD(), z24.VnD(), 137 * 256), + "uqadd z24.d, z24.d, #137, lsl #8"); + + COMPARE(uqsub(z10.VnB(), z10.VnB(), 27), "uqsub z10.b, z10.b, #27"); + COMPARE(uqsub(z11.VnH(), z11.VnH(), 117), "uqsub z11.h, z11.h, #117"); + COMPARE(uqsub(z12.VnS(), z12.VnS(), 138 * 256), + "uqsub z12.s, z12.s, #138, lsl #8"); + COMPARE(uqsub(z13.VnD(), z13.VnD(), 245 * 256), + "uqsub z13.d, z13.d, #245, lsl #8"); + + COMPARE(fdup(z26.VnH(), Float16(-5.0f)), "fmov z26.h, #0x94 (-5.0000)"); + COMPARE(fdup(z27.VnS(), -13.0f), "fmov z27.s, #0xaa (-13.0000)"); + COMPARE(fdup(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)"); + COMPARE(fmov(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)"); + + COMPARE(mul(z15.VnB(), z15.VnB(), -128), 
"mul z15.b, z15.b, #-128"); + COMPARE(mul(z16.VnH(), z16.VnH(), -1), "mul z16.h, z16.h, #-1"); + COMPARE(mul(z17.VnS(), z17.VnS(), 17), "mul z17.s, z17.s, #17"); + COMPARE(mul(z18.VnD(), z18.VnD(), 127), "mul z18.d, z18.d, #127"); + + COMPARE(smax(z7.VnB(), z7.VnB(), -2), "smax z7.b, z7.b, #-2"); + COMPARE(smax(z8.VnH(), z8.VnH(), 18), "smax z8.h, z8.h, #18"); + COMPARE(smax(z9.VnS(), z9.VnS(), 126), "smax z9.s, z9.s, #126"); + COMPARE(smax(z10.VnD(), z10.VnD(), -127), "smax z10.d, z10.d, #-127"); + + COMPARE(smin(z5.VnB(), z5.VnB(), 19), "smin z5.b, z5.b, #19"); + COMPARE(smin(z6.VnH(), z6.VnH(), 125), "smin z6.h, z6.h, #125"); + COMPARE(smin(z7.VnS(), z7.VnS(), -126), "smin z7.s, z7.s, #-126"); + COMPARE(smin(z8.VnD(), z8.VnD(), -3), "smin z8.d, z8.d, #-3"); + + COMPARE(umax(z15.VnB(), z15.VnB(), 120), "umax z15.b, z15.b, #120"); + COMPARE(umax(z16.VnH(), z16.VnH(), 135), "umax z16.h, z16.h, #135"); + COMPARE(umax(z17.VnS(), z17.VnS(), 248), "umax z17.s, z17.s, #248"); + COMPARE(umax(z18.VnD(), z18.VnD(), 24), "umax z18.d, z18.d, #24"); + + COMPARE(umin(z22.VnB(), z22.VnB(), 136), "umin z22.b, z22.b, #136"); + COMPARE(umin(z23.VnH(), z23.VnH(), 247), "umin z23.h, z23.h, #247"); + COMPARE(umin(z24.VnS(), z24.VnS(), 25), "umin z24.s, z24.s, #25"); + COMPARE(umin(z25.VnD(), z25.VnD(), 119), "umin z25.d, z25.d, #119"); CLEANUP(); } @@ -3367,120 +3404,90 @@ TEST(sve_mem_32bit_gather_and_unsized_contiguous) { SETUP(); // 32-bit gather load in scalar-plus-vector vform with unscaled offset. 
- COMPARE_PREFIX(ld1b(z9.VnS(), - p5.Zeroing(), - SVEMemOperand(x2, z1.VnS(), SXTW)), - "ld1b {z9.s}, p5/z, [x2, z1.s, sxtw]"); - COMPARE_PREFIX(ld1b(z9.VnS(), - p5.Zeroing(), - SVEMemOperand(sp, z1.VnS(), UXTW)), - "ld1b {z9.s}, p5/z, [sp, z1.s, uxtw]"); - COMPARE_PREFIX(ld1h(z17.VnS(), - p2.Zeroing(), - SVEMemOperand(x11, z24.VnS(), SXTW)), - "ld1h {z17.s}, p2/z, [x11, z24.s, sxtw]"); - COMPARE_PREFIX(ld1w(z22.VnS(), - p6.Zeroing(), - SVEMemOperand(sp, z5.VnS(), UXTW)), - "ld1w {z22.s}, p6/z, [sp, z5.s, uxtw]"); - COMPARE_PREFIX(ld1sb(z12.VnS(), - p7.Zeroing(), - SVEMemOperand(x17, z23.VnS(), UXTW)), - "ld1sb {z12.s}, p7/z, [x17, z23.s, uxtw]"); - COMPARE_PREFIX(ld1sb(z22.VnS(), - p3.Zeroing(), - SVEMemOperand(x23, z23.VnS(), SXTW)), - "ld1sb {z22.s}, p3/z, [x23, z23.s, sxtw]"); - COMPARE_PREFIX(ld1sh(z11.VnS(), - p2.Zeroing(), - SVEMemOperand(x18, z10.VnS(), UXTW)), - "ld1sh {z11.s}, p2/z, [x18, z10.s, uxtw]"); + COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(x2, z1.VnS(), SXTW)), + "ld1b {z9.s}, p5/z, [x2, z1.s, sxtw]"); + COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(sp, z1.VnS(), UXTW)), + "ld1b {z9.s}, p5/z, [sp, z1.s, uxtw]"); + COMPARE(ld1h(z17.VnS(), p2.Zeroing(), SVEMemOperand(x11, z24.VnS(), SXTW)), + "ld1h {z17.s}, p2/z, [x11, z24.s, sxtw]"); + COMPARE(ld1w(z22.VnS(), p6.Zeroing(), SVEMemOperand(sp, z5.VnS(), UXTW)), + "ld1w {z22.s}, p6/z, [sp, z5.s, uxtw]"); + COMPARE(ld1sb(z12.VnS(), p7.Zeroing(), SVEMemOperand(x17, z23.VnS(), UXTW)), + "ld1sb {z12.s}, p7/z, [x17, z23.s, uxtw]"); + COMPARE(ld1sb(z22.VnS(), p3.Zeroing(), SVEMemOperand(x23, z23.VnS(), SXTW)), + "ld1sb {z22.s}, p3/z, [x23, z23.s, sxtw]"); + COMPARE(ld1sh(z11.VnS(), p2.Zeroing(), SVEMemOperand(x18, z10.VnS(), UXTW)), + "ld1sh {z11.s}, p2/z, [x18, z10.s, uxtw]"); // 32-bit gather load in scalar-plus-vector vform with scaled offset. 
- COMPARE_PREFIX(ld1h(z9.VnS(), - p3.Zeroing(), - SVEMemOperand(sp, z4.VnS(), UXTW, 1)), - "ld1h {z9.s}, p3/z, [sp, z4.s, uxtw #1]"); - COMPARE_PREFIX(ld1w(z0.VnS(), - p6.Zeroing(), - SVEMemOperand(x28, z21.VnS(), SXTW, 2)), - "ld1w {z0.s}, p6/z, [x28, z21.s, sxtw #2]"); - COMPARE_PREFIX(ld1sh(z11.VnS(), - p4.Zeroing(), - SVEMemOperand(sp, z0.VnS(), SXTW, 1)), - "ld1sh {z11.s}, p4/z, [sp, z0.s, sxtw #1]"); + COMPARE(ld1h(z9.VnS(), p3.Zeroing(), SVEMemOperand(sp, z4.VnS(), UXTW, 1)), + "ld1h {z9.s}, p3/z, [sp, z4.s, uxtw #1]"); + COMPARE(ld1w(z0.VnS(), p6.Zeroing(), SVEMemOperand(x28, z21.VnS(), SXTW, 2)), + "ld1w {z0.s}, p6/z, [x28, z21.s, sxtw #2]"); + COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(sp, z0.VnS(), SXTW, 1)), + "ld1sh {z11.s}, p4/z, [sp, z0.s, sxtw #1]"); // 32-bit gather first-fault load in scalar-plus-vector vform with 32-bit // unpacked unscaled offset. - COMPARE_PREFIX(ldff1b(z18.VnS(), - p6.Zeroing(), - SVEMemOperand(x27, z24.VnS(), UXTW)), - "ldff1b {z18.s}, p6/z, [x27, z24.s, uxtw]"); - COMPARE_PREFIX(ldff1h(z28.VnS(), - p6.Zeroing(), - SVEMemOperand(x1, z30.VnS(), UXTW)), - "ldff1h {z28.s}, p6/z, [x1, z30.s, uxtw]"); - COMPARE_PREFIX(ldff1w(z12.VnS(), - p3.Zeroing(), - SVEMemOperand(x25, z27.VnS(), SXTW)), - "ldff1w {z12.s}, p3/z, [x25, z27.s, sxtw]"); - COMPARE_PREFIX(ldff1sb(z15.VnS(), - p5.Zeroing(), - SVEMemOperand(x5, z14.VnS(), SXTW)), - "ldff1sb {z15.s}, p5/z, [x5, z14.s, sxtw]"); - COMPARE_PREFIX(ldff1sh(z18.VnS(), - p4.Zeroing(), - SVEMemOperand(x25, z25.VnS(), SXTW)), - "ldff1sh {z18.s}, p4/z, [x25, z25.s, sxtw]"); + COMPARE(ldff1b(z18.VnS(), p6.Zeroing(), SVEMemOperand(x27, z24.VnS(), UXTW)), + "ldff1b {z18.s}, p6/z, [x27, z24.s, uxtw]"); + COMPARE(ldff1h(z28.VnS(), p6.Zeroing(), SVEMemOperand(x1, z30.VnS(), UXTW)), + "ldff1h {z28.s}, p6/z, [x1, z30.s, uxtw]"); + COMPARE(ldff1w(z12.VnS(), p3.Zeroing(), SVEMemOperand(x25, z27.VnS(), SXTW)), + "ldff1w {z12.s}, p3/z, [x25, z27.s, sxtw]"); + COMPARE(ldff1sb(z15.VnS(), 
p5.Zeroing(), SVEMemOperand(x5, z14.VnS(), SXTW)), + "ldff1sb {z15.s}, p5/z, [x5, z14.s, sxtw]"); + COMPARE(ldff1sh(z18.VnS(), p4.Zeroing(), SVEMemOperand(x25, z25.VnS(), SXTW)), + "ldff1sh {z18.s}, p4/z, [x25, z25.s, sxtw]"); // 32-bit gather first-fault load in scalar-plus-vector vform with 32-bit // scaled offset. - COMPARE_PREFIX(ldff1h(z25.VnS(), - p3.Zeroing(), - SVEMemOperand(x17, z15.VnS(), SXTW, 1)), - "ldff1h {z25.s}, p3/z, [x17, z15.s, sxtw #1]"); - COMPARE_PREFIX(ldff1w(z5.VnS(), - p4.Zeroing(), - SVEMemOperand(x23, z31.VnS(), UXTW, 2)), - "ldff1w {z5.s}, p4/z, [x23, z31.s, uxtw #2]"); - COMPARE_PREFIX(ldff1sh(z10.VnS(), - p0.Zeroing(), - SVEMemOperand(x19, z15.VnS(), UXTW, 1)), - "ldff1sh {z10.s}, p0/z, [x19, z15.s, uxtw #1]"); + COMPARE(ldff1h(z25.VnS(), + p3.Zeroing(), + SVEMemOperand(x17, z15.VnS(), SXTW, 1)), + "ldff1h {z25.s}, p3/z, [x17, z15.s, sxtw #1]"); + COMPARE(ldff1w(z5.VnS(), + p4.Zeroing(), + SVEMemOperand(x23, z31.VnS(), UXTW, 2)), + "ldff1w {z5.s}, p4/z, [x23, z31.s, uxtw #2]"); + COMPARE(ldff1sh(z10.VnS(), + p0.Zeroing(), + SVEMemOperand(x19, z15.VnS(), UXTW, 1)), + "ldff1sh {z10.s}, p0/z, [x19, z15.s, uxtw #1]"); // Load and broadcast data to vector. 
- COMPARE_PREFIX(ld1rb(z2.VnH(), p0.Zeroing(), SVEMemOperand(x30, 0)), - "ld1rb {z2.h}, p0/z, [x30]"); - COMPARE_PREFIX(ld1rb(z14.VnS(), p2.Zeroing(), SVEMemOperand(x11, 63)), - "ld1rb {z14.s}, p2/z, [x11, #63]"); - COMPARE_PREFIX(ld1rb(z27.VnD(), p1.Zeroing(), SVEMemOperand(x29, 2)), - "ld1rb {z27.d}, p1/z, [x29, #2]"); - COMPARE_PREFIX(ld1rb(z0.VnB(), p3.Zeroing(), SVEMemOperand(sp, 59)), - "ld1rb {z0.b}, p3/z, [sp, #59]"); - COMPARE_PREFIX(ld1rh(z19.VnH(), p5.Zeroing(), SVEMemOperand(x1, 0)), - "ld1rh {z19.h}, p5/z, [x1]"); - COMPARE_PREFIX(ld1rh(z4.VnS(), p7.Zeroing(), SVEMemOperand(x29, 126)), - "ld1rh {z4.s}, p7/z, [x29, #126]"); - COMPARE_PREFIX(ld1rh(z24.VnD(), p0.Zeroing(), SVEMemOperand(sp, 78)), - "ld1rh {z24.d}, p0/z, [sp, #78]"); - COMPARE_PREFIX(ld1rw(z19.VnS(), p5.Zeroing(), SVEMemOperand(x4, 252)), - "ld1rw {z19.s}, p5/z, [x4, #252]"); - COMPARE_PREFIX(ld1rw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x2, 100)), - "ld1rw {z13.d}, p3/z, [x2, #100]"); - COMPARE_PREFIX(ld1rd(z19.VnD(), p7.Zeroing(), SVEMemOperand(x14, 504)), - "ld1rd {z19.d}, p7/z, [x14, #504]"); - COMPARE_PREFIX(ld1rsb(z16.VnH(), p1.Zeroing(), SVEMemOperand(x29, 0)), - "ld1rsb {z16.h}, p1/z, [x29]"); - COMPARE_PREFIX(ld1rsb(z8.VnS(), p6.Zeroing(), SVEMemOperand(sp, 33)), - "ld1rsb {z8.s}, p6/z, [sp, #33]"); - COMPARE_PREFIX(ld1rsb(z25.VnD(), p2.Zeroing(), SVEMemOperand(x18, 63)), - "ld1rsb {z25.d}, p2/z, [x18, #63]"); - COMPARE_PREFIX(ld1rsh(z11.VnS(), p5.Zeroing(), SVEMemOperand(x14, 2)), - "ld1rsh {z11.s}, p5/z, [x14, #2]"); - COMPARE_PREFIX(ld1rsh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x19, 124)), - "ld1rsh {z28.d}, p1/z, [x19, #124]"); - COMPARE_PREFIX(ld1rsw(z23.VnD(), p4.Zeroing(), SVEMemOperand(x10, 8)), - "ld1rsw {z23.d}, p4/z, [x10, #8]"); + COMPARE(ld1rb(z2.VnH(), p0.Zeroing(), SVEMemOperand(x30, 0)), + "ld1rb {z2.h}, p0/z, [x30]"); + COMPARE(ld1rb(z14.VnS(), p2.Zeroing(), SVEMemOperand(x11, 63)), + "ld1rb {z14.s}, p2/z, [x11, #63]"); + COMPARE(ld1rb(z27.VnD(), p1.Zeroing(), 
SVEMemOperand(x29, 2)), + "ld1rb {z27.d}, p1/z, [x29, #2]"); + COMPARE(ld1rb(z0.VnB(), p3.Zeroing(), SVEMemOperand(sp, 59)), + "ld1rb {z0.b}, p3/z, [sp, #59]"); + COMPARE(ld1rh(z19.VnH(), p5.Zeroing(), SVEMemOperand(x1, 0)), + "ld1rh {z19.h}, p5/z, [x1]"); + COMPARE(ld1rh(z4.VnS(), p7.Zeroing(), SVEMemOperand(x29, 126)), + "ld1rh {z4.s}, p7/z, [x29, #126]"); + COMPARE(ld1rh(z24.VnD(), p0.Zeroing(), SVEMemOperand(sp, 78)), + "ld1rh {z24.d}, p0/z, [sp, #78]"); + COMPARE(ld1rw(z19.VnS(), p5.Zeroing(), SVEMemOperand(x4, 252)), + "ld1rw {z19.s}, p5/z, [x4, #252]"); + COMPARE(ld1rw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x2, 100)), + "ld1rw {z13.d}, p3/z, [x2, #100]"); + COMPARE(ld1rd(z19.VnD(), p7.Zeroing(), SVEMemOperand(x14, 504)), + "ld1rd {z19.d}, p7/z, [x14, #504]"); + COMPARE(ld1rsb(z16.VnH(), p1.Zeroing(), SVEMemOperand(x29, 0)), + "ld1rsb {z16.h}, p1/z, [x29]"); + COMPARE(ld1rsb(z8.VnS(), p6.Zeroing(), SVEMemOperand(sp, 33)), + "ld1rsb {z8.s}, p6/z, [sp, #33]"); + COMPARE(ld1rsb(z25.VnD(), p2.Zeroing(), SVEMemOperand(x18, 63)), + "ld1rsb {z25.d}, p2/z, [x18, #63]"); + COMPARE(ld1rsh(z11.VnS(), p5.Zeroing(), SVEMemOperand(x14, 2)), + "ld1rsh {z11.s}, p5/z, [x14, #2]"); + COMPARE(ld1rsh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x19, 124)), + "ld1rsh {z28.d}, p1/z, [x19, #124]"); + COMPARE(ld1rsw(z23.VnD(), p4.Zeroing(), SVEMemOperand(x10, 8)), + "ld1rsw {z23.d}, p4/z, [x10, #8]"); CLEANUP(); } @@ -3514,34 +3521,34 @@ TEST(sve_mem_32bit_gather_and_unsized_contiguous_macro) { TEST(sve_mem_64bit_gather_vector_plus_immediate) { SETUP(); - COMPARE_PREFIX(ld1b(z2.VnD(), p2.Zeroing(), SVEMemOperand(z12.VnD(), 31)), - "ld1b {z2.d}, p2/z, [z12.d, #31]"); - COMPARE_PREFIX(ld1h(z30.VnD(), p7.Zeroing(), SVEMemOperand(z28.VnD(), 10)), - "ld1h {z30.d}, p7/z, [z28.d, #10]"); - COMPARE_PREFIX(ld1w(z10.VnD(), p5.Zeroing(), SVEMemOperand(z4.VnD(), 124)), - "ld1w {z10.d}, p5/z, [z4.d, #124]"); - COMPARE_PREFIX(ld1d(z13.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)), - "ld1d 
{z13.d}, p3/z, [z19.d, #248]"); - COMPARE_PREFIX(ld1sb(z16.VnD(), p7.Zeroing(), SVEMemOperand(z31.VnD())), - "ld1sb {z16.d}, p7/z, [z31.d]"); - COMPARE_PREFIX(ld1sh(z20.VnD(), p2.Zeroing(), SVEMemOperand(z2.VnD(), 62)), - "ld1sh {z20.d}, p2/z, [z2.d, #62]"); - COMPARE_PREFIX(ld1sw(z2.VnD(), p7.Zeroing(), SVEMemOperand(z25.VnD())), - "ld1sw {z2.d}, p7/z, [z25.d]"); - COMPARE_PREFIX(ldff1b(z24.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), 0)), - "ldff1b {z24.d}, p5/z, [z8.d]"); - COMPARE_PREFIX(ldff1h(z9.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD())), - "ldff1h {z9.d}, p3/z, [z19.d]"); - COMPARE_PREFIX(ldff1w(z26.VnD(), p6.Zeroing(), SVEMemOperand(z15.VnD(), 4)), - "ldff1w {z26.d}, p6/z, [z15.d, #4]"); - COMPARE_PREFIX(ldff1d(z19.VnD(), p1.Zeroing(), SVEMemOperand(z14.VnD())), - "ldff1d {z19.d}, p1/z, [z14.d]"); - COMPARE_PREFIX(ldff1sb(z26.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD(), 21)), - "ldff1sb {z26.d}, p5/z, [z14.d, #21]"); - COMPARE_PREFIX(ldff1sh(z6.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 42)), - "ldff1sh {z6.d}, p3/z, [z19.d, #42]"); - COMPARE_PREFIX(ldff1sw(z19.VnD(), p7.Zeroing(), SVEMemOperand(z14.VnD(), 84)), - "ldff1sw {z19.d}, p7/z, [z14.d, #84]"); + COMPARE(ld1b(z2.VnD(), p2.Zeroing(), SVEMemOperand(z12.VnD(), 31)), + "ld1b {z2.d}, p2/z, [z12.d, #31]"); + COMPARE(ld1h(z30.VnD(), p7.Zeroing(), SVEMemOperand(z28.VnD(), 10)), + "ld1h {z30.d}, p7/z, [z28.d, #10]"); + COMPARE(ld1w(z10.VnD(), p5.Zeroing(), SVEMemOperand(z4.VnD(), 124)), + "ld1w {z10.d}, p5/z, [z4.d, #124]"); + COMPARE(ld1d(z13.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)), + "ld1d {z13.d}, p3/z, [z19.d, #248]"); + COMPARE(ld1sb(z16.VnD(), p7.Zeroing(), SVEMemOperand(z31.VnD())), + "ld1sb {z16.d}, p7/z, [z31.d]"); + COMPARE(ld1sh(z20.VnD(), p2.Zeroing(), SVEMemOperand(z2.VnD(), 62)), + "ld1sh {z20.d}, p2/z, [z2.d, #62]"); + COMPARE(ld1sw(z2.VnD(), p7.Zeroing(), SVEMemOperand(z25.VnD())), + "ld1sw {z2.d}, p7/z, [z25.d]"); + COMPARE(ldff1b(z24.VnD(), p5.Zeroing(), 
SVEMemOperand(z8.VnD(), 0)), + "ldff1b {z24.d}, p5/z, [z8.d]"); + COMPARE(ldff1h(z9.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD())), + "ldff1h {z9.d}, p3/z, [z19.d]"); + COMPARE(ldff1w(z26.VnD(), p6.Zeroing(), SVEMemOperand(z15.VnD(), 4)), + "ldff1w {z26.d}, p6/z, [z15.d, #4]"); + COMPARE(ldff1d(z19.VnD(), p1.Zeroing(), SVEMemOperand(z14.VnD())), + "ldff1d {z19.d}, p1/z, [z14.d]"); + COMPARE(ldff1sb(z26.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD(), 21)), + "ldff1sb {z26.d}, p5/z, [z14.d, #21]"); + COMPARE(ldff1sh(z6.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 42)), + "ldff1sh {z6.d}, p3/z, [z19.d, #42]"); + COMPARE(ldff1sw(z19.VnD(), p7.Zeroing(), SVEMemOperand(z14.VnD(), 84)), + "ldff1sw {z19.d}, p7/z, [z14.d, #84]"); CLEANUP(); } @@ -3629,91 +3636,57 @@ TEST(sve_mem_64bit_gather_vector_plus_immediate_macro) { TEST(sve_mem_64bit_gather_scalar_plus_vector) { SETUP(); - COMPARE_PREFIX(ld1b(z30.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD())), - "ld1b {z30.d}, p6/z, [sp, z24.d]"); - COMPARE_PREFIX(ld1d(z18.VnD(), p5.Zeroing(), SVEMemOperand(x11, z11.VnD())), - "ld1d {z18.d}, p5/z, [x11, z11.d]"); - COMPARE_PREFIX(ld1h(z2.VnD(), p3.Zeroing(), SVEMemOperand(x16, z18.VnD())), - "ld1h {z2.d}, p3/z, [x16, z18.d]"); - COMPARE_PREFIX(ld1sb(z11.VnD(), p3.Zeroing(), SVEMemOperand(x24, z21.VnD())), - "ld1sb {z11.d}, p3/z, [x24, z21.d]"); - COMPARE_PREFIX(ld1sh(z7.VnD(), p7.Zeroing(), SVEMemOperand(x28, z23.VnD())), - "ld1sh {z7.d}, p7/z, [x28, z23.d]"); - COMPARE_PREFIX(ld1sw(z29.VnD(), p7.Zeroing(), SVEMemOperand(x27, z4.VnD())), - "ld1sw {z29.d}, p7/z, [x27, z4.d]"); - COMPARE_PREFIX(ld1w(z19.VnD(), p1.Zeroing(), SVEMemOperand(x27, z4.VnD())), - "ld1w {z19.d}, p1/z, [x27, z4.d]"); - - COMPARE_PREFIX(ld1d(z20.VnD(), - p3.Zeroing(), - SVEMemOperand(x3, z15.VnD(), LSL, 3)), - "ld1d {z20.d}, p3/z, [x3, z15.d, lsl #3]"); - COMPARE_PREFIX(ld1h(z24.VnD(), - p4.Zeroing(), - SVEMemOperand(x6, z11.VnD(), LSL, 1)), - "ld1h {z24.d}, p4/z, [x6, z11.d, lsl #1]"); - 
COMPARE_PREFIX(ld1sh(z22.VnD(), - p6.Zeroing(), - SVEMemOperand(x7, z31.VnD(), LSL, 1)), - "ld1sh {z22.d}, p6/z, [x7, z31.d, lsl #1]"); - COMPARE_PREFIX(ld1sw(z9.VnD(), - p0.Zeroing(), - SVEMemOperand(x2, z27.VnD(), LSL, 2)), - "ld1sw {z9.d}, p0/z, [x2, z27.d, lsl #2]"); - COMPARE_PREFIX(ld1w(z9.VnD(), - p2.Zeroing(), - SVEMemOperand(x0, z0.VnD(), LSL, 2)), - "ld1w {z9.d}, p2/z, [x0, z0.d, lsl #2]"); - - COMPARE_PREFIX(ld1b(z19.VnD(), - p5.Zeroing(), - SVEMemOperand(x21, z29.VnD(), UXTW)), - "ld1b {z19.d}, p5/z, [x21, z29.d, uxtw]"); - COMPARE_PREFIX(ld1d(z9.VnD(), - p5.Zeroing(), - SVEMemOperand(x5, z21.VnD(), SXTW)), - "ld1d {z9.d}, p5/z, [x5, z21.d, sxtw]"); - COMPARE_PREFIX(ld1h(z26.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, z10.VnD(), UXTW)), - "ld1h {z26.d}, p3/z, [x1, z10.d, uxtw]"); - COMPARE_PREFIX(ld1sb(z4.VnD(), - p1.Zeroing(), - SVEMemOperand(x24, z15.VnD(), SXTW)), - "ld1sb {z4.d}, p1/z, [x24, z15.d, sxtw]"); - COMPARE_PREFIX(ld1sh(z9.VnD(), - p1.Zeroing(), - SVEMemOperand(x0, z12.VnD(), UXTW)), - "ld1sh {z9.d}, p1/z, [x0, z12.d, uxtw]"); - COMPARE_PREFIX(ld1sw(z19.VnD(), - p2.Zeroing(), - SVEMemOperand(x19, z16.VnD(), SXTW)), - "ld1sw {z19.d}, p2/z, [x19, z16.d, sxtw]"); - COMPARE_PREFIX(ld1w(z13.VnD(), - p3.Zeroing(), - SVEMemOperand(x8, z10.VnD(), UXTW)), - "ld1w {z13.d}, p3/z, [x8, z10.d, uxtw]"); - - COMPARE_PREFIX(ld1d(z25.VnD(), - p3.Zeroing(), - SVEMemOperand(x14, z0.VnD(), UXTW, 3)), - "ld1d {z25.d}, p3/z, [x14, z0.d, uxtw #3]"); - COMPARE_PREFIX(ld1h(z21.VnD(), - p5.Zeroing(), - SVEMemOperand(x13, z8.VnD(), SXTW, 1)), - "ld1h {z21.d}, p5/z, [x13, z8.d, sxtw #1]"); - COMPARE_PREFIX(ld1sh(z29.VnD(), - p0.Zeroing(), - SVEMemOperand(x9, z10.VnD(), UXTW, 1)), - "ld1sh {z29.d}, p0/z, [x9, z10.d, uxtw #1]"); - COMPARE_PREFIX(ld1sw(z5.VnD(), - p2.Zeroing(), - SVEMemOperand(x1, z23.VnD(), SXTW, 2)), - "ld1sw {z5.d}, p2/z, [x1, z23.d, sxtw #2]"); - COMPARE_PREFIX(ld1w(z21.VnD(), - p1.Zeroing(), - SVEMemOperand(x7, z8.VnD(), UXTW, 2)), - "ld1w {z21.d}, 
p1/z, [x7, z8.d, uxtw #2]"); + COMPARE(ld1b(z30.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD())), + "ld1b {z30.d}, p6/z, [sp, z24.d]"); + COMPARE(ld1d(z18.VnD(), p5.Zeroing(), SVEMemOperand(x11, z11.VnD())), + "ld1d {z18.d}, p5/z, [x11, z11.d]"); + COMPARE(ld1h(z2.VnD(), p3.Zeroing(), SVEMemOperand(x16, z18.VnD())), + "ld1h {z2.d}, p3/z, [x16, z18.d]"); + COMPARE(ld1sb(z11.VnD(), p3.Zeroing(), SVEMemOperand(x24, z21.VnD())), + "ld1sb {z11.d}, p3/z, [x24, z21.d]"); + COMPARE(ld1sh(z7.VnD(), p7.Zeroing(), SVEMemOperand(x28, z23.VnD())), + "ld1sh {z7.d}, p7/z, [x28, z23.d]"); + COMPARE(ld1sw(z29.VnD(), p7.Zeroing(), SVEMemOperand(x27, z4.VnD())), + "ld1sw {z29.d}, p7/z, [x27, z4.d]"); + COMPARE(ld1w(z19.VnD(), p1.Zeroing(), SVEMemOperand(x27, z4.VnD())), + "ld1w {z19.d}, p1/z, [x27, z4.d]"); + + COMPARE(ld1d(z20.VnD(), p3.Zeroing(), SVEMemOperand(x3, z15.VnD(), LSL, 3)), + "ld1d {z20.d}, p3/z, [x3, z15.d, lsl #3]"); + COMPARE(ld1h(z24.VnD(), p4.Zeroing(), SVEMemOperand(x6, z11.VnD(), LSL, 1)), + "ld1h {z24.d}, p4/z, [x6, z11.d, lsl #1]"); + COMPARE(ld1sh(z22.VnD(), p6.Zeroing(), SVEMemOperand(x7, z31.VnD(), LSL, 1)), + "ld1sh {z22.d}, p6/z, [x7, z31.d, lsl #1]"); + COMPARE(ld1sw(z9.VnD(), p0.Zeroing(), SVEMemOperand(x2, z27.VnD(), LSL, 2)), + "ld1sw {z9.d}, p0/z, [x2, z27.d, lsl #2]"); + COMPARE(ld1w(z9.VnD(), p2.Zeroing(), SVEMemOperand(x0, z0.VnD(), LSL, 2)), + "ld1w {z9.d}, p2/z, [x0, z0.d, lsl #2]"); + + COMPARE(ld1b(z19.VnD(), p5.Zeroing(), SVEMemOperand(x21, z29.VnD(), UXTW)), + "ld1b {z19.d}, p5/z, [x21, z29.d, uxtw]"); + COMPARE(ld1d(z9.VnD(), p5.Zeroing(), SVEMemOperand(x5, z21.VnD(), SXTW)), + "ld1d {z9.d}, p5/z, [x5, z21.d, sxtw]"); + COMPARE(ld1h(z26.VnD(), p3.Zeroing(), SVEMemOperand(x1, z10.VnD(), UXTW)), + "ld1h {z26.d}, p3/z, [x1, z10.d, uxtw]"); + COMPARE(ld1sb(z4.VnD(), p1.Zeroing(), SVEMemOperand(x24, z15.VnD(), SXTW)), + "ld1sb {z4.d}, p1/z, [x24, z15.d, sxtw]"); + COMPARE(ld1sh(z9.VnD(), p1.Zeroing(), SVEMemOperand(x0, z12.VnD(), UXTW)), + 
"ld1sh {z9.d}, p1/z, [x0, z12.d, uxtw]"); + COMPARE(ld1sw(z19.VnD(), p2.Zeroing(), SVEMemOperand(x19, z16.VnD(), SXTW)), + "ld1sw {z19.d}, p2/z, [x19, z16.d, sxtw]"); + COMPARE(ld1w(z13.VnD(), p3.Zeroing(), SVEMemOperand(x8, z10.VnD(), UXTW)), + "ld1w {z13.d}, p3/z, [x8, z10.d, uxtw]"); + + COMPARE(ld1d(z25.VnD(), p3.Zeroing(), SVEMemOperand(x14, z0.VnD(), UXTW, 3)), + "ld1d {z25.d}, p3/z, [x14, z0.d, uxtw #3]"); + COMPARE(ld1h(z21.VnD(), p5.Zeroing(), SVEMemOperand(x13, z8.VnD(), SXTW, 1)), + "ld1h {z21.d}, p5/z, [x13, z8.d, sxtw #1]"); + COMPARE(ld1sh(z29.VnD(), p0.Zeroing(), SVEMemOperand(x9, z10.VnD(), UXTW, 1)), + "ld1sh {z29.d}, p0/z, [x9, z10.d, uxtw #1]"); + COMPARE(ld1sw(z5.VnD(), p2.Zeroing(), SVEMemOperand(x1, z23.VnD(), SXTW, 2)), + "ld1sw {z5.d}, p2/z, [x1, z23.d, sxtw #2]"); + COMPARE(ld1w(z21.VnD(), p1.Zeroing(), SVEMemOperand(x7, z8.VnD(), UXTW, 2)), + "ld1w {z21.d}, p1/z, [x7, z8.d, uxtw #2]"); CLEANUP(); } @@ -3785,40 +3758,40 @@ TEST(sve_mem_prefetch) { VIXL_DISAM_PREFETCH_TEST(prfd, "prfd", 3); #undef VIXL_DISAM_PREFETCH_TEST - COMPARE_PREFIX(prfb(PLDL1KEEP, p5, SVEMemOperand(z30.VnS(), 0)), - "prfb pldl1keep, p5, [z30.s]"); - COMPARE_PREFIX(prfb(PLDL1STRM, p5, SVEMemOperand(x28, -11, SVE_MUL_VL)), - "prfb pldl1strm, p5, [x28, #-11, mul vl]"); - COMPARE_PREFIX(prfb(PLDL2KEEP, p6, SVEMemOperand(x30, x29)), - "prfb pldl2keep, p6, [x30, x29]"); - COMPARE_PREFIX(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnD())), - "prfb pldl2strm, p6, [x7, z12.d]"); - COMPARE_PREFIX(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnS(), UXTW)), - "prfb pldl2strm, p6, [x7, z12.s, uxtw]"); - COMPARE_PREFIX(prfd(PLDL3KEEP, p5, SVEMemOperand(z11.VnD(), 9)), - "prfd pldl3keep, p5, [z11.d, #9]"); - COMPARE_PREFIX(prfd(PLDL3STRM, p3, SVEMemOperand(x0, 0, SVE_MUL_VL)), - "prfd pldl3strm, p3, [x0]"); - COMPARE_PREFIX(prfd(PSTL1KEEP, p7, SVEMemOperand(x5, x5, LSL, 3)), - "prfd pstl1keep, p7, [x5, x5, lsl #3]"); - COMPARE_PREFIX(prfd(PSTL1STRM, p1, SVEMemOperand(x19, z18.VnS(), 
SXTW, 3)), - "prfd pstl1strm, p1, [x19, z18.s, sxtw #3]"); - COMPARE_PREFIX(prfh(PSTL2KEEP, p6, SVEMemOperand(z0.VnS(), 31)), - "prfh pstl2keep, p6, [z0.s, #31]"); - COMPARE_PREFIX(prfh(PSTL2STRM, p4, SVEMemOperand(x17, -3, SVE_MUL_VL)), - "prfh pstl2strm, p4, [x17, #-3, mul vl]"); - COMPARE_PREFIX(prfh(PSTL3KEEP, p3, SVEMemOperand(x0, x0, LSL, 1)), - "prfh pstl3keep, p3, [x0, x0, lsl #1]"); - COMPARE_PREFIX(prfh(PSTL3STRM, p4, SVEMemOperand(x20, z0.VnD(), LSL, 1)), - "prfh pstl3strm, p4, [x20, z0.d, lsl #1]"); - COMPARE_PREFIX(prfw(PLDL1KEEP, p3, SVEMemOperand(z23.VnD(), 5)), - "prfw pldl1keep, p3, [z23.d, #5]"); - COMPARE_PREFIX(prfw(PLDL1STRM, p1, SVEMemOperand(x4, 31, SVE_MUL_VL)), - "prfw pldl1strm, p1, [x4, #31, mul vl]"); - COMPARE_PREFIX(prfw(PLDL2KEEP, p2, SVEMemOperand(x22, x22, LSL, 2)), - "prfw pldl2keep, p2, [x22, x22, lsl #2]"); - COMPARE_PREFIX(prfw(PLDL2STRM, p1, SVEMemOperand(x2, z6.VnS(), SXTW, 2)), - "prfw pldl2strm, p1, [x2, z6.s, sxtw #2]"); + COMPARE(prfb(PLDL1KEEP, p5, SVEMemOperand(z30.VnS(), 0)), + "prfb pldl1keep, p5, [z30.s]"); + COMPARE(prfb(PLDL1STRM, p5, SVEMemOperand(x28, -11, SVE_MUL_VL)), + "prfb pldl1strm, p5, [x28, #-11, mul vl]"); + COMPARE(prfb(PLDL2KEEP, p6, SVEMemOperand(x30, x29)), + "prfb pldl2keep, p6, [x30, x29]"); + COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnD())), + "prfb pldl2strm, p6, [x7, z12.d]"); + COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnS(), UXTW)), + "prfb pldl2strm, p6, [x7, z12.s, uxtw]"); + COMPARE(prfd(PLDL3KEEP, p5, SVEMemOperand(z11.VnD(), 9)), + "prfd pldl3keep, p5, [z11.d, #9]"); + COMPARE(prfd(PLDL3STRM, p3, SVEMemOperand(x0, 0, SVE_MUL_VL)), + "prfd pldl3strm, p3, [x0]"); + COMPARE(prfd(PSTL1KEEP, p7, SVEMemOperand(x5, x5, LSL, 3)), + "prfd pstl1keep, p7, [x5, x5, lsl #3]"); + COMPARE(prfd(PSTL1STRM, p1, SVEMemOperand(x19, z18.VnS(), SXTW, 3)), + "prfd pstl1strm, p1, [x19, z18.s, sxtw #3]"); + COMPARE(prfh(PSTL2KEEP, p6, SVEMemOperand(z0.VnS(), 31)), + "prfh pstl2keep, p6, [z0.s, 
#31]"); + COMPARE(prfh(PSTL2STRM, p4, SVEMemOperand(x17, -3, SVE_MUL_VL)), + "prfh pstl2strm, p4, [x17, #-3, mul vl]"); + COMPARE(prfh(PSTL3KEEP, p3, SVEMemOperand(x0, x0, LSL, 1)), + "prfh pstl3keep, p3, [x0, x0, lsl #1]"); + COMPARE(prfh(PSTL3STRM, p4, SVEMemOperand(x20, z0.VnD(), LSL, 1)), + "prfh pstl3strm, p4, [x20, z0.d, lsl #1]"); + COMPARE(prfw(PLDL1KEEP, p3, SVEMemOperand(z23.VnD(), 5)), + "prfw pldl1keep, p3, [z23.d, #5]"); + COMPARE(prfw(PLDL1STRM, p1, SVEMemOperand(x4, 31, SVE_MUL_VL)), + "prfw pldl1strm, p1, [x4, #31, mul vl]"); + COMPARE(prfw(PLDL2KEEP, p2, SVEMemOperand(x22, x22, LSL, 2)), + "prfw pldl2keep, p2, [x22, x22, lsl #2]"); + COMPARE(prfw(PLDL2STRM, p1, SVEMemOperand(x2, z6.VnS(), SXTW, 2)), + "prfw pldl2strm, p1, [x2, z6.s, sxtw #2]"); CLEANUP(); } @@ -3827,98 +3800,72 @@ TEST(sve_mem_64bit_ff_gather_scalar_plus_vector) { SETUP(); // 64-bit unscaled offset. - COMPARE_PREFIX(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())), - "ldff1b {z18.d}, p6/z, [x27, z24.d]"); - COMPARE_PREFIX(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())), - "ldff1h {z28.d}, p6/z, [x1, z30.d]"); - COMPARE_PREFIX(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())), - "ldff1w {z12.d}, p3/z, [x25, z27.d]"); - COMPARE_PREFIX(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())), - "ldff1d {z23.d}, p5/z, [x29, z31.d]"); - COMPARE_PREFIX(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())), - "ldff1sb {z15.d}, p5/z, [x5, z14.d]"); - COMPARE_PREFIX(ldff1sh(z18.VnD(), - p4.Zeroing(), - SVEMemOperand(x25, z25.VnD())), - "ldff1sh {z18.d}, p4/z, [x25, z25.d]"); - COMPARE_PREFIX(ldff1sw(z12.VnD(), - p3.Zeroing(), - SVEMemOperand(x25, z27.VnD())), - "ldff1sw {z12.d}, p3/z, [x25, z27.d]"); + COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())), + "ldff1b {z18.d}, p6/z, [x27, z24.d]"); + COMPARE(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())), + "ldff1h {z28.d}, p6/z, [x1, z30.d]"); + 
COMPARE(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())), + "ldff1w {z12.d}, p3/z, [x25, z27.d]"); + COMPARE(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())), + "ldff1d {z23.d}, p5/z, [x29, z31.d]"); + COMPARE(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())), + "ldff1sb {z15.d}, p5/z, [x5, z14.d]"); + COMPARE(ldff1sh(z18.VnD(), p4.Zeroing(), SVEMemOperand(x25, z25.VnD())), + "ldff1sh {z18.d}, p4/z, [x25, z25.d]"); + COMPARE(ldff1sw(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())), + "ldff1sw {z12.d}, p3/z, [x25, z27.d]"); // 64-bit scaled offset. - COMPARE_PREFIX(ldff1h(z25.VnD(), - p3.Zeroing(), - SVEMemOperand(x17, z15.VnD(), LSL, 1)), - "ldff1h {z25.d}, p3/z, [x17, z15.d, lsl #1]"); - COMPARE_PREFIX(ldff1w(z5.VnD(), - p4.Zeroing(), - SVEMemOperand(x23, z31.VnD(), LSL, 2)), - "ldff1w {z5.d}, p4/z, [x23, z31.d, lsl #2]"); - COMPARE_PREFIX(ldff1d(z2.VnD(), - p0.Zeroing(), - SVEMemOperand(sp, z7.VnD(), LSL, 3)), - "ldff1d {z2.d}, p0/z, [sp, z7.d, lsl #3]"); - COMPARE_PREFIX(ldff1sh(z10.VnD(), - p0.Zeroing(), - SVEMemOperand(x19, z15.VnD(), LSL, 1)), - "ldff1sh {z10.d}, p0/z, [x19, z15.d, lsl #1]"); - COMPARE_PREFIX(ldff1sw(z5.VnD(), - p4.Zeroing(), - SVEMemOperand(x23, z31.VnD(), LSL, 2)), - "ldff1sw {z5.d}, p4/z, [x23, z31.d, lsl #2]"); + COMPARE(ldff1h(z25.VnD(), + p3.Zeroing(), + SVEMemOperand(x17, z15.VnD(), LSL, 1)), + "ldff1h {z25.d}, p3/z, [x17, z15.d, lsl #1]"); + COMPARE(ldff1w(z5.VnD(), p4.Zeroing(), SVEMemOperand(x23, z31.VnD(), LSL, 2)), + "ldff1w {z5.d}, p4/z, [x23, z31.d, lsl #2]"); + COMPARE(ldff1d(z2.VnD(), p0.Zeroing(), SVEMemOperand(sp, z7.VnD(), LSL, 3)), + "ldff1d {z2.d}, p0/z, [sp, z7.d, lsl #3]"); + COMPARE(ldff1sh(z10.VnD(), + p0.Zeroing(), + SVEMemOperand(x19, z15.VnD(), LSL, 1)), + "ldff1sh {z10.d}, p0/z, [x19, z15.d, lsl #1]"); + COMPARE(ldff1sw(z5.VnD(), + p4.Zeroing(), + SVEMemOperand(x23, z31.VnD(), LSL, 2)), + "ldff1sw {z5.d}, p4/z, [x23, z31.d, lsl #2]"); // 32-bit unpacked unscaled 
offset - COMPARE_PREFIX(ldff1b(z18.VnD(), - p6.Zeroing(), - SVEMemOperand(sp, z24.VnD(), UXTW)), - "ldff1b {z18.d}, p6/z, [sp, z24.d, uxtw]"); - COMPARE_PREFIX(ldff1h(z20.VnD(), - p5.Zeroing(), - SVEMemOperand(x7, z14.VnD(), SXTW)), - "ldff1h {z20.d}, p5/z, [x7, z14.d, sxtw]"); - COMPARE_PREFIX(ldff1w(z22.VnD(), - p4.Zeroing(), - SVEMemOperand(x17, z4.VnD(), UXTW)), - "ldff1w {z22.d}, p4/z, [x17, z4.d, uxtw]"); - COMPARE_PREFIX(ldff1d(z24.VnD(), - p3.Zeroing(), - SVEMemOperand(x3, z24.VnD(), SXTW)), - "ldff1d {z24.d}, p3/z, [x3, z24.d, sxtw]"); - COMPARE_PREFIX(ldff1sb(z26.VnD(), - p2.Zeroing(), - SVEMemOperand(x13, z14.VnD(), UXTW)), - "ldff1sb {z26.d}, p2/z, [x13, z14.d, uxtw]"); - COMPARE_PREFIX(ldff1sh(z28.VnD(), - p1.Zeroing(), - SVEMemOperand(x23, z4.VnD(), SXTW)), - "ldff1sh {z28.d}, p1/z, [x23, z4.d, sxtw]"); - COMPARE_PREFIX(ldff1sw(z30.VnD(), - p0.Zeroing(), - SVEMemOperand(x8, z24.VnD(), UXTW)), - "ldff1sw {z30.d}, p0/z, [x8, z24.d, uxtw]"); + COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD(), UXTW)), + "ldff1b {z18.d}, p6/z, [sp, z24.d, uxtw]"); + COMPARE(ldff1h(z20.VnD(), p5.Zeroing(), SVEMemOperand(x7, z14.VnD(), SXTW)), + "ldff1h {z20.d}, p5/z, [x7, z14.d, sxtw]"); + COMPARE(ldff1w(z22.VnD(), p4.Zeroing(), SVEMemOperand(x17, z4.VnD(), UXTW)), + "ldff1w {z22.d}, p4/z, [x17, z4.d, uxtw]"); + COMPARE(ldff1d(z24.VnD(), p3.Zeroing(), SVEMemOperand(x3, z24.VnD(), SXTW)), + "ldff1d {z24.d}, p3/z, [x3, z24.d, sxtw]"); + COMPARE(ldff1sb(z26.VnD(), p2.Zeroing(), SVEMemOperand(x13, z14.VnD(), UXTW)), + "ldff1sb {z26.d}, p2/z, [x13, z14.d, uxtw]"); + COMPARE(ldff1sh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x23, z4.VnD(), SXTW)), + "ldff1sh {z28.d}, p1/z, [x23, z4.d, sxtw]"); + COMPARE(ldff1sw(z30.VnD(), p0.Zeroing(), SVEMemOperand(x8, z24.VnD(), UXTW)), + "ldff1sw {z30.d}, p0/z, [x8, z24.d, uxtw]"); // 32-bit unpacked scaled offset - COMPARE_PREFIX(ldff1h(z4.VnD(), - p5.Zeroing(), - SVEMemOperand(x7, z1.VnD(), SXTW, 1)), - "ldff1h {z4.d}, p5/z, 
[x7, z1.d, sxtw #1]"); - COMPARE_PREFIX(ldff1w(z5.VnD(), - p4.Zeroing(), - SVEMemOperand(x17, z11.VnD(), UXTW, 2)), - "ldff1w {z5.d}, p4/z, [x17, z11.d, uxtw #2]"); - COMPARE_PREFIX(ldff1d(z6.VnD(), - p3.Zeroing(), - SVEMemOperand(x3, z31.VnD(), SXTW, 3)), - "ldff1d {z6.d}, p3/z, [x3, z31.d, sxtw #3]"); - COMPARE_PREFIX(ldff1sh(z7.VnD(), - p1.Zeroing(), - SVEMemOperand(x23, z7.VnD(), UXTW, 1)), - "ldff1sh {z7.d}, p1/z, [x23, z7.d, uxtw #1]"); - COMPARE_PREFIX(ldff1sw(z8.VnD(), - p0.Zeroing(), - SVEMemOperand(x8, z17.VnD(), SXTW, 2)), - "ldff1sw {z8.d}, p0/z, [x8, z17.d, sxtw #2]"); + COMPARE(ldff1h(z4.VnD(), p5.Zeroing(), SVEMemOperand(x7, z1.VnD(), SXTW, 1)), + "ldff1h {z4.d}, p5/z, [x7, z1.d, sxtw #1]"); + COMPARE(ldff1w(z5.VnD(), + p4.Zeroing(), + SVEMemOperand(x17, z11.VnD(), UXTW, 2)), + "ldff1w {z5.d}, p4/z, [x17, z11.d, uxtw #2]"); + COMPARE(ldff1d(z6.VnD(), p3.Zeroing(), SVEMemOperand(x3, z31.VnD(), SXTW, 3)), + "ldff1d {z6.d}, p3/z, [x3, z31.d, sxtw #3]"); + COMPARE(ldff1sh(z7.VnD(), + p1.Zeroing(), + SVEMemOperand(x23, z7.VnD(), UXTW, 1)), + "ldff1sh {z7.d}, p1/z, [x23, z7.d, uxtw #1]"); + COMPARE(ldff1sw(z8.VnD(), + p0.Zeroing(), + SVEMemOperand(x8, z17.VnD(), SXTW, 2)), + "ldff1sw {z8.d}, p0/z, [x8, z17.d, sxtw #2]"); CLEANUP(); } @@ -3926,57 +3873,57 @@ TEST(sve_mem_64bit_ff_gather_scalar_plus_vector) { TEST(sve_ld2_scalar_plus_immediate) { SETUP(); - COMPARE_PREFIX(ld2b(z31.VnB(), z0.VnB(), p6.Zeroing(), SVEMemOperand(x19)), - "ld2b {z31.b, z0.b}, p6/z, [x19]"); - COMPARE_PREFIX(ld2b(z31.VnB(), - z0.VnB(), - p6.Zeroing(), - SVEMemOperand(x19, 14, SVE_MUL_VL)), - "ld2b {z31.b, z0.b}, p6/z, [x19, #14, mul vl]"); - COMPARE_PREFIX(ld2b(z15.VnB(), - z16.VnB(), - p6.Zeroing(), - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "ld2b {z15.b, z16.b}, p6/z, [x19, #-16, mul vl]"); - - COMPARE_PREFIX(ld2h(z15.VnH(), z16.VnH(), p6.Zeroing(), SVEMemOperand(x19)), - "ld2h {z15.h, z16.h}, p6/z, [x19]"); - COMPARE_PREFIX(ld2h(z15.VnH(), - z16.VnH(), - p0.Zeroing(), - 
SVEMemOperand(x19, 14, SVE_MUL_VL)), - "ld2h {z15.h, z16.h}, p0/z, [x19, #14, mul vl]"); - COMPARE_PREFIX(ld2h(z15.VnH(), - z16.VnH(), - p0.Zeroing(), - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "ld2h {z15.h, z16.h}, p0/z, [x19, #-16, mul vl]"); - - COMPARE_PREFIX(ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x19)), - "ld2w {z0.s, z1.s}, p0/z, [x19]"); - COMPARE_PREFIX(ld2w(z0.VnS(), - z1.VnS(), - p0.Zeroing(), - SVEMemOperand(x19, 14, SVE_MUL_VL)), - "ld2w {z0.s, z1.s}, p0/z, [x19, #14, mul vl]"); - COMPARE_PREFIX(ld2w(z0.VnS(), - z1.VnS(), - p7.Zeroing(), - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "ld2w {z0.s, z1.s}, p7/z, [x19, #-16, mul vl]"); - - COMPARE_PREFIX(ld2d(z0.VnD(), z1.VnD(), p7.Zeroing(), SVEMemOperand(x19)), - "ld2d {z0.d, z1.d}, p7/z, [x19]"); - COMPARE_PREFIX(ld2d(z31.VnD(), - z0.VnD(), - p7.Zeroing(), - SVEMemOperand(x19, 14, SVE_MUL_VL)), - "ld2d {z31.d, z0.d}, p7/z, [x19, #14, mul vl]"); - COMPARE_PREFIX(ld2d(z31.VnD(), - z0.VnD(), - p7.Zeroing(), - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "ld2d {z31.d, z0.d}, p7/z, [x19, #-16, mul vl]"); + COMPARE(ld2b(z31.VnB(), z0.VnB(), p6.Zeroing(), SVEMemOperand(x19)), + "ld2b {z31.b, z0.b}, p6/z, [x19]"); + COMPARE(ld2b(z31.VnB(), + z0.VnB(), + p6.Zeroing(), + SVEMemOperand(x19, 14, SVE_MUL_VL)), + "ld2b {z31.b, z0.b}, p6/z, [x19, #14, mul vl]"); + COMPARE(ld2b(z15.VnB(), + z16.VnB(), + p6.Zeroing(), + SVEMemOperand(x19, -16, SVE_MUL_VL)), + "ld2b {z15.b, z16.b}, p6/z, [x19, #-16, mul vl]"); + + COMPARE(ld2h(z15.VnH(), z16.VnH(), p6.Zeroing(), SVEMemOperand(x19)), + "ld2h {z15.h, z16.h}, p6/z, [x19]"); + COMPARE(ld2h(z15.VnH(), + z16.VnH(), + p0.Zeroing(), + SVEMemOperand(x19, 14, SVE_MUL_VL)), + "ld2h {z15.h, z16.h}, p0/z, [x19, #14, mul vl]"); + COMPARE(ld2h(z15.VnH(), + z16.VnH(), + p0.Zeroing(), + SVEMemOperand(x19, -16, SVE_MUL_VL)), + "ld2h {z15.h, z16.h}, p0/z, [x19, #-16, mul vl]"); + + COMPARE(ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x19)), + "ld2w {z0.s, z1.s}, p0/z, [x19]"); + 
COMPARE(ld2w(z0.VnS(), + z1.VnS(), + p0.Zeroing(), + SVEMemOperand(x19, 14, SVE_MUL_VL)), + "ld2w {z0.s, z1.s}, p0/z, [x19, #14, mul vl]"); + COMPARE(ld2w(z0.VnS(), + z1.VnS(), + p7.Zeroing(), + SVEMemOperand(x19, -16, SVE_MUL_VL)), + "ld2w {z0.s, z1.s}, p7/z, [x19, #-16, mul vl]"); + + COMPARE(ld2d(z0.VnD(), z1.VnD(), p7.Zeroing(), SVEMemOperand(x19)), + "ld2d {z0.d, z1.d}, p7/z, [x19]"); + COMPARE(ld2d(z31.VnD(), + z0.VnD(), + p7.Zeroing(), + SVEMemOperand(x19, 14, SVE_MUL_VL)), + "ld2d {z31.d, z0.d}, p7/z, [x19, #14, mul vl]"); + COMPARE(ld2d(z31.VnD(), + z0.VnD(), + p7.Zeroing(), + SVEMemOperand(x19, -16, SVE_MUL_VL)), + "ld2d {z31.d, z0.d}, p7/z, [x19, #-16, mul vl]"); CLEANUP(); } @@ -3984,81 +3931,77 @@ TEST(sve_ld2_scalar_plus_immediate) { TEST(sve_ld3_scalar_plus_immediate) { SETUP(); - COMPARE_PREFIX(ld3b(z30.VnB(), - z31.VnB(), - z0.VnB(), - p7.Zeroing(), - SVEMemOperand(x19)), - "ld3b {z30.b, z31.b, z0.b}, p7/z, [x19]"); - COMPARE_PREFIX(ld3b(z30.VnB(), - z31.VnB(), - z0.VnB(), - p6.Zeroing(), - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #21, mul vl]"); - COMPARE_PREFIX(ld3b(z30.VnB(), - z31.VnB(), - z0.VnB(), - p6.Zeroing(), - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #-24, mul vl]"); - - COMPARE_PREFIX(ld3h(z15.VnH(), - z16.VnH(), - z17.VnH(), - p6.Zeroing(), - SVEMemOperand(x19)), - "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19]"); - COMPARE_PREFIX(ld3h(z15.VnH(), - z16.VnH(), - z17.VnH(), - p6.Zeroing(), - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19, #21, mul vl]"); - COMPARE_PREFIX(ld3h(z15.VnH(), - z16.VnH(), - z17.VnH(), - p0.Zeroing(), - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "ld3h {z15.h, z16.h, z17.h}, p0/z, [x19, #-24, mul vl]"); - - COMPARE_PREFIX(ld3w(z15.VnS(), - z16.VnS(), - z17.VnS(), - p0.Zeroing(), - SVEMemOperand(x19)), - "ld3w {z15.s, z16.s, z17.s}, p0/z, [x19]"); - COMPARE_PREFIX(ld3w(z0.VnS(), - z1.VnS(), - z2.VnS(), - 
p0.Zeroing(), - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #21, mul vl]"); - COMPARE_PREFIX(ld3w(z0.VnS(), - z1.VnS(), - z2.VnS(), - p0.Zeroing(), - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #-24, mul vl]"); - - COMPARE_PREFIX(ld3d(z0.VnD(), - z1.VnD(), - z2.VnD(), - p7.Zeroing(), - SVEMemOperand(x19)), - "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19]"); - COMPARE_PREFIX(ld3d(z0.VnD(), - z1.VnD(), - z2.VnD(), - p7.Zeroing(), - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19, #21, mul vl]"); - COMPARE_PREFIX(ld3d(z30.VnD(), - z31.VnD(), - z0.VnD(), - p7.Zeroing(), - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "ld3d {z30.d, z31.d, z0.d}, p7/z, [x19, #-24, mul vl]"); + COMPARE(ld3b(z30.VnB(), + z31.VnB(), + z0.VnB(), + p7.Zeroing(), + SVEMemOperand(x19)), + "ld3b {z30.b, z31.b, z0.b}, p7/z, [x19]"); + COMPARE(ld3b(z30.VnB(), + z31.VnB(), + z0.VnB(), + p6.Zeroing(), + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #21, mul vl]"); + COMPARE(ld3b(z30.VnB(), + z31.VnB(), + z0.VnB(), + p6.Zeroing(), + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #-24, mul vl]"); + + COMPARE(ld3h(z15.VnH(), + z16.VnH(), + z17.VnH(), + p6.Zeroing(), + SVEMemOperand(x19)), + "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19]"); + COMPARE(ld3h(z15.VnH(), + z16.VnH(), + z17.VnH(), + p6.Zeroing(), + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19, #21, mul vl]"); + COMPARE(ld3h(z15.VnH(), + z16.VnH(), + z17.VnH(), + p0.Zeroing(), + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "ld3h {z15.h, z16.h, z17.h}, p0/z, [x19, #-24, mul vl]"); + + COMPARE(ld3w(z15.VnS(), + z16.VnS(), + z17.VnS(), + p0.Zeroing(), + SVEMemOperand(x19)), + "ld3w {z15.s, z16.s, z17.s}, p0/z, [x19]"); + COMPARE(ld3w(z0.VnS(), + z1.VnS(), + z2.VnS(), + p0.Zeroing(), + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #21, mul vl]"); + 
COMPARE(ld3w(z0.VnS(), + z1.VnS(), + z2.VnS(), + p0.Zeroing(), + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #-24, mul vl]"); + + COMPARE(ld3d(z0.VnD(), z1.VnD(), z2.VnD(), p7.Zeroing(), SVEMemOperand(x19)), + "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19]"); + COMPARE(ld3d(z0.VnD(), + z1.VnD(), + z2.VnD(), + p7.Zeroing(), + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19, #21, mul vl]"); + COMPARE(ld3d(z30.VnD(), + z31.VnD(), + z0.VnD(), + p7.Zeroing(), + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "ld3d {z30.d, z31.d, z0.d}, p7/z, [x19, #-24, mul vl]"); CLEANUP(); } @@ -4066,96 +4009,96 @@ TEST(sve_ld3_scalar_plus_immediate) { TEST(sve_ld4_scalar_plus_immediate) { SETUP(); - COMPARE_PREFIX(ld4b(z31.VnB(), - z0.VnB(), - z1.VnB(), - z2.VnB(), - p7.Zeroing(), - SVEMemOperand(x19)), - "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19]"); - COMPARE_PREFIX(ld4b(z31.VnB(), - z0.VnB(), - z1.VnB(), - z2.VnB(), - p7.Zeroing(), - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19, #28, mul vl]"); - COMPARE_PREFIX(ld4b(z31.VnB(), - z0.VnB(), - z1.VnB(), - z2.VnB(), - p6.Zeroing(), - SVEMemOperand(x19, -32, SVE_MUL_VL)), - "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x19, #-32, mul vl]"); - - COMPARE_PREFIX(ld4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p6.Zeroing(), - SVEMemOperand(x19)), - "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x19]"); - COMPARE_PREFIX(ld4h(z15.VnH(), - z16.VnH(), - z17.VnH(), - z18.VnH(), - p6.Zeroing(), - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, " - "[x19, #28, mul vl]"); - COMPARE_PREFIX(ld4h(z15.VnH(), - z16.VnH(), - z17.VnH(), - z18.VnH(), - p6.Zeroing(), - SVEMemOperand(x19, -32, SVE_MUL_VL)), - "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, " - "[x19, #-32, mul vl]"); - - COMPARE_PREFIX(ld4w(z15.VnS(), - z16.VnS(), - z17.VnS(), - z18.VnS(), - p0.Zeroing(), - SVEMemOperand(x19)), - "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, [x19]"); - 
COMPARE_PREFIX(ld4w(z15.VnS(), - z16.VnS(), - z17.VnS(), - z18.VnS(), - p0.Zeroing(), - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, " - "[x19, #28, mul vl]"); - COMPARE_PREFIX(ld4w(z0.VnS(), - z1.VnS(), - z2.VnS(), - z3.VnS(), - p0.Zeroing(), - SVEMemOperand(x19, -32, SVE_MUL_VL)), - "ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [x19, #-32, mul vl]"); - - COMPARE_PREFIX(ld4d(z0.VnD(), - z1.VnD(), - z2.VnD(), - z3.VnD(), - p0.Zeroing(), - SVEMemOperand(x19)), - "ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [x19]"); - COMPARE_PREFIX(ld4d(z0.VnD(), - z1.VnD(), - z2.VnD(), - z3.VnD(), - p7.Zeroing(), - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #28, mul vl]"); - COMPARE_PREFIX(ld4d(z0.VnD(), - z1.VnD(), - z2.VnD(), - z3.VnD(), - p7.Zeroing(), - SVEMemOperand(x19, -32, SVE_MUL_VL)), - "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #-32, mul vl]"); + COMPARE(ld4b(z31.VnB(), + z0.VnB(), + z1.VnB(), + z2.VnB(), + p7.Zeroing(), + SVEMemOperand(x19)), + "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19]"); + COMPARE(ld4b(z31.VnB(), + z0.VnB(), + z1.VnB(), + z2.VnB(), + p7.Zeroing(), + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19, #28, mul vl]"); + COMPARE(ld4b(z31.VnB(), + z0.VnB(), + z1.VnB(), + z2.VnB(), + p6.Zeroing(), + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x19, #-32, mul vl]"); + + COMPARE(ld4h(z31.VnH(), + z0.VnH(), + z1.VnH(), + z2.VnH(), + p6.Zeroing(), + SVEMemOperand(x19)), + "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x19]"); + COMPARE(ld4h(z15.VnH(), + z16.VnH(), + z17.VnH(), + z18.VnH(), + p6.Zeroing(), + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, " + "[x19, #28, mul vl]"); + COMPARE(ld4h(z15.VnH(), + z16.VnH(), + z17.VnH(), + z18.VnH(), + p6.Zeroing(), + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, " + "[x19, #-32, mul vl]"); + + COMPARE(ld4w(z15.VnS(), + 
z16.VnS(), + z17.VnS(), + z18.VnS(), + p0.Zeroing(), + SVEMemOperand(x19)), + "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, [x19]"); + COMPARE(ld4w(z15.VnS(), + z16.VnS(), + z17.VnS(), + z18.VnS(), + p0.Zeroing(), + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, " + "[x19, #28, mul vl]"); + COMPARE(ld4w(z0.VnS(), + z1.VnS(), + z2.VnS(), + z3.VnS(), + p0.Zeroing(), + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [x19, #-32, mul vl]"); + + COMPARE(ld4d(z0.VnD(), + z1.VnD(), + z2.VnD(), + z3.VnD(), + p0.Zeroing(), + SVEMemOperand(x19)), + "ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [x19]"); + COMPARE(ld4d(z0.VnD(), + z1.VnD(), + z2.VnD(), + z3.VnD(), + p7.Zeroing(), + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #28, mul vl]"); + COMPARE(ld4d(z0.VnD(), + z1.VnD(), + z2.VnD(), + z3.VnD(), + p7.Zeroing(), + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #-32, mul vl]"); CLEANUP(); } @@ -4163,69 +4106,60 @@ TEST(sve_ld4_scalar_plus_immediate) { TEST(sve_ld2_scalar_plus_scalar) { SETUP(); - COMPARE_PREFIX(ld2b(z25.VnB(), - z26.VnB(), - p1.Zeroing(), - SVEMemOperand(x20, x19)), - "ld2b {z25.b, z26.b}, p1/z, [x20, x19]"); - COMPARE_PREFIX(ld2b(z25.VnB(), - z26.VnB(), - p1.Zeroing(), - SVEMemOperand(sp, x19)), - "ld2b {z25.b, z26.b}, p1/z, [sp, x19]"); - COMPARE_PREFIX(ld2b(z31.VnB(), - z0.VnB(), - p1.Zeroing(), - SVEMemOperand(sp, x19)), - "ld2b {z31.b, z0.b}, p1/z, [sp, x19]"); - - COMPARE_PREFIX(ld2h(z31.VnH(), - z0.VnH(), - p1.Zeroing(), - SVEMemOperand(x20, x19, LSL, 1)), - "ld2h {z31.h, z0.h}, p1/z, [x20, x19, lsl #1]"); - COMPARE_PREFIX(ld2h(z31.VnH(), - z0.VnH(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 1)), - "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); - COMPARE_PREFIX(ld2h(z31.VnH(), - z0.VnH(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 1)), - "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); - - 
COMPARE_PREFIX(ld2w(z16.VnS(), - z17.VnS(), - p7.Zeroing(), - SVEMemOperand(x20, x19, LSL, 2)), - "ld2w {z16.s, z17.s}, p7/z, [x20, x19, lsl #2]"); - COMPARE_PREFIX(ld2w(z16.VnS(), - z17.VnS(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 2)), - "ld2w {z16.s, z17.s}, p7/z, [sp, x19, lsl #2]"); - COMPARE_PREFIX(ld2w(z16.VnS(), - z17.VnS(), - p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 2)), - "ld2w {z16.s, z17.s}, p0/z, [sp, x19, lsl #2]"); - - COMPARE_PREFIX(ld2d(z16.VnD(), - z17.VnD(), - p0.Zeroing(), - SVEMemOperand(x20, x19, LSL, 3)), - "ld2d {z16.d, z17.d}, p0/z, [x20, x19, lsl #3]"); - COMPARE_PREFIX(ld2d(z25.VnD(), - z26.VnD(), - p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 3)), - "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]"); - COMPARE_PREFIX(ld2d(z25.VnD(), - z26.VnD(), - p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 3)), - "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]"); + COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(x20, x19)), + "ld2b {z25.b, z26.b}, p1/z, [x20, x19]"); + COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)), + "ld2b {z25.b, z26.b}, p1/z, [sp, x19]"); + COMPARE(ld2b(z31.VnB(), z0.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)), + "ld2b {z31.b, z0.b}, p1/z, [sp, x19]"); + + COMPARE(ld2h(z31.VnH(), + z0.VnH(), + p1.Zeroing(), + SVEMemOperand(x20, x19, LSL, 1)), + "ld2h {z31.h, z0.h}, p1/z, [x20, x19, lsl #1]"); + COMPARE(ld2h(z31.VnH(), + z0.VnH(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 1)), + "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); + COMPARE(ld2h(z31.VnH(), + z0.VnH(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 1)), + "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); + + COMPARE(ld2w(z16.VnS(), + z17.VnS(), + p7.Zeroing(), + SVEMemOperand(x20, x19, LSL, 2)), + "ld2w {z16.s, z17.s}, p7/z, [x20, x19, lsl #2]"); + COMPARE(ld2w(z16.VnS(), + z17.VnS(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 2)), + "ld2w {z16.s, z17.s}, p7/z, [sp, x19, lsl #2]"); + COMPARE(ld2w(z16.VnS(), + z17.VnS(), + 
p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 2)), + "ld2w {z16.s, z17.s}, p0/z, [sp, x19, lsl #2]"); + + COMPARE(ld2d(z16.VnD(), + z17.VnD(), + p0.Zeroing(), + SVEMemOperand(x20, x19, LSL, 3)), + "ld2d {z16.d, z17.d}, p0/z, [x20, x19, lsl #3]"); + COMPARE(ld2d(z25.VnD(), + z26.VnD(), + p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 3)), + "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]"); + COMPARE(ld2d(z25.VnD(), + z26.VnD(), + p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 3)), + "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]"); CLEANUP(); } @@ -4233,81 +4167,81 @@ TEST(sve_ld2_scalar_plus_scalar) { TEST(sve_ld3_scalar_plus_scalar) { SETUP(); - COMPARE_PREFIX(ld3b(z25.VnB(), - z26.VnB(), - z27.VnB(), - p1.Zeroing(), - SVEMemOperand(x20, x19)), - "ld3b {z25.b, z26.b, z27.b}, p1/z, [x20, x19]"); - COMPARE_PREFIX(ld3b(z25.VnB(), - z26.VnB(), - z27.VnB(), - p1.Zeroing(), - SVEMemOperand(sp, x19)), - "ld3b {z25.b, z26.b, z27.b}, p1/z, [sp, x19]"); - COMPARE_PREFIX(ld3b(z30.VnB(), - z31.VnB(), - z0.VnB(), - p1.Zeroing(), - SVEMemOperand(sp, x19)), - "ld3b {z30.b, z31.b, z0.b}, p1/z, [sp, x19]"); - - COMPARE_PREFIX(ld3h(z30.VnH(), - z31.VnH(), - z0.VnH(), - p1.Zeroing(), - SVEMemOperand(x20, x19, LSL, 1)), - "ld3h {z30.h, z31.h, z0.h}, p1/z, [x20, x19, lsl #1]"); - COMPARE_PREFIX(ld3h(z30.VnH(), - z31.VnH(), - z0.VnH(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 1)), - "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); - COMPARE_PREFIX(ld3h(z30.VnH(), - z31.VnH(), - z0.VnH(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 1)), - "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); - - COMPARE_PREFIX(ld3w(z16.VnS(), - z17.VnS(), - z18.VnS(), - p7.Zeroing(), - SVEMemOperand(x20, x19, LSL, 2)), - "ld3w {z16.s, z17.s, z18.s}, p7/z, [x20, x19, lsl #2]"); - COMPARE_PREFIX(ld3w(z16.VnS(), - z17.VnS(), - z18.VnS(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 2)), - "ld3w {z16.s, z17.s, z18.s}, p7/z, [sp, x19, lsl #2]"); - COMPARE_PREFIX(ld3w(z16.VnS(), - z17.VnS(), - z18.VnS(), 
- p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 2)), - "ld3w {z16.s, z17.s, z18.s}, p0/z, [sp, x19, lsl #2]"); - - COMPARE_PREFIX(ld3d(z16.VnD(), - z17.VnD(), - z18.VnD(), - p0.Zeroing(), - SVEMemOperand(x20, x19, LSL, 3)), - "ld3d {z16.d, z17.d, z18.d}, p0/z, [x20, x19, lsl #3]"); - COMPARE_PREFIX(ld3d(z25.VnD(), - z26.VnD(), - z27.VnD(), - p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 3)), - "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]"); - COMPARE_PREFIX(ld3d(z25.VnD(), - z26.VnD(), - z27.VnD(), - p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 3)), - "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]"); + COMPARE(ld3b(z25.VnB(), + z26.VnB(), + z27.VnB(), + p1.Zeroing(), + SVEMemOperand(x20, x19)), + "ld3b {z25.b, z26.b, z27.b}, p1/z, [x20, x19]"); + COMPARE(ld3b(z25.VnB(), + z26.VnB(), + z27.VnB(), + p1.Zeroing(), + SVEMemOperand(sp, x19)), + "ld3b {z25.b, z26.b, z27.b}, p1/z, [sp, x19]"); + COMPARE(ld3b(z30.VnB(), + z31.VnB(), + z0.VnB(), + p1.Zeroing(), + SVEMemOperand(sp, x19)), + "ld3b {z30.b, z31.b, z0.b}, p1/z, [sp, x19]"); + + COMPARE(ld3h(z30.VnH(), + z31.VnH(), + z0.VnH(), + p1.Zeroing(), + SVEMemOperand(x20, x19, LSL, 1)), + "ld3h {z30.h, z31.h, z0.h}, p1/z, [x20, x19, lsl #1]"); + COMPARE(ld3h(z30.VnH(), + z31.VnH(), + z0.VnH(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 1)), + "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); + COMPARE(ld3h(z30.VnH(), + z31.VnH(), + z0.VnH(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 1)), + "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); + + COMPARE(ld3w(z16.VnS(), + z17.VnS(), + z18.VnS(), + p7.Zeroing(), + SVEMemOperand(x20, x19, LSL, 2)), + "ld3w {z16.s, z17.s, z18.s}, p7/z, [x20, x19, lsl #2]"); + COMPARE(ld3w(z16.VnS(), + z17.VnS(), + z18.VnS(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 2)), + "ld3w {z16.s, z17.s, z18.s}, p7/z, [sp, x19, lsl #2]"); + COMPARE(ld3w(z16.VnS(), + z17.VnS(), + z18.VnS(), + p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 2)), + "ld3w {z16.s, z17.s, z18.s}, p0/z, 
[sp, x19, lsl #2]"); + + COMPARE(ld3d(z16.VnD(), + z17.VnD(), + z18.VnD(), + p0.Zeroing(), + SVEMemOperand(x20, x19, LSL, 3)), + "ld3d {z16.d, z17.d, z18.d}, p0/z, [x20, x19, lsl #3]"); + COMPARE(ld3d(z25.VnD(), + z26.VnD(), + z27.VnD(), + p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 3)), + "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]"); + COMPARE(ld3d(z25.VnD(), + z26.VnD(), + z27.VnD(), + p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 3)), + "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]"); CLEANUP(); } @@ -4315,98 +4249,98 @@ TEST(sve_ld3_scalar_plus_scalar) { TEST(sve_ld4_scalar_plus_scalar) { SETUP(); - COMPARE_PREFIX(ld4b(z25.VnB(), - z26.VnB(), - z27.VnB(), - z28.VnB(), - p0.Zeroing(), - SVEMemOperand(x20, x19)), - "ld4b {z25.b, z26.b, z27.b, z28.b}, p0/z, [x20, x19]"); - COMPARE_PREFIX(ld4b(z25.VnB(), - z26.VnB(), - z27.VnB(), - z28.VnB(), - p1.Zeroing(), - SVEMemOperand(sp, x19)), - "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]"); - COMPARE_PREFIX(ld4b(z25.VnB(), - z26.VnB(), - z27.VnB(), - z28.VnB(), - p1.Zeroing(), - SVEMemOperand(sp, x19)), - "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]"); - - COMPARE_PREFIX(ld4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p1.Zeroing(), - SVEMemOperand(x20, x19, LSL, 1)), - "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [x20, x19, lsl #1]"); - COMPARE_PREFIX(ld4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p1.Zeroing(), - SVEMemOperand(sp, x19, LSL, 1)), - "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [sp, x19, lsl #1]"); - COMPARE_PREFIX(ld4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 1)), - "ld4h {z31.h, z0.h, z1.h, z2.h}, p7/z, [sp, x19, lsl #1]"); - - COMPARE_PREFIX(ld4w(z31.VnS(), - z0.VnS(), - z1.VnS(), - z2.VnS(), - p7.Zeroing(), - SVEMemOperand(x20, x19, LSL, 2)), - "ld4w {z31.s, z0.s, z1.s, z2.s}, p7/z, [x20, x19, lsl #2]"); - COMPARE_PREFIX(ld4w(z16.VnS(), - z17.VnS(), - z18.VnS(), - z19.VnS(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 
2)), - "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, " - "[sp, x19, lsl #2]"); - COMPARE_PREFIX(ld4w(z16.VnS(), - z17.VnS(), - z18.VnS(), - z19.VnS(), - p7.Zeroing(), - SVEMemOperand(sp, x19, LSL, 2)), - "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, " - "[sp, x19, lsl #2]"); - - COMPARE_PREFIX(ld4d(z16.VnD(), - z17.VnD(), - z18.VnD(), - z19.VnD(), - p0.Zeroing(), - SVEMemOperand(x20, x19, LSL, 3)), - "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, " - "[x20, x19, lsl #3]"); - COMPARE_PREFIX(ld4d(z16.VnD(), - z17.VnD(), - z18.VnD(), - z19.VnD(), - p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 3)), - "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, " - "[sp, x19, lsl #3]"); - COMPARE_PREFIX(ld4d(z25.VnD(), - z26.VnD(), - z27.VnD(), - z28.VnD(), - p0.Zeroing(), - SVEMemOperand(sp, x19, LSL, 3)), - "ld4d {z25.d, z26.d, z27.d, z28.d}, p0/z, " - "[sp, x19, lsl #3]"); + COMPARE(ld4b(z25.VnB(), + z26.VnB(), + z27.VnB(), + z28.VnB(), + p0.Zeroing(), + SVEMemOperand(x20, x19)), + "ld4b {z25.b, z26.b, z27.b, z28.b}, p0/z, [x20, x19]"); + COMPARE(ld4b(z25.VnB(), + z26.VnB(), + z27.VnB(), + z28.VnB(), + p1.Zeroing(), + SVEMemOperand(sp, x19)), + "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]"); + COMPARE(ld4b(z25.VnB(), + z26.VnB(), + z27.VnB(), + z28.VnB(), + p1.Zeroing(), + SVEMemOperand(sp, x19)), + "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]"); + + COMPARE(ld4h(z31.VnH(), + z0.VnH(), + z1.VnH(), + z2.VnH(), + p1.Zeroing(), + SVEMemOperand(x20, x19, LSL, 1)), + "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [x20, x19, lsl #1]"); + COMPARE(ld4h(z31.VnH(), + z0.VnH(), + z1.VnH(), + z2.VnH(), + p1.Zeroing(), + SVEMemOperand(sp, x19, LSL, 1)), + "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [sp, x19, lsl #1]"); + COMPARE(ld4h(z31.VnH(), + z0.VnH(), + z1.VnH(), + z2.VnH(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 1)), + "ld4h {z31.h, z0.h, z1.h, z2.h}, p7/z, [sp, x19, lsl #1]"); + + COMPARE(ld4w(z31.VnS(), + z0.VnS(), + z1.VnS(), + z2.VnS(), + p7.Zeroing(), + SVEMemOperand(x20, x19, LSL, 2)), + 
"ld4w {z31.s, z0.s, z1.s, z2.s}, p7/z, [x20, x19, lsl #2]"); + COMPARE(ld4w(z16.VnS(), + z17.VnS(), + z18.VnS(), + z19.VnS(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 2)), + "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, " + "[sp, x19, lsl #2]"); + COMPARE(ld4w(z16.VnS(), + z17.VnS(), + z18.VnS(), + z19.VnS(), + p7.Zeroing(), + SVEMemOperand(sp, x19, LSL, 2)), + "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, " + "[sp, x19, lsl #2]"); + + COMPARE(ld4d(z16.VnD(), + z17.VnD(), + z18.VnD(), + z19.VnD(), + p0.Zeroing(), + SVEMemOperand(x20, x19, LSL, 3)), + "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, " + "[x20, x19, lsl #3]"); + COMPARE(ld4d(z16.VnD(), + z17.VnD(), + z18.VnD(), + z19.VnD(), + p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 3)), + "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, " + "[sp, x19, lsl #3]"); + COMPARE(ld4d(z25.VnD(), + z26.VnD(), + z27.VnD(), + z28.VnD(), + p0.Zeroing(), + SVEMemOperand(sp, x19, LSL, 3)), + "ld4d {z25.d, z26.d, z27.d, z28.d}, p0/z, " + "[sp, x19, lsl #3]"); CLEANUP(); } @@ -4414,47 +4348,39 @@ TEST(sve_ld4_scalar_plus_scalar) { TEST(sve_ff_contiguous) { SETUP(); - COMPARE_PREFIX(ldff1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x21)), - "ldff1b {z24.b}, p1/z, [x21]"); - COMPARE_PREFIX(ldff1b(z22.VnH(), p5.Zeroing(), SVEMemOperand(x5, x28)), - "ldff1b {z22.h}, p5/z, [x5, x28]"); - COMPARE_PREFIX(ldff1b(z2.VnS(), p5.Zeroing(), SVEMemOperand(sp, x11)), - "ldff1b {z2.s}, p5/z, [sp, x11]"); - COMPARE_PREFIX(ldff1b(z12.VnD(), p3.Zeroing(), SVEMemOperand(x26, xzr)), - "ldff1b {z12.d}, p3/z, [x26]"); - COMPARE_PREFIX(ldff1h(z21.VnH(), p3.Zeroing(), SVEMemOperand(x27)), - "ldff1h {z21.h}, p3/z, [x27]"); - COMPARE_PREFIX(ldff1h(z11.VnS(), - p6.Zeroing(), - SVEMemOperand(sp, x15, LSL, 1)), - "ldff1h {z11.s}, p6/z, [sp, x15, lsl #1]"); - COMPARE_PREFIX(ldff1h(z6.VnD(), p7.Zeroing(), SVEMemOperand(x8, xzr, LSL, 1)), - "ldff1h {z6.d}, p7/z, [x8]"); - COMPARE_PREFIX(ldff1w(z11.VnS(), p7.Zeroing(), SVEMemOperand(sp)), - "ldff1w {z11.s}, p7/z, [sp]"); - 
COMPARE_PREFIX(ldff1w(z6.VnD(), p6.Zeroing(), SVEMemOperand(x5, x0, LSL, 2)), - "ldff1w {z6.d}, p6/z, [x5, x0, lsl #2]"); - COMPARE_PREFIX(ldff1d(z0.VnD(), p3.Zeroing(), SVEMemOperand(x15, x1, LSL, 3)), - "ldff1d {z0.d}, p3/z, [x15, x1, lsl #3]"); - - COMPARE_PREFIX(ldff1sb(z31.VnH(), p4.Zeroing(), SVEMemOperand(x10, x25)), - "ldff1sb {z31.h}, p4/z, [x10, x25]"); - COMPARE_PREFIX(ldff1sb(z25.VnS(), p7.Zeroing(), SVEMemOperand(sp, x20)), - "ldff1sb {z25.s}, p7/z, [sp, x20]"); - COMPARE_PREFIX(ldff1sb(z20.VnD(), p3.Zeroing(), SVEMemOperand(x19, xzr)), - "ldff1sb {z20.d}, p3/z, [x19]"); - COMPARE_PREFIX(ldff1sh(z18.VnS(), - p3.Zeroing(), - SVEMemOperand(sp, x0, LSL, 1)), - "ldff1sh {z18.s}, p3/z, [sp, x0, lsl #1]"); - COMPARE_PREFIX(ldff1sh(z30.VnD(), - p1.Zeroing(), - SVEMemOperand(x28, xzr, LSL, 1)), - "ldff1sh {z30.d}, p1/z, [x28]"); - COMPARE_PREFIX(ldff1sw(z3.VnD(), - p4.Zeroing(), - SVEMemOperand(x22, x18, LSL, 2)), - "ldff1sw {z3.d}, p4/z, [x22, x18, lsl #2]"); + COMPARE(ldff1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x21)), + "ldff1b {z24.b}, p1/z, [x21]"); + COMPARE(ldff1b(z22.VnH(), p5.Zeroing(), SVEMemOperand(x5, x28)), + "ldff1b {z22.h}, p5/z, [x5, x28]"); + COMPARE(ldff1b(z2.VnS(), p5.Zeroing(), SVEMemOperand(sp, x11)), + "ldff1b {z2.s}, p5/z, [sp, x11]"); + COMPARE(ldff1b(z12.VnD(), p3.Zeroing(), SVEMemOperand(x26, xzr)), + "ldff1b {z12.d}, p3/z, [x26]"); + COMPARE(ldff1h(z21.VnH(), p3.Zeroing(), SVEMemOperand(x27)), + "ldff1h {z21.h}, p3/z, [x27]"); + COMPARE(ldff1h(z11.VnS(), p6.Zeroing(), SVEMemOperand(sp, x15, LSL, 1)), + "ldff1h {z11.s}, p6/z, [sp, x15, lsl #1]"); + COMPARE(ldff1h(z6.VnD(), p7.Zeroing(), SVEMemOperand(x8, xzr, LSL, 1)), + "ldff1h {z6.d}, p7/z, [x8]"); + COMPARE(ldff1w(z11.VnS(), p7.Zeroing(), SVEMemOperand(sp)), + "ldff1w {z11.s}, p7/z, [sp]"); + COMPARE(ldff1w(z6.VnD(), p6.Zeroing(), SVEMemOperand(x5, x0, LSL, 2)), + "ldff1w {z6.d}, p6/z, [x5, x0, lsl #2]"); + COMPARE(ldff1d(z0.VnD(), p3.Zeroing(), SVEMemOperand(x15, x1, LSL, 3)), + 
"ldff1d {z0.d}, p3/z, [x15, x1, lsl #3]"); + + COMPARE(ldff1sb(z31.VnH(), p4.Zeroing(), SVEMemOperand(x10, x25)), + "ldff1sb {z31.h}, p4/z, [x10, x25]"); + COMPARE(ldff1sb(z25.VnS(), p7.Zeroing(), SVEMemOperand(sp, x20)), + "ldff1sb {z25.s}, p7/z, [sp, x20]"); + COMPARE(ldff1sb(z20.VnD(), p3.Zeroing(), SVEMemOperand(x19, xzr)), + "ldff1sb {z20.d}, p3/z, [x19]"); + COMPARE(ldff1sh(z18.VnS(), p3.Zeroing(), SVEMemOperand(sp, x0, LSL, 1)), + "ldff1sh {z18.s}, p3/z, [sp, x0, lsl #1]"); + COMPARE(ldff1sh(z30.VnD(), p1.Zeroing(), SVEMemOperand(x28, xzr, LSL, 1)), + "ldff1sh {z30.d}, p1/z, [x28]"); + COMPARE(ldff1sw(z3.VnD(), p4.Zeroing(), SVEMemOperand(x22, x18, LSL, 2)), + "ldff1sw {z3.d}, p4/z, [x22, x18, lsl #2]"); CLEANUP(); } @@ -4462,26 +4388,37 @@ TEST(sve_ff_contiguous) { TEST(sve_mem_contiguous_load) { SETUP(); - COMPARE_PREFIX(ld1rqb(z3.VnB(), p2.Zeroing(), SVEMemOperand(x22, x18)), - "ld1rqb {z3.b}, p2/z, [x22, x18]"); - COMPARE_PREFIX(ld1rqd(z6.VnD(), p0.Zeroing(), SVEMemOperand(x18, x9, LSL, 3)), - "ld1rqd {z6.d}, p0/z, [x18, x9, lsl #3]"); - COMPARE_PREFIX(ld1rqh(z1.VnH(), p7.Zeroing(), SVEMemOperand(x9, x6, LSL, 1)), - "ld1rqh {z1.h}, p7/z, [x9, x6, lsl #1]"); - COMPARE_PREFIX(ld1rqw(z12.VnS(), - p4.Zeroing(), - SVEMemOperand(sp, xzr, LSL, 2)), - "ld1rqw {z12.s}, p4/z, [sp, xzr, lsl #2]"); - COMPARE_PREFIX(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 0)), - "ld1rqb {z18.b}, p2/z, [x18]"); - COMPARE_PREFIX(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 16)), - "ld1rqb {z18.b}, p2/z, [x18, #16]"); - COMPARE_PREFIX(ld1rqd(z11.VnD(), p1.Zeroing(), SVEMemOperand(x23, -16)), - "ld1rqd {z11.d}, p1/z, [x23, #-16]"); - COMPARE_PREFIX(ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)), - "ld1rqh {z11.h}, p1/z, [x0, #112]"); - COMPARE_PREFIX(ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)), - "ld1rqw {z22.s}, p3/z, [sp, #-128]"); + COMPARE(ld1rqb(z3.VnB(), p2.Zeroing(), SVEMemOperand(x22, x18)), + "ld1rqb {z3.b}, p2/z, [x22, x18]"); + 
COMPARE(ld1rqd(z6.VnD(), p0.Zeroing(), SVEMemOperand(x18, x9, LSL, 3)), + "ld1rqd {z6.d}, p0/z, [x18, x9, lsl #3]"); + COMPARE(ld1rqh(z1.VnH(), p7.Zeroing(), SVEMemOperand(x9, x6, LSL, 1)), + "ld1rqh {z1.h}, p7/z, [x9, x6, lsl #1]"); + COMPARE(ld1rqw(z12.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), + "ld1rqw {z12.s}, p4/z, [sp, xzr, lsl #2]"); + COMPARE(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 0)), + "ld1rqb {z18.b}, p2/z, [x18]"); + COMPARE(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 16)), + "ld1rqb {z18.b}, p2/z, [x18, #16]"); + COMPARE(ld1rqd(z11.VnD(), p1.Zeroing(), SVEMemOperand(x23, -16)), + "ld1rqd {z11.d}, p1/z, [x23, #-16]"); + COMPARE(ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)), + "ld1rqh {z11.h}, p1/z, [x0, #112]"); + COMPARE(ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)), + "ld1rqw {z22.s}, p3/z, [sp, #-128]"); + + COMPARE_MACRO(Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)), + "ld1rqb {z0.b}, p0/z, [x0, x1]"); + COMPARE_MACRO(Ld1rqh(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)), + "ld1rqh {z0.h}, p0/z, [x0, x1, lsl #1]"); + COMPARE_MACRO(Ld1rqw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)), + "ld1rqw {z0.s}, p0/z, [x0, x1, lsl #2]"); + COMPARE_MACRO(Ld1rqd(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)), + "ld1rqd {z0.d}, p0/z, [x0, x1, lsl #3]"); + COMPARE_MACRO(Ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)), + "ld1rqh {z11.h}, p1/z, [x0, #112]"); + COMPARE_MACRO(Ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)), + "ld1rqw {z22.s}, p3/z, [sp, #-128]"); COMPARE_MACRO(Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2222)), "add x16, x0, #0x8ae (2222)\n" @@ -4493,43 +4430,45 @@ TEST(sve_mem_contiguous_load) { "add x16, x0, x1, lsl #1\n" "ld1rqd {z0.d}, p0/z, [x16]"); - COMPARE_PREFIX(ldnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)), - "ldnt1b {z21.b}, p5/z, [x1, x23]"); - COMPARE_PREFIX(ldnt1d(z10.VnD(), - p0.Zeroing(), - SVEMemOperand(x23, x6, LSL, 
3)), - "ldnt1d {z10.d}, p0/z, [x23, x6, lsl #3]"); - COMPARE_PREFIX(ldnt1h(z30.VnH(), - p4.Zeroing(), - SVEMemOperand(x6, x11, LSL, 1)), - "ldnt1h {z30.h}, p4/z, [x6, x11, lsl #1]"); - COMPARE_PREFIX(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)), - "ldnt1w {z0.s}, p4/z, [x11, x1, lsl #2]"); - COMPARE_PREFIX(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), - "ldnt1w {z0.s}, p4/z, [sp, xzr, lsl #2]"); - - COMPARE_PREFIX(ldnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)), - "ldnt1b {z1.b}, p3/z, [x11]"); - COMPARE_PREFIX(ldnt1b(z2.VnB(), - p2.Zeroing(), - SVEMemOperand(x12, -8, SVE_MUL_VL)), - "ldnt1b {z2.b}, p2/z, [x12, #-8, mul vl]"); - COMPARE_PREFIX(ldnt1d(z2.VnD(), - p7.Zeroing(), - SVEMemOperand(x13, -2, SVE_MUL_VL)), - "ldnt1d {z2.d}, p7/z, [x13, #-2, mul vl]"); - COMPARE_PREFIX(ldnt1h(z26.VnH(), - p4.Zeroing(), - SVEMemOperand(x16, 3, SVE_MUL_VL)), - "ldnt1h {z26.h}, p4/z, [x16, #3, mul vl]"); - COMPARE_PREFIX(ldnt1w(z17.VnS(), - p4.Zeroing(), - SVEMemOperand(x15, 7, SVE_MUL_VL)), - "ldnt1w {z17.s}, p4/z, [x15, #7, mul vl]"); - COMPARE_PREFIX(ldnt1w(z17.VnS(), - p4.Zeroing(), - SVEMemOperand(sp, 7, SVE_MUL_VL)), - "ldnt1w {z17.s}, p4/z, [sp, #7, mul vl]"); + COMPARE(ldnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)), + "ldnt1b {z21.b}, p5/z, [x1, x23]"); + COMPARE(ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)), + "ldnt1d {z10.d}, p0/z, [x23, x6, lsl #3]"); + COMPARE(ldnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)), + "ldnt1h {z30.h}, p4/z, [x6, x11, lsl #1]"); + COMPARE(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)), + "ldnt1w {z0.s}, p4/z, [x11, x1, lsl #2]"); + COMPARE(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), + "ldnt1w {z0.s}, p4/z, [sp, xzr, lsl #2]"); + + COMPARE_MACRO(Ldnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)), + "ldnt1b {z0.b}, p0/z, [x0, x1]"); + COMPARE_MACRO(Ldnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)), + "ldnt1h 
{z0.h}, p0/z, [x0, x1, lsl #1]"); + COMPARE_MACRO(Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)), + "ldnt1w {z0.s}, p0/z, [x0, x1, lsl #2]"); + COMPARE_MACRO(Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)), + "ldnt1d {z0.d}, p0/z, [x0, x1, lsl #3]"); + + COMPARE_MACRO(Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)), + "add x16, x0, x1, lsl #3\n" + "ldnt1w {z0.s}, p0/z, [x16]"); + COMPARE_MACRO(Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 4)), + "add x16, x0, x1, lsl #4\n" + "ldnt1d {z0.d}, p0/z, [x16]"); + + COMPARE(ldnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)), + "ldnt1b {z1.b}, p3/z, [x11]"); + COMPARE(ldnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)), + "ldnt1b {z2.b}, p2/z, [x12, #-8, mul vl]"); + COMPARE(ldnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)), + "ldnt1d {z2.d}, p7/z, [x13, #-2, mul vl]"); + COMPARE(ldnt1h(z26.VnH(), p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)), + "ldnt1h {z26.h}, p4/z, [x16, #3, mul vl]"); + COMPARE(ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), + "ldnt1w {z17.s}, p4/z, [x15, #7, mul vl]"); + COMPARE(ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)), + "ldnt1w {z17.s}, p4/z, [sp, #7, mul vl]"); COMPARE_MACRO(Ldnt1b(z2.VnB(), p0.Zeroing(), @@ -4554,68 +4493,38 @@ TEST(sve_mem_contiguous_load) { "add x16, x13, #0x3 (3)\n" "ldnt1d {z5.d}, p3/z, [x16]"); - COMPARE_PREFIX(ldnf1b(z1.VnH(), - p0.Zeroing(), - SVEMemOperand(x25, -8, SVE_MUL_VL)), - "ldnf1b {z1.h}, p0/z, [x25, #-8, mul vl]"); - COMPARE_PREFIX(ldnf1b(z0.VnS(), - p0.Zeroing(), - SVEMemOperand(x2, 7, SVE_MUL_VL)), - "ldnf1b {z0.s}, p0/z, [x2, #7, mul vl]"); - COMPARE_PREFIX(ldnf1b(z31.VnD(), - p6.Zeroing(), - SVEMemOperand(x0, -7, SVE_MUL_VL)), - "ldnf1b {z31.d}, p6/z, [x0, #-7, mul vl]"); - COMPARE_PREFIX(ldnf1b(z25.VnB(), - p1.Zeroing(), - SVEMemOperand(x5, 6, SVE_MUL_VL)), - "ldnf1b {z25.b}, p1/z, [x5, #6, mul vl]"); - 
COMPARE_PREFIX(ldnf1d(z25.VnD(), - p0.Zeroing(), - SVEMemOperand(x11, -6, SVE_MUL_VL)), - "ldnf1d {z25.d}, p0/z, [x11, #-6, mul vl]"); - COMPARE_PREFIX(ldnf1h(z22.VnH(), - p4.Zeroing(), - SVEMemOperand(x7, 5, SVE_MUL_VL)), - "ldnf1h {z22.h}, p4/z, [x7, #5, mul vl]"); - COMPARE_PREFIX(ldnf1h(z7.VnS(), - p2.Zeroing(), - SVEMemOperand(x1, -5, SVE_MUL_VL)), - "ldnf1h {z7.s}, p2/z, [x1, #-5, mul vl]"); - COMPARE_PREFIX(ldnf1h(z5.VnD(), - p3.Zeroing(), - SVEMemOperand(x29, 4, SVE_MUL_VL)), - "ldnf1h {z5.d}, p3/z, [x29, #4, mul vl]"); - COMPARE_PREFIX(ldnf1sb(z12.VnH(), - p5.Zeroing(), - SVEMemOperand(x27, -4, SVE_MUL_VL)), - "ldnf1sb {z12.h}, p5/z, [x27, #-4, mul vl]"); - COMPARE_PREFIX(ldnf1sb(z10.VnS(), - p2.Zeroing(), - SVEMemOperand(x13, 3, SVE_MUL_VL)), - "ldnf1sb {z10.s}, p2/z, [x13, #3, mul vl]"); - COMPARE_PREFIX(ldnf1sb(z25.VnD(), - p6.Zeroing(), - SVEMemOperand(x26, -3, SVE_MUL_VL)), - "ldnf1sb {z25.d}, p6/z, [x26, #-3, mul vl]"); - COMPARE_PREFIX(ldnf1sh(z3.VnS(), - p5.Zeroing(), - SVEMemOperand(x1, 2, SVE_MUL_VL)), - "ldnf1sh {z3.s}, p5/z, [x1, #2, mul vl]"); - COMPARE_PREFIX(ldnf1sh(z8.VnD(), - p6.Zeroing(), - SVEMemOperand(x13, -2, SVE_MUL_VL)), - "ldnf1sh {z8.d}, p6/z, [x13, #-2, mul vl]"); - COMPARE_PREFIX(ldnf1sw(z5.VnD(), - p6.Zeroing(), - SVEMemOperand(x2, 1, SVE_MUL_VL)), - "ldnf1sw {z5.d}, p6/z, [x2, #1, mul vl]"); - COMPARE_PREFIX(ldnf1w(z11.VnS(), - p3.Zeroing(), - SVEMemOperand(sp, -1, SVE_MUL_VL)), - "ldnf1w {z11.s}, p3/z, [sp, #-1, mul vl]"); - COMPARE_PREFIX(ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)), - "ldnf1w {z10.d}, p6/z, [x12]"); + COMPARE(ldnf1b(z1.VnH(), p0.Zeroing(), SVEMemOperand(x25, -8, SVE_MUL_VL)), + "ldnf1b {z1.h}, p0/z, [x25, #-8, mul vl]"); + COMPARE(ldnf1b(z0.VnS(), p0.Zeroing(), SVEMemOperand(x2, 7, SVE_MUL_VL)), + "ldnf1b {z0.s}, p0/z, [x2, #7, mul vl]"); + COMPARE(ldnf1b(z31.VnD(), p6.Zeroing(), SVEMemOperand(x0, -7, SVE_MUL_VL)), + "ldnf1b {z31.d}, p6/z, [x0, #-7, mul vl]"); + COMPARE(ldnf1b(z25.VnB(), 
p1.Zeroing(), SVEMemOperand(x5, 6, SVE_MUL_VL)), + "ldnf1b {z25.b}, p1/z, [x5, #6, mul vl]"); + COMPARE(ldnf1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(x11, -6, SVE_MUL_VL)), + "ldnf1d {z25.d}, p0/z, [x11, #-6, mul vl]"); + COMPARE(ldnf1h(z22.VnH(), p4.Zeroing(), SVEMemOperand(x7, 5, SVE_MUL_VL)), + "ldnf1h {z22.h}, p4/z, [x7, #5, mul vl]"); + COMPARE(ldnf1h(z7.VnS(), p2.Zeroing(), SVEMemOperand(x1, -5, SVE_MUL_VL)), + "ldnf1h {z7.s}, p2/z, [x1, #-5, mul vl]"); + COMPARE(ldnf1h(z5.VnD(), p3.Zeroing(), SVEMemOperand(x29, 4, SVE_MUL_VL)), + "ldnf1h {z5.d}, p3/z, [x29, #4, mul vl]"); + COMPARE(ldnf1sb(z12.VnH(), p5.Zeroing(), SVEMemOperand(x27, -4, SVE_MUL_VL)), + "ldnf1sb {z12.h}, p5/z, [x27, #-4, mul vl]"); + COMPARE(ldnf1sb(z10.VnS(), p2.Zeroing(), SVEMemOperand(x13, 3, SVE_MUL_VL)), + "ldnf1sb {z10.s}, p2/z, [x13, #3, mul vl]"); + COMPARE(ldnf1sb(z25.VnD(), p6.Zeroing(), SVEMemOperand(x26, -3, SVE_MUL_VL)), + "ldnf1sb {z25.d}, p6/z, [x26, #-3, mul vl]"); + COMPARE(ldnf1sh(z3.VnS(), p5.Zeroing(), SVEMemOperand(x1, 2, SVE_MUL_VL)), + "ldnf1sh {z3.s}, p5/z, [x1, #2, mul vl]"); + COMPARE(ldnf1sh(z8.VnD(), p6.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)), + "ldnf1sh {z8.d}, p6/z, [x13, #-2, mul vl]"); + COMPARE(ldnf1sw(z5.VnD(), p6.Zeroing(), SVEMemOperand(x2, 1, SVE_MUL_VL)), + "ldnf1sw {z5.d}, p6/z, [x2, #1, mul vl]"); + COMPARE(ldnf1w(z11.VnS(), p3.Zeroing(), SVEMemOperand(sp, -1, SVE_MUL_VL)), + "ldnf1w {z11.s}, p3/z, [sp, #-1, mul vl]"); + COMPARE(ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)), + "ldnf1w {z10.d}, p6/z, [x12]"); CLEANUP(); } @@ -4623,43 +4532,29 @@ TEST(sve_mem_contiguous_load) { TEST(sve_mem_contiguous_store) { SETUP(); - COMPARE_PREFIX(stnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)), - "stnt1b {z21.b}, p5, [x1, x23]"); - COMPARE_PREFIX(stnt1d(z10.VnD(), - p0.Zeroing(), - SVEMemOperand(x23, x6, LSL, 3)), - "stnt1d {z10.d}, p0, [x23, x6, lsl #3]"); - COMPARE_PREFIX(stnt1h(z30.VnH(), - p4.Zeroing(), - SVEMemOperand(x6, x11, LSL, 1)), - 
"stnt1h {z30.h}, p4, [x6, x11, lsl #1]"); - COMPARE_PREFIX(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)), - "stnt1w {z0.s}, p4, [x11, x1, lsl #2]"); - COMPARE_PREFIX(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), - "stnt1w {z0.s}, p4, [sp, xzr, lsl #2]"); - - COMPARE_PREFIX(stnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)), - "stnt1b {z1.b}, p3, [x11]"); - COMPARE_PREFIX(stnt1b(z2.VnB(), - p2.Zeroing(), - SVEMemOperand(x12, -8, SVE_MUL_VL)), - "stnt1b {z2.b}, p2, [x12, #-8, mul vl]"); - COMPARE_PREFIX(stnt1d(z2.VnD(), - p7.Zeroing(), - SVEMemOperand(x13, -2, SVE_MUL_VL)), - "stnt1d {z2.d}, p7, [x13, #-2, mul vl]"); - COMPARE_PREFIX(stnt1h(z26.VnH(), - p4.Zeroing(), - SVEMemOperand(x16, 3, SVE_MUL_VL)), - "stnt1h {z26.h}, p4, [x16, #3, mul vl]"); - COMPARE_PREFIX(stnt1w(z17.VnS(), - p4.Zeroing(), - SVEMemOperand(x15, 7, SVE_MUL_VL)), - "stnt1w {z17.s}, p4, [x15, #7, mul vl]"); - COMPARE_PREFIX(stnt1w(z17.VnS(), - p4.Zeroing(), - SVEMemOperand(sp, 7, SVE_MUL_VL)), - "stnt1w {z17.s}, p4, [sp, #7, mul vl]"); + COMPARE(stnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)), + "stnt1b {z21.b}, p5, [x1, x23]"); + COMPARE(stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)), + "stnt1d {z10.d}, p0, [x23, x6, lsl #3]"); + COMPARE(stnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)), + "stnt1h {z30.h}, p4, [x6, x11, lsl #1]"); + COMPARE(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)), + "stnt1w {z0.s}, p4, [x11, x1, lsl #2]"); + COMPARE(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), + "stnt1w {z0.s}, p4, [sp, xzr, lsl #2]"); + + COMPARE(stnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)), + "stnt1b {z1.b}, p3, [x11]"); + COMPARE(stnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)), + "stnt1b {z2.b}, p2, [x12, #-8, mul vl]"); + COMPARE(stnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)), + "stnt1d {z2.d}, p7, [x13, #-2, mul vl]"); + COMPARE(stnt1h(z26.VnH(), 
p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)), + "stnt1h {z26.h}, p4, [x16, #3, mul vl]"); + COMPARE(stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), + "stnt1w {z17.s}, p4, [x15, #7, mul vl]"); + COMPARE(stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)), + "stnt1w {z17.s}, p4, [sp, #7, mul vl]"); COMPARE_MACRO(Stnt1b(z2.VnB(), p0.Zeroing(), @@ -4681,24 +4576,58 @@ TEST(sve_mem_contiguous_store) { "madd x16, x16, x17, x12\n" "stnt1w {z4.s}, p2, [x16]"); + COMPARE_MACRO(Stnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)), + "stnt1b {z0.b}, p0, [x0, x1]"); + COMPARE_MACRO(Stnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)), + "stnt1h {z0.h}, p0, [x0, x1, lsl #1]"); + COMPARE_MACRO(Stnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)), + "stnt1w {z0.s}, p0, [x0, x1, lsl #2]"); + COMPARE_MACRO(Stnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)), + "stnt1d {z0.d}, p0, [x0, x1, lsl #3]"); + + CLEANUP(); +} + +TEST(sve_load_broadcast_octo) { + SETUP(); + + COMPARE_MACRO(Ld1rob(z3.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1)), + "ld1rob {z3.b}, p1/z, [x0, x1]"); + COMPARE_MACRO(Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x31, LSL, 1)), + "ld1roh {z6.h}, p4/z, [sp]"); + COMPARE_MACRO(Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x30, LSL, 1)), + "ld1roh {z6.h}, p4/z, [sp, x30, lsl #1]"); + COMPARE_MACRO(Ld1row(z2.VnS(), p6.Zeroing(), SVEMemOperand(x30, x30, LSL, 2)), + "ld1row {z2.s}, p6/z, [x30, x30, lsl #2]"); + COMPARE_MACRO(Ld1rod(z30.VnD(), p7.Zeroing(), SVEMemOperand(x21, x5, LSL, 3)), + "ld1rod {z30.d}, p7/z, [x21, x5, lsl #3]"); + COMPARE_MACRO(Ld1rob(z9.VnB(), p0.Zeroing(), SVEMemOperand(sp, 32)), + "ld1rob {z9.b}, p0/z, [sp, #32]"); + COMPARE_MACRO(Ld1roh(z19.VnH(), p3.Zeroing(), SVEMemOperand(x4)), + "ld1roh {z19.h}, p3/z, [x4]"); + COMPARE_MACRO(Ld1row(z21.VnS(), p3.Zeroing(), SVEMemOperand(x11, 224)), + "ld1row {z21.s}, p3/z, [x11, #224]"); + COMPARE_MACRO(Ld1rod(z0.VnD(), p2.Zeroing(), 
SVEMemOperand(x16, -256)), + "ld1rod {z0.d}, p2/z, [x16, #-256]"); + CLEANUP(); } TEST(sve_ldr_str_simple) { SETUP(); - COMPARE_PREFIX(str(p14, SVEMemOperand(x0)), "str p14, [x0]"); - COMPARE_PREFIX(str(z14, SVEMemOperand(sp)), "str z14, [sp]"); - COMPARE_PREFIX(ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]"); - COMPARE_PREFIX(ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]"); - COMPARE_PREFIX(str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)), - "str p15, [sp, #-256, mul vl]"); - COMPARE_PREFIX(str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)), - "str z16, [x13, #255, mul vl]"); - COMPARE_PREFIX(ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)), - "ldr p5, [sp, #-42, mul vl]"); - COMPARE_PREFIX(ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)), - "ldr z6, [x28, #42, mul vl]"); + COMPARE(str(p14, SVEMemOperand(x0)), "str p14, [x0]"); + COMPARE(str(z14, SVEMemOperand(sp)), "str z14, [sp]"); + COMPARE(ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]"); + COMPARE(ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]"); + COMPARE(str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)), + "str p15, [sp, #-256, mul vl]"); + COMPARE(str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)), + "str z16, [x13, #255, mul vl]"); + COMPARE(ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)), + "ldr p5, [sp, #-42, mul vl]"); + COMPARE(ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)), + "ldr z6, [x28, #42, mul vl]"); COMPARE_MACRO(Str(p14, SVEMemOperand(x0)), "str p14, [x0]"); COMPARE_MACRO(Str(z14, SVEMemOperand(sp)), "str z14, [sp]"); @@ -4755,194 +4684,161 @@ TEST(sve_ldr_str_simple) { TEST(sve_ld1_st1) { SETUP(); - COMPARE_PREFIX(st1b(z11.VnB(), p0, SVEMemOperand(x22)), - "st1b {z11.b}, p0, [x22]"); - COMPARE_PREFIX(st1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), - "st1b {z15.h}, p1, [x15, #7, mul vl]"); - COMPARE_PREFIX(st1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), - "st1b {z19.s}, p2, [sp, #-8, mul vl]"); - COMPARE_PREFIX(st1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), - "st1b {z23.d}, p3, [x1]"); - COMPARE_PREFIX(st1b(z2.VnB(), 
p4, SVEMemOperand(x1, x2)), - "st1b {z2.b}, p4, [x1, x2]"); - COMPARE_PREFIX(st1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)), - "st1b {z31.d}, p7, [x9, x9]"); - COMPARE_PREFIX(st1b(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)), - "st1b {z3.s}, p0, [z14.s, #30]"); - COMPARE_PREFIX(st1b(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 31)), - "st1b {z14.d}, p4, [z3.d, #31]"); - COMPARE_PREFIX(st1b(z15.VnD(), p5, SVEMemOperand(x0, z5.VnD())), - "st1b {z15.d}, p5, [x0, z5.d]"); - COMPARE_PREFIX(st1b(z15.VnS(), p5, SVEMemOperand(sp, z2.VnS(), UXTW)), - "st1b {z15.s}, p5, [sp, z2.s, uxtw]"); - COMPARE_PREFIX(st1b(z15.VnD(), p5, SVEMemOperand(x0, z25.VnD(), SXTW)), - "st1b {z15.d}, p5, [x0, z25.d, sxtw]"); - - COMPARE_PREFIX(st1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), - "st1h {z15.h}, p1, [x15, #7, mul vl]"); - COMPARE_PREFIX(st1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), - "st1h {z19.s}, p2, [sp, #-8, mul vl]"); - COMPARE_PREFIX(st1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), - "st1h {z23.d}, p3, [x1]"); - COMPARE_PREFIX(st1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)), - "st1h {z2.h}, p4, [x1, x2, lsl #1]"); - COMPARE_PREFIX(st1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)), - "st1h {z31.d}, p7, [x9, x9, lsl #1]"); - COMPARE_PREFIX(st1h(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)), - "st1h {z3.s}, p0, [z14.s, #30]"); - COMPARE_PREFIX(st1h(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 62)), - "st1h {z14.d}, p4, [z3.d, #62]"); - COMPARE_PREFIX(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD())), - "st1h {z15.d}, p6, [sp, z6.d]"); - COMPARE_PREFIX(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD(), LSL, 1)), - "st1h {z15.d}, p6, [sp, z6.d, lsl #1]"); - COMPARE_PREFIX(st1h(z15.VnS(), p3, SVEMemOperand(x25, z3.VnS(), SXTW)), - "st1h {z15.s}, p3, [x25, z3.s, sxtw]"); - COMPARE_PREFIX(st1h(z15.VnS(), p6, SVEMemOperand(x7, z15.VnS(), SXTW, 1)), - "st1h {z15.s}, p6, [x7, z15.s, sxtw #1]"); - COMPARE_PREFIX(st1h(z17.VnD(), p3, SVEMemOperand(sp, z26.VnD(), SXTW)), - "st1h 
{z17.d}, p3, [sp, z26.d, sxtw]"); - COMPARE_PREFIX(st1h(z15.VnD(), p6, SVEMemOperand(x13, z9.VnD(), UXTW, 1)), - "st1h {z15.d}, p6, [x13, z9.d, uxtw #1]"); - - COMPARE_PREFIX(st1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), - "st1w {z19.s}, p2, [sp, #-8, mul vl]"); - COMPARE_PREFIX(st1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), - "st1w {z23.d}, p3, [x1]"); - COMPARE_PREFIX(st1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)), - "st1w {z2.s}, p4, [x1, x2, lsl #2]"); - COMPARE_PREFIX(st1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)), - "st1w {z31.d}, p7, [x9, x9, lsl #2]"); - COMPARE_PREFIX(st1w(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 32)), - "st1w {z3.s}, p0, [z14.s, #32]"); - COMPARE_PREFIX(st1w(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 124)), - "st1w {z14.d}, p4, [z3.d, #124]"); - COMPARE_PREFIX(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD())), - "st1w {z17.d}, p2, [x30, z5.d]"); - COMPARE_PREFIX(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD(), LSL, 2)), - "st1w {z17.d}, p2, [x30, z5.d, lsl #2]"); - COMPARE_PREFIX(st1w(z15.VnS(), p7, SVEMemOperand(x26, z4.VnS(), UXTW)), - "st1w {z15.s}, p7, [x26, z4.s, uxtw]"); - COMPARE_PREFIX(st1w(z15.VnS(), p4, SVEMemOperand(x8, z16.VnS(), UXTW, 2)), - "st1w {z15.s}, p4, [x8, z16.s, uxtw #2]"); - COMPARE_PREFIX(st1w(z19.VnD(), p7, SVEMemOperand(x1, z27.VnD(), UXTW)), - "st1w {z19.d}, p7, [x1, z27.d, uxtw]"); - COMPARE_PREFIX(st1w(z15.VnD(), p4, SVEMemOperand(sp, z10.VnD(), SXTW, 2)), - "st1w {z15.d}, p4, [sp, z10.d, sxtw #2]"); - - COMPARE_PREFIX(st1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), - "st1d {z23.d}, p3, [x1]"); - COMPARE_PREFIX(st1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)), - "st1d {z31.d}, p7, [x9, x9, lsl #3]"); - COMPARE_PREFIX(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 32)), - "st1d {z14.d}, p4, [z3.d, #32]"); - COMPARE_PREFIX(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 248)), - "st1d {z14.d}, p4, [z3.d, #248]"); - COMPARE_PREFIX(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD())), - "st1d 
{z19.d}, p2, [x29, z22.d]"); - COMPARE_PREFIX(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD(), LSL, 3)), - "st1d {z19.d}, p2, [x29, z22.d, lsl #3]"); - COMPARE_PREFIX(st1d(z21.VnD(), p1, SVEMemOperand(x2, z28.VnD(), SXTW)), - "st1d {z21.d}, p1, [x2, z28.d, sxtw]"); - COMPARE_PREFIX(st1d(z15.VnD(), p2, SVEMemOperand(x14, z11.VnD(), UXTW, 3)), - "st1d {z15.d}, p2, [x14, z11.d, uxtw #3]"); - - COMPARE_PREFIX(ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)), - "ld1b {z11.b}, p0/z, [x22]"); - COMPARE_PREFIX(ld1b(z15.VnH(), - p1.Zeroing(), - SVEMemOperand(x15, 7, SVE_MUL_VL)), - "ld1b {z15.h}, p1/z, [x15, #7, mul vl]"); - COMPARE_PREFIX(ld1b(z19.VnS(), - p2.Zeroing(), - SVEMemOperand(sp, -8, SVE_MUL_VL)), - "ld1b {z19.s}, p2/z, [sp, #-8, mul vl]"); - COMPARE_PREFIX(ld1b(z23.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, 0, SVE_MUL_VL)), - "ld1b {z23.d}, p3/z, [x1]"); - COMPARE_PREFIX(ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)), - "ld1b {z2.b}, p4/z, [x1, x2]"); - COMPARE_PREFIX(ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)), - "ld1b {z31.d}, p7/z, [x9, x9]"); - - COMPARE_PREFIX(ld1h(z15.VnH(), - p1.Zeroing(), - SVEMemOperand(x15, 7, SVE_MUL_VL)), - "ld1h {z15.h}, p1/z, [x15, #7, mul vl]"); - COMPARE_PREFIX(ld1h(z19.VnS(), - p2.Zeroing(), - SVEMemOperand(sp, -8, SVE_MUL_VL)), - "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]"); - COMPARE_PREFIX(ld1h(z23.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, 0, SVE_MUL_VL)), - "ld1h {z23.d}, p3/z, [x1]"); - COMPARE_PREFIX(ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)), - "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]"); - COMPARE_PREFIX(ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)), - "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]"); - - COMPARE_PREFIX(ld1w(z19.VnS(), - p2.Zeroing(), - SVEMemOperand(sp, -8, SVE_MUL_VL)), - "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]"); - COMPARE_PREFIX(ld1w(z23.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, 0, SVE_MUL_VL)), - "ld1w {z23.d}, p3/z, [x1]"); - COMPARE_PREFIX(ld1w(z2.VnS(), 
p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 2)), - "ld1w {z2.s}, p4/z, [x1, x2, lsl #2]"); - COMPARE_PREFIX(ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)), - "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]"); - - COMPARE_PREFIX(ld1d(z23.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, 0, SVE_MUL_VL)), - "ld1d {z23.d}, p3/z, [x1]"); - COMPARE_PREFIX(ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)), - "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]"); - - COMPARE_PREFIX(ld1sb(z15.VnH(), - p1.Zeroing(), - SVEMemOperand(x15, 7, SVE_MUL_VL)), - "ld1sb {z15.h}, p1/z, [x15, #7, mul vl]"); - COMPARE_PREFIX(ld1sb(z19.VnS(), - p2.Zeroing(), - SVEMemOperand(sp, -8, SVE_MUL_VL)), - "ld1sb {z19.s}, p2/z, [sp, #-8, mul vl]"); - COMPARE_PREFIX(ld1d(z23.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, 0, SVE_MUL_VL)), - "ld1d {z23.d}, p3/z, [x1]"); - COMPARE_PREFIX(ld1sb(z5.VnH(), p1.Zeroing(), SVEMemOperand(x15, x1, LSL, 0)), - "ld1sb {z5.h}, p1/z, [x15, x1]"); - COMPARE_PREFIX(ld1sb(z9.VnS(), p2.Zeroing(), SVEMemOperand(x29, x3, LSL, 0)), - "ld1sb {z9.s}, p2/z, [x29, x3]"); - COMPARE_PREFIX(ld1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)), - "ld1sb {z31.d}, p7/z, [x9, x9]"); - - COMPARE_PREFIX(ld1sh(z19.VnS(), - p2.Zeroing(), - SVEMemOperand(sp, -8, SVE_MUL_VL)), - "ld1sh {z19.s}, p2/z, [sp, #-8, mul vl]"); - COMPARE_PREFIX(ld1sh(z23.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, 0, SVE_MUL_VL)), - "ld1sh {z23.d}, p3/z, [x1]"); - COMPARE_PREFIX(ld1sh(z11.VnS(), - p4.Zeroing(), - SVEMemOperand(x22, x10, LSL, 1)), - "ld1sh {z11.s}, p4/z, [x22, x10, lsl #1]"); - COMPARE_PREFIX(ld1sh(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)), - "ld1sh {z31.d}, p7/z, [x9, x9, lsl #1]"); - - COMPARE_PREFIX(ld1sw(z23.VnD(), - p3.Zeroing(), - SVEMemOperand(x1, 0, SVE_MUL_VL)), - "ld1sw {z23.d}, p3/z, [x1]"); - COMPARE_PREFIX(ld1sw(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)), - "ld1sw {z31.d}, p7/z, [x9, x9, lsl #2]"); + COMPARE(st1b(z11.VnB(), p0, SVEMemOperand(x22)), "st1b 
{z11.b}, p0, [x22]"); + COMPARE(st1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), + "st1b {z15.h}, p1, [x15, #7, mul vl]"); + COMPARE(st1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), + "st1b {z19.s}, p2, [sp, #-8, mul vl]"); + COMPARE(st1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), + "st1b {z23.d}, p3, [x1]"); + COMPARE(st1b(z2.VnB(), p4, SVEMemOperand(x1, x2)), + "st1b {z2.b}, p4, [x1, x2]"); + COMPARE(st1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)), + "st1b {z31.d}, p7, [x9, x9]"); + COMPARE(st1b(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)), + "st1b {z3.s}, p0, [z14.s, #30]"); + COMPARE(st1b(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 31)), + "st1b {z14.d}, p4, [z3.d, #31]"); + COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z5.VnD())), + "st1b {z15.d}, p5, [x0, z5.d]"); + COMPARE(st1b(z15.VnS(), p5, SVEMemOperand(sp, z2.VnS(), UXTW)), + "st1b {z15.s}, p5, [sp, z2.s, uxtw]"); + COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z25.VnD(), SXTW)), + "st1b {z15.d}, p5, [x0, z25.d, sxtw]"); + + COMPARE(st1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), + "st1h {z15.h}, p1, [x15, #7, mul vl]"); + COMPARE(st1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), + "st1h {z19.s}, p2, [sp, #-8, mul vl]"); + COMPARE(st1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), + "st1h {z23.d}, p3, [x1]"); + COMPARE(st1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)), + "st1h {z2.h}, p4, [x1, x2, lsl #1]"); + COMPARE(st1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)), + "st1h {z31.d}, p7, [x9, x9, lsl #1]"); + COMPARE(st1h(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)), + "st1h {z3.s}, p0, [z14.s, #30]"); + COMPARE(st1h(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 62)), + "st1h {z14.d}, p4, [z3.d, #62]"); + COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD())), + "st1h {z15.d}, p6, [sp, z6.d]"); + COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD(), LSL, 1)), + "st1h {z15.d}, p6, [sp, z6.d, lsl #1]"); + COMPARE(st1h(z15.VnS(), p3, SVEMemOperand(x25, z3.VnS(), SXTW)), + 
"st1h {z15.s}, p3, [x25, z3.s, sxtw]"); + COMPARE(st1h(z15.VnS(), p6, SVEMemOperand(x7, z15.VnS(), SXTW, 1)), + "st1h {z15.s}, p6, [x7, z15.s, sxtw #1]"); + COMPARE(st1h(z17.VnD(), p3, SVEMemOperand(sp, z26.VnD(), SXTW)), + "st1h {z17.d}, p3, [sp, z26.d, sxtw]"); + COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(x13, z9.VnD(), UXTW, 1)), + "st1h {z15.d}, p6, [x13, z9.d, uxtw #1]"); + + COMPARE(st1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), + "st1w {z19.s}, p2, [sp, #-8, mul vl]"); + COMPARE(st1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), + "st1w {z23.d}, p3, [x1]"); + COMPARE(st1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)), + "st1w {z2.s}, p4, [x1, x2, lsl #2]"); + COMPARE(st1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)), + "st1w {z31.d}, p7, [x9, x9, lsl #2]"); + COMPARE(st1w(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 32)), + "st1w {z3.s}, p0, [z14.s, #32]"); + COMPARE(st1w(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 124)), + "st1w {z14.d}, p4, [z3.d, #124]"); + COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD())), + "st1w {z17.d}, p2, [x30, z5.d]"); + COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD(), LSL, 2)), + "st1w {z17.d}, p2, [x30, z5.d, lsl #2]"); + COMPARE(st1w(z15.VnS(), p7, SVEMemOperand(x26, z4.VnS(), UXTW)), + "st1w {z15.s}, p7, [x26, z4.s, uxtw]"); + COMPARE(st1w(z15.VnS(), p4, SVEMemOperand(x8, z16.VnS(), UXTW, 2)), + "st1w {z15.s}, p4, [x8, z16.s, uxtw #2]"); + COMPARE(st1w(z19.VnD(), p7, SVEMemOperand(x1, z27.VnD(), UXTW)), + "st1w {z19.d}, p7, [x1, z27.d, uxtw]"); + COMPARE(st1w(z15.VnD(), p4, SVEMemOperand(sp, z10.VnD(), SXTW, 2)), + "st1w {z15.d}, p4, [sp, z10.d, sxtw #2]"); + + COMPARE(st1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), + "st1d {z23.d}, p3, [x1]"); + COMPARE(st1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)), + "st1d {z31.d}, p7, [x9, x9, lsl #3]"); + COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 32)), + "st1d {z14.d}, p4, [z3.d, #32]"); + COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 248)), + 
"st1d {z14.d}, p4, [z3.d, #248]"); + COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD())), + "st1d {z19.d}, p2, [x29, z22.d]"); + COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD(), LSL, 3)), + "st1d {z19.d}, p2, [x29, z22.d, lsl #3]"); + COMPARE(st1d(z21.VnD(), p1, SVEMemOperand(x2, z28.VnD(), SXTW)), + "st1d {z21.d}, p1, [x2, z28.d, sxtw]"); + COMPARE(st1d(z15.VnD(), p2, SVEMemOperand(x14, z11.VnD(), UXTW, 3)), + "st1d {z15.d}, p2, [x14, z11.d, uxtw #3]"); + + COMPARE(ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)), + "ld1b {z11.b}, p0/z, [x22]"); + COMPARE(ld1b(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), + "ld1b {z15.h}, p1/z, [x15, #7, mul vl]"); + COMPARE(ld1b(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), + "ld1b {z19.s}, p2/z, [sp, #-8, mul vl]"); + COMPARE(ld1b(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), + "ld1b {z23.d}, p3/z, [x1]"); + COMPARE(ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)), + "ld1b {z2.b}, p4/z, [x1, x2]"); + COMPARE(ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)), + "ld1b {z31.d}, p7/z, [x9, x9]"); + + COMPARE(ld1h(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), + "ld1h {z15.h}, p1/z, [x15, #7, mul vl]"); + COMPARE(ld1h(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), + "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]"); + COMPARE(ld1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), + "ld1h {z23.d}, p3/z, [x1]"); + COMPARE(ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)), + "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]"); + COMPARE(ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)), + "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]"); + + COMPARE(ld1w(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), + "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]"); + COMPARE(ld1w(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), + "ld1w {z23.d}, p3/z, [x1]"); + COMPARE(ld1w(z2.VnS(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 2)), + 
"ld1w {z2.s}, p4/z, [x1, x2, lsl #2]"); + COMPARE(ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)), + "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]"); + + COMPARE(ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), + "ld1d {z23.d}, p3/z, [x1]"); + COMPARE(ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)), + "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]"); + + COMPARE(ld1sb(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), + "ld1sb {z15.h}, p1/z, [x15, #7, mul vl]"); + COMPARE(ld1sb(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), + "ld1sb {z19.s}, p2/z, [sp, #-8, mul vl]"); + COMPARE(ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), + "ld1d {z23.d}, p3/z, [x1]"); + COMPARE(ld1sb(z5.VnH(), p1.Zeroing(), SVEMemOperand(x15, x1, LSL, 0)), + "ld1sb {z5.h}, p1/z, [x15, x1]"); + COMPARE(ld1sb(z9.VnS(), p2.Zeroing(), SVEMemOperand(x29, x3, LSL, 0)), + "ld1sb {z9.s}, p2/z, [x29, x3]"); + COMPARE(ld1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)), + "ld1sb {z31.d}, p7/z, [x9, x9]"); + + COMPARE(ld1sh(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), + "ld1sh {z19.s}, p2/z, [sp, #-8, mul vl]"); + COMPARE(ld1sh(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), + "ld1sh {z23.d}, p3/z, [x1]"); + COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(x22, x10, LSL, 1)), + "ld1sh {z11.s}, p4/z, [x22, x10, lsl #1]"); + COMPARE(ld1sh(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)), + "ld1sh {z31.d}, p7/z, [x9, x9, lsl #1]"); + + COMPARE(ld1sw(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), + "ld1sw {z23.d}, p3/z, [x1]"); + COMPARE(ld1sw(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)), + "ld1sw {z31.d}, p7/z, [x9, x9, lsl #2]"); CLEANUP(); } @@ -5102,57 +4998,33 @@ TEST(sve_ld1_st1_macro) { TEST(sve_st2_scalar_plus_immediate) { SETUP(); - COMPARE_PREFIX(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19)), - "st2b {z31.b, z0.b}, p6, [x19]"); - 
COMPARE_PREFIX(st2b(z31.VnB(), - z0.VnB(), - p6, - SVEMemOperand(x19, 14, SVE_MUL_VL)), - "st2b {z31.b, z0.b}, p6, [x19, #14, mul vl]"); - COMPARE_PREFIX(st2b(z15.VnB(), - z16.VnB(), - p6, - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "st2b {z15.b, z16.b}, p6, [x19, #-16, mul vl]"); - - COMPARE_PREFIX(st2h(z15.VnH(), z16.VnH(), p6, SVEMemOperand(x19)), - "st2h {z15.h, z16.h}, p6, [x19]"); - COMPARE_PREFIX(st2h(z15.VnH(), - z16.VnH(), - p0, - SVEMemOperand(x19, 14, SVE_MUL_VL)), - "st2h {z15.h, z16.h}, p0, [x19, #14, mul vl]"); - COMPARE_PREFIX(st2h(z15.VnH(), - z16.VnH(), - p0, - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "st2h {z15.h, z16.h}, p0, [x19, #-16, mul vl]"); - - COMPARE_PREFIX(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19)), - "st2w {z0.s, z1.s}, p0, [x19]"); - COMPARE_PREFIX(st2w(z0.VnS(), - z1.VnS(), - p0, - SVEMemOperand(x19, 14, SVE_MUL_VL)), - "st2w {z0.s, z1.s}, p0, [x19, #14, mul vl]"); - COMPARE_PREFIX(st2w(z0.VnS(), - z1.VnS(), - p7, - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "st2w {z0.s, z1.s}, p7, [x19, #-16, mul vl]"); - - COMPARE_PREFIX(st2d(z0.VnD(), z1.VnD(), p7, SVEMemOperand(x19)), - "st2d {z0.d, z1.d}, p7, [x19]"); - COMPARE_PREFIX(st2d(z31.VnD(), - z0.VnD(), - p7, - SVEMemOperand(x19, 14, SVE_MUL_VL)), - "st2d {z31.d, z0.d}, p7, [x19, #14, mul vl]"); - COMPARE_PREFIX(st2d(z31.VnD(), - z0.VnD(), - p7, - SVEMemOperand(x19, -16, SVE_MUL_VL)), - "st2d {z31.d, z0.d}, p7, [x19, #-16, mul vl]"); + COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19)), + "st2b {z31.b, z0.b}, p6, [x19]"); + COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19, 14, SVE_MUL_VL)), + "st2b {z31.b, z0.b}, p6, [x19, #14, mul vl]"); + COMPARE(st2b(z15.VnB(), z16.VnB(), p6, SVEMemOperand(x19, -16, SVE_MUL_VL)), + "st2b {z15.b, z16.b}, p6, [x19, #-16, mul vl]"); + + COMPARE(st2h(z15.VnH(), z16.VnH(), p6, SVEMemOperand(x19)), + "st2h {z15.h, z16.h}, p6, [x19]"); + COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)), + "st2h {z15.h, z16.h}, p0, 
[x19, #14, mul vl]"); + COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, -16, SVE_MUL_VL)), + "st2h {z15.h, z16.h}, p0, [x19, #-16, mul vl]"); + + COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19)), + "st2w {z0.s, z1.s}, p0, [x19]"); + COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)), + "st2w {z0.s, z1.s}, p0, [x19, #14, mul vl]"); + COMPARE(st2w(z0.VnS(), z1.VnS(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)), + "st2w {z0.s, z1.s}, p7, [x19, #-16, mul vl]"); + + COMPARE(st2d(z0.VnD(), z1.VnD(), p7, SVEMemOperand(x19)), + "st2d {z0.d, z1.d}, p7, [x19]"); + COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, 14, SVE_MUL_VL)), + "st2d {z31.d, z0.d}, p7, [x19, #14, mul vl]"); + COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)), + "st2d {z31.d, z0.d}, p7, [x19, #-16, mul vl]"); CLEANUP(); } @@ -5160,65 +5032,65 @@ TEST(sve_st2_scalar_plus_immediate) { TEST(sve_st3_scalar_plus_immediate) { SETUP(); - COMPARE_PREFIX(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p7, SVEMemOperand(x19)), - "st3b {z30.b, z31.b, z0.b}, p7, [x19]"); - COMPARE_PREFIX(st3b(z30.VnB(), - z31.VnB(), - z0.VnB(), - p6, - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "st3b {z30.b, z31.b, z0.b}, p6, [x19, #21, mul vl]"); - COMPARE_PREFIX(st3b(z30.VnB(), - z31.VnB(), - z0.VnB(), - p6, - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "st3b {z30.b, z31.b, z0.b}, p6, [x19, #-24, mul vl]"); - - COMPARE_PREFIX(st3h(z15.VnH(), z16.VnH(), z17.VnH(), p6, SVEMemOperand(x19)), - "st3h {z15.h, z16.h, z17.h}, p6, [x19]"); - COMPARE_PREFIX(st3h(z15.VnH(), - z16.VnH(), - z17.VnH(), - p6, - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "st3h {z15.h, z16.h, z17.h}, p6, [x19, #21, mul vl]"); - COMPARE_PREFIX(st3h(z15.VnH(), - z16.VnH(), - z17.VnH(), - p0, - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "st3h {z15.h, z16.h, z17.h}, p0, [x19, #-24, mul vl]"); - - COMPARE_PREFIX(st3w(z15.VnS(), z16.VnS(), z17.VnS(), p0, SVEMemOperand(x19)), - "st3w {z15.s, z16.s, z17.s}, p0, [x19]"); - 
COMPARE_PREFIX(st3w(z0.VnS(), - z1.VnS(), - z2.VnS(), - p0, - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "st3w {z0.s, z1.s, z2.s}, p0, [x19, #21, mul vl]"); - COMPARE_PREFIX(st3w(z0.VnS(), - z1.VnS(), - z2.VnS(), - p0, - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "st3w {z0.s, z1.s, z2.s}, p0, [x19, #-24, mul vl]"); - - COMPARE_PREFIX(st3d(z0.VnD(), z1.VnD(), z2.VnD(), p7, SVEMemOperand(x19)), - "st3d {z0.d, z1.d, z2.d}, p7, [x19]"); - COMPARE_PREFIX(st3d(z0.VnD(), - z1.VnD(), - z2.VnD(), - p7, - SVEMemOperand(x19, 21, SVE_MUL_VL)), - "st3d {z0.d, z1.d, z2.d}, p7, [x19, #21, mul vl]"); - COMPARE_PREFIX(st3d(z30.VnD(), - z31.VnD(), - z0.VnD(), - p7, - SVEMemOperand(x19, -24, SVE_MUL_VL)), - "st3d {z30.d, z31.d, z0.d}, p7, [x19, #-24, mul vl]"); + COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p7, SVEMemOperand(x19)), + "st3b {z30.b, z31.b, z0.b}, p7, [x19]"); + COMPARE(st3b(z30.VnB(), + z31.VnB(), + z0.VnB(), + p6, + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "st3b {z30.b, z31.b, z0.b}, p6, [x19, #21, mul vl]"); + COMPARE(st3b(z30.VnB(), + z31.VnB(), + z0.VnB(), + p6, + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "st3b {z30.b, z31.b, z0.b}, p6, [x19, #-24, mul vl]"); + + COMPARE(st3h(z15.VnH(), z16.VnH(), z17.VnH(), p6, SVEMemOperand(x19)), + "st3h {z15.h, z16.h, z17.h}, p6, [x19]"); + COMPARE(st3h(z15.VnH(), + z16.VnH(), + z17.VnH(), + p6, + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "st3h {z15.h, z16.h, z17.h}, p6, [x19, #21, mul vl]"); + COMPARE(st3h(z15.VnH(), + z16.VnH(), + z17.VnH(), + p0, + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "st3h {z15.h, z16.h, z17.h}, p0, [x19, #-24, mul vl]"); + + COMPARE(st3w(z15.VnS(), z16.VnS(), z17.VnS(), p0, SVEMemOperand(x19)), + "st3w {z15.s, z16.s, z17.s}, p0, [x19]"); + COMPARE(st3w(z0.VnS(), + z1.VnS(), + z2.VnS(), + p0, + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "st3w {z0.s, z1.s, z2.s}, p0, [x19, #21, mul vl]"); + COMPARE(st3w(z0.VnS(), + z1.VnS(), + z2.VnS(), + p0, + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "st3w {z0.s, z1.s, z2.s}, p0, [x19, 
#-24, mul vl]"); + + COMPARE(st3d(z0.VnD(), z1.VnD(), z2.VnD(), p7, SVEMemOperand(x19)), + "st3d {z0.d, z1.d, z2.d}, p7, [x19]"); + COMPARE(st3d(z0.VnD(), + z1.VnD(), + z2.VnD(), + p7, + SVEMemOperand(x19, 21, SVE_MUL_VL)), + "st3d {z0.d, z1.d, z2.d}, p7, [x19, #21, mul vl]"); + COMPARE(st3d(z30.VnD(), + z31.VnD(), + z0.VnD(), + p7, + SVEMemOperand(x19, -24, SVE_MUL_VL)), + "st3d {z30.d, z31.d, z0.d}, p7, [x19, #-24, mul vl]"); CLEANUP(); } @@ -5226,94 +5098,79 @@ TEST(sve_st3_scalar_plus_immediate) { TEST(sve_st4_scalar_plus_immediate) { SETUP(); - COMPARE_PREFIX(st4b(z31.VnB(), - z0.VnB(), - z1.VnB(), - z2.VnB(), - p7, - SVEMemOperand(x19)), - "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19]"); - COMPARE_PREFIX(st4b(z31.VnB(), - z0.VnB(), - z1.VnB(), - z2.VnB(), - p7, - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19, #28, mul vl]"); - COMPARE_PREFIX(st4b(z31.VnB(), - z0.VnB(), - z1.VnB(), - z2.VnB(), - p6, - SVEMemOperand(x19, -32, SVE_MUL_VL)), - "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x19, #-32, mul vl]"); - - COMPARE_PREFIX(st4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p6, - SVEMemOperand(x19)), - "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x19]"); - COMPARE_PREFIX(st4h(z15.VnH(), - z16.VnH(), - z17.VnH(), - z18.VnH(), - p6, - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "st4h {z15.h, z16.h, z17.h, z18.h}, p6, [x19, #28, mul vl]"); - COMPARE_PREFIX(st4h(z15.VnH(), - z16.VnH(), - z17.VnH(), - z18.VnH(), - p6, - SVEMemOperand(x19, -32, SVE_MUL_VL)), - "st4h {z15.h, z16.h, z17.h, z18.h}, p6, " - "[x19, #-32, mul vl]"); - - COMPARE_PREFIX(st4w(z15.VnS(), - z16.VnS(), - z17.VnS(), - z18.VnS(), - p0, - SVEMemOperand(x19)), - "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19]"); - COMPARE_PREFIX(st4w(z15.VnS(), - z16.VnS(), - z17.VnS(), - z18.VnS(), - p0, - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19, #28, mul vl]"); - COMPARE_PREFIX(st4w(z0.VnS(), - z1.VnS(), - z2.VnS(), - z3.VnS(), - p0, - SVEMemOperand(x19, 
-32, SVE_MUL_VL)), - "st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x19, #-32, mul vl]"); - - COMPARE_PREFIX(st4d(z0.VnD(), - z1.VnD(), - z2.VnD(), - z3.VnD(), - p0, - SVEMemOperand(x19)), - "st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x19]"); - COMPARE_PREFIX(st4d(z0.VnD(), - z1.VnD(), - z2.VnD(), - z3.VnD(), - p7, - SVEMemOperand(x19, 28, SVE_MUL_VL)), - "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #28, mul vl]"); - COMPARE_PREFIX(st4d(z0.VnD(), - z1.VnD(), - z2.VnD(), - z3.VnD(), - p7, - SVEMemOperand(x19, -32, SVE_MUL_VL)), - "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #-32, mul vl]"); + COMPARE(st4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p7, SVEMemOperand(x19)), + "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19]"); + COMPARE(st4b(z31.VnB(), + z0.VnB(), + z1.VnB(), + z2.VnB(), + p7, + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19, #28, mul vl]"); + COMPARE(st4b(z31.VnB(), + z0.VnB(), + z1.VnB(), + z2.VnB(), + p6, + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x19, #-32, mul vl]"); + + COMPARE(st4h(z31.VnH(), z0.VnH(), z1.VnH(), z2.VnH(), p6, SVEMemOperand(x19)), + "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x19]"); + COMPARE(st4h(z15.VnH(), + z16.VnH(), + z17.VnH(), + z18.VnH(), + p6, + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "st4h {z15.h, z16.h, z17.h, z18.h}, p6, [x19, #28, mul vl]"); + COMPARE(st4h(z15.VnH(), + z16.VnH(), + z17.VnH(), + z18.VnH(), + p6, + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "st4h {z15.h, z16.h, z17.h, z18.h}, p6, " + "[x19, #-32, mul vl]"); + + COMPARE(st4w(z15.VnS(), + z16.VnS(), + z17.VnS(), + z18.VnS(), + p0, + SVEMemOperand(x19)), + "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19]"); + COMPARE(st4w(z15.VnS(), + z16.VnS(), + z17.VnS(), + z18.VnS(), + p0, + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19, #28, mul vl]"); + COMPARE(st4w(z0.VnS(), + z1.VnS(), + z2.VnS(), + z3.VnS(), + p0, + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x19, 
#-32, mul vl]"); + + COMPARE(st4d(z0.VnD(), z1.VnD(), z2.VnD(), z3.VnD(), p0, SVEMemOperand(x19)), + "st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x19]"); + COMPARE(st4d(z0.VnD(), + z1.VnD(), + z2.VnD(), + z3.VnD(), + p7, + SVEMemOperand(x19, 28, SVE_MUL_VL)), + "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #28, mul vl]"); + COMPARE(st4d(z0.VnD(), + z1.VnD(), + z2.VnD(), + z3.VnD(), + p7, + SVEMemOperand(x19, -32, SVE_MUL_VL)), + "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #-32, mul vl]"); CLEANUP(); } @@ -5321,39 +5178,33 @@ TEST(sve_st4_scalar_plus_immediate) { TEST(sve_st2_scalar_plus_scalar) { SETUP(); - COMPARE_PREFIX(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(x20, x19)), - "st2b {z25.b, z26.b}, p1, [x20, x19]"); - COMPARE_PREFIX(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(sp, x19)), - "st2b {z25.b, z26.b}, p1, [sp, x19]"); - COMPARE_PREFIX(st2b(z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)), - "st2b {z31.b, z0.b}, p1, [sp, x19]"); - - COMPARE_PREFIX(st2h(z31.VnH(), z0.VnH(), p1, SVEMemOperand(x20, x19, LSL, 1)), - "st2h {z31.h, z0.h}, p1, [x20, x19, lsl #1]"); - COMPARE_PREFIX(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)), - "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]"); - COMPARE_PREFIX(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)), - "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]"); - - COMPARE_PREFIX(st2w(z16.VnS(), - z17.VnS(), - p7, - SVEMemOperand(x20, x19, LSL, 2)), - "st2w {z16.s, z17.s}, p7, [x20, x19, lsl #2]"); - COMPARE_PREFIX(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(sp, x19, LSL, 2)), - "st2w {z16.s, z17.s}, p7, [sp, x19, lsl #2]"); - COMPARE_PREFIX(st2w(z16.VnS(), z17.VnS(), p0, SVEMemOperand(sp, x19, LSL, 2)), - "st2w {z16.s, z17.s}, p0, [sp, x19, lsl #2]"); - - COMPARE_PREFIX(st2d(z16.VnD(), - z17.VnD(), - p0, - SVEMemOperand(x20, x19, LSL, 3)), - "st2d {z16.d, z17.d}, p0, [x20, x19, lsl #3]"); - COMPARE_PREFIX(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)), - "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]"); 
- COMPARE_PREFIX(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)), - "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]"); + COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(x20, x19)), + "st2b {z25.b, z26.b}, p1, [x20, x19]"); + COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(sp, x19)), + "st2b {z25.b, z26.b}, p1, [sp, x19]"); + COMPARE(st2b(z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)), + "st2b {z31.b, z0.b}, p1, [sp, x19]"); + + COMPARE(st2h(z31.VnH(), z0.VnH(), p1, SVEMemOperand(x20, x19, LSL, 1)), + "st2h {z31.h, z0.h}, p1, [x20, x19, lsl #1]"); + COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)), + "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]"); + COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)), + "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]"); + + COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(x20, x19, LSL, 2)), + "st2w {z16.s, z17.s}, p7, [x20, x19, lsl #2]"); + COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(sp, x19, LSL, 2)), + "st2w {z16.s, z17.s}, p7, [sp, x19, lsl #2]"); + COMPARE(st2w(z16.VnS(), z17.VnS(), p0, SVEMemOperand(sp, x19, LSL, 2)), + "st2w {z16.s, z17.s}, p0, [sp, x19, lsl #2]"); + + COMPARE(st2d(z16.VnD(), z17.VnD(), p0, SVEMemOperand(x20, x19, LSL, 3)), + "st2d {z16.d, z17.d}, p0, [x20, x19, lsl #3]"); + COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)), + "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]"); + COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)), + "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]"); CLEANUP(); } @@ -5361,81 +5212,69 @@ TEST(sve_st2_scalar_plus_scalar) { TEST(sve_st3_scalar_plus_scalar) { SETUP(); - COMPARE_PREFIX(st3b(z25.VnB(), - z26.VnB(), - z27.VnB(), - p1, - SVEMemOperand(x20, x19)), - "st3b {z25.b, z26.b, z27.b}, p1, [x20, x19]"); - COMPARE_PREFIX(st3b(z25.VnB(), - z26.VnB(), - z27.VnB(), - p1, - SVEMemOperand(sp, x19)), - "st3b {z25.b, z26.b, z27.b}, p1, [sp, x19]"); - COMPARE_PREFIX(st3b(z30.VnB(), - 
z31.VnB(), - z0.VnB(), - p1, - SVEMemOperand(sp, x19)), - "st3b {z30.b, z31.b, z0.b}, p1, [sp, x19]"); - - COMPARE_PREFIX(st3h(z30.VnH(), - z31.VnH(), - z0.VnH(), - p1, - SVEMemOperand(x20, x19, LSL, 1)), - "st3h {z30.h, z31.h, z0.h}, p1, [x20, x19, lsl #1]"); - COMPARE_PREFIX(st3h(z30.VnH(), - z31.VnH(), - z0.VnH(), - p7, - SVEMemOperand(sp, x19, LSL, 1)), - "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]"); - COMPARE_PREFIX(st3h(z30.VnH(), - z31.VnH(), - z0.VnH(), - p7, - SVEMemOperand(sp, x19, LSL, 1)), - "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]"); - - COMPARE_PREFIX(st3w(z16.VnS(), - z17.VnS(), - z18.VnS(), - p7, - SVEMemOperand(x20, x19, LSL, 2)), - "st3w {z16.s, z17.s, z18.s}, p7, [x20, x19, lsl #2]"); - COMPARE_PREFIX(st3w(z16.VnS(), - z17.VnS(), - z18.VnS(), - p7, - SVEMemOperand(sp, x19, LSL, 2)), - "st3w {z16.s, z17.s, z18.s}, p7, [sp, x19, lsl #2]"); - COMPARE_PREFIX(st3w(z16.VnS(), - z17.VnS(), - z18.VnS(), - p0, - SVEMemOperand(sp, x19, LSL, 2)), - "st3w {z16.s, z17.s, z18.s}, p0, [sp, x19, lsl #2]"); - - COMPARE_PREFIX(st3d(z16.VnD(), - z17.VnD(), - z18.VnD(), - p0, - SVEMemOperand(x20, x19, LSL, 3)), - "st3d {z16.d, z17.d, z18.d}, p0, [x20, x19, lsl #3]"); - COMPARE_PREFIX(st3d(z25.VnD(), - z26.VnD(), - z27.VnD(), - p0, - SVEMemOperand(sp, x19, LSL, 3)), - "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]"); - COMPARE_PREFIX(st3d(z25.VnD(), - z26.VnD(), - z27.VnD(), - p0, - SVEMemOperand(sp, x19, LSL, 3)), - "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]"); + COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(x20, x19)), + "st3b {z25.b, z26.b, z27.b}, p1, [x20, x19]"); + COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(sp, x19)), + "st3b {z25.b, z26.b, z27.b}, p1, [sp, x19]"); + COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)), + "st3b {z30.b, z31.b, z0.b}, p1, [sp, x19]"); + + COMPARE(st3h(z30.VnH(), + z31.VnH(), + z0.VnH(), + p1, + SVEMemOperand(x20, x19, LSL, 1)), + "st3h {z30.h, 
z31.h, z0.h}, p1, [x20, x19, lsl #1]"); + COMPARE(st3h(z30.VnH(), + z31.VnH(), + z0.VnH(), + p7, + SVEMemOperand(sp, x19, LSL, 1)), + "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]"); + COMPARE(st3h(z30.VnH(), + z31.VnH(), + z0.VnH(), + p7, + SVEMemOperand(sp, x19, LSL, 1)), + "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]"); + + COMPARE(st3w(z16.VnS(), + z17.VnS(), + z18.VnS(), + p7, + SVEMemOperand(x20, x19, LSL, 2)), + "st3w {z16.s, z17.s, z18.s}, p7, [x20, x19, lsl #2]"); + COMPARE(st3w(z16.VnS(), + z17.VnS(), + z18.VnS(), + p7, + SVEMemOperand(sp, x19, LSL, 2)), + "st3w {z16.s, z17.s, z18.s}, p7, [sp, x19, lsl #2]"); + COMPARE(st3w(z16.VnS(), + z17.VnS(), + z18.VnS(), + p0, + SVEMemOperand(sp, x19, LSL, 2)), + "st3w {z16.s, z17.s, z18.s}, p0, [sp, x19, lsl #2]"); + + COMPARE(st3d(z16.VnD(), + z17.VnD(), + z18.VnD(), + p0, + SVEMemOperand(x20, x19, LSL, 3)), + "st3d {z16.d, z17.d, z18.d}, p0, [x20, x19, lsl #3]"); + COMPARE(st3d(z25.VnD(), + z26.VnD(), + z27.VnD(), + p0, + SVEMemOperand(sp, x19, LSL, 3)), + "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]"); + COMPARE(st3d(z25.VnD(), + z26.VnD(), + z27.VnD(), + p0, + SVEMemOperand(sp, x19, LSL, 3)), + "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]"); CLEANUP(); } @@ -5443,93 +5282,93 @@ TEST(sve_st3_scalar_plus_scalar) { TEST(sve_st4_scalar_plus_scalar) { SETUP(); - COMPARE_PREFIX(st4b(z25.VnB(), - z26.VnB(), - z27.VnB(), - z28.VnB(), - p0, - SVEMemOperand(x20, x19)), - "st4b {z25.b, z26.b, z27.b, z28.b}, p0, [x20, x19]"); - COMPARE_PREFIX(st4b(z25.VnB(), - z26.VnB(), - z27.VnB(), - z28.VnB(), - p1, - SVEMemOperand(sp, x19)), - "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]"); - COMPARE_PREFIX(st4b(z25.VnB(), - z26.VnB(), - z27.VnB(), - z28.VnB(), - p1, - SVEMemOperand(sp, x19)), - "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]"); - - COMPARE_PREFIX(st4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p1, - SVEMemOperand(x20, x19, LSL, 1)), - "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [x20, x19, 
lsl #1]"); - COMPARE_PREFIX(st4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p1, - SVEMemOperand(sp, x19, LSL, 1)), - "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [sp, x19, lsl #1]"); - COMPARE_PREFIX(st4h(z31.VnH(), - z0.VnH(), - z1.VnH(), - z2.VnH(), - p7, - SVEMemOperand(sp, x19, LSL, 1)), - "st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x19, lsl #1]"); - - COMPARE_PREFIX(st4w(z31.VnS(), - z0.VnS(), - z1.VnS(), - z2.VnS(), - p7, - SVEMemOperand(x20, x19, LSL, 2)), - "st4w {z31.s, z0.s, z1.s, z2.s}, p7, [x20, x19, lsl #2]"); - COMPARE_PREFIX(st4w(z16.VnS(), - z17.VnS(), - z18.VnS(), - z19.VnS(), - p7, - SVEMemOperand(sp, x19, LSL, 2)), - "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]"); - COMPARE_PREFIX(st4w(z16.VnS(), - z17.VnS(), - z18.VnS(), - z19.VnS(), - p7, - SVEMemOperand(sp, x19, LSL, 2)), - "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]"); - - COMPARE_PREFIX(st4d(z16.VnD(), - z17.VnD(), - z18.VnD(), - z19.VnD(), - p0, - SVEMemOperand(x20, x19, LSL, 3)), - "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [x20, x19, lsl #3]"); - COMPARE_PREFIX(st4d(z16.VnD(), - z17.VnD(), - z18.VnD(), - z19.VnD(), - p0, - SVEMemOperand(sp, x19, LSL, 3)), - "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [sp, x19, lsl #3]"); - COMPARE_PREFIX(st4d(z25.VnD(), - z26.VnD(), - z27.VnD(), - z28.VnD(), - p0, - SVEMemOperand(sp, x19, LSL, 3)), - "st4d {z25.d, z26.d, z27.d, z28.d}, p0, [sp, x19, lsl #3]"); + COMPARE(st4b(z25.VnB(), + z26.VnB(), + z27.VnB(), + z28.VnB(), + p0, + SVEMemOperand(x20, x19)), + "st4b {z25.b, z26.b, z27.b, z28.b}, p0, [x20, x19]"); + COMPARE(st4b(z25.VnB(), + z26.VnB(), + z27.VnB(), + z28.VnB(), + p1, + SVEMemOperand(sp, x19)), + "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]"); + COMPARE(st4b(z25.VnB(), + z26.VnB(), + z27.VnB(), + z28.VnB(), + p1, + SVEMemOperand(sp, x19)), + "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]"); + + COMPARE(st4h(z31.VnH(), + z0.VnH(), + z1.VnH(), + z2.VnH(), + p1, + SVEMemOperand(x20, x19, LSL, 1)), + "st4h {z31.h, z0.h, 
z1.h, z2.h}, p1, [x20, x19, lsl #1]"); + COMPARE(st4h(z31.VnH(), + z0.VnH(), + z1.VnH(), + z2.VnH(), + p1, + SVEMemOperand(sp, x19, LSL, 1)), + "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [sp, x19, lsl #1]"); + COMPARE(st4h(z31.VnH(), + z0.VnH(), + z1.VnH(), + z2.VnH(), + p7, + SVEMemOperand(sp, x19, LSL, 1)), + "st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x19, lsl #1]"); + + COMPARE(st4w(z31.VnS(), + z0.VnS(), + z1.VnS(), + z2.VnS(), + p7, + SVEMemOperand(x20, x19, LSL, 2)), + "st4w {z31.s, z0.s, z1.s, z2.s}, p7, [x20, x19, lsl #2]"); + COMPARE(st4w(z16.VnS(), + z17.VnS(), + z18.VnS(), + z19.VnS(), + p7, + SVEMemOperand(sp, x19, LSL, 2)), + "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]"); + COMPARE(st4w(z16.VnS(), + z17.VnS(), + z18.VnS(), + z19.VnS(), + p7, + SVEMemOperand(sp, x19, LSL, 2)), + "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]"); + + COMPARE(st4d(z16.VnD(), + z17.VnD(), + z18.VnD(), + z19.VnD(), + p0, + SVEMemOperand(x20, x19, LSL, 3)), + "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [x20, x19, lsl #3]"); + COMPARE(st4d(z16.VnD(), + z17.VnD(), + z18.VnD(), + z19.VnD(), + p0, + SVEMemOperand(sp, x19, LSL, 3)), + "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [sp, x19, lsl #3]"); + COMPARE(st4d(z25.VnD(), + z26.VnD(), + z27.VnD(), + z28.VnD(), + p0, + SVEMemOperand(sp, x19, LSL, 3)), + "st4d {z25.d, z26.d, z27.d, z28.d}, p0, [sp, x19, lsl #3]"); CLEANUP(); } @@ -5537,14 +5376,13 @@ TEST(sve_st4_scalar_plus_scalar) { TEST(sve_mul_index) { SETUP(); - COMPARE_PREFIX(sdot(z17.VnD(), z21.VnH(), z15.VnH(), 0), - "sdot z17.d, z21.h, z15.h[0]"); - COMPARE_PREFIX(sdot(z28.VnS(), z9.VnB(), z7.VnB(), 1), - "sdot z28.s, z9.b, z7.b[1]"); - COMPARE_PREFIX(udot(z26.VnD(), z15.VnH(), z1.VnH(), 1), - "udot z26.d, z15.h, z1.h[1]"); - COMPARE_PREFIX(udot(z23.VnS(), z24.VnB(), z5.VnB(), 3), - "udot z23.s, z24.b, z5.b[3]"); + COMPARE(sdot(z17.VnD(), z21.VnH(), z15.VnH(), 0), + "sdot z17.d, z21.h, z15.h[0]"); + COMPARE(sdot(z28.VnS(), z9.VnB(), z7.VnB(), 1), "sdot z28.s, 
z9.b, z7.b[1]"); + COMPARE(udot(z26.VnD(), z15.VnH(), z1.VnH(), 1), + "udot z26.d, z15.h, z1.h[1]"); + COMPARE(udot(z23.VnS(), z24.VnB(), z5.VnB(), 3), + "udot z23.s, z24.b, z5.b[3]"); CLEANUP(); } @@ -5589,22 +5427,16 @@ TEST(sve_mul_index_macro) { TEST(sve_partition_break) { SETUP(); - COMPARE_PREFIX(brkas(p8.VnB(), p5.Zeroing(), p4.VnB()), - "brkas p8.b, p5/z, p4.b"); - COMPARE_PREFIX(brka(p11.VnB(), p7.Zeroing(), p15.VnB()), - "brka p11.b, p7/z, p15.b"); - COMPARE_PREFIX(brka(p12.VnB(), p8.Merging(), p13.VnB()), - "brka p12.b, p8/m, p13.b"); - COMPARE_PREFIX(brkbs(p6.VnB(), p9.Zeroing(), p14.VnB()), - "brkbs p6.b, p9/z, p14.b"); - COMPARE_PREFIX(brkb(p11.VnB(), p6.Zeroing(), p4.VnB()), - "brkb p11.b, p6/z, p4.b"); - COMPARE_PREFIX(brkb(p12.VnB(), p7.Merging(), p5.VnB()), - "brkb p12.b, p7/m, p5.b"); - COMPARE_PREFIX(brkns(p2.VnB(), p11.Zeroing(), p0.VnB(), p2.VnB()), - "brkns p2.b, p11/z, p0.b, p2.b"); - COMPARE_PREFIX(brkn(p4.VnB(), p3.Zeroing(), p1.VnB(), p4.VnB()), - "brkn p4.b, p3/z, p1.b, p4.b"); + COMPARE(brkas(p8.VnB(), p5.Zeroing(), p4.VnB()), "brkas p8.b, p5/z, p4.b"); + COMPARE(brka(p11.VnB(), p7.Zeroing(), p15.VnB()), "brka p11.b, p7/z, p15.b"); + COMPARE(brka(p12.VnB(), p8.Merging(), p13.VnB()), "brka p12.b, p8/m, p13.b"); + COMPARE(brkbs(p6.VnB(), p9.Zeroing(), p14.VnB()), "brkbs p6.b, p9/z, p14.b"); + COMPARE(brkb(p11.VnB(), p6.Zeroing(), p4.VnB()), "brkb p11.b, p6/z, p4.b"); + COMPARE(brkb(p12.VnB(), p7.Merging(), p5.VnB()), "brkb p12.b, p7/m, p5.b"); + COMPARE(brkns(p2.VnB(), p11.Zeroing(), p0.VnB(), p2.VnB()), + "brkns p2.b, p11/z, p0.b, p2.b"); + COMPARE(brkn(p4.VnB(), p3.Zeroing(), p1.VnB(), p4.VnB()), + "brkn p4.b, p3/z, p1.b, p4.b"); COMPARE_MACRO(Brkns(p3.VnB(), p10.Zeroing(), p2.VnB(), p5.VnB()), "mov p3.b, p5.b\n" @@ -5619,48 +5451,36 @@ TEST(sve_partition_break) { TEST(sve_permute_predicate) { SETUP(); - COMPARE_PREFIX(rev(p15.VnB(), p6.VnB()), "rev p15.b, p6.b"); - COMPARE_PREFIX(rev(p15.VnH(), p6.VnH()), "rev p15.h, p6.h"); - 
COMPARE_PREFIX(rev(p15.VnS(), p6.VnS()), "rev p15.s, p6.s"); - COMPARE_PREFIX(rev(p15.VnD(), p6.VnD()), "rev p15.d, p6.d"); - COMPARE_PREFIX(trn1(p13.VnB(), p15.VnB(), p12.VnB()), - "trn1 p13.b, p15.b, p12.b"); - COMPARE_PREFIX(trn1(p13.VnH(), p15.VnH(), p12.VnH()), - "trn1 p13.h, p15.h, p12.h"); - COMPARE_PREFIX(trn1(p13.VnS(), p15.VnS(), p12.VnS()), - "trn1 p13.s, p15.s, p12.s"); - COMPARE_PREFIX(trn1(p13.VnD(), p15.VnD(), p12.VnD()), - "trn1 p13.d, p15.d, p12.d"); - COMPARE_PREFIX(trn2(p5.VnB(), p5.VnB(), p6.VnB()), "trn2 p5.b, p5.b, p6.b"); - COMPARE_PREFIX(trn2(p5.VnH(), p5.VnH(), p6.VnH()), "trn2 p5.h, p5.h, p6.h"); - COMPARE_PREFIX(trn2(p5.VnS(), p5.VnS(), p6.VnS()), "trn2 p5.s, p5.s, p6.s"); - COMPARE_PREFIX(trn2(p5.VnD(), p5.VnD(), p6.VnD()), "trn2 p5.d, p5.d, p6.d"); - COMPARE_PREFIX(uzp1(p14.VnB(), p4.VnB(), p14.VnB()), - "uzp1 p14.b, p4.b, p14.b"); - COMPARE_PREFIX(uzp1(p14.VnH(), p4.VnH(), p14.VnH()), - "uzp1 p14.h, p4.h, p14.h"); - COMPARE_PREFIX(uzp1(p14.VnS(), p4.VnS(), p14.VnS()), - "uzp1 p14.s, p4.s, p14.s"); - COMPARE_PREFIX(uzp1(p14.VnD(), p4.VnD(), p14.VnD()), - "uzp1 p14.d, p4.d, p14.d"); - COMPARE_PREFIX(uzp2(p6.VnB(), p11.VnB(), p2.VnB()), "uzp2 p6.b, p11.b, p2.b"); - COMPARE_PREFIX(uzp2(p6.VnH(), p11.VnH(), p2.VnH()), "uzp2 p6.h, p11.h, p2.h"); - COMPARE_PREFIX(uzp2(p6.VnS(), p11.VnS(), p2.VnS()), "uzp2 p6.s, p11.s, p2.s"); - COMPARE_PREFIX(uzp2(p6.VnD(), p11.VnD(), p2.VnD()), "uzp2 p6.d, p11.d, p2.d"); - COMPARE_PREFIX(zip1(p13.VnB(), p4.VnB(), p12.VnB()), - "zip1 p13.b, p4.b, p12.b"); - COMPARE_PREFIX(zip1(p13.VnH(), p4.VnH(), p12.VnH()), - "zip1 p13.h, p4.h, p12.h"); - COMPARE_PREFIX(zip1(p13.VnS(), p4.VnS(), p12.VnS()), - "zip1 p13.s, p4.s, p12.s"); - COMPARE_PREFIX(zip1(p13.VnD(), p4.VnD(), p12.VnD()), - "zip1 p13.d, p4.d, p12.d"); - COMPARE_PREFIX(zip2(p1.VnB(), p15.VnB(), p2.VnB()), "zip2 p1.b, p15.b, p2.b"); - COMPARE_PREFIX(zip2(p1.VnH(), p15.VnH(), p2.VnH()), "zip2 p1.h, p15.h, p2.h"); - COMPARE_PREFIX(zip2(p1.VnS(), p15.VnS(), 
p2.VnS()), "zip2 p1.s, p15.s, p2.s"); - COMPARE_PREFIX(zip2(p1.VnD(), p15.VnD(), p2.VnD()), "zip2 p1.d, p15.d, p2.d"); - COMPARE_PREFIX(punpkhi(p12.VnH(), p6.VnB()), "punpkhi p12.h, p6.b"); - COMPARE_PREFIX(punpklo(p4.VnH(), p14.VnB()), "punpklo p4.h, p14.b"); + COMPARE(rev(p15.VnB(), p6.VnB()), "rev p15.b, p6.b"); + COMPARE(rev(p15.VnH(), p6.VnH()), "rev p15.h, p6.h"); + COMPARE(rev(p15.VnS(), p6.VnS()), "rev p15.s, p6.s"); + COMPARE(rev(p15.VnD(), p6.VnD()), "rev p15.d, p6.d"); + COMPARE(trn1(p13.VnB(), p15.VnB(), p12.VnB()), "trn1 p13.b, p15.b, p12.b"); + COMPARE(trn1(p13.VnH(), p15.VnH(), p12.VnH()), "trn1 p13.h, p15.h, p12.h"); + COMPARE(trn1(p13.VnS(), p15.VnS(), p12.VnS()), "trn1 p13.s, p15.s, p12.s"); + COMPARE(trn1(p13.VnD(), p15.VnD(), p12.VnD()), "trn1 p13.d, p15.d, p12.d"); + COMPARE(trn2(p5.VnB(), p5.VnB(), p6.VnB()), "trn2 p5.b, p5.b, p6.b"); + COMPARE(trn2(p5.VnH(), p5.VnH(), p6.VnH()), "trn2 p5.h, p5.h, p6.h"); + COMPARE(trn2(p5.VnS(), p5.VnS(), p6.VnS()), "trn2 p5.s, p5.s, p6.s"); + COMPARE(trn2(p5.VnD(), p5.VnD(), p6.VnD()), "trn2 p5.d, p5.d, p6.d"); + COMPARE(uzp1(p14.VnB(), p4.VnB(), p14.VnB()), "uzp1 p14.b, p4.b, p14.b"); + COMPARE(uzp1(p14.VnH(), p4.VnH(), p14.VnH()), "uzp1 p14.h, p4.h, p14.h"); + COMPARE(uzp1(p14.VnS(), p4.VnS(), p14.VnS()), "uzp1 p14.s, p4.s, p14.s"); + COMPARE(uzp1(p14.VnD(), p4.VnD(), p14.VnD()), "uzp1 p14.d, p4.d, p14.d"); + COMPARE(uzp2(p6.VnB(), p11.VnB(), p2.VnB()), "uzp2 p6.b, p11.b, p2.b"); + COMPARE(uzp2(p6.VnH(), p11.VnH(), p2.VnH()), "uzp2 p6.h, p11.h, p2.h"); + COMPARE(uzp2(p6.VnS(), p11.VnS(), p2.VnS()), "uzp2 p6.s, p11.s, p2.s"); + COMPARE(uzp2(p6.VnD(), p11.VnD(), p2.VnD()), "uzp2 p6.d, p11.d, p2.d"); + COMPARE(zip1(p13.VnB(), p4.VnB(), p12.VnB()), "zip1 p13.b, p4.b, p12.b"); + COMPARE(zip1(p13.VnH(), p4.VnH(), p12.VnH()), "zip1 p13.h, p4.h, p12.h"); + COMPARE(zip1(p13.VnS(), p4.VnS(), p12.VnS()), "zip1 p13.s, p4.s, p12.s"); + COMPARE(zip1(p13.VnD(), p4.VnD(), p12.VnD()), "zip1 p13.d, p4.d, p12.d"); + 
COMPARE(zip2(p1.VnB(), p15.VnB(), p2.VnB()), "zip2 p1.b, p15.b, p2.b"); + COMPARE(zip2(p1.VnH(), p15.VnH(), p2.VnH()), "zip2 p1.h, p15.h, p2.h"); + COMPARE(zip2(p1.VnS(), p15.VnS(), p2.VnS()), "zip2 p1.s, p15.s, p2.s"); + COMPARE(zip2(p1.VnD(), p15.VnD(), p2.VnD()), "zip2 p1.d, p15.d, p2.d"); + COMPARE(punpkhi(p12.VnH(), p6.VnB()), "punpkhi p12.h, p6.b"); + COMPARE(punpklo(p4.VnH(), p14.VnB()), "punpklo p4.h, p14.b"); CLEANUP(); } @@ -5676,67 +5496,37 @@ TEST(sve_permute_vector_extract) { "ext z2.b, z2.b, z10.b, #254"); COMPARE_MACRO(Ext(z2.VnB(), z2.VnB(), z10.VnB(), 255), "ext z2.b, z2.b, z10.b, #255"); - COMPARE_MACRO(Ext(z2.VnB(), z4.VnB(), z10.VnB(), 127), - "movprfx z2, z4\n" - "ext z2.b, z2.b, z10.b, #127"); - COMPARE_MACRO(Ext(z2.VnB(), z12.VnB(), z2.VnB(), 2), - "movprfx z31, z12\n" - "ext z31.b, z31.b, z2.b, #2\n" - "mov z2.d, z31.d"); + CLEANUP(); } TEST(sve_permute_vector_interleaving) { SETUP(); - COMPARE_PREFIX(trn1(z25.VnB(), z31.VnB(), z17.VnB()), - "trn1 z25.b, z31.b, z17.b"); - COMPARE_PREFIX(trn1(z25.VnH(), z31.VnH(), z17.VnH()), - "trn1 z25.h, z31.h, z17.h"); - COMPARE_PREFIX(trn1(z25.VnS(), z31.VnS(), z17.VnS()), - "trn1 z25.s, z31.s, z17.s"); - COMPARE_PREFIX(trn1(z25.VnD(), z31.VnD(), z17.VnD()), - "trn1 z25.d, z31.d, z17.d"); - COMPARE_PREFIX(trn2(z23.VnB(), z19.VnB(), z5.VnB()), - "trn2 z23.b, z19.b, z5.b"); - COMPARE_PREFIX(trn2(z23.VnH(), z19.VnH(), z5.VnH()), - "trn2 z23.h, z19.h, z5.h"); - COMPARE_PREFIX(trn2(z23.VnS(), z19.VnS(), z5.VnS()), - "trn2 z23.s, z19.s, z5.s"); - COMPARE_PREFIX(trn2(z23.VnD(), z19.VnD(), z5.VnD()), - "trn2 z23.d, z19.d, z5.d"); - COMPARE_PREFIX(uzp1(z3.VnB(), z27.VnB(), z10.VnB()), - "uzp1 z3.b, z27.b, z10.b"); - COMPARE_PREFIX(uzp1(z3.VnH(), z27.VnH(), z10.VnH()), - "uzp1 z3.h, z27.h, z10.h"); - COMPARE_PREFIX(uzp1(z3.VnS(), z27.VnS(), z10.VnS()), - "uzp1 z3.s, z27.s, z10.s"); - COMPARE_PREFIX(uzp1(z3.VnD(), z27.VnD(), z10.VnD()), - "uzp1 z3.d, z27.d, z10.d"); - COMPARE_PREFIX(uzp2(z22.VnB(), z26.VnB(), 
z15.VnB()), - "uzp2 z22.b, z26.b, z15.b"); - COMPARE_PREFIX(uzp2(z22.VnH(), z26.VnH(), z15.VnH()), - "uzp2 z22.h, z26.h, z15.h"); - COMPARE_PREFIX(uzp2(z22.VnS(), z26.VnS(), z15.VnS()), - "uzp2 z22.s, z26.s, z15.s"); - COMPARE_PREFIX(uzp2(z22.VnD(), z26.VnD(), z15.VnD()), - "uzp2 z22.d, z26.d, z15.d"); - COMPARE_PREFIX(zip1(z31.VnB(), z2.VnB(), z20.VnB()), - "zip1 z31.b, z2.b, z20.b"); - COMPARE_PREFIX(zip1(z31.VnH(), z2.VnH(), z20.VnH()), - "zip1 z31.h, z2.h, z20.h"); - COMPARE_PREFIX(zip1(z31.VnS(), z2.VnS(), z20.VnS()), - "zip1 z31.s, z2.s, z20.s"); - COMPARE_PREFIX(zip1(z31.VnD(), z2.VnD(), z20.VnD()), - "zip1 z31.d, z2.d, z20.d"); - COMPARE_PREFIX(zip2(z15.VnB(), z23.VnB(), z12.VnB()), - "zip2 z15.b, z23.b, z12.b"); - COMPARE_PREFIX(zip2(z15.VnH(), z23.VnH(), z12.VnH()), - "zip2 z15.h, z23.h, z12.h"); - COMPARE_PREFIX(zip2(z15.VnS(), z23.VnS(), z12.VnS()), - "zip2 z15.s, z23.s, z12.s"); - COMPARE_PREFIX(zip2(z15.VnD(), z23.VnD(), z12.VnD()), - "zip2 z15.d, z23.d, z12.d"); + COMPARE(trn1(z25.VnB(), z31.VnB(), z17.VnB()), "trn1 z25.b, z31.b, z17.b"); + COMPARE(trn1(z25.VnH(), z31.VnH(), z17.VnH()), "trn1 z25.h, z31.h, z17.h"); + COMPARE(trn1(z25.VnS(), z31.VnS(), z17.VnS()), "trn1 z25.s, z31.s, z17.s"); + COMPARE(trn1(z25.VnD(), z31.VnD(), z17.VnD()), "trn1 z25.d, z31.d, z17.d"); + COMPARE(trn2(z23.VnB(), z19.VnB(), z5.VnB()), "trn2 z23.b, z19.b, z5.b"); + COMPARE(trn2(z23.VnH(), z19.VnH(), z5.VnH()), "trn2 z23.h, z19.h, z5.h"); + COMPARE(trn2(z23.VnS(), z19.VnS(), z5.VnS()), "trn2 z23.s, z19.s, z5.s"); + COMPARE(trn2(z23.VnD(), z19.VnD(), z5.VnD()), "trn2 z23.d, z19.d, z5.d"); + COMPARE(uzp1(z3.VnB(), z27.VnB(), z10.VnB()), "uzp1 z3.b, z27.b, z10.b"); + COMPARE(uzp1(z3.VnH(), z27.VnH(), z10.VnH()), "uzp1 z3.h, z27.h, z10.h"); + COMPARE(uzp1(z3.VnS(), z27.VnS(), z10.VnS()), "uzp1 z3.s, z27.s, z10.s"); + COMPARE(uzp1(z3.VnD(), z27.VnD(), z10.VnD()), "uzp1 z3.d, z27.d, z10.d"); + COMPARE(uzp2(z22.VnB(), z26.VnB(), z15.VnB()), "uzp2 z22.b, z26.b, z15.b"); + 
COMPARE(uzp2(z22.VnH(), z26.VnH(), z15.VnH()), "uzp2 z22.h, z26.h, z15.h"); + COMPARE(uzp2(z22.VnS(), z26.VnS(), z15.VnS()), "uzp2 z22.s, z26.s, z15.s"); + COMPARE(uzp2(z22.VnD(), z26.VnD(), z15.VnD()), "uzp2 z22.d, z26.d, z15.d"); + COMPARE(zip1(z31.VnB(), z2.VnB(), z20.VnB()), "zip1 z31.b, z2.b, z20.b"); + COMPARE(zip1(z31.VnH(), z2.VnH(), z20.VnH()), "zip1 z31.h, z2.h, z20.h"); + COMPARE(zip1(z31.VnS(), z2.VnS(), z20.VnS()), "zip1 z31.s, z2.s, z20.s"); + COMPARE(zip1(z31.VnD(), z2.VnD(), z20.VnD()), "zip1 z31.d, z2.d, z20.d"); + COMPARE(zip2(z15.VnB(), z23.VnB(), z12.VnB()), "zip2 z15.b, z23.b, z12.b"); + COMPARE(zip2(z15.VnH(), z23.VnH(), z12.VnH()), "zip2 z15.h, z23.h, z12.h"); + COMPARE(zip2(z15.VnS(), z23.VnS(), z12.VnS()), "zip2 z15.s, z23.s, z12.s"); + COMPARE(zip2(z15.VnD(), z23.VnD(), z12.VnD()), "zip2 z15.d, z23.d, z12.d"); CLEANUP(); } @@ -5744,22 +5534,22 @@ TEST(sve_permute_vector_interleaving) { TEST(sve_cpy_reg) { SETUP(); - COMPARE_PREFIX(cpy(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); - COMPARE_PREFIX(cpy(z2.VnH(), p6.Merging(), w3), "mov z2.h, p6/m, w3"); - COMPARE_PREFIX(cpy(z3.VnS(), p7.Merging(), x5), "mov z3.s, p7/m, w5"); - COMPARE_PREFIX(cpy(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30"); - COMPARE_PREFIX(cpy(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp"); + COMPARE(cpy(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); + COMPARE(cpy(z2.VnH(), p6.Merging(), w3), "mov z2.h, p6/m, w3"); + COMPARE(cpy(z3.VnS(), p7.Merging(), x5), "mov z3.s, p7/m, w5"); + COMPARE(cpy(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30"); + COMPARE(cpy(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp"); - COMPARE_PREFIX(cpy(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23"); - COMPARE_PREFIX(cpy(z27.VnH(), p3.Merging(), h23), "mov z27.h, p3/m, h23"); - COMPARE_PREFIX(cpy(z27.VnS(), p3.Merging(), s23), "mov z27.s, p3/m, s23"); - COMPARE_PREFIX(cpy(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23"); + COMPARE(cpy(z27.VnB(), 
p3.Merging(), b23), "mov z27.b, p3/m, b23"); + COMPARE(cpy(z27.VnH(), p3.Merging(), h23), "mov z27.h, p3/m, h23"); + COMPARE(cpy(z27.VnS(), p3.Merging(), s23), "mov z27.s, p3/m, s23"); + COMPARE(cpy(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23"); - COMPARE_PREFIX(mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); - COMPARE_PREFIX(mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30"); - COMPARE_PREFIX(mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp"); - COMPARE_PREFIX(mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23"); - COMPARE_PREFIX(mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23"); + COMPARE(mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); + COMPARE(mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30"); + COMPARE(mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp"); + COMPARE(mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23"); + COMPARE(mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23"); COMPARE_MACRO(Mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); COMPARE_MACRO(Mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30"); @@ -5773,41 +5563,41 @@ TEST(sve_cpy_reg) { TEST(sve_permute_vector_predicated) { SETUP(); - COMPARE_PREFIX(compact(z13.VnS(), p7, z1.VnS()), "compact z13.s, p7, z1.s"); - COMPARE_PREFIX(compact(z13.VnD(), p7, z1.VnD()), "compact z13.d, p7, z1.d"); - COMPARE_PREFIX(splice(z7.VnB(), p6, z7.VnB(), z2.VnB()), - "splice z7.b, p6, z7.b, z2.b"); - COMPARE_PREFIX(splice(z7.VnH(), p6, z7.VnH(), z2.VnH()), - "splice z7.h, p6, z7.h, z2.h"); - COMPARE_PREFIX(splice(z7.VnS(), p6, z7.VnS(), z2.VnS()), - "splice z7.s, p6, z7.s, z2.s"); - COMPARE_PREFIX(splice(z7.VnD(), p6, z7.VnD(), z2.VnD()), - "splice z7.d, p6, z7.d, z2.d"); - - COMPARE_MACRO(Splice(z0.VnB(), p1, z2.VnB(), z3.VnB()), + COMPARE(compact(z13.VnS(), p7, z1.VnS()), "compact z13.s, p7, z1.s"); + COMPARE(compact(z13.VnD(), p7, z1.VnD()), "compact z13.d, p7, z1.d"); + COMPARE(splice(z7.VnB(), p6, z7.VnB(), z2.VnB()), + 
"splice z7.b, p6, z7.b, z2.b"); + COMPARE(splice(z7.VnH(), p6, z7.VnH(), z2.VnH()), + "splice z7.h, p6, z7.h, z2.h"); + COMPARE(splice(z7.VnS(), p6, z7.VnS(), z2.VnS()), + "splice z7.s, p6, z7.s, z2.s"); + COMPARE(splice(z7.VnD(), p6, z7.VnD(), z2.VnD()), + "splice z7.d, p6, z7.d, z2.d"); + + COMPARE_MACRO(Splice(z0.VnB(), p1, z2.VnB(), z4.VnB()), "movprfx z0, z2\n" - "splice z0.b, p1, z0.b, z3.b"); + "splice z0.b, p1, z0.b, z4.b"); COMPARE_MACRO(Splice(z0.VnH(), p1, z2.VnH(), z0.VnH()), "movprfx z31, z2\n" "splice z31.h, p1, z31.h, z0.h\n" "mov z0.d, z31.d"); - COMPARE_PREFIX(clasta(z4.VnB(), p2, z4.VnB(), z12.VnB()), - "clasta z4.b, p2, z4.b, z12.b"); - COMPARE_PREFIX(clasta(z4.VnH(), p2, z4.VnH(), z12.VnH()), - "clasta z4.h, p2, z4.h, z12.h"); - COMPARE_PREFIX(clasta(z4.VnS(), p2, z4.VnS(), z12.VnS()), - "clasta z4.s, p2, z4.s, z12.s"); - COMPARE_PREFIX(clasta(z4.VnD(), p2, z4.VnD(), z12.VnD()), - "clasta z4.d, p2, z4.d, z12.d"); - COMPARE_PREFIX(clastb(z29.VnB(), p7, z29.VnB(), z26.VnB()), - "clastb z29.b, p7, z29.b, z26.b"); - COMPARE_PREFIX(clastb(z29.VnH(), p7, z29.VnH(), z26.VnH()), - "clastb z29.h, p7, z29.h, z26.h"); - COMPARE_PREFIX(clastb(z29.VnS(), p7, z29.VnS(), z26.VnS()), - "clastb z29.s, p7, z29.s, z26.s"); - COMPARE_PREFIX(clastb(z29.VnD(), p7, z29.VnD(), z26.VnD()), - "clastb z29.d, p7, z29.d, z26.d"); + COMPARE(clasta(z4.VnB(), p2, z4.VnB(), z12.VnB()), + "clasta z4.b, p2, z4.b, z12.b"); + COMPARE(clasta(z4.VnH(), p2, z4.VnH(), z12.VnH()), + "clasta z4.h, p2, z4.h, z12.h"); + COMPARE(clasta(z4.VnS(), p2, z4.VnS(), z12.VnS()), + "clasta z4.s, p2, z4.s, z12.s"); + COMPARE(clasta(z4.VnD(), p2, z4.VnD(), z12.VnD()), + "clasta z4.d, p2, z4.d, z12.d"); + COMPARE(clastb(z29.VnB(), p7, z29.VnB(), z26.VnB()), + "clastb z29.b, p7, z29.b, z26.b"); + COMPARE(clastb(z29.VnH(), p7, z29.VnH(), z26.VnH()), + "clastb z29.h, p7, z29.h, z26.h"); + COMPARE(clastb(z29.VnS(), p7, z29.VnS(), z26.VnS()), + "clastb z29.s, p7, z29.s, z26.s"); + COMPARE(clastb(z29.VnD(), 
p7, z29.VnD(), z26.VnD()), + "clastb z29.d, p7, z29.d, z26.d"); COMPARE_MACRO(Clasta(z5.VnD(), p2, z4.VnD(), z12.VnD()), "movprfx z5, z4\n" @@ -5822,41 +5612,41 @@ TEST(sve_permute_vector_predicated) { COMPARE_MACRO(Clastb(z1.VnS(), p1, z1.VnS(), z1.VnS()), "clastb z1.s, p1, z1.s, z1.s"); - COMPARE_PREFIX(clasta(w6, p0, w6, z13.VnB()), "clasta w6, p0, w6, z13.b"); - COMPARE_PREFIX(clasta(w6, p0, w6, z13.VnH()), "clasta w6, p0, w6, z13.h"); - COMPARE_PREFIX(clasta(w6, p0, w6, z13.VnS()), "clasta w6, p0, w6, z13.s"); - COMPARE_PREFIX(clasta(x6, p0, x6, z13.VnD()), "clasta x6, p0, x6, z13.d"); - COMPARE_PREFIX(clastb(w21, p2, w21, z27.VnB()), "clastb w21, p2, w21, z27.b"); - COMPARE_PREFIX(clastb(w21, p2, w21, z27.VnH()), "clastb w21, p2, w21, z27.h"); - COMPARE_PREFIX(clastb(w21, p2, w21, z27.VnS()), "clastb w21, p2, w21, z27.s"); - COMPARE_PREFIX(clastb(x21, p2, x21, z27.VnD()), "clastb x21, p2, x21, z27.d"); - - COMPARE_PREFIX(clasta(b8, p6, b8, z7.VnB()), "clasta b8, p6, b8, z7.b"); - COMPARE_PREFIX(clasta(h8, p6, h8, z7.VnH()), "clasta h8, p6, h8, z7.h"); - COMPARE_PREFIX(clasta(s8, p6, s8, z7.VnS()), "clasta s8, p6, s8, z7.s"); - COMPARE_PREFIX(clasta(d8, p6, d8, z7.VnD()), "clasta d8, p6, d8, z7.d"); - COMPARE_PREFIX(clastb(b17, p0, b17, z19.VnB()), "clastb b17, p0, b17, z19.b"); - COMPARE_PREFIX(clastb(h17, p0, h17, z19.VnH()), "clastb h17, p0, h17, z19.h"); - COMPARE_PREFIX(clastb(s17, p0, s17, z19.VnS()), "clastb s17, p0, s17, z19.s"); - COMPARE_PREFIX(clastb(d17, p0, d17, z19.VnD()), "clastb d17, p0, d17, z19.d"); - - COMPARE_PREFIX(lasta(w15, p3, z3.VnB()), "lasta w15, p3, z3.b"); - COMPARE_PREFIX(lasta(w15, p3, z3.VnH()), "lasta w15, p3, z3.h"); - COMPARE_PREFIX(lasta(w15, p3, z3.VnS()), "lasta w15, p3, z3.s"); - COMPARE_PREFIX(lasta(x15, p3, z3.VnD()), "lasta x15, p3, z3.d"); - COMPARE_PREFIX(lasta(b30, p4, z24.VnB()), "lasta b30, p4, z24.b"); - COMPARE_PREFIX(lasta(h30, p4, z24.VnH()), "lasta h30, p4, z24.h"); - COMPARE_PREFIX(lasta(s30, p4, z24.VnS()), 
"lasta s30, p4, z24.s"); - COMPARE_PREFIX(lasta(d30, p4, z24.VnD()), "lasta d30, p4, z24.d"); - - COMPARE_PREFIX(lastb(w9, p2, z16.VnB()), "lastb w9, p2, z16.b"); - COMPARE_PREFIX(lastb(w9, p2, z16.VnH()), "lastb w9, p2, z16.h"); - COMPARE_PREFIX(lastb(w9, p2, z16.VnS()), "lastb w9, p2, z16.s"); - COMPARE_PREFIX(lastb(x9, p2, z16.VnD()), "lastb x9, p2, z16.d"); - COMPARE_PREFIX(lastb(b14, p5, z2.VnB()), "lastb b14, p5, z2.b"); - COMPARE_PREFIX(lastb(h14, p5, z2.VnH()), "lastb h14, p5, z2.h"); - COMPARE_PREFIX(lastb(s14, p5, z2.VnS()), "lastb s14, p5, z2.s"); - COMPARE_PREFIX(lastb(d14, p5, z2.VnD()), "lastb d14, p5, z2.d"); + COMPARE(clasta(w6, p0, w6, z13.VnB()), "clasta w6, p0, w6, z13.b"); + COMPARE(clasta(w6, p0, w6, z13.VnH()), "clasta w6, p0, w6, z13.h"); + COMPARE(clasta(w6, p0, w6, z13.VnS()), "clasta w6, p0, w6, z13.s"); + COMPARE(clasta(x6, p0, x6, z13.VnD()), "clasta x6, p0, x6, z13.d"); + COMPARE(clastb(w21, p2, w21, z27.VnB()), "clastb w21, p2, w21, z27.b"); + COMPARE(clastb(w21, p2, w21, z27.VnH()), "clastb w21, p2, w21, z27.h"); + COMPARE(clastb(w21, p2, w21, z27.VnS()), "clastb w21, p2, w21, z27.s"); + COMPARE(clastb(x21, p2, x21, z27.VnD()), "clastb x21, p2, x21, z27.d"); + + COMPARE(clasta(b8, p6, b8, z7.VnB()), "clasta b8, p6, b8, z7.b"); + COMPARE(clasta(h8, p6, h8, z7.VnH()), "clasta h8, p6, h8, z7.h"); + COMPARE(clasta(s8, p6, s8, z7.VnS()), "clasta s8, p6, s8, z7.s"); + COMPARE(clasta(d8, p6, d8, z7.VnD()), "clasta d8, p6, d8, z7.d"); + COMPARE(clastb(b17, p0, b17, z19.VnB()), "clastb b17, p0, b17, z19.b"); + COMPARE(clastb(h17, p0, h17, z19.VnH()), "clastb h17, p0, h17, z19.h"); + COMPARE(clastb(s17, p0, s17, z19.VnS()), "clastb s17, p0, s17, z19.s"); + COMPARE(clastb(d17, p0, d17, z19.VnD()), "clastb d17, p0, d17, z19.d"); + + COMPARE(lasta(w15, p3, z3.VnB()), "lasta w15, p3, z3.b"); + COMPARE(lasta(w15, p3, z3.VnH()), "lasta w15, p3, z3.h"); + COMPARE(lasta(w15, p3, z3.VnS()), "lasta w15, p3, z3.s"); + COMPARE(lasta(x15, p3, z3.VnD()), 
"lasta x15, p3, z3.d"); + COMPARE(lasta(b30, p4, z24.VnB()), "lasta b30, p4, z24.b"); + COMPARE(lasta(h30, p4, z24.VnH()), "lasta h30, p4, z24.h"); + COMPARE(lasta(s30, p4, z24.VnS()), "lasta s30, p4, z24.s"); + COMPARE(lasta(d30, p4, z24.VnD()), "lasta d30, p4, z24.d"); + + COMPARE(lastb(w9, p2, z16.VnB()), "lastb w9, p2, z16.b"); + COMPARE(lastb(w9, p2, z16.VnH()), "lastb w9, p2, z16.h"); + COMPARE(lastb(w9, p2, z16.VnS()), "lastb w9, p2, z16.s"); + COMPARE(lastb(x9, p2, z16.VnD()), "lastb x9, p2, z16.d"); + COMPARE(lastb(b14, p5, z2.VnB()), "lastb b14, p5, z2.b"); + COMPARE(lastb(h14, p5, z2.VnH()), "lastb h14, p5, z2.h"); + COMPARE(lastb(s14, p5, z2.VnS()), "lastb s14, p5, z2.s"); + COMPARE(lastb(d14, p5, z2.VnD()), "lastb d14, p5, z2.d"); CLEANUP(); } @@ -5891,19 +5681,19 @@ TEST(sve_reverse) { TEST(sve_permute_vector_unpredicated) { SETUP(); - COMPARE_PREFIX(dup(z4.VnB(), w7), "mov z4.b, w7"); - COMPARE_PREFIX(dup(z5.VnH(), w6), "mov z5.h, w6"); - COMPARE_PREFIX(dup(z6.VnS(), sp), "mov z6.s, wsp"); - COMPARE_PREFIX(dup(z7.VnD(), x4), "mov z7.d, x4"); - COMPARE_PREFIX(dup(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); - COMPARE_PREFIX(dup(z26.VnH(), z12.VnH(), 0), "mov z26.h, h12"); + COMPARE(dup(z4.VnB(), w7), "mov z4.b, w7"); + COMPARE(dup(z5.VnH(), w6), "mov z5.h, w6"); + COMPARE(dup(z6.VnS(), sp), "mov z6.s, wsp"); + COMPARE(dup(z7.VnD(), x4), "mov z7.d, x4"); + COMPARE(dup(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); + COMPARE(dup(z26.VnH(), z12.VnH(), 0), "mov z26.h, h12"); - COMPARE_PREFIX(mov(z4.VnB(), w7), "mov z4.b, w7"); - COMPARE_PREFIX(mov(z5.VnH(), w6), "mov z5.h, w6"); - COMPARE_PREFIX(mov(z6.VnS(), sp), "mov z6.s, wsp"); - COMPARE_PREFIX(mov(z7.VnD(), x4), "mov z7.d, x4"); - COMPARE_PREFIX(mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); - COMPARE_PREFIX(mov(z0.VnS(), s1), "mov z0.s, s1"); + COMPARE(mov(z4.VnB(), w7), "mov z4.b, w7"); + COMPARE(mov(z5.VnH(), w6), "mov z5.h, w6"); + COMPARE(mov(z6.VnS(), sp), "mov z6.s, wsp"); + 
COMPARE(mov(z7.VnD(), x4), "mov z7.d, x4"); + COMPARE(mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); + COMPARE(mov(z0.VnS(), s1), "mov z0.s, s1"); COMPARE_MACRO(Mov(z7.VnD(), x4), "mov z7.d, x4"); COMPARE_MACRO(Mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); @@ -5921,30 +5711,26 @@ TEST(sve_permute_vector_unpredicated) { COMPARE(insr(z6.VnH(), h15), "insr z6.h, h15"); COMPARE(insr(z7.VnS(), s22), "insr z7.s, s22"); COMPARE(insr(z8.VnD(), d30), "insr z8.d, d30"); - COMPARE_PREFIX(rev(z13.VnB(), z10.VnB()), "rev z13.b, z10.b"); - COMPARE_PREFIX(rev(z14.VnH(), z10.VnH()), "rev z14.h, z10.h"); - COMPARE_PREFIX(rev(z15.VnS(), z10.VnS()), "rev z15.s, z10.s"); - COMPARE_PREFIX(rev(z16.VnD(), z10.VnD()), "rev z16.d, z10.d"); - COMPARE_PREFIX(sunpkhi(z10.VnH(), z11.VnB()), "sunpkhi z10.h, z11.b"); - COMPARE_PREFIX(sunpkhi(z11.VnS(), z11.VnH()), "sunpkhi z11.s, z11.h"); - COMPARE_PREFIX(sunpkhi(z12.VnD(), z11.VnS()), "sunpkhi z12.d, z11.s"); - COMPARE_PREFIX(sunpklo(z20.VnH(), z12.VnB()), "sunpklo z20.h, z12.b"); - COMPARE_PREFIX(sunpklo(z21.VnS(), z12.VnH()), "sunpklo z21.s, z12.h"); - COMPARE_PREFIX(sunpklo(z22.VnD(), z12.VnS()), "sunpklo z22.d, z12.s"); - COMPARE_PREFIX(uunpkhi(z17.VnH(), z14.VnB()), "uunpkhi z17.h, z14.b"); - COMPARE_PREFIX(uunpkhi(z18.VnS(), z14.VnH()), "uunpkhi z18.s, z14.h"); - COMPARE_PREFIX(uunpkhi(z19.VnD(), z14.VnS()), "uunpkhi z19.d, z14.s"); - COMPARE_PREFIX(uunpklo(z27.VnH(), z6.VnB()), "uunpklo z27.h, z6.b"); - COMPARE_PREFIX(uunpklo(z28.VnS(), z6.VnH()), "uunpklo z28.s, z6.h"); - COMPARE_PREFIX(uunpklo(z29.VnD(), z6.VnS()), "uunpklo z29.d, z6.s"); - COMPARE_PREFIX(tbl(z24.VnB(), z29.VnB(), z0.VnB()), - "tbl z24.b, {z29.b}, z0.b"); - COMPARE_PREFIX(tbl(z25.VnH(), z29.VnH(), z1.VnH()), - "tbl z25.h, {z29.h}, z1.h"); - COMPARE_PREFIX(tbl(z26.VnS(), z29.VnS(), z2.VnS()), - "tbl z26.s, {z29.s}, z2.s"); - COMPARE_PREFIX(tbl(z27.VnD(), z29.VnD(), z3.VnD()), - "tbl z27.d, {z29.d}, z3.d"); + COMPARE(rev(z13.VnB(), z10.VnB()), "rev z13.b, 
z10.b"); + COMPARE(rev(z14.VnH(), z10.VnH()), "rev z14.h, z10.h"); + COMPARE(rev(z15.VnS(), z10.VnS()), "rev z15.s, z10.s"); + COMPARE(rev(z16.VnD(), z10.VnD()), "rev z16.d, z10.d"); + COMPARE(sunpkhi(z10.VnH(), z11.VnB()), "sunpkhi z10.h, z11.b"); + COMPARE(sunpkhi(z11.VnS(), z11.VnH()), "sunpkhi z11.s, z11.h"); + COMPARE(sunpkhi(z12.VnD(), z11.VnS()), "sunpkhi z12.d, z11.s"); + COMPARE(sunpklo(z20.VnH(), z12.VnB()), "sunpklo z20.h, z12.b"); + COMPARE(sunpklo(z21.VnS(), z12.VnH()), "sunpklo z21.s, z12.h"); + COMPARE(sunpklo(z22.VnD(), z12.VnS()), "sunpklo z22.d, z12.s"); + COMPARE(uunpkhi(z17.VnH(), z14.VnB()), "uunpkhi z17.h, z14.b"); + COMPARE(uunpkhi(z18.VnS(), z14.VnH()), "uunpkhi z18.s, z14.h"); + COMPARE(uunpkhi(z19.VnD(), z14.VnS()), "uunpkhi z19.d, z14.s"); + COMPARE(uunpklo(z27.VnH(), z6.VnB()), "uunpklo z27.h, z6.b"); + COMPARE(uunpklo(z28.VnS(), z6.VnH()), "uunpklo z28.s, z6.h"); + COMPARE(uunpklo(z29.VnD(), z6.VnS()), "uunpklo z29.d, z6.s"); + COMPARE(tbl(z24.VnB(), z29.VnB(), z0.VnB()), "tbl z24.b, {z29.b}, z0.b"); + COMPARE(tbl(z25.VnH(), z29.VnH(), z1.VnH()), "tbl z25.h, {z29.h}, z1.h"); + COMPARE(tbl(z26.VnS(), z29.VnS(), z2.VnS()), "tbl z26.s, {z29.s}, z2.s"); + COMPARE(tbl(z27.VnD(), z29.VnD(), z3.VnD()), "tbl z27.d, {z29.d}, z3.d"); CLEANUP(); } @@ -5952,10 +5738,10 @@ TEST(sve_permute_vector_unpredicated) { TEST(sve_predicate_count) { SETUP(); - COMPARE_PREFIX(cntp(x9, p1, p0.VnB()), "cntp x9, p1, p0.b"); - COMPARE_PREFIX(cntp(x10, p12, p1.VnH()), "cntp x10, p12, p1.h"); - COMPARE_PREFIX(cntp(x11, p13, p14.VnS()), "cntp x11, p13, p14.s"); - COMPARE_PREFIX(cntp(x12, p4, p15.VnD()), "cntp x12, p4, p15.d"); + COMPARE(cntp(x9, p1, p0.VnB()), "cntp x9, p1, p0.b"); + COMPARE(cntp(x10, p12, p1.VnH()), "cntp x10, p12, p1.h"); + COMPARE(cntp(x11, p13, p14.VnS()), "cntp x11, p13, p14.s"); + COMPARE(cntp(x12, p4, p15.VnD()), "cntp x12, p4, p15.d"); COMPARE_MACRO(Cntp(x0, p1, p2.VnB()), "cntp x0, p1, p2.b"); COMPARE_MACRO(Cntp(w10, p11, p12.VnH()), "cntp 
x10, p11, p12.h"); @@ -5966,61 +5752,56 @@ TEST(sve_predicate_count) { TEST(sve_predicate_logical_op) { SETUP(); - COMPARE_PREFIX(ands(p13.VnB(), p9.Zeroing(), p5.VnB(), p15.VnB()), - "ands p13.b, p9/z, p5.b, p15.b"); - COMPARE_PREFIX(and_(p9.VnB(), p3.Zeroing(), p0.VnB(), p14.VnB()), - "and p9.b, p3/z, p0.b, p14.b"); - COMPARE_PREFIX(bics(p8.VnB(), p5.Zeroing(), p3.VnB(), p1.VnB()), - "bics p8.b, p5/z, p3.b, p1.b"); - COMPARE_PREFIX(bic(p5.VnB(), p5.Zeroing(), p9.VnB(), p9.VnB()), - "bic p5.b, p5/z, p9.b, p9.b"); - COMPARE_PREFIX(eors(p11.VnB(), p1.Zeroing(), p1.VnB(), p2.VnB()), - "eors p11.b, p1/z, p1.b, p2.b"); - COMPARE_PREFIX(eor(p8.VnB(), p6.Zeroing(), p1.VnB(), p11.VnB()), - "eor p8.b, p6/z, p1.b, p11.b"); - COMPARE_PREFIX(nands(p13.VnB(), p0.Zeroing(), p9.VnB(), p4.VnB()), - "nands p13.b, p0/z, p9.b, p4.b"); - COMPARE_PREFIX(nand(p7.VnB(), p7.Zeroing(), p15.VnB(), p2.VnB()), - "nand p7.b, p7/z, p15.b, p2.b"); - COMPARE_PREFIX(nors(p8.VnB(), p8.Zeroing(), p12.VnB(), p11.VnB()), - "nors p8.b, p8/z, p12.b, p11.b"); - COMPARE_PREFIX(nor(p3.VnB(), p6.Zeroing(), p15.VnB(), p12.VnB()), - "nor p3.b, p6/z, p15.b, p12.b"); - COMPARE_PREFIX(orns(p10.VnB(), p11.Zeroing(), p0.VnB(), p15.VnB()), - "orns p10.b, p11/z, p0.b, p15.b"); - COMPARE_PREFIX(orn(p0.VnB(), p1.Zeroing(), p7.VnB(), p4.VnB()), - "orn p0.b, p1/z, p7.b, p4.b"); - COMPARE_PREFIX(orrs(p14.VnB(), p6.Zeroing(), p1.VnB(), p5.VnB()), - "orrs p14.b, p6/z, p1.b, p5.b"); - COMPARE_PREFIX(orr(p13.VnB(), p7.Zeroing(), p10.VnB(), p4.VnB()), - "orr p13.b, p7/z, p10.b, p4.b"); - COMPARE_PREFIX(sel(p9.VnB(), p15, p15.VnB(), p7.VnB()), - "sel p9.b, p15, p15.b, p7.b"); + COMPARE(ands(p13.VnB(), p9.Zeroing(), p5.VnB(), p15.VnB()), + "ands p13.b, p9/z, p5.b, p15.b"); + COMPARE(and_(p9.VnB(), p3.Zeroing(), p0.VnB(), p14.VnB()), + "and p9.b, p3/z, p0.b, p14.b"); + COMPARE(bics(p8.VnB(), p5.Zeroing(), p3.VnB(), p1.VnB()), + "bics p8.b, p5/z, p3.b, p1.b"); + COMPARE(bic(p5.VnB(), p5.Zeroing(), p9.VnB(), p9.VnB()), + "bic 
p5.b, p5/z, p9.b, p9.b"); + COMPARE(eors(p11.VnB(), p1.Zeroing(), p1.VnB(), p2.VnB()), + "eors p11.b, p1/z, p1.b, p2.b"); + COMPARE(eor(p8.VnB(), p6.Zeroing(), p1.VnB(), p11.VnB()), + "eor p8.b, p6/z, p1.b, p11.b"); + COMPARE(nands(p13.VnB(), p0.Zeroing(), p9.VnB(), p4.VnB()), + "nands p13.b, p0/z, p9.b, p4.b"); + COMPARE(nand(p7.VnB(), p7.Zeroing(), p15.VnB(), p2.VnB()), + "nand p7.b, p7/z, p15.b, p2.b"); + COMPARE(nors(p8.VnB(), p8.Zeroing(), p12.VnB(), p11.VnB()), + "nors p8.b, p8/z, p12.b, p11.b"); + COMPARE(nor(p3.VnB(), p6.Zeroing(), p15.VnB(), p12.VnB()), + "nor p3.b, p6/z, p15.b, p12.b"); + COMPARE(orns(p10.VnB(), p11.Zeroing(), p0.VnB(), p15.VnB()), + "orns p10.b, p11/z, p0.b, p15.b"); + COMPARE(orn(p0.VnB(), p1.Zeroing(), p7.VnB(), p4.VnB()), + "orn p0.b, p1/z, p7.b, p4.b"); + COMPARE(orrs(p14.VnB(), p6.Zeroing(), p1.VnB(), p5.VnB()), + "orrs p14.b, p6/z, p1.b, p5.b"); + COMPARE(orr(p13.VnB(), p7.Zeroing(), p10.VnB(), p4.VnB()), + "orr p13.b, p7/z, p10.b, p4.b"); + COMPARE(sel(p9.VnB(), p15, p15.VnB(), p7.VnB()), + "sel p9.b, p15, p15.b, p7.b"); // Aliases. 
- COMPARE_PREFIX(eor(p7.VnB(), p6.Zeroing(), p1.VnB(), p6.VnB()), - "not p7.b, p6/z, p1.b"); - COMPARE_PREFIX(not_(p7.VnB(), p6.Zeroing(), p1.VnB()), - "not p7.b, p6/z, p1.b"); - COMPARE_PREFIX(eors(p6.VnB(), p5.Zeroing(), p2.VnB(), p5.VnB()), - "nots p6.b, p5/z, p2.b"); - COMPARE_PREFIX(nots(p6.VnB(), p5.Zeroing(), p2.VnB()), - "nots p6.b, p5/z, p2.b"); - COMPARE_PREFIX(ands(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()), - "movs p5.b, p4/z, p3.b"); - COMPARE_PREFIX(movs(p5.VnB(), p4.Zeroing(), p3.VnB()), - "movs p5.b, p4/z, p3.b"); - COMPARE_PREFIX(and_(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()), - "mov p5.b, p4/z, p3.b"); - COMPARE_PREFIX(mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b"); - COMPARE_PREFIX(orrs(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), - "movs p4.b, p3.b"); - COMPARE_PREFIX(movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b"); - COMPARE_PREFIX(orr(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), - "mov p4.b, p3.b"); - COMPARE_PREFIX(mov(p4.VnB(), p3.VnB()), "mov p4.b, p3.b"); - COMPARE_PREFIX(sel(p3.VnB(), p2, p4.VnB(), p3.VnB()), "mov p3.b, p2/m, p4.b"); - COMPARE_PREFIX(mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b"); + COMPARE(eor(p7.VnB(), p6.Zeroing(), p1.VnB(), p6.VnB()), + "not p7.b, p6/z, p1.b"); + COMPARE(not_(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b"); + COMPARE(eors(p6.VnB(), p5.Zeroing(), p2.VnB(), p5.VnB()), + "nots p6.b, p5/z, p2.b"); + COMPARE(nots(p6.VnB(), p5.Zeroing(), p2.VnB()), "nots p6.b, p5/z, p2.b"); + COMPARE(ands(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()), + "movs p5.b, p4/z, p3.b"); + COMPARE(movs(p5.VnB(), p4.Zeroing(), p3.VnB()), "movs p5.b, p4/z, p3.b"); + COMPARE(and_(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()), + "mov p5.b, p4/z, p3.b"); + COMPARE(mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b"); + COMPARE(orrs(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), "movs p4.b, p3.b"); + COMPARE(movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b"); + COMPARE(orr(p4.VnB(), p3.Zeroing(), 
p3.VnB(), p3.VnB()), "mov p4.b, p3.b"); + COMPARE(mov(p4.VnB(), p3.VnB()), "mov p4.b, p3.b"); + COMPARE(sel(p3.VnB(), p2, p4.VnB(), p3.VnB()), "mov p3.b, p2/m, p4.b"); + COMPARE(mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b"); COMPARE_MACRO(Not(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b"); COMPARE_MACRO(Nots(p6.VnB(), p5.Zeroing(), p2.VnB()), @@ -6038,8 +5819,8 @@ TEST(sve_predicate_logical_op) { TEST(sve_predicate_first_active) { SETUP(); - COMPARE_PREFIX(pfirst(p0.VnB(), p7, p0.VnB()), "pfirst p0.b, p7, p0.b"); - COMPARE_PREFIX(pfirst(p7.VnB(), p0, p7.VnB()), "pfirst p7.b, p0, p7.b"); + COMPARE(pfirst(p0.VnB(), p7, p0.VnB()), "pfirst p0.b, p7, p0.b"); + COMPARE(pfirst(p7.VnB(), p0, p7.VnB()), "pfirst p7.b, p0, p7.b"); COMPARE_MACRO(Pfirst(p1.VnB(), p2, p1.VnB()), "pfirst p1.b, p2, p1.b"); COMPARE_MACRO(Pfirst(p3.VnB(), p4, p5.VnB()), @@ -6061,15 +5842,15 @@ TEST(sve_predicate_first_active) { TEST(sve_predicate_next_active) { SETUP(); - COMPARE_PREFIX(pnext(p0.VnB(), p8, p0.VnB()), "pnext p0.b, p8, p0.b"); - COMPARE_PREFIX(pnext(p1.VnH(), p9, p1.VnH()), "pnext p1.h, p9, p1.h"); - COMPARE_PREFIX(pnext(p2.VnS(), p10, p2.VnS()), "pnext p2.s, p10, p2.s"); - COMPARE_PREFIX(pnext(p3.VnD(), p11, p3.VnD()), "pnext p3.d, p11, p3.d"); + COMPARE(pnext(p0.VnB(), p8, p0.VnB()), "pnext p0.b, p8, p0.b"); + COMPARE(pnext(p1.VnH(), p9, p1.VnH()), "pnext p1.h, p9, p1.h"); + COMPARE(pnext(p2.VnS(), p10, p2.VnS()), "pnext p2.s, p10, p2.s"); + COMPARE(pnext(p3.VnD(), p11, p3.VnD()), "pnext p3.d, p11, p3.d"); - COMPARE_PREFIX(pnext(p12.VnB(), p4, p12.VnB()), "pnext p12.b, p4, p12.b"); - COMPARE_PREFIX(pnext(p13.VnH(), p5, p13.VnH()), "pnext p13.h, p5, p13.h"); - COMPARE_PREFIX(pnext(p14.VnS(), p6, p14.VnS()), "pnext p14.s, p6, p14.s"); - COMPARE_PREFIX(pnext(p15.VnD(), p7, p15.VnD()), "pnext p15.d, p7, p15.d"); + COMPARE(pnext(p12.VnB(), p4, p12.VnB()), "pnext p12.b, p4, p12.b"); + COMPARE(pnext(p13.VnH(), p5, p13.VnH()), "pnext p13.h, p5, p13.h"); + 
COMPARE(pnext(p14.VnS(), p6, p14.VnS()), "pnext p14.s, p6, p14.s"); + COMPARE(pnext(p15.VnD(), p7, p15.VnD()), "pnext p15.d, p7, p15.d"); COMPARE_MACRO(Pnext(p5.VnB(), p9, p5.VnB()), "pnext p5.b, p9, p5.b"); COMPARE_MACRO(Pnext(p6.VnH(), p8, p6.VnH()), "pnext p6.h, p8, p6.h"); @@ -6117,44 +5898,44 @@ TEST(sve_predicate_initialize) { SETUP(); // Basic forms. - COMPARE_PREFIX(ptrue(p0.VnB(), SVE_POW2), "ptrue p0.b, pow2"); - COMPARE_PREFIX(ptrue(p1.VnH(), SVE_VL1), "ptrue p1.h, vl1"); - COMPARE_PREFIX(ptrue(p2.VnS(), SVE_VL8), "ptrue p2.s, vl8"); - COMPARE_PREFIX(ptrue(p3.VnD(), SVE_VL16), "ptrue p3.d, vl16"); - COMPARE_PREFIX(ptrue(p4.VnB(), SVE_VL256), "ptrue p4.b, vl256"); - COMPARE_PREFIX(ptrue(p5.VnH(), SVE_MUL3), "ptrue p5.h, mul3"); - COMPARE_PREFIX(ptrue(p6.VnS(), SVE_MUL4), "ptrue p6.s, mul4"); - COMPARE_PREFIX(ptrue(p7.VnD(), SVE_ALL), "ptrue p7.d"); - - COMPARE_PREFIX(ptrues(p8.VnB(), SVE_ALL), "ptrues p8.b"); - COMPARE_PREFIX(ptrues(p9.VnH(), SVE_MUL4), "ptrues p9.h, mul4"); - COMPARE_PREFIX(ptrues(p10.VnS(), SVE_MUL3), "ptrues p10.s, mul3"); - COMPARE_PREFIX(ptrues(p11.VnD(), SVE_VL256), "ptrues p11.d, vl256"); - COMPARE_PREFIX(ptrues(p12.VnB(), SVE_VL16), "ptrues p12.b, vl16"); - COMPARE_PREFIX(ptrues(p13.VnH(), SVE_VL8), "ptrues p13.h, vl8"); - COMPARE_PREFIX(ptrues(p14.VnS(), SVE_VL1), "ptrues p14.s, vl1"); - COMPARE_PREFIX(ptrues(p15.VnD(), SVE_POW2), "ptrues p15.d, pow2"); + COMPARE(ptrue(p0.VnB(), SVE_POW2), "ptrue p0.b, pow2"); + COMPARE(ptrue(p1.VnH(), SVE_VL1), "ptrue p1.h, vl1"); + COMPARE(ptrue(p2.VnS(), SVE_VL8), "ptrue p2.s, vl8"); + COMPARE(ptrue(p3.VnD(), SVE_VL16), "ptrue p3.d, vl16"); + COMPARE(ptrue(p4.VnB(), SVE_VL256), "ptrue p4.b, vl256"); + COMPARE(ptrue(p5.VnH(), SVE_MUL3), "ptrue p5.h, mul3"); + COMPARE(ptrue(p6.VnS(), SVE_MUL4), "ptrue p6.s, mul4"); + COMPARE(ptrue(p7.VnD(), SVE_ALL), "ptrue p7.d"); + + COMPARE(ptrues(p8.VnB(), SVE_ALL), "ptrues p8.b"); + COMPARE(ptrues(p9.VnH(), SVE_MUL4), "ptrues p9.h, mul4"); + 
COMPARE(ptrues(p10.VnS(), SVE_MUL3), "ptrues p10.s, mul3"); + COMPARE(ptrues(p11.VnD(), SVE_VL256), "ptrues p11.d, vl256"); + COMPARE(ptrues(p12.VnB(), SVE_VL16), "ptrues p12.b, vl16"); + COMPARE(ptrues(p13.VnH(), SVE_VL8), "ptrues p13.h, vl8"); + COMPARE(ptrues(p14.VnS(), SVE_VL1), "ptrues p14.s, vl1"); + COMPARE(ptrues(p15.VnD(), SVE_POW2), "ptrues p15.d, pow2"); // The Assembler supports arbitrary immediates. - COMPARE_PREFIX(ptrue(p7.VnS(), 0xd), "ptrue p7.s, vl256"); - COMPARE_PREFIX(ptrue(p8.VnD(), 0xe), "ptrue p8.d, #0xe"); - COMPARE_PREFIX(ptrue(p9.VnB(), 0x15), "ptrue p9.b, #0x15"); - COMPARE_PREFIX(ptrue(p10.VnH(), 0x19), "ptrue p10.h, #0x19"); - COMPARE_PREFIX(ptrue(p11.VnS(), 0x1a), "ptrue p11.s, #0x1a"); - COMPARE_PREFIX(ptrue(p12.VnD(), 0x1c), "ptrue p12.d, #0x1c"); - COMPARE_PREFIX(ptrue(p13.VnB(), 0x1d), "ptrue p13.b, mul4"); - - COMPARE_PREFIX(ptrues(p14.VnS(), 0xd), "ptrues p14.s, vl256"); - COMPARE_PREFIX(ptrues(p15.VnD(), 0xe), "ptrues p15.d, #0xe"); - COMPARE_PREFIX(ptrues(p0.VnB(), 0x15), "ptrues p0.b, #0x15"); - COMPARE_PREFIX(ptrues(p1.VnH(), 0x19), "ptrues p1.h, #0x19"); - COMPARE_PREFIX(ptrues(p2.VnS(), 0x1a), "ptrues p2.s, #0x1a"); - COMPARE_PREFIX(ptrues(p3.VnD(), 0x1c), "ptrues p3.d, #0x1c"); - COMPARE_PREFIX(ptrues(p4.VnB(), 0x1d), "ptrues p4.b, mul4"); + COMPARE(ptrue(p7.VnS(), 0xd), "ptrue p7.s, vl256"); + COMPARE(ptrue(p8.VnD(), 0xe), "ptrue p8.d, #0xe"); + COMPARE(ptrue(p9.VnB(), 0x15), "ptrue p9.b, #0x15"); + COMPARE(ptrue(p10.VnH(), 0x19), "ptrue p10.h, #0x19"); + COMPARE(ptrue(p11.VnS(), 0x1a), "ptrue p11.s, #0x1a"); + COMPARE(ptrue(p12.VnD(), 0x1c), "ptrue p12.d, #0x1c"); + COMPARE(ptrue(p13.VnB(), 0x1d), "ptrue p13.b, mul4"); + + COMPARE(ptrues(p14.VnS(), 0xd), "ptrues p14.s, vl256"); + COMPARE(ptrues(p15.VnD(), 0xe), "ptrues p15.d, #0xe"); + COMPARE(ptrues(p0.VnB(), 0x15), "ptrues p0.b, #0x15"); + COMPARE(ptrues(p1.VnH(), 0x19), "ptrues p1.h, #0x19"); + COMPARE(ptrues(p2.VnS(), 0x1a), "ptrues p2.s, #0x1a"); + 
COMPARE(ptrues(p3.VnD(), 0x1c), "ptrues p3.d, #0x1c"); + COMPARE(ptrues(p4.VnB(), 0x1d), "ptrues p4.b, mul4"); // SVE_ALL is the default. - COMPARE_PREFIX(ptrue(p15.VnS()), "ptrue p15.s"); - COMPARE_PREFIX(ptrues(p0.VnS()), "ptrues p0.s"); + COMPARE(ptrue(p15.VnS()), "ptrue p15.s"); + COMPARE(ptrues(p0.VnS()), "ptrues p0.s"); // The MacroAssembler provides a `FlagsUpdate` argument. COMPARE_MACRO(Ptrue(p0.VnB(), SVE_MUL3), "ptrue p0.b, mul3"); @@ -6166,8 +5947,8 @@ TEST(sve_predicate_initialize) { TEST(sve_pfalse) { SETUP(); - COMPARE_PREFIX(pfalse(p0.VnB()), "pfalse p0.b"); - COMPARE_PREFIX(pfalse(p15.VnB()), "pfalse p15.b"); + COMPARE(pfalse(p0.VnB()), "pfalse p0.b"); + COMPARE(pfalse(p15.VnB()), "pfalse p15.b"); COMPARE_MACRO(Pfalse(p1.VnB()), "pfalse p1.b"); COMPARE_MACRO(Pfalse(p4.VnH()), "pfalse p4.b"); @@ -6178,9 +5959,9 @@ TEST(sve_pfalse) { TEST(sve_ptest) { SETUP(); - COMPARE_PREFIX(ptest(p15, p0.VnB()), "ptest p15, p0.b"); - COMPARE_PREFIX(ptest(p0, p15.VnB()), "ptest p0, p15.b"); - COMPARE_PREFIX(ptest(p6, p6.VnB()), "ptest p6, p6.b"); + COMPARE(ptest(p15, p0.VnB()), "ptest p15, p0.b"); + COMPARE(ptest(p0, p15.VnB()), "ptest p0, p15.b"); + COMPARE(ptest(p6, p6.VnB()), "ptest p6, p6.b"); COMPARE_MACRO(Ptest(p0, p1.VnB()), "ptest p0, p1.b"); } @@ -6212,9 +5993,9 @@ TEST(sve_lane_size_relaxing) { TEST(sve_read_ffr) { SETUP(); - COMPARE_PREFIX(rdffr(p13.VnB()), "rdffr p13.b"); - COMPARE_PREFIX(rdffrs(p14.VnB(), p9.Zeroing()), "rdffrs p14.b, p9/z"); - COMPARE_PREFIX(rdffr(p5.VnB(), p14.Zeroing()), "rdffr p5.b, p14/z"); + COMPARE(rdffr(p13.VnB()), "rdffr p13.b"); + COMPARE(rdffrs(p14.VnB(), p9.Zeroing()), "rdffrs p14.b, p9/z"); + COMPARE(rdffr(p5.VnB(), p14.Zeroing()), "rdffr p5.b, p14/z"); CLEANUP(); } @@ -6222,14 +6003,14 @@ TEST(sve_read_ffr) { TEST(sve_propagate_break) { SETUP(); - COMPARE_PREFIX(brkpas(p12.VnB(), p0.Zeroing(), p12.VnB(), p11.VnB()), - "brkpas p12.b, p0/z, p12.b, p11.b"); - COMPARE_PREFIX(brkpa(p1.VnB(), p2.Zeroing(), p13.VnB(), p8.VnB()), 
- "brkpa p1.b, p2/z, p13.b, p8.b"); - COMPARE_PREFIX(brkpbs(p14.VnB(), p1.Zeroing(), p8.VnB(), p3.VnB()), - "brkpbs p14.b, p1/z, p8.b, p3.b"); - COMPARE_PREFIX(brkpb(p2.VnB(), p5.Zeroing(), p0.VnB(), p14.VnB()), - "brkpb p2.b, p5/z, p0.b, p14.b"); + COMPARE(brkpas(p12.VnB(), p0.Zeroing(), p12.VnB(), p11.VnB()), + "brkpas p12.b, p0/z, p12.b, p11.b"); + COMPARE(brkpa(p1.VnB(), p2.Zeroing(), p13.VnB(), p8.VnB()), + "brkpa p1.b, p2/z, p13.b, p8.b"); + COMPARE(brkpbs(p14.VnB(), p1.Zeroing(), p8.VnB(), p3.VnB()), + "brkpbs p14.b, p1/z, p8.b, p3.b"); + COMPARE(brkpb(p2.VnB(), p5.Zeroing(), p0.VnB(), p14.VnB()), + "brkpb p2.b, p5/z, p0.b, p14.b"); CLEANUP(); } @@ -6237,22 +6018,22 @@ TEST(sve_propagate_break) { TEST(sve_stack_allocation) { SETUP(); - COMPARE_PREFIX(rdvl(x26, 0), "rdvl x26, #0"); - COMPARE_PREFIX(rdvl(x27, 31), "rdvl x27, #31"); - COMPARE_PREFIX(rdvl(x28, -32), "rdvl x28, #-32"); - COMPARE_PREFIX(rdvl(xzr, 9), "rdvl xzr, #9"); + COMPARE(rdvl(x26, 0), "rdvl x26, #0"); + COMPARE(rdvl(x27, 31), "rdvl x27, #31"); + COMPARE(rdvl(x28, -32), "rdvl x28, #-32"); + COMPARE(rdvl(xzr, 9), "rdvl xzr, #9"); - COMPARE_PREFIX(addvl(x6, x20, 0), "addvl x6, x20, #0"); - COMPARE_PREFIX(addvl(x7, x21, 31), "addvl x7, x21, #31"); - COMPARE_PREFIX(addvl(x8, x22, -32), "addvl x8, x22, #-32"); - COMPARE_PREFIX(addvl(sp, x1, 5), "addvl sp, x1, #5"); - COMPARE_PREFIX(addvl(x9, sp, -16), "addvl x9, sp, #-16"); + COMPARE(addvl(x6, x20, 0), "addvl x6, x20, #0"); + COMPARE(addvl(x7, x21, 31), "addvl x7, x21, #31"); + COMPARE(addvl(x8, x22, -32), "addvl x8, x22, #-32"); + COMPARE(addvl(sp, x1, 5), "addvl sp, x1, #5"); + COMPARE(addvl(x9, sp, -16), "addvl x9, sp, #-16"); - COMPARE_PREFIX(addpl(x20, x6, 0), "addpl x20, x6, #0"); - COMPARE_PREFIX(addpl(x21, x7, 31), "addpl x21, x7, #31"); - COMPARE_PREFIX(addpl(x22, x8, -32), "addpl x22, x8, #-32"); - COMPARE_PREFIX(addpl(sp, x1, 5), "addpl sp, x1, #5"); - COMPARE_PREFIX(addpl(x9, sp, -16), "addpl x9, sp, #-16"); + COMPARE(addpl(x20, x6, 
0), "addpl x20, x6, #0"); + COMPARE(addpl(x21, x7, 31), "addpl x21, x7, #31"); + COMPARE(addpl(x22, x8, -32), "addpl x22, x8, #-32"); + COMPARE(addpl(sp, x1, 5), "addpl sp, x1, #5"); + COMPARE(addpl(x9, sp, -16), "addpl x9, sp, #-16"); CLEANUP(); } @@ -6418,10 +6199,2547 @@ TEST(sve_write_ffr) { SETUP(); COMPARE_PREFIX(setffr(), "setffr"); - COMPARE_PREFIX(wrffr(p9.VnB()), "wrffr p9.b"); + COMPARE(wrffr(p9.VnB()), "wrffr p9.b"); + + CLEANUP(); +} + +TEST(sve2_match_nmatch) { + SETUP(); + + COMPARE(match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB()), + "match p15.b, p1/z, z18.b, z5.b"); + COMPARE(match(p15.VnH(), p1.Zeroing(), z18.VnH(), z5.VnH()), + "match p15.h, p1/z, z18.h, z5.h"); + COMPARE(nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB()), + "nmatch p1.b, p1/z, z20.b, z17.b"); + COMPARE(nmatch(p1.VnH(), p1.Zeroing(), z20.VnH(), z17.VnH()), + "nmatch p1.h, p1/z, z20.h, z17.h"); + + CLEANUP(); +} + +TEST(sve2_saba_uaba) { + SETUP(); + + COMPARE(saba(z13.VnB(), z2.VnB(), z31.VnB()), "saba z13.b, z2.b, z31.b"); + COMPARE(saba(z13.VnD(), z2.VnD(), z31.VnD()), "saba z13.d, z2.d, z31.d"); + COMPARE(saba(z13.VnH(), z2.VnH(), z31.VnH()), "saba z13.h, z2.h, z31.h"); + COMPARE(saba(z13.VnS(), z2.VnS(), z31.VnS()), "saba z13.s, z2.s, z31.s"); + COMPARE(uaba(z23.VnB(), z22.VnB(), z20.VnB()), "uaba z23.b, z22.b, z20.b"); + COMPARE(uaba(z23.VnD(), z22.VnD(), z20.VnD()), "uaba z23.d, z22.d, z20.d"); + COMPARE(uaba(z23.VnH(), z22.VnH(), z20.VnH()), "uaba z23.h, z22.h, z20.h"); + COMPARE(uaba(z23.VnS(), z22.VnS(), z20.VnS()), "uaba z23.s, z22.s, z20.s"); + + COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()), + "saba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()), + "saba z12.b, z3.b, z12.b"); + COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z12.VnB(), z30.VnB()), + "saba z12.b, z12.b, z30.b"); + COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), ""); + COMPARE_MACRO(Saba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()), + 
"movprfx z12, z13\n" + "saba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()), + "movprfx z12, z3\n" + "saba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Saba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()), + "movprfx z12, z30\n" + "saba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()), + "mov z12.d, z3.d"); + COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "saba z12.b, z31.b, z3.b"); + COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "saba z12.b, z3.b, z31.b"); + + COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()), + "uaba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()), + "uaba z12.b, z3.b, z12.b"); + COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z30.VnB()), + "uaba z12.b, z12.b, z30.b"); + COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), ""); + COMPARE_MACRO(Uaba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()), + "movprfx z12, z13\n" + "uaba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()), + "movprfx z12, z3\n" + "uaba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Uaba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()), + "movprfx z12, z30\n" + "uaba z12.b, z3.b, z30.b"); + COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()), + "mov z12.d, z3.d"); + COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "uaba z12.b, z31.b, z3.b"); + COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "uaba z12.b, z3.b, z31.b"); + + CLEANUP(); +} + +TEST(sve2_halving_arithmetic) { + SETUP(); + + COMPARE(shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()), + "shadd z20.b, p3/m, z20.b, z7.b"); + COMPARE(shadd(z20.VnD(), p3.Merging(), z20.VnD(), z7.VnD()), + "shadd z20.d, p3/m, z20.d, z7.d"); + 
COMPARE(shadd(z20.VnH(), p3.Merging(), z20.VnH(), z7.VnH()), + "shadd z20.h, p3/m, z20.h, z7.h"); + COMPARE(shadd(z20.VnS(), p3.Merging(), z20.VnS(), z7.VnS()), + "shadd z20.s, p3/m, z20.s, z7.s"); + COMPARE(shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()), + "shsub z21.b, p0/m, z21.b, z0.b"); + COMPARE(shsub(z21.VnD(), p0.Merging(), z21.VnD(), z0.VnD()), + "shsub z21.d, p0/m, z21.d, z0.d"); + COMPARE(shsub(z21.VnH(), p0.Merging(), z21.VnH(), z0.VnH()), + "shsub z21.h, p0/m, z21.h, z0.h"); + COMPARE(shsub(z21.VnS(), p0.Merging(), z21.VnS(), z0.VnS()), + "shsub z21.s, p0/m, z21.s, z0.s"); + COMPARE(shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), + "shsubr z1.b, p0/m, z1.b, z2.b"); + COMPARE(shsubr(z1.VnD(), p0.Merging(), z1.VnD(), z2.VnD()), + "shsubr z1.d, p0/m, z1.d, z2.d"); + COMPARE(shsubr(z1.VnH(), p0.Merging(), z1.VnH(), z2.VnH()), + "shsubr z1.h, p0/m, z1.h, z2.h"); + COMPARE(shsubr(z1.VnS(), p0.Merging(), z1.VnS(), z2.VnS()), + "shsubr z1.s, p0/m, z1.s, z2.s"); + COMPARE(srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()), + "srhadd z23.b, p4/m, z23.b, z14.b"); + COMPARE(srhadd(z23.VnD(), p4.Merging(), z23.VnD(), z14.VnD()), + "srhadd z23.d, p4/m, z23.d, z14.d"); + COMPARE(srhadd(z23.VnH(), p4.Merging(), z23.VnH(), z14.VnH()), + "srhadd z23.h, p4/m, z23.h, z14.h"); + COMPARE(srhadd(z23.VnS(), p4.Merging(), z23.VnS(), z14.VnS()), + "srhadd z23.s, p4/m, z23.s, z14.s"); + + COMPARE(uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()), + "uhadd z21.b, p2/m, z21.b, z19.b"); + COMPARE(uhadd(z21.VnD(), p2.Merging(), z21.VnD(), z19.VnD()), + "uhadd z21.d, p2/m, z21.d, z19.d"); + COMPARE(uhadd(z21.VnH(), p2.Merging(), z21.VnH(), z19.VnH()), + "uhadd z21.h, p2/m, z21.h, z19.h"); + COMPARE(uhadd(z21.VnS(), p2.Merging(), z21.VnS(), z19.VnS()), + "uhadd z21.s, p2/m, z21.s, z19.s"); + COMPARE(uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()), + "uhsub z1.b, p4/m, z1.b, z9.b"); + COMPARE(uhsub(z1.VnD(), p4.Merging(), z1.VnD(), z9.VnD()), + "uhsub z1.d, p4/m, z1.d, 
z9.d"); + COMPARE(uhsub(z1.VnH(), p4.Merging(), z1.VnH(), z9.VnH()), + "uhsub z1.h, p4/m, z1.h, z9.h"); + COMPARE(uhsub(z1.VnS(), p4.Merging(), z1.VnS(), z9.VnS()), + "uhsub z1.s, p4/m, z1.s, z9.s"); + COMPARE(uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()), + "uhsubr z18.b, p0/m, z18.b, z1.b"); + COMPARE(uhsubr(z18.VnD(), p0.Merging(), z18.VnD(), z1.VnD()), + "uhsubr z18.d, p0/m, z18.d, z1.d"); + COMPARE(uhsubr(z18.VnH(), p0.Merging(), z18.VnH(), z1.VnH()), + "uhsubr z18.h, p0/m, z18.h, z1.h"); + COMPARE(uhsubr(z18.VnS(), p0.Merging(), z18.VnS(), z1.VnS()), + "uhsubr z18.s, p0/m, z18.s, z1.s"); + COMPARE(urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()), + "urhadd z29.b, p4/m, z29.b, z10.b"); + COMPARE(urhadd(z29.VnD(), p4.Merging(), z29.VnD(), z10.VnD()), + "urhadd z29.d, p4/m, z29.d, z10.d"); + COMPARE(urhadd(z29.VnH(), p4.Merging(), z29.VnH(), z10.VnH()), + "urhadd z29.h, p4/m, z29.h, z10.h"); + COMPARE(urhadd(z29.VnS(), p4.Merging(), z29.VnS(), z10.VnS()), + "urhadd z29.s, p4/m, z29.s, z10.s"); + + COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), + "movprfx z0.b, p0/m, z1.b\n" + "shadd z0.b, p0/m, z0.b, z2.b"); + COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), + "shadd z0.b, p0/m, z0.b, z1.b"); + COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), + "movprfx z0.b, p0/m, z1.b\n" + "srhadd z0.b, p0/m, z0.b, z2.b"); + COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), + "srhadd z0.b, p0/m, z0.b, z1.b"); + COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), + "movprfx z0.b, p0/m, z1.b\n" + "uhadd z0.b, p0/m, z0.b, z2.b"); + COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), + "uhadd z0.b, p0/m, z0.b, z1.b"); + COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), + "movprfx z0.b, p0/m, z1.b\n" + "urhadd z0.b, p0/m, z0.b, z2.b"); + COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), + "urhadd z0.b, p0/m, z0.b, z1.b"); + + 
COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()), + "shsub z0.b, p0/m, z0.b, z1.b"); + COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), + "shsubr z0.b, p0/m, z0.b, z1.b"); + COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()), + "uhsub z0.b, p0/m, z0.b, z1.b"); + COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), + "uhsubr z0.b, p0/m, z0.b, z1.b"); + + CLEANUP(); +} + +TEST(sve2_sra) { + SETUP(); + + COMPARE(ssra(z0.VnB(), z8.VnB(), 1), "ssra z0.b, z8.b, #1"); + COMPARE(ssra(z0.VnB(), z8.VnB(), 2), "ssra z0.b, z8.b, #2"); + COMPARE(ssra(z0.VnB(), z8.VnB(), 5), "ssra z0.b, z8.b, #5"); + COMPARE(ssra(z0.VnB(), z8.VnB(), 8), "ssra z0.b, z8.b, #8"); + COMPARE(ssra(z0.VnH(), z8.VnH(), 1), "ssra z0.h, z8.h, #1"); + COMPARE(ssra(z0.VnH(), z8.VnH(), 16), "ssra z0.h, z8.h, #16"); + COMPARE(ssra(z0.VnS(), z8.VnS(), 1), "ssra z0.s, z8.s, #1"); + COMPARE(ssra(z0.VnS(), z8.VnS(), 31), "ssra z0.s, z8.s, #31"); + COMPARE(ssra(z0.VnD(), z8.VnD(), 1), "ssra z0.d, z8.d, #1"); + COMPARE(ssra(z0.VnD(), z8.VnD(), 64), "ssra z0.d, z8.d, #64"); + + COMPARE(srsra(z0.VnB(), z8.VnB(), 1), "srsra z0.b, z8.b, #1"); + COMPARE(srsra(z0.VnB(), z8.VnB(), 2), "srsra z0.b, z8.b, #2"); + COMPARE(srsra(z0.VnB(), z8.VnB(), 5), "srsra z0.b, z8.b, #5"); + COMPARE(srsra(z0.VnB(), z8.VnB(), 8), "srsra z0.b, z8.b, #8"); + COMPARE(srsra(z0.VnH(), z8.VnH(), 1), "srsra z0.h, z8.h, #1"); + COMPARE(srsra(z0.VnH(), z8.VnH(), 16), "srsra z0.h, z8.h, #16"); + COMPARE(srsra(z0.VnS(), z8.VnS(), 1), "srsra z0.s, z8.s, #1"); + COMPARE(srsra(z0.VnS(), z8.VnS(), 31), "srsra z0.s, z8.s, #31"); + COMPARE(srsra(z0.VnD(), z8.VnD(), 1), "srsra z0.d, z8.d, #1"); + COMPARE(srsra(z0.VnD(), z8.VnD(), 64), "srsra z0.d, z8.d, #64"); + + COMPARE(usra(z0.VnB(), z8.VnB(), 1), "usra z0.b, z8.b, #1"); + COMPARE(usra(z0.VnB(), z8.VnB(), 2), "usra z0.b, z8.b, #2"); + COMPARE(usra(z0.VnB(), z8.VnB(), 5), "usra z0.b, z8.b, #5"); + COMPARE(usra(z0.VnB(), z8.VnB(), 8), "usra z0.b, 
z8.b, #8"); + COMPARE(usra(z0.VnH(), z8.VnH(), 1), "usra z0.h, z8.h, #1"); + COMPARE(usra(z0.VnH(), z8.VnH(), 16), "usra z0.h, z8.h, #16"); + COMPARE(usra(z0.VnS(), z8.VnS(), 1), "usra z0.s, z8.s, #1"); + COMPARE(usra(z0.VnS(), z8.VnS(), 31), "usra z0.s, z8.s, #31"); + COMPARE(usra(z0.VnD(), z8.VnD(), 1), "usra z0.d, z8.d, #1"); + COMPARE(usra(z0.VnD(), z8.VnD(), 64), "usra z0.d, z8.d, #64"); + + COMPARE(ursra(z0.VnB(), z8.VnB(), 1), "ursra z0.b, z8.b, #1"); + COMPARE(ursra(z0.VnB(), z8.VnB(), 2), "ursra z0.b, z8.b, #2"); + COMPARE(ursra(z0.VnB(), z8.VnB(), 5), "ursra z0.b, z8.b, #5"); + COMPARE(ursra(z0.VnB(), z8.VnB(), 8), "ursra z0.b, z8.b, #8"); + COMPARE(ursra(z0.VnH(), z8.VnH(), 1), "ursra z0.h, z8.h, #1"); + COMPARE(ursra(z0.VnH(), z8.VnH(), 16), "ursra z0.h, z8.h, #16"); + COMPARE(ursra(z0.VnS(), z8.VnS(), 1), "ursra z0.s, z8.s, #1"); + COMPARE(ursra(z0.VnS(), z8.VnS(), 31), "ursra z0.s, z8.s, #31"); + COMPARE(ursra(z0.VnD(), z8.VnD(), 1), "ursra z0.d, z8.d, #1"); + COMPARE(ursra(z0.VnD(), z8.VnD(), 64), "ursra z0.d, z8.d, #64"); + + COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z2.VnB(), 2), + "movprfx z0, z1\n" + "ssra z0.b, z2.b, #2"); + COMPARE_MACRO(Ssra(z0.VnB(), z0.VnB(), z2.VnB(), 2), "ssra z0.b, z2.b, #2"); + COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z1.VnB(), 2), + "movprfx z0, z1\n" + "ssra z0.b, z1.b, #2"); + COMPARE_MACRO(Ssra(z2.VnB(), z1.VnB(), z2.VnB(), 2), + "mov z31.d, z2.d\n" + "movprfx z2, z1\n" + "ssra z2.b, z31.b, #2"); + COMPARE_MACRO(Ssra(z0.VnB(), z0.VnB(), z0.VnB(), 2), "ssra z0.b, z0.b, #2"); + + COMPARE_MACRO(Srsra(z2.VnB(), z1.VnB(), z2.VnB(), 2), + "mov z31.d, z2.d\n" + "movprfx z2, z1\n" + "srsra z2.b, z31.b, #2"); + COMPARE_MACRO(Usra(z2.VnB(), z1.VnB(), z2.VnB(), 2), + "mov z31.d, z2.d\n" + "movprfx z2, z1\n" + "usra z2.b, z31.b, #2"); + COMPARE_MACRO(Ursra(z2.VnB(), z1.VnB(), z2.VnB(), 2), + "mov z31.d, z2.d\n" + "movprfx z2, z1\n" + "ursra z2.b, z31.b, #2"); + CLEANUP(); +} + +TEST(sve2_sri_sli) { + SETUP(); + + 
COMPARE(sri(z6.VnB(), z9.VnB(), 1), "sri z6.b, z9.b, #1"); + COMPARE(sri(z6.VnB(), z9.VnB(), 2), "sri z6.b, z9.b, #2"); + COMPARE(sri(z6.VnB(), z9.VnB(), 5), "sri z6.b, z9.b, #5"); + COMPARE(sri(z6.VnB(), z9.VnB(), 8), "sri z6.b, z9.b, #8"); + COMPARE(sri(z6.VnH(), z9.VnH(), 1), "sri z6.h, z9.h, #1"); + COMPARE(sri(z6.VnH(), z9.VnH(), 16), "sri z6.h, z9.h, #16"); + COMPARE(sri(z6.VnS(), z9.VnS(), 1), "sri z6.s, z9.s, #1"); + COMPARE(sri(z6.VnS(), z9.VnS(), 31), "sri z6.s, z9.s, #31"); + COMPARE(sri(z6.VnD(), z9.VnD(), 1), "sri z6.d, z9.d, #1"); + COMPARE(sri(z6.VnD(), z9.VnD(), 64), "sri z6.d, z9.d, #64"); + + COMPARE(sli(z29.VnB(), z7.VnB(), 0), "sli z29.b, z7.b, #0"); + COMPARE(sli(z29.VnB(), z7.VnB(), 2), "sli z29.b, z7.b, #2"); + COMPARE(sli(z29.VnB(), z7.VnB(), 5), "sli z29.b, z7.b, #5"); + COMPARE(sli(z29.VnB(), z7.VnB(), 7), "sli z29.b, z7.b, #7"); + COMPARE(sli(z29.VnH(), z7.VnH(), 0), "sli z29.h, z7.h, #0"); + COMPARE(sli(z29.VnH(), z7.VnH(), 15), "sli z29.h, z7.h, #15"); + COMPARE(sli(z29.VnS(), z7.VnS(), 0), "sli z29.s, z7.s, #0"); + COMPARE(sli(z29.VnS(), z7.VnS(), 31), "sli z29.s, z7.s, #31"); + COMPARE(sli(z29.VnD(), z7.VnD(), 0), "sli z29.d, z7.d, #0"); + COMPARE(sli(z29.VnD(), z7.VnD(), 63), "sli z29.d, z7.d, #63"); + + CLEANUP(); +} + +TEST(sve2_shift_imm) { + SETUP(); + + COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0), + "sqshl z0.b, p5/m, z0.b, #0"); + COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 2), + "sqshl z0.b, p5/m, z0.b, #2"); + COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 5), + "sqshl z0.b, p5/m, z0.b, #5"); + COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 7), + "sqshl z0.b, p5/m, z0.b, #7"); + COMPARE(sqshl(z0.VnH(), p5.Merging(), z0.VnH(), 0), + "sqshl z0.h, p5/m, z0.h, #0"); + COMPARE(sqshl(z0.VnH(), p5.Merging(), z0.VnH(), 15), + "sqshl z0.h, p5/m, z0.h, #15"); + COMPARE(sqshl(z0.VnS(), p5.Merging(), z0.VnS(), 0), + "sqshl z0.s, p5/m, z0.s, #0"); + COMPARE(sqshl(z0.VnS(), p5.Merging(), z0.VnS(), 31), + "sqshl z0.s, p5/m, 
z0.s, #31"); + COMPARE(sqshl(z0.VnD(), p5.Merging(), z0.VnD(), 0), + "sqshl z0.d, p5/m, z0.d, #0"); + COMPARE(sqshl(z0.VnD(), p5.Merging(), z0.VnD(), 63), + "sqshl z0.d, p5/m, z0.d, #63"); + + COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0), + "sqshlu z10.b, p1/m, z10.b, #0"); + COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 2), + "sqshlu z10.b, p1/m, z10.b, #2"); + COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 5), + "sqshlu z10.b, p1/m, z10.b, #5"); + COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 7), + "sqshlu z10.b, p1/m, z10.b, #7"); + COMPARE(sqshlu(z10.VnH(), p1.Merging(), z10.VnH(), 0), + "sqshlu z10.h, p1/m, z10.h, #0"); + COMPARE(sqshlu(z10.VnH(), p1.Merging(), z10.VnH(), 15), + "sqshlu z10.h, p1/m, z10.h, #15"); + COMPARE(sqshlu(z10.VnS(), p1.Merging(), z10.VnS(), 0), + "sqshlu z10.s, p1/m, z10.s, #0"); + COMPARE(sqshlu(z10.VnS(), p1.Merging(), z10.VnS(), 31), + "sqshlu z10.s, p1/m, z10.s, #31"); + COMPARE(sqshlu(z10.VnD(), p1.Merging(), z10.VnD(), 0), + "sqshlu z10.d, p1/m, z10.d, #0"); + COMPARE(sqshlu(z10.VnD(), p1.Merging(), z10.VnD(), 63), + "sqshlu z10.d, p1/m, z10.d, #63"); + + COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1), + "srshr z12.b, p0/m, z12.b, #1"); + COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 2), + "srshr z12.b, p0/m, z12.b, #2"); + COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 5), + "srshr z12.b, p0/m, z12.b, #5"); + COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 8), + "srshr z12.b, p0/m, z12.b, #8"); + COMPARE(srshr(z12.VnH(), p0.Merging(), z12.VnH(), 1), + "srshr z12.h, p0/m, z12.h, #1"); + COMPARE(srshr(z12.VnH(), p0.Merging(), z12.VnH(), 16), + "srshr z12.h, p0/m, z12.h, #16"); + COMPARE(srshr(z12.VnS(), p0.Merging(), z12.VnS(), 1), + "srshr z12.s, p0/m, z12.s, #1"); + COMPARE(srshr(z12.VnS(), p0.Merging(), z12.VnS(), 32), + "srshr z12.s, p0/m, z12.s, #32"); + COMPARE(srshr(z12.VnD(), p0.Merging(), z12.VnD(), 1), + "srshr z12.d, p0/m, z12.d, #1"); + COMPARE(srshr(z12.VnD(), p0.Merging(), 
z12.VnD(), 64), + "srshr z12.d, p0/m, z12.d, #64"); + + COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0), + "uqshl z29.b, p7/m, z29.b, #0"); + COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 2), + "uqshl z29.b, p7/m, z29.b, #2"); + COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 5), + "uqshl z29.b, p7/m, z29.b, #5"); + COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 7), + "uqshl z29.b, p7/m, z29.b, #7"); + COMPARE(uqshl(z29.VnH(), p7.Merging(), z29.VnH(), 0), + "uqshl z29.h, p7/m, z29.h, #0"); + COMPARE(uqshl(z29.VnH(), p7.Merging(), z29.VnH(), 15), + "uqshl z29.h, p7/m, z29.h, #15"); + COMPARE(uqshl(z29.VnS(), p7.Merging(), z29.VnS(), 0), + "uqshl z29.s, p7/m, z29.s, #0"); + COMPARE(uqshl(z29.VnS(), p7.Merging(), z29.VnS(), 31), + "uqshl z29.s, p7/m, z29.s, #31"); + COMPARE(uqshl(z29.VnD(), p7.Merging(), z29.VnD(), 0), + "uqshl z29.d, p7/m, z29.d, #0"); + COMPARE(uqshl(z29.VnD(), p7.Merging(), z29.VnD(), 63), + "uqshl z29.d, p7/m, z29.d, #63"); + + COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1), + "urshr z31.b, p2/m, z31.b, #1"); + COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 2), + "urshr z31.b, p2/m, z31.b, #2"); + COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 5), + "urshr z31.b, p2/m, z31.b, #5"); + COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 8), + "urshr z31.b, p2/m, z31.b, #8"); + COMPARE(urshr(z31.VnH(), p2.Merging(), z31.VnH(), 1), + "urshr z31.h, p2/m, z31.h, #1"); + COMPARE(urshr(z31.VnH(), p2.Merging(), z31.VnH(), 16), + "urshr z31.h, p2/m, z31.h, #16"); + COMPARE(urshr(z31.VnS(), p2.Merging(), z31.VnS(), 1), + "urshr z31.s, p2/m, z31.s, #1"); + COMPARE(urshr(z31.VnS(), p2.Merging(), z31.VnS(), 32), + "urshr z31.s, p2/m, z31.s, #32"); + COMPARE(urshr(z31.VnD(), p2.Merging(), z31.VnD(), 1), + "urshr z31.d, p2/m, z31.d, #1"); + COMPARE(urshr(z31.VnD(), p2.Merging(), z31.VnD(), 64), + "urshr z31.d, p2/m, z31.d, #64"); + + CLEANUP(); +} + +TEST(sve2_shift_sat) { + SETUP(); + + COMPARE(srshl(z31.VnB(), p7.Merging(), z31.VnB(), 
z3.VnB()), + "srshl z31.b, p7/m, z31.b, z3.b"); + COMPARE(srshl(z31.VnD(), p7.Merging(), z31.VnD(), z3.VnD()), + "srshl z31.d, p7/m, z31.d, z3.d"); + COMPARE(srshl(z31.VnH(), p7.Merging(), z31.VnH(), z3.VnH()), + "srshl z31.h, p7/m, z31.h, z3.h"); + COMPARE(srshl(z31.VnS(), p7.Merging(), z31.VnS(), z3.VnS()), + "srshl z31.s, p7/m, z31.s, z3.s"); + + COMPARE(srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()), + "srshlr z16.b, p7/m, z16.b, z29.b"); + COMPARE(srshlr(z16.VnD(), p7.Merging(), z16.VnD(), z29.VnD()), + "srshlr z16.d, p7/m, z16.d, z29.d"); + COMPARE(srshlr(z16.VnH(), p7.Merging(), z16.VnH(), z29.VnH()), + "srshlr z16.h, p7/m, z16.h, z29.h"); + COMPARE(srshlr(z16.VnS(), p7.Merging(), z16.VnS(), z29.VnS()), + "srshlr z16.s, p7/m, z16.s, z29.s"); + + COMPARE(urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()), + "urshl z15.b, p2/m, z15.b, z3.b"); + COMPARE(urshl(z15.VnD(), p2.Merging(), z15.VnD(), z3.VnD()), + "urshl z15.d, p2/m, z15.d, z3.d"); + COMPARE(urshl(z15.VnH(), p2.Merging(), z15.VnH(), z3.VnH()), + "urshl z15.h, p2/m, z15.h, z3.h"); + COMPARE(urshl(z15.VnS(), p2.Merging(), z15.VnS(), z3.VnS()), + "urshl z15.s, p2/m, z15.s, z3.s"); + + COMPARE(urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()), + "urshlr z27.b, p1/m, z27.b, z30.b"); + COMPARE(urshlr(z27.VnD(), p1.Merging(), z27.VnD(), z30.VnD()), + "urshlr z27.d, p1/m, z27.d, z30.d"); + COMPARE(urshlr(z27.VnH(), p1.Merging(), z27.VnH(), z30.VnH()), + "urshlr z27.h, p1/m, z27.h, z30.h"); + COMPARE(urshlr(z27.VnS(), p1.Merging(), z27.VnS(), z30.VnS()), + "urshlr z27.s, p1/m, z27.s, z30.s"); + + COMPARE(sqshl(z22.VnB(), p4.Merging(), z22.VnB(), z21.VnB()), + "sqshl z22.b, p4/m, z22.b, z21.b"); + COMPARE(sqshl(z22.VnD(), p4.Merging(), z22.VnD(), z21.VnD()), + "sqshl z22.d, p4/m, z22.d, z21.d"); + COMPARE(sqshl(z22.VnH(), p4.Merging(), z22.VnH(), z21.VnH()), + "sqshl z22.h, p4/m, z22.h, z21.h"); + COMPARE(sqshl(z22.VnS(), p4.Merging(), z22.VnS(), z21.VnS()), + "sqshl z22.s, p4/m, z22.s, z21.s"); + + 
COMPARE(sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()), + "sqshlr z7.b, p3/m, z7.b, z5.b"); + COMPARE(sqshlr(z7.VnD(), p3.Merging(), z7.VnD(), z5.VnD()), + "sqshlr z7.d, p3/m, z7.d, z5.d"); + COMPARE(sqshlr(z7.VnH(), p3.Merging(), z7.VnH(), z5.VnH()), + "sqshlr z7.h, p3/m, z7.h, z5.h"); + COMPARE(sqshlr(z7.VnS(), p3.Merging(), z7.VnS(), z5.VnS()), + "sqshlr z7.s, p3/m, z7.s, z5.s"); + + COMPARE(uqshl(z10.VnB(), p0.Merging(), z10.VnB(), z21.VnB()), + "uqshl z10.b, p0/m, z10.b, z21.b"); + COMPARE(uqshl(z10.VnD(), p0.Merging(), z10.VnD(), z21.VnD()), + "uqshl z10.d, p0/m, z10.d, z21.d"); + COMPARE(uqshl(z10.VnH(), p0.Merging(), z10.VnH(), z21.VnH()), + "uqshl z10.h, p0/m, z10.h, z21.h"); + COMPARE(uqshl(z10.VnS(), p0.Merging(), z10.VnS(), z21.VnS()), + "uqshl z10.s, p0/m, z10.s, z21.s"); + + COMPARE(uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB()), + "uqshlr z12.b, p1/m, z12.b, z12.b"); + COMPARE(uqshlr(z12.VnD(), p1.Merging(), z12.VnD(), z12.VnD()), + "uqshlr z12.d, p1/m, z12.d, z12.d"); + COMPARE(uqshlr(z12.VnH(), p1.Merging(), z12.VnH(), z12.VnH()), + "uqshlr z12.h, p1/m, z12.h, z12.h"); + COMPARE(uqshlr(z12.VnS(), p1.Merging(), z12.VnS(), z12.VnS()), + "uqshlr z12.s, p1/m, z12.s, z12.s"); + + COMPARE(sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()), + "sqrshl z31.b, p5/m, z31.b, z27.b"); + COMPARE(sqrshl(z31.VnD(), p5.Merging(), z31.VnD(), z27.VnD()), + "sqrshl z31.d, p5/m, z31.d, z27.d"); + COMPARE(sqrshl(z31.VnH(), p5.Merging(), z31.VnH(), z27.VnH()), + "sqrshl z31.h, p5/m, z31.h, z27.h"); + COMPARE(sqrshl(z31.VnS(), p5.Merging(), z31.VnS(), z27.VnS()), + "sqrshl z31.s, p5/m, z31.s, z27.s"); + + COMPARE(sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()), + "sqrshlr z25.b, p6/m, z25.b, z7.b"); + COMPARE(sqrshlr(z25.VnD(), p6.Merging(), z25.VnD(), z7.VnD()), + "sqrshlr z25.d, p6/m, z25.d, z7.d"); + COMPARE(sqrshlr(z25.VnH(), p6.Merging(), z25.VnH(), z7.VnH()), + "sqrshlr z25.h, p6/m, z25.h, z7.h"); + COMPARE(sqrshlr(z25.VnS(), p6.Merging(), 
z25.VnS(), z7.VnS()), + "sqrshlr z25.s, p6/m, z25.s, z7.s"); + + COMPARE(uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()), + "uqrshl z20.b, p1/m, z20.b, z30.b"); + COMPARE(uqrshl(z20.VnD(), p1.Merging(), z20.VnD(), z30.VnD()), + "uqrshl z20.d, p1/m, z20.d, z30.d"); + COMPARE(uqrshl(z20.VnH(), p1.Merging(), z20.VnH(), z30.VnH()), + "uqrshl z20.h, p1/m, z20.h, z30.h"); + COMPARE(uqrshl(z20.VnS(), p1.Merging(), z20.VnS(), z30.VnS()), + "uqrshl z20.s, p1/m, z20.s, z30.s"); + + COMPARE(uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()), + "uqrshlr z8.b, p5/m, z8.b, z9.b"); + COMPARE(uqrshlr(z8.VnD(), p5.Merging(), z8.VnD(), z9.VnD()), + "uqrshlr z8.d, p5/m, z8.d, z9.d"); + COMPARE(uqrshlr(z8.VnH(), p5.Merging(), z8.VnH(), z9.VnH()), + "uqrshlr z8.h, p5/m, z8.h, z9.h"); + COMPARE(uqrshlr(z8.VnS(), p5.Merging(), z8.VnS(), z9.VnS()), + "uqrshlr z8.s, p5/m, z8.s, z9.s"); + + CLEANUP(); +} + +TEST(sve2_sat_arith) { + SETUP(); + + COMPARE(sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), + "sqadd z28.b, p0/m, z28.b, z3.b"); + COMPARE(sqadd(z28.VnD(), p0.Merging(), z28.VnD(), z3.VnD()), + "sqadd z28.d, p0/m, z28.d, z3.d"); + COMPARE(sqadd(z28.VnH(), p0.Merging(), z28.VnH(), z3.VnH()), + "sqadd z28.h, p0/m, z28.h, z3.h"); + COMPARE(sqadd(z28.VnS(), p0.Merging(), z28.VnS(), z3.VnS()), + "sqadd z28.s, p0/m, z28.s, z3.s"); + COMPARE(sqsub(z6.VnB(), p0.Merging(), z6.VnB(), z12.VnB()), + "sqsub z6.b, p0/m, z6.b, z12.b"); + COMPARE(sqsub(z6.VnD(), p0.Merging(), z6.VnD(), z12.VnD()), + "sqsub z6.d, p0/m, z6.d, z12.d"); + COMPARE(sqsub(z6.VnH(), p0.Merging(), z6.VnH(), z12.VnH()), + "sqsub z6.h, p0/m, z6.h, z12.h"); + COMPARE(sqsub(z6.VnS(), p0.Merging(), z6.VnS(), z12.VnS()), + "sqsub z6.s, p0/m, z6.s, z12.s"); + COMPARE(sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()), + "sqsubr z16.b, p7/m, z16.b, z22.b"); + COMPARE(sqsubr(z16.VnD(), p7.Merging(), z16.VnD(), z22.VnD()), + "sqsubr z16.d, p7/m, z16.d, z22.d"); + COMPARE(sqsubr(z16.VnH(), p7.Merging(), z16.VnH(), 
z22.VnH()), + "sqsubr z16.h, p7/m, z16.h, z22.h"); + COMPARE(sqsubr(z16.VnS(), p7.Merging(), z16.VnS(), z22.VnS()), + "sqsubr z16.s, p7/m, z16.s, z22.s"); + COMPARE(suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()), + "suqadd z26.b, p2/m, z26.b, z28.b"); + COMPARE(suqadd(z26.VnD(), p2.Merging(), z26.VnD(), z28.VnD()), + "suqadd z26.d, p2/m, z26.d, z28.d"); + COMPARE(suqadd(z26.VnH(), p2.Merging(), z26.VnH(), z28.VnH()), + "suqadd z26.h, p2/m, z26.h, z28.h"); + COMPARE(suqadd(z26.VnS(), p2.Merging(), z26.VnS(), z28.VnS()), + "suqadd z26.s, p2/m, z26.s, z28.s"); + COMPARE(usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()), + "usqadd z25.b, p4/m, z25.b, z6.b"); + COMPARE(usqadd(z25.VnD(), p4.Merging(), z25.VnD(), z6.VnD()), + "usqadd z25.d, p4/m, z25.d, z6.d"); + COMPARE(usqadd(z25.VnH(), p4.Merging(), z25.VnH(), z6.VnH()), + "usqadd z25.h, p4/m, z25.h, z6.h"); + COMPARE(usqadd(z25.VnS(), p4.Merging(), z25.VnS(), z6.VnS()), + "usqadd z25.s, p4/m, z25.s, z6.s"); + COMPARE(uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), + "uqadd z24.b, p7/m, z24.b, z1.b"); + COMPARE(uqadd(z24.VnD(), p7.Merging(), z24.VnD(), z1.VnD()), + "uqadd z24.d, p7/m, z24.d, z1.d"); + COMPARE(uqadd(z24.VnH(), p7.Merging(), z24.VnH(), z1.VnH()), + "uqadd z24.h, p7/m, z24.h, z1.h"); + COMPARE(uqadd(z24.VnS(), p7.Merging(), z24.VnS(), z1.VnS()), + "uqadd z24.s, p7/m, z24.s, z1.s"); + COMPARE(uqsub(z10.VnB(), p3.Merging(), z10.VnB(), z1.VnB()), + "uqsub z10.b, p3/m, z10.b, z1.b"); + COMPARE(uqsub(z10.VnD(), p3.Merging(), z10.VnD(), z1.VnD()), + "uqsub z10.d, p3/m, z10.d, z1.d"); + COMPARE(uqsub(z10.VnH(), p3.Merging(), z10.VnH(), z1.VnH()), + "uqsub z10.h, p3/m, z10.h, z1.h"); + COMPARE(uqsub(z10.VnS(), p3.Merging(), z10.VnS(), z1.VnS()), + "uqsub z10.s, p3/m, z10.s, z1.s"); + COMPARE(uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()), + "uqsubr z20.b, p0/m, z20.b, z6.b"); + COMPARE(uqsubr(z20.VnD(), p0.Merging(), z20.VnD(), z6.VnD()), + "uqsubr z20.d, p0/m, z20.d, z6.d"); + 
COMPARE(uqsubr(z20.VnH(), p0.Merging(), z20.VnH(), z6.VnH()), + "uqsubr z20.h, p0/m, z20.h, z6.h"); + COMPARE(uqsubr(z20.VnS(), p0.Merging(), z20.VnS(), z6.VnS()), + "uqsubr z20.s, p0/m, z20.s, z6.s"); + + COMPARE_MACRO(Sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), + "sqadd z28.b, p0/m, z28.b, z28.b"); + COMPARE_MACRO(Sqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), + "movprfx z29.b, p0/m, z28.b\n" + "sqadd z29.b, p0/m, z29.b, z3.b"); + COMPARE_MACRO(Sqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), + "sqadd z29.b, p0/m, z29.b, z28.b"); + COMPARE_MACRO(Uqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), + "uqadd z28.b, p0/m, z28.b, z28.b"); + COMPARE_MACRO(Uqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), + "movprfx z29.b, p0/m, z28.b\n" + "uqadd z29.b, p0/m, z29.b, z3.b"); + COMPARE_MACRO(Uqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), + "uqadd z29.b, p0/m, z29.b, z28.b"); + + COMPARE_MACRO(Sqsub(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), + "sqsub z28.b, p0/m, z28.b, z28.b"); + COMPARE_MACRO(Sqsub(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), + "movprfx z29.b, p0/m, z28.b\n" + "sqsub z29.b, p0/m, z29.b, z3.b"); + COMPARE_MACRO(Sqsub(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), + "sqsubr z29.b, p0/m, z29.b, z28.b"); + COMPARE_MACRO(Uqsub(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), + "uqsub z28.b, p0/m, z28.b, z28.b"); + COMPARE_MACRO(Uqsub(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), + "movprfx z29.b, p0/m, z28.b\n" + "uqsub z29.b, p0/m, z29.b, z3.b"); + COMPARE_MACRO(Uqsub(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), + "uqsubr z29.b, p0/m, z29.b, z28.b"); + + COMPARE_MACRO(Suqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), + "suqadd z28.b, p0/m, z28.b, z28.b"); + COMPARE_MACRO(Suqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), + "movprfx z29.b, p0/m, z28.b\n" + "suqadd z29.b, p0/m, z29.b, z3.b"); + COMPARE_MACRO(Suqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), + "mov z31.d, z29.d\n" + "movprfx z29.b, p0/m, 
z28.b\n" + "suqadd z29.b, p0/m, z29.b, z31.b"); + COMPARE_MACRO(Usqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), + "usqadd z28.b, p0/m, z28.b, z28.b"); + COMPARE_MACRO(Usqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), + "movprfx z29.b, p0/m, z28.b\n" + "usqadd z29.b, p0/m, z29.b, z3.b"); + COMPARE_MACRO(Usqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), + "mov z31.d, z29.d\n" + "movprfx z29.b, p0/m, z28.b\n" + "usqadd z29.b, p0/m, z29.b, z31.b"); + + CLEANUP(); +} + +TEST(sve2_pair_arith) { + SETUP(); + + COMPARE(addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB()), + "addp z3.b, p1/m, z3.b, z0.b"); + COMPARE(addp(z3.VnD(), p1.Merging(), z3.VnD(), z0.VnD()), + "addp z3.d, p1/m, z3.d, z0.d"); + COMPARE(addp(z3.VnH(), p1.Merging(), z3.VnH(), z0.VnH()), + "addp z3.h, p1/m, z3.h, z0.h"); + COMPARE(addp(z3.VnS(), p1.Merging(), z3.VnS(), z0.VnS()), + "addp z3.s, p1/m, z3.s, z0.s"); + COMPARE(smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()), + "smaxp z5.b, p4/m, z5.b, z10.b"); + COMPARE(smaxp(z5.VnD(), p4.Merging(), z5.VnD(), z10.VnD()), + "smaxp z5.d, p4/m, z5.d, z10.d"); + COMPARE(smaxp(z5.VnH(), p4.Merging(), z5.VnH(), z10.VnH()), + "smaxp z5.h, p4/m, z5.h, z10.h"); + COMPARE(smaxp(z5.VnS(), p4.Merging(), z5.VnS(), z10.VnS()), + "smaxp z5.s, p4/m, z5.s, z10.s"); + COMPARE(sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()), + "sminp z27.b, p3/m, z27.b, z1.b"); + COMPARE(sminp(z27.VnD(), p3.Merging(), z27.VnD(), z1.VnD()), + "sminp z27.d, p3/m, z27.d, z1.d"); + COMPARE(sminp(z27.VnH(), p3.Merging(), z27.VnH(), z1.VnH()), + "sminp z27.h, p3/m, z27.h, z1.h"); + COMPARE(sminp(z27.VnS(), p3.Merging(), z27.VnS(), z1.VnS()), + "sminp z27.s, p3/m, z27.s, z1.s"); + COMPARE(umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()), + "umaxp z7.b, p2/m, z7.b, z23.b"); + COMPARE(umaxp(z7.VnD(), p2.Merging(), z7.VnD(), z23.VnD()), + "umaxp z7.d, p2/m, z7.d, z23.d"); + COMPARE(umaxp(z7.VnH(), p2.Merging(), z7.VnH(), z23.VnH()), + "umaxp z7.h, p2/m, z7.h, z23.h"); + 
COMPARE(umaxp(z7.VnS(), p2.Merging(), z7.VnS(), z23.VnS()), + "umaxp z7.s, p2/m, z7.s, z23.s"); + COMPARE(uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()), + "uminp z10.b, p0/m, z10.b, z22.b"); + COMPARE(uminp(z10.VnD(), p0.Merging(), z10.VnD(), z22.VnD()), + "uminp z10.d, p0/m, z10.d, z22.d"); + COMPARE(uminp(z10.VnH(), p0.Merging(), z10.VnH(), z22.VnH()), + "uminp z10.h, p0/m, z10.h, z22.h"); + COMPARE(uminp(z10.VnS(), p0.Merging(), z10.VnS(), z22.VnS()), + "uminp z10.s, p0/m, z10.s, z22.s"); + + COMPARE_MACRO(Addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB()), + "addp z3.b, p1/m, z3.b, z3.b"); + COMPARE_MACRO(Addp(z4.VnB(), p1.Merging(), z3.VnB(), z3.VnB()), + "movprfx z4.b, p1/m, z3.b\n" + "addp z4.b, p1/m, z4.b, z3.b"); + COMPARE_MACRO(Addp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), + "mov z31.d, z4.d\n" + "movprfx z4.b, p1/m, z3.b\n" + "addp z4.b, p1/m, z4.b, z31.b"); + COMPARE_MACRO(Smaxp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), + "mov z31.d, z4.d\n" + "movprfx z4.b, p1/m, z3.b\n" + "smaxp z4.b, p1/m, z4.b, z31.b"); + COMPARE_MACRO(Sminp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), + "mov z31.d, z4.d\n" + "movprfx z4.b, p1/m, z3.b\n" + "sminp z4.b, p1/m, z4.b, z31.b"); + COMPARE_MACRO(Umaxp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), + "mov z31.d, z4.d\n" + "movprfx z4.b, p1/m, z3.b\n" + "umaxp z4.b, p1/m, z4.b, z31.b"); + COMPARE_MACRO(Uminp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), + "mov z31.d, z4.d\n" + "movprfx z4.b, p1/m, z3.b\n" + "uminp z4.b, p1/m, z4.b, z31.b"); + CLEANUP(); +} + +TEST(sve2_extract_narrow) { + SETUP(); + + COMPARE(sqxtnb(z2.VnB(), z0.VnH()), "sqxtnb z2.b, z0.h"); + COMPARE(sqxtnb(z2.VnH(), z0.VnS()), "sqxtnb z2.h, z0.s"); + COMPARE(sqxtnb(z2.VnS(), z0.VnD()), "sqxtnb z2.s, z0.d"); + COMPARE(sqxtnt(z31.VnB(), z18.VnH()), "sqxtnt z31.b, z18.h"); + COMPARE(sqxtnt(z31.VnH(), z18.VnS()), "sqxtnt z31.h, z18.s"); + COMPARE(sqxtnt(z31.VnS(), z18.VnD()), "sqxtnt z31.s, z18.d"); + COMPARE(sqxtunb(z28.VnB(), z6.VnH()), "sqxtunb 
z28.b, z6.h"); + COMPARE(sqxtunb(z28.VnH(), z6.VnS()), "sqxtunb z28.h, z6.s"); + COMPARE(sqxtunb(z28.VnS(), z6.VnD()), "sqxtunb z28.s, z6.d"); + COMPARE(sqxtunt(z14.VnB(), z31.VnH()), "sqxtunt z14.b, z31.h"); + COMPARE(sqxtunt(z14.VnH(), z31.VnS()), "sqxtunt z14.h, z31.s"); + COMPARE(sqxtunt(z14.VnS(), z31.VnD()), "sqxtunt z14.s, z31.d"); + COMPARE(uqxtnb(z28.VnB(), z4.VnH()), "uqxtnb z28.b, z4.h"); + COMPARE(uqxtnb(z28.VnH(), z4.VnS()), "uqxtnb z28.h, z4.s"); + COMPARE(uqxtnb(z28.VnS(), z4.VnD()), "uqxtnb z28.s, z4.d"); + COMPARE(uqxtnt(z19.VnB(), z7.VnH()), "uqxtnt z19.b, z7.h"); + COMPARE(uqxtnt(z19.VnH(), z7.VnS()), "uqxtnt z19.h, z7.s"); + COMPARE(uqxtnt(z19.VnS(), z7.VnD()), "uqxtnt z19.s, z7.d"); + + CLEANUP(); +} + +TEST(sve2_eorbt_eortb) { + SETUP(); + + COMPARE(eorbt(z3.VnB(), z10.VnB(), z8.VnB()), "eorbt z3.b, z10.b, z8.b"); + COMPARE(eorbt(z3.VnD(), z10.VnD(), z8.VnD()), "eorbt z3.d, z10.d, z8.d"); + COMPARE(eorbt(z3.VnH(), z10.VnH(), z8.VnH()), "eorbt z3.h, z10.h, z8.h"); + COMPARE(eorbt(z3.VnS(), z10.VnS(), z8.VnS()), "eorbt z3.s, z10.s, z8.s"); + COMPARE(eortb(z21.VnB(), z21.VnB(), z15.VnB()), "eortb z21.b, z21.b, z15.b"); + COMPARE(eortb(z21.VnD(), z21.VnD(), z15.VnD()), "eortb z21.d, z21.d, z15.d"); + COMPARE(eortb(z21.VnH(), z21.VnH(), z15.VnH()), "eortb z21.h, z21.h, z15.h"); + COMPARE(eortb(z21.VnS(), z21.VnS(), z15.VnS()), "eortb z21.s, z21.s, z15.s"); + + CLEANUP(); +} + +TEST(sve2_saturating_multiply_add_high) { + SETUP(); + + COMPARE(sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()), + "sqrdmlah z27.b, z28.b, z19.b"); + COMPARE(sqrdmlah(z27.VnD(), z28.VnD(), z19.VnD()), + "sqrdmlah z27.d, z28.d, z19.d"); + COMPARE(sqrdmlah(z27.VnH(), z28.VnH(), z19.VnH()), + "sqrdmlah z27.h, z28.h, z19.h"); + COMPARE(sqrdmlah(z27.VnS(), z28.VnS(), z19.VnS()), + "sqrdmlah z27.s, z28.s, z19.s"); + COMPARE(sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()), + "sqrdmlsh z11.b, z16.b, z31.b"); + COMPARE(sqrdmlsh(z11.VnD(), z16.VnD(), z31.VnD()), + "sqrdmlsh z11.d, z16.d, z31.d"); 
+ COMPARE(sqrdmlsh(z11.VnH(), z16.VnH(), z31.VnH()), + "sqrdmlsh z11.h, z16.h, z31.h"); + COMPARE(sqrdmlsh(z11.VnS(), z16.VnS(), z31.VnS()), + "sqrdmlsh z11.s, z16.s, z31.s"); + + COMPARE_MACRO(Sqrdmlah(z29.VnD(), z0.VnD(), z29.VnD(), z26.VnD()), + "movprfx z31, z0\n" + "sqrdmlah z31.d, z29.d, z26.d\n" + "mov z29.d, z31.d"); + COMPARE_MACRO(Sqrdmlah(z26.VnH(), z0.VnH(), z29.VnH(), z26.VnH()), + "movprfx z31, z0\n" + "sqrdmlah z31.h, z29.h, z26.h\n" + "mov z26.d, z31.d"); + COMPARE_MACRO(Sqrdmlsh(z23.VnS(), z31.VnS(), z26.VnS(), z29.VnS()), + "movprfx z23, z31\n" + "sqrdmlsh z23.s, z26.s, z29.s"); + COMPARE_MACRO(Sqrdmlsh(z4.VnB(), z31.VnB(), z4.VnB(), z4.VnB()), + "sqrdmlsh z31.b, z4.b, z4.b\n" + "mov z4.d, z31.d"); + + COMPARE(sqrdmlah(z10.VnD(), z30.VnD(), z11.VnD(), 1), + "sqrdmlah z10.d, z30.d, z11.d[1]"); + COMPARE(sqrdmlah(z11.VnH(), z8.VnH(), z3.VnH(), 7), + "sqrdmlah z11.h, z8.h, z3.h[7]"); + COMPARE(sqrdmlah(z21.VnS(), z29.VnS(), z7.VnS(), 3), + "sqrdmlah z21.s, z29.s, z7.s[3]"); + COMPARE(sqrdmlsh(z2.VnD(), z16.VnD(), z14.VnD(), 0), + "sqrdmlsh z2.d, z16.d, z14.d[0]"); + COMPARE(sqrdmlsh(z23.VnH(), z13.VnH(), z6.VnH(), 5), + "sqrdmlsh z23.h, z13.h, z6.h[5]"); + COMPARE(sqrdmlsh(z27.VnS(), z8.VnS(), z4.VnS(), 2), + "sqrdmlsh z27.s, z8.s, z4.s[2]"); + + COMPARE_MACRO(Sqrdmlah(z24.VnD(), z0.VnD(), z24.VnD(), z13.VnD(), 0), + "movprfx z31, z0\n" + "sqrdmlah z31.d, z24.d, z13.d[0]\n" + "mov z24.d, z31.d"); + COMPARE_MACRO(Sqrdmlah(z4.VnH(), z0.VnH(), z29.VnH(), z4.VnH(), 6), + "movprfx z31, z0\n" + "sqrdmlah z31.h, z29.h, z4.h[6]\n" + "mov z4.d, z31.d"); + COMPARE_MACRO(Sqrdmlsh(z12.VnS(), z31.VnS(), z26.VnS(), z2.VnS(), 2), + "movprfx z12, z31\n" + "sqrdmlsh z12.s, z26.s, z2.s[2]"); + COMPARE_MACRO(Sqrdmlsh(z0.VnD(), z31.VnD(), z0.VnD(), z0.VnD(), 1), + "sqrdmlsh z31.d, z0.d, z0.d[1]\n" + "mov z0.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_integer_pairwise_add_accumulate_long) { + SETUP(); + + COMPARE(sadalp(z19.VnD(), p5.Merging(), z9.VnS()), + "sadalp z19.d, 
p5/m, z9.s"); + COMPARE(sadalp(z19.VnH(), p5.Merging(), z9.VnB()), + "sadalp z19.h, p5/m, z9.b"); + COMPARE(sadalp(z19.VnS(), p5.Merging(), z9.VnH()), + "sadalp z19.s, p5/m, z9.h"); + COMPARE(uadalp(z20.VnD(), p4.Merging(), z5.VnS()), + "uadalp z20.d, p4/m, z5.s"); + COMPARE(uadalp(z20.VnH(), p4.Merging(), z5.VnB()), + "uadalp z20.h, p4/m, z5.b"); + COMPARE(uadalp(z20.VnS(), p4.Merging(), z5.VnH()), + "uadalp z20.s, p4/m, z5.h"); + + CLEANUP(); +} + +TEST(sve2_integer_multiply_vectors_unpredicated) { + SETUP(); + + COMPARE(mul(z23.VnB(), z0.VnB(), z12.VnB()), "mul z23.b, z0.b, z12.b"); + COMPARE(mul(z24.VnD(), z1.VnD(), z14.VnD()), "mul z24.d, z1.d, z14.d"); + COMPARE(mul(z25.VnH(), z2.VnH(), z16.VnH()), "mul z25.h, z2.h, z16.h"); + COMPARE(mul(z26.VnS(), z3.VnS(), z18.VnS()), "mul z26.s, z3.s, z18.s"); + + COMPARE(pmul(z0.VnB(), z5.VnB(), z5.VnB()), "pmul z0.b, z5.b, z5.b"); + + COMPARE(smulh(z11.VnB(), z9.VnB(), z1.VnB()), "smulh z11.b, z9.b, z1.b"); + COMPARE(smulh(z21.VnD(), z19.VnD(), z16.VnD()), "smulh z21.d, z19.d, z16.d"); + COMPARE(smulh(z11.VnH(), z9.VnH(), z1.VnH()), "smulh z11.h, z9.h, z1.h"); + COMPARE(smulh(z21.VnS(), z19.VnS(), z16.VnS()), "smulh z21.s, z19.s, z16.s"); + + COMPARE(umulh(z5.VnB(), z9.VnB(), z5.VnB()), "umulh z5.b, z9.b, z5.b"); + COMPARE(umulh(z18.VnD(), z9.VnD(), z5.VnD()), "umulh z18.d, z9.d, z5.d"); + COMPARE(umulh(z18.VnH(), z9.VnH(), z9.VnH()), "umulh z18.h, z9.h, z9.h"); + COMPARE(umulh(z18.VnS(), z9.VnS(), z18.VnS()), "umulh z18.s, z9.s, z18.s"); + + CLEANUP(); +} + +TEST(sve2_arith_interleaved_long) { + SETUP(); + + COMPARE(saddlbt(z15.VnD(), z6.VnS(), z18.VnS()), + "saddlbt z15.d, z6.s, z18.s"); + COMPARE(saddlbt(z15.VnH(), z6.VnB(), z18.VnB()), + "saddlbt z15.h, z6.b, z18.b"); + COMPARE(saddlbt(z15.VnS(), z6.VnH(), z18.VnH()), + "saddlbt z15.s, z6.h, z18.h"); + COMPARE(ssublbt(z6.VnD(), z28.VnS(), z12.VnS()), + "ssublbt z6.d, z28.s, z12.s"); + COMPARE(ssublbt(z6.VnH(), z28.VnB(), z12.VnB()), + "ssublbt z6.h, z28.b, z12.b"); 
+ COMPARE(ssublbt(z6.VnS(), z28.VnH(), z12.VnH()), + "ssublbt z6.s, z28.h, z12.h"); + COMPARE(ssubltb(z11.VnD(), z18.VnS(), z19.VnS()), + "ssubltb z11.d, z18.s, z19.s"); + COMPARE(ssubltb(z11.VnH(), z18.VnB(), z19.VnB()), + "ssubltb z11.h, z18.b, z19.b"); + COMPARE(ssubltb(z11.VnS(), z18.VnH(), z19.VnH()), + "ssubltb z11.s, z18.h, z19.h"); + + CLEANUP(); +} + +TEST(sve2_int_unary_predicated) { + SETUP(); + + COMPARE_MACRO(Sqabs(z29.VnB(), p1.Merging(), z18.VnB()), + "sqabs z29.b, p1/m, z18.b"); + COMPARE_MACRO(Sqabs(z29.VnD(), p1.Merging(), z18.VnD()), + "sqabs z29.d, p1/m, z18.d"); + COMPARE_MACRO(Sqabs(z29.VnH(), p1.Merging(), z18.VnH()), + "sqabs z29.h, p1/m, z18.h"); + COMPARE_MACRO(Sqabs(z29.VnS(), p1.Merging(), z18.VnS()), + "sqabs z29.s, p1/m, z18.s"); + COMPARE_MACRO(Sqneg(z21.VnB(), p0.Merging(), z17.VnB()), + "sqneg z21.b, p0/m, z17.b"); + COMPARE_MACRO(Sqneg(z21.VnD(), p0.Merging(), z17.VnD()), + "sqneg z21.d, p0/m, z17.d"); + COMPARE_MACRO(Sqneg(z21.VnH(), p0.Merging(), z17.VnH()), + "sqneg z21.h, p0/m, z17.h"); + COMPARE_MACRO(Sqneg(z21.VnS(), p0.Merging(), z17.VnS()), + "sqneg z21.s, p0/m, z17.s"); + COMPARE_MACRO(Urecpe(z25.VnS(), p7.Merging(), z2.VnS()), + "urecpe z25.s, p7/m, z2.s"); + COMPARE_MACRO(Ursqrte(z4.VnS(), p3.Merging(), z3.VnS()), + "ursqrte z4.s, p3/m, z3.s"); + + COMPARE_MACRO(Sqabs(z29.VnS(), p1.Zeroing(), z18.VnS()), + "movprfx z29.s, p1/z, z29.s\n" + "sqabs z29.s, p1/m, z18.s"); + COMPARE_MACRO(Sqneg(z21.VnB(), p0.Zeroing(), z17.VnB()), + "movprfx z21.b, p0/z, z21.b\n" + "sqneg z21.b, p0/m, z17.b"); + COMPARE_MACRO(Urecpe(z25.VnS(), p7.Zeroing(), z2.VnS()), + "movprfx z25.s, p7/z, z25.s\n" + "urecpe z25.s, p7/m, z2.s"); + COMPARE_MACRO(Ursqrte(z4.VnS(), p3.Zeroing(), z3.VnS()), + "movprfx z4.s, p3/z, z4.s\n" + "ursqrte z4.s, p3/m, z3.s"); + CLEANUP(); +} + +TEST(sve2_arith_long) { + SETUP(); + + COMPARE_MACRO(Sabdlb(z2.VnD(), z21.VnS(), z3.VnS()), + "sabdlb z2.d, z21.s, z3.s"); + COMPARE_MACRO(Sabdlb(z2.VnH(), z21.VnB(), z3.VnB()), 
+ "sabdlb z2.h, z21.b, z3.b"); + COMPARE_MACRO(Sabdlb(z2.VnS(), z21.VnH(), z3.VnH()), + "sabdlb z2.s, z21.h, z3.h"); + COMPARE_MACRO(Sabdlt(z25.VnD(), z23.VnS(), z17.VnS()), + "sabdlt z25.d, z23.s, z17.s"); + COMPARE_MACRO(Sabdlt(z25.VnH(), z23.VnB(), z17.VnB()), + "sabdlt z25.h, z23.b, z17.b"); + COMPARE_MACRO(Sabdlt(z25.VnS(), z23.VnH(), z17.VnH()), + "sabdlt z25.s, z23.h, z17.h"); + COMPARE_MACRO(Saddlb(z24.VnD(), z30.VnS(), z16.VnS()), + "saddlb z24.d, z30.s, z16.s"); + COMPARE_MACRO(Saddlb(z24.VnH(), z30.VnB(), z16.VnB()), + "saddlb z24.h, z30.b, z16.b"); + COMPARE_MACRO(Saddlb(z24.VnS(), z30.VnH(), z16.VnH()), + "saddlb z24.s, z30.h, z16.h"); + COMPARE_MACRO(Saddlt(z21.VnD(), z29.VnS(), z31.VnS()), + "saddlt z21.d, z29.s, z31.s"); + COMPARE_MACRO(Saddlt(z21.VnH(), z29.VnB(), z31.VnB()), + "saddlt z21.h, z29.b, z31.b"); + COMPARE_MACRO(Saddlt(z21.VnS(), z29.VnH(), z31.VnH()), + "saddlt z21.s, z29.h, z31.h"); + COMPARE_MACRO(Ssublb(z4.VnD(), z23.VnS(), z7.VnS()), + "ssublb z4.d, z23.s, z7.s"); + COMPARE_MACRO(Ssublb(z4.VnH(), z23.VnB(), z7.VnB()), + "ssublb z4.h, z23.b, z7.b"); + COMPARE_MACRO(Ssublb(z4.VnS(), z23.VnH(), z7.VnH()), + "ssublb z4.s, z23.h, z7.h"); + COMPARE_MACRO(Ssublt(z12.VnD(), z13.VnS(), z6.VnS()), + "ssublt z12.d, z13.s, z6.s"); + COMPARE_MACRO(Ssublt(z12.VnH(), z13.VnB(), z6.VnB()), + "ssublt z12.h, z13.b, z6.b"); + COMPARE_MACRO(Ssublt(z12.VnS(), z13.VnH(), z6.VnH()), + "ssublt z12.s, z13.h, z6.h"); + COMPARE_MACRO(Uabdlb(z1.VnD(), z26.VnS(), z12.VnS()), + "uabdlb z1.d, z26.s, z12.s"); + COMPARE_MACRO(Uabdlb(z1.VnH(), z26.VnB(), z12.VnB()), + "uabdlb z1.h, z26.b, z12.b"); + COMPARE_MACRO(Uabdlb(z1.VnS(), z26.VnH(), z12.VnH()), + "uabdlb z1.s, z26.h, z12.h"); + COMPARE_MACRO(Uabdlt(z25.VnD(), z29.VnS(), z14.VnS()), + "uabdlt z25.d, z29.s, z14.s"); + COMPARE_MACRO(Uabdlt(z25.VnH(), z29.VnB(), z14.VnB()), + "uabdlt z25.h, z29.b, z14.b"); + COMPARE_MACRO(Uabdlt(z25.VnS(), z29.VnH(), z14.VnH()), + "uabdlt z25.s, z29.h, z14.h"); + 
COMPARE_MACRO(Uaddlb(z3.VnD(), z5.VnS(), z2.VnS()), + "uaddlb z3.d, z5.s, z2.s"); + COMPARE_MACRO(Uaddlb(z3.VnH(), z5.VnB(), z2.VnB()), + "uaddlb z3.h, z5.b, z2.b"); + COMPARE_MACRO(Uaddlb(z3.VnS(), z5.VnH(), z2.VnH()), + "uaddlb z3.s, z5.h, z2.h"); + COMPARE_MACRO(Uaddlt(z15.VnD(), z28.VnS(), z20.VnS()), + "uaddlt z15.d, z28.s, z20.s"); + COMPARE_MACRO(Uaddlt(z15.VnH(), z28.VnB(), z20.VnB()), + "uaddlt z15.h, z28.b, z20.b"); + COMPARE_MACRO(Uaddlt(z15.VnS(), z28.VnH(), z20.VnH()), + "uaddlt z15.s, z28.h, z20.h"); + COMPARE_MACRO(Usublb(z25.VnD(), z9.VnS(), z17.VnS()), + "usublb z25.d, z9.s, z17.s"); + COMPARE_MACRO(Usublb(z25.VnH(), z9.VnB(), z17.VnB()), + "usublb z25.h, z9.b, z17.b"); + COMPARE_MACRO(Usublb(z25.VnS(), z9.VnH(), z17.VnH()), + "usublb z25.s, z9.h, z17.h"); + COMPARE_MACRO(Usublt(z5.VnD(), z11.VnS(), z15.VnS()), + "usublt z5.d, z11.s, z15.s"); + COMPARE_MACRO(Usublt(z5.VnH(), z11.VnB(), z15.VnB()), + "usublt z5.h, z11.b, z15.b"); + COMPARE_MACRO(Usublt(z5.VnS(), z11.VnH(), z15.VnH()), + "usublt z5.s, z11.h, z15.h"); + + CLEANUP(); +} + +TEST(sve2_arith_wide) { + SETUP(); + + COMPARE_MACRO(Saddwb(z12.VnD(), z8.VnD(), z8.VnS()), + "saddwb z12.d, z8.d, z8.s"); + COMPARE_MACRO(Saddwb(z12.VnH(), z8.VnH(), z8.VnB()), + "saddwb z12.h, z8.h, z8.b"); + COMPARE_MACRO(Saddwb(z12.VnS(), z8.VnS(), z8.VnH()), + "saddwb z12.s, z8.s, z8.h"); + COMPARE_MACRO(Saddwt(z24.VnD(), z0.VnD(), z3.VnS()), + "saddwt z24.d, z0.d, z3.s"); + COMPARE_MACRO(Saddwt(z24.VnH(), z0.VnH(), z3.VnB()), + "saddwt z24.h, z0.h, z3.b"); + COMPARE_MACRO(Saddwt(z24.VnS(), z0.VnS(), z3.VnH()), + "saddwt z24.s, z0.s, z3.h"); + COMPARE_MACRO(Ssubwb(z7.VnD(), z28.VnD(), z11.VnS()), + "ssubwb z7.d, z28.d, z11.s"); + COMPARE_MACRO(Ssubwb(z7.VnH(), z28.VnH(), z11.VnB()), + "ssubwb z7.h, z28.h, z11.b"); + COMPARE_MACRO(Ssubwb(z7.VnS(), z28.VnS(), z11.VnH()), + "ssubwb z7.s, z28.s, z11.h"); + COMPARE_MACRO(Ssubwt(z29.VnD(), z25.VnD(), z20.VnS()), + "ssubwt z29.d, z25.d, z20.s"); + 
COMPARE_MACRO(Ssubwt(z29.VnH(), z25.VnH(), z20.VnB()), + "ssubwt z29.h, z25.h, z20.b"); + COMPARE_MACRO(Ssubwt(z29.VnS(), z25.VnS(), z20.VnH()), + "ssubwt z29.s, z25.s, z20.h"); + COMPARE_MACRO(Uaddwb(z31.VnD(), z8.VnD(), z25.VnS()), + "uaddwb z31.d, z8.d, z25.s"); + COMPARE_MACRO(Uaddwb(z31.VnH(), z8.VnH(), z25.VnB()), + "uaddwb z31.h, z8.h, z25.b"); + COMPARE_MACRO(Uaddwb(z31.VnS(), z8.VnS(), z25.VnH()), + "uaddwb z31.s, z8.s, z25.h"); + COMPARE_MACRO(Uaddwt(z17.VnD(), z15.VnD(), z2.VnS()), + "uaddwt z17.d, z15.d, z2.s"); + COMPARE_MACRO(Uaddwt(z17.VnH(), z15.VnH(), z2.VnB()), + "uaddwt z17.h, z15.h, z2.b"); + COMPARE_MACRO(Uaddwt(z17.VnS(), z15.VnS(), z2.VnH()), + "uaddwt z17.s, z15.s, z2.h"); + COMPARE_MACRO(Usubwb(z10.VnD(), z13.VnD(), z20.VnS()), + "usubwb z10.d, z13.d, z20.s"); + COMPARE_MACRO(Usubwb(z10.VnH(), z13.VnH(), z20.VnB()), + "usubwb z10.h, z13.h, z20.b"); + COMPARE_MACRO(Usubwb(z10.VnS(), z13.VnS(), z20.VnH()), + "usubwb z10.s, z13.s, z20.h"); + COMPARE_MACRO(Usubwt(z15.VnD(), z8.VnD(), z23.VnS()), + "usubwt z15.d, z8.d, z23.s"); + COMPARE_MACRO(Usubwt(z15.VnH(), z8.VnH(), z23.VnB()), + "usubwt z15.h, z8.h, z23.b"); + COMPARE_MACRO(Usubwt(z15.VnS(), z8.VnS(), z23.VnH()), + "usubwt z15.s, z8.s, z23.h"); + + CLEANUP(); +} + +TEST(sve2_shift_long) { + SETUP(); + + COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 0), "sshllb z2.h, z20.b, #0"); + COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 1), "sshllb z2.h, z20.b, #1"); + COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 5), "sshllb z2.h, z20.b, #5"); + COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 7), "sshllb z2.h, z20.b, #7"); + COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 0), "sshllb z2.s, z20.h, #0"); + COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 15), "sshllb z2.s, z20.h, #15"); + COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 0), "sshllb z2.d, z20.s, #0"); + COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 31), "sshllb z2.d, z20.s, #31"); + COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 0), "sshllt z27.h, z8.b, #0"); + 
COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 7), "sshllt z27.h, z8.b, #7"); + COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 0), "sshllt z27.s, z8.h, #0"); + COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 15), "sshllt z27.s, z8.h, #15"); + COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 0), "sshllt z27.d, z8.s, #0"); + COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 31), "sshllt z27.d, z8.s, #31"); + COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 0), "ushllb z8.h, z31.b, #0"); + COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 7), "ushllb z8.h, z31.b, #7"); + COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 0), "ushllb z8.s, z31.h, #0"); + COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 15), "ushllb z8.s, z31.h, #15"); + COMPARE_MACRO(Ushllb(z8.VnD(), z31.VnS(), 0), "ushllb z8.d, z31.s, #0"); + COMPARE_MACRO(Ushllb(z8.VnD(), z31.VnS(), 31), "ushllb z8.d, z31.s, #31"); + COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 0), "ushllt z3.h, z21.b, #0"); + COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 7), "ushllt z3.h, z21.b, #7"); + COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 0), "ushllt z3.s, z21.h, #0"); + COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 15), "ushllt z3.s, z21.h, #15"); + COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 0), "ushllt z3.d, z21.s, #0"); + COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 31), "ushllt z3.d, z21.s, #31"); + + CLEANUP(); +} + +TEST(sve2_shift_narrow) { + SETUP(); + + COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 1), "shrnb z7.b, z4.h, #1"); + COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 2), "shrnb z7.b, z4.h, #2"); + COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 5), "shrnb z7.b, z4.h, #5"); + COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 8), "shrnb z7.b, z4.h, #8"); + COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 1), "shrnb z7.h, z4.s, #1"); + COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 16), "shrnb z7.h, z4.s, #16"); + COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 1), "shrnb z7.s, z4.d, #1"); + COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 32), "shrnb z7.s, z4.d, #32"); + COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 1), "shrnt z21.b, z29.h, #1"); + 
COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 8), "shrnt z21.b, z29.h, #8"); + COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 1), "shrnt z21.h, z29.s, #1"); + COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 16), "shrnt z21.h, z29.s, #16"); + COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 1), "shrnt z21.s, z29.d, #1"); + COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 32), "shrnt z21.s, z29.d, #32"); + + COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 1), "rshrnb z5.b, z1.h, #1"); + COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 8), "rshrnb z5.b, z1.h, #8"); + COMPARE_MACRO(Rshrnb(z5.VnH(), z1.VnS(), 16), "rshrnb z5.h, z1.s, #16"); + COMPARE_MACRO(Rshrnb(z5.VnS(), z1.VnD(), 32), "rshrnb z5.s, z1.d, #32"); + COMPARE_MACRO(Rshrnt(z5.VnB(), z1.VnH(), 8), "rshrnt z5.b, z1.h, #8"); + COMPARE_MACRO(Rshrnt(z5.VnH(), z1.VnS(), 16), "rshrnt z5.h, z1.s, #16"); + COMPARE_MACRO(Rshrnt(z5.VnS(), z1.VnD(), 32), "rshrnt z5.s, z1.d, #32"); + + COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 1), "sqrshrnb z1.b, z1.h, #1"); + COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 8), "sqrshrnb z1.b, z1.h, #8"); + COMPARE_MACRO(Sqrshrnb(z1.VnH(), z1.VnS(), 16), "sqrshrnb z1.h, z1.s, #16"); + COMPARE_MACRO(Sqrshrnb(z1.VnS(), z1.VnD(), 32), "sqrshrnb z1.s, z1.d, #32"); + COMPARE_MACRO(Sqrshrnt(z24.VnB(), z19.VnH(), 8), "sqrshrnt z24.b, z19.h, #8"); + COMPARE_MACRO(Sqrshrnt(z24.VnH(), z19.VnS(), 16), + "sqrshrnt z24.h, z19.s, #16"); + COMPARE_MACRO(Sqrshrnt(z24.VnS(), z19.VnD(), 32), + "sqrshrnt z24.s, z19.d, #32"); + + COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 1), "sqshrnb z25.b, z1.h, #1"); + COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 8), "sqshrnb z25.b, z1.h, #8"); + COMPARE_MACRO(Sqshrnb(z25.VnH(), z1.VnS(), 16), "sqshrnb z25.h, z1.s, #16"); + COMPARE_MACRO(Sqshrnb(z25.VnS(), z1.VnD(), 32), "sqshrnb z25.s, z1.d, #32"); + COMPARE_MACRO(Sqshrnt(z0.VnB(), z25.VnH(), 8), "sqshrnt z0.b, z25.h, #8"); + COMPARE_MACRO(Sqshrnt(z0.VnH(), z25.VnS(), 16), "sqshrnt z0.h, z25.s, #16"); + COMPARE_MACRO(Sqshrnt(z0.VnS(), z25.VnD(), 32), "sqshrnt z0.s, 
z25.d, #32"); + + COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 1), "uqrshrnb z30.b, z25.h, #1"); + COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 8), "uqrshrnb z30.b, z25.h, #8"); + COMPARE_MACRO(Uqrshrnb(z30.VnH(), z25.VnS(), 16), + "uqrshrnb z30.h, z25.s, #16"); + COMPARE_MACRO(Uqrshrnb(z30.VnS(), z25.VnD(), 32), + "uqrshrnb z30.s, z25.d, #32"); + COMPARE_MACRO(Uqrshrnt(z3.VnB(), z25.VnH(), 8), "uqrshrnt z3.b, z25.h, #8"); + COMPARE_MACRO(Uqrshrnt(z3.VnH(), z25.VnS(), 16), "uqrshrnt z3.h, z25.s, #16"); + COMPARE_MACRO(Uqrshrnt(z3.VnS(), z25.VnD(), 32), "uqrshrnt z3.s, z25.d, #32"); + + COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 1), "uqshrnb z17.b, z4.h, #1"); + COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 8), "uqshrnb z17.b, z4.h, #8"); + COMPARE_MACRO(Uqshrnb(z17.VnH(), z4.VnS(), 16), "uqshrnb z17.h, z4.s, #16"); + COMPARE_MACRO(Uqshrnb(z17.VnS(), z4.VnD(), 32), "uqshrnb z17.s, z4.d, #32"); + COMPARE_MACRO(Uqshrnt(z28.VnB(), z18.VnH(), 8), "uqshrnt z28.b, z18.h, #8"); + COMPARE_MACRO(Uqshrnt(z28.VnH(), z18.VnS(), 16), "uqshrnt z28.h, z18.s, #16"); + COMPARE_MACRO(Uqshrnt(z28.VnS(), z18.VnD(), 32), "uqshrnt z28.s, z18.d, #32"); + + COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 1), + "sqrshrunb z23.b, z28.h, #1"); + COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 8), + "sqrshrunb z23.b, z28.h, #8"); + COMPARE_MACRO(Sqrshrunb(z23.VnH(), z28.VnS(), 16), + "sqrshrunb z23.h, z28.s, #16"); + COMPARE_MACRO(Sqrshrunb(z23.VnS(), z28.VnD(), 32), + "sqrshrunb z23.s, z28.d, #32"); + COMPARE_MACRO(Sqrshrunt(z9.VnB(), z15.VnH(), 8), "sqrshrunt z9.b, z15.h, #8"); + COMPARE_MACRO(Sqrshrunt(z9.VnH(), z15.VnS(), 16), + "sqrshrunt z9.h, z15.s, #16"); + COMPARE_MACRO(Sqrshrunt(z9.VnS(), z15.VnD(), 32), + "sqrshrunt z9.s, z15.d, #32"); + + COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 1), "sqshrunb z25.b, z10.h, #1"); + COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 8), "sqshrunb z25.b, z10.h, #8"); + COMPARE_MACRO(Sqshrunb(z25.VnH(), z10.VnS(), 16), + "sqshrunb z25.h, z10.s, #16"); + 
COMPARE_MACRO(Sqshrunb(z25.VnS(), z10.VnD(), 32), + "sqshrunb z25.s, z10.d, #32"); + COMPARE_MACRO(Sqshrunt(z20.VnB(), z3.VnH(), 8), "sqshrunt z20.b, z3.h, #8"); + COMPARE_MACRO(Sqshrunt(z20.VnH(), z3.VnS(), 16), "sqshrunt z20.h, z3.s, #16"); + COMPARE_MACRO(Sqshrunt(z20.VnS(), z3.VnD(), 32), "sqshrunt z20.s, z3.d, #32"); + + CLEANUP(); +} + +TEST(sve2_aba_long) { + SETUP(); + + COMPARE(sabalb(z13.VnD(), z20.VnS(), z26.VnS()), + "sabalb z13.d, z20.s, z26.s"); + COMPARE(sabalb(z13.VnH(), z20.VnB(), z26.VnB()), + "sabalb z13.h, z20.b, z26.b"); + COMPARE(sabalb(z13.VnS(), z20.VnH(), z26.VnH()), + "sabalb z13.s, z20.h, z26.h"); + COMPARE(sabalt(z14.VnD(), z19.VnS(), z10.VnS()), + "sabalt z14.d, z19.s, z10.s"); + COMPARE(sabalt(z14.VnH(), z19.VnB(), z10.VnB()), + "sabalt z14.h, z19.b, z10.b"); + COMPARE(sabalt(z14.VnS(), z19.VnH(), z10.VnH()), + "sabalt z14.s, z19.h, z10.h"); + COMPARE(uabalb(z11.VnD(), z25.VnS(), z11.VnS()), + "uabalb z11.d, z25.s, z11.s"); + COMPARE(uabalb(z11.VnH(), z25.VnB(), z11.VnB()), + "uabalb z11.h, z25.b, z11.b"); + COMPARE(uabalb(z11.VnS(), z25.VnH(), z11.VnH()), + "uabalb z11.s, z25.h, z11.h"); + COMPARE(uabalt(z4.VnD(), z2.VnS(), z31.VnS()), "uabalt z4.d, z2.s, z31.s"); + COMPARE(uabalt(z4.VnH(), z2.VnB(), z31.VnB()), "uabalt z4.h, z2.b, z31.b"); + COMPARE(uabalt(z4.VnS(), z2.VnH(), z31.VnH()), "uabalt z4.s, z2.h, z31.h"); + + COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()), + "sabalb z12.h, z3.b, z30.b"); + COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()), + "sabalt z12.h, z3.b, z12.b"); + COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()), + "sabalb z12.h, z12.b, z30.b"); + COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), ""); + COMPARE_MACRO(Sabalb(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()), + "movprfx z12, z13\n" + "sabalb z12.h, z3.b, z30.b"); + COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()), + "movprfx z12, z3\n" + "sabalt z12.h, z3.b, z30.b"); + 
COMPARE_MACRO(Sabalb(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()), + "movprfx z12, z30\n" + "sabalb z12.h, z3.b, z30.b"); + COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()), + "mov z12.d, z3.d"); + COMPARE_MACRO(Sabalb(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "sabalb z12.h, z31.b, z3.b"); + COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "sabalt z12.h, z3.b, z31.b"); + + COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()), + "uabalt z12.h, z3.b, z30.b"); + COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()), + "uabalb z12.h, z3.b, z12.b"); + COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()), + "uabalt z12.h, z12.b, z30.b"); + COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), ""); + COMPARE_MACRO(Uabalt(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()), + "movprfx z12, z13\n" + "uabalt z12.h, z3.b, z30.b"); + COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()), + "movprfx z12, z3\n" + "uabalb z12.h, z3.b, z30.b"); + COMPARE_MACRO(Uabalt(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()), + "movprfx z12, z30\n" + "uabalt z12.h, z3.b, z30.b"); + COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()), + "mov z12.d, z3.d"); + COMPARE_MACRO(Uabalt(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "uabalt z12.h, z31.b, z3.b"); + COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()), + "mov z31.d, z12.d\n" + "movprfx z12, z3\n" + "uabalb z12.h, z3.b, z31.b"); + CLEANUP(); +} + +TEST(sve2_add_sub_carry) { + SETUP(); + + COMPARE(adclb(z25.VnS(), z17.VnS(), z24.VnS()), "adclb z25.s, z17.s, z24.s"); + COMPARE(adclb(z25.VnD(), z17.VnD(), z24.VnD()), "adclb z25.d, z17.d, z24.d"); + COMPARE(adclt(z0.VnS(), z2.VnS(), z15.VnS()), "adclt z0.s, z2.s, z15.s"); + COMPARE(adclt(z0.VnD(), z2.VnD(), z15.VnD()), "adclt z0.d, z2.d, z15.d"); + 
COMPARE(sbclb(z17.VnS(), z10.VnS(), z8.VnS()), "sbclb z17.s, z10.s, z8.s"); + COMPARE(sbclb(z17.VnD(), z10.VnD(), z8.VnD()), "sbclb z17.d, z10.d, z8.d"); + COMPARE(sbclt(z20.VnS(), z0.VnS(), z13.VnS()), "sbclt z20.s, z0.s, z13.s"); + COMPARE(sbclt(z20.VnD(), z0.VnD(), z13.VnD()), "sbclt z20.d, z0.d, z13.d"); + + COMPARE_MACRO(Adclb(z25.VnS(), z25.VnS(), z17.VnS(), z24.VnS()), + "adclb z25.s, z17.s, z24.s"); + COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z17.VnS(), z24.VnS()), + "movprfx z25, z20\n" + "adclb z25.s, z17.s, z24.s"); + COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z24.VnS()), + "movprfx z31, z20\n" + "adclb z31.s, z25.s, z24.s\n" + "mov z25.d, z31.d"); + COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z24.VnS(), z25.VnS()), + "movprfx z31, z20\n" + "adclb z31.s, z24.s, z25.s\n" + "mov z25.d, z31.d"); + COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()), + "movprfx z31, z20\n" + "adclb z31.s, z25.s, z25.s\n" + "mov z25.d, z31.d"); + COMPARE_MACRO(Adclt(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()), + "movprfx z31, z20\n" + "adclt z31.s, z25.s, z25.s\n" + "mov z25.d, z31.d"); + + COMPARE_MACRO(Sbclb(z30.VnS(), z30.VnS(), z7.VnS(), z29.VnS()), + "sbclb z30.s, z7.s, z29.s"); + COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z7.VnS(), z29.VnS()), + "movprfx z30, z2\n" + "sbclb z30.s, z7.s, z29.s"); + COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z29.VnS()), + "movprfx z31, z2\n" + "sbclb z31.s, z30.s, z29.s\n" + "mov z30.d, z31.d"); + COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z29.VnS(), z30.VnS()), + "movprfx z31, z2\n" + "sbclb z31.s, z29.s, z30.s\n" + "mov z30.d, z31.d"); + COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()), + "movprfx z31, z2\n" + "sbclb z31.s, z30.s, z30.s\n" + "mov z30.d, z31.d"); + COMPARE_MACRO(Sbclt(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()), + "movprfx z31, z2\n" + "sbclt z31.s, z30.s, z30.s\n" + "mov z30.d, z31.d"); + CLEANUP(); +} + +TEST(sve2_add_sub_high) { + SETUP(); + + COMPARE_MACRO(Addhnb(z29.VnS(), 
z19.VnD(), z2.VnD()), + "addhnb z29.s, z19.d, z2.d"); + COMPARE_MACRO(Addhnb(z29.VnB(), z19.VnH(), z2.VnH()), + "addhnb z29.b, z19.h, z2.h"); + COMPARE_MACRO(Addhnb(z29.VnH(), z19.VnS(), z2.VnS()), + "addhnb z29.h, z19.s, z2.s"); + COMPARE_MACRO(Addhnt(z8.VnS(), z12.VnD(), z6.VnD()), + "addhnt z8.s, z12.d, z6.d"); + COMPARE_MACRO(Addhnt(z8.VnB(), z12.VnH(), z6.VnH()), + "addhnt z8.b, z12.h, z6.h"); + COMPARE_MACRO(Addhnt(z8.VnH(), z12.VnS(), z6.VnS()), + "addhnt z8.h, z12.s, z6.s"); + COMPARE_MACRO(Raddhnb(z0.VnS(), z11.VnD(), z10.VnD()), + "raddhnb z0.s, z11.d, z10.d"); + COMPARE_MACRO(Raddhnb(z0.VnB(), z11.VnH(), z10.VnH()), + "raddhnb z0.b, z11.h, z10.h"); + COMPARE_MACRO(Raddhnb(z0.VnH(), z11.VnS(), z10.VnS()), + "raddhnb z0.h, z11.s, z10.s"); + COMPARE_MACRO(Raddhnt(z23.VnS(), z27.VnD(), z9.VnD()), + "raddhnt z23.s, z27.d, z9.d"); + COMPARE_MACRO(Raddhnt(z23.VnB(), z27.VnH(), z9.VnH()), + "raddhnt z23.b, z27.h, z9.h"); + COMPARE_MACRO(Raddhnt(z23.VnH(), z27.VnS(), z9.VnS()), + "raddhnt z23.h, z27.s, z9.s"); + COMPARE_MACRO(Rsubhnb(z30.VnS(), z29.VnD(), z11.VnD()), + "rsubhnb z30.s, z29.d, z11.d"); + COMPARE_MACRO(Rsubhnb(z30.VnB(), z29.VnH(), z11.VnH()), + "rsubhnb z30.b, z29.h, z11.h"); + COMPARE_MACRO(Rsubhnb(z30.VnH(), z29.VnS(), z11.VnS()), + "rsubhnb z30.h, z29.s, z11.s"); + COMPARE_MACRO(Rsubhnt(z25.VnS(), z7.VnD(), z18.VnD()), + "rsubhnt z25.s, z7.d, z18.d"); + COMPARE_MACRO(Rsubhnt(z25.VnB(), z7.VnH(), z18.VnH()), + "rsubhnt z25.b, z7.h, z18.h"); + COMPARE_MACRO(Rsubhnt(z25.VnH(), z7.VnS(), z18.VnS()), + "rsubhnt z25.h, z7.s, z18.s"); + COMPARE_MACRO(Subhnb(z31.VnS(), z31.VnD(), z7.VnD()), + "subhnb z31.s, z31.d, z7.d"); + COMPARE_MACRO(Subhnb(z31.VnB(), z31.VnH(), z7.VnH()), + "subhnb z31.b, z31.h, z7.h"); + COMPARE_MACRO(Subhnb(z31.VnH(), z31.VnS(), z7.VnS()), + "subhnb z31.h, z31.s, z7.s"); + COMPARE_MACRO(Subhnt(z31.VnS(), z22.VnD(), z27.VnD()), + "subhnt z31.s, z22.d, z27.d"); + COMPARE_MACRO(Subhnt(z31.VnB(), z22.VnH(), z27.VnH()), + "subhnt 
z31.b, z22.h, z27.h"); + COMPARE_MACRO(Subhnt(z31.VnH(), z22.VnS(), z27.VnS()), + "subhnt z31.h, z22.s, z27.s"); + + CLEANUP(); +} + +TEST(sve2_complex_addition) { + SETUP(); + + COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90), + "cadd z5.b, z5.b, z12.b, #90"); + COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 90), + "cadd z5.d, z5.d, z12.d, #90"); + COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 90), + "cadd z5.h, z5.h, z12.h, #90"); + COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 90), + "cadd z5.s, z5.s, z12.s, #90"); + COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 270), + "cadd z5.b, z5.b, z12.b, #270"); + COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 270), + "cadd z5.d, z5.d, z12.d, #270"); + COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 270), + "cadd z5.h, z5.h, z12.h, #270"); + COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 270), + "cadd z5.s, z5.s, z12.s, #270"); + COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z12.VnS(), 270), + "movprfx z5, z6\n" + "cadd z5.s, z5.s, z12.s, #270"); + COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z5.VnS(), 270), + "mov z31.d, z5.d\n" + "movprfx z5, z6\n" + "cadd z5.s, z5.s, z31.s, #270"); + + COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90), + "sqcadd z20.b, z20.b, z23.b, #90"); + COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 90), + "sqcadd z20.d, z20.d, z23.d, #90"); + COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 90), + "sqcadd z20.h, z20.h, z23.h, #90"); + COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 270), + "sqcadd z20.b, z20.b, z23.b, #270"); + COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 270), + "sqcadd z20.d, z20.d, z23.d, #270"); + COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 270), + "sqcadd z20.h, z20.h, z23.h, #270"); + COMPARE_MACRO(Sqcadd(z20.VnS(), z20.VnS(), z23.VnS(), 270), + "sqcadd z20.s, z20.s, z23.s, #270"); + COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z23.VnH(), 270), + "movprfx z20, z21\n" + "sqcadd z20.h, z20.h, z23.h, #270"); + 
COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z20.VnH(), 270), + "mov z31.d, z20.d\n" + "movprfx z20, z21\n" + "sqcadd z20.h, z20.h, z31.h, #270"); + + CLEANUP(); +} + +TEST(sve2_bit_permute) { + SETUP(); + + COMPARE_MACRO(Bdep(z18.VnB(), z10.VnB(), z0.VnB()), + "bdep z18.b, z10.b, z0.b"); + COMPARE_MACRO(Bdep(z18.VnD(), z10.VnD(), z0.VnD()), + "bdep z18.d, z10.d, z0.d"); + COMPARE_MACRO(Bdep(z18.VnH(), z10.VnH(), z0.VnH()), + "bdep z18.h, z10.h, z0.h"); + COMPARE_MACRO(Bdep(z18.VnS(), z10.VnS(), z0.VnS()), + "bdep z18.s, z10.s, z0.s"); + COMPARE_MACRO(Bext(z6.VnB(), z2.VnB(), z5.VnB()), "bext z6.b, z2.b, z5.b"); + COMPARE_MACRO(Bext(z6.VnD(), z2.VnD(), z5.VnD()), "bext z6.d, z2.d, z5.d"); + COMPARE_MACRO(Bext(z6.VnH(), z2.VnH(), z5.VnH()), "bext z6.h, z2.h, z5.h"); + COMPARE_MACRO(Bext(z6.VnS(), z2.VnS(), z5.VnS()), "bext z6.s, z2.s, z5.s"); + COMPARE_MACRO(Bgrp(z24.VnB(), z9.VnB(), z5.VnB()), "bgrp z24.b, z9.b, z5.b"); + COMPARE_MACRO(Bgrp(z24.VnD(), z9.VnD(), z5.VnD()), "bgrp z24.d, z9.d, z5.d"); + COMPARE_MACRO(Bgrp(z24.VnH(), z9.VnH(), z5.VnH()), "bgrp z24.h, z9.h, z5.h"); + COMPARE_MACRO(Bgrp(z24.VnS(), z9.VnS(), z5.VnS()), "bgrp z24.s, z9.s, z5.s"); + + CLEANUP(); +} + +TEST(sve2_integer_multiply_long_vector) { + SETUP(); + + COMPARE(sqdmullb(z1.VnD(), z31.VnS(), z21.VnS()), + "sqdmullb z1.d, z31.s, z21.s"); + COMPARE(sqdmullb(z2.VnH(), z30.VnB(), z22.VnB()), + "sqdmullb z2.h, z30.b, z22.b"); + COMPARE(sqdmullb(z3.VnS(), z29.VnH(), z23.VnH()), + "sqdmullb z3.s, z29.h, z23.h"); + COMPARE(sqdmullb(z1.VnS(), z27.VnH(), z3.VnH(), 7), + "sqdmullb z1.s, z27.h, z3.h[7]"); + COMPARE(sqdmullb(z27.VnD(), z16.VnS(), z5.VnS(), 3), + "sqdmullb z27.d, z16.s, z5.s[3]"); + + COMPARE(sqdmullt(z2.VnD(), z1.VnS(), z5.VnS()), "sqdmullt z2.d, z1.s, z5.s"); + COMPARE(sqdmullt(z12.VnH(), z11.VnB(), z15.VnB()), + "sqdmullt z12.h, z11.b, z15.b"); + COMPARE(sqdmullt(z20.VnS(), z21.VnH(), z25.VnH()), + "sqdmullt z20.s, z21.h, z25.h"); + COMPARE(sqdmullt(z23.VnS(), z28.VnH(), z2.VnH(), 
0), + "sqdmullt z23.s, z28.h, z2.h[0]"); + COMPARE(sqdmullt(z7.VnD(), z4.VnS(), z0.VnS(), 0), + "sqdmullt z7.d, z4.s, z0.s[0]"); + + // Feature `SVEPmull128` is not supported. + // COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()), + // "pmullb z12.q, z21.d, z12.d"); + COMPARE(pmullb(z12.VnH(), z21.VnB(), z12.VnB()), + "pmullb z12.h, z21.b, z12.b"); + COMPARE(pmullt(z31.VnD(), z30.VnS(), z26.VnS()), + "pmullt z31.d, z30.s, z26.s"); + + COMPARE(smullb(z10.VnD(), z4.VnS(), z4.VnS()), "smullb z10.d, z4.s, z4.s"); + COMPARE(smullb(z11.VnH(), z14.VnB(), z14.VnB()), + "smullb z11.h, z14.b, z14.b"); + COMPARE(smullb(z12.VnS(), z24.VnH(), z24.VnH()), + "smullb z12.s, z24.h, z24.h"); + + COMPARE(smullt(z31.VnD(), z26.VnS(), z5.VnS()), "smullt z31.d, z26.s, z5.s"); + COMPARE(smullt(z21.VnH(), z16.VnB(), z5.VnB()), "smullt z21.h, z16.b, z5.b"); + COMPARE(smullt(z11.VnS(), z6.VnH(), z5.VnH()), "smullt z11.s, z6.h, z5.h"); + + COMPARE(umullb(z12.VnD(), z5.VnS(), z2.VnS()), "umullb z12.d, z5.s, z2.s"); + COMPARE(umullb(z12.VnH(), z15.VnB(), z12.VnB()), + "umullb z12.h, z15.b, z12.b"); + COMPARE(umullb(z12.VnS(), z25.VnH(), z22.VnH()), + "umullb z12.s, z25.h, z22.h"); + + COMPARE(umullt(z24.VnD(), z6.VnS(), z6.VnS()), "umullt z24.d, z6.s, z6.s"); + COMPARE(umullt(z24.VnH(), z7.VnB(), z16.VnB()), "umullt z24.h, z7.b, z16.b"); + COMPARE(umullt(z24.VnS(), z8.VnH(), z26.VnH()), "umullt z24.s, z8.h, z26.h"); + + CLEANUP(); +} + +TEST(sve2_xar) { + SETUP(); + + COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 1), + "xar z16.b, z16.b, z13.b, #1"); + COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 8), + "xar z16.b, z16.b, z13.b, #8"); + COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 1), + "xar z16.h, z16.h, z13.h, #1"); + COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 16), + "xar z16.h, z16.h, z13.h, #16"); + COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 1), + "xar z16.s, z16.s, z13.s, #1"); + COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 32), + "xar z16.s, z16.s, 
z13.s, #32"); + COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 1), + "xar z16.d, z16.d, z13.d, #1"); + COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 64), + "xar z16.d, z16.d, z13.d, #64"); + + COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z16.VnD(), 64), + "xar z16.d, z16.d, z13.d, #64"); + COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z12.VnD(), 64), + "movprfx z16, z13\n" + "xar z16.d, z16.d, z12.d, #64"); + COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z16.VnD(), 64), + "xar z16.d, z16.d, z16.d, #64"); + + CLEANUP(); +} + +TEST(sve2_histogram) { + SETUP(); + + COMPARE_MACRO(Histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS()), + "histcnt z24.s, p6/z, z3.s, z10.s"); + COMPARE_MACRO(Histcnt(z24.VnD(), p6.Zeroing(), z3.VnD(), z10.VnD()), + "histcnt z24.d, p6/z, z3.d, z10.d"); + COMPARE_MACRO(Histseg(z22.VnB(), z14.VnB(), z8.VnB()), + "histseg z22.b, z14.b, z8.b"); + + CLEANUP(); +} + +TEST(sve2_table) { + SETUP(); + + COMPARE_MACRO(Tbl(z17.VnB(), z1.VnB(), z2.VnB(), z22.VnB()), + "tbl z17.b, {z3.b, z4.b}, z22.b"); + COMPARE_MACRO(Tbl(z17.VnD(), z1.VnD(), z2.VnD(), z22.VnD()), + "tbl z17.d, {z3.d, z4.d}, z22.d"); + COMPARE_MACRO(Tbl(z17.VnH(), z1.VnH(), z2.VnH(), z22.VnH()), + "tbl z17.h, {z3.h, z4.h}, z22.h"); + COMPARE_MACRO(Tbl(z17.VnS(), z31.VnS(), z0.VnS(), z22.VnS()), + "tbl z17.s, {z31.s, z0.s}, z22.s"); + COMPARE_MACRO(Tbx(z22.VnB(), z15.VnB(), z19.VnB()), + "tbx z22.b, z15.b, z19.b"); + COMPARE_MACRO(Tbx(z22.VnD(), z15.VnD(), z19.VnD()), + "tbx z22.d, z15.d, z19.d"); + COMPARE_MACRO(Tbx(z22.VnH(), z15.VnH(), z19.VnH()), + "tbx z22.h, z15.h, z19.h"); + COMPARE_MACRO(Tbx(z22.VnS(), z15.VnS(), z19.VnS()), + "tbx z22.s, z15.s, z19.s"); + + CLEANUP(); +} + +TEST(sve2_cdot) { + SETUP(); + + COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 0), + "cdot z7.s, z4.b, z10.b, #0"); + COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 0), + "cdot z7.d, z4.h, z10.h, #0"); + COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 90), + "cdot z7.s, z4.b, 
z10.b, #90"); + COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 90), + "cdot z7.d, z4.h, z10.h, #90"); + COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 180), + "cdot z7.s, z4.b, z10.b, #180"); + COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 180), + "cdot z7.d, z4.h, z10.h, #180"); + COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 270), + "cdot z7.s, z4.b, z10.b, #270"); + COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 270), + "cdot z7.d, z4.h, z10.h, #270"); + + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0), + "movprfx z0, z1\n" + "cdot z0.s, z2.b, z3.b, #0"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 0), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "cdot z0.s, z31.b, z3.b, #0"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 0), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "cdot z0.s, z2.b, z31.b, #0"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 0), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "cdot z0.s, z31.b, z31.b, #0"); + + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 0, 0), + "cdot z18.s, z26.b, z7.b[0], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 1, 0), + "cdot z18.s, z26.b, z7.b[1], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 0), + "cdot z18.s, z26.b, z7.b[2], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 3, 0), + "cdot z18.s, z26.b, z7.b[3], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 90), + "cdot z18.s, z26.b, z7.b[2], #90"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 180), + "cdot z18.s, z26.b, z7.b[2], #180"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 270), + "cdot z18.s, z26.b, z7.b[2], #270"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 0, 0), + "cdot z5.d, z7.h, z1.h[0], #0"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), 
z1.VnH(), 1, 0), + "cdot z5.d, z7.h, z1.h[1], #0"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 90), + "cdot z5.d, z7.h, z1.h[1], #90"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 180), + "cdot z5.d, z7.h, z1.h[1], #180"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 270), + "cdot z5.d, z7.h, z1.h[1], #270"); + + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0, 0), + "movprfx z0, z1\n" + "cdot z0.s, z2.b, z3.b[0], #0"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 1, 90), + "movprfx z31, z1\n" + "cdot z31.s, z0.b, z3.b[1], #90\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 2, 180), + "movprfx z31, z1\n" + "cdot z31.s, z2.b, z0.b[2], #180\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 3, 270), + "movprfx z31, z1\n" + "cdot z31.s, z0.b, z0.b[3], #270\n" + "mov z0.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_ldnt1) { + SETUP(); + + COMPARE_MACRO(Ldnt1b(z24.VnS(), p4.Zeroing(), SVEMemOperand(z18.VnS(), x13)), + "ldnt1b {z24.s}, p4/z, [z18.s, x13]"); + COMPARE_MACRO(Ldnt1h(z3.VnS(), p4.Zeroing(), SVEMemOperand(z15.VnS(), x14)), + "ldnt1h {z3.s}, p4/z, [z15.s, x14]"); + COMPARE_MACRO(Ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11)), + "ldnt1sb {z7.s}, p3/z, [z18.s, x11]"); + COMPARE_MACRO(Ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19)), + "ldnt1sh {z17.s}, p5/z, [z31.s, x19]"); + COMPARE_MACRO(Ldnt1w(z18.VnS(), p5.Zeroing(), SVEMemOperand(z9.VnS(), x17)), + "ldnt1w {z18.s}, p5/z, [z9.s, x17]"); + + COMPARE_MACRO(Ldnt1b(z27.VnD(), p4.Zeroing(), SVEMemOperand(z27.VnD(), x24)), + "ldnt1b {z27.d}, p4/z, [z27.d, x24]"); + COMPARE_MACRO(Ldnt1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(z10.VnD(), x0)), + "ldnt1d {z25.d}, p0/z, [z10.d, x0]"); + COMPARE_MACRO(Ldnt1h(z16.VnD(), p2.Zeroing(), SVEMemOperand(z10.VnD(), x9)), + "ldnt1h {z16.d}, p2/z, [z10.d, x9]"); + 
COMPARE_MACRO(Ldnt1sb(z25.VnD(), p0.Zeroing(), SVEMemOperand(z0.VnD(), x3)), + "ldnt1sb {z25.d}, p0/z, [z0.d, x3]"); + COMPARE_MACRO(Ldnt1sh(z4.VnD(), p1.Zeroing(), SVEMemOperand(z31.VnD(), x4)), + "ldnt1sh {z4.d}, p1/z, [z31.d, x4]"); + COMPARE_MACRO(Ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10)), + "ldnt1sw {z3.d}, p7/z, [z1.d, x10]"); + COMPARE_MACRO(Ldnt1w(z17.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), x12)), + "ldnt1w {z17.d}, p5/z, [z8.d, x12]"); + + CLEANUP(); +} + +TEST(sve2_stnt1) { + SETUP(); + + COMPARE_MACRO(Stnt1b(z29.VnD(), p7, SVEMemOperand(z29.VnD(), x21)), + "stnt1b {z29.d}, p7, [z29.d, x21]"); + COMPARE_MACRO(Stnt1d(z19.VnD(), p4, SVEMemOperand(z3.VnD(), x16)), + "stnt1d {z19.d}, p4, [z3.d, x16]"); + COMPARE_MACRO(Stnt1h(z11.VnS(), p3, SVEMemOperand(z2.VnS(), x16)), + "stnt1h {z11.s}, p3, [z2.s, x16]"); + COMPARE_MACRO(Stnt1h(z3.VnD(), p3, SVEMemOperand(z10.VnD(), x16)), + "stnt1h {z3.d}, p3, [z10.d, x16]"); + COMPARE_MACRO(Stnt1w(z11.VnS(), p4, SVEMemOperand(z14.VnS(), x15)), + "stnt1w {z11.s}, p4, [z14.s, x15]"); + COMPARE_MACRO(Stnt1w(z7.VnD(), p0, SVEMemOperand(z11.VnD(), x10)), + "stnt1w {z7.d}, p0, [z11.d, x10]"); + + CLEANUP(); +} + +TEST(sve2_bitwise_ternary) { + SETUP(); + + COMPARE_MACRO(Bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()), + "bcax z6.d, z6.d, z12.d, z1.d"); + COMPARE_MACRO(Bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()), + "bsl z21.d, z21.d, z2.d, z2.d"); + COMPARE_MACRO(Bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()), + "bsl1n z18.d, z18.d, z8.d, z7.d"); + COMPARE_MACRO(Bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()), + "bsl2n z7.d, z7.d, z3.d, z19.d"); + COMPARE_MACRO(Eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()), + "eor3 z10.d, z10.d, z24.d, z23.d"); + COMPARE_MACRO(Nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()), + "nbsl z17.d, z17.d, z21.d, z27.d"); + + COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z27.VnD()), + "movprfx z17, z18\n" + "nbsl z17.d, z17.d, z21.d, z27.d"); + 
COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z27.VnD()), + "movprfx z31, z18\n" + "nbsl z31.d, z31.d, z17.d, z27.d\n" + "mov z17.d, z31.d"); + COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z17.VnD()), + "movprfx z31, z18\n" + "nbsl z31.d, z31.d, z21.d, z17.d\n" + "mov z17.d, z31.d"); + COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z17.VnD()), + "movprfx z31, z18\n" + "nbsl z31.d, z31.d, z17.d, z17.d\n" + "mov z17.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_int_compare_scalars) { + SETUP(); + + COMPARE_MACRO(Whilege(p0.VnB(), w20, w29), "whilege p0.b, w20, w29"); + COMPARE_MACRO(Whilege(p0.VnB(), x20, x29), "whilege p0.b, x20, x29"); + COMPARE_MACRO(Whilege(p0.VnD(), w20, w29), "whilege p0.d, w20, w29"); + COMPARE_MACRO(Whilege(p0.VnD(), x20, x29), "whilege p0.d, x20, x29"); + COMPARE_MACRO(Whilege(p0.VnH(), w20, w29), "whilege p0.h, w20, w29"); + COMPARE_MACRO(Whilege(p0.VnH(), x20, x29), "whilege p0.h, x20, x29"); + COMPARE_MACRO(Whilege(p0.VnS(), w20, w29), "whilege p0.s, w20, w29"); + COMPARE_MACRO(Whilege(p0.VnS(), x20, x29), "whilege p0.s, x20, x29"); + COMPARE_MACRO(Whilegt(p11.VnB(), w24, w3), "whilegt p11.b, w24, w3"); + COMPARE_MACRO(Whilegt(p11.VnD(), w24, w3), "whilegt p11.d, w24, w3"); + COMPARE_MACRO(Whilegt(p11.VnH(), x24, x3), "whilegt p11.h, x24, x3"); + COMPARE_MACRO(Whilegt(p11.VnS(), x24, x3), "whilegt p11.s, x24, x3"); + COMPARE_MACRO(Whilehi(p2.VnB(), x20, x8), "whilehi p2.b, x20, x8"); + COMPARE_MACRO(Whilehi(p2.VnD(), x20, x8), "whilehi p2.d, x20, x8"); + COMPARE_MACRO(Whilehi(p2.VnH(), w20, w8), "whilehi p2.h, w20, w8"); + COMPARE_MACRO(Whilehi(p2.VnS(), w20, w8), "whilehi p2.s, w20, w8"); + COMPARE_MACRO(Whilehs(p4.VnB(), w22, w9), "whilehs p4.b, w22, w9"); + COMPARE_MACRO(Whilehs(p4.VnD(), x22, x9), "whilehs p4.d, x22, x9"); + COMPARE_MACRO(Whilehs(p4.VnH(), w22, w9), "whilehs p4.h, w22, w9"); + COMPARE_MACRO(Whilehs(p4.VnS(), x22, x9), "whilehs p4.s, x22, x9"); + + COMPARE_MACRO(Whilerw(p7.VnB(), x25, x27), "whilerw 
p7.b, x25, x27"); + COMPARE_MACRO(Whilerw(p7.VnD(), x25, x28), "whilerw p7.d, x25, x28"); + COMPARE_MACRO(Whilerw(p7.VnH(), x25, x29), "whilerw p7.h, x25, x29"); + COMPARE_MACRO(Whilerw(p7.VnS(), x25, x30), "whilerw p7.s, x25, x30"); + COMPARE_MACRO(Whilerw(p7.VnS(), x25, xzr), "whilerw p7.s, x25, xzr"); + COMPARE_MACRO(Whilewr(p8.VnB(), x14, x14), "whilewr p8.b, x14, x14"); + COMPARE_MACRO(Whilewr(p8.VnD(), x14, x13), "whilewr p8.d, x14, x13"); + COMPARE_MACRO(Whilewr(p8.VnH(), x14, x12), "whilewr p8.h, x14, x12"); + COMPARE_MACRO(Whilewr(p8.VnS(), x14, x11), "whilewr p8.s, x14, x11"); + COMPARE_MACRO(Whilewr(p8.VnS(), xzr, x11), "whilewr p8.s, xzr, x11"); + + CLEANUP(); +} + +TEST(sve2_splice) { + SETUP(); + + COMPARE_MACRO(Splice(z31.VnB(), p0, z21.VnB(), z22.VnB()), + "splice z31.b, p0, {z21.b, z22.b}"); + COMPARE_MACRO(Splice(z31.VnD(), p0, z21.VnD(), z22.VnD()), + "splice z31.d, p0, {z21.d, z22.d}"); + COMPARE_MACRO(Splice(z31.VnH(), p0, z21.VnH(), z22.VnH()), + "splice z31.h, p0, {z21.h, z22.h}"); + COMPARE_MACRO(Splice(z31.VnS(), p0, z31.VnS(), z0.VnS()), + "splice z31.s, p0, z31.s, z0.s"); + COMPARE_MACRO(Splice(z30.VnS(), p0, z31.VnS(), z0.VnS()), + "splice z30.s, p0, {z31.s, z0.s}"); + + CLEANUP(); +} + +TEST(sve2_mul_index) { + SETUP(); + + COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z7.VnH(), 0), + "mul z18.h, z5.h, z7.h[0]"); + COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 2), + "mul z18.h, z5.h, z2.h[2]"); + COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 6), + "mul z18.h, z5.h, z2.h[6]"); + COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 7), + "mul z18.h, z5.h, z2.h[7]"); + COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z7.VnS(), 0), + "mul z8.s, z15.s, z7.s[0]"); + COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z0.VnS(), 3), + "mul z8.s, z15.s, z0.s[3]"); + COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z15.VnD(), 0), + "mul z8.d, z15.d, z15.d[0]"); + COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z0.VnD(), 1), + "mul z8.d, z15.d, z0.d[1]"); + + CLEANUP(); +} + 
+TEST(sve2_mla_mls_index) { + SETUP(); + + COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0), + "mla z1.h, z9.h, z0.h[0]"); + COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2), + "mla z1.h, z9.h, z1.h[2]"); + COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6), + "mla z1.h, z9.h, z2.h[6]"); + COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7), + "mla z1.h, z9.h, z3.h[7]"); + COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0), + "mla z10.s, z22.s, z7.s[0]"); + COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3), + "mla z10.s, z22.s, z0.s[3]"); + COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0), + "mla z4.d, z0.d, z15.d[0]"); + COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1), + "mla z4.d, z0.d, z0.d[1]"); + + COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z1.VnH(), 0), + "movprfx z4, z5\n" + "mla z4.h, z0.h, z1.h[0]"); + COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z1.VnH(), 0), + "movprfx z31, z5\n" + "mla z31.h, z4.h, z1.h[0]\n" + "mov z4.d, z31.d"); + COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z4.VnH(), 0), + "movprfx z31, z5\n" + "mla z31.h, z0.h, z4.h[0]\n" + "mov z4.d, z31.d"); + COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z4.VnH(), 0), + "movprfx z31, z5\n" + "mla z31.h, z4.h, z4.h[0]\n" + "mov z4.d, z31.d"); + + COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0), + "mls z1.h, z9.h, z0.h[0]"); + COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2), + "mls z1.h, z9.h, z1.h[2]"); + COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6), + "mls z1.h, z9.h, z2.h[6]"); + COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7), + "mls z1.h, z9.h, z3.h[7]"); + COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0), + "mls z10.s, z22.s, z7.s[0]"); + COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3), + "mls z10.s, z22.s, z0.s[3]"); + COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0), + "mls z4.d, z0.d, 
z15.d[0]"); + COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1), + "mls z4.d, z0.d, z0.d[1]"); + + COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z1.VnS(), 0), + "movprfx z4, z5\n" + "mls z4.s, z0.s, z1.s[0]"); + COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z1.VnS(), 0), + "movprfx z31, z5\n" + "mls z31.s, z4.s, z1.s[0]\n" + "mov z4.d, z31.d"); + COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z4.VnS(), 0), + "movprfx z31, z5\n" + "mls z31.s, z0.s, z4.s[0]\n" + "mov z4.d, z31.d"); + COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z4.VnS(), 0), + "movprfx z31, z5\n" + "mls z31.s, z4.s, z4.s[0]\n" + "mov z4.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_mla_long) { + SETUP(); + + COMPARE_MACRO(Smlalb(z1.VnD(), z1.VnD(), z3.VnS(), z23.VnS()), + "smlalb z1.d, z3.s, z23.s"); + COMPARE_MACRO(Smlalb(z1.VnH(), z1.VnH(), z3.VnB(), z23.VnB()), + "smlalb z1.h, z3.b, z23.b"); + COMPARE_MACRO(Smlalb(z1.VnS(), z1.VnS(), z3.VnH(), z23.VnH()), + "smlalb z1.s, z3.h, z23.h"); + COMPARE_MACRO(Smlalt(z31.VnD(), z31.VnD(), z24.VnS(), z29.VnS()), + "smlalt z31.d, z24.s, z29.s"); + COMPARE_MACRO(Smlalt(z31.VnH(), z31.VnH(), z24.VnB(), z29.VnB()), + "smlalt z31.h, z24.b, z29.b"); + COMPARE_MACRO(Smlalt(z31.VnS(), z31.VnS(), z24.VnH(), z29.VnH()), + "smlalt z31.s, z24.h, z29.h"); + COMPARE_MACRO(Smlslb(z5.VnD(), z5.VnD(), z26.VnS(), z27.VnS()), + "smlslb z5.d, z26.s, z27.s"); + COMPARE_MACRO(Smlslb(z5.VnH(), z5.VnH(), z26.VnB(), z27.VnB()), + "smlslb z5.h, z26.b, z27.b"); + COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z26.VnH(), z27.VnH()), + "smlslb z5.s, z26.h, z27.h"); + COMPARE_MACRO(Smlslt(z23.VnD(), z23.VnD(), z24.VnS(), z25.VnS()), + "smlslt z23.d, z24.s, z25.s"); + COMPARE_MACRO(Smlslt(z23.VnH(), z23.VnH(), z24.VnB(), z25.VnB()), + "smlslt z23.h, z24.b, z25.b"); + COMPARE_MACRO(Smlslt(z23.VnS(), z23.VnS(), z24.VnH(), z25.VnH()), + "smlslt z23.s, z24.h, z25.h"); + COMPARE_MACRO(Umlalb(z31.VnD(), z31.VnD(), z9.VnS(), z21.VnS()), + "umlalb z31.d, z9.s, z21.s"); + 
COMPARE_MACRO(Umlalb(z31.VnH(), z31.VnH(), z9.VnB(), z21.VnB()), + "umlalb z31.h, z9.b, z21.b"); + COMPARE_MACRO(Umlalb(z31.VnS(), z31.VnS(), z9.VnH(), z21.VnH()), + "umlalb z31.s, z9.h, z21.h"); + COMPARE_MACRO(Umlalt(z11.VnD(), z11.VnD(), z5.VnS(), z22.VnS()), + "umlalt z11.d, z5.s, z22.s"); + COMPARE_MACRO(Umlalt(z11.VnH(), z11.VnH(), z5.VnB(), z22.VnB()), + "umlalt z11.h, z5.b, z22.b"); + COMPARE_MACRO(Umlalt(z11.VnS(), z11.VnS(), z5.VnH(), z22.VnH()), + "umlalt z11.s, z5.h, z22.h"); + COMPARE_MACRO(Umlslb(z28.VnD(), z28.VnD(), z13.VnS(), z9.VnS()), + "umlslb z28.d, z13.s, z9.s"); + COMPARE_MACRO(Umlslb(z28.VnH(), z28.VnH(), z13.VnB(), z9.VnB()), + "umlslb z28.h, z13.b, z9.b"); + COMPARE_MACRO(Umlslb(z28.VnS(), z28.VnS(), z13.VnH(), z9.VnH()), + "umlslb z28.s, z13.h, z9.h"); + COMPARE_MACRO(Umlslt(z9.VnD(), z9.VnD(), z12.VnS(), z30.VnS()), + "umlslt z9.d, z12.s, z30.s"); + COMPARE_MACRO(Umlslt(z9.VnH(), z9.VnH(), z12.VnB(), z30.VnB()), + "umlslt z9.h, z12.b, z30.b"); + COMPARE_MACRO(Umlslt(z9.VnS(), z9.VnS(), z12.VnH(), z30.VnH()), + "umlslt z9.s, z12.h, z30.h"); + + COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z3.VnS()), + "movprfx z0, z1\n" + "smlalt z0.d, z2.s, z3.s"); + COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z3.VnS()), + "movprfx z31, z1\n" + "smlalt z31.d, z0.s, z3.s\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z0.VnS()), + "movprfx z31, z1\n" + "smlalt z31.d, z2.s, z0.s\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()), + "movprfx z31, z1\n" + "smlalt z31.d, z0.s, z0.s\n" + "mov z0.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_complex_integer_multiply_add) { + SETUP(); + + COMPARE(sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0), + "sqrdcmlah z31.b, z15.b, z20.b, #0"); + COMPARE(sqrdcmlah(z31.VnD(), z15.VnD(), z20.VnD(), 90), + "sqrdcmlah z31.d, z15.d, z20.d, #90"); + COMPARE(sqrdcmlah(z31.VnH(), z15.VnH(), z20.VnH(), 180), + "sqrdcmlah z31.h, z15.h, z20.h, #180"); + 
COMPARE(sqrdcmlah(z31.VnS(), z15.VnS(), z20.VnS(), 270), + "sqrdcmlah z31.s, z15.s, z20.s, #270"); + + COMPARE(sqrdcmlah(z14.VnS(), z11.VnS(), z8.VnS(), 1, 0), + "sqrdcmlah z14.s, z11.s, z8.s[1], #0"); + COMPARE(sqrdcmlah(z31.VnH(), z2.VnH(), z3.VnH(), 2, 180), + "sqrdcmlah z31.h, z2.h, z3.h[2], #180"); + + COMPARE_MACRO(Sqrdcmlah(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "sqrdcmlah z0.b, z31.b, z3.b, #0"); + COMPARE_MACRO(Sqrdcmlah(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "sqrdcmlah z0.h, z2.h, z31.h, #90"); + COMPARE_MACRO(Sqrdcmlah(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 0, 180), + "movprfx z31, z1\n" + "sqrdcmlah z31.s, z0.s, z0.s[0], #180\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Sqrdcmlah(z5.VnH(), z1.VnH(), z2.VnH(), z5.VnH(), 3, 270), + "movprfx z31, z1\n" + "sqrdcmlah z31.h, z2.h, z5.h[3], #270\n" + "mov z5.d, z31.d"); + COMPARE_MACRO(Sqrdcmlah(z3.VnH(), z3.VnH(), z3.VnH(), z3.VnH(), 2, 90), + "sqrdcmlah z3.h, z3.h, z3.h[2], #90"); + + COMPARE(cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0), "cmla z19.b, z7.b, z2.b, #0"); + COMPARE(cmla(z19.VnD(), z7.VnD(), z2.VnD(), 90), + "cmla z19.d, z7.d, z2.d, #90"); + COMPARE(cmla(z19.VnH(), z7.VnH(), z2.VnH(), 180), + "cmla z19.h, z7.h, z2.h, #180"); + COMPARE(cmla(z19.VnS(), z7.VnS(), z2.VnS(), 270), + "cmla z19.s, z7.s, z2.s, #270"); + + COMPARE_MACRO(Cmla(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "cmla z0.b, z31.b, z3.b, #0"); + COMPARE_MACRO(Cmla(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "cmla z0.h, z2.h, z31.h, #90"); + COMPARE_MACRO(Cmla(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 180), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "cmla z0.s, z31.s, z31.s, #180"); + COMPARE_MACRO(Cmla(z0.VnD(), z1.VnD(), z2.VnD(), z0.VnD(), 270), + "mov z31.d, z0.d\n" + "movprfx z0, z1\n" + "cmla z0.d, z2.d, z31.d, #270"); + + 
COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z0.VnS(), 1, 0), + "cmla z17.s, z29.s, z0.s[1], #0"); + COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z1.VnS(), 0, 0), + "cmla z17.s, z29.s, z1.s[0], #0"); + COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z8.VnS(), 1, 90), + "cmla z17.s, z29.s, z8.s[1], #90"); + COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z15.VnS(), 0, 180), + "cmla z17.s, z29.s, z15.s[0], #180"); + COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z0.VnH(), 3, 0), + "cmla z18.h, z22.h, z0.h[3], #0"); + COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z1.VnH(), 2, 0), + "cmla z18.h, z22.h, z1.h[2], #0"); + COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z4.VnH(), 1, 270), + "cmla z18.h, z22.h, z4.h[1], #270"); + COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z7.VnH(), 0, 90), + "cmla z18.h, z22.h, z7.h[0], #90"); + + COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z7.VnH(), 0, 90), + "movprfx z1, z19\n" + "cmla z1.h, z22.h, z7.h[0], #90"); + COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z7.VnH(), 0, 90), + "movprfx z31, z19\n" + "cmla z31.h, z1.h, z7.h[0], #90\n" + "mov z1.d, z31.d"); + COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z1.VnH(), 0, 90), + "movprfx z31, z19\n" + "cmla z31.h, z22.h, z1.h[0], #90\n" + "mov z1.d, z31.d"); + COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z1.VnH(), 0, 90), + "movprfx z31, z19\n" + "cmla z31.h, z1.h, z1.h[0], #90\n" + "mov z1.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_saturating_multiply_add_long) { + SETUP(); + + COMPARE(sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()), + "sqdmlalb z6.d, z19.s, z25.s"); + COMPARE(sqdmlalb(z6.VnH(), z19.VnB(), z25.VnB()), + "sqdmlalb z6.h, z19.b, z25.b"); + COMPARE(sqdmlalb(z6.VnS(), z19.VnH(), z25.VnH()), + "sqdmlalb z6.s, z19.h, z25.h"); + COMPARE(sqdmlalt(z11.VnD(), z0.VnS(), z10.VnS()), + "sqdmlalt z11.d, z0.s, z10.s"); + COMPARE(sqdmlalt(z11.VnH(), z0.VnB(), z10.VnB()), + "sqdmlalt z11.h, z0.b, z10.b"); + 
COMPARE(sqdmlalt(z11.VnS(), z0.VnH(), z10.VnH()), + "sqdmlalt z11.s, z0.h, z10.h"); + COMPARE(sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()), + "sqdmlslb z16.d, z26.s, z25.s"); + COMPARE(sqdmlslb(z16.VnH(), z26.VnB(), z25.VnB()), + "sqdmlslb z16.h, z26.b, z25.b"); + COMPARE(sqdmlslb(z16.VnS(), z26.VnH(), z25.VnH()), + "sqdmlslb z16.s, z26.h, z25.h"); + COMPARE(sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()), + "sqdmlslt z21.d, z23.s, z9.s"); + COMPARE(sqdmlslt(z21.VnH(), z23.VnB(), z9.VnB()), + "sqdmlslt z21.h, z23.b, z9.b"); + COMPARE(sqdmlslt(z21.VnS(), z23.VnH(), z9.VnH()), + "sqdmlslt z21.s, z23.h, z9.h"); + + COMPARE(sqdmlalb(z1.VnD(), z27.VnS(), z11.VnS(), 0), + "sqdmlalb z1.d, z27.s, z11.s[0]"); + COMPARE(sqdmlalb(z30.VnS(), z6.VnH(), z3.VnH(), 0), + "sqdmlalb z30.s, z6.h, z3.h[0]"); + COMPARE(sqdmlalt(z30.VnD(), z25.VnS(), z15.VnS(), 1), + "sqdmlalt z30.d, z25.s, z15.s[1]"); + COMPARE(sqdmlalt(z10.VnS(), z1.VnH(), z1.VnH(), 3), + "sqdmlalt z10.s, z1.h, z1.h[3]"); + COMPARE(sqdmlslb(z15.VnD(), z27.VnS(), z15.VnS(), 2), + "sqdmlslb z15.d, z27.s, z15.s[2]"); + COMPARE(sqdmlslb(z5.VnS(), z5.VnH(), z7.VnH(), 6), + "sqdmlslb z5.s, z5.h, z7.h[6]"); + COMPARE(sqdmlslt(z21.VnD(), z28.VnS(), z13.VnS(), 3), + "sqdmlslt z21.d, z28.s, z13.s[3]"); + COMPARE(sqdmlslt(z5.VnS(), z3.VnH(), z1.VnH(), 7), + "sqdmlslt z5.s, z3.h, z1.h[7]"); + + COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z19.VnS(), z25.VnS()), + "movprfx z6, z16\n" + "sqdmlalb z6.d, z19.s, z25.s"); + COMPARE_MACRO(Sqdmlalt(z4.VnH(), z26.VnH(), z4.VnB(), z24.VnB()), + "movprfx z31, z26\n" + "sqdmlalt z31.h, z4.b, z24.b\n" + "mov z4.d, z31.d"); + COMPARE_MACRO(Sqdmlslb(z2.VnS(), z6.VnS(), z17.VnH(), z2.VnH()), + "movprfx z31, z6\n" + "sqdmlslb z31.s, z17.h, z2.h\n" + "mov z2.d, z31.d"); + COMPARE_MACRO(Sqdmlslt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()), + "movprfx z31, z1\n" + "sqdmlslt z31.d, z0.s, z0.s\n" + "mov z0.d, z31.d"); + + COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z9.VnS(), z15.VnS(), 0), + "movprfx z6, z16\n" + 
"sqdmlalb z6.d, z9.s, z15.s[0]"); + COMPARE_MACRO(Sqdmlalt(z4.VnS(), z6.VnS(), z4.VnH(), z4.VnH(), 3), + "movprfx z31, z6\n" + "sqdmlalt z31.s, z4.h, z4.h[3]\n" + "mov z4.d, z31.d"); + COMPARE_MACRO(Sqdmlslb(z2.VnS(), z16.VnS(), z17.VnH(), z2.VnH(), 6), + "movprfx z31, z16\n" + "sqdmlslb z31.s, z17.h, z2.h[6]\n" + "mov z2.d, z31.d"); + COMPARE_MACRO(Sqdmlslt(z6.VnD(), z1.VnD(), z6.VnS(), z6.VnS(), 2), + "movprfx z31, z1\n" + "sqdmlslt z31.d, z6.s, z6.s[2]\n" + "mov z6.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_saturating_multiply_add_interleaved_long) { + SETUP(); + + COMPARE(sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()), + "sqdmlalbt z23.d, z29.s, z26.s"); + COMPARE(sqdmlalbt(z23.VnH(), z29.VnB(), z26.VnB()), + "sqdmlalbt z23.h, z29.b, z26.b"); + COMPARE(sqdmlalbt(z23.VnS(), z29.VnH(), z26.VnH()), + "sqdmlalbt z23.s, z29.h, z26.h"); + COMPARE(sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()), + "sqdmlslbt z26.d, z23.s, z4.s"); + COMPARE(sqdmlslbt(z26.VnH(), z23.VnB(), z4.VnB()), + "sqdmlslbt z26.h, z23.b, z4.b"); + COMPARE(sqdmlslbt(z26.VnS(), z23.VnH(), z4.VnH()), + "sqdmlslbt z26.s, z23.h, z4.h"); + + COMPARE_MACRO(Sqdmlalbt(z29.VnD(), z0.VnD(), z29.VnS(), z26.VnS()), + "movprfx z31, z0\n" + "sqdmlalbt z31.d, z29.s, z26.s\n" + "mov z29.d, z31.d"); + COMPARE_MACRO(Sqdmlalbt(z26.VnH(), z0.VnH(), z29.VnB(), z26.VnB()), + "movprfx z31, z0\n" + "sqdmlalbt z31.h, z29.b, z26.b\n" + "mov z26.d, z31.d"); + COMPARE_MACRO(Sqdmlslbt(z23.VnS(), z31.VnS(), z26.VnH(), z29.VnH()), + "movprfx z23, z31\n" + "sqdmlslbt z23.s, z26.h, z29.h"); + COMPARE_MACRO(Sqdmlslbt(z4.VnD(), z31.VnD(), z4.VnS(), z4.VnS()), + "sqdmlslbt z31.d, z4.s, z4.s\n" + "mov z4.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_floating_multiply_add_long_vector) { + SETUP(); + + COMPARE(fmlalb(z16.VnS(), z18.VnH(), z29.VnH()), + "fmlalb z16.s, z18.h, z29.h"); + COMPARE(fmlalb(z3.VnS(), z8.VnH(), z7.VnH()), "fmlalb z3.s, z8.h, z7.h"); + COMPARE(fmlalt(z18.VnS(), z13.VnH(), z5.VnH()), "fmlalt z18.s, z13.h, z5.h"); + 
COMPARE(fmlalt(z18.VnS(), z7.VnH(), z16.VnH()), "fmlalt z18.s, z7.h, z16.h"); + COMPARE(fmlslb(z16.VnS(), z10.VnH(), z1.VnH()), "fmlslb z16.s, z10.h, z1.h"); + COMPARE(fmlslb(z25.VnS(), z11.VnH(), z0.VnH()), "fmlslb z25.s, z11.h, z0.h"); + COMPARE(fmlslt(z3.VnS(), z17.VnH(), z14.VnH()), "fmlslt z3.s, z17.h, z14.h"); + COMPARE(fmlslt(z5.VnS(), z1.VnH(), z7.VnH()), "fmlslt z5.s, z1.h, z7.h"); + + CLEANUP(); +} + +TEST(sve2_mla_long_index) { + SETUP(); + + COMPARE_MACRO(Smlalb(z11.VnD(), z11.VnD(), z29.VnS(), z0.VnS(), 3), + "smlalb z11.d, z29.s, z0.s[3]"); + COMPARE_MACRO(Smlalb(z18.VnS(), z18.VnS(), z17.VnH(), z0.VnH(), 7), + "smlalb z18.s, z17.h, z0.h[7]"); + COMPARE_MACRO(Smlalt(z10.VnD(), z10.VnD(), z30.VnS(), z15.VnS(), 0), + "smlalt z10.d, z30.s, z15.s[0]"); + COMPARE_MACRO(Smlalt(z23.VnS(), z23.VnS(), z31.VnH(), z7.VnH(), 0), + "smlalt z23.s, z31.h, z7.h[0]"); + COMPARE_MACRO(Smlslb(z12.VnD(), z12.VnD(), z23.VnS(), z3.VnS(), 1), + "smlslb z12.d, z23.s, z3.s[1]"); + COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z4.VnH(), z4.VnH(), 2), + "smlslb z5.s, z4.h, z4.h[2]"); + COMPARE_MACRO(Smlslt(z7.VnD(), z7.VnD(), z9.VnS(), z6.VnS(), 3), + "smlslt z7.d, z9.s, z6.s[3]"); + COMPARE_MACRO(Smlslt(z9.VnS(), z9.VnS(), z21.VnH(), z3.VnH(), 4), + "smlslt z9.s, z21.h, z3.h[4]"); + COMPARE_MACRO(Umlalb(z9.VnD(), z9.VnD(), z1.VnS(), z11.VnS(), 0), + "umlalb z9.d, z1.s, z11.s[0]"); + COMPARE_MACRO(Umlalb(z9.VnS(), z9.VnS(), z5.VnH(), z1.VnH(), 6), + "umlalb z9.s, z5.h, z1.h[6]"); + COMPARE_MACRO(Umlalt(z6.VnD(), z6.VnD(), z17.VnS(), z14.VnS(), 1), + "umlalt z6.d, z17.s, z14.s[1]"); + COMPARE_MACRO(Umlalt(z9.VnS(), z9.VnS(), z11.VnH(), z3.VnH(), 7), + "umlalt z9.s, z11.h, z3.h[7]"); + COMPARE_MACRO(Umlslb(z12.VnD(), z12.VnD(), z15.VnS(), z9.VnS(), 2), + "umlslb z12.d, z15.s, z9.s[2]"); + COMPARE_MACRO(Umlslb(z14.VnS(), z14.VnS(), z10.VnH(), z2.VnH(), 0), + "umlslb z14.s, z10.h, z2.h[0]"); + COMPARE_MACRO(Umlslt(z12.VnD(), z12.VnD(), z28.VnS(), z8.VnS(), 3), + "umlslt z12.d, z28.s, 
z8.s[3]"); + COMPARE_MACRO(Umlslt(z24.VnS(), z24.VnS(), z12.VnH(), z6.VnH(), 1), + "umlslt z24.s, z12.h, z6.h[1]"); + + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z6.VnH(), 1), + "movprfx z2, z23\n" + "umlslt z2.s, z12.h, z6.h[1]"); + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z6.VnH(), 1), + "movprfx z31, z23\n" + "umlslt z31.s, z2.h, z6.h[1]\n" + "mov z2.d, z31.d"); + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z2.VnH(), 1), + "movprfx z31, z23\n" + "umlslt z31.s, z12.h, z2.h[1]\n" + "mov z2.d, z31.d"); + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z2.VnH(), 1), + "movprfx z31, z23\n" + "umlslt z31.s, z2.h, z2.h[1]\n" + "mov z2.d, z31.d"); + + CLEANUP(); +} + +TEST(sve2_mul_long_index) { + SETUP(); + + COMPARE_MACRO(Smullb(z13.VnS(), z31.VnH(), z0.VnH(), 0), + "smullb z13.s, z31.h, z0.h[0]"); + COMPARE_MACRO(Smullb(z8.VnD(), z22.VnS(), z0.VnS(), 0), + "smullb z8.d, z22.s, z0.s[0]"); + COMPARE_MACRO(Smullt(z14.VnS(), z30.VnH(), z7.VnH(), 7), + "smullt z14.s, z30.h, z7.h[7]"); + COMPARE_MACRO(Smullt(z22.VnD(), z28.VnS(), z15.VnS(), 3), + "smullt z22.d, z28.s, z15.s[3]"); + COMPARE_MACRO(Umullb(z24.VnD(), z20.VnS(), z5.VnS(), 1), + "umullb z24.d, z20.s, z5.s[1]"); + COMPARE_MACRO(Umullb(z28.VnS(), z19.VnH(), z3.VnH(), 4), + "umullb z28.s, z19.h, z3.h[4]"); + COMPARE_MACRO(Umullt(z0.VnD(), z31.VnS(), z8.VnS(), 2), + "umullt z0.d, z31.s, z8.s[2]"); + COMPARE_MACRO(Umullt(z14.VnS(), z20.VnH(), z5.VnH(), 6), + "umullt z14.s, z20.h, z5.h[6]"); + + CLEANUP(); +} + +TEST(sve2_sat_double_mul_high) { + SETUP(); + + COMPARE_MACRO(Sqdmulh(z18.VnB(), z25.VnB(), z1.VnB()), + "sqdmulh z18.b, z25.b, z1.b"); + COMPARE_MACRO(Sqdmulh(z18.VnD(), z25.VnD(), z1.VnD()), + "sqdmulh z18.d, z25.d, z1.d"); + COMPARE_MACRO(Sqdmulh(z18.VnH(), z25.VnH(), z1.VnH()), + "sqdmulh z18.h, z25.h, z1.h"); + COMPARE_MACRO(Sqdmulh(z18.VnS(), z25.VnS(), z1.VnS()), + "sqdmulh z18.s, z25.s, z1.s"); + COMPARE_MACRO(Sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB()), + "sqrdmulh 
z21.b, z21.b, z27.b"); + COMPARE_MACRO(Sqrdmulh(z21.VnD(), z21.VnD(), z27.VnD()), + "sqrdmulh z21.d, z21.d, z27.d"); + COMPARE_MACRO(Sqrdmulh(z21.VnH(), z21.VnH(), z27.VnH()), + "sqrdmulh z21.h, z21.h, z27.h"); + COMPARE_MACRO(Sqrdmulh(z21.VnS(), z21.VnS(), z27.VnS()), + "sqrdmulh z21.s, z21.s, z27.s"); + + CLEANUP(); +} + +TEST(sve2_flogb) { + SETUP(); + + COMPARE_MACRO(Flogb(z15.VnH(), p0.Merging(), z3.VnH()), + "flogb z15.h, p0/m, z3.h"); + COMPARE_MACRO(Flogb(z15.VnS(), p0.Merging(), z3.VnS()), + "flogb z15.s, p0/m, z3.s"); + COMPARE_MACRO(Flogb(z15.VnD(), p0.Merging(), z3.VnD()), + "flogb z15.d, p0/m, z3.d"); + COMPARE_MACRO(Flogb(z15.VnD(), p0.Zeroing(), z3.VnD()), + "movprfx z15.d, p0/z, z15.d\n" + "flogb z15.d, p0/m, z3.d"); + + CLEANUP(); +} + +TEST(sve2_fp_pair) { + SETUP(); + + COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()), + "faddp z14.d, p1/m, z14.d, z26.d"); + COMPARE_MACRO(Faddp(z14.VnH(), p1.Merging(), z14.VnH(), z26.VnH()), + "faddp z14.h, p1/m, z14.h, z26.h"); + COMPARE_MACRO(Faddp(z14.VnS(), p1.Merging(), z14.VnS(), z26.VnS()), + "faddp z14.s, p1/m, z14.s, z26.s"); + COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()), + "fmaxnmp z2.d, p1/m, z2.d, z14.d"); + COMPARE_MACRO(Fmaxnmp(z2.VnH(), p1.Merging(), z2.VnH(), z14.VnH()), + "fmaxnmp z2.h, p1/m, z2.h, z14.h"); + COMPARE_MACRO(Fmaxnmp(z2.VnS(), p1.Merging(), z2.VnS(), z14.VnS()), + "fmaxnmp z2.s, p1/m, z2.s, z14.s"); + COMPARE_MACRO(Fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()), + "fmaxp z22.d, p1/m, z22.d, z3.d"); + COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z22.VnH(), z3.VnH()), + "fmaxp z22.h, p1/m, z22.h, z3.h"); + COMPARE_MACRO(Fmaxp(z22.VnS(), p1.Merging(), z22.VnS(), z3.VnS()), + "fmaxp z22.s, p1/m, z22.s, z3.s"); + COMPARE_MACRO(Fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()), + "fminnmp z1.d, p0/m, z1.d, z14.d"); + COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z1.VnH(), z14.VnH()), + "fminnmp z1.h, p0/m, z1.h, z14.h"); + 
COMPARE_MACRO(Fminnmp(z1.VnS(), p0.Merging(), z1.VnS(), z14.VnS()), + "fminnmp z1.s, p0/m, z1.s, z14.s"); + COMPARE_MACRO(Fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()), + "fminp z16.d, p3/m, z16.d, z11.d"); + COMPARE_MACRO(Fminp(z16.VnH(), p3.Merging(), z16.VnH(), z11.VnH()), + "fminp z16.h, p3/m, z16.h, z11.h"); + COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z16.VnS(), z11.VnS()), + "fminp z16.s, p3/m, z16.s, z11.s"); + + COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z13.VnD(), z26.VnD()), + "movprfx z14.d, p1/m, z13.d\n" + "faddp z14.d, p1/m, z14.d, z26.d"); + COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z3.VnD(), z2.VnD()), + "mov z31.d, z2.d\n" + "movprfx z2.d, p1/m, z3.d\n" + "fmaxnmp z2.d, p1/m, z2.d, z31.d"); + COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z23.VnH(), z3.VnH()), + "movprfx z22.h, p1/m, z23.h\n" + "fmaxp z22.h, p1/m, z22.h, z3.h"); + COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z4.VnH(), z1.VnH()), + "mov z31.d, z1.d\n" + "movprfx z1.h, p0/m, z4.h\n" + "fminnmp z1.h, p0/m, z1.h, z31.h"); + COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z11.VnS(), z11.VnS()), + "movprfx z16.s, p3/m, z11.s\n" + "fminp z16.s, p3/m, z16.s, z11.s"); + CLEANUP(); +} + +TEST(sve2_fmlal_fmlsl_index) { + SETUP(); + + COMPARE_MACRO(Fmlalb(z16.VnS(), z16.VnS(), z18.VnH(), z2.VnH(), 0), + "fmlalb z16.s, z18.h, z2.h[0]"); + COMPARE_MACRO(Fmlalb(z3.VnS(), z3.VnS(), z8.VnH(), z7.VnH(), 7), + "fmlalb z3.s, z8.h, z7.h[7]"); + COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z13.VnH(), z5.VnH(), 6), + "fmlalt z18.s, z13.h, z5.h[6]"); + COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z7.VnH(), z6.VnH(), 5), + "fmlalt z18.s, z7.h, z6.h[5]"); + COMPARE_MACRO(Fmlslb(z16.VnS(), z16.VnS(), z10.VnH(), z1.VnH(), 4), + "fmlslb z16.s, z10.h, z1.h[4]"); + COMPARE_MACRO(Fmlslb(z25.VnS(), z25.VnS(), z11.VnH(), z0.VnH(), 3), + "fmlslb z25.s, z11.h, z0.h[3]"); + COMPARE_MACRO(Fmlslt(z3.VnS(), z3.VnS(), z17.VnH(), z4.VnH(), 2), + "fmlslt z3.s, z17.h, z4.h[2]"); + COMPARE_MACRO(Fmlslt(z5.VnS(), 
z5.VnS(), z1.VnH(), z7.VnH(), 1), + "fmlslt z5.s, z1.h, z7.h[1]"); + + COMPARE_MACRO(Fmlalb(z5.VnS(), z4.VnS(), z1.VnH(), z7.VnH(), 1), + "movprfx z5, z4\n" + "fmlalb z5.s, z1.h, z7.h[1]"); + COMPARE_MACRO(Fmlalt(z5.VnS(), z4.VnS(), z5.VnH(), z7.VnH(), 1), + "movprfx z31, z4\n" + "fmlalt z31.s, z5.h, z7.h[1]\n" + "mov z5.d, z31.d"); + COMPARE_MACRO(Fmlslb(z5.VnS(), z4.VnS(), z1.VnH(), z5.VnH(), 1), + "movprfx z31, z4\n" + "fmlslb z31.s, z1.h, z5.h[1]\n" + "mov z5.d, z31.d"); + COMPARE_MACRO(Fmlslt(z5.VnS(), z4.VnS(), z5.VnH(), z5.VnH(), 1), + "movprfx z31, z4\n" + "fmlslt z31.s, z5.h, z5.h[1]\n" + "mov z5.d, z31.d"); + CLEANUP(); +} + +TEST(sve2_fp_convert) { + SETUP(); + + COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()), + "fcvtx z14.s, p4/m, z0.d"); + COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Zeroing(), z0.VnD()), + "movprfx z14.d, p4/z, z14.d\n" + "fcvtx z14.s, p4/m, z0.d"); + COMPARE_MACRO(Fcvtlt(z1.VnD(), p1.Merging(), z28.VnS()), + "fcvtlt z1.d, p1/m, z28.s"); + COMPARE_MACRO(Fcvtlt(z10.VnS(), p5.Merging(), z0.VnH()), + "fcvtlt z10.s, p5/m, z0.h"); + COMPARE_MACRO(Fcvtnt(z4.VnH(), p7.Merging(), z0.VnS()), + "fcvtnt z4.h, p7/m, z0.s"); + COMPARE_MACRO(Fcvtnt(z8.VnS(), p0.Merging(), z4.VnD()), + "fcvtnt z8.s, p0/m, z4.d"); + COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()), + "fcvtx z14.s, p4/m, z0.d"); + COMPARE_MACRO(Fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD()), + "fcvtxnt z27.s, p0/m, z17.d"); + + CLEANUP(); +} + +TEST(sve2_sat_double_mul_high_index) { + SETUP(); + + COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z7.VnH(), 1), + "sqdmulh z11.h, z20.h, z7.h[1]"); + COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z2.VnH(), 7), + "sqdmulh z11.h, z20.h, z2.h[7]"); + COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z7.VnS(), 1), + "sqdmulh z8.s, z4.s, z7.s[1]"); + COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z3.VnS(), 3), + "sqdmulh z8.s, z4.s, z3.s[3]"); + COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z0.VnD(), 1), + "sqdmulh z6.d, z13.d, z0.d[1]"); + 
COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z15.VnD(), 0), + "sqdmulh z6.d, z13.d, z15.d[0]"); + + COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z7.VnH(), 2), + "sqrdmulh z3.h, z29.h, z7.h[2]"); + COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z3.VnH(), 7), + "sqrdmulh z3.h, z29.h, z3.h[7]"); + COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z7.VnS(), 0), + "sqrdmulh z19.s, z15.s, z7.s[0]"); + COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z2.VnS(), 3), + "sqrdmulh z19.s, z15.s, z2.s[3]"); + COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z0.VnD(), 1), + "sqrdmulh z29.d, z13.d, z0.d[1]"); + COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z15.VnD(), 0), + "sqrdmulh z29.d, z13.d, z15.d[0]"); + + CLEANUP(); +} + +TEST(sve2_extract) { + SETUP(); + + COMPARE_MACRO(Ext(z0.VnB(), z1.VnB(), z2.VnB(), 2), + "ext z0.b, {z1.b, z2.b}, #2"); + COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 255), + "ext z0.b, {z31.b, z0.b}, #255"); + COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 0), + "ext z0.b, {z31.b, z0.b}, #0"); + + // Check destructive form is preferred over constructive. 
+ COMPARE_MACRO(Ext(z0.VnB(), z0.VnB(), z1.VnB(), 42), + "ext z0.b, z0.b, z1.b, #42"); CLEANUP(); } +TEST(sve_matmul) { + SETUP(); + + COMPARE_MACRO(Fmmla(z2.VnS(), z2.VnS(), z3.VnS(), z20.VnS()), + "fmmla z2.s, z3.s, z20.s"); + COMPARE_MACRO(Fmmla(z21.VnD(), z21.VnD(), z30.VnD(), z2.VnD()), + "fmmla z21.d, z30.d, z2.d"); + COMPARE_MACRO(Smmla(z31.VnS(), z31.VnS(), z7.VnB(), z19.VnB()), + "smmla z31.s, z7.b, z19.b"); + COMPARE_MACRO(Ummla(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB()), + "ummla z0.s, z1.b, z2.b"); + COMPARE_MACRO(Usmmla(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()), + "usmmla z30.s, z31.b, z4.b"); + + COMPARE_MACRO(Fmmla(z0.VnS(), z1.VnS(), z2.VnS(), z3.VnS()), + "movprfx z0, z1\n" + "fmmla z0.s, z2.s, z3.s"); + COMPARE_MACRO(Smmla(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB()), + "movprfx z31, z1\n" + "smmla z31.s, z0.b, z3.b\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Ummla(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB()), + "movprfx z31, z1\n" + "ummla z31.s, z2.b, z0.b\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Usmmla(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB()), + "movprfx z31, z1\n" + "usmmla z31.s, z0.b, z0.b\n" + "mov z0.d, z31.d"); + + CLEANUP(); +} + +TEST(sve_usdot_sudot) { + SETUP(); + + COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()), + "usdot z30.s, z31.b, z4.b"); + COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 0), + "usdot z30.s, z31.b, z4.b[0]"); + COMPARE_MACRO(Sudot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 3), + "sudot z30.s, z31.b, z4.b[3]"); + + COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z28.VnB()), + "movprfx z0, z30\n" + "usdot z0.s, z29.b, z28.b"); + COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB()), + "movprfx z31, z30\n" + "usdot z31.s, z29.b, z0.b\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z28.VnB()), + "movprfx z31, z30\n" + "usdot z31.s, z0.b, z28.b\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB()), + "movprfx z31, z30\n" + "usdot 
z31.s, z0.b, z0.b\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z4.VnB(), 0), + "movprfx z0, z30\n" + "usdot z0.s, z29.b, z4.b[0]"); + COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z4.VnB(), 0), + "movprfx z31, z30\n" + "usdot z31.s, z0.b, z4.b[0]\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Sudot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB(), 0), + "movprfx z31, z30\n" + "sudot z31.s, z29.b, z0.b[0]\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB(), 0), + "movprfx z31, z30\n" + "usdot z31.s, z0.b, z0.b[0]\n" + "mov z0.d, z31.d"); + + CLEANUP(); +} } // namespace aarch64 } // namespace vixl diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc new file mode 100644 index 00000000..58d9f48c --- /dev/null +++ b/test/aarch64/test-simulator-sve-aarch64.cc @@ -0,0 +1,271 @@ +// Copyright 2021, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "test-runner.h" +#include "test-utils.h" +#include "aarch64/test-utils-aarch64.h" + +#include "aarch64/cpu-aarch64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" +#include "aarch64/simulator-aarch64.h" +#include "test-assembler-aarch64.h" + +#define TEST_SVE(name) TEST_SVE_INNER("SIM", name) + +namespace vixl { +namespace aarch64 { + +TEST_SVE(sve_matmul) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVEI8MM, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x45179979); // smmla z25.s, z11.b, z23.b + // vl128 state = 0xf1ca8a4d + __ dci(0x45179b51); // smmla z17.s, z26.b, z23.b + // vl128 state = 0x4458ad10 + __ dci(0x45d79b53); // ummla z19.s, z26.b, z23.b + // vl128 state = 0x43d4d064 + __ dci(0x45d69b17); // ummla z23.s, z24.b, z22.b + // vl128 state = 0x601e77c8 + __ dci(0x45c69b33); // ummla z19.s, z25.b, z6.b + // vl128 state = 0x561b4e22 + __ dci(0x45c49b1b); // ummla z27.s, z24.b, z4.b + // vl128 state = 0x89b65d78 + __ dci(0x45dc9b1a); // ummla z26.s, z24.b, z28.b + // vl128 state = 0x85c9e62d + __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b + // vl128 state = 0x3fc74134 + __ dci(0x45d99b19); // ummla z25.s, z24.b, z25.b + // vl128 state = 0xa2fa347b + __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b + // vl128 state = 
0xb9854782 + __ dci(0x45899b1a); // usmmla z26.s, z24.b, z9.b + // vl128 state = 0x7fd376d8 + __ dci(0x45099b8a); // smmla z10.s, z28.b, z9.b + // vl128 state = 0xb41d8433 + __ dci(0x45019bcb); // smmla z11.s, z30.b, z1.b + // vl128 state = 0xc9c0e80d + __ dci(0x45019bdb); // smmla z27.s, z30.b, z1.b + // vl128 state = 0xf1130e02 + __ dci(0x45019b6b); // smmla z11.s, z27.b, z1.b + // vl128 state = 0x282d3dc7 + __ dci(0x45019b6f); // smmla z15.s, z27.b, z1.b + // vl128 state = 0x34570238 + __ dci(0x45859b6b); // usmmla z11.s, z27.b, z5.b + // vl128 state = 0xc451206a + __ dci(0x45919b6a); // usmmla z10.s, z27.b, z17.b + // vl128 state = 0xa58e2ea8 + __ dci(0x45909a62); // usmmla z2.s, z19.b, z16.b + // vl128 state = 0x7b5f948d + __ dci(0x45809a52); // usmmla z18.s, z18.b, z0.b + // vl128 state = 0xf746260d + __ dci(0x45889b53); // usmmla z19.s, z26.b, z8.b + // vl128 state = 0xc31cc539 + __ dci(0x45809a57); // usmmla z23.s, z18.b, z0.b + // vl128 state = 0x736bb3ee + __ dci(0x45809a96); // usmmla z22.s, z20.b, z0.b + // vl128 state = 0xbb05fef6 + __ dci(0x45809a92); // usmmla z18.s, z20.b, z0.b + // vl128 state = 0xbc594372 + __ dci(0x45809a82); // usmmla z2.s, z20.b, z0.b + // vl128 state = 0x87c5a584 + __ dci(0x45829ad2); // usmmla z18.s, z22.b, z2.b + // vl128 state = 0xa413f733 + __ dci(0x45889ad6); // usmmla z22.s, z22.b, z8.b + // vl128 state = 0x87ec445d + __ dci(0x45c898d2); // ummla z18.s, z6.b, z8.b + // vl128 state = 0x3ca8a6e5 + __ dci(0x450898d0); // smmla z16.s, z6.b, z8.b + // vl128 state = 0x4300d87b + __ dci(0x45189ad8); // smmla z24.s, z22.b, z24.b + // vl128 state = 0x38be2e8a + __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b + // vl128 state = 0x8a3e6103 + __ dci(0x45989bc9); // usmmla z9.s, z30.b, z24.b + // vl128 state = 0xc728e586 + __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b + // vl128 state = 0x4cb44c0e + __ dci(0x459c99d1); // usmmla z17.s, z14.b, z28.b + // vl128 state = 0x84ebcb36 + __ dci(0x459c99d5); // usmmla z21.s, z14.b, z28.b 
+ // vl128 state = 0x8813d2e2 + __ dci(0x451c999d); // smmla z29.s, z12.b, z28.b + // vl128 state = 0x8f26ee51 + __ dci(0x451c999f); // smmla z31.s, z12.b, z28.b + // vl128 state = 0x5d626fd0 + __ dci(0x459e998f); // usmmla z15.s, z12.b, z30.b + // vl128 state = 0x6b64cc8f + __ dci(0x459f991f); // usmmla z31.s, z8.b, z31.b + // vl128 state = 0x41648186 + __ dci(0x4587991e); // usmmla z30.s, z8.b, z7.b + // vl128 state = 0x701525ec + __ dci(0x45079816); // smmla z22.s, z0.b, z7.b + // vl128 state = 0x61a2d024 + __ dci(0x450f9897); // smmla z23.s, z4.b, z15.b + // vl128 state = 0x82ba6bd5 + __ dci(0x450b98d3); // smmla z19.s, z6.b, z11.b + // vl128 state = 0xa842bbde + __ dci(0x450b98db); // smmla z27.s, z6.b, z11.b + // vl128 state = 0x9977677a + __ dci(0x451f98d3); // smmla z19.s, z6.b, z31.b + // vl128 state = 0xe6d6c2ef + __ dci(0x451b9adb); // smmla z27.s, z22.b, z27.b + // vl128 state = 0xa535453f + __ dci(0x450b98d9); // smmla z25.s, z6.b, z11.b + // vl128 state = 0xeda3f381 + __ dci(0x458b9adb); // usmmla z27.s, z22.b, z11.b + // vl128 state = 0xd72dbdef + __ dci(0x45cb98da); // ummla z26.s, z6.b, z11.b + // vl128 state = 0xfae4975b + __ dci(0x45c999d2); // ummla z18.s, z14.b, z9.b + // vl128 state = 0x0aa6e1f6 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x0aa6e1f6, + 0xba2d4547, + 0x0e72a647, + 0x15b8fc1b, + 0x92eddc98, + 0xe0c72bcf, + 0x36b4e3ba, + 0x1041114e, + 0x4d44ebd4, + 0xfe0e3cbf, + 0x81c43455, + 0x678617c5, + 0xf72fac1f, + 0xabdcd4e4, + 0x108864bd, + 0x035f6eca, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve_fmatmul_s) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVEF32MM, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + 
ExactAssemblyScope scope(&masm, 20 * kInstructionSize); + __ dci(0x64a1e6ee); // fmmla z14.s, z23.s, z1.s + // vl128 state = 0x9db41bef + __ dci(0x64b1e7fe); // fmmla z30.s, z31.s, z17.s + // vl128 state = 0xc1535e55 + __ dci(0x64b9e7d6); // fmmla z22.s, z30.s, z25.s + // vl128 state = 0xc65aad35 + __ dci(0x64bde6c6); // fmmla z6.s, z22.s, z29.s + // vl128 state = 0x68387c22 + __ dci(0x64b9e4c2); // fmmla z2.s, z6.s, z25.s + // vl128 state = 0xcf08b3a4 + __ dci(0x64b9e543); // fmmla z3.s, z10.s, z25.s + // vl128 state = 0x969bbe77 + __ dci(0x64b9e553); // fmmla z19.s, z10.s, z25.s + // vl128 state = 0xc3f514e1 + __ dci(0x64b9e557); // fmmla z23.s, z10.s, z25.s + // vl128 state = 0x4b351c29 + __ dci(0x64b9e773); // fmmla z19.s, z27.s, z25.s + // vl128 state = 0x5e026315 + __ dci(0x64bbe757); // fmmla z23.s, z26.s, z27.s + // vl128 state = 0x61684fe6 + __ dci(0x64bbe755); // fmmla z21.s, z26.s, z27.s + // vl128 state = 0x719b4ce0 + __ dci(0x64bfe554); // fmmla z20.s, z10.s, z31.s + // vl128 state = 0xdf3d2a1c + __ dci(0x64bfe550); // fmmla z16.s, z10.s, z31.s + // vl128 state = 0x3279aab8 + __ dci(0x64bfe714); // fmmla z20.s, z24.s, z31.s + // vl128 state = 0x0b985869 + __ dci(0x64b7e756); // fmmla z22.s, z26.s, z23.s + // vl128 state = 0x14230587 + __ dci(0x64b7e737); // fmmla z23.s, z25.s, z23.s + // vl128 state = 0x2cb88e7f + __ dci(0x64bfe767); // fmmla z7.s, z27.s, z31.s + // vl128 state = 0xb5ec0c65 + __ dci(0x64bfe777); // fmmla z23.s, z27.s, z31.s + // vl128 state = 0xb5e5eab0 + __ dci(0x64bfe715); // fmmla z21.s, z24.s, z31.s + // vl128 state = 0xd0491fb5 + __ dci(0x64b7e797); // fmmla z23.s, z28.s, z23.s + // vl128 state = 0x98a55a30 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x98a55a30, + 0x590b7715, + 0x4562ccf3, + 0x1f8653a6, + 0x5fe174d5, + 0xb300dcb8, + 0x3cefa79e, + 0xa22484c7, 
+ 0x380697ec, + 0xde9e699b, + 0x99d21870, + 0x456cb46b, + 0x207d2615, + 0xecaf9678, + 0x0949e2d2, + 0xa764c43f, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +} // namespace aarch64 +} // namespace vixl diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc new file mode 100644 index 00000000..0a4c6d13 --- /dev/null +++ b/test/aarch64/test-simulator-sve2-aarch64.cc @@ -0,0 +1,9122 @@ +// Copyright 2020, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <sys/mman.h> +#include <unistd.h> + +#include <cfloat> +#include <cmath> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <functional> + +#include "test-runner.h" +#include "test-utils.h" +#include "aarch64/test-utils-aarch64.h" + +#include "aarch64/cpu-aarch64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" +#include "aarch64/simulator-aarch64.h" +#include "test-assembler-aarch64.h" + +#define TEST_SVE(name) TEST_SVE_INNER("SIM", name) + +namespace vixl { +namespace aarch64 { + +TEST_SVE(sve2_halving_arithmetic) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x441182b2); // uhadd z18.b, p0/m, z18.b, z21.b + // vl128 state = 0x8ac2942a + __ dci(0x441382f3); // uhsub z19.b, p0/m, z19.b, z23.b + // vl128 state = 0x0e0db643 + __ dci(0x449383fb); // uhsub z27.s, p0/m, z27.s, z31.s + // vl128 state = 0x6a97fc8c + __ dci(0x441283fa); // shsub z26.b, p0/m, z26.b, z31.b + // vl128 state = 0x48a5fd5f + __ dci(0x44928372); // shsub z18.s, p0/m, z18.s, z27.s + // vl128 state = 0x7c670d36 + __ dci(0x44d2827a); // shsub z26.d, p0/m, z26.d, z19.d + // vl128 state = 0x3a15c66f + __ dci(0x4492823b); // shsub z27.s, p0/m, z27.s, z17.s + // vl128 state = 0xe407c826 + __ 
dci(0x44978239); // uhsubr z25.s, p0/m, z25.s, z17.s + // vl128 state = 0xf7157dae + __ dci(0x4493827d); // uhsub z29.s, p0/m, z29.s, z19.s + // vl128 state = 0xcebff22f + __ dci(0x449782f9); // uhsubr z25.s, p0/m, z25.s, z23.s + // vl128 state = 0xbe691139 + __ dci(0x44978231); // uhsubr z17.s, p0/m, z17.s, z17.s + // vl128 state = 0x59b2af72 + __ dci(0x44578233); // uhsubr z19.h, p0/m, z19.h, z17.h + // vl128 state = 0xd7fad727 + __ dci(0x44578312); // uhsubr z18.h, p0/m, z18.h, z24.h + // vl128 state = 0x87b5d00a + __ dci(0x44578610); // uhsubr z16.h, p1/m, z16.h, z16.h + // vl128 state = 0xbaae097d + __ dci(0x44578618); // uhsubr z24.h, p1/m, z24.h, z16.h + // vl128 state = 0x3887509e + __ dci(0x44168608); // shsubr z8.b, p1/m, z8.b, z16.b + // vl128 state = 0xc16dc63b + __ dci(0x44128700); // shsub z0.b, p1/m, z0.b, z24.b + // vl128 state = 0x3eddcd6d + __ dci(0x44528f02); // shsub z2.h, p3/m, z2.h, z24.h + // vl128 state = 0x2e7ffa0d + __ dci(0x44538f40); // uhsub z0.h, p3/m, z0.h, z26.h + // vl128 state = 0x1f68bee5 + __ dci(0x44538342); // uhsub z2.h, p0/m, z2.h, z26.h + // vl128 state = 0x2a368049 + __ dci(0x44538040); // uhsub z0.h, p0/m, z0.h, z2.h + // vl128 state = 0x0537f844 + __ dci(0x44568044); // shsubr z4.h, p0/m, z4.h, z2.h + // vl128 state = 0x0dfac1b2 + __ dci(0x445688cc); // shsubr z12.h, p2/m, z12.h, z6.h + // vl128 state = 0xbefa909b + __ dci(0x44d288dc); // shsub z28.d, p2/m, z28.d, z6.d + // vl128 state = 0xbadc14bb + __ dci(0x44d288d8); // shsub z24.d, p2/m, z24.d, z6.d + // vl128 state = 0x518130c0 + __ dci(0x44d088f0); // shadd z16.d, p2/m, z16.d, z7.d + // vl128 state = 0xb01856bd + __ dci(0x44d08cd2); // shadd z18.d, p3/m, z18.d, z6.d + // vl128 state = 0xbbcfeaa2 + __ dci(0x44d484d0); // srhadd z16.d, p1/m, z16.d, z6.d + // vl128 state = 0xefe1d416 + __ dci(0x44d496d1); // srhadd z17.d, p5/m, z17.d, z22.d + // vl128 state = 0xceb574b8 + __ dci(0x44d196d5); // uhadd z21.d, p5/m, z21.d, z22.d + // vl128 state = 0x46cdd268 + __ 
dci(0x44d496dd); // srhadd z29.d, p5/m, z29.d, z22.d + // vl128 state = 0x21a81b6a + __ dci(0x4494969c); // srhadd z28.s, p5/m, z28.s, z20.s + // vl128 state = 0x2316cb04 + __ dci(0x4494968c); // srhadd z12.s, p5/m, z12.s, z20.s + // vl128 state = 0x6248cc0a + __ dci(0x4415968d); // urhadd z13.b, p5/m, z13.b, z20.b + // vl128 state = 0x6edd11e0 + __ dci(0x44119e8c); // uhadd z12.b, p7/m, z12.b, z20.b + // vl128 state = 0x81841eb6 + __ dci(0x4491968d); // uhadd z13.s, p5/m, z13.s, z20.s + // vl128 state = 0x02b8b893 + __ dci(0x44118685); // uhadd z5.b, p1/m, z5.b, z20.b + // vl128 state = 0x707db891 + __ dci(0x44138e8d); // uhsub z13.b, p3/m, z13.b, z20.b + // vl128 state = 0x2caa64dd + __ dci(0x44139e0c); // uhsub z12.b, p7/m, z12.b, z16.b + // vl128 state = 0xe34695ef + __ dci(0x44128e0d); // shsub z13.b, p3/m, z13.b, z16.b + // vl128 state = 0x477197dd + __ dci(0x44129a1d); // shsub z29.b, p6/m, z29.b, z16.b + // vl128 state = 0x19cebaa2 + __ dci(0x44129a19); // shsub z25.b, p6/m, z25.b, z16.b + // vl128 state = 0x0d62dca4 + __ dci(0x44129249); // shsub z9.b, p4/m, z9.b, z18.b + // vl128 state = 0x327e81e3 + __ dci(0x44129248); // shsub z8.b, p4/m, z8.b, z18.b + // vl128 state = 0x28ec9bf8 + __ dci(0x44169269); // shsubr z9.b, p4/m, z9.b, z19.b + // vl128 state = 0x652ca8c9 + __ dci(0x44168661); // shsubr z1.b, p1/m, z1.b, z19.b + // vl128 state = 0x46fcb15a + __ dci(0x44168420); // shsubr z0.b, p1/m, z0.b, z1.b + // vl128 state = 0x7151e02b + __ dci(0x44168428); // shsubr z8.b, p1/m, z8.b, z1.b + // vl128 state = 0x4c8921f6 + __ dci(0x44148409); // srhadd z9.b, p1/m, z9.b, z0.b + // vl128 state = 0xd0d2fc1c + __ dci(0x44148641); // srhadd z1.b, p1/m, z1.b, z18.b + // vl128 state = 0xc821f381 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xc821f381, + 0xc0ad3b7c, + 0x4eb4ba1b, + 0xdc8e061a, + 
0x64675a15, + 0x923703bf, + 0x6944c0db, + 0x7ac89bae, + 0x8fa4c45f, + 0xf64c8b4c, + 0x8ba751b7, + 0x2fe8832e, + 0xc6b8000d, + 0x864ba0ff, + 0xded22c04, + 0x213cf65e, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sli_sri) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x4509f07f); // sri z31.b, z3.b, #7 + // vl128 state = 0x509a7a2d + __ dci(0x454bf07e); // sri z30.s, z3.s, #21 + // vl128 state = 0xc973a4e8 + __ dci(0x450bf17a); // sri z26.b, z11.b, #5 + // vl128 state = 0xa9dcbcf5 + __ dci(0x450ef17b); // sri z27.b, z11.b, #2 + // vl128 state = 0xd56761c1 + __ dci(0x458ef1f9); // sri z25.d, z15.d, #50 + // vl128 state = 0xdd84a538 + __ dci(0x459ff1fb); // sri z27.d, z15.d, #33 + // vl128 state = 0x4e2dbf4a + __ dci(0x459ff5df); // sli z31.d, z14.d, #31 + // vl128 state = 0x46d9563e + __ dci(0x45d7f5cf); // sli z15.d, z14.d, #55 + // vl128 state = 0xf4fcf912 + __ dci(0x4593f5ce); // sli z14.d, z14.d, #19 + // vl128 state = 0xcef34d18 + __ dci(0x4593f1fe); // sri z30.d, z15.d, #45 + // vl128 state = 0x69509e94 + __ dci(0x4581f1ff); // sri z31.d, z15.d, #63 + // vl128 state = 0x09cd0cf7 + __ dci(0x45c1f1bd); // sri z29.d, z13.d, #31 + // vl128 state = 0xfc095f8b + __ dci(0x45c1f03c); // sri z28.d, z1.d, #31 + // vl128 state = 0x0ca836f0 + __ dci(0x45c1f4b4); // sli z20.d, z5.d, #33 + // vl128 state = 0x678be6b3 + __ dci(0x45c1f5f0); // sli z16.d, z15.d, #33 + // vl128 state = 0x7a743b56 + __ dci(0x45c7f5f2); // sli z18.d, z15.d, #39 + // vl128 state = 0x0bbc4117 + __ dci(0x45c7f5e2); // sli z2.d, z15.d, #39 + // vl128 state = 0x13e1a7ae + __ dci(0x45c7f1a0); // sri z0.d, z13.d, #25 + // vl128 state = 0x8014a497 + __ dci(0x4597f1b0); // sri z16.d, z13.d, #41 + // vl128 state = 0x5f7994a8 + __ 
dci(0x4593f5b1); // sli z17.d, z13.d, #19 + // vl128 state = 0x125f37b5 + __ dci(0x4591f5f0); // sli z16.d, z15.d, #17 + // vl128 state = 0x26f1fdf2 + __ dci(0x4581f5d2); // sli z18.d, z14.d, #1 + // vl128 state = 0x5b0baccc + __ dci(0x4541f5d6); // sli z22.s, z14.s, #1 + // vl128 state = 0x74f04ecb + __ dci(0x4551f1d4); // sri z20.s, z14.s, #15 + // vl128 state = 0xc43d0586 + __ dci(0x4553f150); // sri z16.s, z10.s, #13 + // vl128 state = 0xce8c688a + __ dci(0x4557f171); // sri z17.s, z11.s, #9 + // vl128 state = 0x03a5b3b0 + __ dci(0x4513f175); // sri z21.h, z11.h, #13 + // vl128 state = 0x392ab48e + __ dci(0x4551f177); // sri z23.s, z11.s, #15 + // vl128 state = 0xa886dbc8 + __ dci(0x4551f17f); // sri z31.s, z11.s, #15 + // vl128 state = 0x37c804bc + __ dci(0x4551f16f); // sri z15.s, z11.s, #15 + // vl128 state = 0x17e99d67 + __ dci(0x4550f067); // sri z7.s, z3.s, #16 + // vl128 state = 0xb0bd981a + __ dci(0x4550f077); // sri z23.s, z3.s, #16 + // vl128 state = 0x5f643b3e + __ dci(0x4551f0f5); // sri z21.s, z7.s, #15 + // vl128 state = 0xa0b83a32 + __ dci(0x4551f09d); // sri z29.s, z4.s, #15 + // vl128 state = 0x890807a1 + __ dci(0x4552f08d); // sri z13.s, z4.s, #14 + // vl128 state = 0x81cb8fa4 + __ dci(0x4512f01d); // sri z29.h, z0.h, #14 + // vl128 state = 0x62751a54 + __ dci(0x4552f419); // sli z25.s, z0.s, #18 + // vl128 state = 0xfd7c0337 + __ dci(0x4542f49b); // sli z27.s, z4.s, #2 + // vl128 state = 0x0089e534 + __ dci(0x454af09a); // sri z26.s, z4.s, #22 + // vl128 state = 0xea87d159 + __ dci(0x45caf0d8); // sri z24.d, z6.d, #22 + // vl128 state = 0x3c44b845 + __ dci(0x45c2f2dc); // sri z28.d, z22.d, #30 + // vl128 state = 0x9b8c17a7 + __ dci(0x45caf25d); // sri z29.d, z18.d, #22 + // vl128 state = 0x3e2c1797 + __ dci(0x45caf0dc); // sri z28.d, z6.d, #22 + // vl128 state = 0xbf933754 + __ dci(0x458af1cc); // sri z12.d, z14.d, #54 + // vl128 state = 0x93e91a23 + __ dci(0x4586f1cd); // sri z13.d, z14.d, #58 + // vl128 state = 0x0f7c6faa + __ 
dci(0x458ef0cc); // sri z12.d, z6.d, #50 + // vl128 state = 0x1d771f71 + __ dci(0x458ef00d); // sri z13.d, z0.d, #50 + // vl128 state = 0x29a23da7 + __ dci(0x450ef05d); // sri z29.b, z2.b, #2 + // vl128 state = 0x74fd2038 + __ dci(0x450cf00d); // sri z13.b, z0.b, #4 + // vl128 state = 0x075bc166 + __ dci(0x450cf00c); // sri z12.b, z0.b, #4 + // vl128 state = 0xfd3d290f + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xfd3d290f, + 0x8dd0bdab, + 0xa25ba843, + 0x484543ed, + 0x22df2f4f, + 0xb62769dc, + 0x795e30f7, + 0xe49948e7, + 0xd4ceb676, + 0xbf2d359a, + 0xcf4331a9, + 0x8cce4eef, + 0x4fbaec97, + 0x4fec4d88, + 0x3efc521d, + 0xffef31d1, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_srshr_urshr) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x04cc9074); // srshr z20.d, p4/m, z20.d, #29 + // vl128 state = 0xecefbcaa + __ dci(0x04cc9236); // srshr z22.d, p4/m, z22.d, #15 + // vl128 state = 0x7eef75c3 + __ dci(0x04cd927e); // urshr z30.d, p4/m, z30.d, #13 + // vl128 state = 0xf5ab0a43 + __ dci(0x04cd9e76); // urshr z22.d, p7/m, z22.d, #13 + // vl128 state = 0x67a9d15a + __ dci(0x04cd9a57); // urshr z23.d, p6/m, z23.d, #14 + // vl128 state = 0xf1591f3f + __ dci(0x044d9247); // urshr z7.s, p4/m, z7.s, #14 + // vl128 state = 0xcb770d03 + __ dci(0x044d9245); // urshr z5.s, p4/m, z5.s, #14 + // vl128 state = 0x7a225c92 + __ dci(0x044d9241); // urshr z1.s, p4/m, z1.s, #14 + // vl128 state = 0x31e4f59a + __ dci(0x044d8200); // urshr z0.s, p0/m, z0.s, #16 + // vl128 state = 0x7c0c67fa + __ dci(0x044d8330); // urshr z16.s, p0/m, z16.s, #7 + // 
vl128 state = 0x2aaa996d + __ dci(0x044d8340); // urshr z0.s, p0/m, z0.s, #6 + // vl128 state = 0x1999a541 + __ dci(0x044d8104); // urshr z4.s, p0/m, z4.s, #24 + // vl128 state = 0xbebc22f3 + __ dci(0x044d8526); // urshr z6.s, p1/m, z6.s, #23 + // vl128 state = 0x5e9c818d + __ dci(0x04cd8502); // urshr z2.d, p1/m, z2.d, #24 + // vl128 state = 0x9cd88e00 + __ dci(0x048d9506); // urshr z6.d, p5/m, z6.d, #56 + // vl128 state = 0xff60a16e + __ dci(0x048d9504); // urshr z4.d, p5/m, z4.d, #56 + // vl128 state = 0xfae64bf4 + __ dci(0x048d8705); // urshr z5.d, p1/m, z5.d, #40 + // vl128 state = 0xbd7bc8bb + __ dci(0x048d9307); // urshr z7.d, p4/m, z7.d, #40 + // vl128 state = 0x22e58729 + __ dci(0x048c9323); // srshr z3.d, p4/m, z3.d, #39 + // vl128 state = 0x1a2b90d1 + __ dci(0x048c8721); // srshr z1.d, p1/m, z1.d, #39 + // vl128 state = 0xf31798ea + __ dci(0x04cc8f20); // srshr z0.d, p3/m, z0.d, #7 + // vl128 state = 0x3a159e41 + __ dci(0x04cc87b0); // srshr z16.d, p1/m, z16.d, #3 + // vl128 state = 0x461819c6 + __ dci(0x04cc8778); // srshr z24.d, p1/m, z24.d, #5 + // vl128 state = 0x52c8c945 + __ dci(0x048c8730); // srshr z16.d, p1/m, z16.d, #39 + // vl128 state = 0xa6724c16 + __ dci(0x040c8534); // srshr z20.b, p1/m, z20.b, #7 + // vl128 state = 0xfeae5ea1 + __ dci(0x040c957c); // srshr z28.b, p5/m, z28.b, #5 + // vl128 state = 0xe55cac9f + __ dci(0x048c9554); // srshr z20.d, p5/m, z20.d, #54 + // vl128 state = 0x41ccbe50 + __ dci(0x048c8156); // srshr z22.d, p0/m, z22.d, #54 + // vl128 state = 0xfef5c71e + __ dci(0x040c8957); // srshr z23.b, p2/m, z23.b, #6 + // vl128 state = 0xac8cf177 + __ dci(0x040c8bd5); // srshr z21.h, p2/m, z21.h, #2 + // vl128 state = 0xfe7005fe + __ dci(0x040c8354); // srshr z20.h, p0/m, z20.h, #6 + // vl128 state = 0x1daa6598 + __ dci(0x040c931c); // srshr z28.h, p4/m, z28.h, #8 + // vl128 state = 0x8c7f2675 + __ dci(0x040c9798); // srshr z24.h, p5/m, z24.h, #4 + // vl128 state = 0x2349e927 + __ dci(0x044c97ba); // srshr z26.s, p5/m, z26.s, 
#3 + // vl128 state = 0xf3670053 + __ dci(0x040c9faa); // srshr z10.h, p7/m, z10.h, #3 + // vl128 state = 0x61333578 + __ dci(0x044d9fae); // urshr z14.s, p7/m, z14.s, #3 + // vl128 state = 0xdb1232a3 + __ dci(0x044d8f8f); // urshr z15.s, p3/m, z15.s, #4 + // vl128 state = 0xb1b4bda1 + __ dci(0x044d8f87); // urshr z7.s, p3/m, z7.s, #4 + // vl128 state = 0xba636ab8 + __ dci(0x044d9d97); // urshr z23.s, p7/m, z23.s, #20 + // vl128 state = 0x8ab01b49 + __ dci(0x040d9593); // urshr z19.b, p5/m, z19.b, #4 + // vl128 state = 0x20ee49b4 + __ dci(0x040d959b); // urshr z27.b, p5/m, z27.b, #4 + // vl128 state = 0xe34dcf2e + __ dci(0x044c959a); // srshr z26.s, p5/m, z26.s, #20 + // vl128 state = 0x65bafb28 + __ dci(0x044d9492); // urshr z18.s, p5/m, z18.s, #28 + // vl128 state = 0xcbed1382 + __ dci(0x044c8493); // srshr z19.s, p1/m, z19.s, #28 + // vl128 state = 0xa54fb84c + __ dci(0x044c8cc3); // srshr z3.s, p3/m, z3.s, #26 + // vl128 state = 0x257267ee + __ dci(0x044c8c0b); // srshr z11.s, p3/m, z11.s, #32 + // vl128 state = 0xd494a3e8 + __ dci(0x044c8c6f); // srshr z15.s, p3/m, z15.s, #29 + // vl128 state = 0x63621477 + __ dci(0x044c9c2e); // srshr z14.s, p7/m, z14.s, #31 + // vl128 state = 0x4cb2e888 + __ dci(0x04cc943e); // srshr z30.d, p5/m, z30.d, #31 + // vl128 state = 0x8e580ba2 + __ dci(0x04cd953f); // urshr z31.d, p5/m, z31.d, #23 + // vl128 state = 0x7678cc05 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x7678cc05, + 0x37f2893a, + 0xce2a105d, + 0x5a03f5a3, + 0x81444dfc, + 0x5581c0c1, + 0xfee622cc, + 0x0f6796a5, + 0xf151a5fd, + 0x13e9be9c, + 0x9685f8b5, + 0xa6827285, + 0x7ad6d004, + 0xba7989ae, + 0x96fe2826, + 0xd1ddc17e, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sqshl_uqshl) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + 
CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x044f86aa); // sqshlu z10.s, p1/m, z10.s, #21 + // vl128 state = 0x37777991 + __ dci(0x044f8482); // sqshlu z2.s, p1/m, z2.s, #4 + // vl128 state = 0x8119dd5a + __ dci(0x048f8480); // sqshlu z0.d, p1/m, z0.d, #4 + // vl128 state = 0x8966cd23 + __ dci(0x04cf8c82); // sqshlu z2.d, p3/m, z2.d, #36 + // vl128 state = 0x71b53135 + __ dci(0x044f8892); // sqshlu z18.s, p2/m, z18.s, #4 + // vl128 state = 0x44e0e9a7 + __ dci(0x04cf8996); // sqshlu z22.d, p2/m, z22.d, #44 + // vl128 state = 0x4e4b77b9 + __ dci(0x04cf9194); // sqshlu z20.d, p4/m, z20.d, #44 + // vl128 state = 0x66d72728 + __ dci(0x04cf9b9c); // sqshlu z28.d, p6/m, z28.d, #60 + // vl128 state = 0xa80f62ce + __ dci(0x04c79f8c); // uqshl z12.d, p7/m, z12.d, #60 + // vl128 state = 0x87a3a8c0 + __ dci(0x04469f88); // sqshl z8.s, p7/m, z8.s, #28 + // vl128 state = 0x3db302cb + __ dci(0x04469f8a); // sqshl z10.s, p7/m, z10.s, #28 + // vl128 state = 0x2d66bbb2 + __ dci(0x04469a8e); // sqshl z14.s, p6/m, z14.s, #20 + // vl128 state = 0x39524732 + __ dci(0x04c69a1e); // sqshl z30.d, p6/m, z30.d, #48 + // vl128 state = 0x39d71433 + __ dci(0x04c68a9a); // sqshl z26.d, p2/m, z26.d, #52 + // vl128 state = 0x58771cfb + __ dci(0x04469a8a); // sqshl z10.s, p6/m, z10.s, #20 + // vl128 state = 0xa773fcc9 + __ dci(0x04c68a88); // sqshl z8.d, p2/m, z8.d, #52 + // vl128 state = 0x9dce801c + __ dci(0x04469a89); // sqshl z9.s, p6/m, z9.s, #20 + // vl128 state = 0x4141302f + __ dci(0x04479b81); // uqshl z1.s, p6/m, z1.s, #28 + // vl128 state = 0x369084f9 + __ dci(0x044f9f91); // sqshlu z17.s, p7/m, z17.s, #28 + // vl128 state = 0x1570bb90 + __ dci(0x04479e90); // uqshl z16.s, p7/m, z16.s, #20 + // vl128 state = 0x27765662 + __ dci(0x044f9f94); // sqshlu z20.s, p7/m, z20.s, #28 + // vl128 state = 0xe99bcbb9 + __ dci(0x04479795); 
// uqshl z21.s, p5/m, z21.s, #28 + // vl128 state = 0xb36c3b9f + __ dci(0x04479754); // uqshl z20.s, p5/m, z20.s, #26 + // vl128 state = 0x435e0256 + __ dci(0x04479750); // uqshl z16.s, p5/m, z16.s, #26 + // vl128 state = 0x485471e9 + __ dci(0x04479740); // uqshl z0.s, p5/m, z0.s, #26 + // vl128 state = 0x170e10cb + __ dci(0x04079544); // uqshl z4.b, p5/m, z4.b, #2 + // vl128 state = 0x026fe32a + __ dci(0x04c79546); // uqshl z6.d, p5/m, z6.d, #42 + // vl128 state = 0x9a92b063 + __ dci(0x04c78504); // uqshl z4.d, p1/m, z4.d, #40 + // vl128 state = 0x4e9a105e + __ dci(0x04879500); // uqshl z0.d, p5/m, z0.d, #8 + // vl128 state = 0x958b4d28 + __ dci(0x04879908); // uqshl z8.d, p6/m, z8.d, #8 + // vl128 state = 0x420ff82d + __ dci(0x04879318); // uqshl z24.d, p4/m, z24.d, #24 + // vl128 state = 0x88002097 + __ dci(0x0487931a); // uqshl z26.d, p4/m, z26.d, #24 + // vl128 state = 0x3047401c + __ dci(0x0486938a); // sqshl z10.d, p4/m, z10.d, #28 + // vl128 state = 0x5b2b7938 + __ dci(0x04069188); // sqshl z8.b, p4/m, z8.b, #4 + // vl128 state = 0xb92dd260 + __ dci(0x04469389); // sqshl z9.s, p4/m, z9.s, #28 + // vl128 state = 0xdc6370c3 + __ dci(0x0447918b); // uqshl z11.s, p4/m, z11.s, #12 + // vl128 state = 0x5e6198f0 + __ dci(0x0447913b); // uqshl z27.s, p4/m, z27.s, #9 + // vl128 state = 0x935ed2a3 + __ dci(0x0447915f); // uqshl z31.s, p4/m, z31.s, #10 + // vl128 state = 0x76271654 + __ dci(0x0406915d); // sqshl z29.b, p4/m, z29.b, #2 + // vl128 state = 0x46a71ae3 + __ dci(0x0486911f); // sqshl z31.d, p4/m, z31.d, #8 + // vl128 state = 0x2c7320a6 + __ dci(0x0486911d); // sqshl z29.d, p4/m, z29.d, #8 + // vl128 state = 0x4aa0022d + __ dci(0x04869b1f); // sqshl z31.d, p6/m, z31.d, #24 + // vl128 state = 0x2de081d7 + __ dci(0x04069317); // sqshl z23.h, p4/m, z23.h, #8 + // vl128 state = 0x879c9ead + __ dci(0x0447931f); // uqshl z31.s, p4/m, z31.s, #24 + // vl128 state = 0x51070552 + __ dci(0x04479b9e); // uqshl z30.s, p6/m, z30.s, #28 + // vl128 state = 0x8cc26b2b + __ 
dci(0x04479adf); // uqshl z31.s, p6/m, z31.s, #22 + // vl128 state = 0x8f4512d3 + __ dci(0x04479adb); // uqshl z27.s, p6/m, z27.s, #22 + // vl128 state = 0x3d44e050 + __ dci(0x04079a99); // uqshl z25.h, p6/m, z25.h, #4 + // vl128 state = 0xede0c288 + __ dci(0x04079a89); // uqshl z9.h, p6/m, z9.h, #4 + // vl128 state = 0x928beed6 + __ dci(0x04879acb); // uqshl z11.d, p6/m, z11.d, #22 + // vl128 state = 0x6945e18a + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x6945e18a, + 0x0e954f70, + 0x3d269eb2, + 0xefeb5acb, + 0xfb27cb0c, + 0x651a1aea, + 0x07011083, + 0xd425418b, + 0xa0e026c6, + 0x407c416e, + 0x14e25761, + 0x21eef576, + 0xc6ad09eb, + 0x3642006b, + 0xdebec165, + 0x24ae8a32, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_unsigned_sat_round_shift) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 100 * kInstructionSize); + __ dci(0x44cb84cb); // uqrshl z11.d, p1/m, z11.d, z6.d + // vl128 state = 0x9794ef4a + __ dci(0x444b85db); // uqrshl z27.h, p1/m, z27.h, z14.h + // vl128 state = 0xda137fcc + __ dci(0x444b874b); // uqrshl z11.h, p1/m, z11.h, z26.h + // vl128 state = 0xafc1533b + __ dci(0x444b87fb); // uqrshl z27.h, p1/m, z27.h, z31.h + // vl128 state = 0x228890a2 + __ dci(0x444b87f3); // uqrshl z19.h, p1/m, z19.h, z31.h + // vl128 state = 0x5cb0d356 + __ dci(0x444385f1); // urshl z17.h, p1/m, z17.h, z15.h + // vl128 state = 0xbb6b6d1d + __ dci(0x444795f3); // urshlr z19.h, p5/m, z19.h, z15.h + // vl128 state = 0x98b43358 + __ dci(0x44479552); // urshlr z18.h, p5/m, z18.h, z10.h + // vl128 state = 0x472880b2 + __ dci(0x44c79502); // urshlr z2.d, p5/m, z2.d, z8.d + 
// vl128 state = 0x0995d86f + __ dci(0x44879406); // urshlr z6.s, p5/m, z6.s, z0.s + // vl128 state = 0x405211cd + __ dci(0x44079436); // urshlr z22.b, p5/m, z22.b, z1.b + // vl128 state = 0x563647b0 + __ dci(0x44078c34); // urshlr z20.b, p3/m, z20.b, z1.b + // vl128 state = 0x2eacf2d3 + __ dci(0x440f843c); // uqrshlr z28.b, p1/m, z28.b, z1.b + // vl128 state = 0x56f472ce + __ dci(0x440f8cbe); // uqrshlr z30.b, p3/m, z30.b, z5.b + // vl128 state = 0x910ce8d0 + __ dci(0x44078eba); // urshlr z26.b, p3/m, z26.b, z21.b + // vl128 state = 0xc47b6482 + __ dci(0x44078ebe); // urshlr z30.b, p3/m, z30.b, z21.b + // vl128 state = 0xff805975 + __ dci(0x440f86b6); // uqrshlr z22.b, p1/m, z22.b, z21.b + // vl128 state = 0x132fe792 + __ dci(0x444b86b7); // uqrshl z23.h, p1/m, z23.h, z21.h + // vl128 state = 0xabd3d85c + __ dci(0x440b84a7); // uqrshl z7.b, p1/m, z7.b, z5.b + // vl128 state = 0x8f718992 + __ dci(0x440b8085); // uqrshl z5.b, p0/m, z5.b, z4.b + // vl128 state = 0x1b05e694 + __ dci(0x440b8687); // uqrshl z7.b, p1/m, z7.b, z20.b + // vl128 state = 0xd9a0c225 + __ dci(0x440986cf); // uqshl z15.b, p1/m, z15.b, z22.b + // vl128 state = 0x98be170a + __ dci(0x440b87ce); // uqrshl z14.b, p1/m, z14.b, z30.b + // vl128 state = 0x0993d862 + __ dci(0x440b838c); // uqrshl z12.b, p0/m, z12.b, z28.b + // vl128 state = 0xbc95a037 + __ dci(0x440b839c); // uqrshl z28.b, p0/m, z28.b, z28.b + // vl128 state = 0x558159d9 + __ dci(0x444b8314); // uqrshl z20.h, p0/m, z20.h, z24.h + // vl128 state = 0x53798c6b + __ dci(0x44498b1c); // uqshl z28.h, p2/m, z28.h, z24.h + // vl128 state = 0x83db6a7c + __ dci(0x44498b0c); // uqshl z12.h, p2/m, z12.h, z24.h + // vl128 state = 0x62bda6cb + __ dci(0x44438b0e); // urshl z14.h, p2/m, z14.h, z24.h + // vl128 state = 0xc04356eb + __ dci(0x44438986); // urshl z6.h, p2/m, z6.h, z12.h + // vl128 state = 0x0e2e6682 + __ dci(0x444389e4); // urshl z4.h, p2/m, z4.h, z15.h + // vl128 state = 0xbb28cacd + __ dci(0x444391f4); // urshl z20.h, p4/m, z20.h, z15.h 
+ // vl128 state = 0x5349f37a + __ dci(0x444391f6); // urshl z22.h, p4/m, z22.h, z15.h + // vl128 state = 0x99e66890 + __ dci(0x44c39177); // urshl z23.d, p4/m, z23.d, z11.d + // vl128 state = 0x2d48a891 + __ dci(0x44c79573); // urshlr z19.d, p5/m, z19.d, z11.d + // vl128 state = 0xd26e94f9 + __ dci(0x04c79d63); // uqshl z3.d, p7/m, z3.d, #43 + // vl128 state = 0x54801050 + __ dci(0x04c78c67); // uqshl z7.d, p3/m, z7.d, #35 + // vl128 state = 0xde9f357a + __ dci(0x04878c43); // uqshl z3.d, p3/m, z3.d, #2 + // vl128 state = 0x59e5d53c + __ dci(0x44878c0b); // urshlr z11.s, p3/m, z11.s, z0.s + // vl128 state = 0x8cfa7532 + __ dci(0x44878c03); // urshlr z3.s, p3/m, z3.s, z0.s + // vl128 state = 0xdb4e86b6 + __ dci(0x44878d42); // urshlr z2.s, p3/m, z2.s, z10.s + // vl128 state = 0x07467a7c + __ dci(0x44878d4a); // urshlr z10.s, p3/m, z10.s, z10.s + // vl128 state = 0x6a4ad81c + __ dci(0x44879948); // urshlr z8.s, p6/m, z8.s, z10.s + // vl128 state = 0x91d7bdc0 + __ dci(0x44879949); // urshlr z9.s, p6/m, z9.s, z10.s + // vl128 state = 0x2fe3b819 + __ dci(0x44879bcb); // urshlr z11.s, p6/m, z11.s, z30.s + // vl128 state = 0x5c121b68 + __ dci(0x04879b4f); // uqshl z15.d, p6/m, z15.d, #26 + // vl128 state = 0xe678f4f7 + __ dci(0x44879bdf); // urshlr z31.s, p6/m, z31.s, z30.s + // vl128 state = 0x6593da76 + __ dci(0x4487935e); // urshlr z30.s, p4/m, z30.s, z26.s + // vl128 state = 0xb558ba57 + __ dci(0x440f9356); // uqrshlr z22.b, p4/m, z22.b, z26.b + // vl128 state = 0x45d1775e + __ dci(0x440f93f7); // uqrshlr z23.b, p4/m, z23.b, z31.b + // vl128 state = 0x20974795 + __ dci(0x448793f5); // urshlr z21.s, p4/m, z21.s, z31.s + // vl128 state = 0xeb0bc2ab + __ dci(0x448383fd); // urshl z29.s, p0/m, z29.s, z31.s + // vl128 state = 0x74557d81 + __ dci(0x448b82f9); // uqrshl z25.s, p0/m, z25.s, z23.s + // vl128 state = 0x34518418 + __ dci(0x448f82b8); // uqrshlr z24.s, p0/m, z24.s, z21.s + // vl128 state = 0x93e637f3 + __ dci(0x448f82bc); // uqrshlr z28.s, p0/m, z28.s, z21.s + 
// vl128 state = 0x6e35e56a + __ dci(0x448f83fe); // uqrshlr z30.s, p0/m, z30.s, z31.s + // vl128 state = 0xf3c59bb1 + __ dci(0x448d83ae); // uqshlr z14.s, p0/m, z14.s, z29.s + // vl128 state = 0x95b401a3 + __ dci(0x448d83aa); // uqshlr z10.s, p0/m, z10.s, z29.s + // vl128 state = 0x56ec65b0 + __ dci(0x448993ae); // uqshl z14.s, p4/m, z14.s, z29.s + // vl128 state = 0x28f6e4c6 + __ dci(0x448993a6); // uqshl z6.s, p4/m, z6.s, z29.s + // vl128 state = 0x9ed5eaf3 + __ dci(0x44c991a4); // uqshl z4.d, p4/m, z4.d, z13.d + // vl128 state = 0xa8512b00 + __ dci(0x44c991a5); // uqshl z5.d, p4/m, z5.d, z13.d + // vl128 state = 0x49a10780 + __ dci(0x44c991a1); // uqshl z1.d, p4/m, z1.d, z13.d + // vl128 state = 0x465a2cb4 + __ dci(0x444b91a0); // uqrshl z0.h, p4/m, z0.h, z13.h + // vl128 state = 0x8f6dad8e + __ dci(0x444b91a1); // uqrshl z1.h, p4/m, z1.h, z13.h + // vl128 state = 0x50dec3f8 + __ dci(0x440391a3); // urshl z3.b, p4/m, z3.b, z13.b + // vl128 state = 0xab2b5ad7 + __ dci(0x448393a7); // urshl z7.s, p4/m, z7.s, z29.s + // vl128 state = 0x2ffd164f + __ dci(0x448393af); // urshl z15.s, p4/m, z15.s, z29.s + // vl128 state = 0x43a7959b + __ dci(0x448393ab); // urshl z11.s, p4/m, z11.s, z29.s + // vl128 state = 0xf9526723 + __ dci(0x448f93af); // uqrshlr z15.s, p4/m, z15.s, z29.s + // vl128 state = 0xf9081b27 + __ dci(0x448f93ae); // uqrshlr z14.s, p4/m, z14.s, z29.s + // vl128 state = 0x3a4f693e + __ dci(0x048793aa); // uqshl z10.d, p4/m, z10.d, #29 + // vl128 state = 0xbba37d9a + __ dci(0x04c79388); // uqshl z8.d, p4/m, z8.d, #60 + // vl128 state = 0x3b3f5fa4 + __ dci(0x04c79380); // uqshl z0.d, p4/m, z0.d, #60 + // vl128 state = 0xdac48ac2 + __ dci(0x04878390); // uqshl z16.d, p0/m, z16.d, #28 + // vl128 state = 0xe3c8148f + __ dci(0x44878794); // urshlr z20.s, p1/m, z20.s, z28.s + // vl128 state = 0xee2179ec + __ dci(0x04878384); // uqshl z4.d, p0/m, z4.d, #28 + // vl128 state = 0xc6a3796c + __ dci(0x048787ac); // uqshl z12.d, p1/m, z12.d, #29 + // vl128 state = 
0x18e0fd43 + __ dci(0x04c786ae); // uqshl z14.d, p1/m, z14.d, #53 + // vl128 state = 0x9292503e + __ dci(0x04c786be); // uqshl z30.d, p1/m, z30.d, #53 + // vl128 state = 0xc1ebe042 + __ dci(0x44c782b6); // urshlr z22.d, p0/m, z22.d, z21.d + // vl128 state = 0x0badc025 + __ dci(0x44c78a3e); // urshlr z30.d, p2/m, z30.d, z17.d + // vl128 state = 0x51b3b5ac + __ dci(0x04c78b3a); // uqshl z26.d, p2/m, z26.d, #57 + // vl128 state = 0x334f52f8 + __ dci(0x04c78832); // uqshl z18.d, p2/m, z18.d, #33 + // vl128 state = 0xf95df0b7 + __ dci(0x44cf8833); // uqrshlr z19.d, p2/m, z19.d, z1.d + // vl128 state = 0xda88a00a + __ dci(0x44cf9811); // uqrshlr z17.d, p6/m, z17.d, z0.d + // vl128 state = 0x1e642a4c + __ dci(0x44cf9c41); // uqrshlr z1.d, p7/m, z1.d, z2.d + // vl128 state = 0xeb7fe4bd + __ dci(0x444f8c45); // uqrshlr z5.h, p3/m, z5.h, z2.h + // vl128 state = 0x5a82d833 + __ dci(0x44cf844d); // uqrshlr z13.d, p1/m, z13.d, z2.d + // vl128 state = 0x595d42a4 + __ dci(0x44c7841d); // urshlr z29.d, p1/m, z29.d, z0.d + // vl128 state = 0x0b433688 + __ dci(0x44c7805f); // urshlr z31.d, p0/m, z31.d, z2.d + // vl128 state = 0x14b8c29a + __ dci(0x44cf807b); // uqrshlr z27.d, p0/m, z27.d, z3.d + // vl128 state = 0x12a76015 + __ dci(0x44c780eb); // urshlr z11.d, p0/m, z11.d, z7.d + // vl128 state = 0x73fa7d24 + __ dci(0x44c794e3); // urshlr z3.d, p5/m, z3.d, z7.d + // vl128 state = 0x0a01c859 + __ dci(0x04c795eb); // uqshl z11.d, p5/m, z11.d, #47 + // vl128 state = 0x0e7024fd + __ dci(0x04c795e9); // uqshl z9.d, p5/m, z9.d, #47 + // vl128 state = 0x9ca5cb63 + __ dci(0x04c795f9); // uqshl z25.d, p5/m, z25.d, #47 + // vl128 state = 0x4c60da07 + __ dci(0x04c795fb); // uqshl z27.d, p5/m, z27.d, #47 + // vl128 state = 0x71114c19 + __ dci(0x04c799f3); // uqshl z19.d, p6/m, z19.d, #47 + // vl128 state = 0x32d71e12 + __ dci(0x04c79997); // uqshl z23.d, p6/m, z23.d, #44 + // vl128 state = 0xab0c9051 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, 
reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xab0c9051, + 0xc2455013, + 0x6e4b3f1e, + 0x631ce7ed, + 0x031e4f7f, + 0xa2be23bd, + 0x2f5f74b0, + 0x9e60f1ea, + 0xb1080595, + 0x953020c9, + 0x7a5bfffb, + 0xf0a27817, + 0x83904886, + 0x04620572, + 0xbcd5c8c9, + 0x3d4abe12, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_signed_sat_round_shift) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 100 * kInstructionSize); + __ dci(0x048687c6); // sqshl z6.d, p1/m, z6.d, #30 + // vl128 state = 0xe81d8487 + __ dci(0x048687c4); // sqshl z4.d, p1/m, z4.d, #30 + // vl128 state = 0x47cc69b1 + __ dci(0x04868385); // sqshl z5.d, p0/m, z5.d, #28 + // vl128 state = 0xec4cab7b + __ dci(0x0486838d); // sqshl z13.d, p0/m, z13.d, #28 + // vl128 state = 0x23b07ac8 + __ dci(0x048681a9); // sqshl z9.d, p0/m, z9.d, #13 + // vl128 state = 0xace4253d + __ dci(0x04068139); // sqshl z25.b, p0/m, z25.b, #1 + // vl128 state = 0xf8f14a80 + __ dci(0x440681b8); // srshlr z24.b, p0/m, z24.b, z13.b + // vl128 state = 0xa79d8fc1 + __ dci(0x4406803a); // srshlr z26.b, p0/m, z26.b, z1.b + // vl128 state = 0xed9bb777 + __ dci(0x4406808a); // srshlr z10.b, p0/m, z10.b, z4.b + // vl128 state = 0xbd1dfa2f + __ dci(0x440688da); // srshlr z26.b, p2/m, z26.b, z6.b + // vl128 state = 0x8f9b61e6 + __ dci(0x448680db); // srshlr z27.s, p0/m, z27.s, z6.s + // vl128 state = 0x0a16f551 + __ dci(0x440684d3); // srshlr z19.b, p1/m, z19.b, z6.b + // vl128 state = 0x0a764f12 + __ dci(0x448694c3); // srshlr z3.s, p5/m, z3.s, z6.s + // vl128 state = 0x8d6f5613 + __ dci(0x448e9cc7); // sqrshlr z7.s, p7/m, z7.s, z6.s + // vl128 state = 0xaf7b559b + __ dci(0x448e9ef7); // sqrshlr z23.s, p7/m, z23.s, z23.s + 
// vl128 state = 0x086d6430 + __ dci(0x448e9673); // sqrshlr z19.s, p5/m, z19.s, z19.s + // vl128 state = 0x4a9a5736 + __ dci(0x448a8663); // sqrshl z3.s, p1/m, z3.s, z19.s + // vl128 state = 0x19adf50e + __ dci(0x440a8e6b); // sqrshl z11.b, p3/m, z11.b, z19.b + // vl128 state = 0x4a01719c + __ dci(0x44028eef); // srshl z15.b, p3/m, z15.b, z23.b + // vl128 state = 0x1af6d72e + __ dci(0x44028e8b); // srshl z11.b, p3/m, z11.b, z20.b + // vl128 state = 0xeca2061d + __ dci(0x44828f8f); // srshl z15.s, p3/m, z15.s, z28.s + // vl128 state = 0x61059832 + __ dci(0x44828f87); // srshl z7.s, p3/m, z7.s, z28.s + // vl128 state = 0x5e4d94cc + __ dci(0x44828a97); // srshl z23.s, p2/m, z23.s, z20.s + // vl128 state = 0xf5095aa8 + __ dci(0x44828a93); // srshl z19.s, p2/m, z19.s, z20.s + // vl128 state = 0x155ff234 + __ dci(0x44868a11); // srshlr z17.s, p2/m, z17.s, z16.s + // vl128 state = 0xf2844c7f + __ dci(0x44c68a90); // srshlr z16.d, p2/m, z16.d, z20.d + // vl128 state = 0xcf9f9508 + __ dci(0x44c68a80); // srshlr z0.d, p2/m, z0.d, z20.d + // vl128 state = 0xd476915b + __ dci(0x44868a02); // srshlr z2.s, p2/m, z2.s, z16.s + // vl128 state = 0x9acbc986 + __ dci(0x44868a12); // srshlr z18.s, p2/m, z18.s, z16.s + // vl128 state = 0xaf9e1114 + __ dci(0x4486921a); // srshlr z26.s, p4/m, z26.s, z16.s + // vl128 state = 0x9d188add + __ dci(0x4486909e); // srshlr z30.s, p4/m, z30.s, z4.s + // vl128 state = 0xb41018d5 + __ dci(0x448c9096); // sqshlr z22.s, p4/m, z22.s, z4.s + // vl128 state = 0x4ab51dea + __ dci(0x448890b4); // sqshl z20.s, p4/m, z20.s, z5.s + // vl128 state = 0x600dcc36 + __ dci(0x448884bc); // sqshl z28.s, p1/m, z28.s, z5.s + // vl128 state = 0x84f37050 + __ dci(0x44c88434); // sqshl z20.d, p1/m, z20.d, z1.d + // vl128 state = 0x1f19ce5a + __ dci(0x44cc8536); // sqshlr z22.d, p1/m, z22.d, z9.d + // vl128 state = 0xa51d3f31 + __ dci(0x448c8517); // sqshlr z23.s, p1/m, z23.s, z8.s + // vl128 state = 0x8d431292 + __ dci(0x448c8133); // sqshlr z19.s, p0/m, z19.s, z9.s + 
// vl128 state = 0xdd59917f + __ dci(0x448c8b23); // sqshlr z3.s, p2/m, z3.s, z25.s + // vl128 state = 0xfcdae7d4 + __ dci(0x448c8b21); // sqshlr z1.s, p2/m, z1.s, z25.s + // vl128 state = 0x0f1239a5 + __ dci(0x448c8b29); // sqshlr z9.s, p2/m, z9.s, z25.s + // vl128 state = 0xf6d1f180 + __ dci(0x448c8b2b); // sqshlr z11.s, p2/m, z11.s, z25.s + // vl128 state = 0xe7a1af08 + __ dci(0x448c8b89); // sqshlr z9.s, p2/m, z9.s, z28.s + // vl128 state = 0xa72666cb + __ dci(0x448c9bcb); // sqshlr z11.s, p6/m, z11.s, z30.s + // vl128 state = 0x9cae5fd7 + __ dci(0x44869bca); // srshlr z10.s, p6/m, z10.s, z30.s + // vl128 state = 0xda133b76 + __ dci(0x04869b8e); // sqshl z14.d, p6/m, z14.d, #28 + // vl128 state = 0xf8eb71c2 + __ dci(0x44869bca); // srshlr z10.s, p6/m, z10.s, z30.s + // vl128 state = 0xbe561563 + __ dci(0x44869ae2); // srshlr z2.s, p6/m, z2.s, z23.s + // vl128 state = 0x0c286f7e + __ dci(0x44869a46); // srshlr z6.s, p6/m, z6.s, z18.s + // vl128 state = 0x59da6464 + __ dci(0x44869a47); // srshlr z7.s, p6/m, z7.s, z18.s + // vl128 state = 0x908e5664 + __ dci(0x4486920f); // srshlr z15.s, p4/m, z15.s, z16.s + // vl128 state = 0x213d23db + __ dci(0x44869a87); // srshlr z7.s, p6/m, z7.s, z20.s + // vl128 state = 0xd81ea7fb + __ dci(0x44469a86); // srshlr z6.h, p6/m, z6.h, z20.h + // vl128 state = 0x27d44726 + __ dci(0x44029a82); // srshl z2.b, p6/m, z2.b, z20.b + // vl128 state = 0x2187127f + __ dci(0x44069aa0); // srshlr z0.b, p6/m, z0.b, z21.b + // vl128 state = 0x68ba9323 + __ dci(0x444692b0); // srshlr z16.h, p4/m, z16.h, z21.h + // vl128 state = 0x148619ff + __ dci(0x44468ab2); // srshlr z18.h, p2/m, z18.h, z21.h + // vl128 state = 0xae93eae6 + __ dci(0x444698b6); // srshlr z22.h, p6/m, z22.h, z5.h + // vl128 state = 0x0b875035 + __ dci(0x44469934); // srshlr z20.h, p6/m, z20.h, z9.h + // vl128 state = 0x559132ed + __ dci(0x0406993c); // sqshl z28.b, p6/m, z28.b, #1 + // vl128 state = 0xec1782e4 + __ dci(0x4406912c); // srshlr z12.b, p4/m, z12.b, z9.b + // vl128 
state = 0x089d32a4 + __ dci(0x440291ae); // srshl z14.b, p4/m, z14.b, z13.b + // vl128 state = 0xde257893 + __ dci(0x44829126); // srshl z6.s, p4/m, z6.s, z9.s + // vl128 state = 0x318d27ef + __ dci(0x448a8127); // sqrshl z7.s, p0/m, z7.s, z9.s + // vl128 state = 0x1bc564fc + __ dci(0x448e8165); // sqrshlr z5.s, p0/m, z5.s, z11.s + // vl128 state = 0xa5e5c696 + __ dci(0x44869161); // srshlr z1.s, p4/m, z1.s, z11.s + // vl128 state = 0xd64b6830 + __ dci(0x44829120); // srshl z0.s, p4/m, z0.s, z9.s + // vl128 state = 0x107ca84d + __ dci(0x44829124); // srshl z4.s, p4/m, z4.s, z9.s + // vl128 state = 0xcd5688f3 + __ dci(0x4482912c); // srshl z12.s, p4/m, z12.s, z9.s + // vl128 state = 0x88dee210 + __ dci(0x44829128); // srshl z8.s, p4/m, z8.s, z9.s + // vl128 state = 0xfe8611fa + __ dci(0x44c69120); // srshlr z0.d, p4/m, z0.d, z9.d + // vl128 state = 0xe8b8cabd + __ dci(0x44ce9168); // sqrshlr z8.d, p4/m, z8.d, z11.d + // vl128 state = 0x269af804 + __ dci(0x448e9069); // sqrshlr z9.s, p4/m, z9.s, z3.s + // vl128 state = 0x7d425704 + __ dci(0x448e8461); // sqrshlr z1.s, p1/m, z1.s, z3.s + // vl128 state = 0x1577bd67 + __ dci(0x448e8460); // sqrshlr z0.s, p1/m, z0.s, z3.s + // vl128 state = 0x6966617f + __ dci(0x448a8428); // sqrshl z8.s, p1/m, z8.s, z1.s + // vl128 state = 0x6c9cc508 + __ dci(0x44ca8409); // sqrshl z9.d, p1/m, z9.d, z0.d + // vl128 state = 0xb3ea2e65 + __ dci(0x44c68408); // srshlr z8.d, p1/m, z8.d, z0.d + // vl128 state = 0x1aef7620 + __ dci(0x44c6840a); // srshlr z10.d, p1/m, z10.d, z0.d + // vl128 state = 0x63f2c5a3 + __ dci(0x44cc840e); // sqshlr z14.d, p1/m, z14.d, z0.d + // vl128 state = 0xb54a8f94 + __ dci(0x44cc8e1e); // sqshlr z30.d, p3/m, z30.d, z16.d + // vl128 state = 0xe247e0a3 + __ dci(0x44c68e1a); // srshlr z26.d, p3/m, z26.d, z16.d + // vl128 state = 0xfb8bf060 + __ dci(0x44c28a0a); // srshl z10.d, p2/m, z10.d, z16.d + // vl128 state = 0x829643e3 + __ dci(0x44c68e0e); // srshlr z14.d, p3/m, z14.d, z16.d + // vl128 state = 0x8bd62d7b + 
__ dci(0x44c6881e); // srshlr z30.d, p2/m, z30.d, z0.d + // vl128 state = 0x4d8caca2 + __ dci(0x44869816); // srshlr z22.s, p6/m, z22.s, z0.s + // vl128 state = 0x027f41ac + __ dci(0x44029817); // srshl z23.b, p6/m, z23.b, z0.b + // vl128 state = 0xab9c9627 + __ dci(0x4402993f); // srshl z31.b, p6/m, z31.b, z9.b + // vl128 state = 0x42a71056 + __ dci(0x4406991e); // srshlr z30.b, p6/m, z30.b, z8.b + // vl128 state = 0xdcdf1396 + __ dci(0x44068d1f); // srshlr z31.b, p3/m, z31.b, z8.b + // vl128 state = 0x84fa5cac + __ dci(0x44068d1d); // srshlr z29.b, p3/m, z29.b, z8.b + // vl128 state = 0x1239cdae + __ dci(0x44468d2d); // srshlr z13.h, p3/m, z13.h, z9.h + // vl128 state = 0xae689b2f + __ dci(0x4446850f); // srshlr z15.h, p1/m, z15.h, z8.h + // vl128 state = 0x6330c9c2 + __ dci(0x4446910e); // srshlr z14.h, p4/m, z14.h, z8.h + // vl128 state = 0x326ffb9f + __ dci(0x4446940f); // srshlr z15.h, p5/m, z15.h, z0.h + // vl128 state = 0x3f48f466 + __ dci(0x44468487); // srshlr z7.h, p1/m, z7.h, z4.h + // vl128 state = 0x0d3b6c65 + __ dci(0x444694b7); // srshlr z23.h, p5/m, z23.h, z5.h + // vl128 state = 0x5ef21cd8 + __ dci(0x44469c93); // srshlr z19.h, p7/m, z19.h, z4.h + // vl128 state = 0x413d5573 + __ dci(0x44069e92); // srshlr z18.b, p7/m, z18.b, z20.b + // vl128 state = 0xac59d0c3 + __ dci(0x44469693); // srshlr z19.h, p5/m, z19.h, z20.h + // vl128 state = 0xb3969968 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xb3969968, + 0x8ba60941, + 0x53937d52, + 0xe6737b5d, + 0x8649cf1f, + 0xb7ee12ca, + 0x6fd03bd4, + 0x4a82eb52, + 0xc0d52997, + 0xb52a263f, + 0x70599fa2, + 0x68cd2ef1, + 0x57b84410, + 0x1072dde9, + 0xe39a23c8, + 0xeded9f88, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_usra) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + 
CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x450ce41d); // usra z29.b, z0.b, #4 + // vl128 state = 0x57e84943 + __ dci(0x450ce635); // usra z21.b, z17.b, #4 + // vl128 state = 0xc2696a7c + __ dci(0x45cce637); // usra z23.d, z17.d, #20 + // vl128 state = 0x97aec47c + __ dci(0x458cee35); // ursra z21.d, z17.d, #52 + // vl128 state = 0xab24864c + __ dci(0x450eee25); // ursra z5.b, z17.b, #2 + // vl128 state = 0x8aab49c9 + __ dci(0x458eef21); // ursra z1.d, z25.d, #50 + // vl128 state = 0x3db09e7f + __ dci(0x458fef65); // ursra z5.d, z27.d, #49 + // vl128 state = 0xa9905ae3 + __ dci(0x459fef41); // ursra z1.d, z26.d, #33 + // vl128 state = 0x624c2e4d + __ dci(0x459fe549); // usra z9.d, z10.d, #33 + // vl128 state = 0x5a158f70 + __ dci(0x459de561); // usra z1.d, z11.d, #35 + // vl128 state = 0xf24ffa83 + __ dci(0x451ce565); // usra z5.h, z11.h, #4 + // vl128 state = 0x0213f9c7 + __ dci(0x4519e564); // usra z4.h, z11.h, #7 + // vl128 state = 0x8903ccf3 + __ dci(0x4589e56c); // usra z12.d, z11.d, #55 + // vl128 state = 0x3c0f6e72 + __ dci(0x4589e56e); // usra z14.d, z11.d, #55 + // vl128 state = 0x5d9787fc + __ dci(0x4589e56c); // usra z12.d, z11.d, #55 + // vl128 state = 0x3bc6fced + __ dci(0x458bed64); // ursra z4.d, z11.d, #53 + // vl128 state = 0x966476e2 + __ dci(0x45dbed65); // ursra z5.d, z11.d, #5 + // vl128 state = 0xf85c4247 + __ dci(0x455bedf5); // ursra z21.s, z15.s, #5 + // vl128 state = 0xd342f9ae + __ dci(0x450bedfd); // ursra z29.b, z15.b, #5 + // vl128 state = 0xc03cb476 + __ dci(0x4549edf9); // ursra z25.s, z15.s, #23 + // vl128 state = 0x5649b073 + __ dci(0x4549ede9); // ursra z9.s, z15.s, #23 + // vl128 state = 0xce5a7dbb + __ dci(0x4549ed59); // ursra z25.s, z10.s, #23 + // vl128 state = 0x8c98ee08 + __ dci(0x4549ed5d); // ursra z29.s, z10.s, #23 + // vl128 state = 0xd991a574 + __ 
dci(0x45cded59); // ursra z25.d, z10.d, #19 + // vl128 state = 0xebc24746 + __ dci(0x45d9ed58); // ursra z24.d, z10.d, #7 + // vl128 state = 0x145d5970 + __ dci(0x45d8ec50); // ursra z16.d, z2.d, #8 + // vl128 state = 0x8f65850c + __ dci(0x45c8ec60); // ursra z0.d, z3.d, #24 + // vl128 state = 0xe510a1b4 + __ dci(0x45c0ed61); // ursra z1.d, z11.d, #32 + // vl128 state = 0xfef468e1 + __ dci(0x45c8ec65); // ursra z5.d, z3.d, #24 + // vl128 state = 0xa6754589 + __ dci(0x45c0e464); // usra z4.d, z3.d, #32 + // vl128 state = 0x2b4cd23a + __ dci(0x45c0e4a5); // usra z5.d, z5.d, #32 + // vl128 state = 0xfa58fea0 + __ dci(0x45c0e4a1); // usra z1.d, z5.d, #32 + // vl128 state = 0x015c4435 + __ dci(0x45c0e4b1); // usra z17.d, z5.d, #32 + // vl128 state = 0x67271050 + __ dci(0x45c2ecb3); // ursra z19.d, z5.d, #30 + // vl128 state = 0x1d3631c3 + __ dci(0x45c0ece3); // ursra z3.d, z7.d, #32 + // vl128 state = 0x646e0e43 + __ dci(0x45caece7); // ursra z7.d, z7.d, #22 + // vl128 state = 0x104bf393 + __ dci(0x458aeee3); // ursra z3.d, z23.d, #54 + // vl128 state = 0xbac8c54b + __ dci(0x454aeee1); // ursra z1.s, z23.s, #22 + // vl128 state = 0x5c2a40db + __ dci(0x4508eee9); // ursra z9.b, z23.b, #8 + // vl128 state = 0xe117d81a + __ dci(0x4518ece1); // ursra z1.h, z7.h, #8 + // vl128 state = 0xeb43265d + __ dci(0x451cede0); // ursra z0.h, z15.h, #4 + // vl128 state = 0xd5c8d09e + __ dci(0x4598edf0); // ursra z16.d, z15.d, #40 + // vl128 state = 0x0c060220 + __ dci(0x451cede0); // ursra z0.h, z15.h, #4 + // vl128 state = 0x0ea52d2d + __ dci(0x459cefe8); // ursra z8.d, z31.d, #36 + // vl128 state = 0xa6a7e977 + __ dci(0x459ce5f8); // usra z24.d, z15.d, #36 + // vl128 state = 0xb0192caf + __ dci(0x458cedfa); // ursra z26.d, z15.d, #52 + // vl128 state = 0x154fce29 + __ dci(0x458cedfe); // ursra z30.d, z15.d, #52 + // vl128 state = 0x369cc3e1 + __ dci(0x450cedb6); // ursra z22.b, z13.b, #4 + // vl128 state = 0xf613cb4b + __ dci(0x450cedb4); // ursra z20.b, z13.b, #4 + // vl128 state = 
0xd075c8a9 + __ dci(0x458eeda4); // ursra z4.d, z13.d, #50 + // vl128 state = 0xc9366682 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xc9366682, + 0xaf202cff, + 0x0e90a7c4, + 0xa8c89f40, + 0xc7bb56ad, + 0xa203dd34, + 0xf3b3a749, + 0xf16c9d5f, + 0x9929dea8, + 0xd652c693, + 0xe76f701b, + 0xe2fe20a3, + 0x07182afb, + 0x816b928f, + 0x52baf33f, + 0x9ef46875, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_ssra) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x450ce01d); // ssra z29.b, z0.b, #4 + // vl128 state = 0xdf461c2b + __ dci(0x450ce235); // ssra z21.b, z17.b, #4 + // vl128 state = 0xd28868a9 + __ dci(0x45cce237); // ssra z23.d, z17.d, #20 + // vl128 state = 0x874fc6a9 + __ dci(0x458cea35); // srsra z21.d, z17.d, #52 + // vl128 state = 0xb848785b + __ dci(0x450eea25); // srsra z5.b, z17.b, #2 + // vl128 state = 0x8bca62e4 + __ dci(0x458eeb21); // srsra z1.d, z25.d, #50 + // vl128 state = 0x3cd1b552 + __ dci(0x458feb65); // srsra z5.d, z27.d, #49 + // vl128 state = 0xd78844fb + __ dci(0x459feb41); // srsra z1.d, z26.d, #33 + // vl128 state = 0xa948dc2f + __ dci(0x459fe149); // ssra z9.d, z10.d, #33 + // vl128 state = 0x709a83f1 + __ dci(0x459de161); // ssra z1.d, z11.d, #35 + // vl128 state = 0x1c21e4f6 + __ dci(0x451ce165); // ssra z5.h, z11.h, #4 + // vl128 state = 0x72288f41 + __ dci(0x4519e164); // ssra z4.h, z11.h, #7 + // vl128 state = 0x9a8c4c8c + __ dci(0x4589e16c); // ssra z12.d, z11.d, #55 + // vl128 state = 0x872585d4 + __ dci(0x4589e16e); // ssra z14.d, z11.d, #55 + // vl128 state = 0xd237aaa0 + __ dci(0x4589e16c); // 
ssra z12.d, z11.d, #55 + // vl128 state = 0x1c828333 + __ dci(0x458be964); // srsra z4.d, z11.d, #53 + // vl128 state = 0xc190178f + __ dci(0x45dbe965); // srsra z5.d, z11.d, #5 + // vl128 state = 0xe9e81bda + __ dci(0x455be9f5); // srsra z21.s, z15.s, #5 + // vl128 state = 0x8e58c7a1 + __ dci(0x450be9fd); // srsra z29.b, z15.b, #5 + // vl128 state = 0x904b404b + __ dci(0x4549e9f9); // srsra z25.s, z15.s, #23 + // vl128 state = 0x35a60481 + __ dci(0x4549e9e9); // srsra z9.s, z15.s, #23 + // vl128 state = 0x6911448b + __ dci(0x4549e959); // srsra z25.s, z10.s, #23 + // vl128 state = 0xdb384324 + __ dci(0x4549e95d); // srsra z29.s, z10.s, #23 + // vl128 state = 0x16acd8ee + __ dci(0x45cde959); // srsra z25.d, z10.d, #19 + // vl128 state = 0x56bf7bda + __ dci(0x45d9e958); // srsra z24.d, z10.d, #7 + // vl128 state = 0x6a713fa6 + __ dci(0x45d8e850); // srsra z16.d, z2.d, #8 + // vl128 state = 0xa6394cf3 + __ dci(0x45c8e860); // srsra z0.d, z3.d, #24 + // vl128 state = 0x829c3d2a + __ dci(0x45c0e961); // srsra z1.d, z11.d, #32 + // vl128 state = 0x006d1904 + __ dci(0x45c8e865); // srsra z5.d, z3.d, #24 + // vl128 state = 0xcc7dffaf + __ dci(0x45c0e064); // ssra z4.d, z3.d, #32 + // vl128 state = 0xc9eaddd0 + __ dci(0x45c0e0a5); // ssra z5.d, z5.d, #32 + // vl128 state = 0x643145e1 + __ dci(0x45c0e0a1); // ssra z1.d, z5.d, #32 + // vl128 state = 0x03f4c42e + __ dci(0x45c0e0b1); // ssra z17.d, z5.d, #32 + // vl128 state = 0x5a8cff35 + __ dci(0x45c2e8b3); // srsra z19.d, z5.d, #30 + // vl128 state = 0x3ee63e9f + __ dci(0x45c0e8e3); // srsra z3.d, z7.d, #32 + // vl128 state = 0x687d943b + __ dci(0x45cae8e7); // srsra z7.d, z7.d, #22 + // vl128 state = 0xf5a19cb2 + __ dci(0x458aeae3); // srsra z3.d, z23.d, #54 + // vl128 state = 0xd1371248 + __ dci(0x454aeae1); // srsra z1.s, z23.s, #22 + // vl128 state = 0xdb83ef8b + __ dci(0x455ae8e9); // srsra z9.s, z7.s, #6 + // vl128 state = 0xc831a54c + __ dci(0x455ee9e8); // srsra z8.s, z15.s, #2 + // vl128 state = 0x4342b823 + __ 
dci(0x45dae9f8); // srsra z24.d, z15.d, #6 + // vl128 state = 0x52a7151a + __ dci(0x455ee9e8); // srsra z8.s, z15.s, #2 + // vl128 state = 0xde8110e0 + __ dci(0x45deebe0); // srsra z0.d, z31.d, #2 + // vl128 state = 0xd2b28e81 + __ dci(0x45dee1f0); // ssra z16.d, z15.d, #2 + // vl128 state = 0x56d1c366 + __ dci(0x45cee9f2); // srsra z18.d, z15.d, #18 + // vl128 state = 0x53537689 + __ dci(0x45cee9f6); // srsra z22.d, z15.d, #18 + // vl128 state = 0x5e410508 + __ dci(0x454ee9be); // srsra z30.s, z13.s, #18 + // vl128 state = 0x06245094 + __ dci(0x454ee9bc); // srsra z28.s, z13.s, #18 + // vl128 state = 0xb92b3929 + __ dci(0x45cce9ac); // srsra z12.d, z13.d, #20 + // vl128 state = 0xfe6a2830 + __ dci(0x45cde93c); // srsra z28.d, z9.d, #19 + // vl128 state = 0x737461a1 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x737461a1, + 0xe1ef707c, + 0x9760ba4e, + 0x782dd4cd, + 0xe793d0c2, + 0x991e0de7, + 0x34627e21, + 0x76c89433, + 0x96c9f4ce, + 0x38ec4b6f, + 0x7aee3ec7, + 0x665f9b94, + 0x8e166fc3, + 0xb4461fac, + 0x215de9dc, + 0xc23ef1f9, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sat_arith) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44df9df9); // uqsubr z25.d, p7/m, z25.d, z15.d + // vl128 state = 0x7670ac87 + __ dci(0x445f9db1); // uqsubr z17.h, p7/m, z17.h, z13.h + // vl128 state = 0x3c5b39fe + __ dci(0x441f99a1); // uqsubr z1.b, p6/m, z1.b, z13.b + // vl128 state = 0x5df43635 + __ dci(0x441d9ba0); // usqadd z0.b, p6/m, z0.b, z29.b + // vl128 state = 0x737bc7a5 + __ dci(0x441d9ba8); // usqadd z8.b, p6/m, z8.b, z29.b + // vl128 
state = 0xba69890b + __ dci(0x441d9bb8); // usqadd z24.b, p6/m, z24.b, z29.b + // vl128 state = 0x3f81c19d + __ dci(0x441d8b30); // usqadd z16.b, p2/m, z16.b, z25.b + // vl128 state = 0x076c5fc1 + __ dci(0x441d8a14); // usqadd z20.b, p2/m, z20.b, z16.b + // vl128 state = 0x67df29dd + __ dci(0x449d8215); // usqadd z21.s, p0/m, z21.s, z16.s + // vl128 state = 0x663b236f + __ dci(0x449d8205); // usqadd z5.s, p0/m, z5.s, z16.s + // vl128 state = 0xe58d41d0 + __ dci(0x449d8201); // usqadd z1.s, p0/m, z1.s, z16.s + // vl128 state = 0x82f89d40 + __ dci(0x449c8a09); // suqadd z9.s, p2/m, z9.s, z16.s + // vl128 state = 0xa0218390 + __ dci(0x44dd8a0d); // usqadd z13.d, p2/m, z13.d, z16.d + // vl128 state = 0xfab22f04 + __ dci(0x44d98a2c); // uqadd z12.d, p2/m, z12.d, z17.d + // vl128 state = 0x70911fc9 + __ dci(0x44598a0d); // uqadd z13.h, p2/m, z13.h, z16.h + // vl128 state = 0xcc12ec49 + __ dci(0x44d99a05); // uqadd z5.d, p6/m, z5.d, z16.d + // vl128 state = 0x31fef46f + __ dci(0x44d99004); // uqadd z4.d, p4/m, z4.d, z0.d + // vl128 state = 0xf81448db + __ dci(0x44d98020); // uqadd z0.d, p0/m, z0.d, z1.d + // vl128 state = 0xe6fe9d31 + __ dci(0x44d980e1); // uqadd z1.d, p0/m, z1.d, z7.d + // vl128 state = 0x76fecfc2 + __ dci(0x44d981c0); // uqadd z0.d, p0/m, z0.d, z14.d + // vl128 state = 0x4066a558 + __ dci(0x44d98161); // uqadd z1.d, p0/m, z1.d, z11.d + // vl128 state = 0x0d3a1487 + __ dci(0x44d98031); // uqadd z17.d, p0/m, z17.d, z1.d + // vl128 state = 0x061b4aed + __ dci(0x44d98039); // uqadd z25.d, p0/m, z25.d, z1.d + // vl128 state = 0x02172a17 + __ dci(0x44d98029); // uqadd z9.d, p0/m, z9.d, z1.d + // vl128 state = 0xebe138b3 + __ dci(0x44d8800d); // sqadd z13.d, p0/m, z13.d, z0.d + // vl128 state = 0x73f0114b + __ dci(0x44d8828f); // sqadd z15.d, p0/m, z15.d, z20.d + // vl128 state = 0x7a8689e0 + __ dci(0x44d8829f); // sqadd z31.d, p0/m, z31.d, z20.d + // vl128 state = 0x0800ae49 + __ dci(0x44d88e8f); // sqadd z15.d, p3/m, z15.d, z20.d + // vl128 state = 
0x9b733fff + __ dci(0x44d88e8b); // sqadd z11.d, p3/m, z11.d, z20.d + // vl128 state = 0x6d01eb90 + __ dci(0x44d88e8f); // sqadd z15.d, p3/m, z15.d, z20.d + // vl128 state = 0x337692b3 + __ dci(0x44d8968e); // sqadd z14.d, p5/m, z14.d, z20.d + // vl128 state = 0xcd4478b6 + __ dci(0x44d886ca); // sqadd z10.d, p1/m, z10.d, z22.d + // vl128 state = 0x335fd099 + __ dci(0x44dc87ce); // suqadd z14.d, p1/m, z14.d, z30.d + // vl128 state = 0x0d3b6403 + __ dci(0x44de8fcf); // sqsubr z15.d, p3/m, z15.d, z30.d + // vl128 state = 0x41a1073f + __ dci(0x449e9fcd); // sqsubr z13.s, p7/m, z13.s, z30.s + // vl128 state = 0x5a4b1c22 + __ dci(0x445e9fcf); // sqsubr z15.h, p7/m, z15.h, z30.h + // vl128 state = 0x5a08ccf1 + __ dci(0x441e9ece); // sqsubr z14.b, p7/m, z14.b, z22.b + // vl128 state = 0x3f3c700c + __ dci(0x441e8cde); // sqsubr z30.b, p3/m, z30.b, z6.b + // vl128 state = 0x3b32b296 + __ dci(0x441e88fa); // sqsubr z26.b, p2/m, z26.b, z7.b + // vl128 state = 0x7a6472e3 + __ dci(0x441f98f8); // uqsubr z24.b, p6/m, z24.b, z7.b + // vl128 state = 0x1d72f5ea + __ dci(0x441f98fc); // uqsubr z28.b, p6/m, z28.b, z7.b + // vl128 state = 0x0245804b + __ dci(0x441b9afe); // uqsub z30.b, p6/m, z30.b, z23.b + // vl128 state = 0x8c7ac3d7 + __ dci(0x441b9afc); // uqsub z28.b, p6/m, z28.b, z23.b + // vl128 state = 0xa96d65cb + __ dci(0x449b9a74); // uqsub z20.s, p6/m, z20.s, z19.s + // vl128 state = 0x261eb58f + __ dci(0x449a9b75); // sqsub z21.s, p6/m, z21.s, z27.s + // vl128 state = 0x3464e3e5 + __ dci(0x449a9b7d); // sqsub z29.s, p6/m, z29.s, z27.s + // vl128 state = 0xfe3ab427 + __ dci(0x445a9b79); // sqsub z25.h, p6/m, z25.h, z27.h + // vl128 state = 0x609eef3a + __ dci(0x445a9b7d); // sqsub z29.h, p6/m, z29.h, z27.h + // vl128 state = 0x0e6d6940 + __ dci(0x445e9b5f); // sqsubr z31.h, p6/m, z31.h, z26.h + // vl128 state = 0x60a375e7 + __ dci(0x441e8b5b); // sqsubr z27.b, p2/m, z27.b, z26.b + // vl128 state = 0xea9bd16f + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + 
__ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xea9bd16f, + 0x1296119e, + 0x00aaf6dc, + 0xb6ce0579, + 0xdb3d0829, + 0x119f52d0, + 0xf697dcd8, + 0x2c46a66c, + 0x7d838497, + 0x6cd68fb3, + 0xf98a5c79, + 0x51685054, + 0xa9494104, + 0x8d012936, + 0x32726258, + 0x091f1956, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_pair_arith) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 64 * kInstructionSize); + __ dci(0x4414b214); // smaxp z20.b, p4/m, z20.b, z16.b + // vl128 state = 0x90adc6c9 + __ dci(0x4414ba5c); // smaxp z28.b, p6/m, z28.b, z18.b + // vl128 state = 0x0e41b2b9 + __ dci(0x4454ba0c); // smaxp z12.h, p6/m, z12.h, z16.h + // vl128 state = 0x472160b8 + __ dci(0x4454ba64); // smaxp z4.h, p6/m, z4.h, z19.h + // vl128 state = 0x4f485ba3 + __ dci(0x44d4bb65); // smaxp z5.d, p6/m, z5.d, z27.d + // vl128 state = 0x432f5185 + __ dci(0x4456bb64); // sminp z4.h, p6/m, z4.h, z27.h + // vl128 state = 0x01bd324a + __ dci(0x4455bb74); // umaxp z20.h, p6/m, z20.h, z27.h + // vl128 state = 0xaf795389 + __ dci(0x4451bb35); // addp z21.h, p6/m, z21.h, z25.h + // vl128 state = 0x5f4be111 + __ dci(0x4451ab71); // addp z17.h, p2/m, z17.h, z27.h + // vl128 state = 0xc16a8d03 + __ dci(0x4451ba75); // addp z21.h, p6/m, z21.h, z19.h + // vl128 state = 0x8cd36853 + __ dci(0x4451b225); // addp z5.h, p4/m, z5.h, z17.h + // vl128 state = 0xea3d5389 + __ dci(0x4455b627); // umaxp z7.h, p5/m, z7.h, z17.h + // vl128 state = 0xbb42a8e1 + __ dci(0x4415b426); // umaxp z6.b, p5/m, z6.b, z1.b + // vl128 state = 0x485ca761 + __ dci(0x4415b224); // umaxp z4.b, p4/m, z4.b, z17.b + // vl128 state = 0x6bcfd641 + __ dci(0x4455b02c); // umaxp z12.h, p4/m, z12.h, z1.h + 
// vl128 state = 0x84485a9f + __ dci(0x4455a12d); // umaxp z13.h, p0/m, z13.h, z9.h + // vl128 state = 0xed43519f + __ dci(0x4455b33d); // umaxp z29.h, p4/m, z29.h, z25.h + // vl128 state = 0xcc0b7c40 + __ dci(0x4455b7b9); // umaxp z25.h, p5/m, z25.h, z29.h + // vl128 state = 0xe1c14517 + __ dci(0x4454b6b8); // smaxp z24.h, p5/m, z24.h, z21.h + // vl128 state = 0x4c5e9f3c + __ dci(0x44d4b4bc); // smaxp z28.d, p5/m, z28.d, z5.d + // vl128 state = 0x7530a2f7 + __ dci(0x44d4b4bd); // smaxp z29.d, p5/m, z29.d, z5.d + // vl128 state = 0x37e61b68 + __ dci(0x44d4b5ed); // smaxp z13.d, p5/m, z13.d, z15.d + // vl128 state = 0xb592b6e9 + __ dci(0x4455b5fd); // umaxp z29.h, p5/m, z29.h, z15.h + // vl128 state = 0xe7f9e492 + __ dci(0x4415b57f); // umaxp z31.b, p5/m, z31.b, z11.b + // vl128 state = 0xe4e7b644 + __ dci(0x4411b5fe); // addp z30.b, p5/m, z30.b, z15.b + // vl128 state = 0x4bfe144d + __ dci(0x4411a576); // addp z22.b, p1/m, z22.b, z11.b + // vl128 state = 0xb1813df8 + __ dci(0x4455a566); // umaxp z6.h, p1/m, z6.h, z11.h + // vl128 state = 0x4aa8b50e + __ dci(0x4455adf6); // umaxp z22.h, p3/m, z22.h, z15.h + // vl128 state = 0xfc13568a + __ dci(0x4454acfe); // smaxp z30.h, p3/m, z30.h, z7.h + // vl128 state = 0x3aac7365 + __ dci(0x4454acff); // smaxp z31.h, p3/m, z31.h, z7.h + // vl128 state = 0x610991cf + __ dci(0x44d4a8fb); // smaxp z27.d, p2/m, z27.d, z7.d + // vl128 state = 0x36581f26 + __ dci(0x4456a8f3); // sminp z19.h, p2/m, z19.h, z7.h + // vl128 state = 0x249bb813 + __ dci(0x4457a8b1); // uminp z17.h, p2/m, z17.h, z5.h + // vl128 state = 0xd48d6d88 + __ dci(0x4457a8b5); // uminp z21.h, p2/m, z21.h, z5.h + // vl128 state = 0x1628fb6e + __ dci(0x4456a8f7); // sminp z23.h, p2/m, z23.h, z7.h + // vl128 state = 0x0bd3c76b + __ dci(0x4456a89f); // sminp z31.h, p2/m, z31.h, z4.h + // vl128 state = 0xf09d21e4 + __ dci(0x4456aa0f); // sminp z15.h, p2/m, z15.h, z16.h + // vl128 state = 0xd2a92168 + __ dci(0x4456b807); // sminp z7.h, p6/m, z7.h, z0.h + // vl128 state = 
0x009d0ac8 + __ dci(0x4456bc26); // sminp z6.h, p7/m, z6.h, z1.h + // vl128 state = 0x716ddc73 + __ dci(0x4456beae); // sminp z14.h, p7/m, z14.h, z21.h + // vl128 state = 0x35a4d900 + __ dci(0x4416b6ac); // sminp z12.b, p5/m, z12.b, z21.b + // vl128 state = 0x7929e077 + __ dci(0x4416b6bc); // sminp z28.b, p5/m, z28.b, z21.b + // vl128 state = 0x259195ca + __ dci(0x4417b694); // uminp z20.b, p5/m, z20.b, z20.b + // vl128 state = 0x5cc3927b + __ dci(0x4417b684); // uminp z4.b, p5/m, z4.b, z20.b + // vl128 state = 0x2e7c4b88 + __ dci(0x4415b6a0); // umaxp z0.b, p5/m, z0.b, z21.b + // vl128 state = 0x1478d524 + __ dci(0x4415a690); // umaxp z16.b, p1/m, z16.b, z20.b + // vl128 state = 0xc3ac4a89 + __ dci(0x4415b614); // umaxp z20.b, p5/m, z20.b, z16.b + // vl128 state = 0xb94a5aeb + __ dci(0x4415b675); // umaxp z21.b, p5/m, z21.b, z19.b + // vl128 state = 0xabeed92b + __ dci(0x4415a63d); // umaxp z29.b, p1/m, z29.b, z17.b + // vl128 state = 0xe36835ea + __ dci(0x4415a63c); // umaxp z28.b, p1/m, z28.b, z17.b + // vl128 state = 0x087002bb + __ dci(0x4455a61d); // umaxp z29.h, p1/m, z29.h, z16.h + // vl128 state = 0x17388ea4 + __ dci(0x4451ae1f); // addp z31.h, p3/m, z31.h, z16.h + // vl128 state = 0x86ee7dbe + __ dci(0x4451ae1b); // addp z27.h, p3/m, z27.h, z16.h + // vl128 state = 0x9846169e + __ dci(0x4451bc0b); // addp z11.h, p7/m, z11.h, z0.h + // vl128 state = 0x5dc31eb0 + __ dci(0x4455bc4f); // umaxp z15.h, p7/m, z15.h, z2.h + // vl128 state = 0x9ec9086c + __ dci(0x4455bf47); // umaxp z7.h, p7/m, z7.h, z26.h + // vl128 state = 0xf3a2766b + __ dci(0x44d5b743); // umaxp z3.d, p5/m, z3.d, z26.d + // vl128 state = 0x1ce44f7e + __ dci(0x44d5b7e2); // umaxp z2.d, p5/m, z2.d, z31.d + // vl128 state = 0xf121f7c0 + __ dci(0x44d5b7e0); // umaxp z0.d, p5/m, z0.d, z31.d + // vl128 state = 0x4ac0d4f3 + __ dci(0x44d5b670); // umaxp z16.d, p5/m, z16.d, z19.d + // vl128 state = 0xdb0d62f5 + __ dci(0x44d1b272); // addp z18.d, p4/m, z18.d, z19.d + // vl128 state = 0x34b0c018 + __ 
dci(0x44d1be76); // addp z22.d, p7/m, z22.d, z19.d + // vl128 state = 0x1673f380 + __ dci(0x44d1b772); // addp z18.d, p5/m, z18.d, z27.d + // vl128 state = 0xe3e67205 + __ dci(0x44d1b162); // addp z2.d, p4/m, z2.d, z11.d + // vl128 state = 0x42907adc + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x42907adc, + 0xee2f21f5, + 0xcbfa0af4, + 0x42e7c862, + 0x10ef537f, + 0x83461e96, + 0x2dca0c37, + 0xf2080504, + 0xf615d956, + 0x1732775a, + 0x491fec07, + 0xf9e33ada, + 0x324435d7, + 0x08a9c2ca, + 0x87ce3994, + 0x338adb5d, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_extract_narrow) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 64 * kInstructionSize); + __ dci(0x45284000); // sqxtnb z0.b, z0.h + // vl128 state = 0x874f147b + __ dci(0x45284228); // sqxtnb z8.b, z17.h + // vl128 state = 0xf694d31e + __ dci(0x45284820); // uqxtnb z0.b, z1.h + // vl128 state = 0x5d25df42 + __ dci(0x45304821); // uqxtnb z1.h, z1.s + // vl128 state = 0x87eb933f + __ dci(0x45304823); // uqxtnb z3.h, z1.s + // vl128 state = 0x137eddc9 + __ dci(0x45604822); // uqxtnb z2.s, z1.d + // vl128 state = 0x26e237a3 + __ dci(0x45604d26); // uqxtnt z6.s, z9.d + // vl128 state = 0x72bcf361 + __ dci(0x45304d2e); // uqxtnt z14.h, z9.s + // vl128 state = 0x5bcdd232 + __ dci(0x45304d3e); // uqxtnt z30.h, z9.s + // vl128 state = 0x9a695f7e + __ dci(0x453049bc); // uqxtnb z28.h, z13.s + // vl128 state = 0x9c2fa230 + __ dci(0x453049b8); // uqxtnb z24.h, z13.s + // vl128 state = 0xb590179f + __ dci(0x45304979); // uqxtnb z25.h, z11.s + // vl128 state = 0xc8987735 + __ dci(0x4530497d); // uqxtnb z29.h, z11.s + // 
vl128 state = 0x380f8730 + __ dci(0x4530496d); // uqxtnb z13.h, z11.s + // vl128 state = 0x45bf22d4 + __ dci(0x45304565); // sqxtnt z5.h, z11.s + // vl128 state = 0xd9237f41 + __ dci(0x45304f75); // uqxtnt z21.h, z27.s + // vl128 state = 0x0726a49b + __ dci(0x45304f71); // uqxtnt z17.h, z27.s + // vl128 state = 0xcbc547e0 + __ dci(0x45304f73); // uqxtnt z19.h, z27.s + // vl128 state = 0x0b16d843 + __ dci(0x45284f72); // uqxtnt z18.b, z27.h + // vl128 state = 0xea84ff1f + __ dci(0x45284f7a); // uqxtnt z26.b, z27.h + // vl128 state = 0x4bdb094d + __ dci(0x45284fca); // uqxtnt z10.b, z30.h + // vl128 state = 0x5986f190 + __ dci(0x45284b8b); // uqxtnb z11.b, z28.h + // vl128 state = 0xb40f0b26 + __ dci(0x45284bef); // uqxtnb z15.b, z31.h + // vl128 state = 0x7abef2b5 + __ dci(0x45284fae); // uqxtnt z14.b, z29.h + // vl128 state = 0x79503b36 + __ dci(0x45284fac); // uqxtnt z12.b, z29.h + // vl128 state = 0x481a6879 + __ dci(0x45284eed); // uqxtnt z13.b, z23.h + // vl128 state = 0x32da844c + __ dci(0x45284ee9); // uqxtnt z9.b, z23.h + // vl128 state = 0xb8438ca7 + __ dci(0x45284ef9); // uqxtnt z25.b, z23.h + // vl128 state = 0x4aa26674 + __ dci(0x45284cd1); // uqxtnt z17.b, z6.h + // vl128 state = 0xc5411d78 + __ dci(0x45284cd5); // uqxtnt z21.b, z6.h + // vl128 state = 0xee446689 + __ dci(0x45284ad4); // uqxtnb z20.b, z22.h + // vl128 state = 0x66ef53ef + __ dci(0x45604adc); // uqxtnb z28.s, z22.d + // vl128 state = 0xa894f4d4 + __ dci(0x45604ade); // uqxtnb z30.s, z22.d + // vl128 state = 0x50215eb8 + __ dci(0x456040dc); // sqxtnb z28.s, z6.d + // vl128 state = 0x5ee8464d + __ dci(0x456048f4); // uqxtnb z20.s, z7.d + // vl128 state = 0xee2ca07b + __ dci(0x45604c75); // uqxtnt z21.s, z3.d + // vl128 state = 0x0e81e7e0 + __ dci(0x45604cb1); // uqxtnt z17.s, z5.d + // vl128 state = 0x5c448cac + __ dci(0x45604e33); // uqxtnt z19.s, z17.d + // vl128 state = 0xcd0d561e + __ dci(0x45604e23); // uqxtnt z3.s, z17.d + // vl128 state = 0x7b8b2204 + __ dci(0x45604cab); // uqxtnt 
z11.s, z5.d + // vl128 state = 0x418cec7f + __ dci(0x45604caa); // uqxtnt z10.s, z5.d + // vl128 state = 0x37064bb6 + __ dci(0x45604efa); // uqxtnt z26.s, z23.d + // vl128 state = 0xc83ef05d + __ dci(0x456046db); // sqxtnt z27.s, z22.d + // vl128 state = 0xe30a1f0f + __ dci(0x456046da); // sqxtnt z26.s, z22.d + // vl128 state = 0xe10b92fa + __ dci(0x4560424a); // sqxtnb z10.s, z18.d + // vl128 state = 0x2396410c + __ dci(0x45604a08); // uqxtnb z8.s, z16.d + // vl128 state = 0xf4ae5ad5 + __ dci(0x45304a00); // uqxtnb z0.h, z16.s + // vl128 state = 0x26bbb3d1 + __ dci(0x45304828); // uqxtnb z8.h, z1.s + // vl128 state = 0x57d91166 + __ dci(0x4530422c); // sqxtnb z12.h, z17.s + // vl128 state = 0x5548e0b4 + __ dci(0x45305324); // sqxtunb z4.h, z25.s + // vl128 state = 0xf7eb8d9c + __ dci(0x45305325); // sqxtunb z5.h, z25.s + // vl128 state = 0xcf294303 + __ dci(0x45305321); // sqxtunb z1.h, z25.s + // vl128 state = 0x6c7597d6 + __ dci(0x453057a9); // sqxtunt z9.h, z29.s + // vl128 state = 0xe7be4fd5 + __ dci(0x453043b9); // sqxtnb z25.h, z29.s + // vl128 state = 0x376f3f76 + __ dci(0x453043bb); // sqxtnb z27.h, z29.s + // vl128 state = 0xf8389159 + __ dci(0x4530431a); // sqxtnb z26.h, z24.s + // vl128 state = 0x8ca15413 + __ dci(0x45304312); // sqxtnb z18.h, z24.s + // vl128 state = 0x2a6d8b90 + __ dci(0x4530491a); // uqxtnb z26.h, z8.s + // vl128 state = 0x7119ff0d + __ dci(0x4530413b); // sqxtnb z27.h, z9.s + // vl128 state = 0x884748db + __ dci(0x4530482b); // uqxtnb z11.h, z1.s + // vl128 state = 0x43296aec + __ dci(0x4530483b); // uqxtnb z27.h, z1.s + // vl128 state = 0xdb9908f0 + __ dci(0x45304979); // uqxtnb z25.h, z11.s + // vl128 state = 0xef30bfc8 + __ dci(0x453049d1); // uqxtnb z17.h, z14.s + // vl128 state = 0xb46173d8 + __ dci(0x456049d3); // uqxtnb z19.s, z14.d + // vl128 state = 0xcb8c3b83 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if 
(CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xcb8c3b83, + 0x92fb7f98, + 0xb7ec6385, + 0x81de8602, + 0xd970d431, + 0x2fe61431, + 0x359b1355, + 0xdeec900e, + 0xfd0c7d7d, + 0x62e89b19, + 0x43039424, + 0xdd42efc9, + 0x861010f1, + 0x82d68f37, + 0x3761a1d0, + 0xbcf3c5c9, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_eorbt_eortb) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x451892b8); // eorbt z24.b, z21.b, z24.b + // vl128 state = 0xc3f2b082 + __ dci(0x455893ba); // eorbt z26.h, z29.h, z24.h + // vl128 state = 0xc7421198 + __ dci(0x455892f8); // eorbt z24.h, z23.h, z24.h + // vl128 state = 0x4e155b96 + __ dci(0x455092bc); // eorbt z28.h, z21.h, z16.h + // vl128 state = 0x09393ad0 + __ dci(0x455893be); // eorbt z30.h, z29.h, z24.h + // vl128 state = 0x6d660844 + __ dci(0x4558922e); // eorbt z14.h, z17.h, z24.h + // vl128 state = 0x84f1ff20 + __ dci(0x45d892aa); // eorbt z10.d, z21.d, z24.d + // vl128 state = 0x568612d4 + __ dci(0x454892a8); // eorbt z8.h, z21.h, z8.h + // vl128 state = 0x699a3e24 + __ dci(0x45c890ac); // eorbt z12.d, z5.d, z8.d + // vl128 state = 0x17bb6d9b + __ dci(0x45c990ed); // eorbt z13.d, z7.d, z9.d + // vl128 state = 0xee5be73f + __ dci(0x45c892fd); // eorbt z29.d, z23.d, z8.d + // vl128 state = 0x141c47ed + __ dci(0x45c892f9); // eorbt z25.d, z23.d, z8.d + // vl128 state = 0xc3259593 + __ dci(0x45c892f8); // eorbt z24.d, z23.d, z8.d + // vl128 state = 0x3bca0bcc + __ dci(0x45c892e8); // eorbt z8.d, z23.d, z8.d + // vl128 state = 0x4714ab64 + __ dci(0x454a92ea); // eorbt z10.h, z23.h, z10.h + // vl128 state = 0x51360c73 + __ dci(0x454092e2); // eorbt z2.h, z23.h, z0.h + // vl128 state = 0xe33859fe + __ dci(0x454092f2); // eorbt z18.h, z23.h, z0.h + // vl128 state = 
0xa0d81168 + __ dci(0x4550927a); // eorbt z26.h, z19.h, z16.h + // vl128 state = 0xe4983274 + __ dci(0x4551923b); // eorbt z27.h, z17.h, z17.h + // vl128 state = 0x8e89eab7 + __ dci(0x45d3923f); // eorbt z31.d, z17.d, z19.d + // vl128 state = 0x472bd288 + __ dci(0x4553921d); // eorbt z29.h, z16.h, z19.h + // vl128 state = 0x61090ed4 + __ dci(0x4553932d); // eorbt z13.h, z25.h, z19.h + // vl128 state = 0x3ef228eb + __ dci(0x4513912c); // eorbt z12.b, z9.b, z19.b + // vl128 state = 0x96d4505c + __ dci(0x4551912d); // eorbt z13.h, z9.h, z17.h + // vl128 state = 0x1c32baef + __ dci(0x45119029); // eorbt z9.b, z1.b, z17.b + // vl128 state = 0xa138f554 + __ dci(0x45149028); // eorbt z8.b, z1.b, z20.b + // vl128 state = 0xf0681d9a + __ dci(0x459490aa); // eorbt z10.s, z5.s, z20.s + // vl128 state = 0xbd4b30f5 + __ dci(0x458590a8); // eorbt z8.s, z5.s, z5.s + // vl128 state = 0x45c5b437 + __ dci(0x4585948c); // eortb z12.s, z4.s, z5.s + // vl128 state = 0x22f90a7b + __ dci(0x45cd949c); // eortb z28.d, z4.d, z13.d + // vl128 state = 0x5e4584ca + __ dci(0x4589949d); // eortb z29.s, z4.s, z9.s + // vl128 state = 0x65ac913e + __ dci(0x458990ad); // eorbt z13.s, z5.s, z9.s + // vl128 state = 0x4f13d973 + __ dci(0x459b90ac); // eorbt z12.s, z5.s, z27.s + // vl128 state = 0xd13bb801 + __ dci(0x45db90ee); // eorbt z14.d, z7.d, z27.d + // vl128 state = 0xf24115d0 + __ dci(0x45db916f); // eorbt z15.d, z11.d, z27.d + // vl128 state = 0x04f38375 + __ dci(0x45db95e7); // eortb z7.d, z15.d, z27.d + // vl128 state = 0xe1046ae5 + __ dci(0x45db94a3); // eortb z3.d, z5.d, z27.d + // vl128 state = 0xaaeae67e + __ dci(0x45dd94a1); // eortb z1.d, z5.d, z29.d + // vl128 state = 0xd67f6823 + __ dci(0x45dd94b1); // eortb z17.d, z5.d, z29.d + // vl128 state = 0xf172245b + __ dci(0x45dd90f3); // eorbt z19.d, z7.d, z29.d + // vl128 state = 0xc99195b8 + __ dci(0x458d90e3); // eorbt z3.s, z7.s, z13.s + // vl128 state = 0xe1a146cf + __ dci(0x458994e2); // eortb z2.s, z7.s, z9.s + // vl128 state = 
0x8038f273 + __ dci(0x458b94a3); // eortb z3.s, z5.s, z11.s + // vl128 state = 0x50bda372 + __ dci(0x459b9481); // eortb z1.s, z4.s, z27.s + // vl128 state = 0xe8d53012 + __ dci(0x455b9485); // eortb z5.h, z4.h, z27.h + // vl128 state = 0xdba33ea5 + __ dci(0x454b9087); // eorbt z7.h, z4.h, z11.h + // vl128 state = 0xff7f1815 + __ dci(0x45499003); // eorbt z3.h, z0.h, z9.h + // vl128 state = 0x5d6e0104 + __ dci(0x454d9022); // eorbt z2.h, z1.h, z13.h + // vl128 state = 0xe9161cfe + __ dci(0x45099026); // eorbt z6.b, z1.b, z9.b + // vl128 state = 0x48126fb9 + __ dci(0x454b9024); // eorbt z4.h, z1.h, z11.h + // vl128 state = 0x53cbfc46 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x53cbfc46, + 0x0f81a01e, + 0xf97c4e96, + 0x745e9ed6, + 0x4487a0a1, + 0x7ad79509, + 0x53577280, + 0x1e589717, + 0xaaa96af0, + 0x4f2b0884, + 0x24d2cd1c, + 0x4d89438d, + 0x9b327a12, + 0xeabfd558, + 0xb63e33f1, + 0xebd7d9ca, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_saturating_multiply_add_high_vector) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 40 * kInstructionSize); + __ dci(0x44d9721a); // sqrdmlah z26.d, z16.d, z25.d + // vl128 state = 0xc0474f3f + __ dci(0x44dd761b); // sqrdmlsh z27.d, z16.d, z29.d + // vl128 state = 0x102712ac + __ dci(0x44d4760b); // sqrdmlsh z11.d, z16.d, z20.d + // vl128 state = 0xe8666aa6 + __ dci(0x44947709); // sqrdmlsh z9.s, z24.s, z20.s + // vl128 state = 0xdd18f643 + __ dci(0x4494770b); // sqrdmlsh z11.s, z24.s, z20.s + // vl128 state = 0xac4a4d4c + __ dci(0x44d4773b); // sqrdmlsh z27.d, z25.d, z20.d + // vl128 state = 0x1a5447d4 + __ dci(0x44dc7639); 
// sqrdmlsh z25.d, z17.d, z28.d + // vl128 state = 0xf547ac30 + __ dci(0x44dc763b); // sqrdmlsh z27.d, z17.d, z28.d + // vl128 state = 0xb42d177a + __ dci(0x44d4743f); // sqrdmlsh z31.d, z1.d, z20.d + // vl128 state = 0xd0da2c6b + __ dci(0x449c742f); // sqrdmlsh z15.s, z1.s, z28.s + // vl128 state = 0xb24c8988 + __ dci(0x449c7487); // sqrdmlsh z7.s, z4.s, z28.s + // vl128 state = 0x9e67ddac + __ dci(0x449c7485); // sqrdmlsh z5.s, z4.s, z28.s + // vl128 state = 0xd96b34e2 + __ dci(0x448e7481); // sqrdmlsh z1.s, z4.s, z14.s + // vl128 state = 0x81d91007 + __ dci(0x448e7480); // sqrdmlsh z0.s, z4.s, z14.s + // vl128 state = 0x901fa692 + __ dci(0x449c7488); // sqrdmlsh z8.s, z4.s, z28.s + // vl128 state = 0xeedceee6 + __ dci(0x441c758a); // sqrdmlsh z10.b, z12.b, z28.b + // vl128 state = 0x8dc4d389 + __ dci(0x441475ae); // sqrdmlsh z14.b, z13.b, z20.b + // vl128 state = 0xb1711932 + __ dci(0x440075ac); // sqrdmlsh z12.b, z13.b, z0.b + // vl128 state = 0x8cacf188 + __ dci(0x440171bc); // sqrdmlah z28.b, z13.b, z1.b + // vl128 state = 0x9c8b9f4f + __ dci(0x440171b8); // sqrdmlah z24.b, z13.b, z1.b + // vl128 state = 0x562ebefa + __ dci(0x441971b9); // sqrdmlah z25.b, z13.b, z25.b + // vl128 state = 0x1ef60d31 + __ dci(0x440970bb); // sqrdmlah z27.b, z5.b, z9.b + // vl128 state = 0x69bd18ee + __ dci(0x441870ba); // sqrdmlah z26.b, z5.b, z24.b + // vl128 state = 0x525b1f84 + __ dci(0x441270b8); // sqrdmlah z24.b, z5.b, z18.b + // vl128 state = 0x3c7dadd8 + __ dci(0x44927090); // sqrdmlah z16.s, z4.s, z18.s + // vl128 state = 0x276f0567 + __ dci(0x44937292); // sqrdmlah z18.s, z20.s, z19.s + // vl128 state = 0x6f0f8bb4 + __ dci(0x4491721a); // sqrdmlah z26.s, z16.s, z17.s + // vl128 state = 0x28eb737a + __ dci(0x44d3721b); // sqrdmlah z27.d, z16.d, z19.d + // vl128 state = 0xa3bd1133 + __ dci(0x44d372ab); // sqrdmlah z11.d, z21.d, z19.d + // vl128 state = 0x6e81e8fd + __ dci(0x44d372a3); // sqrdmlah z3.d, z21.d, z19.d + // vl128 state = 0x55730750 + __ dci(0x445376a1); // 
sqrdmlsh z1.h, z21.h, z19.h + // vl128 state = 0x7c7afd6d + __ dci(0x44527685); // sqrdmlsh z5.h, z20.h, z18.h + // vl128 state = 0x1c9dc1a1 + __ dci(0x44127495); // sqrdmlsh z21.b, z4.b, z18.b + // vl128 state = 0xf2e07e92 + __ dci(0x44127794); // sqrdmlsh z20.b, z28.b, z18.b + // vl128 state = 0xc5a2e589 + __ dci(0x44527695); // sqrdmlsh z21.h, z20.h, z18.h + // vl128 state = 0x417df395 + __ dci(0x445274dd); // sqrdmlsh z29.h, z6.h, z18.h + // vl128 state = 0x2e223308 + __ dci(0x445774df); // sqrdmlsh z31.h, z6.h, z23.h + // vl128 state = 0x99047839 + __ dci(0x445775fe); // sqrdmlsh z30.h, z15.h, z23.h + // vl128 state = 0x34a4be39 + __ dci(0x445175ff); // sqrdmlsh z31.h, z15.h, z17.h + // vl128 state = 0x714b9d66 + __ dci(0x44517557); // sqrdmlsh z23.h, z10.h, z17.h + // vl128 state = 0x2aa51ff4 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x2aa51ff4, + 0xde163ba0, + 0x8b237661, + 0x30086cf2, + 0xabf248f0, + 0xcc183608, + 0xa4103141, + 0x521ebe39, + 0xd746470e, + 0x141a51a4, + 0x695a47fd, + 0x0a74d701, + 0xd14bae63, + 0xf967aadb, + 0xdaed8896, + 0x7ba556cb, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_integer_pairwise_add_accumulate_long) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 40 * kInstructionSize); + __ dci(0x4445b4e3); // uadalp z3.h, p5/m, z7.b + // vl128 state = 0x3ad015af + __ dci(0x4445b4e1); // uadalp z1.h, p5/m, z7.b + // vl128 state = 0x3f53978b + __ dci(0x4445bc65); // uadalp z5.h, p7/m, z3.b + // vl128 state = 0xf3340744 + __ dci(0x4445be35); // uadalp z21.h, p7/m, z17.b + // vl128 state = 0xb6f81377 + __ dci(0x4445be9d); // uadalp 
z29.h, p7/m, z20.b + // vl128 state = 0xaf772b37 + __ dci(0x4444bc9c); // sadalp z28.h, p7/m, z4.b + // vl128 state = 0x591be304 + __ dci(0x4444bc9d); // sadalp z29.h, p7/m, z4.b + // vl128 state = 0x406d9d34 + __ dci(0x4444ba99); // sadalp z25.h, p6/m, z20.b + // vl128 state = 0xb455880f + __ dci(0x44c4ba09); // sadalp z9.d, p6/m, z16.s + // vl128 state = 0x5ef8e2ed + __ dci(0x44c4ba01); // sadalp z1.d, p6/m, z16.s + // vl128 state = 0xca2ccf0d + __ dci(0x44c4ba11); // sadalp z17.d, p6/m, z16.s + // vl128 state = 0x33bb9903 + __ dci(0x4484bb15); // sadalp z21.s, p6/m, z24.h + // vl128 state = 0x3964a356 + __ dci(0x4484b957); // sadalp z23.s, p6/m, z10.h + // vl128 state = 0x1e1426d2 + __ dci(0x4484b953); // sadalp z19.s, p6/m, z10.h + // vl128 state = 0x83e2e1a6 + __ dci(0x4484b943); // sadalp z3.s, p6/m, z10.h + // vl128 state = 0x24335149 + __ dci(0x4484b102); // sadalp z2.s, p4/m, z8.h + // vl128 state = 0x8bde109a + __ dci(0x4484bd06); // sadalp z6.s, p7/m, z8.h + // vl128 state = 0x5abf30eb + __ dci(0x4484bdc2); // sadalp z2.s, p7/m, z14.h + // vl128 state = 0xcb199381 + __ dci(0x4485b5c6); // uadalp z6.s, p5/m, z14.h + // vl128 state = 0x5f3819ad + __ dci(0x4485b5c2); // uadalp z2.s, p5/m, z14.h + // vl128 state = 0x5f6d69e4 + __ dci(0x4485b5ca); // uadalp z10.s, p5/m, z14.h + // vl128 state = 0x1a0d7053 + __ dci(0x4485b15a); // uadalp z26.s, p4/m, z10.h + // vl128 state = 0x9081b6cd + __ dci(0x44c5b95e); // uadalp z30.d, p6/m, z10.s + // vl128 state = 0x6b15107e + __ dci(0x44c5a14e); // uadalp z14.d, p0/m, z10.s + // vl128 state = 0x4a127dc2 + __ dci(0x4445a1c6); // uadalp z6.h, p0/m, z14.b + // vl128 state = 0x06902399 + __ dci(0x4445a1ce); // uadalp z14.h, p0/m, z14.b + // vl128 state = 0x1789be4a + __ dci(0x4444a9de); // sadalp z30.h, p2/m, z14.b + // vl128 state = 0x86732543 + __ dci(0x4444adff); // sadalp z31.h, p3/m, z15.b + // vl128 state = 0xe326faef + __ dci(0x4444bdb7); // sadalp z23.h, p7/m, z13.b + // vl128 state = 0x46d5f328 + __ 
dci(0x4444bda7); // sadalp z7.h, p7/m, z13.b + // vl128 state = 0x5cf7a973 + __ dci(0x4445bd25); // uadalp z5.h, p7/m, z9.b + // vl128 state = 0xdf8cbb97 + __ dci(0x4485bd35); // uadalp z21.s, p7/m, z9.h + // vl128 state = 0x330c3d35 + __ dci(0x4485bc17); // uadalp z23.s, p7/m, z0.h + // vl128 state = 0x6ebfa4fe + __ dci(0x4485bc15); // uadalp z21.s, p7/m, z0.h + // vl128 state = 0x52f18385 + __ dci(0x4485be91); // uadalp z17.s, p7/m, z20.h + // vl128 state = 0x82fa2d85 + __ dci(0x4485be53); // uadalp z19.s, p7/m, z18.h + // vl128 state = 0xa7d6098b + __ dci(0x4485aa52); // uadalp z18.s, p2/m, z18.h + // vl128 state = 0xfe8faafa + __ dci(0x4485ae13); // uadalp z19.s, p3/m, z16.h + // vl128 state = 0xf2465f31 + __ dci(0x4485b617); // uadalp z23.s, p5/m, z16.h + // vl128 state = 0xed6be8ed + __ dci(0x4485bc13); // uadalp z19.s, p7/m, z0.h + // vl128 state = 0xb2f95c3d + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xb2f95c3d, + 0xa4189170, + 0xed9e7f9e, + 0xfca732cb, + 0x4c94b2d7, + 0x92a2fb21, + 0xbca62a5c, + 0x9aec54d6, + 0x8df82b02, + 0x50c18764, + 0xd27e5a0e, + 0x1a538cc6, + 0x538b673e, + 0x37e4b499, + 0x7160cbd5, + 0x113951bc, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_pmul_mul_vector_unpredicated) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 30 * kInstructionSize); + __ dci(0x04a56309); // mul z9.s, z24.s, z5.s + // vl128 state = 0x0ef461d5 + __ dci(0x04a56148); // mul z8.s, z10.s, z5.s + // vl128 state = 0xce9f1381 + __ dci(0x04a161d8); // mul z24.s, z14.s, z1.s + // vl128 state = 0x2a14ff8c + __ dci(0x04a16179); // mul z25.s, z11.s, z1.s + // vl128 
state = 0x88a0241b + __ dci(0x04b36171); // mul z17.s, z11.s, z19.s + // vl128 state = 0x23aea8a6 + __ dci(0x04fb6170); // mul z16.d, z11.d, z27.d + // vl128 state = 0x58eaa46d + __ dci(0x04fb6171); // mul z17.d, z11.d, z27.d + // vl128 state = 0xc733a399 + __ dci(0x04fb6350); // mul z16.d, z26.d, z27.d + // vl128 state = 0x2806af41 + __ dci(0x04eb6372); // mul z18.d, z27.d, z11.d + // vl128 state = 0x5ec775d1 + __ dci(0x04eb6376); // mul z22.d, z27.d, z11.d + // vl128 state = 0x40d03f0d + __ dci(0x04ed637e); // mul z30.d, z27.d, z13.d + // vl128 state = 0xe3a61d56 + __ dci(0x04e8637f); // mul z31.d, z27.d, z8.d + // vl128 state = 0x2eb4313f + __ dci(0x04a86337); // mul z23.s, z25.s, z8.s + // vl128 state = 0xc68e329e + __ dci(0x04a86336); // mul z22.s, z25.s, z8.s + // vl128 state = 0x177b1a43 + __ dci(0x04ac63be); // mul z30.s, z29.s, z12.s + // vl128 state = 0xaaa415dd + __ dci(0x04ac63d6); // mul z22.s, z30.s, z12.s + // vl128 state = 0xaeb212b8 + __ dci(0x042c67d2); // pmul z18.b, z30.b, z12.b + // vl128 state = 0xa11be1c8 + __ dci(0x042c65f3); // pmul z19.b, z15.b, z12.b + // vl128 state = 0x8dd03a21 + __ dci(0x042e65d2); // pmul z18.b, z14.b, z14.b + // vl128 state = 0x83ef9a66 + __ dci(0x042f6550); // pmul z16.b, z10.b, z15.b + // vl128 state = 0x6a495368 + __ dci(0x042e6754); // pmul z20.b, z26.b, z14.b + // vl128 state = 0x0b6c3ccf + __ dci(0x042e6750); // pmul z16.b, z26.b, z14.b + // vl128 state = 0xa745457f + __ dci(0x042e6600); // pmul z0.b, z16.b, z14.b + // vl128 state = 0x92fe8b9d + __ dci(0x042e6602); // pmul z2.b, z16.b, z14.b + // vl128 state = 0xda39ebe2 + __ dci(0x043f6600); // pmul z0.b, z16.b, z31.b + // vl128 state = 0xcc36d223 + __ dci(0x042b6608); // pmul z8.b, z16.b, z11.b + // vl128 state = 0x8b94d25a + __ dci(0x042a6700); // pmul z0.b, z24.b, z10.b + // vl128 state = 0x0118ccba + __ dci(0x042a6710); // pmul z16.b, z24.b, z10.b + // vl128 state = 0x4b38543b + __ dci(0x042a6714); // pmul z20.b, z24.b, z10.b + // vl128 state = 0xa54e126f 
+ __ dci(0x042a6716); // pmul z22.b, z24.b, z10.b + // vl128 state = 0x61ad87c9 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x61ad87c9, + 0x82df488f, + 0xc0d7c1a4, + 0x4f86e761, + 0x8d651d7b, + 0x294cf55a, + 0x060ab34c, + 0x1db0e99c, + 0x4b0b59d7, + 0xcee6dfd1, + 0x29575669, + 0x5c1c7922, + 0x4b1957ed, + 0x8bc5712b, + 0x6ac59fdc, + 0x048ce1b5, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_smulh_umulh_vector_unpredicated) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 30 * kInstructionSize); + __ dci(0x04e46c3b); // umulh z27.d, z1.d, z4.d + // vl128 state = 0xfb66ba83 + __ dci(0x04ac6c3a); // umulh z26.s, z1.s, z12.s + // vl128 state = 0x45cdb9a2 + __ dci(0x04a86e32); // umulh z18.s, z17.s, z8.s + // vl128 state = 0x4ad150dc + __ dci(0x04a86a7a); // smulh z26.s, z19.s, z8.s + // vl128 state = 0xbf08e2cb + __ dci(0x04e86b7b); // smulh z27.d, z27.d, z8.d + // vl128 state = 0x51ad0655 + __ dci(0x04ee6b73); // smulh z19.d, z27.d, z14.d + // vl128 state = 0xf764bda9 + __ dci(0x04ec6f7b); // umulh z27.d, z27.d, z12.d + // vl128 state = 0xc90f20ef + __ dci(0x04ac6f3a); // umulh z26.s, z25.s, z12.s + // vl128 state = 0x9ec08333 + __ dci(0x04ac6f32); // umulh z18.s, z25.s, z12.s + // vl128 state = 0x3620406c + __ dci(0x042e6f3a); // umulh z26.b, z25.b, z14.b + // vl128 state = 0x4e18467a + __ dci(0x042a6b2a); // smulh z10.b, z25.b, z10.b + // vl128 state = 0x13c7cd6f + __ dci(0x042a6b2b); // smulh z11.b, z25.b, z10.b + // vl128 state = 0x16a44c1b + __ dci(0x043a6b03); // smulh z3.b, z24.b, z26.b + // vl128 state = 0x9f8f203b + __ dci(0x047a690b); // smulh z11.h, z8.h, 
z26.h + // vl128 state = 0xce0aa45e + __ dci(0x047a690a); // smulh z10.h, z8.h, z26.h + // vl128 state = 0xb667d59b + __ dci(0x0479690e); // smulh z14.h, z8.h, z25.h + // vl128 state = 0xd76639b7 + __ dci(0x046d690c); // smulh z12.h, z8.h, z13.h + // vl128 state = 0x736b227e + __ dci(0x042f690e); // smulh z14.b, z8.b, z15.b + // vl128 state = 0xc0804df9 + __ dci(0x042f69ac); // smulh z12.b, z13.b, z15.b + // vl128 state = 0x8a5509f5 + __ dci(0x042f696e); // smulh z14.b, z11.b, z15.b + // vl128 state = 0x761f9cf8 + __ dci(0x042e6b6a); // smulh z10.b, z27.b, z14.b + // vl128 state = 0x3b5f2705 + __ dci(0x042e6b6e); // smulh z14.b, z27.b, z14.b + // vl128 state = 0x53b23a0a + __ dci(0x04366b6f); // smulh z15.b, z27.b, z22.b + // vl128 state = 0x5bd53ce9 + __ dci(0x04766f7f); // umulh z31.h, z27.h, z22.h + // vl128 state = 0x701bec8f + __ dci(0x04746fef); // umulh z15.h, z31.h, z20.h + // vl128 state = 0x29697c8c + __ dci(0x04706dee); // umulh z14.h, z15.h, z16.h + // vl128 state = 0x2088f1c2 + __ dci(0x04706c7e); // umulh z30.h, z3.h, z16.h + // vl128 state = 0x56224145 + __ dci(0x04306c2e); // umulh z14.b, z1.b, z16.b + // vl128 state = 0x2ba58c9c + __ dci(0x04b06e2a); // umulh z10.s, z17.s, z16.s + // vl128 state = 0xb933d058 + __ dci(0x04b56e2e); // umulh z14.s, z17.s, z21.s + // vl128 state = 0x184daee9 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x184daee9, + 0x19454232, + 0xa56823a3, + 0xe334897a, + 0xcaa988e1, + 0x614cbf4f, + 0xfaa384e4, + 0x4b45e885, + 0xef930ead, + 0x49304b9a, + 0x4f1d830e, + 0xa41c1a95, + 0xa1ea8d07, + 0x62ca97b4, + 0x15f52cac, + 0xc190cd57, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_arith_interleaved_long) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + 
CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x459289bd); // ssublbt z29.s, z13.h, z18.h + // vl128 state = 0xe2e0965a + __ dci(0x459289bf); // ssublbt z31.s, z13.h, z18.h + // vl128 state = 0x64e3e1a3 + __ dci(0x45d689be); // ssublbt z30.d, z13.s, z22.s + // vl128 state = 0x02711ec2 + __ dci(0x45d68916); // ssublbt z22.d, z8.s, z22.s + // vl128 state = 0x7ff6f63f + __ dci(0x45968957); // ssublbt z23.s, z10.h, z22.h + // vl128 state = 0xa9aace7f + __ dci(0x45968a55); // ssublbt z21.s, z18.h, z22.h + // vl128 state = 0x6007d46c + __ dci(0x45868251); // saddlbt z17.s, z18.h, z6.h + // vl128 state = 0xecea329d + __ dci(0x45868230); // saddlbt z16.s, z17.h, z6.h + // vl128 state = 0xa16880b8 + __ dci(0x45868231); // saddlbt z17.s, z17.h, z6.h + // vl128 state = 0xcff73a01 + __ dci(0x458c8235); // saddlbt z21.s, z17.h, z12.h + // vl128 state = 0xf6486b24 + __ dci(0x458c8231); // saddlbt z17.s, z17.h, z12.h + // vl128 state = 0xa5612e07 + __ dci(0x459c8021); // saddlbt z1.s, z1.h, z28.h + // vl128 state = 0xd71ab1e8 + __ dci(0x458c8009); // saddlbt z9.s, z0.h, z12.h + // vl128 state = 0xaf74bd16 + __ dci(0x459e800b); // saddlbt z11.s, z0.h, z30.h + // vl128 state = 0x96dee616 + __ dci(0x45928003); // saddlbt z3.s, z0.h, z18.h + // vl128 state = 0x652e9cca + __ dci(0x45d28207); // saddlbt z7.d, z16.s, z18.s + // vl128 state = 0xc6b07290 + __ dci(0x45da8225); // saddlbt z5.d, z17.s, z26.s + // vl128 state = 0x8c74a35d + __ dci(0x45da830d); // saddlbt z13.d, z24.s, z26.s + // vl128 state = 0xff620001 + __ dci(0x45cb8309); // saddlbt z9.d, z24.s, z11.s + // vl128 state = 0x2147f374 + __ dci(0x45ca8119); // saddlbt z25.d, z8.s, z10.s + // vl128 state = 0x6f961936 + __ dci(0x45ce831d); // saddlbt z29.d, z24.s, z14.s + // vl128 state = 0xaa91e68a + __ dci(0x45ce8135); // saddlbt z21.d, z9.s, z14.s + // vl128 state = 0xa5635d0e + __ dci(0x458e8331); // saddlbt 
z17.s, z25.h, z14.h + // vl128 state = 0xa0705ea7 + __ dci(0x458e8030); // saddlbt z16.s, z1.h, z14.h + // vl128 state = 0x397dc4d5 + __ dci(0x458e8271); // saddlbt z17.s, z19.h, z14.h + // vl128 state = 0x5e975082 + __ dci(0x458a82e1); // saddlbt z1.s, z23.h, z10.h + // vl128 state = 0x048f8dea + __ dci(0x458a8240); // saddlbt z0.s, z18.h, z10.h + // vl128 state = 0xd9104514 + __ dci(0x458a8e50); // ssubltb z16.s, z18.h, z10.h + // vl128 state = 0x6afbf8b6 + __ dci(0x45988e58); // ssubltb z24.s, z18.h, z24.h + // vl128 state = 0xfe44a2f8 + __ dci(0x45d08e59); // ssubltb z25.d, z18.s, z16.s + // vl128 state = 0x050fb0ab + __ dci(0x45d08e58); // ssubltb z24.d, z18.s, z16.s + // vl128 state = 0xc9160f61 + __ dci(0x45d08259); // saddlbt z25.d, z18.s, z16.s + // vl128 state = 0x70ae0c4a + __ dci(0x45d08b51); // ssublbt z17.d, z26.s, z16.s + // vl128 state = 0xe627770c + __ dci(0x45d08970); // ssublbt z16.d, z11.s, z16.s + // vl128 state = 0x445fd924 + __ dci(0x45d28d74); // ssubltb z20.d, z11.s, z18.s + // vl128 state = 0x8c7dd6c0 + __ dci(0x45c28d56); // ssubltb z22.d, z10.s, z2.s + // vl128 state = 0x925de210 + __ dci(0x45c28d52); // ssubltb z18.d, z10.s, z2.s + // vl128 state = 0x28b67c05 + __ dci(0x45c48d5a); // ssubltb z26.d, z10.s, z4.s + // vl128 state = 0x48e8377c + __ dci(0x45c18d5b); // ssubltb z27.d, z10.s, z1.s + // vl128 state = 0xb46af33e + __ dci(0x45818d13); // ssubltb z19.s, z8.h, z1.h + // vl128 state = 0x12fada0b + __ dci(0x45818d12); // ssubltb z18.s, z8.h, z1.h + // vl128 state = 0xeaeea3cd + __ dci(0x45858d9a); // ssubltb z26.s, z12.h, z5.h + // vl128 state = 0x6d466bd8 + __ dci(0x45858df2); // ssubltb z18.s, z15.h, z5.h + // vl128 state = 0x60c67411 + __ dci(0x45c58d62); // ssubltb z2.d, z11.s, z5.s + // vl128 state = 0xec3b40ed + __ dci(0x45c58b72); // ssublbt z18.d, z27.s, z5.s + // vl128 state = 0x5b421b0a + __ dci(0x45858a76); // ssublbt z22.s, z19.h, z5.h + // vl128 state = 0x8a0f26e9 + __ dci(0x45878877); // ssublbt z23.s, z3.h, z7.h + // 
vl128 state = 0xc224293b + __ dci(0x458f8073); // saddlbt z19.s, z3.h, z15.h + // vl128 state = 0x9f5c0b50 + __ dci(0x45878051); // saddlbt z17.s, z2.h, z7.h + // vl128 state = 0x2ae674c9 + __ dci(0x45838841); // ssublbt z1.s, z2.h, z3.h + // vl128 state = 0x1dff4e20 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x1dff4e20, + 0x3d2c11df, + 0x64caeccf, + 0x7940c227, + 0xf5f59485, + 0x7ad48c48, + 0xcde4523b, + 0xcb5849f0, + 0x1e7e9722, + 0x8049333f, + 0x40d95eb3, + 0x628a428d, + 0x1cf123f2, + 0x8d377510, + 0x44a03b91, + 0xabe90e98, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sqabs_sqneg) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x4448b23a); // sqabs z26.h, p4/m, z17.h + // vl128 state = 0x4aadd589 + __ dci(0x4448b23e); // sqabs z30.h, p4/m, z17.h + // vl128 state = 0x86da455e + __ dci(0x4448a21c); // sqabs z28.h, p0/m, z16.h + // vl128 state = 0x4eecab5c + __ dci(0x4408a298); // sqabs z24.b, p0/m, z20.b + // vl128 state = 0xf81ee16e + __ dci(0x4408a0dc); // sqabs z28.b, p0/m, z6.b + // vl128 state = 0x84b94ec5 + __ dci(0x4408a0de); // sqabs z30.b, p0/m, z6.b + // vl128 state = 0x626db033 + __ dci(0x4408a19c); // sqabs z28.b, p0/m, z12.b + // vl128 state = 0x181303a1 + __ dci(0x4408a3d4); // sqabs z20.b, p0/m, z30.b + // vl128 state = 0xf4e93ff3 + __ dci(0x4489a3dc); // sqneg z28.s, p0/m, z30.s + // vl128 state = 0xffe7a865 + __ dci(0x4409a1d4); // sqneg z20.b, p0/m, z14.b + // vl128 state = 0x6a27d8fe + __ dci(0x4408a3d0); // sqabs z16.b, p0/m, z30.b + // vl128 state = 0x9ffc0414 + __ dci(0x44c8a3d8); // sqabs z24.d, p0/m, 
z30.d + // vl128 state = 0xd59acd78 + __ dci(0x44c8b3fa); // sqabs z26.d, p4/m, z31.d + // vl128 state = 0x8853f8ac + __ dci(0x44c8a2fb); // sqabs z27.d, p0/m, z23.d + // vl128 state = 0x439e9079 + __ dci(0x44c8a2f9); // sqabs z25.d, p0/m, z23.d + // vl128 state = 0xbaaa56a6 + __ dci(0x4488a2db); // sqabs z27.s, p0/m, z22.s + // vl128 state = 0x328cbd5a + __ dci(0x4488a2df); // sqabs z31.s, p0/m, z22.s + // vl128 state = 0x4a74b2da + __ dci(0x4488a2cf); // sqabs z15.s, p0/m, z22.s + // vl128 state = 0x52af62a6 + __ dci(0x4488a04b); // sqabs z11.s, p0/m, z2.s + // vl128 state = 0xa45aef42 + __ dci(0x4488a02f); // sqabs z15.s, p0/m, z1.s + // vl128 state = 0x0b5444ed + __ dci(0x4489a06d); // sqneg z13.s, p0/m, z3.s + // vl128 state = 0x6f0912d5 + __ dci(0x4489a449); // sqneg z9.s, p1/m, z2.s + // vl128 state = 0x669ac78a + __ dci(0x4489a50b); // sqneg z11.s, p1/m, z8.s + // vl128 state = 0x58ae27ee + __ dci(0x4488a71b); // sqabs z27.s, p1/m, z24.s + // vl128 state = 0xa54925f9 + __ dci(0x4408a519); // sqabs z25.b, p1/m, z8.b + // vl128 state = 0x45c13095 + __ dci(0x4408a158); // sqabs z24.b, p0/m, z10.b + // vl128 state = 0x2d6d547a + __ dci(0x4488a168); // sqabs z8.s, p0/m, z11.s + // vl128 state = 0xc976b77b + __ dci(0x44c9a16c); // sqneg z12.d, p0/m, z11.d + // vl128 state = 0x766e750f + __ dci(0x44c9a17c); // sqneg z28.d, p0/m, z11.d + // vl128 state = 0xbf22858d + __ dci(0x44c9a878); // sqneg z24.d, p2/m, z3.d + // vl128 state = 0xe563a474 + __ dci(0x44c9a8d9); // sqneg z25.d, p2/m, z6.d + // vl128 state = 0x573c2648 + __ dci(0x44c9b85b); // sqneg z27.d, p6/m, z2.d + // vl128 state = 0x03cdf714 + __ dci(0x4449b87f); // sqneg z31.h, p6/m, z3.h + // vl128 state = 0xff4e2cb1 + __ dci(0x4449b81d); // sqneg z29.h, p6/m, z0.h + // vl128 state = 0xaab7065e + __ dci(0x4449a895); // sqneg z21.h, p2/m, z4.h + // vl128 state = 0x60d4a6d3 + __ dci(0x4449a825); // sqneg z5.h, p2/m, z1.h + // vl128 state = 0x3bed34e4 + __ dci(0x4449a821); // sqneg z1.h, p2/m, z1.h + // vl128 
state = 0xaa750880 + __ dci(0x4449a820); // sqneg z0.h, p2/m, z1.h + // vl128 state = 0xfca9d635 + __ dci(0x4449a822); // sqneg z2.h, p2/m, z1.h + // vl128 state = 0x8a92f3e7 + __ dci(0x4449ae23); // sqneg z3.h, p3/m, z17.h + // vl128 state = 0xc2db1ac5 + __ dci(0x4449af73); // sqneg z19.h, p3/m, z27.h + // vl128 state = 0x386f5f27 + __ dci(0x4449af77); // sqneg z23.h, p3/m, z27.h + // vl128 state = 0xff4fd505 + __ dci(0x4489af67); // sqneg z7.s, p3/m, z27.s + // vl128 state = 0x4c897605 + __ dci(0x4489ad25); // sqneg z5.s, p3/m, z9.s + // vl128 state = 0xcc73333a + __ dci(0x4409ad07); // sqneg z7.b, p3/m, z8.b + // vl128 state = 0x58d37b50 + __ dci(0x4489ad85); // sqneg z5.s, p3/m, z12.s + // vl128 state = 0x2a142b9d + __ dci(0x44c9a984); // sqneg z4.d, p2/m, z12.d + // vl128 state = 0x006fd35a + __ dci(0x44c9a926); // sqneg z6.d, p2/m, z9.d + // vl128 state = 0x06c05c5d + __ dci(0x4449ab2e); // sqneg z14.h, p2/m, z25.h + // vl128 state = 0xe41a6fc4 + __ dci(0x4449ab3e); // sqneg z30.h, p2/m, z25.h + // vl128 state = 0x6e574bec + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x6e574bec, + 0xec677945, + 0xe7357ba7, + 0xbbf92859, + 0x3f42d943, + 0xe2db0bb1, + 0x704d1161, + 0xc0e1f809, + 0x887dd5e7, + 0x452b8b80, + 0xcf455511, + 0x821ad0bc, + 0xb98b1eac, + 0x49ae6871, + 0x16b2e0a6, + 0xaba4d260, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_urecpe_ursqrte) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 20 * kInstructionSize); + __ dci(0x4481bee8); // ursqrte z8.s, p7/m, z23.s + // vl128 state = 0x38c317d5 + __ dci(0x4480bea9); // urecpe z9.s, p7/m, z21.s + // vl128 
state = 0x8412e46d + __ dci(0x4481bfab); // ursqrte z11.s, p7/m, z29.s + // vl128 state = 0xae6c2805 + __ dci(0x4481b9a3); // ursqrte z3.s, p6/m, z13.s + // vl128 state = 0x114331ab + __ dci(0x4481aba2); // ursqrte z2.s, p2/m, z29.s + // vl128 state = 0x88f2308d + __ dci(0x4480abe6); // urecpe z6.s, p2/m, z31.s + // vl128 state = 0x328b45b8 + __ dci(0x4480afa2); // urecpe z2.s, p3/m, z29.s + // vl128 state = 0x7b67ded4 + __ dci(0x4480ae23); // urecpe z3.s, p3/m, z17.s + // vl128 state = 0x48d1ac45 + __ dci(0x4481aa27); // ursqrte z7.s, p2/m, z17.s + // vl128 state = 0x475f61b6 + __ dci(0x4481a325); // ursqrte z5.s, p0/m, z25.s + // vl128 state = 0xfbf0b767 + __ dci(0x4481a321); // ursqrte z1.s, p0/m, z25.s + // vl128 state = 0x31481484 + __ dci(0x4481ab05); // ursqrte z5.s, p2/m, z24.s + // vl128 state = 0x5aca5e43 + __ dci(0x4481a995); // ursqrte z21.s, p2/m, z12.s + // vl128 state = 0xe3b96378 + __ dci(0x4481bb91); // ursqrte z17.s, p6/m, z28.s + // vl128 state = 0x9d469964 + __ dci(0x4481b199); // ursqrte z25.s, p4/m, z12.s + // vl128 state = 0xbbabbb9d + __ dci(0x4481a989); // ursqrte z9.s, p2/m, z12.s + // vl128 state = 0xf83e651c + __ dci(0x4481b18b); // ursqrte z11.s, p4/m, z12.s + // vl128 state = 0x70a808da + __ dci(0x4480b089); // urecpe z9.s, p4/m, z4.s + // vl128 state = 0x427916ac + __ dci(0x4480b2c1); // urecpe z1.s, p4/m, z22.s + // vl128 state = 0xbf35be88 + __ dci(0x4480aad1); // urecpe z17.s, p2/m, z22.s + // vl128 state = 0xaf69727b + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xaf69727b, + 0x7fda1a01, + 0xd299e078, + 0x9a794a84, + 0x47a453c1, + 0xecc67cf0, + 0x04122ec2, + 0x82dd5669, + 0xcb2bb910, + 0xcc73c54c, + 0x4660030f, + 0x7c42b056, + 0x498a73b1, + 0x1de89fad, + 0x5411c616, + 0x9f378bac, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 
+ } +} + +TEST_SVE(sve2_arith_long) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x45573eac); // uabdlt z12.h, z21.b, z23.b + // vl128 state = 0x2ee2e7d4 + __ dci(0x45573c84); // uabdlt z4.h, z4.b, z23.b + // vl128 state = 0x33413c6f + __ dci(0x45571d8c); // usublt z12.h, z12.b, z23.b + // vl128 state = 0xb95ffb7e + __ dci(0x45971d8e); // usublt z14.s, z12.h, z23.h + // vl128 state = 0xac4d0015 + __ dci(0x45d7158c); // ssublt z12.d, z12.s, z23.s + // vl128 state = 0xe5341703 + __ dci(0x4557119c); // ssublb z28.h, z12.b, z23.b + // vl128 state = 0x744f8598 + __ dci(0x45d5118c); // ssublb z12.d, z12.s, z21.s + // vl128 state = 0x120c8bf7 + __ dci(0x45551088); // ssublb z8.h, z4.b, z21.b + // vl128 state = 0xbf53c9ed + __ dci(0x455410cc); // ssublb z12.h, z6.b, z20.b + // vl128 state = 0x2642a908 + __ dci(0x454414c8); // ssublt z8.h, z6.b, z4.b + // vl128 state = 0x0682c7d0 + __ dci(0x454510c9); // ssublb z9.h, z6.b, z5.b + // vl128 state = 0x1966420e + __ dci(0x455510ed); // ssublb z13.h, z7.b, z21.b + // vl128 state = 0xdd0ec707 + __ dci(0x455508ef); // uaddlb z15.h, z7.b, z21.b + // vl128 state = 0x0756dbf9 + __ dci(0x455502e7); // saddlb z7.h, z23.b, z21.b + // vl128 state = 0xb991e688 + __ dci(0x455d06f7); // saddlt z23.h, z23.b, z29.b + // vl128 state = 0x55399de0 + __ dci(0x455f06df); // saddlt z31.h, z22.b, z31.b + // vl128 state = 0x3379dce4 + __ dci(0x45de06db); // saddlt z27.d, z22.s, z30.s + // vl128 state = 0xebf6b857 + __ dci(0x45c606da); // saddlt z26.d, z22.s, z6.s + // vl128 state = 0x7625ec15 + __ dci(0x45c306db); // saddlt z27.d, z22.s, z3.s + // vl128 state = 0x549988fd + __ dci(0x455306d3); // saddlt z19.h, z22.b, z19.b + // vl128 state = 0xb645cb0f + __ dci(0x455306d1); // saddlt z17.h, z22.b, z19.b + // vl128 state = 0x20a70427 + __ 
dci(0x455306d3); // saddlt z19.h, z22.b, z19.b + // vl128 state = 0xd263ec78 + __ dci(0x45510edb); // uaddlt z27.h, z22.b, z17.b + // vl128 state = 0xeecd9b44 + __ dci(0x45510bdf); // uaddlb z31.h, z30.b, z17.b + // vl128 state = 0x0577c3d4 + __ dci(0x45d10b4f); // uaddlb z15.d, z26.s, z17.s + // vl128 state = 0xca18b475 + __ dci(0x45810b47); // uaddlb z7.s, z26.h, z1.h + // vl128 state = 0xdfe68417 + __ dci(0x45811bc3); // usublb z3.s, z30.h, z1.h + // vl128 state = 0x96fe0360 + __ dci(0x45891b82); // usublb z2.s, z28.h, z9.h + // vl128 state = 0x7e58a9d5 + __ dci(0x4589398a); // uabdlb z10.s, z12.h, z9.h + // vl128 state = 0xd7612435 + __ dci(0x458919ab); // usublb z11.s, z13.h, z9.h + // vl128 state = 0x8842dbca + __ dci(0x45cb19af); // usublb z15.d, z13.s, z11.s + // vl128 state = 0xfcac3d0f + __ dci(0x45cb19bf); // usublb z31.d, z13.s, z11.s + // vl128 state = 0x7b4952d6 + __ dci(0x45cb190f); // usublb z15.d, z8.s, z11.s + // vl128 state = 0xb41cb8a3 + __ dci(0x45cb1d8d); // usublt z13.d, z12.s, z11.s + // vl128 state = 0x9197543e + __ dci(0x45cb1d89); // usublt z9.d, z12.s, z11.s + // vl128 state = 0x3cc7e16c + __ dci(0x454b0d8b); // uaddlt z11.h, z12.b, z11.b + // vl128 state = 0x5c52744d + __ dci(0x45cb1d8a); // usublt z10.d, z12.s, z11.s + // vl128 state = 0x24c91c53 + __ dci(0x454f1d8e); // usublt z14.h, z12.b, z15.b + // vl128 state = 0x0091f2f1 + __ dci(0x455b1d8f); // usublt z15.h, z12.b, z27.b + // vl128 state = 0x521f94f7 + __ dci(0x455a1c87); // usublt z7.h, z4.b, z26.b + // vl128 state = 0xa0631870 + __ dci(0x454a1cb7); // usublt z23.h, z5.b, z10.b + // vl128 state = 0x089384c7 + __ dci(0x454218a7); // usublb z7.h, z5.b, z2.b + // vl128 state = 0xe8c3c063 + __ dci(0x454a19a6); // usublb z6.h, z13.b, z10.b + // vl128 state = 0x7a9f53ab + __ dci(0x454a3da2); // uabdlt z2.h, z13.b, z10.b + // vl128 state = 0x68d5f375 + __ dci(0x45423ca6); // uabdlt z6.h, z5.b, z2.b + // vl128 state = 0x2c980ff7 + __ dci(0x454a34a7); // sabdlt z7.h, z5.b, z10.b + // 
vl128 state = 0xe38196aa + __ dci(0x454a3466); // sabdlt z6.h, z3.b, z10.b + // vl128 state = 0x86c5bcb2 + __ dci(0x454b146e); // ssublt z14.h, z3.b, z11.b + // vl128 state = 0xf8527375 + __ dci(0x454b146a); // ssublt z10.h, z3.b, z11.b + // vl128 state = 0xf4bfb710 + __ dci(0x454b147a); // ssublt z26.h, z3.b, z11.b + // vl128 state = 0xe1000ccf + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xe1000ccf, + 0xd320fd27, + 0x356a62d9, + 0xc6245994, + 0x78aeec8a, + 0xb5d0402b, + 0x06684b9e, + 0x6033f51d, + 0xd174ee86, + 0x80baaecc, + 0x2c9b263c, + 0x3fba551a, + 0x489fb8b7, + 0x862c9b27, + 0xc0549096, + 0xa927d570, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_arith_wide) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x45494683); // saddwt z3.h, z20.h, z9.b + // vl128 state = 0x9a3fc71a + __ dci(0x45494687); // saddwt z7.h, z20.h, z9.b + // vl128 state = 0xb016cb2f + __ dci(0x454b46d7); // saddwt z23.h, z22.h, z11.b + // vl128 state = 0x5ce3d8a0 + __ dci(0x455b56d5); // ssubwt z21.h, z22.h, z27.b + // vl128 state = 0xbace5453 + __ dci(0x455b567d); // ssubwt z29.h, z19.h, z27.b + // vl128 state = 0x1f510928 + __ dci(0x455b506d); // ssubwb z13.h, z3.h, z27.b + // vl128 state = 0x19ea553e + __ dci(0x4559502f); // ssubwb z15.h, z1.h, z25.b + // vl128 state = 0x4d88e5db + __ dci(0x45d95427); // ssubwt z7.d, z1.d, z25.s + // vl128 state = 0x069804b6 + __ dci(0x45d95426); // ssubwt z6.d, z1.d, z25.s + // vl128 state = 0xfe46cf10 + __ dci(0x45db5c36); // usubwt z22.d, z1.d, z27.s + // vl128 state = 0xad3c8120 + __ dci(0x45d95d37); // usubwt 
z23.d, z9.d, z25.s + // vl128 state = 0x833d76fb + __ dci(0x45d55d27); // usubwt z7.d, z9.d, z21.s + // vl128 state = 0xc536845d + __ dci(0x45d44d25); // uaddwt z5.d, z9.d, z20.s + // vl128 state = 0x21f5a29c + __ dci(0x45dc4927); // uaddwb z7.d, z9.d, z28.s + // vl128 state = 0xfe67da2a + __ dci(0x455c490f); // uaddwb z15.h, z8.h, z28.b + // vl128 state = 0x5ec5d506 + __ dci(0x455c490b); // uaddwb z11.h, z8.h, z28.b + // vl128 state = 0x74b7d2fc + __ dci(0x45584923); // uaddwb z3.h, z9.h, z24.b + // vl128 state = 0xa785f3c3 + __ dci(0x45584922); // uaddwb z2.h, z9.h, z24.b + // vl128 state = 0x373049c0 + __ dci(0x45584940); // uaddwb z0.h, z10.h, z24.b + // vl128 state = 0xbf385483 + __ dci(0x45da4944); // uaddwb z4.d, z10.d, z26.s + // vl128 state = 0x94cd3b86 + __ dci(0x45524945); // uaddwb z5.h, z10.h, z18.b + // vl128 state = 0x8535094f + __ dci(0x4540494d); // uaddwb z13.h, z10.h, z0.b + // vl128 state = 0x328abbdb + __ dci(0x45c04909); // uaddwb z9.d, z8.d, z0.s + // vl128 state = 0x253064cb + __ dci(0x45c8498d); // uaddwb z13.d, z12.d, z8.s + // vl128 state = 0xa1b39fe0 + __ dci(0x45c0418f); // saddwb z15.d, z12.d, z0.s + // vl128 state = 0xa72048d9 + __ dci(0x45d84187); // saddwb z7.d, z12.d, z24.s + // vl128 state = 0x4c8a23ac + __ dci(0x45dc5197); // ssubwb z23.d, z12.d, z28.s + // vl128 state = 0x352a3d60 + __ dci(0x45dc5d93); // usubwt z19.d, z12.d, z28.s + // vl128 state = 0x404b9e8b + __ dci(0x45dd5592); // ssubwt z18.d, z12.d, z29.s + // vl128 state = 0xf46cc758 + __ dci(0x45dd5550); // ssubwt z16.d, z10.d, z29.s + // vl128 state = 0x171ebd36 + __ dci(0x45cd55d4); // ssubwt z20.d, z14.d, z13.s + // vl128 state = 0x4f2ef46f + __ dci(0x45dd5dd5); // usubwt z21.d, z14.d, z29.s + // vl128 state = 0x0c9ab301 + __ dci(0x45dd5dc5); // usubwt z5.d, z14.d, z29.s + // vl128 state = 0x67a10e22 + __ dci(0x454d5dd5); // usubwt z21.h, z14.h, z13.b + // vl128 state = 0xb4bd21c0 + __ dci(0x454d4dfd); // uaddwt z29.h, z15.h, z13.b + // vl128 state = 0x8df5f90f + __ 
dci(0x45494fed); // uaddwt z13.h, z31.h, z9.b + // vl128 state = 0x913f7aa4 + __ dci(0x45cb4fef); // uaddwt z15.d, z31.d, z11.s + // vl128 state = 0xa23d1307 + __ dci(0x454b47ff); // saddwt z31.h, z31.h, z11.b + // vl128 state = 0x026ff306 + __ dci(0x454747f7); // saddwt z23.h, z31.h, z7.b + // vl128 state = 0x9abf0566 + __ dci(0x45c743f6); // saddwb z22.d, z31.d, z7.s + // vl128 state = 0x27031d0e + __ dci(0x45c74b66); // uaddwb z6.d, z27.d, z7.s + // vl128 state = 0xc6f3a976 + __ dci(0x45474be4); // uaddwb z4.h, z31.h, z7.b + // vl128 state = 0xededea24 + __ dci(0x454349e0); // uaddwb z0.h, z15.h, z3.b + // vl128 state = 0xf1092d40 + __ dci(0x454359c1); // usubwb z1.h, z14.h, z3.b + // vl128 state = 0x2d96f026 + __ dci(0x45535983); // usubwb z3.h, z12.h, z19.b + // vl128 state = 0x5a9cab0c + __ dci(0x45535981); // usubwb z1.h, z12.h, z19.b + // vl128 state = 0x7f8d695f + __ dci(0x45535a83); // usubwb z3.h, z20.h, z19.b + // vl128 state = 0xb0ae0f62 + __ dci(0x45d35e81); // usubwt z1.d, z20.d, z19.s + // vl128 state = 0xfe7e227b + __ dci(0x45d25ec9); // usubwt z9.d, z22.d, z18.s + // vl128 state = 0xed9dd734 + __ dci(0x45d35e88); // usubwt z8.d, z20.d, z19.s + // vl128 state = 0x943f8d24 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x943f8d24, + 0xfe956248, + 0xfefddb40, + 0x4d92bfb3, + 0x01dcd5b1, + 0x29a23c92, + 0xb7587530, + 0xa56fa28c, + 0xa0f8590d, + 0xa6b883a4, + 0x2e50d1fd, + 0x8e976f55, + 0xb21bd3b1, + 0x0c3586e5, + 0xe3d7e7e6, + 0xb1e0e34f, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_shift_long) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * 
kInstructionSize); + __ dci(0x4518aafc); // ushllb z28.s, z23.h, #8 + // vl128 state = 0x07dfb216 + __ dci(0x4518afec); // ushllt z12.s, z31.h, #8 + // vl128 state = 0xe3c5d68c + __ dci(0x4518adc4); // ushllt z4.s, z14.h, #8 + // vl128 state = 0xce8721fc + __ dci(0x4518a1c5); // sshllb z5.s, z14.h, #8 + // vl128 state = 0x71820bae + __ dci(0x4508a9cd); // ushllb z13.h, z14.b, #0 + // vl128 state = 0xfdc3f7b3 + __ dci(0x4508ad9d); // ushllt z29.h, z12.b, #0 + // vl128 state = 0x93c1f606 + __ dci(0x4508a795); // sshllt z21.h, z28.b, #0 + // vl128 state = 0x15ebcb72 + __ dci(0x450caf94); // ushllt z20.h, z28.b, #4 + // vl128 state = 0x76c630f5 + __ dci(0x4508afd6); // ushllt z22.h, z30.b, #0 + // vl128 state = 0xa9c6dfbc + __ dci(0x4509aed7); // ushllt z23.h, z22.b, #1 + // vl128 state = 0xa5942073 + __ dci(0x4508ae55); // ushllt z21.h, z18.b, #0 + // vl128 state = 0xe4348777 + __ dci(0x450cac51); // ushllt z17.h, z2.b, #4 + // vl128 state = 0x91c6e6ea + __ dci(0x450ca870); // ushllb z16.h, z3.b, #4 + // vl128 state = 0x40393ae8 + __ dci(0x450ca031); // sshllb z17.h, z1.b, #4 + // vl128 state = 0x8b9526e8 + __ dci(0x450aa030); // sshllb z16.h, z1.b, #2 + // vl128 state = 0xd3d0857a + __ dci(0x450aa031); // sshllb z17.h, z1.b, #2 + // vl128 state = 0xbdd18de2 + __ dci(0x450ba233); // sshllb z19.h, z17.b, #3 + // vl128 state = 0x5e5f6f2a + __ dci(0x4509a263); // sshllb z3.h, z19.b, #1 + // vl128 state = 0xa3b5427b + __ dci(0x450da673); // sshllt z19.h, z19.b, #5 + // vl128 state = 0x97472b22 + __ dci(0x451da477); // sshllt z23.s, z3.h, #13 + // vl128 state = 0xe6da4012 + __ dci(0x451da5f6); // sshllt z22.s, z15.h, #13 + // vl128 state = 0x11630552 + __ dci(0x450da5b4); // sshllt z20.h, z13.b, #5 + // vl128 state = 0xe9a4cad0 + __ dci(0x450da5d5); // sshllt z21.h, z14.b, #5 + // vl128 state = 0x750d4143 + __ dci(0x450fa4d7); // sshllt z23.h, z6.b, #7 + // vl128 state = 0xc441984c + __ dci(0x451ba4df); // sshllt z31.s, z6.h, #11 + // vl128 state = 0x9a3899af + __ 
dci(0x451ba4db); // sshllt z27.s, z6.h, #11 + // vl128 state = 0xbb6684bb + __ dci(0x451ba4bf); // sshllt z31.s, z5.h, #11 + // vl128 state = 0x45a2cf1e + __ dci(0x451aa49b); // sshllt z27.s, z4.h, #10 + // vl128 state = 0xac10df2f + __ dci(0x451aa49f); // sshllt z31.s, z4.h, #10 + // vl128 state = 0x9cecdbd8 + __ dci(0x451aa89b); // ushllb z27.s, z4.h, #10 + // vl128 state = 0x73fca806 + __ dci(0x4518aa9f); // ushllb z31.s, z20.h, #8 + // vl128 state = 0xf58883fb + __ dci(0x451aaab7); // ushllb z23.s, z21.h, #10 + // vl128 state = 0xf9476b16 + __ dci(0x4508aaa7); // ushllb z7.h, z21.b, #0 + // vl128 state = 0x6f65ea0e + __ dci(0x4508ae2f); // ushllt z15.h, z17.b, #0 + // vl128 state = 0x574341e2 + __ dci(0x4509ac27); // ushllt z7.h, z1.b, #1 + // vl128 state = 0xe373d23c + __ dci(0x450dae25); // ushllt z5.h, z17.b, #5 + // vl128 state = 0xc6ad882b + __ dci(0x4509aea7); // ushllt z7.h, z21.b, #1 + // vl128 state = 0xfce8617d + __ dci(0x4509adb7); // ushllt z23.h, z13.b, #1 + // vl128 state = 0x30f63baf + __ dci(0x4549ade7); // ushllt z7.d, z15.s, #9 + // vl128 state = 0x20522e02 + __ dci(0x4549adf7); // ushllt z23.d, z15.s, #9 + // vl128 state = 0x18c6aade + __ dci(0x4548aff6); // ushllt z22.d, z31.s, #8 + // vl128 state = 0x3ad49ec9 + __ dci(0x4548affe); // ushllt z30.d, z31.s, #8 + // vl128 state = 0x828be22f + __ dci(0x4548adda); // ushllt z26.d, z14.s, #8 + // vl128 state = 0xb4997aa9 + __ dci(0x4544add2); // ushllt z18.d, z14.s, #4 + // vl128 state = 0x6e7feb55 + __ dci(0x454cad42); // ushllt z2.d, z10.s, #12 + // vl128 state = 0xb8ff410d + __ dci(0x450dad40); // ushllt z0.h, z10.b, #5 + // vl128 state = 0x806bb38f + __ dci(0x4515ad50); // ushllt z16.s, z10.h, #5 + // vl128 state = 0x6bd247ad + __ dci(0x4557ad51); // ushllt z17.d, z10.s, #23 + // vl128 state = 0xc0959f27 + __ dci(0x4557ad41); // ushllt z1.d, z10.s, #23 + // vl128 state = 0xf0176482 + __ dci(0x4557ad40); // ushllt z0.d, z10.s, #23 + // vl128 state = 0xd5c958bf + } + + uint32_t state; + 
ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xd5c958bf, + 0xb7546431, + 0xee4f6b9f, + 0x74f31aeb, + 0x98282a7a, + 0xf2423509, + 0xe3ae7c5c, + 0xe544e7ba, + 0x7d52fba5, + 0x1520b68d, + 0xee539501, + 0x1a65ba45, + 0x0d4c2383, + 0x9f4a30c5, + 0xca6662a2, + 0x64dc5f23, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_shift_narrow) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x456b1458); // shrnt z24.s, z2.d, #21 + // vl128 state = 0x70323182 + __ dci(0x456b145c); // shrnt z28.s, z2.d, #21 + // vl128 state = 0x1d620da3 + __ dci(0x45291454); // shrnt z20.b, z2.h, #7 + // vl128 state = 0x8e6d3a55 + __ dci(0x4539141c); // shrnt z28.h, z0.s, #7 + // vl128 state = 0xbc19c1cc + __ dci(0x453914b8); // shrnt z24.h, z5.s, #7 + // vl128 state = 0x0bd4d1e8 + __ dci(0x453b14f9); // shrnt z25.h, z7.s, #5 + // vl128 state = 0x15622295 + __ dci(0x453315fd); // shrnt z29.h, z15.s, #13 + // vl128 state = 0x45bf3b94 + __ dci(0x45331d75); // rshrnt z21.h, z11.s, #13 + // vl128 state = 0xbb3574e6 + __ dci(0x45331945); // rshrnb z5.h, z10.s, #13 + // vl128 state = 0x7b72be5f + __ dci(0x45331941); // rshrnb z1.h, z10.s, #13 + // vl128 state = 0x073cdf1a + __ dci(0x45331949); // rshrnb z9.h, z10.s, #13 + // vl128 state = 0x3ecd1bf9 + __ dci(0x453b1979); // rshrnb z25.h, z11.s, #5 + // vl128 state = 0x19f7734e + __ dci(0x453b11f1); // shrnb z17.h, z15.s, #5 + // vl128 state = 0x47a3f036 + __ dci(0x453711f9); // shrnb z25.h, z15.s, #9 + // vl128 state = 0xff283fe4 + __ dci(0x453315f8); // shrnt z24.h, z15.s, #13 + // vl128 state = 0x1c19f8fb + __ dci(0x453319f0); // rshrnb z16.h, 
z15.s, #13 + // vl128 state = 0x3be08052 + __ dci(0x453b1972); // rshrnb z18.h, z11.s, #5 + // vl128 state = 0xc5ae76a0 + __ dci(0x453b1962); // rshrnb z2.h, z11.s, #5 + // vl128 state = 0x75ec3872 + __ dci(0x453b1c60); // rshrnt z0.h, z3.s, #5 + // vl128 state = 0x9b372229 + __ dci(0x45331c44); // rshrnt z4.h, z2.s, #13 + // vl128 state = 0xe4e22904 + __ dci(0x45371c0c); // rshrnt z12.h, z0.s, #9 + // vl128 state = 0x12bc6f4b + __ dci(0x45331d08); // rshrnt z8.h, z8.s, #13 + // vl128 state = 0x3ef95245 + __ dci(0x45331c98); // rshrnt z24.h, z4.s, #13 + // vl128 state = 0x0a4a0d68 + __ dci(0x45731e99); // rshrnt z25.s, z20.d, #13 + // vl128 state = 0xa01ca6c8 + __ dci(0x457b1a98); // rshrnb z24.s, z20.d, #5 + // vl128 state = 0x73a50e30 + __ dci(0x452b1a9c); // rshrnb z28.b, z20.h, #5 + // vl128 state = 0xbad3deda + __ dci(0x452b1818); // rshrnb z24.b, z0.h, #5 + // vl128 state = 0x579b3c8f + __ dci(0x452b181a); // rshrnb z26.b, z0.h, #5 + // vl128 state = 0xa2b0bf7c + __ dci(0x452b181b); // rshrnb z27.b, z0.h, #5 + // vl128 state = 0x7bebdf9e + __ dci(0x45291a1a); // rshrnb z26.b, z16.h, #7 + // vl128 state = 0x3f90e1b7 + __ dci(0x45681a12); // rshrnb z18.s, z16.d, #24 + // vl128 state = 0x57e6295e + __ dci(0x45681290); // shrnb z16.s, z20.d, #24 + // vl128 state = 0xa53f48b5 + __ dci(0x45281091); // shrnb z17.b, z4.h, #8 + // vl128 state = 0x65179ab4 + __ dci(0x45281401); // shrnt z1.b, z0.h, #8 + // vl128 state = 0x3cc490ba + __ dci(0x45281c83); // rshrnt z3.b, z4.h, #8 + // vl128 state = 0x3bc34e69 + __ dci(0x45281c93); // rshrnt z19.b, z4.h, #8 + // vl128 state = 0x6dded0bb + __ dci(0x45681cb7); // rshrnt z23.s, z5.d, #24 + // vl128 state = 0x378f83c0 + __ dci(0x45291cb6); // rshrnt z22.b, z5.h, #7 + // vl128 state = 0x7e4d1c44 + __ dci(0x45391eb2); // rshrnt z18.h, z21.s, #7 + // vl128 state = 0x66c0b784 + __ dci(0x45281ea2); // rshrnt z2.b, z21.h, #8 + // vl128 state = 0x62df2c82 + __ dci(0x452c1fa0); // rshrnt z0.b, z29.h, #4 + // vl128 state = 0xd79ee307 + 
__ dci(0x456c1ba2); // rshrnb z2.s, z29.d, #20 + // vl128 state = 0x8ebb2251 + __ dci(0x45641ab2); // rshrnb z18.s, z21.d, #28 + // vl128 state = 0x77ec053a + __ dci(0x456c12ba); // shrnb z26.s, z21.d, #20 + // vl128 state = 0xcf94b608 + __ dci(0x452812b8); // shrnb z24.b, z21.h, #8 + // vl128 state = 0x3e067a62 + __ dci(0x4568123a); // shrnb z26.s, z17.d, #24 + // vl128 state = 0xe451de0f + __ dci(0x456c1338); // shrnb z24.s, z25.d, #20 + // vl128 state = 0x4042d707 + __ dci(0x456813b9); // shrnb z25.s, z29.d, #24 + // vl128 state = 0x5184a2aa + __ dci(0x456812e9); // shrnb z9.s, z23.d, #24 + // vl128 state = 0x246344b8 + __ dci(0x456812e1); // shrnb z1.s, z23.d, #24 + // vl128 state = 0x76866e79 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x76866e79, + 0x42b52927, + 0x84a0bfcc, + 0xf8226fc2, + 0x444f6df5, + 0x2f8dcd68, + 0x5a48278a, + 0x1cdd7f2f, + 0x7816d36c, + 0xebae972f, + 0xa02adfbe, + 0xc93cde0f, + 0xce43287b, + 0x777d6ce0, + 0x9d3be904, + 0x3e059dd2, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_shift_narrow_usat) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x457a3207); // uqshrnb z7.s, z16.d, #6 + // vl128 state = 0x4b40d14e + __ dci(0x457a3206); // uqshrnb z6.s, z16.d, #6 + // vl128 state = 0x4dbc0377 + __ dci(0x457a3204); // uqshrnb z4.s, z16.d, #6 + // vl128 state = 0xa6fbc7f9 + __ dci(0x457e3a14); // uqrshrnb z20.s, z16.d, #2 + // vl128 state = 0x9e9414a9 + __ dci(0x457b3a15); // uqrshrnb z21.s, z16.d, #5 + // vl128 state = 0xe8824afd + __ dci(0x457b3ab7); // uqrshrnb z23.s, z21.d, #5 + // vl128 state = 
0x81ce1be6 + __ dci(0x457b3ab6); // uqrshrnb z22.s, z21.d, #5 + // vl128 state = 0x5e343a1e + __ dci(0x457f3af7); // uqrshrnb z23.s, z23.d, #1 + // vl128 state = 0x09a5c3a0 + __ dci(0x457b38ff); // uqrshrnb z31.s, z7.d, #5 + // vl128 state = 0xb50710bf + __ dci(0x453338fe); // uqrshrnb z30.h, z7.s, #13 + // vl128 state = 0xfc719c85 + __ dci(0x453338ee); // uqrshrnb z14.h, z7.s, #13 + // vl128 state = 0x157d826a + __ dci(0x453b386a); // uqrshrnb z10.h, z3.s, #5 + // vl128 state = 0x9c735771 + __ dci(0x452f386e); // uqrshrnb z14.b, z3.h, #1 + // vl128 state = 0xe03bb4a4 + __ dci(0x452f3aea); // uqrshrnb z10.b, z23.h, #1 + // vl128 state = 0xa841b415 + __ dci(0x452f38ba); // uqrshrnb z26.b, z5.h, #1 + // vl128 state = 0x55302a6d + __ dci(0x452f3878); // uqrshrnb z24.b, z3.h, #1 + // vl128 state = 0x73bee182 + __ dci(0x453f385c); // uqrshrnb z28.h, z2.s, #1 + // vl128 state = 0x75f81ccc + __ dci(0x453f397d); // uqrshrnb z29.h, z11.s, #1 + // vl128 state = 0x856fecc9 + __ dci(0x457d397c); // uqrshrnb z28.s, z11.d, #3 + // vl128 state = 0x4b144bf2 + __ dci(0x457f3878); // uqrshrnb z24.s, z3.d, #1 + // vl128 state = 0x7ea5dad3 + __ dci(0x457b3c7a); // uqrshrnt z26.s, z3.d, #5 + // vl128 state = 0xa7d48543 + __ dci(0x45633c72); // uqrshrnt z18.s, z3.d, #29 + // vl128 state = 0x18f647a7 + __ dci(0x45613d76); // uqrshrnt z22.s, z11.d, #31 + // vl128 state = 0x96d4081b + __ dci(0x45693972); // uqrshrnb z18.s, z11.d, #23 + // vl128 state = 0xa8369e83 + __ dci(0x45693d53); // uqrshrnt z19.s, z10.d, #23 + // vl128 state = 0x7553ff55 + __ dci(0x45713d51); // uqrshrnt z17.s, z10.d, #15 + // vl128 state = 0x52a52ecc + __ dci(0x45713d99); // uqrshrnt z25.s, z12.d, #15 + // vl128 state = 0x4de78f7b + __ dci(0x45753f9d); // uqrshrnt z29.s, z28.d, #11 + // vl128 state = 0x0f8948cd + __ dci(0x45753f8d); // uqrshrnt z13.s, z28.d, #11 + // vl128 state = 0x7f2c1b05 + __ dci(0x45753685); // uqshrnt z5.s, z20.d, #11 + // vl128 state = 0xbe6f6ea9 + __ dci(0x457d3784); // uqshrnt z4.s, z28.d, 
#3 + // vl128 state = 0x716e1acd + __ dci(0x453c3785); // uqshrnt z5.h, z28.s, #4 + // vl128 state = 0x828a3cbb + __ dci(0x453837a4); // uqshrnt z4.h, z29.s, #8 + // vl128 state = 0x125ddc3c + __ dci(0x457a37a6); // uqshrnt z6.s, z29.d, #6 + // vl128 state = 0x8c5c5d4c + __ dci(0x453a37e4); // uqshrnt z4.h, z31.s, #6 + // vl128 state = 0xdea9801f + __ dci(0x453f37ec); // uqshrnt z12.h, z31.s, #1 + // vl128 state = 0x6caa6537 + __ dci(0x457f37dc); // uqshrnt z28.s, z30.d, #1 + // vl128 state = 0x66c0c05d + __ dci(0x45773fde); // uqrshrnt z30.s, z30.d, #9 + // vl128 state = 0xf8d495e2 + __ dci(0x45653fda); // uqrshrnt z26.s, z30.d, #27 + // vl128 state = 0xb543c017 + __ dci(0x45613ffb); // uqrshrnt z27.s, z31.d, #31 + // vl128 state = 0x58a69fb4 + __ dci(0x45613feb); // uqrshrnt z11.s, z31.d, #31 + // vl128 state = 0xb5a04d48 + __ dci(0x45653fca); // uqrshrnt z10.s, z30.d, #27 + // vl128 state = 0xd2d445e0 + __ dci(0x45753fe8); // uqrshrnt z8.s, z31.d, #11 + // vl128 state = 0x67d89d28 + __ dci(0x457537ca); // uqshrnt z10.s, z30.d, #11 + // vl128 state = 0xcaa2b6dc + __ dci(0x457d35ce); // uqshrnt z14.s, z14.d, #3 + // vl128 state = 0x9da6b10f + __ dci(0x452d35de); // uqshrnt z30.b, z14.h, #3 + // vl128 state = 0xda8663db + __ dci(0x452d314e); // uqshrnb z14.b, z10.h, #3 + // vl128 state = 0x761992a9 + __ dci(0x453d304f); // uqshrnb z15.h, z2.s, #3 + // vl128 state = 0x71587e6a + __ dci(0x453d386e); // uqrshrnb z14.h, z3.s, #3 + // vl128 state = 0xc6118398 + __ dci(0x453538ec); // uqrshrnb z12.h, z7.s, #11 + // vl128 state = 0x5e542c3a + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x5e542c3a, + 0xd9128c5a, + 0x73f430ed, + 0x160c07da, + 0x7bff9561, + 0x4b2d6335, + 0x3738197c, + 0x2b624a48, + 0xbb257999, + 0x0d5d8614, + 0xb031d1fc, + 0x60f2fce2, + 0x92770ad6, + 0x6e33aa78, + 0x8752089b, + 
0x37b56a40, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_shift_narrow_ssat) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x456c0875); // sqrshrunb z21.s, z3.d, #20 + // vl128 state = 0x1446427d + __ dci(0x456c0877); // sqrshrunb z23.s, z3.d, #20 + // vl128 state = 0xd839ea94 + __ dci(0x456c0876); // sqrshrunb z22.s, z3.d, #20 + // vl128 state = 0xe4dd3104 + __ dci(0x456e0c77); // sqrshrunt z23.s, z3.d, #18 + // vl128 state = 0xd86dd8aa + __ dci(0x456e0a73); // sqrshrunb z19.s, z19.d, #18 + // vl128 state = 0x7aacf973 + __ dci(0x456c0e72); // sqrshrunt z18.s, z19.d, #20 + // vl128 state = 0x6e7b28b8 + __ dci(0x456c2c62); // sqrshrnt z2.s, z3.d, #20 + // vl128 state = 0x242e0a5e + __ dci(0x456c24f2); // sqshrnt z18.s, z7.d, #20 + // vl128 state = 0xf9c993ec + __ dci(0x456c2570); // sqshrnt z16.s, z11.d, #20 + // vl128 state = 0x087c4fc1 + __ dci(0x456e2478); // sqshrnt z24.s, z3.d, #18 + // vl128 state = 0x33fdae0c + __ dci(0x456e2c30); // sqrshrnt z16.s, z1.d, #18 + // vl128 state = 0x0c957ea2 + __ dci(0x456e2d78); // sqrshrnt z24.s, z11.d, #18 + // vl128 state = 0x0792e58a + __ dci(0x456f2970); // sqrshrnb z16.s, z11.d, #17 + // vl128 state = 0xe7169693 + __ dci(0x456b2938); // sqrshrnb z24.s, z9.d, #21 + // vl128 state = 0x1372a92d + __ dci(0x45692979); // sqrshrnb z25.s, z11.d, #23 + // vl128 state = 0xc1c31387 + __ dci(0x4563297d); // sqrshrnb z29.s, z11.d, #29 + // vl128 state = 0x50a08538 + __ dci(0x45632975); // sqrshrnb z21.s, z11.d, #29 + // vl128 state = 0xda962f25 + __ dci(0x456309f1); // sqrshrunb z17.s, z15.d, #29 + // vl128 state = 0xe149814e + __ dci(0x457308f3); // sqrshrunb z19.s, z7.d, #13 + // vl128 state = 0x6d5ea38b + __ dci(0x457329fb); // sqrshrnb z27.s, z15.d, #13 + // 
vl128 state = 0xee932acb + __ dci(0x457721f3); // sqshrnb z19.s, z15.d, #9 + // vl128 state = 0x7e05914b + __ dci(0x45732171); // sqshrnb z17.s, z11.d, #13 + // vl128 state = 0xe4bf82a4 + __ dci(0x45722070); // sqshrnb z16.s, z3.d, #14 + // vl128 state = 0xdfc01530 + __ dci(0x456a2078); // sqshrnb z24.s, z3.d, #22 + // vl128 state = 0x6b48fc15 + __ dci(0x452a287c); // sqrshrnb z28.b, z3.h, #6 + // vl128 state = 0x45e86048 + __ dci(0x45282c78); // sqrshrnt z24.b, z3.h, #8 + // vl128 state = 0xb8dc83dd + __ dci(0x45602c68); // sqrshrnt z8.s, z3.d, #32 + // vl128 state = 0xda536cf8 + __ dci(0x45602678); // sqshrnt z24.s, z19.d, #32 + // vl128 state = 0xb548f79b + __ dci(0x45682e70); // sqrshrnt z16.s, z19.d, #24 + // vl128 state = 0xd564dd2d + __ dci(0x45682260); // sqshrnb z0.s, z19.d, #24 + // vl128 state = 0x7b901f9b + __ dci(0x45682642); // sqshrnt z2.s, z18.d, #24 + // vl128 state = 0x1d4fe6f4 + __ dci(0x45680606); // sqshrunt z6.s, z16.d, #24 + // vl128 state = 0xe82d65a2 + __ dci(0x45680282); // sqshrunb z2.s, z20.d, #24 + // vl128 state = 0x8a1ae6f6 + __ dci(0x45680283); // sqshrunb z3.s, z20.d, #24 + // vl128 state = 0x5e345dcf + __ dci(0x4568238b); // sqshrnb z11.s, z28.d, #24 + // vl128 state = 0x31f54470 + __ dci(0x45682383); // sqshrnb z3.s, z28.d, #24 + // vl128 state = 0x6b48975d + __ dci(0x45682682); // sqshrnt z2.s, z20.d, #24 + // vl128 state = 0xa9fba153 + __ dci(0x45782e8a); // sqrshrnt z10.s, z20.d, #8 + // vl128 state = 0x0fe3100f + __ dci(0x45780eba); // sqrshrunt z26.s, z21.d, #8 + // vl128 state = 0x1a392151 + __ dci(0x45700e32); // sqrshrunt z18.s, z17.d, #16 + // vl128 state = 0x08cea935 + __ dci(0x45700e42); // sqrshrunt z2.s, z18.d, #16 + // vl128 state = 0x353f24b1 + __ dci(0x45782e52); // sqrshrnt z18.s, z18.d, #8 + // vl128 state = 0xe06219d0 + __ dci(0x45782e42); // sqrshrnt z2.s, z18.d, #8 + // vl128 state = 0xbb4c6d3b + __ dci(0x45742e46); // sqrshrnt z6.s, z18.d, #12 + // vl128 state = 0x77e7393c + __ dci(0x45642ec7); // sqrshrnt 
z7.s, z22.d, #28 + // vl128 state = 0x5201634c + __ dci(0x45642a97); // sqrshrnb z23.s, z20.d, #28 + // vl128 state = 0x49c32fc1 + __ dci(0x45640b87); // sqrshrunb z7.s, z28.d, #28 + // vl128 state = 0xdd09d56d + __ dci(0x45640f0f); // sqrshrunt z15.s, z24.d, #28 + // vl128 state = 0x50f7d144 + __ dci(0x45600e0e); // sqrshrunt z14.s, z16.d, #32 + // vl128 state = 0xd6bbd38a + __ dci(0x45620a0f); // sqrshrunb z15.s, z16.d, #30 + // vl128 state = 0x141e2991 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x141e2991, + 0x8cb951d0, + 0x74337526, + 0x515534c6, + 0xe3789189, + 0xfee7d505, + 0xfaae7ee8, + 0x71a110a3, + 0x6469dcda, + 0xe61425fc, + 0x6840f618, + 0xbc1b116d, + 0xaad97378, + 0x5d91b661, + 0x9eb84163, + 0xf8ca1e37, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_aba_long) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x45c2ca3e); // uabalb z30.d, z17.s, z2.s + // vl128 state = 0xac47a81c + __ dci(0x45caca7f); // uabalb z31.d, z19.s, z10.s + // vl128 state = 0x10cd4e69 + __ dci(0x455aca7e); // uabalb z30.h, z19.b, z26.b + // vl128 state = 0x8fba3755 + __ dci(0x45daca5f); // uabalb z31.d, z18.s, z26.s + // vl128 state = 0x8c18257c + __ dci(0x45d8ca1d); // uabalb z29.d, z16.s, z24.s + // vl128 state = 0xe6eef5ec + __ dci(0x45d8ce95); // uabalt z21.d, z20.s, z24.s + // vl128 state = 0x2368baee + __ dci(0x4598ce14); // uabalt z20.s, z16.h, z24.h + // vl128 state = 0xc9281174 + __ dci(0x4598ce04); // uabalt z4.s, z16.h, z24.h + // vl128 state = 0xa0b5fc24 + __ dci(0x45d8ce40); // uabalt z0.d, z18.s, z24.s + // vl128 state = 
0xb3ef6f1d + __ dci(0x45daca44); // uabalb z4.d, z18.s, z26.s + // vl128 state = 0xcfa3666b + __ dci(0x45dace00); // uabalt z0.d, z16.s, z26.s + // vl128 state = 0x27bb4ba9 + __ dci(0x459ece04); // uabalt z4.s, z16.h, z30.h + // vl128 state = 0xb6628d3e + __ dci(0x458ece80); // uabalt z0.s, z20.h, z14.h + // vl128 state = 0xe8db526e + __ dci(0x458ec482); // sabalt z2.s, z4.h, z14.h + // vl128 state = 0x73cd8386 + __ dci(0x45cec4a3); // sabalt z3.d, z5.s, z14.s + // vl128 state = 0xba1c4507 + __ dci(0x45cec8a1); // uabalb z1.d, z5.s, z14.s + // vl128 state = 0x851cd798 + __ dci(0x458ec0a9); // sabalb z9.s, z5.h, z14.h + // vl128 state = 0xc85973b8 + __ dci(0x45c6c0ab); // sabalb z11.d, z5.s, z6.s + // vl128 state = 0x84072419 + __ dci(0x4544c0a9); // sabalb z9.h, z5.b, z4.b + // vl128 state = 0x533a377a + __ dci(0x4550c0a1); // sabalb z1.h, z5.b, z16.b + // vl128 state = 0x5a216f3a + __ dci(0x4550c0b1); // sabalb z17.h, z5.b, z16.b + // vl128 state = 0x9957b992 + __ dci(0x4552c095); // sabalb z21.h, z4.b, z18.b + // vl128 state = 0x666bd8db + __ dci(0x4543c094); // sabalb z20.h, z4.b, z3.b + // vl128 state = 0xd66d3d52 + __ dci(0x4543c095); // sabalb z21.h, z4.b, z3.b + // vl128 state = 0x5d47b643 + __ dci(0x4543c385); // sabalb z5.h, z28.b, z3.b + // vl128 state = 0x55fc0a65 + __ dci(0x4543c38d); // sabalb z13.h, z28.b, z3.b + // vl128 state = 0xbb5ccc0f + __ dci(0x45c3c19d); // sabalb z29.d, z12.s, z3.s + // vl128 state = 0xb3dedffd + __ dci(0x45d3c595); // sabalt z21.d, z12.s, z19.s + // vl128 state = 0xd80597a1 + __ dci(0x45d2c185); // sabalb z5.d, z12.s, z18.s + // vl128 state = 0x29a9fafc + __ dci(0x45d2c0b5); // sabalb z21.d, z5.s, z18.s + // vl128 state = 0x85dc16cb + __ dci(0x45d2c0bd); // sabalb z29.d, z5.s, z18.s + // vl128 state = 0xc38b621d + __ dci(0x45d2cab9); // uabalb z25.d, z21.s, z18.s + // vl128 state = 0x3801ad51 + __ dci(0x45d0ca9b); // uabalb z27.d, z20.s, z16.s + // vl128 state = 0xd5cc0a31 + __ dci(0x45d0ca39); // uabalb z25.d, z17.s, z16.s 
+ // vl128 state = 0x272488a9 + __ dci(0x45d0ca3d); // uabalb z29.d, z17.s, z16.s + // vl128 state = 0xea109c4b + __ dci(0x4550ce3c); // uabalt z28.h, z17.b, z16.b + // vl128 state = 0x5a9bdb39 + __ dci(0x4559ce38); // uabalt z24.h, z17.b, z25.b + // vl128 state = 0xd90984c9 + __ dci(0x455bcf39); // uabalt z25.h, z25.b, z27.b + // vl128 state = 0x6c0884ed + __ dci(0x455bceb1); // uabalt z17.h, z21.b, z27.b + // vl128 state = 0x2f01a6ad + __ dci(0x455bceb3); // uabalt z19.h, z21.b, z27.b + // vl128 state = 0x72a428e1 + __ dci(0x455bceb1); // uabalt z17.h, z21.b, z27.b + // vl128 state = 0x27adcf54 + __ dci(0x4559ce21); // uabalt z1.h, z17.b, z25.b + // vl128 state = 0xf1899dea + __ dci(0x45d9ce05); // uabalt z5.d, z16.s, z25.s + // vl128 state = 0x41e92a5c + __ dci(0x45dbc604); // sabalt z4.d, z16.s, z27.s + // vl128 state = 0x96021962 + __ dci(0x45d3c634); // sabalt z20.d, z17.s, z19.s + // vl128 state = 0x4795c9e2 + __ dci(0x45dbc235); // sabalb z21.d, z17.s, z27.s + // vl128 state = 0x6e2eccdb + __ dci(0x45dbc07d); // sabalb z29.d, z3.s, z27.s + // vl128 state = 0x2c2e3625 + __ dci(0x459bc87c); // uabalb z28.s, z3.h, z27.h + // vl128 state = 0x618669ad + __ dci(0x459bc878); // uabalb z24.s, z3.h, z27.h + // vl128 state = 0x2d1a9a08 + __ dci(0x4593cc79); // uabalt z25.s, z3.h, z19.h + // vl128 state = 0xdb6575df + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xdb6575df, + 0x691c09fc, + 0x6d969d30, + 0x83db67a7, + 0x8ca1109d, + 0x5175b8ff, + 0xade3cb1b, + 0x1c7b0422, + 0x1199a415, + 0xd1c715e8, + 0x2053b361, + 0x577c4450, + 0x1557204a, + 0xe994b21a, + 0xec34be56, + 0x1c9e0136, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_add_sub_carry) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + 
CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x4548d4a1); // adclt z1.d, z5.d, z8.d + // vl128 state = 0xde78ceb3 + __ dci(0x4588d4a5); // sbclt z5.s, z5.s, z8.s + // vl128 state = 0x35dc8534 + __ dci(0x4589d421); // sbclt z1.s, z1.s, z9.s + // vl128 state = 0xa72d158b + __ dci(0x45d9d423); // sbclt z3.d, z1.d, z25.d + // vl128 state = 0x197181b9 + __ dci(0x45dfd433); // sbclt z19.d, z1.d, z31.d + // vl128 state = 0xaad0d32d + __ dci(0x4597d437); // sbclt z23.s, z1.s, z23.s + // vl128 state = 0xb1c42b7d + __ dci(0x4597d436); // sbclt z22.s, z1.s, z23.s + // vl128 state = 0x6c51a28c + __ dci(0x4587d537); // sbclt z23.s, z9.s, z7.s + // vl128 state = 0x525b5cf8 + __ dci(0x4586d727); // sbclt z7.s, z25.s, z6.s + // vl128 state = 0x33942ff9 + __ dci(0x45c6d625); // sbclt z5.d, z17.d, z6.d + // vl128 state = 0x24de09b4 + __ dci(0x45c2d6b5); // sbclt z21.d, z21.d, z2.d + // vl128 state = 0xabc0063f + __ dci(0x4546d6b7); // adclt z23.d, z21.d, z6.d + // vl128 state = 0x52765e95 + __ dci(0x45c7d6a7); // sbclt z7.d, z21.d, z7.d + // vl128 state = 0x7045d250 + __ dci(0x4547d4a5); // adclt z5.d, z5.d, z7.d + // vl128 state = 0xb20f5c2a + __ dci(0x4517d4a1); // adclt z1.s, z5.s, z23.s + // vl128 state = 0x5c2c9c29 + __ dci(0x4507d5a5); // adclt z5.s, z13.s, z7.s + // vl128 state = 0x788b25f0 + __ dci(0x4507d5ad); // adclt z13.s, z13.s, z7.s + // vl128 state = 0xf27eff1e + __ dci(0x4507d0ac); // adclb z12.s, z5.s, z7.s + // vl128 state = 0xc0b629de + __ dci(0x450ed0ad); // adclb z13.s, z5.s, z14.s + // vl128 state = 0x3e15df94 + __ dci(0x458ad0a9); // sbclb z9.s, z5.s, z10.s + // vl128 state = 0x68f64c82 + __ dci(0x4582d2ad); // sbclb z13.s, z21.s, z2.s + // vl128 state = 0x882379e1 + __ dci(0x4502d3af); // adclb z15.s, z29.s, z2.s + // vl128 state = 0x6901994e + __ dci(0x450ad32b); // adclb z11.s, z25.s, z10.s + // vl128 state = 0xa67e9382 + __ 
dci(0x4582d329); // sbclb z9.s, z25.s, z2.s + // vl128 state = 0x9451d0c4 + __ dci(0x4592d22b); // sbclb z11.s, z17.s, z18.s + // vl128 state = 0xc19da52e + __ dci(0x459ad2a3); // sbclb z3.s, z21.s, z26.s + // vl128 state = 0x91065b69 + __ dci(0x451ad233); // adclb z19.s, z17.s, z26.s + // vl128 state = 0xe3fdc4a5 + __ dci(0x450bd232); // adclb z18.s, z17.s, z11.s + // vl128 state = 0x168abbff + __ dci(0x450ad2b6); // adclb z22.s, z21.s, z10.s + // vl128 state = 0x64d0c940 + __ dci(0x4582d2b4); // sbclb z20.s, z21.s, z2.s + // vl128 state = 0x37307824 + __ dci(0x4582d6e4); // sbclt z4.s, z23.s, z2.s + // vl128 state = 0xd35e02f7 + __ dci(0x4500d6f4); // adclt z20.s, z23.s, z0.s + // vl128 state = 0x017ed1b0 + __ dci(0x4501d2e4); // adclb z4.s, z23.s, z1.s + // vl128 state = 0x327242bc + __ dci(0x4501d1f4); // adclb z20.s, z15.s, z1.s + // vl128 state = 0x208174e8 + __ dci(0x4503d1b0); // adclb z16.s, z13.s, z3.s + // vl128 state = 0xa5a9f61d + __ dci(0x4501d198); // adclb z24.s, z12.s, z1.s + // vl128 state = 0x97e22c2b + __ dci(0x4501d3da); // adclb z26.s, z30.s, z1.s + // vl128 state = 0xd3ac35d5 + __ dci(0x4501d6de); // adclt z30.s, z22.s, z1.s + // vl128 state = 0xab835df9 + __ dci(0x4503d2dc); // adclb z28.s, z22.s, z3.s + // vl128 state = 0xa048599b + __ dci(0x4502d6d8); // adclt z24.s, z22.s, z2.s + // vl128 state = 0x4c245fee + __ dci(0x4502d6d0); // adclt z16.s, z22.s, z2.s + // vl128 state = 0x0222f3cc + __ dci(0x4502d280); // adclb z0.s, z20.s, z2.s + // vl128 state = 0x16bd7f6a + __ dci(0x458ad284); // sbclb z4.s, z20.s, z10.s + // vl128 state = 0x7ef7d0a2 + __ dci(0x458ad6d4); // sbclt z20.s, z22.s, z10.s + // vl128 state = 0x303d8262 + __ dci(0x458ad6dc); // sbclt z28.s, z22.s, z10.s + // vl128 state = 0x86b8b0e9 + __ dci(0x458bd7cc); // sbclt z12.s, z30.s, z11.s + // vl128 state = 0x068cc5cd + __ dci(0x45dbd7ce); // sbclt z14.d, z30.d, z27.d + // vl128 state = 0x30acfa7f + __ dci(0x45dfd75e); // sbclt z30.d, z26.d, z31.d + // vl128 state = 0xdbd8b32a 
+ __ dci(0x45ddd7ce); // sbclt z14.d, z30.d, z29.d + // vl128 state = 0x59c3c1a9 + __ dci(0x45ddd7cf); // sbclt z15.d, z30.d, z29.d + // vl128 state = 0x5c953a50 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x5c953a50, + 0x22fea196, + 0x084c11a8, + 0x6e7e24d1, + 0x70965ff7, + 0x8c7cb797, + 0xdb846b66, + 0x512f049d, + 0x5c45d25c, + 0xa349606f, + 0x68a853e5, + 0xd92fbeff, + 0x52e59a6b, + 0xf77ee8ce, + 0x6c79623b, + 0x7efed6cc, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_add_sub_high) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x45fd7464); // subhnt z4.s, z3.d, z29.d + // vl128 state = 0x0eea0f4a + __ dci(0x45fc7c66); // rsubhnt z6.s, z3.d, z28.d + // vl128 state = 0x4dc0d938 + __ dci(0x45fc7c6e); // rsubhnt z14.s, z3.d, z28.d + // vl128 state = 0x33de615e + __ dci(0x45f46c7e); // raddhnt z30.s, z3.d, z20.d + // vl128 state = 0xa24af7ae + __ dci(0x45f06e7c); // raddhnt z28.s, z19.d, z16.d + // vl128 state = 0x13883aa2 + __ dci(0x45b06a6c); // raddhnb z12.h, z19.s, z16.s + // vl128 state = 0x5bf75f05 + __ dci(0x45b96a64); // raddhnb z4.h, z19.s, z25.s + // vl128 state = 0x0e489878 + __ dci(0x45b96820); // raddhnb z0.h, z1.s, z25.s + // vl128 state = 0x86df8f5f + __ dci(0x45b96a01); // raddhnb z1.h, z16.s, z25.s + // vl128 state = 0x0d1563f2 + __ dci(0x45b96900); // raddhnb z0.h, z8.s, z25.s + // vl128 state = 0xd66de87e + __ dci(0x45a97904); // rsubhnb z4.h, z8.s, z9.s + // vl128 state = 0x0c34bd33 + __ dci(0x45a9790c); // rsubhnb z12.h, z8.s, z9.s + // vl128 state = 0x7892f2c5 + __ dci(0x45e97988); // rsubhnb z8.s, z12.d, 
z9.d + // vl128 state = 0x9709efbd + __ dci(0x45f97909); // rsubhnb z9.s, z8.d, z25.d + // vl128 state = 0x029a3116 + __ dci(0x45ff790d); // rsubhnb z13.s, z8.d, z31.d + // vl128 state = 0x48cf21c1 + __ dci(0x45ff6d05); // raddhnt z5.s, z8.d, z31.d + // vl128 state = 0x44c94a11 + __ dci(0x45ff6dc1); // raddhnt z1.s, z14.d, z31.d + // vl128 state = 0x12fab619 + __ dci(0x45ff79d1); // rsubhnb z17.s, z14.d, z31.d + // vl128 state = 0x6f749933 + __ dci(0x457f7dd0); // rsubhnt z16.b, z14.h, z31.h + // vl128 state = 0x404889de + __ dci(0x457f75f1); // subhnt z17.b, z15.h, z31.h + // vl128 state = 0x1dae2a16 + __ dci(0x457f75f3); // subhnt z19.b, z15.h, z31.h + // vl128 state = 0xc441a9f0 + __ dci(0x456d75fb); // subhnt z27.b, z15.h, z13.h + // vl128 state = 0xdd79f567 + __ dci(0x45ed7dff); // rsubhnt z31.s, z15.d, z13.d + // vl128 state = 0x49b27a1f + __ dci(0x45e17dfe); // rsubhnt z30.s, z15.d, z1.d + // vl128 state = 0x19cddb35 + __ dci(0x45e17df6); // rsubhnt z22.s, z15.d, z1.d + // vl128 state = 0xea722faa + __ dci(0x45e37d72); // rsubhnt z18.s, z11.d, z3.d + // vl128 state = 0x907267b3 + __ dci(0x45737d62); // rsubhnt z2.b, z11.h, z19.h + // vl128 state = 0x1e5409d8 + __ dci(0x45726d6a); // raddhnt z10.b, z11.h, z18.h + // vl128 state = 0xce3b87ca + __ dci(0x45726f5a); // raddhnt z26.b, z26.h, z18.h + // vl128 state = 0x2f330789 + __ dci(0x45706f18); // raddhnt z24.b, z24.h, z16.h + // vl128 state = 0xff09606a + __ dci(0x45706f08); // raddhnt z8.b, z24.h, z16.h + // vl128 state = 0x062ac37b + __ dci(0x45706f09); // raddhnt z9.b, z24.h, z16.h + // vl128 state = 0xb12c9142 + __ dci(0x45786b08); // raddhnb z8.b, z24.h, z24.h + // vl128 state = 0x77e41545 + __ dci(0x45786b0c); // raddhnb z12.b, z24.h, z24.h + // vl128 state = 0x1f3a202d + __ dci(0x457a6308); // addhnb z8.b, z24.h, z26.h + // vl128 state = 0xea51f4b9 + __ dci(0x45fb6318); // addhnb z24.s, z24.d, z27.d + // vl128 state = 0x5b98747e + __ dci(0x45b96319); // addhnb z25.h, z24.s, z25.s + // vl128 state = 
0xdcebf700 + __ dci(0x45bb621d); // addhnb z29.h, z16.s, z27.s + // vl128 state = 0x55a216b1 + __ dci(0x45b3625f); // addhnb z31.h, z18.s, z19.s + // vl128 state = 0x3e86d641 + __ dci(0x45b3631b); // addhnb z27.h, z24.s, z19.s + // vl128 state = 0x36d052e3 + __ dci(0x45bb6213); // addhnb z19.h, z16.s, z27.s + // vl128 state = 0xba012cb8 + __ dci(0x45bf7217); // subhnb z23.h, z16.s, z31.s + // vl128 state = 0xdef826a7 + __ dci(0x45b67213); // subhnb z19.h, z16.s, z22.s + // vl128 state = 0x5cd11781 + __ dci(0x45b66223); // addhnb z3.h, z17.s, z22.s + // vl128 state = 0x2f04c440 + __ dci(0x45f66a27); // raddhnb z7.s, z17.d, z22.d + // vl128 state = 0x486d0d03 + __ dci(0x45f76825); // raddhnb z5.s, z1.d, z23.d + // vl128 state = 0x8a94d5c9 + __ dci(0x45f668a1); // raddhnb z1.s, z5.d, z22.d + // vl128 state = 0x14e8e0e7 + __ dci(0x45f469b1); // raddhnb z17.s, z13.d, z20.d + // vl128 state = 0x19b96fb3 + __ dci(0x45f469b3); // raddhnb z19.s, z13.d, z20.d + // vl128 state = 0xc98e7d4e + __ dci(0x45f169b7); // raddhnb z23.s, z13.d, z17.d + // vl128 state = 0x7ff24d47 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x7ff24d47, + 0xc639a9b3, + 0x0a1df4a5, + 0x30db6e18, + 0xf3e2f795, + 0x36ff477d, + 0x162f1ca5, + 0x36da990b, + 0x110b2c35, + 0xaf1580f5, + 0x14e39873, + 0x7f5eb52c, + 0x2ececb6f, + 0x4e4d71f0, + 0x800769d1, + 0x1bcbe3a3, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_complex_addition) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x4500dc43); // cadd z3.b, z3.b, z2.b, #270 + // vl128 state = 0x998365c2 + __ dci(0x4540dc13); // 
cadd z19.h, z19.h, z0.h, #270 + // vl128 state = 0xcc866131 + __ dci(0x4541d81b); // sqcadd z27.h, z27.h, z0.h, #90 + // vl128 state = 0x2ae23a6a + __ dci(0x45c1d853); // sqcadd z19.d, z19.d, z2.d, #90 + // vl128 state = 0x1f8de2d3 + __ dci(0x4541d8c3); // sqcadd z3.h, z3.h, z6.h, #90 + // vl128 state = 0x3655c07c + __ dci(0x4541d8d3); // sqcadd z19.h, z19.h, z6.h, #90 + // vl128 state = 0x3a8fe2d9 + __ dci(0x4541d811); // sqcadd z17.h, z17.h, z0.h, #90 + // vl128 state = 0x003c88ea + __ dci(0x4540da10); // cadd z16.h, z16.h, z16.h, #90 + // vl128 state = 0xe20c1375 + __ dci(0x4540da18); // cadd z24.h, z24.h, z16.h, #90 + // vl128 state = 0x67bb0270 + __ dci(0x4540de5a); // cadd z26.h, z26.h, z18.h, #270 + // vl128 state = 0x7abb4f8f + __ dci(0x4540de4a); // cadd z10.h, z10.h, z18.h, #270 + // vl128 state = 0x42850f11 + __ dci(0x4500decb); // cadd z11.b, z11.b, z22.b, #270 + // vl128 state = 0xda605f59 + __ dci(0x4500da83); // cadd z3.b, z3.b, z20.b, #90 + // vl128 state = 0x99e63476 + __ dci(0x4500dc8b); // cadd z11.b, z11.b, z4.b, #270 + // vl128 state = 0xd444a939 + __ dci(0x4500dc8f); // cadd z15.b, z15.b, z4.b, #270 + // vl128 state = 0xde3ad968 + __ dci(0x4500d99f); // cadd z31.b, z31.b, z12.b, #90 + // vl128 state = 0xd7cdb177 + __ dci(0x4540d91e); // cadd z30.h, z30.h, z8.h, #90 + // vl128 state = 0x74575b36 + __ dci(0x4541d81a); // sqcadd z26.h, z26.h, z0.h, #90 + // vl128 state = 0x3d347b0b + __ dci(0x4501d83b); // sqcadd z27.b, z27.b, z1.b, #90 + // vl128 state = 0x03df7859 + __ dci(0x45c1d83f); // sqcadd z31.d, z31.d, z1.d, #90 + // vl128 state = 0xf0cdbf68 + __ dci(0x45c1d83e); // sqcadd z30.d, z30.d, z1.d, #90 + // vl128 state = 0x0931dda4 + __ dci(0x45c1d83c); // sqcadd z28.d, z28.d, z1.d, #90 + // vl128 state = 0x460b5369 + __ dci(0x4581da3e); // sqcadd z30.s, z30.s, z17.s, #90 + // vl128 state = 0x71af9203 + __ dci(0x45c1d83f); // sqcadd z31.d, z31.d, z1.d, #90 + // vl128 state = 0xd6babc53 + __ dci(0x4581da3e); // sqcadd z30.s, z30.s, z17.s, #90 + 
// vl128 state = 0xd3e4f42f + __ dci(0x4501d83f); // sqcadd z31.b, z31.b, z1.b, #90 + // vl128 state = 0x7a594239 + __ dci(0x4501dcbb); // sqcadd z27.b, z27.b, z5.b, #270 + // vl128 state = 0x24a5a8c9 + __ dci(0x4501dfba); // sqcadd z26.b, z26.b, z29.b, #270 + // vl128 state = 0x0c3df842 + __ dci(0x4581dfea); // sqcadd z10.s, z10.s, z31.s, #270 + // vl128 state = 0x6173c97f + __ dci(0x4581db7a); // sqcadd z26.s, z26.s, z27.s, #90 + // vl128 state = 0x55090d5f + __ dci(0x4581db1b); // sqcadd z27.s, z27.s, z24.s, #90 + // vl128 state = 0x63477385 + __ dci(0x4581da93); // sqcadd z19.s, z19.s, z20.s, #90 + // vl128 state = 0xc996545e + __ dci(0x45c1db92); // sqcadd z18.d, z18.d, z28.d, #90 + // vl128 state = 0xa48bf827 + __ dci(0x45c1db93); // sqcadd z19.d, z19.d, z28.d, #90 + // vl128 state = 0xf5a3b641 + __ dci(0x45c1daa3); // sqcadd z3.d, z3.d, z21.d, #90 + // vl128 state = 0x20ad4c28 + __ dci(0x4581dba7); // sqcadd z7.s, z7.s, z29.s, #90 + // vl128 state = 0xc9e36e96 + __ dci(0x45c1daaf); // sqcadd z15.d, z15.d, z21.d, #90 + // vl128 state = 0x6eb23fd2 + __ dci(0x45c1daae); // sqcadd z14.d, z14.d, z21.d, #90 + // vl128 state = 0x585d4d63 + __ dci(0x4541dae6); // sqcadd z6.h, z6.h, z23.h, #90 + // vl128 state = 0x827cc0a8 + __ dci(0x4541daee); // sqcadd z14.h, z14.h, z23.h, #90 + // vl128 state = 0xe00543a0 + __ dci(0x4501dabe); // sqcadd z30.b, z30.b, z21.b, #90 + // vl128 state = 0x2313db47 + __ dci(0x4501deff); // sqcadd z31.b, z31.b, z23.b, #270 + // vl128 state = 0xe30d4e83 + __ dci(0x4501defd); // sqcadd z29.b, z29.b, z23.b, #270 + // vl128 state = 0xb95d6d94 + __ dci(0x4501def5); // sqcadd z21.b, z21.b, z23.b, #270 + // vl128 state = 0x4f18b02e + __ dci(0x4501def4); // sqcadd z20.b, z20.b, z23.b, #270 + // vl128 state = 0x20ae9a78 + __ dci(0x4501dee4); // sqcadd z4.b, z4.b, z23.b, #270 + // vl128 state = 0x4eef87a9 + __ dci(0x4501dee6); // sqcadd z6.b, z6.b, z23.b, #270 + // vl128 state = 0x1b041a7b + __ dci(0x4501dfc2); // sqcadd z2.b, z2.b, z30.b, #270 + // 
vl128 state = 0xeaf5e18f + __ dci(0x4500df92); // cadd z18.b, z18.b, z28.b, #270 + // vl128 state = 0xc47ee5e7 + __ dci(0x4500de13); // cadd z19.b, z19.b, z16.b, #270 + // vl128 state = 0x6482d75c + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x6482d75c, + 0x48d9bd2f, + 0xd6bd52ae, + 0x56be94f0, + 0x620cfb69, + 0xb646e0fe, + 0x6034718f, + 0xd8187657, + 0x211218bb, + 0xc973a707, + 0x6020dcc9, + 0x8fadad0c, + 0x0132ecbc, + 0x3a07eb63, + 0x5c20eb82, + 0xc92d6cb2, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_bit_permute) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kSVEBitPerm, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x455fbb1a); // bgrp z26.h, z24.h, z31.h + // vl128 state = 0x39fb8e5b + __ dci(0x451fbb58); // bgrp z24.b, z26.b, z31.b + // vl128 state = 0x7fbccdbd + __ dci(0x4517bb19); // bgrp z25.b, z24.b, z23.b + // vl128 state = 0x67caf176 + __ dci(0x4517bb18); // bgrp z24.b, z24.b, z23.b + // vl128 state = 0x665fd977 + __ dci(0x4517ba5c); // bgrp z28.b, z18.b, z23.b + // vl128 state = 0x0f2c1473 + __ dci(0x4517ba38); // bgrp z24.b, z17.b, z23.b + // vl128 state = 0x253789a0 + __ dci(0x4517ba3c); // bgrp z28.b, z17.b, z23.b + // vl128 state = 0xd3b26fd2 + __ dci(0x4515ba6c); // bgrp z12.b, z19.b, z21.b + // vl128 state = 0x4bad6941 + __ dci(0x4515bac4); // bgrp z4.b, z22.b, z21.b + // vl128 state = 0x7c70d2d2 + __ dci(0x4517ba86); // bgrp z6.b, z20.b, z23.b + // vl128 state = 0x5794816b + __ dci(0x4517ba87); // bgrp z7.b, z20.b, z23.b + // vl128 state = 0xe67993b1 + __ dci(0x4515b297); // bext z23.b, z20.b, z21.b + // vl128 state = 0x3041b7ee + __ 
dci(0x4517b396); // bext z22.b, z28.b, z23.b + // vl128 state = 0xb571d524 + __ dci(0x451bb386); // bext z6.b, z28.b, z27.b + // vl128 state = 0x73ce1823 + __ dci(0x4513b784); // bdep z4.b, z28.b, z19.b + // vl128 state = 0x4264f0f2 + __ dci(0x4593b7ac); // bdep z12.s, z29.s, z19.s + // vl128 state = 0xf9cb9d26 + __ dci(0x4593b7a8); // bdep z8.s, z29.s, z19.s + // vl128 state = 0xa2b310a0 + __ dci(0x4597b780); // bdep z0.s, z28.s, z23.s + // vl128 state = 0xee25c82f + __ dci(0x4597b781); // bdep z1.s, z28.s, z23.s + // vl128 state = 0xdca7577f + __ dci(0x4597b7e3); // bdep z3.s, z31.s, z23.s + // vl128 state = 0x32294429 + __ dci(0x45dfb7e1); // bdep z1.d, z31.d, z31.d + // vl128 state = 0xc147e511 + __ dci(0x455db7e5); // bdep z5.h, z31.h, z29.h + // vl128 state = 0x7a51d422 + __ dci(0x45d5b7e4); // bdep z4.d, z31.d, z21.d + // vl128 state = 0x512ad92a + __ dci(0x45c7b7ec); // bdep z12.d, z31.d, z7.d + // vl128 state = 0xe59fbf5c + __ dci(0x4547b7a8); // bdep z8.h, z29.h, z7.h + // vl128 state = 0xb85fd3b1 + __ dci(0x454fb72c); // bdep z12.h, z25.h, z15.h + // vl128 state = 0xc820e9d0 + __ dci(0x4557b724); // bdep z4.h, z25.h, z23.h + // vl128 state = 0x814ff3f4 + __ dci(0x4557bb20); // bgrp z0.h, z25.h, z23.h + // vl128 state = 0xc58dee50 + __ dci(0x4556b321); // bext z1.h, z25.h, z22.h + // vl128 state = 0xf19c0956 + __ dci(0x4556b3e3); // bext z3.h, z31.h, z22.h + // vl128 state = 0x2a256808 + __ dci(0x4546b367); // bext z7.h, z27.h, z6.h + // vl128 state = 0x1c6696f4 + __ dci(0x4556bb66); // bgrp z6.h, z27.h, z22.h + // vl128 state = 0x32522ca2 + __ dci(0x4556bb76); // bgrp z22.h, z27.h, z22.h + // vl128 state = 0x33fe6590 + __ dci(0x45c6bb66); // bgrp z6.d, z27.d, z6.d + // vl128 state = 0x45d26723 + __ dci(0x45c2b976); // bgrp z22.d, z11.d, z2.d + // vl128 state = 0x364d9885 + __ dci(0x4540b974); // bgrp z20.h, z11.h, z0.h + // vl128 state = 0x36a0bd94 + __ dci(0x45c0b164); // bext z4.d, z11.d, z0.d + // vl128 state = 0x4ee9a90c + __ dci(0x45ccb16c); // bext 
z12.d, z11.d, z12.d + // vl128 state = 0x30c32d69 + __ dci(0x458cb368); // bext z8.s, z27.s, z12.s + // vl128 state = 0xfc2c912f + __ dci(0x450cb769); // bdep z9.b, z27.b, z12.b + // vl128 state = 0xef976b44 + __ dci(0x458cb7eb); // bdep z11.s, z31.s, z12.s + // vl128 state = 0x6f9e21b8 + __ dci(0x4588b5ef); // bdep z15.s, z15.s, z8.s + // vl128 state = 0xa1f212e2 + __ dci(0x4598b5ad); // bdep z13.s, z13.s, z24.s + // vl128 state = 0xe4286a40 + __ dci(0x4598b5af); // bdep z15.s, z13.s, z24.s + // vl128 state = 0x7d6622e5 + __ dci(0x4598b6ad); // bdep z13.s, z21.s, z24.s + // vl128 state = 0xcd00829c + __ dci(0x4518b2af); // bext z15.b, z21.b, z24.b + // vl128 state = 0xa8d58b2d + __ dci(0x4519b2e7); // bext z7.b, z23.b, z25.b + // vl128 state = 0x2b7b7c44 + __ dci(0x4518b2a6); // bext z6.b, z21.b, z24.b + // vl128 state = 0x09c81b7e + __ dci(0x4518b2a7); // bext z7.b, z21.b, z24.b + // vl128 state = 0xab1b2b22 + __ dci(0x4519b6a5); // bdep z5.b, z21.b, z25.b + // vl128 state = 0x03476e4c + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x03476e4c, + 0xcc54e76f, + 0x08324d66, + 0xcc289ee1, + 0xacd3ba43, + 0xe961aeda, + 0x60a204b1, + 0xde020904, + 0x0652d1e5, + 0x7982dc25, + 0x02a2c1cb, + 0x4dd9e71b, + 0xb57f587f, + 0xb75e0d62, + 0x78330809, + 0xbc7046ae, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_smullb_smullt_umullb_umullt_vector) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x455a7bc2); // umullb z2.h, z30.b, z26.b + // vl128 state = 0xe2a2b611 + __ dci(0x454a7b92); // umullb z18.h, z28.b, z10.b + // vl128 state = 0x12b3b0c6 
+ __ dci(0x45427bda); // umullb z26.h, z30.b, z2.b + // vl128 state = 0x74f4a891 + __ dci(0x45c67bde); // umullb z30.d, z30.s, z6.s + // vl128 state = 0x20402d9f + __ dci(0x45467b56); // umullb z22.h, z26.b, z6.b + // vl128 state = 0x75e15413 + __ dci(0x45427f54); // umullt z20.h, z26.b, z2.b + // vl128 state = 0x51478ee1 + __ dci(0x45427fe4); // umullt z4.h, z31.b, z2.b + // vl128 state = 0x63381b63 + __ dci(0x45567fe5); // umullt z5.h, z31.b, z22.b + // vl128 state = 0x0967f882 + __ dci(0x45467df5); // umullt z21.h, z15.b, z6.b + // vl128 state = 0x753e96b9 + __ dci(0x454279f1); // umullb z17.h, z15.b, z2.b + // vl128 state = 0xcff906e6 + __ dci(0x454078f5); // umullb z21.h, z7.b, z0.b + // vl128 state = 0x5609bd14 + __ dci(0x454070d4); // smullb z20.h, z6.b, z0.b + // vl128 state = 0xf284d300 + __ dci(0x45407016); // smullb z22.h, z0.b, z0.b + // vl128 state = 0xbb549bf7 + __ dci(0x45487086); // smullb z6.h, z4.b, z8.b + // vl128 state = 0x6ef99ff1 + __ dci(0x454070c7); // smullb z7.h, z6.b, z0.b + // vl128 state = 0x90177a84 + __ dci(0x45407846); // umullb z6.h, z2.b, z0.b + // vl128 state = 0xd3dbb2fe + __ dci(0x45417a56); // umullb z22.h, z18.b, z1.b + // vl128 state = 0x7d30cf73 + __ dci(0x45417877); // umullb z23.h, z3.b, z1.b + // vl128 state = 0x0623e678 + __ dci(0x45417807); // umullb z7.h, z0.b, z1.b + // vl128 state = 0xe849cf35 + __ dci(0x454178a3); // umullb z3.h, z5.b, z1.b + // vl128 state = 0xcad236a9 + __ dci(0x45437cab); // umullt z11.h, z5.b, z3.b + // vl128 state = 0xc8dfcb1d + __ dci(0x454b7c3b); // umullt z27.h, z1.b, z11.b + // vl128 state = 0x6136e2d6 + __ dci(0x454b7a3a); // umullb z26.h, z17.b, z11.b + // vl128 state = 0x091beb5a + __ dci(0x454b72b2); // smullb z18.h, z21.b, z11.b + // vl128 state = 0x932b30ec + __ dci(0x454b7622); // smullt z2.h, z17.b, z11.b + // vl128 state = 0xee51239c + __ dci(0x454b76ea); // smullt z10.h, z23.b, z11.b + // vl128 state = 0xf4fcc577 + __ dci(0x454b74ab); // smullt z11.h, z5.b, z11.b + // vl128 state 
= 0xcf0c8028 + __ dci(0x454d74bb); // smullt z27.h, z5.b, z13.b + // vl128 state = 0x0f8523c8 + __ dci(0x454d740b); // smullt z11.h, z0.b, z13.b + // vl128 state = 0xc02b2f52 + __ dci(0x454d7403); // smullt z3.h, z0.b, z13.b + // vl128 state = 0x11b4180c + __ dci(0x45557413); // smullt z19.h, z0.b, z21.b + // vl128 state = 0x26eef57a + __ dci(0x45557531); // smullt z17.h, z9.b, z21.b + // vl128 state = 0x6f3fce98 + __ dci(0x455574b9); // smullt z25.h, z5.b, z21.b + // vl128 state = 0x0d4ac272 + __ dci(0x455571b1); // smullb z17.h, z13.b, z21.b + // vl128 state = 0x7c866a41 + __ dci(0x455573e1); // smullb z1.h, z31.b, z21.b + // vl128 state = 0x9c724758 + __ dci(0x455473c9); // smullb z9.h, z30.b, z20.b + // vl128 state = 0xa9a8d0aa + __ dci(0x455473cb); // smullb z11.h, z30.b, z20.b + // vl128 state = 0xd7eec117 + __ dci(0x455473a9); // smullb z9.h, z29.b, z20.b + // vl128 state = 0x35caaa62 + __ dci(0x455473a8); // smullb z8.h, z29.b, z20.b + // vl128 state = 0x97a1d399 + __ dci(0x455473b8); // smullb z24.h, z29.b, z20.b + // vl128 state = 0x3adce4ee + __ dci(0x455673fa); // smullb z26.h, z31.b, z22.b + // vl128 state = 0xd17120ea + __ dci(0x455e77ea); // smullt z10.h, z31.b, z30.b + // vl128 state = 0x1e238a9e + __ dci(0x455677da); // smullt z26.h, z30.b, z22.b + // vl128 state = 0xfbccf6c2 + __ dci(0x454673d8); // smullb z24.h, z30.b, z6.b + // vl128 state = 0xa47583be + __ dci(0x45c67359); // smullb z25.d, z26.s, z6.s + // vl128 state = 0x4e8a9b37 + __ dci(0x45c47751); // smullt z17.d, z26.s, z4.s + // vl128 state = 0xe3c06571 + __ dci(0x45d67741); // smullt z1.d, z26.s, z22.s + // vl128 state = 0x6629e034 + __ dci(0x45d67b45); // umullb z5.d, z26.s, z22.s + // vl128 state = 0x66a99e85 + __ dci(0x45867b47); // umullb z7.s, z26.h, z6.h + // vl128 state = 0xf1cc3339 + __ dci(0x45867b45); // umullb z5.s, z26.h, z6.h + // vl128 state = 0x8bf658d7 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ 
Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x8bf658d7, + 0x82fac555, + 0x07c3d434, + 0x25d2ee2b, + 0xe70f4394, + 0x79223404, + 0x368ed35f, + 0x6565d842, + 0xead08c30, + 0xae35e083, + 0xe1959b85, + 0x94ad31e7, + 0x9caeda4d, + 0x7611d6dc, + 0x22977911, + 0xcf3754ec, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sqdmullb_sqdmullt_pmullb_pmullb_vector) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x45936164); // sqdmullb z4.s, z11.h, z19.h + // vl128 state = 0xacc89592 + __ dci(0x459161f4); // sqdmullb z20.s, z15.h, z17.h + // vl128 state = 0x142c66e5 + __ dci(0x459563f5); // sqdmullb z21.s, z31.h, z21.h + // vl128 state = 0x5cfcb839 + __ dci(0x45956265); // sqdmullb z5.s, z19.h, z21.h + // vl128 state = 0x33616223 + __ dci(0x45d56235); // sqdmullb z21.d, z17.s, z21.s + // vl128 state = 0x987a4a0d + __ dci(0x45556031); // sqdmullb z17.h, z1.b, z21.b + // vl128 state = 0xf7dd9b01 + __ dci(0x45506035); // sqdmullb z21.h, z1.b, z16.b + // vl128 state = 0x6fa54cf3 + __ dci(0x45506334); // sqdmullb z20.h, z25.b, z16.b + // vl128 state = 0x04398c6e + __ dci(0x45486336); // sqdmullb z22.h, z25.b, z8.b + // vl128 state = 0x4cda753c + __ dci(0x45486334); // sqdmullb z20.h, z25.b, z8.b + // vl128 state = 0x53993d4a + __ dci(0x45496b35); // pmullb z21.h, z25.b, z9.b + // vl128 state = 0xa591f97c + __ dci(0x45496b37); // pmullb z23.h, z25.b, z9.b + // vl128 state = 0x5cb91e99 + __ dci(0x45496fb3); // pmullt z19.h, z29.b, z9.b + // vl128 state = 0x5031ac4d + __ dci(0x45596f3b); // pmullt z27.h, z25.b, z25.b + // vl128 state = 0xb0a76e75 + __ dci(0x455d6f13); // pmullt z19.h, z24.b, z29.b + // vl128 state = 0xe84ca196 + __ dci(0x455d6fb2); // pmullt z18.h, 
z29.b, z29.b + // vl128 state = 0xd294ce54 + __ dci(0x455c6bb0); // pmullb z16.h, z29.b, z28.b + // vl128 state = 0x90f01471 + __ dci(0x45546bf8); // pmullb z24.h, z31.b, z20.b + // vl128 state = 0xd15f23fa + __ dci(0x45546bf9); // pmullb z25.h, z31.b, z20.b + // vl128 state = 0x62ca83ea + __ dci(0x45546bfb); // pmullb z27.h, z31.b, z20.b + // vl128 state = 0xf786c1e4 + __ dci(0x454469eb); // pmullb z11.h, z15.b, z4.b + // vl128 state = 0x3cc8c789 + __ dci(0x455069fb); // pmullb z27.h, z15.b, z16.b + // vl128 state = 0xb14709ca + __ dci(0x45546dfa); // pmullt z26.h, z15.b, z20.b + // vl128 state = 0x38257820 + __ dci(0x45546df8); // pmullt z24.h, z15.b, z20.b + // vl128 state = 0x9cc5cd3a + __ dci(0x45576dfc); // pmullt z28.h, z15.b, z23.b + // vl128 state = 0x704543ec + __ dci(0x45d76d6c); // pmullt z12.d, z11.s, z23.s + // vl128 state = 0x15ec8e77 + __ dci(0x455f6d68); // pmullt z8.h, z11.b, z31.b + // vl128 state = 0xfa379a67 + __ dci(0x45596d6a); // pmullt z10.h, z11.b, z25.b + // vl128 state = 0x27fcfa49 + __ dci(0x45596d7a); // pmullt z26.h, z11.b, z25.b + // vl128 state = 0x13883ef0 + __ dci(0x45596532); // sqdmullt z18.h, z9.b, z25.b + // vl128 state = 0x667f8699 + __ dci(0x45596536); // sqdmullt z22.h, z9.b, z25.b + // vl128 state = 0x477ded37 + __ dci(0x45d16537); // sqdmullt z23.d, z9.s, z17.s + // vl128 state = 0x3323eb48 + __ dci(0x45c16515); // sqdmullt z21.d, z8.s, z1.s + // vl128 state = 0x3f581e83 + __ dci(0x45456517); // sqdmullt z23.h, z8.b, z5.b + // vl128 state = 0xd844e48b + __ dci(0x45556555); // sqdmullt z21.h, z10.b, z21.b + // vl128 state = 0x95e6094e + __ dci(0x45c56554); // sqdmullt z20.d, z10.s, z5.s + // vl128 state = 0x198a6f75 + __ dci(0x45cd6456); // sqdmullt z22.d, z2.s, z13.s + // vl128 state = 0x4d6b7178 + __ dci(0x45c96406); // sqdmullt z6.d, z0.s, z9.s + // vl128 state = 0xd989cd0f + __ dci(0x45d96482); // sqdmullt z2.d, z4.s, z25.s + // vl128 state = 0xa80fdf92 + __ dci(0x45dd6406); // sqdmullt z6.d, z0.s, z29.s + // vl128 
state = 0x9876a20d + __ dci(0x45596404); // sqdmullt z4.h, z0.b, z25.b + // vl128 state = 0x5ad5787c + __ dci(0x454b6414); // sqdmullt z20.h, z0.b, z11.b + // vl128 state = 0x86c077d7 + __ dci(0x454a601c); // sqdmullb z28.h, z0.b, z10.b + // vl128 state = 0xfe867841 + __ dci(0x4542641d); // sqdmullt z29.h, z0.b, z2.b + // vl128 state = 0x7bf363f1 + __ dci(0x4552643c); // sqdmullt z28.h, z1.b, z18.b + // vl128 state = 0x7cf26ed3 + __ dci(0x4552673d); // sqdmullt z29.h, z25.b, z18.b + // vl128 state = 0x748f1a99 + __ dci(0x45d6673f); // sqdmullt z31.d, z25.s, z22.s + // vl128 state = 0xbb15fd07 + __ dci(0x45d2633d); // sqdmullb z29.d, z25.s, z18.s + // vl128 state = 0x28e0985a + __ dci(0x455a6339); // sqdmullb z25.h, z25.b, z26.b + // vl128 state = 0x9c0da0fd + __ dci(0x45526738); // sqdmullt z24.h, z25.b, z18.b + // vl128 state = 0xa970ebb8 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xa970ebb8, + 0xc665eff5, + 0x8cc21595, + 0x0ea984f6, + 0x1dbce326, + 0x0845e911, + 0xa6fb6cf4, + 0x8544239a, + 0x2412d23d, + 0xbce6f5e0, + 0x780ff264, + 0xcf6cf172, + 0xef93a3b4, + 0x94080541, + 0xa0aedeba, + 0x8e8bddaa, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sqdmullt_sqdmullb_z_zzi) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 30 * kInstructionSize); + __ dci(0x44eae5a9); // sqdmullt z9.d, z13.s, z10.s[#0] + // vl128 state = 0x311dfe35 + __ dci(0x44eae9a1); // sqdmullb z1.d, z13.s, z10.s[#1] + // vl128 state = 0x559243c3 + __ dci(0x44eae9a5); // sqdmullb z5.d, z13.s, z10.s[#1] + // vl128 state = 0x44d6824c + __ dci(0x44e2edad); // sqdmullt z13.d, z13.s, z2.s[#1] + // 
vl128 state = 0xb5539592 + __ dci(0x44e6e9ac); // sqdmullb z12.d, z13.s, z6.s[#1] + // vl128 state = 0x5e66b9f8 + __ dci(0x44e4ebae); // sqdmullb z14.d, z29.s, z4.s[#1] + // vl128 state = 0x4347620a + __ dci(0x44e4ebaf); // sqdmullb z15.d, z29.s, z4.s[#1] + // vl128 state = 0xe7cfe898 + __ dci(0x44a5ebad); // sqdmullb z13.s, z29.h, z5.h[#1] + // vl128 state = 0x0ca455c7 + __ dci(0x44a5e9fd); // sqdmullb z29.s, z15.h, z5.h[#1] + // vl128 state = 0xcac072a9 + __ dci(0x44e5e8fc); // sqdmullb z28.d, z7.s, z5.s[#1] + // vl128 state = 0xe18e8c66 + __ dci(0x44ede9ec); // sqdmullb z12.d, z15.s, z13.s[#1] + // vl128 state = 0x32f642cb + __ dci(0x44ede9fc); // sqdmullb z28.d, z15.s, z13.s[#1] + // vl128 state = 0xa0467c8a + __ dci(0x44fce9f4); // sqdmullb z20.d, z15.s, z12.s[#3] + // vl128 state = 0x7ada4130 + __ dci(0x44e4e9f6); // sqdmullb z22.d, z15.s, z4.s[#1] + // vl128 state = 0xc87deb44 + __ dci(0x44f4e9d2); // sqdmullb z18.d, z14.s, z4.s[#3] + // vl128 state = 0x6dc052ca + __ dci(0x44f5e9e2); // sqdmullb z2.d, z15.s, z5.s[#3] + // vl128 state = 0xe05110d4 + __ dci(0x44f5ebb2); // sqdmullb z18.d, z29.s, z5.s[#3] + // vl128 state = 0x7ed21594 + __ dci(0x44b5efba); // sqdmullt z26.s, z29.h, z5.h[#5] + // vl128 state = 0x7d5dad40 + __ dci(0x44b5ef78); // sqdmullt z24.s, z27.h, z5.h[#5] + // vl128 state = 0x418f84bc + __ dci(0x44f5eb70); // sqdmullb z16.d, z27.s, z5.s[#3] + // vl128 state = 0x72d78d32 + __ dci(0x44e5ebf4); // sqdmullb z20.d, z31.s, z5.s[#1] + // vl128 state = 0x391fad35 + __ dci(0x44e5efbc); // sqdmullt z28.d, z29.s, z5.s[#1] + // vl128 state = 0xb2143633 + __ dci(0x44e1ebbd); // sqdmullb z29.d, z29.s, z1.s[#1] + // vl128 state = 0x468dac6e + __ dci(0x44f1ebed); // sqdmullb z13.d, z31.s, z1.s[#3] + // vl128 state = 0x9ab292bd + __ dci(0x44f5efe5); // sqdmullt z5.d, z31.s, z5.s[#3] + // vl128 state = 0x4f2bd5d1 + __ dci(0x44fdeee7); // sqdmullt z7.d, z23.s, z13.s[#3] + // vl128 state = 0x7a810779 + __ dci(0x44fdee25); // sqdmullt z5.d, z17.s, z13.s[#3] + 
// vl128 state = 0x05d23734 + __ dci(0x44f5ea27); // sqdmullb z7.d, z17.s, z5.s[#3] + // vl128 state = 0x878580f5 + __ dci(0x44f1e225); // sqdmullb z5.d, z17.s, z1.s[#2] + // vl128 state = 0x5fa56f94 + __ dci(0x44e1ea21); // sqdmullb z1.d, z17.s, z1.s[#1] + // vl128 state = 0x05f1cdf0 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x05f1cdf0, + 0x6b88d4f2, + 0x83bf279d, + 0x12f21868, + 0x6c68a5ce, + 0x5710343f, + 0xa4d0d0ee, + 0x335b20c5, + 0x0dd491c5, + 0x98966292, + 0xb68cdacd, + 0xa26f9914, + 0x6dd60ced, + 0x5cd0d62c, + 0xebe3fb25, + 0xb264d998, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_xar) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 20 * kInstructionSize); + __ dci(0x04293719); // xar z25.b, z25.b, z24.b, #7 + // vl128 state = 0x596046c4 + __ dci(0x04293531); // xar z17.b, z17.b, z9.b, #7 + // vl128 state = 0x38332d55 + __ dci(0x04e93533); // xar z19.d, z19.d, z9.d, #23 + // vl128 state = 0x535c8af7 + __ dci(0x046b3523); // xar z3.s, z3.s, z9.s, #21 + // vl128 state = 0x879a489f + __ dci(0x04eb3427); // xar z7.d, z7.d, z1.d, #21 + // vl128 state = 0xfbac317f + __ dci(0x04ea3463); // xar z3.d, z3.d, z3.d, #22 + // vl128 state = 0xfb44482e + __ dci(0x04fa3447); // xar z7.d, z7.d, z2.d, #6 + // vl128 state = 0xa59e324c + __ dci(0x04f8346f); // xar z15.d, z15.d, z3.d, #8 + // vl128 state = 0x7f064300 + __ dci(0x0479346b); // xar z11.s, z11.s, z3.s, #7 + // vl128 state = 0x0c0d3573 + __ dci(0x0461346a); // xar z10.s, z10.s, z3.s, #31 + // vl128 state = 0x3c61530d + __ dci(0x0464346b); // xar z11.s, z11.s, z3.s, #28 + // vl128 state = 0x137c1433 + __ 
dci(0x04643469); // xar z9.s, z9.s, z3.s, #28 + // vl128 state = 0x81d55bb1 + __ dci(0x0464346b); // xar z11.s, z11.s, z3.s, #28 + // vl128 state = 0xad2ac5c0 + __ dci(0x0434346a); // xar z10.h, z10.h, z3.h, #12 + // vl128 state = 0x2997a1d9 + __ dci(0x04b434fa); // xar z26.d, z26.d, z7.d, #44 + // vl128 state = 0x715f758d + __ dci(0x04e434f2); // xar z18.d, z18.d, z7.d, #28 + // vl128 state = 0x8bfa19ef + __ dci(0x04ec34b3); // xar z19.d, z19.d, z5.d, #20 + // vl128 state = 0xa8d646a5 + __ dci(0x04ae34b7); // xar z23.d, z23.d, z5.d, #50 + // vl128 state = 0xf590c489 + __ dci(0x04ae34a7); // xar z7.d, z7.d, z5.d, #50 + // vl128 state = 0xd6aafb5e + __ dci(0x04ae3417); // xar z23.d, z23.d, z0.d, #50 + // vl128 state = 0xd40a8d1a + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xd40a8d1a, + 0x834982b0, + 0x6fd8c07b, + 0x2654e6f3, + 0x79fa44fb, + 0xc8a60223, + 0xd12f35f0, + 0x1e0a3315, + 0x6970dcd2, + 0x62305aed, + 0xb9846a55, + 0x1147e436, + 0x97a8ceaa, + 0xe8f80c0e, + 0xea3ab3e7, + 0xb2abd654, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_histcnt) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 100 * kInstructionSize); + __ dci(0x45e8c2f9); // histcnt z25.d, p0/z, z23.d, z8.d + // vl128 state = 0x892c6962 + __ dci(0x45e8c1f1); // histcnt z17.d, p0/z, z15.d, z8.d + // vl128 state = 0x6ef7d729 + __ dci(0x45e8c3a1); // histcnt z1.d, p0/z, z29.d, z8.d + // vl128 state = 0x17654f81 + __ dci(0x45e8c3a9); // histcnt z9.d, p0/z, z29.d, z8.d + // vl128 state = 0xe1a0067e + __ dci(0x45e8c0a8); // histcnt z8.d, p0/z, z5.d, z8.d + // vl128 state = 0xd41f511b + __ 
dci(0x45e8d0f8); // histcnt z24.d, p4/z, z7.d, z8.d + // vl128 state = 0x8b73945a + __ dci(0x45e8d0fa); // histcnt z26.d, p4/z, z7.d, z8.d + // vl128 state = 0xc175acec + __ dci(0x45aad0fb); // histcnt z27.s, p4/z, z7.s, z10.s + // vl128 state = 0x44f8385b + __ dci(0x45aad2df); // histcnt z31.s, p4/z, z22.s, z10.s + // vl128 state = 0x52cd5d17 + __ dci(0x45aad2dd); // histcnt z29.s, p4/z, z22.s, z10.s + // vl128 state = 0x9f8d9611 + __ dci(0x45abd2f5); // histcnt z21.s, p4/z, z23.s, z11.s + // vl128 state = 0x5cc45fb0 + __ dci(0x45aad0f7); // histcnt z23.s, p4/z, z7.s, z10.s + // vl128 state = 0x5096a07f + __ dci(0x45aad1b3); // histcnt z19.s, p4/z, z13.s, z10.s + // vl128 state = 0xf25781a6 + __ dci(0x45a8d1f2); // histcnt z18.s, p4/z, z15.s, z8.s + // vl128 state = 0xc7025934 + __ dci(0x45a0d0f6); // histcnt z22.s, p4/z, z7.s, z0.s + // vl128 state = 0xcda9c72a + __ dci(0x45a0d87e); // histcnt z30.s, p6/z, z3.s, z0.s + // vl128 state = 0x75f6bbcc + __ dci(0x45a0dc4e); // histcnt z14.s, p7/z, z2.s, z0.s + // vl128 state = 0x5e4e9fe0 + __ dci(0x45a0dc4a); // histcnt z10.s, p7/z, z2.s, z0.s + // vl128 state = 0x0ec8d2b8 + __ dci(0x45b0cc4b); // histcnt z11.s, p3/z, z2.s, z16.s + // vl128 state = 0x1228c442 + __ dci(0x45b0cc43); // histcnt z3.s, p3/z, z2.s, z16.s + // vl128 state = 0xc6067f7b + __ dci(0x45b8cc73); // histcnt z19.s, p3/z, z3.s, z24.s + // vl128 state = 0xf04f9753 + __ dci(0x45b8d877); // histcnt z23.s, p6/z, z3.s, z24.s + // vl128 state = 0xdeb83b41 + __ dci(0x45b8d47f); // histcnt z31.s, p5/z, z3.s, z24.s + // vl128 state = 0x8ab3905f + __ dci(0x45b8d46f); // histcnt z15.s, p5/z, z3.s, z24.s + // vl128 state = 0x762bf277 + __ dci(0x45b8d16d); // histcnt z13.s, p4/z, z11.s, z24.s + // vl128 state = 0x9a670783 + __ dci(0x45bcd125); // histcnt z5.s, p4/z, z9.s, z28.s + // vl128 state = 0x3e399489 + __ dci(0x45b8d021); // histcnt z1.s, p4/z, z1.s, z24.s + // vl128 state = 0x7fc8f1e7 + __ dci(0x45f8d220); // histcnt z0.d, p4/z, z17.d, z24.d + // vl128 
state = 0x9cb004db + __ dci(0x45f0d621); // histcnt z1.d, p5/z, z17.d, z16.d + // vl128 state = 0xdd4161b5 + __ dci(0x45a0d625); // histcnt z5.s, p5/z, z17.s, z0.s + // vl128 state = 0xb5cb70bb + __ dci(0x45a0d4a1); // histcnt z1.s, p5/z, z5.s, z0.s + // vl128 state = 0x4452182b + __ dci(0x45a0d4a3); // histcnt z3.s, p5/z, z5.s, z0.s + // vl128 state = 0x71298d3c + __ dci(0x45a0d4a2); // histcnt z2.s, p5/z, z5.s, z0.s + // vl128 state = 0xa22914e1 + __ dci(0x45a2d6a3); // histcnt z3.s, p5/z, z21.s, z2.s + // vl128 state = 0x6183bfbc + __ dci(0x45a2de21); // histcnt z1.s, p7/z, z17.s, z2.s + // vl128 state = 0xd1ebb242 + __ dci(0x45e2dc20); // histcnt z0.d, p7/z, z1.d, z2.d + // vl128 state = 0x297a432d + __ dci(0x45e2d8b0); // histcnt z16.d, p6/z, z5.d, z2.d + // vl128 state = 0x1d2557c0 + __ dci(0x45eed8b8); // histcnt z24.d, p6/z, z5.d, z14.d + // vl128 state = 0xe6ef07fa + __ dci(0x45eed8a8); // histcnt z8.d, p6/z, z5.d, z14.d + // vl128 state = 0xaf3665bb + __ dci(0x45aed88c); // histcnt z12.s, p6/z, z4.s, z14.s + // vl128 state = 0x5c2b38bc + __ dci(0x45efd88d); // histcnt z13.d, p6/z, z4.d, z15.d + // vl128 state = 0x8d5527d8 + __ dci(0x45ffc88f); // histcnt z15.d, p2/z, z4.d, z31.d + // vl128 state = 0x1d2e08d2 + __ dci(0x45fbc98d); // histcnt z13.d, p2/z, z12.d, z27.d + // vl128 state = 0x007388b0 + __ dci(0x45bbcd8f); // histcnt z15.s, p3/z, z12.s, z27.s + // vl128 state = 0x9008a7ba + __ dci(0x45b3cc9f); // histcnt z31.s, p3/z, z4.s, z19.s + // vl128 state = 0xc4030ca4 + __ dci(0x45bbc497); // histcnt z23.s, p1/z, z4.s, z27.s + // vl128 state = 0xeaf4a0b6 + __ dci(0x45fbc415); // histcnt z21.d, p1/z, z0.d, z27.d + // vl128 state = 0x03d85428 + __ dci(0x45ffc517); // histcnt z23.d, p1/z, z8.d, z31.d + // vl128 state = 0xa836a751 + __ dci(0x45fbc596); // histcnt z22.d, p1/z, z12.d, z27.d + // vl128 state = 0x77e33f69 + __ dci(0x45fbc4c6); // histcnt z6.d, p1/z, z6.d, z27.d + // vl128 state = 0xf47bb379 + __ dci(0x45fbc4ce); // histcnt z14.d, p1/z, z6.d, 
z27.d + // vl128 state = 0x6dbfff33 + __ dci(0x45fad4ca); // histcnt z10.d, p5/z, z6.d, z26.d + // vl128 state = 0xbc04915a + __ dci(0x45ead45a); // histcnt z26.d, p5/z, z2.d, z10.d + // vl128 state = 0x8969b1c5 + __ dci(0x45aad4ca); // histcnt z10.s, p5/z, z6.s, z10.s + // vl128 state = 0x58d2dfac + __ dci(0x45aed0ce); // histcnt z14.s, p4/z, z6.s, z14.s + // vl128 state = 0xfa793cc7 + __ dci(0x45aec4c6); // histcnt z6.s, p1/z, z6.s, z14.s + // vl128 state = 0xff4c99d8 + __ dci(0x45abc4c7); // histcnt z7.s, p1/z, z6.s, z11.s + // vl128 state = 0x2b44a4ae + __ dci(0x45abc4cf); // histcnt z15.s, p1/z, z6.s, z11.s + // vl128 state = 0xbb3f8ba4 + __ dci(0x45a9c44e); // histcnt z14.s, p1/z, z2.s, z9.s + // vl128 state = 0x5a3a40a6 + __ dci(0x45b9c46f); // histcnt z15.s, p1/z, z3.s, z25.s + // vl128 state = 0x72e31c5f + __ dci(0x45b9c46e); // histcnt z14.s, p1/z, z3.s, z25.s + // vl128 state = 0xde56263e + __ dci(0x45b1c67e); // histcnt z30.s, p1/z, z19.s, z17.s + // vl128 state = 0xc570f0b9 + __ dci(0x45b5c63a); // histcnt z26.s, p1/z, z17.s, z21.s + // vl128 state = 0x72ab1716 + __ dci(0x45a5c72a); // histcnt z10.s, p1/z, z25.s, z5.s + // vl128 state = 0xe8848b2d + __ dci(0x45a1c77a); // histcnt z26.s, p1/z, z27.s, z1.s + // vl128 state = 0x2975ac38 + __ dci(0x45a1c77b); // histcnt z27.s, p1/z, z27.s, z1.s + // vl128 state = 0xb0638363 + __ dci(0x45a1c773); // histcnt z19.s, p1/z, z27.s, z1.s + // vl128 state = 0xc9620a45 + __ dci(0x45e9c777); // histcnt z23.d, p1/z, z27.d, z9.d + // vl128 state = 0x0414c679 + __ dci(0x45ebc67f); // histcnt z31.d, p1/z, z19.d, z11.d + // vl128 state = 0xc1d4410e + __ dci(0x45ebc37b); // histcnt z27.d, p0/z, z27.d, z11.d + // vl128 state = 0x3ae32e36 + __ dci(0x45abd373); // histcnt z19.s, p4/z, z27.s, z11.s + // vl128 state = 0x75ffe12c + __ dci(0x45fbd363); // histcnt z3.d, p4/z, z27.d, z27.d + // vl128 state = 0x4084743b + __ dci(0x45ffc36b); // histcnt z11.d, p0/z, z27.d, z31.d + // vl128 state = 0xfade136b + __ dci(0x45ffc3ca); // 
histcnt z10.d, p0/z, z30.d, z31.d + // vl128 state = 0x60f18f50 + __ dci(0x45efc2ce); // histcnt z14.d, p0/z, z22.d, z15.d + // vl128 state = 0x162ed112 + __ dci(0x45adc2c6); // histcnt z6.s, p0/z, z22.s, z13.s + // vl128 state = 0x4f84cb96 + __ dci(0x45adc2c4); // histcnt z4.s, p0/z, z22.s, z13.s + // vl128 state = 0x5d04ccb6 + __ dci(0x45a7c2d4); // histcnt z20.s, p0/z, z22.s, z7.s + // vl128 state = 0x38efdab7 + __ dci(0x45a6c0c4); // histcnt z4.s, p0/z, z6.s, z6.s + // vl128 state = 0xff7a0a24 + __ dci(0x45a7c2c0); // histcnt z0.s, p0/z, z22.s, z7.s + // vl128 state = 0x5f7b0a31 + __ dci(0x45a7d6c1); // histcnt z1.s, p5/z, z22.s, z7.s + // vl128 state = 0x1e8a6f5f + __ dci(0x45afd7c5); // histcnt z5.s, p5/z, z30.s, z15.s + // vl128 state = 0x655ed237 + __ dci(0x45add3d5); // histcnt z21.s, p4/z, z30.s, z13.s + // vl128 state = 0x8c7226a9 + __ dci(0x45add3d4); // histcnt z20.s, p4/z, z30.s, z13.s + // vl128 state = 0x727304ad + __ dci(0x45bcd3dc); // histcnt z28.s, p4/z, z30.s, z28.s + // vl128 state = 0xce4e49d0 + __ dci(0x45bcd3cc); // histcnt z12.s, p4/z, z30.s, z28.s + // vl128 state = 0x5c252d7d + __ dci(0x45bcd15c); // histcnt z28.s, p4/z, z10.s, z28.s + // vl128 state = 0x5e1163f7 + __ dci(0x45b5d154); // histcnt z20.s, p4/z, z10.s, z21.s + // vl128 state = 0xf77c50ee + __ dci(0x45b5d156); // histcnt z22.s, p4/z, z10.s, z21.s + // vl128 state = 0xe35c8438 + __ dci(0x45b3d157); // histcnt z23.s, p4/z, z10.s, z19.s + // vl128 state = 0xf6926673 + __ dci(0x45b3d156); // histcnt z22.s, p4/z, z10.s, z19.s + // vl128 state = 0xf9022ad2 + __ dci(0x45b3c554); // histcnt z20.s, p1/z, z10.s, z19.s + // vl128 state = 0xb90dfe28 + __ dci(0x45bbd55c); // histcnt z28.s, p5/z, z10.s, z27.s + // vl128 state = 0x9a939b84 + __ dci(0x45abd57e); // histcnt z30.s, p5/z, z11.s, z11.s + // vl128 state = 0xd9ad8be7 + __ dci(0x45abcd7a); // histcnt z26.s, p3/z, z11.s, z11.s + // vl128 state = 0x14869e4f + __ dci(0x45bbc57b); // histcnt z27.s, p1/z, z11.s, z27.s + // vl128 state = 
0x25130793 + __ dci(0x45bfcd73); // histcnt z19.s, p3/z, z11.s, z31.s + // vl128 state = 0x53adf455 + __ dci(0x45bfc863); // histcnt z3.s, p2/z, z3.s, z31.s + // vl128 state = 0x82fa6c44 + __ dci(0x45b7cc62); // histcnt z2.s, p3/z, z3.s, z23.s + // vl128 state = 0xfaefda71 + __ dci(0x45b6cce3); // histcnt z3.s, p3/z, z7.s, z22.s + // vl128 state = 0xdd697c2a + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xdd697c2a, + 0x1415ff61, + 0xb9e154c8, + 0x566a2af5, + 0xef7574b4, + 0x6da83471, + 0x356d5c4d, + 0x798a2403, + 0x2c16e862, + 0x6fa84021, + 0x6e09e8ff, + 0xc13a0eb6, + 0x88c92928, + 0xe51672fe, + 0x229b8ed5, + 0x9e662757, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_histseg) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 100 * kInstructionSize); + __ dci(0x4524a228); // histseg z8.b, z17.b, z4.b + // vl128 state = 0x21ed28a1 + __ dci(0x452ca20c); // histseg z12.b, z16.b, z12.b + // vl128 state = 0xc135d593 + __ dci(0x453ca288); // histseg z8.b, z20.b, z28.b + // vl128 state = 0xb86cd6e7 + __ dci(0x4538a380); // histseg z0.b, z28.b, z24.b + // vl128 state = 0xd28ddd71 + __ dci(0x452aa388); // histseg z8.b, z28.b, z10.b + // vl128 state = 0x322d3aa8 + __ dci(0x452aa38c); // histseg z12.b, z28.b, z10.b + // vl128 state = 0x67d668fc + __ dci(0x4532a384); // histseg z4.b, z28.b, z18.b + // vl128 state = 0xc57505d4 + __ dci(0x4537a380); // histseg z0.b, z28.b, z23.b + // vl128 state = 0xb47d0a11 + __ dci(0x4535a3a8); // histseg z8.b, z29.b, z21.b + // vl128 state = 0x347adf6f + __ dci(0x4535a3ac); // histseg z12.b, z29.b, z21.b + // vl128 state = 0xb763510c + __ 
dci(0x4535a3ae); // histseg z14.b, z29.b, z21.b + // vl128 state = 0xb28319d5 + __ dci(0x4525a39e); // histseg z30.b, z28.b, z5.b + // vl128 state = 0x0adc6533 + __ dci(0x4525a38e); // histseg z14.b, z28.b, z5.b + // vl128 state = 0x248409c6 + __ dci(0x452da3c6); // histseg z6.b, z30.b, z13.b + // vl128 state = 0xa71c85d6 + __ dci(0x452da187); // histseg z7.b, z12.b, z13.b + // vl128 state = 0x7314b8a0 + __ dci(0x4525a1a6); // histseg z6.b, z13.b, z5.b + // vl128 state = 0x129013d5 + __ dci(0x4527a18e); // histseg z14.b, z12.b, z7.b + // vl128 state = 0xc6b207b7 + __ dci(0x4521a18c); // histseg z12.b, z12.b, z1.b + // vl128 state = 0x03957bb5 + __ dci(0x4524a18d); // histseg z13.b, z12.b, z4.b + // vl128 state = 0x379af1c6 + __ dci(0x4524a125); // histseg z5.b, z9.b, z4.b + // vl128 state = 0x93c462cc + __ dci(0x4522a127); // histseg z7.b, z9.b, z2.b + // vl128 state = 0xc95cb1a9 + __ dci(0x4532a117); // histseg z23.b, z8.b, z18.b + // vl128 state = 0xc50e4e66 + __ dci(0x4533a15f); // histseg z31.b, z10.b, z19.b + // vl128 state = 0x76663e3e + __ dci(0x4533a14f); // histseg z15.b, z10.b, z19.b + // vl128 state = 0x84f5ca5f + __ dci(0x4533a0ce); // histseg z14.b, z6.b, z19.b + // vl128 state = 0x50d7de3d + __ dci(0x453ba1cc); // histseg z12.b, z14.b, z27.b + // vl128 state = 0x32e3b53f + __ dci(0x453ba0fc); // histseg z28.b, z7.b, z27.b + // vl128 state = 0x0a5d4180 + __ dci(0x452ba2f4); // histseg z20.b, z23.b, z11.b + // vl128 state = 0x91b77585 + __ dci(0x453ba2c4); // histseg z4.b, z22.b, z27.b + // vl128 state = 0x5cd0c690 + __ dci(0x453ba2cc); // histseg z12.b, z22.b, z27.b + // vl128 state = 0xa6a5f749 + __ dci(0x453ba1c8); // histseg z8.b, z14.b, z27.b + // vl128 state = 0xe5036937 + __ dci(0x4529a1c9); // histseg z9.b, z14.b, z9.b + // vl128 state = 0x13c620c8 + __ dci(0x4529a1a8); // histseg z8.b, z13.b, z9.b + // vl128 state = 0xbf71d421 + __ dci(0x4521a198); // histseg z24.b, z12.b, z1.b + // vl128 state = 0xe01d1160 + __ dci(0x4529a1ba); // histseg 
z26.b, z13.b, z9.b + // vl128 state = 0xaa1b29d6 + __ dci(0x452fa1bb); // histseg z27.b, z13.b, z15.b + // vl128 state = 0x2f96bd61 + __ dci(0x452fa0ff); // histseg z31.b, z7.b, z15.b + // vl128 state = 0x5aeb6bec + __ dci(0x4527a0de); // histseg z30.b, z6.b, z7.b + // vl128 state = 0xbcb1b299 + __ dci(0x4525a1d6); // histseg z22.b, z14.b, z5.b + // vl128 state = 0x0f89ea9b + __ dci(0x4525a1d7); // histseg z23.b, z14.b, z5.b + // vl128 state = 0xe40f30a2 + __ dci(0x4521a3df); // histseg z31.b, z30.b, z1.b + // vl128 state = 0x342ff33b + __ dci(0x4521a197); // histseg z23.b, z12.b, z1.b + // vl128 state = 0xdfa92902 + __ dci(0x4521a187); // histseg z7.b, z12.b, z1.b + // vl128 state = 0x8531fa67 + __ dci(0x4535a186); // histseg z6.b, z12.b, z21.b + // vl128 state = 0xe4b55112 + __ dci(0x4535a196); // histseg z22.b, z12.b, z21.b + // vl128 state = 0x5d26970e + __ dci(0x4525a097); // histseg z23.b, z4.b, z5.b + // vl128 state = 0x7dcb1d13 + __ dci(0x4525a095); // histseg z21.b, z4.b, z5.b + // vl128 state = 0x5fb0789c + __ dci(0x452da017); // histseg z23.b, z0.b, z13.b + // vl128 state = 0x7f5df281 + __ dci(0x452da295); // histseg z21.b, z20.b, z13.b + // vl128 state = 0x9e6f5eaf + __ dci(0x453da39d); // histseg z29.b, z28.b, z29.b + // vl128 state = 0x532f95a9 + __ dci(0x453da39c); // histseg z28.b, z28.b, z29.b + // vl128 state = 0x64202514 + __ dci(0x4535a29e); // histseg z30.b, z20.b, z21.b + // vl128 state = 0x44bda972 + __ dci(0x4535a0bf); // histseg z31.b, z5.b, z21.b + // vl128 state = 0x258125d6 + __ dci(0x4535a0bb); // histseg z27.b, z5.b, z21.b + // vl128 state = 0xec63caaf + __ dci(0x4537a2b3); // histseg z19.b, z21.b, z23.b + // vl128 state = 0xb937b6e8 + __ dci(0x4525a2b1); // histseg z17.b, z21.b, z5.b + // vl128 state = 0x1515ee94 + __ dci(0x4525a2b5); // histseg z21.b, z21.b, z5.b + // vl128 state = 0x4bb06873 + __ dci(0x4525a0fd); // histseg z29.b, z7.b, z5.b + // vl128 state = 0x23446114 + __ dci(0x4524a079); // histseg z25.b, z3.b, z4.b + // vl128 
state = 0x48d52cf6 + __ dci(0x4524a0d8); // histseg z24.b, z6.b, z4.b + // vl128 state = 0x0deef019 + __ dci(0x452ca09c); // histseg z28.b, z4.b, z12.b + // vl128 state = 0xaba6e202 + __ dci(0x453ca018); // histseg z24.b, z0.b, z28.b + // vl128 state = 0xee9d3eed + __ dci(0x4539a008); // histseg z8.b, z0.b, z25.b + // vl128 state = 0x254c57f3 + __ dci(0x4539a00c); // histseg z12.b, z0.b, z25.b + // vl128 state = 0x28fea24d + __ dci(0x4531a048); // histseg z8.b, z2.b, z17.b + // vl128 state = 0xe32fcb53 + __ dci(0x4530a0ca); // histseg z10.b, z6.b, z16.b + // vl128 state = 0xb3a9860b + __ dci(0x4520a0ee); // histseg z14.b, z7.b, z0.b + // vl128 state = 0xef9e57fa + __ dci(0x4520a1de); // histseg z30.b, z14.b, z0.b + // vl128 state = 0x295902e9 + __ dci(0x4520a38e); // histseg z14.b, z28.b, z0.b + // vl128 state = 0x756ed318 + __ dci(0x4528a30f); // histseg z15.b, z24.b, z8.b + // vl128 state = 0x8591dff9 + __ dci(0x4538a39f); // histseg z31.b, z28.b, z24.b + // vl128 state = 0xe4ad535d + __ dci(0x4538a39b); // histseg z27.b, z28.b, z24.b + // vl128 state = 0x2d4fbc24 + __ dci(0x4538a093); // histseg z19.b, z4.b, z24.b + // vl128 state = 0xd8ee932a + __ dci(0x453aa0a3); // histseg z3.b, z5.b, z26.b + // vl128 state = 0x768b71a6 + __ dci(0x453aa0ab); // histseg z11.b, z5.b, z26.b + // vl128 state = 0xa78673d7 + __ dci(0x452ea0bb); // histseg z27.b, z5.b, z14.b + // vl128 state = 0x6e649cae + __ dci(0x452fa1bf); // histseg z31.b, z13.b, z15.b + // vl128 state = 0x0f58100a + __ dci(0x452fa1be); // histseg z30.b, z13.b, z15.b + // vl128 state = 0xc99f4519 + __ dci(0x452fa3f6); // histseg z22.b, z31.b, z15.b + // vl128 state = 0x700c8305 + __ dci(0x452fa3f4); // histseg z20.b, z31.b, z15.b + // vl128 state = 0xbdecfddc + __ dci(0x453fa3b0); // histseg z16.b, z29.b, z31.b + // vl128 state = 0x3f5b7578 + __ dci(0x453fa3b8); // histseg z24.b, z29.b, z31.b + // vl128 state = 0xf0076715 + __ dci(0x453fa228); // histseg z8.b, z17.b, z31.b + // vl128 state = 0x3bd60e0b + __ 
dci(0x4536a22a); // histseg z10.b, z17.b, z22.b + // vl128 state = 0x1171f63c + __ dci(0x4530a23a); // histseg z26.b, z17.b, z16.b + // vl128 state = 0x3fef270c + __ dci(0x4522a23e); // histseg z30.b, z17.b, z2.b + // vl128 state = 0xf928721f + __ dci(0x4524a23c); // histseg z28.b, z17.b, z4.b + // vl128 state = 0xecec697b + __ dci(0x4527a238); // histseg z24.b, z17.b, z7.b + // vl128 state = 0x23b07b16 + __ dci(0x4525a210); // histseg z16.b, z16.b, z5.b + // vl128 state = 0x9c1c2ac5 + __ dci(0x4525a200); // histseg z0.b, z16.b, z5.b + // vl128 state = 0xc446f89b + __ dci(0x4520a202); // histseg z2.b, z16.b, z0.b + // vl128 state = 0x8afba046 + __ dci(0x4521a303); // histseg z3.b, z24.b, z1.b + // vl128 state = 0xf0b0f9f3 + __ dci(0x4520a201); // histseg z1.b, z16.b, z0.b + // vl128 state = 0x8922615b + __ dci(0x4528a223); // histseg z3.b, z17.b, z8.b + // vl128 state = 0xf36938ee + __ dci(0x4528a367); // histseg z7.b, z27.b, z8.b + // vl128 state = 0xc2d96c41 + __ dci(0x452ca3e6); // histseg z6.b, z31.b, z12.b + // vl128 state = 0xf15e835f + __ dci(0x452ea3c4); // histseg z4.b, z30.b, z14.b + // vl128 state = 0xb3964bd8 + __ dci(0x452da3c6); // histseg z6.b, z30.b, z13.b + // vl128 state = 0x8011a4c6 + __ dci(0x452da0c4); // histseg z4.b, z6.b, z13.b + // vl128 state = 0x0fbedf54 + __ dci(0x4529a0ec); // histseg z12.b, z7.b, z9.b + // vl128 state = 0x9a4d7031 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x9a4d7031, + 0xebaa80ad, + 0x702155a3, + 0x181fff8d, + 0x7b071373, + 0x1bf0af96, + 0x9ca15297, + 0x615d2f4a, + 0x7658b554, + 0xd2bf7319, + 0xddf8d492, + 0xf5938d08, + 0xbe354cb1, + 0xfe2d5d63, + 0x29818684, + 0x2c862ef9, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_table) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + 
CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x05212a38); // tbl z24.b, {z17.b, z18.b}, z1.b + // vl128 state = 0xbdd1e1c1 + __ dci(0x05212810); // tbl z16.b, {z0.b, z1.b}, z1.b + // vl128 state = 0x80ca38b6 + __ dci(0x05e12812); // tbl z18.d, {z0.d, z1.d}, z1.d + // vl128 state = 0xb59fe024 + __ dci(0x05632802); // tbl z2.h, {z0.h, z1.h}, z3.h + // vl128 state = 0xfb22b8f9 + __ dci(0x05e32906); // tbl z6.d, {z8.d, z9.d}, z3.d + // vl128 state = 0x78ba34e9 + __ dci(0x05e22942); // tbl z2.d, {z10.d, z11.d}, z2.d + // vl128 state = 0x000b006f + __ dci(0x05f22d46); // tbx z6.d, z10.d, z18.d + // vl128 state = 0x28b746e5 + __ dci(0x05f32947); // tbl z7.d, {z10.d, z11.d}, z19.d + // vl128 state = 0xfcbf7b93 + __ dci(0x05e32963); // tbl z3.d, {z11.d, z12.d}, z3.d + // vl128 state = 0x2891c0aa + __ dci(0x05e33161); // tbl z1.d, {z11.d}, z3.d + // vl128 state = 0x3468b9d4 + __ dci(0x05e13149); // tbl z9.d, {z10.d}, z1.d + // vl128 state = 0xc2adf02b + __ dci(0x0560314d); // tbl z13.h, {z10.h}, z0.h + // vl128 state = 0xff9f1abb + __ dci(0x0578314c); // tbl z12.h, {z10.h}, z24.h + // vl128 state = 0x2cffcd38 + __ dci(0x05e83144); // tbl z4.d, {z10.d}, z8.d + // vl128 state = 0x8e5ca010 + __ dci(0x05e83146); // tbl z6.d, {z10.d}, z8.d + // vl128 state = 0xa6e0e69a + __ dci(0x05b83147); // tbl z7.s, {z10.s}, z24.s + // vl128 state = 0x513e6328 + __ dci(0x053831d7); // tbl z23.b, {z14.b}, z24.b + // vl128 state = 0xe2bd7bdf + __ dci(0x056831df); // tbl z31.h, {z14.h}, z8.h + // vl128 state = 0xf4881e93 + __ dci(0x0560319e); // tbl z30.h, {z12.h}, z0.h + // vl128 state = 0x4cd76275 + __ dci(0x0522319a); // tbl z26.b, {z12.b}, z2.b + // vl128 state = 0x06d15ac3 + __ dci(0x0522318a); // tbl z10.b, {z12.b}, z2.b + // vl128 state = 0x5657179b + __ dci(0x0522318e); // tbl z14.b, {z12.b}, z2.b + // vl128 state = 0x7def33b7 
+ __ dci(0x05a6318a); // tbl z10.s, {z12.s}, z6.s + // vl128 state = 0x38ee6756 + __ dci(0x05b2318b); // tbl z11.s, {z12.s}, z18.s + // vl128 state = 0x6ba1d599 + __ dci(0x05a231bb); // tbl z27.s, {z13.s}, z2.s + // vl128 state = 0xee2c412e + __ dci(0x05a231ab); // tbl z11.s, {z13.s}, z2.s + // vl128 state = 0xa183e51b + __ dci(0x05a831af); // tbl z15.s, {z13.s}, z8.s + // vl128 state = 0xcd60a839 + __ dci(0x05ea31a7); // tbl z7.d, {z13.d}, z10.d + // vl128 state = 0x3abe2d8b + __ dci(0x05fa33af); // tbl z15.d, {z29.d}, z26.d + // vl128 state = 0xf596f00c + __ dci(0x05fe32ae); // tbl z14.d, {z21.d}, z30.d + // vl128 state = 0x3e791a5a + __ dci(0x057a32be); // tbl z30.h, {z21.h}, z26.h + // vl128 state = 0x27f4086e + __ dci(0x05fe32ae); // tbl z14.d, {z21.d}, z30.d + // vl128 state = 0xec1be238 + __ dci(0x05fe32aa); // tbl z10.d, {z21.d}, z30.d + // vl128 state = 0xa91ab6d9 + __ dci(0x057e32e2); // tbl z2.h, {z23.h}, z30.h + // vl128 state = 0xd1ab825f + __ dci(0x057e32e0); // tbl z0.h, {z23.h}, z30.h + // vl128 state = 0xca42860c + __ dci(0x057f3270); // tbl z16.h, {z19.h}, z31.h + // vl128 state = 0xff27daa0 + __ dci(0x05673271); // tbl z17.h, {z19.h}, z7.h + // vl128 state = 0x9b358bbf + __ dci(0x05e73379); // tbl z25.d, {z27.d}, z7.d + // vl128 state = 0xf0a4c65d + __ dci(0x05e3333d); // tbl z29.d, {z25.d}, z3.d + // vl128 state = 0x3de40d5b + __ dci(0x05e33335); // tbl z21.d, {z25.d}, z3.d + // vl128 state = 0xfeadc4fa + __ dci(0x05f33137); // tbl z23.d, {z9.d}, z19.d + // vl128 state = 0x417c23c2 + __ dci(0x05b33336); // tbl z22.s, {z25.s}, z19.s + // vl128 state = 0x4bd7bddc + __ dci(0x05b1323e); // tbl z30.s, {z17.s}, z17.s + // vl128 state = 0x525aafe8 + __ dci(0x05b0303c); // tbl z28.s, {z1.s}, z16.s + // vl128 state = 0xee67e295 + __ dci(0x05b0308c); // tbl z12.s, {z4.s}, z16.s + // vl128 state = 0xce1a6811 + __ dci(0x05b030e8); // tbl z8.s, {z7.s}, z16.s + // vl128 state = 0xfba53f74 + __ dci(0x05a030b8); // tbl z24.s, {z5.s}, z0.s + // vl128 state = 
0x56a69350 + __ dci(0x05e830b0); // tbl z16.d, {z5.d}, z8.d + // vl128 state = 0xe0665941 + __ dci(0x05e830b2); // tbl z18.d, {z5.d}, z8.d + // vl128 state = 0xc6680470 + __ dci(0x05e931b3); // tbl z19.d, {z13.d}, z9.d + // vl128 state = 0x64a925a9 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x64a925a9, + 0x89750b9d, + 0xb803659e, + 0xa21efc63, + 0x67f967b8, + 0x4e52e209, + 0x42c1692f, + 0x4d8539c7, + 0x6828f0f4, + 0x3c75d27a, + 0x2e3341c9, + 0xfe4a8f4f, + 0xd27b47ae, + 0x665d8f8b, + 0x3230c584, + 0xcf1d6e82, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_cdot) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x4488104f); // cdot z15.s, z2.b, z8.b, #0 + // vl128 state = 0x25fd51d1 + __ dci(0x448a106e); // cdot z14.s, z3.b, z10.b, #0 + // vl128 state = 0x490576d5 + __ dci(0x448a1246); // cdot z6.s, z18.b, z10.b, #0 + // vl128 state = 0x25a6fe4b + __ dci(0x448e12ce); // cdot z14.s, z22.b, z14.b, #0 + // vl128 state = 0xc378b2df + __ dci(0x448412cf); // cdot z15.s, z22.b, z4.b, #0 + // vl128 state = 0xe92a358d + __ dci(0x448412c7); // cdot z7.s, z22.b, z4.b, #0 + // vl128 state = 0x7408b292 + __ dci(0x44c41257); // cdot z23.d, z18.h, z4.h, #0 + // vl128 state = 0xebc02289 + __ dci(0x448412d5); // cdot z21.s, z22.b, z4.b, #0 + // vl128 state = 0x9a7c2f1a + __ dci(0x448712d7); // cdot z23.s, z22.b, z7.b, #0 + // vl128 state = 0xed91e0b4 + __ dci(0x44831295); // cdot z21.s, z20.b, z3.b, #0 + // vl128 state = 0x3dae4184 + __ dci(0x44821385); // cdot z5.s, z28.b, z2.b, #0 + // vl128 state = 0x213fb541 + __ dci(0x44c213c1); // cdot z1.d, 
z30.h, z2.h, #0 + // vl128 state = 0xcba3207a + __ dci(0x44c61340); // cdot z0.d, z26.h, z6.h, #0 + // vl128 state = 0x9d6041f3 + __ dci(0x44c413d0); // cdot z16.d, z30.h, z4.h, #0 + // vl128 state = 0x4b931738 + __ dci(0x44cc12d8); // cdot z24.d, z22.h, z12.h, #0 + // vl128 state = 0x2503fbcc + __ dci(0x448c1ac8); // cdot z8.s, z22.b, z12.b, #180 + // vl128 state = 0x53bc5303 + __ dci(0x448c12ec); // cdot z12.s, z23.b, z12.b, #0 + // vl128 state = 0xb3bf45c7 + __ dci(0x448812ad); // cdot z13.s, z21.b, z8.b, #0 + // vl128 state = 0x938b4e4f + __ dci(0x44881689); // cdot z9.s, z20.b, z8.b, #90 + // vl128 state = 0x70106ddd + __ dci(0x4498128b); // cdot z11.s, z20.b, z24.b, #0 + // vl128 state = 0x92108bb2 + __ dci(0x4498129b); // cdot z27.s, z20.b, z24.b, #0 + // vl128 state = 0x545230eb + __ dci(0x449a12bf); // cdot z31.s, z21.b, z26.b, #0 + // vl128 state = 0x5cd2fb12 + __ dci(0x44da10af); // cdot z15.d, z5.h, z26.h, #0 + // vl128 state = 0xc03d9146 + __ dci(0x44da10ae); // cdot z14.d, z5.h, z26.h, #0 + // vl128 state = 0xbc2712f7 + __ dci(0x44db12be); // cdot z30.d, z21.h, z27.h, #0 + // vl128 state = 0xccf9d667 + __ dci(0x449b12ee); // cdot z14.s, z23.b, z27.b, #0 + // vl128 state = 0x2c1e08f1 + __ dci(0x449b12ef); // cdot z15.s, z23.b, z27.b, #0 + // vl128 state = 0x159d17d7 + __ dci(0x449b14ee); // cdot z14.s, z7.b, z27.b, #90 + // vl128 state = 0x892c97d3 + __ dci(0x449b1cac); // cdot z12.s, z5.b, z27.b, #270 + // vl128 state = 0x3841ce24 + __ dci(0x449b1aae); // cdot z14.s, z21.b, z27.b, #180 + // vl128 state = 0x30a24868 + __ dci(0x449a1aec); // cdot z12.s, z23.b, z26.b, #180 + // vl128 state = 0x2b836c8a + __ dci(0x44981ace); // cdot z14.s, z22.b, z24.b, #180 + // vl128 state = 0x16a81963 + __ dci(0x44901a86); // cdot z6.s, z20.b, z16.b, #180 + // vl128 state = 0x924ac9ee + __ dci(0x44981b8e); // cdot z14.s, z28.b, z24.b, #180 + // vl128 state = 0x3953da61 + __ dci(0x44891b8a); // cdot z10.s, z28.b, z9.b, #180 + // vl128 state = 0xad72b6d5 + __ 
dci(0x4499138b); // cdot z11.s, z28.b, z25.b, #0 + // vl128 state = 0x569b1b2c + __ dci(0x4498119b); // cdot z27.s, z12.b, z24.b, #0 + // vl128 state = 0xdbb36925 + __ dci(0x449c199a); // cdot z26.s, z12.b, z28.b, #180 + // vl128 state = 0x4be861d1 + __ dci(0x44901992); // cdot z18.s, z12.b, z16.b, #180 + // vl128 state = 0x1e83ddb5 + __ dci(0x44901a90); // cdot z16.s, z20.b, z16.b, #180 + // vl128 state = 0x180556e0 + __ dci(0x44911ac0); // cdot z0.s, z22.b, z17.b, #180 + // vl128 state = 0x2cbf5db5 + __ dci(0x44951bc1); // cdot z1.s, z30.b, z21.b, #180 + // vl128 state = 0x428f97bd + __ dci(0x44851b40); // cdot z0.s, z26.b, z5.b, #180 + // vl128 state = 0xe0f0659f + __ dci(0x44851a70); // cdot z16.s, z19.b, z5.b, #180 + // vl128 state = 0x4142d23c + __ dci(0x44861a74); // cdot z20.s, z19.b, z6.b, #180 + // vl128 state = 0x74f7d373 + __ dci(0x44921a76); // cdot z22.s, z19.b, z18.b, #180 + // vl128 state = 0x5b4ef670 + __ dci(0x44921246); // cdot z6.s, z18.b, z18.b, #0 + // vl128 state = 0x1fe5d31d + __ dci(0x44981247); // cdot z7.s, z18.b, z24.b, #0 + // vl128 state = 0x782a0559 + __ dci(0x44981746); // cdot z6.s, z26.b, z24.b, #90 + // vl128 state = 0x84cbc61d + __ dci(0x449816c4); // cdot z4.s, z22.b, z24.b, #90 + // vl128 state = 0x078aa009 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x078aa009, + 0x3c4026df, + 0x3ae8e644, + 0x514dfdcd, + 0x2649444a, + 0x74a87bbe, + 0x14b8e9b3, + 0x92c65f4d, + 0xa3015fc1, + 0xab48b8fa, + 0x9e80ef05, + 0xb59b0dde, + 0xbcf04e6f, + 0xa7fa54a1, + 0xaed81dfc, + 0xdc7ffb07, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_bitwise_ternary) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // 
state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x04793f99); // bsl1n z25.d, z25.d, z25.d, z28.d + // vl128 state = 0x70294e62 + __ dci(0x04b93f9b); // bsl2n z27.d, z27.d, z25.d, z28.d + // vl128 state = 0x0a3f0dc1 + __ dci(0x04b93f93); // bsl2n z19.d, z19.d, z25.d, z28.d + // vl128 state = 0x46500e35 + __ dci(0x04b93dbb); // bsl2n z27.d, z27.d, z25.d, z13.d + // vl128 state = 0x25bdcc83 + __ dci(0x04b53db9); // bsl2n z25.d, z25.d, z21.d, z13.d + // vl128 state = 0x6d33b943 + __ dci(0x04bd3d29); // bsl2n z9.d, z9.d, z29.d, z9.d + // vl128 state = 0xa218e11a + __ dci(0x04ad3d0d); // bsl2n z13.d, z13.d, z13.d, z8.d + // vl128 state = 0xc5e2f5a2 + __ dci(0x04a53d4f); // bsl2n z15.d, z15.d, z5.d, z10.d + // vl128 state = 0x519e4735 + __ dci(0x04653d47); // bsl1n z7.d, z7.d, z5.d, z10.d + // vl128 state = 0x132f7ce6 + __ dci(0x04613dc6); // bsl1n z6.d, z6.d, z1.d, z14.d + // vl128 state = 0x91bcf19b + __ dci(0x04673dc7); // bsl1n z7.d, z7.d, z7.d, z14.d + // vl128 state = 0x3bd0ba20 + __ dci(0x04673dc5); // bsl1n z5.d, z5.d, z7.d, z14.d + // vl128 state = 0xbf3b39fa + __ dci(0x04e73cc1); // nbsl z1.d, z1.d, z7.d, z6.d + // vl128 state = 0xd304b643 + __ dci(0x04773cc5); // bsl1n z5.d, z5.d, z23.d, z6.d + // vl128 state = 0xdd6cd3ce + __ dci(0x04773ac1); // bcax z1.d, z1.d, z23.d, z22.d + // vl128 state = 0x3f456acf + __ dci(0x04773ac3); // bcax z3.d, z3.d, z23.d, z22.d + // vl128 state = 0xbe117f80 + __ dci(0x047739c7); // bcax z7.d, z7.d, z23.d, z14.d + // vl128 state = 0xd3cd3dcd + __ dci(0x047439c5); // bcax z5.d, z5.d, z20.d, z14.d + // vl128 state = 0xee4f636d + __ dci(0x04743841); // bcax z1.d, z1.d, z20.d, z2.d + // vl128 state = 0xf21b00a1 + __ dci(0x04753811); // bcax z17.d, z17.d, z21.d, z0.d + // vl128 state = 0x597ab14d + __ dci(0x04753815); // bcax z21.d, z21.d, z21.d, z0.d + // vl128 state = 0xf5d56322 + __ dci(0x04713917); // bcax z23.d, z23.d, z17.d, z8.d + // vl128 state = 0x17f3cedf + __ dci(0x04793987); // 
bcax z7.d, z7.d, z25.d, z12.d + // vl128 state = 0x7492c4e5 + __ dci(0x04693885); // bcax z5.d, z5.d, z9.d, z4.d + // vl128 state = 0xb796548c + __ dci(0x046838d5); // bcax z21.d, z21.d, z8.d, z6.d + // vl128 state = 0xf4e12422 + __ dci(0x046838d4); // bcax z20.d, z20.d, z8.d, z6.d + // vl128 state = 0x16187a4c + __ dci(0x043838d6); // eor3 z22.d, z22.d, z24.d, z6.d + // vl128 state = 0xd95e6713 + __ dci(0x043c39de); // eor3 z30.d, z30.d, z28.d, z14.d + // vl128 state = 0xb8322807 + __ dci(0x047c38ce); // bcax z14.d, z14.d, z28.d, z6.d + // vl128 state = 0x6871619d + __ dci(0x047c38cf); // bcax z15.d, z15.d, z28.d, z6.d + // vl128 state = 0x57c5a4af + __ dci(0x043c384e); // eor3 z14.d, z14.d, z28.d, z2.d + // vl128 state = 0x1a62efdf + __ dci(0x0474385e); // bcax z30.d, z30.d, z20.d, z2.d + // vl128 state = 0xc9d1ea1e + __ dci(0x047c3a4e); // bcax z14.d, z14.d, z28.d, z18.d + // vl128 state = 0xd5ced43e + __ dci(0x047c3c4f); // bsl1n z15.d, z15.d, z28.d, z2.d + // vl128 state = 0x79f22e16 + __ dci(0x047d3d4b); // bsl1n z11.d, z11.d, z29.d, z10.d + // vl128 state = 0xc4ee5d6e + __ dci(0x04793c49); // bsl1n z9.d, z9.d, z25.d, z2.d + // vl128 state = 0xea11e840 + __ dci(0x04793c99); // bsl1n z25.d, z25.d, z25.d, z4.d + // vl128 state = 0x95221bc2 + __ dci(0x04613c91); // bsl1n z17.d, z17.d, z1.d, z4.d + // vl128 state = 0xa40acfbe + __ dci(0x04233c90); // bsl z16.d, z16.d, z3.d, z4.d + // vl128 state = 0x8d3ef22f + __ dci(0x04233c80); // bsl z0.d, z0.d, z3.d, z4.d + // vl128 state = 0xd07d1bb2 + __ dci(0x04223ca4); // bsl z4.d, z4.d, z2.d, z5.d + // vl128 state = 0xa2c4169c + __ dci(0x04223ca5); // bsl z5.d, z5.d, z2.d, z5.d + // vl128 state = 0x3c6415e5 + __ dci(0x04a03ca1); // bsl2n z1.d, z1.d, z0.d, z5.d + // vl128 state = 0x55b93add + __ dci(0x04a03cb1); // bsl2n z17.d, z17.d, z0.d, z5.d + // vl128 state = 0x9b86e5b3 + __ dci(0x04a13cf9); // bsl2n z25.d, z25.d, z1.d, z7.d + // vl128 state = 0xdd310e8f + __ dci(0x04a13cfd); // bsl2n z29.d, z29.d, z1.d, z7.d + // 
vl128 state = 0xae66fb44 + __ dci(0x04a13ced); // bsl2n z13.d, z13.d, z1.d, z7.d + // vl128 state = 0xc69dd926 + __ dci(0x04b93ce9); // bsl2n z9.d, z9.d, z25.d, z7.d + // vl128 state = 0x15592b37 + __ dci(0x04b93dcb); // bsl2n z11.d, z11.d, z25.d, z14.d + // vl128 state = 0xbfcda4d3 + __ dci(0x04b83d4f); // bsl2n z15.d, z15.d, z24.d, z10.d + // vl128 state = 0xaef1e0b6 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xaef1e0b6, + 0xc9b3303f, + 0xc547c948, + 0x0fc817f7, + 0x22d2eab3, + 0x225b3ecd, + 0xf7a34a06, + 0xa07e68ed, + 0xdba0f9fa, + 0x64199691, + 0xa650bfa3, + 0xc6bfeab9, + 0x7efe63c4, + 0x66e4139c, + 0xc580dcf5, + 0x95687693, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_while) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x257109e3); // whilehs p3.h, w15, w17 + // vl128 state = 0x4568cc4c + __ dci(0x257709f3); // whilehi p3.h, w15, w23 + // vl128 state = 0xf148a8ac + __ dci(0x25f509f7); // whilehi p7.d, w15, w21 + // vl128 state = 0x2fe3dcb9 + __ dci(0x257508f5); // whilehi p5.h, w7, w21 + // vl128 state = 0x88429dee + __ dci(0x257100f1); // whilegt p1.h, w7, w17 + // vl128 state = 0x5a3b89ec + __ dci(0x253108f0); // whilehi p0.b, w7, w17 + // vl128 state = 0x73276c52 + __ dci(0x253108f1); // whilehi p1.b, w7, w17 + // vl128 state = 0xa278d7f0 + __ dci(0x257508f9); // whilehi p9.h, w7, w21 + // vl128 state = 0xa438aefc + __ dci(0x25750858); // whilehi p8.h, w2, w21 + // vl128 state = 0x33e13c17 + __ dci(0x25770a50); // whilehi p0.h, w18, w23 + // vl128 state = 0x01947abe + __ dci(0x25751a52); // whilehi p2.h, x18, x21 + 
// vl128 state = 0x2cf410f2 + __ dci(0x25711a7a); // whilehi p10.h, x19, x17 + // vl128 state = 0x4bb6efc1 + __ dci(0x25391a78); // whilehi p8.b, x19, x25 + // vl128 state = 0xec1afdd6 + __ dci(0x25290a70); // whilehi p0.b, w19, w9 + // vl128 state = 0xde6fbb7f + __ dci(0x25290a78); // whilehi p8.b, w19, w9 + // vl128 state = 0x79c3a968 + __ dci(0x25a90b68); // whilehs p8.s, w27, w9 + // vl128 state = 0x4b32e81a + __ dci(0x25a903e9); // whilege p9.s, wzr, w9 + // vl128 state = 0x994bfc18 + __ dci(0x25a909ed); // whilehs p13.s, w15, w9 + // vl128 state = 0x6d6e231f + __ dci(0x25a909ef); // whilehs p15.s, w15, w9 + // vl128 state = 0x41945298 + __ dci(0x25a909eb); // whilehs p11.s, w15, w9 + // vl128 state = 0x659ccb75 + __ dci(0x25b909c9); // whilehs p9.s, w14, w25 + // vl128 state = 0xd078a7ed + __ dci(0x25bd098d); // whilehs p13.s, w12, w29 + // vl128 state = 0xf6f2d8ae + __ dci(0x25b90909); // whilehs p9.s, w8, w25 + // vl128 state = 0x248bccac + __ dci(0x25fb090b); // whilehs p11.d, w8, w27 + // vl128 state = 0x09b0b9cc + __ dci(0x25fb090a); // whilehs p10.d, w8, w27 + // vl128 state = 0xfa811fef + __ dci(0x25eb0b02); // whilehs p2.d, w24, w11 + // vl128 state = 0xdcb96f30 + __ dci(0x25eb0bc3); // whilehs p3.d, w30, w11 + // vl128 state = 0xbae01fd2 + __ dci(0x25e30acb); // whilehs p11.d, w22, w3 + // vl128 state = 0xbcfdc2b8 + __ dci(0x25eb08c9); // whilehs p9.d, w6, w11 + // vl128 state = 0xdb60ba22 + __ dci(0x25a308c1); // whilehs p1.s, w6, w3 + // vl128 state = 0xe895df80 + __ dci(0x25a108e5); // whilehs p5.s, w7, w1 + // vl128 state = 0x3aeccb82 + __ dci(0x25a009e4); // whilehs p4.s, w15, w0 + // vl128 state = 0xe6b1b3b3 + __ dci(0x25a009ec); // whilehs p12.s, w15, w0 + // vl128 state = 0xd2e10d82 + __ dci(0x25a019ae); // whilehs p14.s, x13, x0 + // vl128 state = 0x4bf596b8 + __ dci(0x25e018af); // whilehs p15.d, x5, x0 + // vl128 state = 0xb8d27541 + __ dci(0x25e918ad); // whilehs p13.d, x5, x9 + // vl128 state = 0x01b6f92f + __ dci(0x25eb188c); // whilehs 
p12.d, x4, x11 + // vl128 state = 0xd3cfed2d + __ dci(0x25eb188e); // whilehs p14.d, x4, x11 + // vl128 state = 0x9947e07e + __ dci(0x25e21886); // whilehs p6.d, x4, x2 + // vl128 state = 0xd9995e11 + __ dci(0x25a21084); // whilege p4.s, x4, x2 + // vl128 state = 0xd45d81ed + __ dci(0x25b31085); // whilege p5.s, x4, x19 + // vl128 state = 0x4d67b543 + __ dci(0x25a3100d); // whilege p13.s, x0, x3 + // vl128 state = 0x00f0526c + __ dci(0x252b101d); // whilegt p13.b, x0, x11 + // vl128 state = 0x9d176025 + __ dci(0x253b1095); // whilegt p5.b, x4, x27 + // vl128 state = 0xd6544089 + __ dci(0x253b1091); // whilegt p1.b, x4, x27 + // vl128 state = 0x37d83129 + __ dci(0x253f10d5); // whilegt p5.b, x6, xzr + // vl128 state = 0x8e121615 + __ dci(0x252f11d4); // whilegt p4.b, x14, x15 + // vl128 state = 0x83d6c9e9 + __ dci(0x25af01d5); // whilegt p5.s, w14, w15 + // vl128 state = 0xe865fad7 + __ dci(0x25eb01c5); // whilege p5.d, w14, w11 + // vl128 state = 0x5eaf208e + __ dci(0x25fb0144); // whilege p4.d, w10, w27 + // vl128 state = 0x8cd6348c + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x8cd6348c, + 0x42a1f9b4, + 0x13fc2001, + 0x492cb2ac, + 0xa67cfb65, + 0x80d4639f, + 0xfa388a09, + 0x8c7ad8d9, + 0x299c5bfe, + 0x9183808a, + 0x3fc14d86, + 0x7cc08a05, + 0x9c85cd48, + 0xd06e8299, + 0x6a107152, + 0x81d99d7c, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_cdot_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44bb4ef6); // cdot z22.s, z23.b, z3.b[3], #270 + // vl128 state = 0x452d1d6e + __ dci(0x44b94ff7); // cdot z23.s, z31.b, z1.b[3], 
#270 + // vl128 state = 0x546c9569 + __ dci(0x44b94dd5); // cdot z21.s, z14.b, z1.b[3], #270 + // vl128 state = 0xa2abf834 + __ dci(0x44bd45d7); // cdot z23.s, z14.b, z5.b[3], #90 + // vl128 state = 0xba77ed64 + __ dci(0x44fc45df); // cdot z31.d, z14.h, z12.h[1], #90 + // vl128 state = 0xe78163f2 + __ dci(0x44f441db); // cdot z27.d, z14.h, z4.h[1], #0 + // vl128 state = 0xca3b116d + __ dci(0x44f44dd3); // cdot z19.d, z14.h, z4.h[1], #270 + // vl128 state = 0x57ba3771 + __ dci(0x44b44d83); // cdot z3.s, z12.b, z4.b[2], #270 + // vl128 state = 0x4edccb88 + __ dci(0x44ac4d82); // cdot z2.s, z12.b, z4.b[1], #270 + // vl128 state = 0xc9543499 + __ dci(0x44a84f8a); // cdot z10.s, z28.b, z0.b[1], #270 + // vl128 state = 0x9d8fe439 + __ dci(0x44a84d08); // cdot z8.s, z8.b, z0.b[1], #270 + // vl128 state = 0x3c1bf0cc + __ dci(0x44ba4d09); // cdot z9.s, z8.b, z2.b[3], #270 + // vl128 state = 0x983716f1 + __ dci(0x44ea4d0d); // cdot z13.d, z8.h, z10.h[0], #270 + // vl128 state = 0x2df96300 + __ dci(0x44eb491d); // cdot z29.d, z8.h, z11.h[0], #180 + // vl128 state = 0xc23edde3 + __ dci(0x44e9499f); // cdot z31.d, z12.h, z9.h[0], #180 + // vl128 state = 0xef0ace9d + __ dci(0x44e84b9d); // cdot z29.d, z28.h, z8.h[0], #180 + // vl128 state = 0x2cce8002 + __ dci(0x44e84b99); // cdot z25.d, z28.h, z8.h[0], #180 + // vl128 state = 0xd07f46a1 + __ dci(0x44f84a9d); // cdot z29.d, z20.h, z8.h[1], #180 + // vl128 state = 0x239831e8 + __ dci(0x44f84a99); // cdot z25.d, z20.h, z8.h[1], #180 + // vl128 state = 0xa110988d + __ dci(0x44e84a09); // cdot z9.d, z16.h, z8.h[0], #180 + // vl128 state = 0x2b9ef292 + __ dci(0x44e84a19); // cdot z25.d, z16.h, z8.h[0], #180 + // vl128 state = 0x50eeb818 + __ dci(0x44e04b1b); // cdot z27.d, z24.h, z0.h[0], #180 + // vl128 state = 0xc33ce03b + __ dci(0x44e04a2b); // cdot z11.d, z17.h, z0.h[0], #180 + // vl128 state = 0xe163b5c9 + __ dci(0x44e04b0f); // cdot z15.d, z24.h, z0.h[0], #180 + // vl128 state = 0x052a34eb + __ dci(0x44e04b1f); // cdot z31.d, 
z24.h, z0.h[0], #180 + // vl128 state = 0x0660afb4 + __ dci(0x44e84b4f); // cdot z15.d, z26.h, z8.h[0], #180 + // vl128 state = 0x0ae01233 + __ dci(0x44ee4b4e); // cdot z14.d, z26.h, z14.h[0], #180 + // vl128 state = 0xde7bdd15 + __ dci(0x44ae4b7e); // cdot z30.s, z27.b, z6.b[1], #180 + // vl128 state = 0x758973a1 + __ dci(0x44a6497f); // cdot z31.s, z11.b, z6.b[0], #180 + // vl128 state = 0xb3c5df37 + __ dci(0x44a64df7); // cdot z23.s, z15.b, z6.b[0], #270 + // vl128 state = 0xe652f054 + __ dci(0x44a64c73); // cdot z19.s, z3.b, z6.b[0], #270 + // vl128 state = 0xc4b58041 + __ dci(0x44a64de3); // cdot z3.s, z15.b, z6.b[0], #270 + // vl128 state = 0x1239ca90 + __ dci(0x44a749e2); // cdot z2.s, z15.b, z7.b[0], #180 + // vl128 state = 0x4a01cdcb + __ dci(0x44a740e0); // cdot z0.s, z7.b, z7.b[0], #0 + // vl128 state = 0x604e45cf + __ dci(0x44a344e2); // cdot z2.s, z7.b, z3.b[0], #90 + // vl128 state = 0x12fe2972 + __ dci(0x44a34ca3); // cdot z3.s, z5.b, z3.b[0], #270 + // vl128 state = 0x78e0bb2e + __ dci(0x44e14cb3); // cdot z19.d, z5.h, z1.h[0], #270 + // vl128 state = 0xe3a69b46 + __ dci(0x44e14d31); // cdot z17.d, z9.h, z1.h[0], #270 + // vl128 state = 0xe6b58aa4 + __ dci(0x44f14d01); // cdot z1.d, z8.h, z1.h[1], #270 + // vl128 state = 0xffcfb597 + __ dci(0x44f14551); // cdot z17.d, z10.h, z1.h[1], #90 + // vl128 state = 0x2745934b + __ dci(0x44f345d5); // cdot z21.d, z14.h, z3.h[1], #90 + // vl128 state = 0xa38b5571 + __ dci(0x44f34574); // cdot z20.d, z11.h, z3.h[1], #90 + // vl128 state = 0x978afd92 + __ dci(0x44f34576); // cdot z22.d, z11.h, z3.h[1], #90 + // vl128 state = 0x9f1b19c9 + __ dci(0x44f34f77); // cdot z23.d, z27.h, z3.h[1], #270 + // vl128 state = 0x61a31d64 + __ dci(0x44f24f5f); // cdot z31.d, z26.h, z2.h[1], #270 + // vl128 state = 0x1e71023e + __ dci(0x44fa4fcf); // cdot z15.d, z30.h, z10.h[1], #270 + // vl128 state = 0xdbe5ffb3 + __ dci(0x44ba4f4e); // cdot z14.s, z26.b, z2.b[3], #270 + // vl128 state = 0x51390e81 + __ dci(0x44ba470c); // cdot 
z12.s, z24.b, z2.b[3], #90 + // vl128 state = 0x59ad5198 + __ dci(0x44b2479c); // cdot z28.s, z28.b, z2.b[2], #90 + // vl128 state = 0xe997de49 + __ dci(0x44b24fbd); // cdot z29.s, z29.b, z2.b[2], #270 + // vl128 state = 0x5533cefa + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x5533cefa, + 0x1462a298, + 0x1acb4ead, + 0xeb05ddf0, + 0x23fe8c86, + 0xbb1e9f8c, + 0x4a933f43, + 0x4cd64b55, + 0x84a4b8b7, + 0x52019619, + 0x4442432b, + 0x9b353ce8, + 0x333c9eef, + 0x291eac87, + 0x110f7371, + 0x009b25cb, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_splice) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x05ed89a7); // splice z7.d, p2, {z13.d, z14.d} + // vl128 state = 0x6acff994 + __ dci(0x05ed81e5); // splice z5.d, p0, {z15.d, z16.d} + // vl128 state = 0x2c8b3e5d + __ dci(0x05ed8375); // splice z21.d, p0, {z27.d, z28.d} + // vl128 state = 0x2588e208 + __ dci(0x05ed9174); // splice z20.d, p4, {z11.d, z12.d} + // vl128 state = 0x4d6fa6b3 + __ dci(0x056d91f6); // splice z22.h, p4, {z15.h, z16.h} + // vl128 state = 0x9f00a308 + __ dci(0x056d92f2); // splice z18.h, p4, {z23.h, z24.h} + // vl128 state = 0x5479cc74 + __ dci(0x056d96a2); // splice z2.h, p5, {z21.h, z22.h} + // vl128 state = 0xca7a6a63 + __ dci(0x056d9fa6); // splice z6.h, p7, {z29.h, z30.h} + // vl128 state = 0x007fc934 + __ dci(0x056d9be4); // splice z4.h, p6, {z31.h, z0.h} + // vl128 state = 0x8186741b + __ dci(0x056d97ec); // splice z12.h, p5, {z31.h, z0.h} + // vl128 state = 0x26ab76b9 + __ dci(0x056d979c); // splice z28.h, p5, {z28.h, z29.h} + // vl128 state = 0x933201f4 + 
__ dci(0x056d9794); // splice z20.h, p5, {z28.h, z29.h} + // vl128 state = 0x42cf6784 + __ dci(0x052d9f96); // splice z22.b, p7, {z28.b, z29.b} + // vl128 state = 0x0838e776 + __ dci(0x056d8f9e); // splice z30.h, p3, {z28.h, z29.h} + // vl128 state = 0x89637e78 + __ dci(0x056d9fd6); // splice z22.h, p7, {z30.h, z31.h} + // vl128 state = 0xb94dbb49 + __ dci(0x056d8dd7); // splice z23.h, p3, {z14.h, z15.h} + // vl128 state = 0x260f8127 + __ dci(0x05ad8ddf); // splice z31.s, p3, {z14.s, z15.s} + // vl128 state = 0x16257a12 + __ dci(0x05ad8ddd); // splice z29.s, p3, {z14.s, z15.s} + // vl128 state = 0x803d0766 + __ dci(0x05ad8d7c); // splice z28.s, p3, {z11.s, z12.s} + // vl128 state = 0xcc405331 + __ dci(0x05ad8d74); // splice z20.s, p3, {z11.s, z12.s} + // vl128 state = 0x0ed25e4c + __ dci(0x05ad8d64); // splice z4.s, p3, {z11.s, z12.s} + // vl128 state = 0x167daf8b + __ dci(0x05ed8c6c); // splice z12.d, p3, {z3.d, z4.d} + // vl128 state = 0x435f3bb9 + __ dci(0x05ed8cad); // splice z13.d, p3, {z5.d, z6.d} + // vl128 state = 0xe49df619 + __ dci(0x056d8dbd); // splice z29.h, p3, {z13.h, z14.h} + // vl128 state = 0x1f54e928 + __ dci(0x056d8f2d); // splice z13.h, p3, {z25.h, z26.h} + // vl128 state = 0x24adbe77 + __ dci(0x056d8f9d); // splice z29.h, p3, {z28.h, z29.h} + // vl128 state = 0xcc2ec3e6 + __ dci(0x056d8f95); // splice z21.h, p3, {z28.h, z29.h} + // vl128 state = 0xb71c64f7 + __ dci(0x056d8f34); // splice z20.h, p3, {z25.h, z26.h} + // vl128 state = 0xb32756f0 + __ dci(0x05ed8f64); // splice z4.d, p3, {z27.d, z28.d} + // vl128 state = 0x3f7d1f13 + __ dci(0x05ad8e60); // splice z0.s, p3, {z19.s, z20.s} + // vl128 state = 0x9a7ffbde + __ dci(0x052d8e50); // splice z16.b, p3, {z18.b, z19.b} + // vl128 state = 0x5c82ed17 + __ dci(0x052d9652); // splice z18.b, p5, {z18.b, z19.b} + // vl128 state = 0x28b9cd60 + __ dci(0x052d9ed0); // splice z16.b, p7, {z22.b, z23.b} + // vl128 state = 0xab0238ba + __ dci(0x052d9ed4); // splice z20.b, p7, {z22.b, z23.b} + // vl128 
state = 0x9f0e0ef9 + __ dci(0x056d9cc4); // splice z4.h, p7, {z6.h, z7.h} + // vl128 state = 0xec31d5e7 + __ dci(0x056d98e6); // splice z6.h, p6, {z7.h, z8.h} + // vl128 state = 0xbc9c0048 + __ dci(0x056d9ee4); // splice z4.h, p7, {z23.h, z24.h} + // vl128 state = 0xe2e9c9a3 + __ dci(0x056d9ef4); // splice z20.h, p7, {z23.h, z24.h} + // vl128 state = 0x60ffa98a + __ dci(0x056d9ab6); // splice z22.h, p6, {z21.h, z22.h} + // vl128 state = 0xae70ed0f + __ dci(0x056d9294); // splice z20.h, p4, {z20.h, z21.h} + // vl128 state = 0x5736c563 + __ dci(0x056d9284); // splice z4.h, p4, {z20.h, z21.h} + // vl128 state = 0xf31dd2d9 + __ dci(0x052d920c); // splice z12.b, p4, {z16.b, z17.b} + // vl128 state = 0x04502fea + __ dci(0x052d921c); // splice z28.b, p4, {z16.b, z17.b} + // vl128 state = 0x852f98b1 + __ dci(0x052d9094); // splice z20.b, p4, {z4.b, z5.b} + // vl128 state = 0xb40c5931 + __ dci(0x052d90f6); // splice z22.b, p4, {z7.b, z8.b} + // vl128 state = 0x64d6138d + __ dci(0x052d88e6); // splice z6.b, p2, {z7.b, z8.b} + // vl128 state = 0x51bb6564 + __ dci(0x052d88e4); // splice z4.b, p2, {z7.b, z8.b} + // vl128 state = 0x7ed599b0 + __ dci(0x05ad8865); // splice z5.s, p2, {z3.s, z4.s} + // vl128 state = 0xa201547d + __ dci(0x05ad9961); // splice z1.s, p6, {z11.s, z12.s} + // vl128 state = 0x9508f19c + __ dci(0x05ed9945); // splice z5.d, p6, {z10.d, z11.d} + // vl128 state = 0x95399cfd + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x95399cfd, + 0xa960b01e, + 0x1fedaa18, + 0xe2fd3ec3, + 0x3edc353b, + 0xd809efd8, + 0x2a04f527, + 0xe4b9bb4a, + 0x72e5ed3e, + 0x63d6fe93, + 0xd2ad18fa, + 0x522fe057, + 0xc7ba2f7d, + 0x2dd44bd3, + 0x68b62ae6, + 0x06ea6854, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_whilerw_whilewr) { + 
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x25ac3026); // whilewr p6.s, x1, x12 + // vl128 state = 0x91e301ae + __ dci(0x25ac3024); // whilewr p4.s, x1, x12 + // vl128 state = 0x9203b261 + __ dci(0x25af3020); // whilewr p0.s, x1, x15 + // vl128 state = 0x87505080 + __ dci(0x25ef3222); // whilewr p2.d, x17, x15 + // vl128 state = 0x4ba695cb + __ dci(0x25eb320a); // whilewr p10.d, x16, x11 + // vl128 state = 0x5909d726 + __ dci(0x25e33308); // whilewr p8.d, x24, x3 + // vl128 state = 0x52766071 + __ dci(0x25ea3309); // whilewr p9.d, x24, x10 + // vl128 state = 0xe906a65a + __ dci(0x25aa3101); // whilewr p1.s, x8, x10 + // vl128 state = 0xd9d56c58 + __ dci(0x252b3100); // whilewr p0.b, x8, x11 + // vl128 state = 0xcc868eb9 + __ dci(0x252a3008); // whilewr p8.b, x0, x10 + // vl128 state = 0xf78cb912 + __ dci(0x2528304c); // whilewr p12.b, x2, x8 + // vl128 state = 0x5493a6c4 + __ dci(0x25203004); // whilewr p4.b, x0, x0 + // vl128 state = 0xb3d754b6 + __ dci(0x25303105); // whilewr p5.b, x8, x16 + // vl128 state = 0x7fc526df + __ dci(0x25b4310d); // whilewr p13.s, x8, x20 + // vl128 state = 0x5999edda + __ dci(0x25ac310c); // whilewr p12.s, x8, x12 + // vl128 state = 0x46a86248 + __ dci(0x25ac310e); // whilewr p14.s, x8, x12 + // vl128 state = 0x0dc5ed70 + __ dci(0x252c330a); // whilewr p10.b, x24, x12 + // vl128 state = 0x453a1aa9 + __ dci(0x252f330b); // whilewr p11.b, x24, x15 + // vl128 state = 0x98fbdcdf + __ dci(0x256e330f); // whilewr p15.h, x24, x14 + // vl128 state = 0x84699750 + __ dci(0x252e334d); // whilewr p13.b, x26, x14 + // vl128 state = 0x198ea519 + __ dci(0x252e3349); // whilewr p9.b, x26, x14 + // vl128 state = 0xb4956673 + __ dci(0x253e33c1); // whilewr p1.b, x30, x30 + // vl128 state = 0xfd88dd74 + __ dci(0x252e33e3); // whilewr p3.b, xzr, 
x14 + // vl128 state = 0x68cda9df + __ dci(0x25ae33cb); // whilewr p11.s, x30, x14 + // vl128 state = 0x9104f644 + __ dci(0x25ae33ca); // whilewr p10.s, x30, x14 + // vl128 state = 0xd9079300 + __ dci(0x25ea33da); // whilerw p10.d, x30, x10 + // vl128 state = 0xd9fb019d + __ dci(0x25ae33d8); // whilerw p8.s, x30, x14 + // vl128 state = 0x9edf46fa + __ dci(0x25ae32f9); // whilerw p9.s, x23, x14 + // vl128 state = 0x3b10562f + __ dci(0x25ee32d8); // whilerw p8.d, x22, x14 + // vl128 state = 0x473e26e3 + __ dci(0x25ec3299); // whilerw p9.d, x20, x12 + // vl128 state = 0x4feaf55c + __ dci(0x25ec329d); // whilerw p13.d, x20, x12 + // vl128 state = 0x9f9a203a + __ dci(0x25e8321c); // whilerw p12.d, x16, x8 + // vl128 state = 0xd8f32d11 + __ dci(0x2568301d); // whilerw p13.h, x0, x8 + // vl128 state = 0xf04b6bb8 + __ dci(0x2528320d); // whilewr p13.b, x16, x8 + // vl128 state = 0x0883f877 + __ dci(0x25a8323d); // whilerw p13.s, x17, x8 + // vl128 state = 0x9564ca3e + __ dci(0x25a8323f); // whilerw p15.s, x17, x8 + // vl128 state = 0xa50cf036 + __ dci(0x25e8303d); // whilerw p13.d, x1, x8 + // vl128 state = 0xe89b1719 + __ dci(0x25e83175); // whilerw p5.d, x11, x8 + // vl128 state = 0xe79bea7c + __ dci(0x256a3174); // whilerw p4.h, x11, x10 + // vl128 state = 0xc8ca3b74 + __ dci(0x256a317c); // whilerw p12.h, x11, x10 + // vl128 state = 0xc3c88548 + __ dci(0x256a33f8); // whilerw p8.h, xzr, x10 + // vl128 state = 0x8b25acc6 + __ dci(0x256a33f0); // whilerw p0.h, xzr, x10 + // vl128 state = 0x904c0fd1 + __ dci(0x25e833e0); // whilewr p0.d, xzr, x8 + // vl128 state = 0xc893f4c8 + __ dci(0x25ec32e8); // whilewr p8.d, x23, x12 + // vl128 state = 0x807edd46 + __ dci(0x25ed326c); // whilewr p12.d, x19, x13 + // vl128 state = 0x8b7c637a + __ dci(0x256d32ed); // whilewr p13.h, x23, x13 + // vl128 state = 0xa3c425d3 + __ dci(0x252d30e9); // whilewr p9.b, x7, x13 + // vl128 state = 0x0edfe6b9 + __ dci(0x252531eb); // whilewr p11.b, x15, x5 + // vl128 state = 0xf716b922 + __ 
dci(0x252733ef); // whilewr p15.b, xzr, x7 + // vl128 state = 0xbf9aea3e + __ dci(0x25253367); // whilewr p7.b, x27, x5 + // vl128 state = 0x357fc408 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x357fc408, + 0x8d6fc283, + 0x5f73c1df, + 0x2963d995, + 0x80713760, + 0x4638fc82, + 0x23955ead, + 0x52e4c002, + 0xd56ab65c, + 0x0e5bb2f2, + 0x8c78ec14, + 0xd9b634d2, + 0x83adc3a2, + 0x3b664eea, + 0x3d1f5422, + 0x7cdcd310, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_mul_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x4468fb6e); // mul z14.h, z27.h, z0.h[5] + // vl128 state = 0xcbe81b96 + __ dci(0x4468f93e); // mul z30.h, z9.h, z0.h[5] + // vl128 state = 0x8a75362d + __ dci(0x4428f976); // mul z22.h, z11.h, z0.h[1] + // vl128 state = 0x1e3c5184 + __ dci(0x4428fa77); // mul z23.h, z19.h, z0.h[1] + // vl128 state = 0x173f58b5 + __ dci(0x4429fb67); // mul z7.h, z27.h, z1.h[1] + // vl128 state = 0x15686c87 + __ dci(0x4429fb63); // mul z3.h, z27.h, z1.h[1] + // vl128 state = 0x41068a87 + __ dci(0x4428fb53); // mul z19.h, z26.h, z0.h[1] + // vl128 state = 0xcfd6e02c + __ dci(0x4429fbd1); // mul z17.h, z30.h, z1.h[1] + // vl128 state = 0xfd3e0e3c + __ dci(0x442afbd9); // mul z25.h, z30.h, z2.h[1] + // vl128 state = 0x1e660bf7 + __ dci(0x442afa5b); // mul z27.h, z18.h, z2.h[1] + // vl128 state = 0xb5378f4e + __ dci(0x44abfa4b); // mul z11.s, z18.s, z3.s[1] + // vl128 state = 0xf34416fe + __ dci(0x44abfa4f); // mul z15.s, z18.s, z3.s[1] + // vl128 state = 0xc80d6ad9 + __ dci(0x44a9f84e); // mul z14.s, z2.s, z1.s[1] + // vl128 state = 
0xa4fe2be7 + __ dci(0x44e9fa46); // mul z6.d, z18.d, z9.d[0] + // vl128 state = 0xaf461ebb + __ dci(0x44e9fa8e); // mul z14.d, z20.d, z9.d[0] + // vl128 state = 0x9f7acd20 + __ dci(0x44f1fa8f); // mul z15.d, z20.d, z1.d[1] + // vl128 state = 0x1b710469 + __ dci(0x4471fa07); // mul z7.h, z16.h, z1.h[6] + // vl128 state = 0xa2120b4c + __ dci(0x4470fa43); // mul z3.h, z18.h, z0.h[6] + // vl128 state = 0xb6d6ce4c + __ dci(0x4474fb47); // mul z7.h, z26.h, z4.h[6] + // vl128 state = 0xeec634bf + __ dci(0x4476fa57); // mul z23.h, z18.h, z6.h[6] + // vl128 state = 0x893bbe37 + __ dci(0x447cfa53); // mul z19.h, z18.h, z4.h[7] + // vl128 state = 0x8373940b + __ dci(0x447dfb52); // mul z18.h, z26.h, z5.h[7] + // vl128 state = 0xd1c86434 + __ dci(0x4477fb56); // mul z22.h, z26.h, z7.h[6] + // vl128 state = 0xb247cf9e + __ dci(0x4476fb77); // mul z23.h, z27.h, z6.h[6] + // vl128 state = 0x6106a868 + __ dci(0x4467fb7f); // mul z31.h, z27.h, z7.h[4] + // vl128 state = 0xc0a11edf + __ dci(0x446ffa77); // mul z23.h, z19.h, z7.h[5] + // vl128 state = 0xe1879a44 + __ dci(0x442bfa76); // mul z22.h, z19.h, z3.h[1] + // vl128 state = 0xc773115b + __ dci(0x442bfa7e); // mul z30.h, z19.h, z3.h[1] + // vl128 state = 0x5f5b4793 + __ dci(0x442afa2e); // mul z14.h, z17.h, z2.h[1] + // vl128 state = 0x144b30b2 + __ dci(0x442afa26); // mul z6.h, z17.h, z2.h[1] + // vl128 state = 0x905f8608 + __ dci(0x442afb6e); // mul z14.h, z27.h, z2.h[1] + // vl128 state = 0x0f826c19 + __ dci(0x44aefb66); // mul z6.s, z27.s, z6.s[1] + // vl128 state = 0x7043c090 + __ dci(0x44aefba4); // mul z4.s, z29.s, z6.s[1] + // vl128 state = 0xab3921a9 + __ dci(0x44aefbb4); // mul z20.s, z29.s, z6.s[1] + // vl128 state = 0x7d420495 + __ dci(0x44acfbf0); // mul z16.s, z31.s, z4.s[1] + // vl128 state = 0xceb17a45 + __ dci(0x44a4fb60); // mul z0.s, z27.s, z4.s[0] + // vl128 state = 0x97ed0929 + __ dci(0x44a5fb30); // mul z16.s, z25.s, z5.s[0] + // vl128 state = 0xb7fa54a5 + __ dci(0x4425f938); // mul z24.h, z9.h, z5.h[0] + 
// vl128 state = 0xfcc1c192 + __ dci(0x442df830); // mul z16.h, z1.h, z5.h[1] + // vl128 state = 0x933ed51d + __ dci(0x4427f832); // mul z18.h, z1.h, z7.h[0] + // vl128 state = 0x2129d4f0 + __ dci(0x442ef822); // mul z2.h, z1.h, z6.h[1] + // vl128 state = 0x76f6854c + __ dci(0x442af803); // mul z3.h, z0.h, z2.h[1] + // vl128 state = 0xe763df2d + __ dci(0x442af801); // mul z1.h, z0.h, z2.h[1] + // vl128 state = 0x61db5a87 + __ dci(0x442bf900); // mul z0.h, z8.h, z3.h[1] + // vl128 state = 0x90883cfb + __ dci(0x442bf881); // mul z1.h, z4.h, z3.h[1] + // vl128 state = 0xb4afb9b2 + __ dci(0x4427f885); // mul z5.h, z4.h, z7.h[0] + // vl128 state = 0xe512adca + __ dci(0x4425f8ad); // mul z13.h, z5.h, z5.h[0] + // vl128 state = 0xd820475a + __ dci(0x4420f8a5); // mul z5.h, z5.h, z0.h[0] + // vl128 state = 0xea9a6f50 + __ dci(0x4431f8a4); // mul z4.h, z5.h, z1.h[2] + // vl128 state = 0x9343e341 + __ dci(0x4425f8a0); // mul z0.h, z5.h, z5.h[0] + // vl128 state = 0x20a5f202 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x20a5f202, + 0xdb7b10ee, + 0x0607441b, + 0x4966f0ff, + 0x5f750338, + 0x9be09ff4, + 0x8805a320, + 0x52cf70b0, + 0x5f4c6d92, + 0xf8009f1f, + 0x56cd1ff6, + 0x345f063d, + 0x3807ccf3, + 0xf7eb85a8, + 0x1600c143, + 0x97be6c01, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_mla_mls_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44200800); // mla z0.h, z0.h, z0.h[0] + // vl128 state = 0x06aac22e + __ dci(0x44200a28); // mla z8.h, z17.h, z0.h[0] + // vl128 state = 0xde2255a4 + __ dci(0x44e00a2a); // mla z10.d, z17.d, 
z0.d[0] + // vl128 state = 0x9bf1bae6 + __ dci(0x44600e3a); // mls z26.h, z17.h, z0.h[4] + // vl128 state = 0x28b58feb + __ dci(0x44e20e2a); // mls z10.d, z17.d, z2.d[0] + // vl128 state = 0x0ac8fcc8 + __ dci(0x44620f2e); // mls z14.h, z25.h, z2.h[4] + // vl128 state = 0x955da860 + __ dci(0x44630f6a); // mls z10.h, z27.h, z3.h[4] + // vl128 state = 0x654ee915 + __ dci(0x44730b6e); // mla z14.h, z27.h, z3.h[6] + // vl128 state = 0x3fd3e02c + __ dci(0x44720f6f); // mls z15.h, z27.h, z2.h[6] + // vl128 state = 0x46031098 + __ dci(0x44620f4b); // mls z11.h, z26.h, z2.h[4] + // vl128 state = 0xd49183cf + __ dci(0x446a0b5b); // mla z27.h, z26.h, z2.h[5] + // vl128 state = 0x4fe290c1 + __ dci(0x44680b73); // mla z19.h, z27.h, z0.h[5] + // vl128 state = 0xf6fccd86 + __ dci(0x44e90b77); // mla z23.d, z27.d, z9.d[0] + // vl128 state = 0x57b2090d + __ dci(0x44f10b76); // mla z22.d, z27.d, z1.d[1] + // vl128 state = 0x5a6932eb + __ dci(0x44f40b77); // mla z23.d, z27.d, z4.d[1] + // vl128 state = 0x8e33d7d5 + __ dci(0x44640b7f); // mla z31.h, z27.h, z4.h[4] + // vl128 state = 0xaa01885d + __ dci(0x44640b7d); // mla z29.h, z27.h, z4.h[4] + // vl128 state = 0x2ef00e60 + __ dci(0x44640b7f); // mla z31.h, z27.h, z4.h[4] + // vl128 state = 0x94ac10d3 + __ dci(0x44340b7e); // mla z30.h, z27.h, z4.h[2] + // vl128 state = 0x48211118 + __ dci(0x44340e7a); // mls z26.h, z19.h, z4.h[2] + // vl128 state = 0x72cc2767 + __ dci(0x44b40eea); // mls z10.s, z23.s, z4.s[2] + // vl128 state = 0x3855f70f + __ dci(0x44e40ee2); // mls z2.d, z23.d, z4.d[0] + // vl128 state = 0xf9225160 + __ dci(0x44ec0ea3); // mls z3.d, z21.d, z12.d[0] + // vl128 state = 0xf9b94fd0 + __ dci(0x44ae0ea7); // mls z7.s, z21.s, z6.s[1] + // vl128 state = 0x06070917 + __ dci(0x44ae0eb7); // mls z23.s, z21.s, z6.s[1] + // vl128 state = 0x26ecdd18 + __ dci(0x44ae0e07); // mls z7.s, z16.s, z6.s[1] + // vl128 state = 0xaa8e3a32 + __ dci(0x44ae0a85); // mla z5.s, z20.s, z6.s[1] + // vl128 state = 0x2379cba0 + __ dci(0x44ae0a81); 
// mla z1.s, z20.s, z6.s[1] + // vl128 state = 0x3cc8a61c + __ dci(0x442a0a85); // mla z5.h, z20.h, z2.h[1] + // vl128 state = 0x96f118ef + __ dci(0x443e0a84); // mla z4.h, z20.h, z6.h[3] + // vl128 state = 0xa3f8cb41 + __ dci(0x443f0b8c); // mla z12.h, z28.h, z7.h[3] + // vl128 state = 0x97fcb1da + __ dci(0x442f0bbc); // mla z28.h, z29.h, z7.h[1] + // vl128 state = 0x761e9499 + __ dci(0x44270fac); // mls z12.h, z29.h, z7.h[0] + // vl128 state = 0xfb28f943 + __ dci(0x442f0ead); // mls z13.h, z21.h, z7.h[1] + // vl128 state = 0x387a2623 + __ dci(0x44270fa9); // mls z9.h, z29.h, z7.h[0] + // vl128 state = 0x22f03847 + __ dci(0x44270f68); // mls z8.h, z27.h, z7.h[0] + // vl128 state = 0xada4998b + __ dci(0x44270f6c); // mls z12.h, z27.h, z7.h[0] + // vl128 state = 0xdf80a034 + __ dci(0x44270f7c); // mls z28.h, z27.h, z7.h[0] + // vl128 state = 0x3ccddaa6 + __ dci(0x44250f2c); // mls z12.h, z25.h, z5.h[0] + // vl128 state = 0x588502cb + __ dci(0x442f0f28); // mls z8.h, z25.h, z7.h[1] + // vl128 state = 0x79c90307 + __ dci(0x446f0d2c); // mls z12.h, z9.h, z7.h[5] + // vl128 state = 0xaa0b21a9 + __ dci(0x44af0d2e); // mls z14.s, z9.s, z7.s[1] + // vl128 state = 0xd5ccc60c + __ dci(0x44ed0d26); // mls z6.d, z9.d, z13.d[0] + // vl128 state = 0x15037cbe + __ dci(0x44fd0f2e); // mls z14.d, z25.d, z13.d[1] + // vl128 state = 0x9f481fdf + __ dci(0x44f90e2f); // mls z15.d, z17.d, z9.d[1] + // vl128 state = 0x93fe8537 + __ dci(0x447d0e3f); // mls z31.h, z17.h, z5.h[7] + // vl128 state = 0x14b9edf2 + __ dci(0x44f90e2f); // mls z15.d, z17.d, z9.d[1] + // vl128 state = 0xde1c0d1c + __ dci(0x44790c27); // mls z7.h, z1.h, z1.h[7] + // vl128 state = 0x563d614a + __ dci(0x44790c23); // mls z3.h, z1.h, z1.h[7] + // vl128 state = 0x8c6d9baf + __ dci(0x44f90c6b); // mls z11.d, z3.d, z9.d[1] + // vl128 state = 0x1a25c073 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if 
(CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x1a25c073, + 0xfbb2c945, + 0x932b8ab7, + 0x99370bee, + 0x44a15f80, + 0xae898f1d, + 0x97382827, + 0xafec059e, + 0xf11bc007, + 0x34c49b30, + 0x73b95606, + 0x77324772, + 0x9ad7d21b, + 0x0d0958a7, + 0xee4accc3, + 0x31d34df8, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_mla_long) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44935abe); // umlslb z30.s, z21.h, z19.h + // vl128 state = 0x4fac8e49 + __ dci(0x449358fa); // umlslb z26.s, z7.h, z19.h + // vl128 state = 0xca971f04 + __ dci(0x44935adb); // umlslb z27.s, z22.h, z19.h + // vl128 state = 0x5652564b + __ dci(0x449359da); // umlslb z26.s, z14.h, z19.h + // vl128 state = 0xf2d81244 + __ dci(0x448349de); // umlalb z30.s, z14.h, z3.h + // vl128 state = 0x7cbaa548 + __ dci(0x448349d6); // umlalb z22.s, z14.h, z3.h + // vl128 state = 0x9e7b4915 + __ dci(0x44c34952); // umlalb z18.d, z10.s, z3.s + // vl128 state = 0x550af70e + __ dci(0x44d349d3); // umlalb z19.d, z14.s, z19.s + // vl128 state = 0x676743b2 + __ dci(0x44d549d7); // umlalb z23.d, z14.s, z21.s + // vl128 state = 0x602e09e4 + __ dci(0x44d55ddf); // umlslt z31.d, z14.s, z21.s + // vl128 state = 0xd4c245de + __ dci(0x44d55d1b); // umlslt z27.d, z8.s, z21.s + // vl128 state = 0x9c2c1cb4 + __ dci(0x44d5490b); // umlalb z11.d, z8.s, z21.s + // vl128 state = 0x8a702002 + __ dci(0x44554d0a); // umlalt z10.h, z8.b, z21.b + // vl128 state = 0x6758ce3c + __ dci(0x4455452b); // smlalt z11.h, z9.b, z21.b + // vl128 state = 0x967e596e + __ dci(0x44554529); // smlalt z9.h, z9.b, z21.b + // vl128 state = 0x1300909a + __ dci(0x44474521); // smlalt z1.h, z9.b, z7.b + // vl128 state = 0x01ca26c1 + __ dci(0x44c74d25); // umlalt z5.d, z9.s, z7.s + // 
vl128 state = 0x8e6313b9 + __ dci(0x44cb4d24); // umlalt z4.d, z9.s, z11.s + // vl128 state = 0xdb41e004 + __ dci(0x44cb4d2c); // umlalt z12.d, z9.s, z11.s + // vl128 state = 0x941401ca + __ dci(0x44c94da8); // umlalt z8.d, z13.s, z9.s + // vl128 state = 0x8a57334b + __ dci(0x44594db8); // umlalt z24.h, z13.b, z25.b + // vl128 state = 0x94333fae + __ dci(0x44585db0); // umlslt z16.h, z13.b, z24.b + // vl128 state = 0xf4fbe251 + __ dci(0x44585f80); // umlslt z0.h, z28.b, z24.b + // vl128 state = 0x1f5aeef3 + __ dci(0x445a5fc2); // umlslt z2.h, z30.b, z26.b + // vl128 state = 0x4b153d20 + __ dci(0x445a5fd2); // umlslt z18.h, z30.b, z26.b + // vl128 state = 0xbd82f0a2 + __ dci(0x445a5fd3); // umlslt z19.h, z30.b, z26.b + // vl128 state = 0x72d7083d + __ dci(0x44525bd2); // umlslb z18.h, z30.b, z18.b + // vl128 state = 0x5018a138 + __ dci(0x44525bd6); // umlslb z22.h, z30.b, z18.b + // vl128 state = 0xcaf48a01 + __ dci(0x445053d2); // smlslb z18.h, z30.b, z16.b + // vl128 state = 0x76e2d850 + __ dci(0x44d153c2); // smlslb z2.d, z30.s, z17.s + // vl128 state = 0x8594d6c9 + __ dci(0x449353c3); // smlslb z3.s, z30.h, z19.h + // vl128 state = 0x8e0da89d + __ dci(0x449152c7); // smlslb z7.s, z22.h, z17.h + // vl128 state = 0xe7d08864 + __ dci(0x44995285); // smlslb z5.s, z20.h, z25.h + // vl128 state = 0xd7c49fca + __ dci(0x449953c1); // smlslb z1.s, z30.h, z25.h + // vl128 state = 0x3b648b39 + __ dci(0x449152c9); // smlslb z9.s, z22.h, z17.h + // vl128 state = 0x5b5bab94 + __ dci(0x449542cd); // smlalb z13.s, z22.h, z21.h + // vl128 state = 0x65282d76 + __ dci(0x449c42c9); // smlalb z9.s, z22.h, z28.h + // vl128 state = 0x94a92486 + __ dci(0x449c52f9); // smlslb z25.s, z23.h, z28.h + // vl128 state = 0xd4f62835 + __ dci(0x44dc5afd); // umlslb z29.d, z23.s, z28.s + // vl128 state = 0xf124c6a1 + __ dci(0x44dd58ff); // umlslb z31.d, z7.s, z29.s + // vl128 state = 0xbc694f1c + __ dci(0x44dc587b); // umlslb z27.d, z3.s, z28.s + // vl128 state = 0xf1621eb2 + __ dci(0x44de596b); 
// umlslb z11.d, z11.s, z30.s + // vl128 state = 0x944b4b75 + __ dci(0x44de5969); // umlslb z9.d, z11.s, z30.s + // vl128 state = 0xa98a2c38 + __ dci(0x44db596d); // umlslb z13.d, z11.s, z27.s + // vl128 state = 0x6bd60807 + __ dci(0x44db5d5d); // umlslt z29.d, z10.s, z27.s + // vl128 state = 0x9c377b51 + __ dci(0x449b555f); // smlslt z31.s, z10.h, z27.h + // vl128 state = 0x7c81f1d5 + __ dci(0x449b555d); // smlslt z29.s, z10.h, z27.h + // vl128 state = 0xdaab1edb + __ dci(0x44d35559); // smlslt z25.d, z10.s, z19.s + // vl128 state = 0xdc3f25f1 + __ dci(0x44d355f8); // smlslt z24.d, z15.s, z19.s + // vl128 state = 0x9c75a3cf + __ dci(0x44d356f9); // smlslt z25.d, z23.s, z19.s + // vl128 state = 0x5b999178 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x5b999178, + 0xd6191e64, + 0x1f3bd2a1, + 0x1e0ac282, + 0x8d13f5d3, + 0x97157e8f, + 0x5d6e4134, + 0x8d2186b4, + 0x88078c65, + 0x6dd92db3, + 0xfcd02d21, + 0x81738dc2, + 0x644e3c06, + 0x9c9d2ac8, + 0xaaa43548, + 0x871e9b08, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_complex_integer_multiply_add_vector) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 40 * kInstructionSize); + __ dci(0x44dd2f34); // cmla z20.d, z25.d, z29.d, #270 + // vl128 state = 0x12e9bd68 + __ dci(0x44dd2f3c); // cmla z28.d, z25.d, z29.d, #270 + // vl128 state = 0x4fd8ba3e + __ dci(0x44dc2734); // cmla z20.d, z25.d, z28.d, #90 + // vl128 state = 0x9b11d64f + __ dci(0x44dc2e36); // cmla z22.d, z17.d, z28.d, #270 + // vl128 state = 0x4658e6ae + __ dci(0x44dd2f34); // cmla z20.d, z25.d, z29.d, #270 + // vl128 state = 0x5151ea16 + __ dci(0x44dc2fb5); 
// cmla z21.d, z29.d, z28.d, #270 + // vl128 state = 0x21c497cc + __ dci(0x44dc2fbd); // cmla z29.d, z29.d, z28.d, #270 + // vl128 state = 0xe823fd46 + __ dci(0x44dc2e3c); // cmla z28.d, z17.d, z28.d, #270 + // vl128 state = 0xcc35cda6 + __ dci(0x44dc2e34); // cmla z20.d, z17.d, z28.d, #270 + // vl128 state = 0x963047c0 + __ dci(0x44d42c30); // cmla z16.d, z1.d, z20.d, #270 + // vl128 state = 0x5d2c5643 + __ dci(0x44c42c60); // cmla z0.d, z3.d, z4.d, #270 + // vl128 state = 0xfd400169 + __ dci(0x44842464); // cmla z4.s, z3.s, z4.s, #90 + // vl128 state = 0x00116098 + __ dci(0x44842d60); // cmla z0.s, z11.s, z4.s, #270 + // vl128 state = 0x582d46e3 + __ dci(0x44042562); // cmla z2.b, z11.b, z4.b, #90 + // vl128 state = 0x1bd70bf0 + __ dci(0x44042420); // cmla z0.b, z1.b, z4.b, #90 + // vl128 state = 0x7682807d + __ dci(0x44062401); // cmla z1.b, z0.b, z6.b, #90 + // vl128 state = 0xaa3e2c64 + __ dci(0x44042449); // cmla z9.b, z2.b, z4.b, #90 + // vl128 state = 0xd81638f9 + __ dci(0x44052059); // cmla z25.b, z2.b, z5.b, #0 + // vl128 state = 0x38cb5d96 + __ dci(0x4415305d); // sqrdcmlah z29.b, z2.b, z21.b, #0 + // vl128 state = 0x4c6b85e0 + __ dci(0x44153819); // sqrdcmlah z25.b, z0.b, z21.b, #180 + // vl128 state = 0x229b5be9 + __ dci(0x4405391b); // sqrdcmlah z27.b, z8.b, z5.b, #180 + // vl128 state = 0x82611aec + __ dci(0x4405314b); // sqrdcmlah z11.b, z10.b, z5.b, #0 + // vl128 state = 0xe58c48e0 + __ dci(0x4407316a); // sqrdcmlah z10.b, z11.b, z7.b, #0 + // vl128 state = 0x5282838a + __ dci(0x4407347a); // sqrdcmlah z26.b, z3.b, z7.b, #90 + // vl128 state = 0x134a0891 + __ dci(0x4413347e); // sqrdcmlah z30.b, z3.b, z19.b, #90 + // vl128 state = 0x455ab9e0 + __ dci(0x4443347f); // sqrdcmlah z31.h, z3.h, z3.h, #90 + // vl128 state = 0x030d9d2c + __ dci(0x444b307e); // sqrdcmlah z30.h, z3.h, z11.h, #0 + // vl128 state = 0x91a95a2c + __ dci(0x444b301f); // sqrdcmlah z31.h, z0.h, z11.h, #0 + // vl128 state = 0x0f1c8468 + __ dci(0x4409300f); // sqrdcmlah z15.b, z0.b, 
z9.b, #0 + // vl128 state = 0x95f802b7 + __ dci(0x440c300e); // sqrdcmlah z14.b, z0.b, z12.b, #0 + // vl128 state = 0x5fa6d2c6 + __ dci(0x4404310c); // sqrdcmlah z12.b, z8.b, z4.b, #0 + // vl128 state = 0x192b05a4 + __ dci(0x4415310d); // sqrdcmlah z13.b, z8.b, z21.b, #0 + // vl128 state = 0xa8a8d37f + __ dci(0x4414350f); // sqrdcmlah z15.b, z8.b, z20.b, #90 + // vl128 state = 0xcd890d8c + __ dci(0x4454354d); // sqrdcmlah z13.h, z10.h, z20.h, #90 + // vl128 state = 0x91ab863e + __ dci(0x444435c5); // sqrdcmlah z5.h, z14.h, z4.h, #90 + // vl128 state = 0x41bbc90c + __ dci(0x444c34c7); // sqrdcmlah z7.h, z6.h, z12.h, #90 + // vl128 state = 0xb6329344 + __ dci(0x444836c6); // sqrdcmlah z6.h, z22.h, z8.h, #90 + // vl128 state = 0xdf5f443c + __ dci(0x444836d6); // sqrdcmlah z22.h, z22.h, z8.h, #90 + // vl128 state = 0x719a2e70 + __ dci(0x44403694); // sqrdcmlah z20.h, z20.h, z0.h, #90 + // vl128 state = 0x28a64934 + __ dci(0x4449369c); // sqrdcmlah z28.h, z20.h, z9.h, #90 + // vl128 state = 0x5d41ba84 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x5d41ba84, + 0xd5e52f4d, + 0x9f627c0d, + 0x111f21a7, + 0x5d7b356e, + 0x1f345c0e, + 0xd881296e, + 0x819f9091, + 0x59823550, + 0xbe2162c7, + 0x5f5dca40, + 0xad7e429e, + 0x4f66661f, + 0x7c5fbca0, + 0x819ff997, + 0x68ebdb56, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_complex_integer_multiply_add_indexed) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 20 * kInstructionSize); + __ dci(0x44fd7d52); // sqrdcmlah z18.s, z10.s, z13.s[1], #270 + // vl128 state = 0x5c66baad + __ dci(0x44fd7c13); // sqrdcmlah z19.s, z0.s, z13.s[1], 
#270 + // vl128 state = 0xac8c451b + __ dci(0x44f97e11); // sqrdcmlah z17.s, z16.s, z9.s[1], #270 + // vl128 state = 0x02ebccdb + __ dci(0x44e97615); // sqrdcmlah z21.s, z16.s, z9.s[0], #90 + // vl128 state = 0xe43b1032 + __ dci(0x44e97614); // sqrdcmlah z20.s, z16.s, z9.s[0], #90 + // vl128 state = 0xa28d9898 + __ dci(0x44e17635); // sqrdcmlah z21.s, z17.s, z1.s[0], #90 + // vl128 state = 0x021764c6 + __ dci(0x44e17634); // sqrdcmlah z20.s, z17.s, z1.s[0], #90 + // vl128 state = 0x812dbf22 + __ dci(0x44f07635); // sqrdcmlah z21.s, z17.s, z0.s[1], #90 + // vl128 state = 0x5e87a59e + __ dci(0x44f07465); // sqrdcmlah z5.s, z3.s, z0.s[1], #90 + // vl128 state = 0xd1a78d9d + __ dci(0x44f87675); // sqrdcmlah z21.s, z19.s, z8.s[1], #90 + // vl128 state = 0xd4500975 + __ dci(0x44b87e7d); // sqrdcmlah z29.h, z19.h, z0.h[3], #270 + // vl128 state = 0x765230ab + __ dci(0x44b876f9); // sqrdcmlah z25.h, z23.h, z0.h[3], #90 + // vl128 state = 0xca9c5bb4 + __ dci(0x44f874fb); // sqrdcmlah z27.s, z7.s, z8.s[1], #90 + // vl128 state = 0xa4bc044a + __ dci(0x44f070fa); // sqrdcmlah z26.s, z7.s, z0.s[1], #0 + // vl128 state = 0xd0eaa1df + __ dci(0x44f07038); // sqrdcmlah z24.s, z1.s, z0.s[1], #0 + // vl128 state = 0x80836f9f + __ dci(0x44b17030); // sqrdcmlah z16.h, z1.h, z1.h[2], #0 + // vl128 state = 0x59ffa1ce + __ dci(0x44b17032); // sqrdcmlah z18.h, z1.h, z1.h[2], #0 + // vl128 state = 0xdb8beca5 + __ dci(0x44b07430); // sqrdcmlah z16.h, z1.h, z0.h[2], #90 + // vl128 state = 0xe5b6a0e3 + __ dci(0x44b07438); // sqrdcmlah z24.h, z1.h, z0.h[2], #90 + // vl128 state = 0x19cc8c20 + __ dci(0x44b0743a); // sqrdcmlah z26.h, z1.h, z0.h[2], #90 + // vl128 state = 0x19c819af + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x19c819af, + 0xbb2225f2, + 0x7e54f513, + 0xdcbf6f0f, + 0x2bfdc97d, + 0x48890c54, + 0x65542c02, + 
0xaef6b224, + 0x993b14fd, + 0x244d27c5, + 0xe8767ba8, + 0x4397a148, + 0xb3efcd2e, + 0xb5894aba, + 0x2a0f6f7a, + 0xbe45142c, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_saturating_multiply_add_long_vector) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 40 * kInstructionSize); + __ dci(0x44db629b); // sqdmlalb z27.d, z20.s, z27.s + // vl128 state = 0x61e408e4 + __ dci(0x44db631f); // sqdmlalb z31.d, z24.s, z27.s + // vl128 state = 0xf146813f + __ dci(0x44da6b1d); // sqdmlslb z29.d, z24.s, z26.s + // vl128 state = 0xb8d07371 + __ dci(0x44da6a35); // sqdmlslb z21.d, z17.s, z26.s + // vl128 state = 0xaf43cc88 + __ dci(0x444a6a3d); // sqdmlslb z29.h, z17.b, z10.b + // vl128 state = 0xba4c5067 + __ dci(0x444a6a39); // sqdmlslb z25.h, z17.b, z10.b + // vl128 state = 0x396202c3 + __ dci(0x445a6829); // sqdmlslb z9.h, z1.b, z26.b + // vl128 state = 0x22095f7f + __ dci(0x445a6b28); // sqdmlslb z8.h, z25.b, z26.b + // vl128 state = 0xa9516b4b + __ dci(0x44da6b69); // sqdmlslb z9.d, z27.s, z26.s + // vl128 state = 0x1f048226 + __ dci(0x44da616d); // sqdmlalb z13.d, z11.s, z26.s + // vl128 state = 0x0fdd982f + __ dci(0x4458616f); // sqdmlalb z15.h, z11.b, z24.b + // vl128 state = 0x461ba137 + __ dci(0x4449617f); // sqdmlalb z31.h, z11.b, z9.b + // vl128 state = 0xd1071b0c + __ dci(0x4459614f); // sqdmlalb z15.h, z10.b, z25.b + // vl128 state = 0x0fa6bae7 + __ dci(0x4458654d); // sqdmlalt z13.h, z10.b, z24.b + // vl128 state = 0xebd08a80 + __ dci(0x44586d05); // sqdmlslt z5.h, z8.b, z24.b + // vl128 state = 0xd4c41665 + __ dci(0x44506d84); // sqdmlslt z4.h, z12.b, z16.b + // vl128 state = 0x80f619f9 + __ dci(0x44506fc6); // sqdmlslt z6.h, z30.b, z16.b + // vl128 state = 0xb588af21 + __ dci(0x44566fc4); // sqdmlslt z4.h, z30.b, z22.b + // vl128 
state = 0x4dd8437a + __ dci(0x44566f0c); // sqdmlslt z12.h, z24.b, z22.b + // vl128 state = 0x48ca6e5c + __ dci(0x44566f0e); // sqdmlslt z14.h, z24.b, z22.b + // vl128 state = 0x02d6f977 + __ dci(0x44566746); // sqdmlalt z6.h, z26.b, z22.b + // vl128 state = 0x179f59f4 + __ dci(0x445767c4); // sqdmlalt z4.h, z30.b, z23.b + // vl128 state = 0xf2d2823c + __ dci(0x44d667c0); // sqdmlalt z0.d, z30.s, z22.s + // vl128 state = 0x404c277e + __ dci(0x44566742); // sqdmlalt z2.h, z26.b, z22.b + // vl128 state = 0x986a72c1 + __ dci(0x44c6674a); // sqdmlalt z10.d, z26.s, z6.s + // vl128 state = 0xbb8044ab + __ dci(0x44c66742); // sqdmlalt z2.d, z26.s, z6.s + // vl128 state = 0x9f5b244b + __ dci(0x44ce6706); // sqdmlalt z6.d, z24.s, z14.s + // vl128 state = 0xc6ce6266 + __ dci(0x44ce670e); // sqdmlalt z14.d, z24.s, z14.s + // vl128 state = 0xc9e1a461 + __ dci(0x44de6746); // sqdmlalt z6.d, z26.s, z30.s + // vl128 state = 0x9f133504 + __ dci(0x44dc6342); // sqdmlalb z2.d, z26.s, z28.s + // vl128 state = 0x42deb468 + __ dci(0x44d46366); // sqdmlalb z6.d, z27.s, z20.s + // vl128 state = 0xb3436cd4 + __ dci(0x44d5626e); // sqdmlalb z14.d, z19.s, z21.s + // vl128 state = 0x0e0533ac + __ dci(0x44d5646f); // sqdmlalt z15.d, z3.s, z21.s + // vl128 state = 0x92d04e7b + __ dci(0x44d36467); // sqdmlalt z7.d, z3.s, z19.s + // vl128 state = 0xd9fa8b4d + __ dci(0x44d360ef); // sqdmlalb z15.d, z7.s, z19.s + // vl128 state = 0x9c9a5778 + __ dci(0x44d3646b); // sqdmlalt z11.d, z3.s, z19.s + // vl128 state = 0x40d7c923 + __ dci(0x4492646f); // sqdmlalt z15.s, z3.h, z18.h + // vl128 state = 0x0b5b2334 + __ dci(0x4492647f); // sqdmlalt z31.s, z3.h, z18.h + // vl128 state = 0xfe6302c1 + __ dci(0x4494647d); // sqdmlalt z29.s, z3.h, z20.h + // vl128 state = 0xe3c05a37 + __ dci(0x4484666d); // sqdmlalt z13.s, z19.h, z4.h + // vl128 state = 0x15169e94 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + 
END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x15169e94, + 0x6101102c, + 0xa5586d26, + 0x3fbf4f9f, + 0x8e62994d, + 0x4d77a9e5, + 0x4ceadc9e, + 0x8247db61, + 0x4aa10859, + 0x0b3280b3, + 0x015d75ea, + 0x1cf4825e, + 0xda7d3fea, + 0xc24bd624, + 0x60ee565a, + 0x7ac92c39, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_saturating_multiply_add_interleaved_long) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 30 * kInstructionSize); + __ dci(0x449e0ac6); // sqdmlalbt z6.s, z22.h, z30.h + // vl128 state = 0x1f0ef37c + __ dci(0x449c0ae4); // sqdmlalbt z4.s, z23.h, z28.h + // vl128 state = 0xa80bf2c8 + __ dci(0x449c0ae6); // sqdmlalbt z6.s, z23.h, z28.h + // vl128 state = 0x4c5b0e8f + __ dci(0x449e0aae); // sqdmlalbt z14.s, z21.h, z30.h + // vl128 state = 0xa6482041 + __ dci(0x449e0aaf); // sqdmlalbt z15.s, z21.h, z30.h + // vl128 state = 0x6ef82b7a + __ dci(0x449c0a2b); // sqdmlalbt z11.s, z17.h, z28.h + // vl128 state = 0x0070a7fa + __ dci(0x449e0829); // sqdmlalbt z9.s, z1.h, z30.h + // vl128 state = 0x08b9efc6 + __ dci(0x449e0c61); // sqdmlslbt z1.s, z3.h, z30.h + // vl128 state = 0xebd25c16 + __ dci(0x449e0c60); // sqdmlslbt z0.s, z3.h, z30.h + // vl128 state = 0x0926abbe + __ dci(0x449e0c70); // sqdmlslbt z16.s, z3.h, z30.h + // vl128 state = 0xe9d3e5a7 + __ dci(0x449f0cf4); // sqdmlslbt z20.s, z7.h, z31.h + // vl128 state = 0xf062523d + __ dci(0x449f08b5); // sqdmlalbt z21.s, z5.h, z31.h + // vl128 state = 0x6034c14e + __ dci(0x449f08a5); // sqdmlalbt z5.s, z5.h, z31.h + // vl128 state = 0x0a73c74b + __ dci(0x448e08b5); // sqdmlalbt z21.s, z5.h, z14.h + // vl128 state = 0xa4af2700 + __ dci(0x448c08e5); // sqdmlalbt z5.s, z7.h, z12.h + // vl128 state = 0x7499c587 + __ dci(0x448c08e1); // sqdmlalbt z1.s, z7.h, z12.h 
+ // vl128 state = 0x968bca0e + __ dci(0x448c0971); // sqdmlalbt z17.s, z11.h, z12.h + // vl128 state = 0xd7890449 + __ dci(0x448f0975); // sqdmlalbt z21.s, z11.h, z15.h + // vl128 state = 0xa2393863 + __ dci(0x448f0977); // sqdmlalbt z23.s, z11.h, z15.h + // vl128 state = 0x0f7d9688 + __ dci(0x449f093f); // sqdmlalbt z31.s, z9.h, z31.h + // vl128 state = 0xeb16ca99 + __ dci(0x449f09f7); // sqdmlalbt z23.s, z15.h, z31.h + // vl128 state = 0x5eca8b00 + __ dci(0x449f0987); // sqdmlalbt z7.s, z12.h, z31.h + // vl128 state = 0xf8f22744 + __ dci(0x449f0a83); // sqdmlalbt z3.s, z20.h, z31.h + // vl128 state = 0xc20d54f5 + __ dci(0x449b0ac1); // sqdmlalbt z1.s, z22.h, z27.h + // vl128 state = 0xf371a13b + __ dci(0x449b0aa9); // sqdmlalbt z9.s, z21.h, z27.h + // vl128 state = 0xffae55ce + __ dci(0x449b0ab9); // sqdmlalbt z25.s, z21.h, z27.h + // vl128 state = 0x0c5ab866 + __ dci(0x44d30aa9); // sqdmlalbt z9.d, z21.s, z19.s + // vl128 state = 0x388bfe27 + __ dci(0x44d30aab); // sqdmlalbt z11.d, z21.s, z19.s + // vl128 state = 0x6dc15ec8 + __ dci(0x44d70baf); // sqdmlalbt z15.d, z29.s, z23.s + // vl128 state = 0x6a858021 + __ dci(0x44d70ba7); // sqdmlalbt z7.d, z29.s, z23.s + // vl128 state = 0x52416517 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x52416517, + 0x1a625e10, + 0x3eaaa30f, + 0x0eefe820, + 0x9e2f7744, + 0x3dbc3206, + 0xca85b926, + 0x9428c809, + 0x7c35818c, + 0xb8bc3648, + 0x5b215c50, + 0xbdb56ba5, + 0xe4e4bc54, + 0x69ba132f, + 0xa498b17a, + 0xf482b2a6, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_saturating_multiply_add_long_indexed) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + 
ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44f52e3d); // sqdmlalt z29.d, z17.s, z5.s[3] + // vl128 state = 0x2a284ede + __ dci(0x44f52e3c); // sqdmlalt z28.d, z17.s, z5.s[3] + // vl128 state = 0x48a615e9 + __ dci(0x44f72c3d); // sqdmlalt z29.d, z1.s, z7.s[3] + // vl128 state = 0x1bbe9cc5 + __ dci(0x44b62c35); // sqdmlalt z21.s, z1.h, z6.h[5] + // vl128 state = 0x99966225 + __ dci(0x44b624b7); // sqdmlalt z23.s, z5.h, z6.h[4] + // vl128 state = 0x36da4a3a + __ dci(0x44f626b6); // sqdmlalt z22.d, z21.s, z6.s[2] + // vl128 state = 0xc009e514 + __ dci(0x44f62226); // sqdmlalb z6.d, z17.s, z6.s[2] + // vl128 state = 0x2140ee4b + __ dci(0x44fa222e); // sqdmlalb z14.d, z17.s, z10.s[2] + // vl128 state = 0xf78c8bec + __ dci(0x44fa2aac); // sqdmlalb z12.d, z21.s, z10.s[3] + // vl128 state = 0x329238c6 + __ dci(0x44fa2abc); // sqdmlalb z28.d, z21.s, z10.s[3] + // vl128 state = 0xadc9f9db + __ dci(0x44fa2aac); // sqdmlalb z12.d, z21.s, z10.s[3] + // vl128 state = 0x877f64cf + __ dci(0x44ba2a88); // sqdmlalb z8.s, z20.h, z2.h[7] + // vl128 state = 0x4e4a3117 + __ dci(0x44fb2a89); // sqdmlalb z9.d, z20.s, z11.s[3] + // vl128 state = 0xe26b041b + __ dci(0x44f32ab9); // sqdmlalb z25.d, z21.s, z3.s[3] + // vl128 state = 0xbcf4e0b2 + __ dci(0x44e328bd); // sqdmlalb z29.d, z5.s, z3.s[1] + // vl128 state = 0x31391cc2 + __ dci(0x44f228ad); // sqdmlalb z13.d, z5.s, z2.s[3] + // vl128 state = 0xf4c6c098 + __ dci(0x44e238af); // sqdmlslb z15.d, z5.s, z2.s[1] + // vl128 state = 0x6e7cb20c + __ dci(0x44e639ad); // sqdmlslb z13.d, z13.s, z6.s[1] + // vl128 state = 0xed16e292 + __ dci(0x44a63daf); // sqdmlslt z15.s, z13.h, z6.h[1] + // vl128 state = 0x7c0c3a9a + __ dci(0x44ae3cbf); // sqdmlslt z31.s, z5.h, z6.h[3] + // vl128 state = 0x0e2dce8d + __ dci(0x44a634b7); // sqdmlslt z23.s, z5.h, z6.h[0] + // vl128 state = 0xf3eeab27 + __ dci(0x44e234b5); // sqdmlslt z21.d, z5.s, z2.s[0] + // vl128 state = 0x55193209 + __ dci(0x44a23437); // sqdmlslt z23.s, z1.h, z2.h[0] + 
// vl128 state = 0x7652b538 + __ dci(0x44a63535); // sqdmlslt z21.s, z9.h, z6.h[0] + // vl128 state = 0x76046ab4 + __ dci(0x44a235b4); // sqdmlslt z20.s, z13.h, z2.h[0] + // vl128 state = 0x2f23fd0d + __ dci(0x44a234e4); // sqdmlslt z4.s, z7.h, z2.h[0] + // vl128 state = 0x2a50774c + __ dci(0x44a234ec); // sqdmlslt z12.s, z7.h, z2.h[0] + // vl128 state = 0x01ea8843 + __ dci(0x44a324e8); // sqdmlalt z8.s, z7.h, z3.h[0] + // vl128 state = 0xed54a157 + __ dci(0x44a334c9); // sqdmlslt z9.s, z6.h, z3.h[0] + // vl128 state = 0x39e0227b + __ dci(0x44a324f9); // sqdmlalt z25.s, z7.h, z3.h[0] + // vl128 state = 0xf163fa0b + __ dci(0x44a224d8); // sqdmlalt z24.s, z6.h, z2.h[0] + // vl128 state = 0xbb4e0d24 + __ dci(0x44b22448); // sqdmlalt z8.s, z2.h, z2.h[4] + // vl128 state = 0x26c102cc + __ dci(0x44f224d8); // sqdmlalt z24.d, z6.s, z2.s[2] + // vl128 state = 0x40f79dde + __ dci(0x44f220f9); // sqdmlalb z25.d, z7.s, z2.s[2] + // vl128 state = 0xf9d62034 + __ dci(0x44f020a9); // sqdmlalb z9.d, z5.s, z0.s[2] + // vl128 state = 0x2b78be2f + __ dci(0x44f424ad); // sqdmlalt z13.d, z5.s, z4.s[2] + // vl128 state = 0xf0701e23 + __ dci(0x44f430a5); // sqdmlslb z5.d, z5.s, z4.s[2] + // vl128 state = 0x992b12d6 + __ dci(0x44f130a4); // sqdmlslb z4.d, z5.s, z1.s[2] + // vl128 state = 0x50292759 + __ dci(0x44f130ac); // sqdmlslb z12.d, z5.s, z1.s[2] + // vl128 state = 0x795462f2 + __ dci(0x44f3302d); // sqdmlslb z13.d, z1.s, z3.s[2] + // vl128 state = 0x8ac29815 + __ dci(0x44e3300c); // sqdmlslb z12.d, z0.s, z3.s[0] + // vl128 state = 0x842471eb + __ dci(0x44e3300d); // sqdmlslb z13.d, z0.s, z3.s[0] + // vl128 state = 0x28762af1 + __ dci(0x44eb321d); // sqdmlslb z29.d, z16.s, z11.s[0] + // vl128 state = 0x352de071 + __ dci(0x44ef3259); // sqdmlslb z25.d, z18.s, z15.s[0] + // vl128 state = 0x90a4cf15 + __ dci(0x44ff3349); // sqdmlslb z9.d, z26.s, z15.s[2] + // vl128 state = 0x6be7e76a + __ dci(0x44fb3319); // sqdmlslb z25.d, z24.s, z11.s[2] + // vl128 state = 0x7023e2de + __ 
dci(0x44bb3b18); // sqdmlslb z24.s, z24.h, z3.h[7] + // vl128 state = 0xad48664c + __ dci(0x44bb3b19); // sqdmlslb z25.s, z24.h, z3.h[7] + // vl128 state = 0xc7d8239b + __ dci(0x44bb3b11); // sqdmlslb z17.s, z24.h, z3.h[7] + // vl128 state = 0x0d9b2b9b + __ dci(0x44f33b15); // sqdmlslb z21.d, z24.s, z3.s[3] + // vl128 state = 0xbdb9c559 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xbdb9c559, + 0x0c2f83d5, + 0x3e1f2607, + 0x2db954ea, + 0xff33857d, + 0xd567c205, + 0x8b5ced4c, + 0x19ecc4d9, + 0x8581949e, + 0x30f1a921, + 0x8c94071b, + 0xb9ad4919, + 0x32dbb108, + 0x634f9cd4, + 0x2a122429, + 0xdae127f1, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_floating_multiply_add_long_vector) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm, kFpInputSet); + // state = 0x1e5cbcac + + { + ExactAssemblyScope scope(&masm, 30 * kInstructionSize); + __ dci(0x64bfa635); // fmlslt z21.s, z17.h, z31.h + // vl128 state = 0x48383595 + __ dci(0x64bf867d); // fmlalt z29.s, z19.h, z31.h + // vl128 state = 0xf2812c0e + __ dci(0x64af877c); // fmlalt z28.s, z27.h, z15.h + // vl128 state = 0x161daf06 + __ dci(0x64af8774); // fmlalt z20.s, z27.h, z15.h + // vl128 state = 0x8146f2bf + __ dci(0x64be877c); // fmlalt z28.s, z27.h, z30.h + // vl128 state = 0x90bcd864 + __ dci(0x64bd876c); // fmlalt z12.s, z27.h, z29.h + // vl128 state = 0x22b60b78 + __ dci(0x64bf8728); // fmlalt z8.s, z25.h, z31.h + // vl128 state = 0x2c9ce51a + __ dci(0x64bf836a); // fmlalb z10.s, z27.h, z31.h + // vl128 state = 0x40e6b398 + __ dci(0x64bf87eb); // fmlalt z11.s, z31.h, z31.h + // vl128 state = 0x479c4a98 + __ dci(0x64bf87e9); // fmlalt z9.s, z31.h, z31.h + // vl128 state = 
0x25c987ad + __ dci(0x64b78779); // fmlalt z25.s, z27.h, z23.h + // vl128 state = 0xb4fbc429 + __ dci(0x64b1877b); // fmlalt z27.s, z27.h, z17.h + // vl128 state = 0x390616d8 + __ dci(0x64b1871f); // fmlalt z31.s, z24.h, z17.h + // vl128 state = 0x7f24d2bf + __ dci(0x64b5878f); // fmlalt z15.s, z28.h, z21.h + // vl128 state = 0x01a90318 + __ dci(0x64b4870d); // fmlalt z13.s, z24.h, z20.h + // vl128 state = 0x08789c2c + __ dci(0x64b48709); // fmlalt z9.s, z24.h, z20.h + // vl128 state = 0x169f9b57 + __ dci(0x64b48779); // fmlalt z25.s, z27.h, z20.h + // vl128 state = 0xad4f23d7 + __ dci(0x64bc8671); // fmlalt z17.s, z19.h, z28.h + // vl128 state = 0xf86b0a64 + __ dci(0x64b98673); // fmlalt z19.s, z19.h, z25.h + // vl128 state = 0x78a848b2 + __ dci(0x64b18623); // fmlalt z3.s, z17.h, z17.h + // vl128 state = 0xcac211c9 + __ dci(0x64b18642); // fmlalt z2.s, z18.h, z17.h + // vl128 state = 0x9afcbe3f + __ dci(0x64b1a6c0); // fmlslt z0.s, z22.h, z17.h + // vl128 state = 0x0047e4b2 + __ dci(0x64b086c4); // fmlalt z4.s, z22.h, z16.h + // vl128 state = 0x203324b5 + __ dci(0x64b28645); // fmlalt z5.s, z18.h, z18.h + // vl128 state = 0x7340c432 + __ dci(0x64b28264); // fmlalb z4.s, z19.h, z18.h + // vl128 state = 0x6dc657a9 + __ dci(0x64b28765); // fmlalt z5.s, z27.h, z18.h + // vl128 state = 0xa5d3889b + __ dci(0x64ba8561); // fmlalt z1.s, z11.h, z26.h + // vl128 state = 0x5bbd2dd9 + __ dci(0x64aa8543); // fmlalt z3.s, z10.h, z10.h + // vl128 state = 0xa65ec305 + __ dci(0x64ae8141); // fmlalb z1.s, z10.h, z14.h + // vl128 state = 0xd23d588c + __ dci(0x64ae80c3); // fmlalb z3.s, z6.h, z14.h + // vl128 state = 0x5a082bbc + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x5a082bbc, + 0x23c41852, + 0xf462f328, + 0x6fa4d12b, + 0x5e5f3e79, + 0x9939c7e6, + 0x0ed39313, + 0x2911107c, + 0x18f77b9a, + 0x7226d5b3, + 
0x05df3c07, + 0x1653749c, + 0xcb4f6acf, + 0x4c5f0755, + 0xc4eed654, + 0x47893eeb, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_mla_long_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44ea8d67); // smlalt z7.d, z11.s, z10.s[1] + // vl128 state = 0xd08dbe24 + __ dci(0x44ea9d2f); // umlalt z15.d, z9.s, z10.s[1] + // vl128 state = 0x56f6f237 + __ dci(0x44ea9d2d); // umlalt z13.d, z9.s, z10.s[1] + // vl128 state = 0x00f89e4d + __ dci(0x44eb992f); // umlalb z15.d, z9.s, z11.s[1] + // vl128 state = 0xca4e469e + __ dci(0x44ab99ae); // umlalb z14.s, z13.h, z3.h[3] + // vl128 state = 0xd4b18276 + __ dci(0x44ad99be); // umlalb z30.s, z13.h, z5.h[3] + // vl128 state = 0x8650a79e + __ dci(0x44ad99ba); // umlalb z26.s, z13.h, z5.h[3] + // vl128 state = 0x6fa1a501 + __ dci(0x44adb9f2); // umlslb z18.s, z15.h, z5.h[3] + // vl128 state = 0x1a56a5d4 + __ dci(0x44bda9f3); // smlslb z19.s, z15.h, z5.h[7] + // vl128 state = 0xfdb18057 + __ dci(0x44b9a1fb); // smlslb z27.s, z15.h, z1.h[6] + // vl128 state = 0xb46b6c28 + __ dci(0x44b8a1b3); // smlslb z19.s, z13.h, z0.h[6] + // vl128 state = 0x623c62c3 + __ dci(0x44bc81b1); // smlalb z17.s, z13.h, z4.h[6] + // vl128 state = 0x2abab4d3 + __ dci(0x44bc82b0); // smlalb z16.s, z21.h, z4.h[6] + // vl128 state = 0x7a028731 + __ dci(0x44ac92b8); // umlalb z24.s, z21.h, z4.h[2] + // vl128 state = 0xf48f6936 + __ dci(0x44a4923a); // umlalb z26.s, z17.h, z4.h[0] + // vl128 state = 0xbcdf888d + __ dci(0x44b49a3e); // umlalb z30.s, z17.h, z4.h[5] + // vl128 state = 0x5060778e + __ dci(0x44b69a1c); // umlalb z28.s, z16.h, z6.h[5] + // vl128 state = 0x16da3835 + __ dci(0x44b6b218); // umlslb z24.s, z16.h, z6.h[4] + // vl128 state = 0xac7fb4d0 + __ dci(0x44b2b25a); // umlslb 
z26.s, z18.h, z2.h[4] + // vl128 state = 0x8d05433b + __ dci(0x44b2ba0a); // umlslb z10.s, z16.h, z2.h[5] + // vl128 state = 0x62630101 + __ dci(0x44b29b08); // umlalb z8.s, z24.h, z2.h[5] + // vl128 state = 0x31ae445b + __ dci(0x44b29b00); // umlalb z0.s, z24.h, z2.h[5] + // vl128 state = 0x539a5875 + __ dci(0x44b29e08); // umlalt z8.s, z16.h, z2.h[5] + // vl128 state = 0x07d4bf73 + __ dci(0x44b29eaa); // umlalt z10.s, z21.h, z2.h[5] + // vl128 state = 0x314f48a8 + __ dci(0x44b2be2e); // umlslt z14.s, z17.h, z2.h[5] + // vl128 state = 0x91bd2c17 + __ dci(0x44b2be3e); // umlslt z30.s, z17.h, z2.h[5] + // vl128 state = 0x4cbf4360 + __ dci(0x44f2be7a); // umlslt z26.d, z19.s, z2.s[3] + // vl128 state = 0xe94e76a9 + __ dci(0x44f2ae4a); // smlslt z10.d, z18.s, z2.s[3] + // vl128 state = 0xd0c2c4cc + __ dci(0x44faae6e); // smlslt z14.d, z19.s, z10.s[3] + // vl128 state = 0xc64d6839 + __ dci(0x44faae6f); // smlslt z15.d, z19.s, z10.s[3] + // vl128 state = 0xa74358aa + __ dci(0x44faae67); // smlslt z7.d, z19.s, z10.s[3] + // vl128 state = 0xb8d9664b + __ dci(0x44fa8e57); // smlalt z23.d, z18.s, z10.s[3] + // vl128 state = 0xf1032ab4 + __ dci(0x44fa8c67); // smlalt z7.d, z3.s, z10.s[3] + // vl128 state = 0x763732f4 + __ dci(0x44eaac66); // smlslt z6.d, z3.s, z10.s[1] + // vl128 state = 0xdcf39367 + __ dci(0x44eaa456); // smlslt z22.d, z2.s, z10.s[0] + // vl128 state = 0x5ea67d82 + __ dci(0x44aea45e); // smlslt z30.s, z2.h, z6.h[2] + // vl128 state = 0x55da0908 + __ dci(0x44aaa64e); // smlslt z14.s, z18.h, z2.h[2] + // vl128 state = 0x69d105f5 + __ dci(0x44baa75e); // smlslt z30.s, z26.h, z2.h[6] + // vl128 state = 0x191bc065 + __ dci(0x44baa75a); // smlslt z26.s, z26.h, z2.h[6] + // vl128 state = 0xbf62d2a0 + __ dci(0x44eaa75b); // smlslt z27.d, z26.s, z10.s[0] + // vl128 state = 0x43803a21 + __ dci(0x44eabf5f); // umlslt z31.d, z26.s, z10.s[1] + // vl128 state = 0x0b33725c + __ dci(0x44ebbd57); // umlslt z23.d, z10.s, z11.s[1] + // vl128 state = 0x0059a0f5 + __ 
dci(0x44abbf55); // umlslt z21.s, z26.h, z3.h[3] + // vl128 state = 0xb587057f + __ dci(0x44abab5d); // smlslb z29.s, z26.h, z3.h[3] + // vl128 state = 0x0bfa30c6 + __ dci(0x44abab5c); // smlslb z28.s, z26.h, z3.h[3] + // vl128 state = 0x151045b4 + __ dci(0x44abaf78); // smlslt z24.s, z27.h, z3.h[3] + // vl128 state = 0xedb7fca9 + __ dci(0x44aaa77c); // smlslt z28.s, z27.h, z2.h[2] + // vl128 state = 0xb68216f9 + __ dci(0x44aaa178); // smlslb z24.s, z11.h, z2.h[2] + // vl128 state = 0x35447b11 + __ dci(0x44aa81fa); // smlalb z26.s, z15.h, z2.h[2] + // vl128 state = 0xf532285f + __ dci(0x44aa8198); // smlalb z24.s, z12.h, z2.h[2] + // vl128 state = 0xd414889b + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xd414889b, + 0x79d8f659, + 0xe2c8f06b, + 0x91aadf3d, + 0xffb92c3e, + 0xc2d3138e, + 0xdd9f4396, + 0xce39a88e, + 0xfe68a5ca, + 0xdcb072b2, + 0x3756ede6, + 0x5c2eef22, + 0x01fd02a4, + 0xdd8d4890, + 0x87500dc9, + 0x8c895325, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_mul_long_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44f1d492); // umullt z18.d, z4.s, z1.s[2] + // vl128 state = 0x4377a821 + __ dci(0x44fdd490); // umullt z16.d, z4.s, z13.s[2] + // vl128 state = 0x5879cb00 + __ dci(0x44fdc080); // smullb z0.d, z4.s, z13.s[2] + // vl128 state = 0xbe0f85f8 + __ dci(0x44fdc081); // smullb z1.d, z4.s, z13.s[2] + // vl128 state = 0xa0eb0d63 + __ dci(0x44fcc000); // smullb z0.d, z0.s, z12.s[2] + // vl128 state = 0xf023feb2 + __ dci(0x44ffc001); // smullb z1.d, z0.s, z15.s[2] + // vl128 state = 0xcc0dcc10 + __ 
dci(0x44ffc0c9); // smullb z9.d, z6.s, z15.s[2] + // vl128 state = 0x8e0d2525 + __ dci(0x44f7d0c8); // umullb z8.d, z6.s, z7.s[2] + // vl128 state = 0xaf711253 + __ dci(0x44b7d080); // umullb z0.s, z4.h, z7.h[4] + // vl128 state = 0x8cea3501 + __ dci(0x44f7d290); // umullb z16.d, z20.s, z7.s[2] + // vl128 state = 0x09be9a84 + __ dci(0x44f6da92); // umullb z18.d, z20.s, z6.s[3] + // vl128 state = 0x3906715f + __ dci(0x44fed296); // umullb z22.d, z20.s, z14.s[2] + // vl128 state = 0xf399bb76 + __ dci(0x44f6c292); // smullb z18.d, z20.s, z6.s[2] + // vl128 state = 0x33ceff98 + __ dci(0x44e6c2a2); // smullb z2.d, z21.s, z6.s[0] + // vl128 state = 0x00765739 + __ dci(0x44e6c323); // smullb z3.d, z25.s, z6.s[0] + // vl128 state = 0x3dad5b1f + __ dci(0x44e6c333); // smullb z19.d, z25.s, z6.s[0] + // vl128 state = 0xc5b39601 + __ dci(0x44e7c377); // smullb z23.d, z27.s, z7.s[0] + // vl128 state = 0x134b3d1f + __ dci(0x44e7d3ff); // umullb z31.d, z31.s, z7.s[0] + // vl128 state = 0xc4be3961 + __ dci(0x44e7d3fe); // umullb z30.d, z31.s, z7.s[0] + // vl128 state = 0x195e406b + __ dci(0x44e7c3da); // smullb z26.d, z30.s, z7.s[0] + // vl128 state = 0xae2522f9 + __ dci(0x44e7c2fe); // smullb z30.d, z23.s, z7.s[0] + // vl128 state = 0xed267bfb + __ dci(0x44e3c3f6); // smullb z22.d, z31.s, z3.s[0] + // vl128 state = 0x6f6eeec4 + __ dci(0x44f3c2f2); // smullb z18.d, z23.s, z3.s[2] + // vl128 state = 0x1689afdf + __ dci(0x44f3c2e2); // smullb z2.d, z23.s, z3.s[2] + // vl128 state = 0x24999374 + __ dci(0x44f3c06a); // smullb z10.d, z3.s, z3.s[2] + // vl128 state = 0x046126eb + __ dci(0x44f3c06b); // smullb z11.d, z3.s, z3.s[2] + // vl128 state = 0x6b39941f + __ dci(0x44f3c449); // smullt z9.d, z2.s, z3.s[2] + // vl128 state = 0xf161bcc6 + __ dci(0x44f3ccc8); // smullt z8.d, z6.s, z3.s[3] + // vl128 state = 0xbdc67c89 + __ dci(0x44f9ccd8); // smullt z24.d, z6.s, z9.s[3] + // vl128 state = 0xfed59871 + __ dci(0x44ffccdc); // smullt z28.d, z6.s, z15.s[3] + // vl128 state = 0x72746ff6 + 
__ dci(0x44fecc58); // smullt z24.d, z2.s, z14.s[3] + // vl128 state = 0xa15ee8f2 + __ dci(0x44bfcc48); // smullt z8.s, z2.h, z7.h[7] + // vl128 state = 0x3dccd2d6 + __ dci(0x44b7c84a); // smullb z10.s, z2.h, z7.h[5] + // vl128 state = 0x4537f0b2 + __ dci(0x44a5c84e); // smullb z14.s, z2.h, z5.h[1] + // vl128 state = 0x60e30690 + __ dci(0x44adca46); // smullb z6.s, z18.h, z5.h[3] + // vl128 state = 0xaef15cb5 + __ dci(0x44add847); // umullb z7.s, z2.h, z5.h[3] + // vl128 state = 0xe7df553d + __ dci(0x44bdd04f); // umullb z15.s, z2.h, z5.h[6] + // vl128 state = 0xa713f809 + __ dci(0x44bdc007); // smullb z7.s, z0.h, z5.h[6] + // vl128 state = 0x4907c6b7 + __ dci(0x44bdc005); // smullb z5.s, z0.h, z5.h[6] + // vl128 state = 0x98a83fd0 + __ dci(0x44bdc0b5); // smullb z21.s, z5.h, z5.h[6] + // vl128 state = 0x3e6cb588 + __ dci(0x44bcc094); // smullb z20.s, z4.h, z4.h[6] + // vl128 state = 0x37e5a4ce + __ dci(0x44bcc09c); // smullb z28.s, z4.h, z4.h[6] + // vl128 state = 0x719de631 + __ dci(0x44acc88c); // smullb z12.s, z4.h, z4.h[3] + // vl128 state = 0xf0f3dffe + __ dci(0x44aac884); // smullb z4.s, z4.h, z2.h[3] + // vl128 state = 0x61a714ff + __ dci(0x44a8c8ac); // smullb z12.s, z5.h, z0.h[3] + // vl128 state = 0xc47542ea + __ dci(0x44a8cea4); // smullt z4.s, z21.h, z0.h[3] + // vl128 state = 0x37865031 + __ dci(0x44a8daa5); // umullb z5.s, z21.h, z0.h[3] + // vl128 state = 0x28cf4dc6 + __ dci(0x44b8dae4); // umullb z4.s, z23.h, z0.h[7] + // vl128 state = 0x6fe181d0 + __ dci(0x44b9da6c); // umullb z12.s, z19.h, z1.h[7] + // vl128 state = 0xde65c7e3 + __ dci(0x44b9da64); // umullb z4.s, z19.h, z1.h[7] + // vl128 state = 0x040a7e45 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x040a7e45, + 0x48fc4c2b, + 0x9a1c67d1, + 0xcb88ffdd, + 0xcda205bc, + 0x7a47b6fb, + 0x68ae16c8, + 0x483353c9, + 0x91d91835, 
+ 0x17a9ca4a, + 0x4f3d394f, + 0x5182776c, + 0xc03c1d3b, + 0xe52799db, + 0x1ddd328e, + 0xe33903de, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sat_double_mul_high) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x046c711a); // sqdmulh z26.h, z8.h, z12.h + // vl128 state = 0xe962209c + __ dci(0x047c7138); // sqdmulh z24.h, z9.h, z28.h + // vl128 state = 0x06a43320 + __ dci(0x04fc7539); // sqrdmulh z25.d, z9.d, z28.d + // vl128 state = 0x8ce1cad6 + __ dci(0x04fc7029); // sqdmulh z9.d, z1.d, z28.d + // vl128 state = 0x6f3d1b22 + __ dci(0x04ac702d); // sqdmulh z13.s, z1.s, z12.s + // vl128 state = 0x14b0451c + __ dci(0x04a4742c); // sqrdmulh z12.s, z1.s, z4.s + // vl128 state = 0x60206a6a + __ dci(0x04a574ad); // sqrdmulh z13.s, z5.s, z5.s + // vl128 state = 0x388a9786 + __ dci(0x04a574a9); // sqrdmulh z9.s, z5.s, z5.s + // vl128 state = 0xee590c43 + __ dci(0x04e574e8); // sqrdmulh z8.d, z7.d, z5.d + // vl128 state = 0x8d16295c + __ dci(0x04e570ca); // sqdmulh z10.d, z6.d, z5.d + // vl128 state = 0x2a5c234c + __ dci(0x04e670cb); // sqdmulh z11.d, z6.d, z6.d + // vl128 state = 0xfacc9e06 + __ dci(0x04f6708f); // sqdmulh z15.d, z4.d, z22.d + // vl128 state = 0x2167ca56 + __ dci(0x04f67087); // sqdmulh z7.d, z4.d, z22.d + // vl128 state = 0xc7d7af1d + __ dci(0x04f77185); // sqdmulh z5.d, z12.d, z23.d + // vl128 state = 0x15f82ac2 + __ dci(0x04f67104); // sqdmulh z4.d, z8.d, z22.d + // vl128 state = 0xb2484707 + __ dci(0x04f6710c); // sqdmulh z12.d, z8.d, z22.d + // vl128 state = 0x5a53b8e7 + __ dci(0x04f6708d); // sqdmulh z13.d, z4.d, z22.d + // vl128 state = 0xa9affac2 + __ dci(0x04f67085); // sqdmulh z5.d, z4.d, z22.d + // vl128 state = 0xa425052d + __ dci(0x04fe7281); // sqdmulh z1.d, z20.d, z30.d + // 
vl128 state = 0x1c0f565c + __ dci(0x04ee72d1); // sqdmulh z17.d, z22.d, z14.d + // vl128 state = 0xff12c401 + __ dci(0x04ee7393); // sqdmulh z19.d, z28.d, z14.d + // vl128 state = 0xcd1d9d3a + __ dci(0x04ec73b2); // sqdmulh z18.d, z29.d, z12.d + // vl128 state = 0x2aa94767 + __ dci(0x04ee73fa); // sqdmulh z26.d, z31.d, z14.d + // vl128 state = 0x5ca68e9c + __ dci(0x04ef77ea); // sqrdmulh z10.d, z31.d, z15.d + // vl128 state = 0xe5b65473 + __ dci(0x04ff76e8); // sqrdmulh z8.d, z23.d, z31.d + // vl128 state = 0xcc4e8803 + __ dci(0x04fd76c9); // sqrdmulh z9.d, z22.d, z29.d + // vl128 state = 0x19fff884 + __ dci(0x04fd73d9); // sqdmulh z25.d, z30.d, z29.d + // vl128 state = 0xb99d6147 + __ dci(0x04e973dd); // sqdmulh z29.d, z30.d, z9.d + // vl128 state = 0xe8f11301 + __ dci(0x04b973dc); // sqdmulh z28.s, z30.s, z25.s + // vl128 state = 0x24af5ffe + __ dci(0x04b177dd); // sqrdmulh z29.s, z30.s, z17.s + // vl128 state = 0x5c32a08e + __ dci(0x04b177bc); // sqrdmulh z28.s, z29.s, z17.s + // vl128 state = 0x12c8c1c4 + __ dci(0x04f377ac); // sqrdmulh z12.d, z29.d, z19.d + // vl128 state = 0x7bc1f2e6 + __ dci(0x04f677ad); // sqrdmulh z13.d, z29.d, z22.d + // vl128 state = 0x67d2640f + __ dci(0x04fe76af); // sqrdmulh z15.d, z21.d, z30.d + // vl128 state = 0x98035fbd + __ dci(0x04ef76ae); // sqrdmulh z14.d, z21.d, z15.d + // vl128 state = 0x5e561fd3 + __ dci(0x04ee72ac); // sqdmulh z12.d, z21.d, z14.d + // vl128 state = 0xb56c3914 + __ dci(0x04ae72ee); // sqdmulh z14.s, z23.s, z14.s + // vl128 state = 0x6bb1c4b1 + __ dci(0x04be7266); // sqdmulh z6.s, z19.s, z30.s + // vl128 state = 0x5a5bdda6 + __ dci(0x04b67364); // sqdmulh z4.s, z27.s, z22.s + // vl128 state = 0x09a447ea + __ dci(0x04b27165); // sqdmulh z5.s, z11.s, z18.s + // vl128 state = 0xee84be35 + __ dci(0x04b27175); // sqdmulh z21.s, z11.s, z18.s + // vl128 state = 0x84146d85 + __ dci(0x04ba7137); // sqdmulh z23.s, z9.s, z26.s + // vl128 state = 0x92c2e5f6 + __ dci(0x04b3713f); // sqdmulh z31.s, z9.s, z19.s + // vl128 
state = 0xe3836fb8 + __ dci(0x04b37017); // sqdmulh z23.s, z0.s, z19.s + // vl128 state = 0xb5225206 + __ dci(0x04b37615); // sqrdmulh z21.s, z16.s, z19.s + // vl128 state = 0x157484c7 + __ dci(0x04b37491); // sqrdmulh z17.s, z4.s, z19.s + // vl128 state = 0x586c4bbf + __ dci(0x04b37481); // sqrdmulh z1.s, z4.s, z19.s + // vl128 state = 0xf5dc07cb + __ dci(0x04b37489); // sqrdmulh z9.s, z4.s, z19.s + // vl128 state = 0x591875a8 + __ dci(0x04b5748d); // sqrdmulh z13.s, z4.s, z21.s + // vl128 state = 0xb01f8fd5 + __ dci(0x043d748f); // sqrdmulh z15.b, z4.b, z29.b + // vl128 state = 0xd466a58c + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xd466a58c, + 0xe2ec7fba, + 0x1644e93a, + 0x7c3ecb2e, + 0xed4ecd78, + 0xfd5b5783, + 0xa7094efe, + 0x92bd623f, + 0x6da5e423, + 0x1648b588, + 0x63ce5947, + 0xba9c7d90, + 0x756ae20d, + 0x6d4032ba, + 0x87ae8b8f, + 0x722b2f6f, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_cmla_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44e867e6); // cmla z6.s, z31.s, z8.s[0], #90 + // vl128 state = 0xee56e69b + __ dci(0x44e86de4); // cmla z4.s, z15.s, z8.s[0], #270 + // vl128 state = 0x0ed2e9f5 + __ dci(0x44e86be5); // cmla z5.s, z31.s, z8.s[0], #180 + // vl128 state = 0x9074e2a6 + __ dci(0x44eb6bf5); // cmla z21.s, z31.s, z11.s[0], #180 + // vl128 state = 0x8f43b8a8 + __ dci(0x44eb6b31); // cmla z17.s, z25.s, z11.s[0], #180 + // vl128 state = 0xb6c51b97 + __ dci(0x44eb6135); // cmla z21.s, z9.s, z11.s[0], #0 + // vl128 state = 0x4236beed + __ dci(0x44e9633d); // cmla z29.s, z25.s, z9.s[0], #0 + // vl128 
state = 0x21879fe6 + __ dci(0x44f96379); // cmla z25.s, z27.s, z9.s[1], #0 + // vl128 state = 0x78172805 + __ dci(0x44fd6349); // cmla z9.s, z26.s, z13.s[1], #0 + // vl128 state = 0x242a3ae5 + __ dci(0x44f76341); // cmla z1.s, z26.s, z7.s[1], #0 + // vl128 state = 0xa734ef3b + __ dci(0x44f36305); // cmla z5.s, z24.s, z3.s[1], #0 + // vl128 state = 0x00a035b1 + __ dci(0x44f76381); // cmla z1.s, z28.s, z7.s[1], #0 + // vl128 state = 0xbdfda3d4 + __ dci(0x44f763e3); // cmla z3.s, z31.s, z7.s[1], #0 + // vl128 state = 0xe1ed6ed9 + __ dci(0x44b763cb); // cmla z11.h, z30.h, z7.h[2], #0 + // vl128 state = 0xae645ea8 + __ dci(0x44a763e9); // cmla z9.h, z31.h, z7.h[0], #0 + // vl128 state = 0x392b3511 + __ dci(0x44a762ab); // cmla z11.h, z21.h, z7.h[0], #0 + // vl128 state = 0x3a05f729 + __ dci(0x44a66aaf); // cmla z15.h, z21.h, z6.h[0], #180 + // vl128 state = 0x7cfa0c08 + __ dci(0x44a66aa7); // cmla z7.h, z21.h, z6.h[0], #180 + // vl128 state = 0x91749f43 + __ dci(0x44a663a5); // cmla z5.h, z29.h, z6.h[0], #0 + // vl128 state = 0x438479ab + __ dci(0x44a66bed); // cmla z13.h, z31.h, z6.h[0], #180 + // vl128 state = 0xc25ce86d + __ dci(0x44f66be9); // cmla z9.s, z31.s, z6.s[1], #180 + // vl128 state = 0x6e8bdeca + __ dci(0x44b66bd9); // cmla z25.h, z30.h, z6.h[2], #180 + // vl128 state = 0x04745a63 + __ dci(0x44b66bd8); // cmla z24.h, z30.h, z6.h[2], #180 + // vl128 state = 0xbfc59a82 + __ dci(0x44b66b7c); // cmla z28.h, z27.h, z6.h[2], #180 + // vl128 state = 0x12d70fc2 + __ dci(0x44b6617e); // cmla z30.h, z11.h, z6.h[2], #0 + // vl128 state = 0x53f4b9a1 + __ dci(0x44b7697c); // cmla z28.h, z11.h, z7.h[2], #180 + // vl128 state = 0x74e99c24 + __ dci(0x44b3692c); // cmla z12.h, z9.h, z3.h[2], #180 + // vl128 state = 0xdc80a875 + __ dci(0x44a1692e); // cmla z14.h, z9.h, z1.h[0], #180 + // vl128 state = 0x307af313 + __ dci(0x44b169af); // cmla z15.h, z13.h, z1.h[2], #180 + // vl128 state = 0xc92b23fe + __ dci(0x44b165a7); // cmla z7.h, z13.h, z1.h[2], #90 + // vl128 state = 
0x33a52d1c + __ dci(0x44b165a5); // cmla z5.h, z13.h, z1.h[2], #90 + // vl128 state = 0xbc53ebfc + __ dci(0x44f161a1); // cmla z1.s, z13.s, z1.s[1], #0 + // vl128 state = 0x7ba34076 + __ dci(0x44f261a0); // cmla z0.s, z13.s, z2.s[1], #0 + // vl128 state = 0x6fa2bab8 + __ dci(0x44b361b0); // cmla z16.h, z13.h, z3.h[2], #0 + // vl128 state = 0xaae67807 + __ dci(0x44b36092); // cmla z18.h, z4.h, z3.h[2], #0 + // vl128 state = 0xf1b05dff + __ dci(0x44b36202); // cmla z2.h, z16.h, z3.h[2], #0 + // vl128 state = 0xd226bf15 + __ dci(0x44b36a20); // cmla z0.h, z17.h, z3.h[2], #180 + // vl128 state = 0x6a8ade58 + __ dci(0x44b26a10); // cmla z16.h, z16.h, z2.h[2], #180 + // vl128 state = 0x075e00e4 + __ dci(0x44b26a18); // cmla z24.h, z16.h, z2.h[2], #180 + // vl128 state = 0x9bcef7bd + __ dci(0x44b06a28); // cmla z8.h, z17.h, z0.h[2], #180 + // vl128 state = 0x8ac6d4b3 + __ dci(0x44b06a2a); // cmla z10.h, z17.h, z0.h[2], #180 + // vl128 state = 0x51993d51 + __ dci(0x44b0620b); // cmla z11.h, z16.h, z0.h[2], #0 + // vl128 state = 0x6d134734 + __ dci(0x44b06209); // cmla z9.h, z16.h, z0.h[2], #0 + // vl128 state = 0x0ee4031f + __ dci(0x44f06a0d); // cmla z13.s, z16.s, z0.s[1], #180 + // vl128 state = 0x08ea247b + __ dci(0x44f06b2c); // cmla z12.s, z25.s, z0.s[1], #180 + // vl128 state = 0x6acbb19a + __ dci(0x44f1692d); // cmla z13.s, z9.s, z1.s[1], #180 + // vl128 state = 0x3ea2d161 + __ dci(0x44b36925); // cmla z5.h, z9.h, z3.h[2], #180 + // vl128 state = 0x5b962e9b + __ dci(0x44b36921); // cmla z1.h, z9.h, z3.h[2], #180 + // vl128 state = 0x029f0eca + __ dci(0x44b36d69); // cmla z9.h, z11.h, z3.h[2], #270 + // vl128 state = 0x39a63c65 + __ dci(0x44bb6d28); // cmla z8.h, z9.h, z3.h[3], #270 + // vl128 state = 0x6d58c136 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x6d58c136, + 0xfbdbae97, + 0x85c3cf1a, 
+ 0xe4b53177, + 0x2f714586, + 0xde1afee8, + 0xd9613d2e, + 0x842c85a6, + 0xdc285523, + 0xccba7ba9, + 0x79e1e6f7, + 0xb19427f4, + 0x20d08a3a, + 0xfb7f4c43, + 0x0721ed60, + 0x4ee795ab, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_flogb) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x651cb31e); // flogb z30.s, p4/m, z24.s + // vl128 state = 0x161f1855 + __ dci(0x651cb3ae); // flogb z14.s, p4/m, z29.s + // vl128 state = 0xf9e5ce4d + __ dci(0x651cb3be); // flogb z30.s, p4/m, z29.s + // vl128 state = 0xa06176bc + __ dci(0x651ea3bc); // flogb z28.d, p0/m, z29.d + // vl128 state = 0xf793f7bb + __ dci(0x651ea3cc); // flogb z12.d, p0/m, z30.d + // vl128 state = 0xe5d71081 + __ dci(0x651ea3dc); // flogb z28.d, p0/m, z30.d + // vl128 state = 0x33ffc09f + __ dci(0x651ea3d4); // flogb z20.d, p0/m, z30.d + // vl128 state = 0xd908a72e + __ dci(0x651ea3d5); // flogb z21.d, p0/m, z30.d + // vl128 state = 0x9528251a + __ dci(0x651ca394); // flogb z20.s, p0/m, z28.s + // vl128 state = 0xb1ac4188 + __ dci(0x651ca396); // flogb z22.s, p0/m, z28.s + // vl128 state = 0xdc328726 + __ dci(0x651ca1d7); // flogb z23.s, p0/m, z14.s + // vl128 state = 0xfc232eb7 + __ dci(0x651ca947); // flogb z7.s, p2/m, z10.s + // vl128 state = 0xa9c53a1a + __ dci(0x651ca805); // flogb z5.s, p2/m, z0.s + // vl128 state = 0x9e4a47e9 + __ dci(0x651ea841); // flogb z1.d, p2/m, z2.d + // vl128 state = 0x7a2aeaf6 + __ dci(0x651ea843); // flogb z3.d, p2/m, z2.d + // vl128 state = 0xedd4aa97 + __ dci(0x651caa4b); // flogb z11.s, p2/m, z18.s + // vl128 state = 0x7bfefefb + __ dci(0x651cab6f); // flogb z15.s, p2/m, z27.s + // vl128 state = 0x91b5a183 + __ dci(0x651ca86b); // flogb z11.s, p2/m, z3.s + // vl128 state = 0x7b2776c2 + __ dci(0x651ca47b); 
// flogb z27.s, p1/m, z3.s + // vl128 state = 0x46ea46c7 + __ dci(0x651ca47f); // flogb z31.s, p1/m, z3.s + // vl128 state = 0x6e1d4e89 + __ dci(0x651ca477); // flogb z23.s, p1/m, z3.s + // vl128 state = 0x5ea1220c + __ dci(0x651ca035); // flogb z21.s, p0/m, z1.s + // vl128 state = 0xb06e32be + __ dci(0x651ca2a5); // flogb z5.s, p0/m, z21.s + // vl128 state = 0xb856d206 + __ dci(0x651caa2d); // flogb z13.s, p2/m, z17.s + // vl128 state = 0xebfd587f + __ dci(0x651caa3d); // flogb z29.s, p2/m, z17.s + // vl128 state = 0xb029ba8d + __ dci(0x651eaa7f); // flogb z31.d, p2/m, z19.d + // vl128 state = 0x07fd3f42 + __ dci(0x651ebb7e); // flogb z30.d, p6/m, z27.d + // vl128 state = 0x79761d7a + __ dci(0x651ebb76); // flogb z22.d, p6/m, z27.d + // vl128 state = 0xdf56dd22 + __ dci(0x651ebb72); // flogb z18.d, p6/m, z27.d + // vl128 state = 0xce798ad7 + __ dci(0x651eb276); // flogb z22.d, p4/m, z19.d + // vl128 state = 0x84dd46d6 + __ dci(0x651eb652); // flogb z18.d, p5/m, z18.d + // vl128 state = 0x2ea4a0df + __ dci(0x651cbe42); // flogb z2.s, p7/m, z18.s + // vl128 state = 0x8cdd1250 + __ dci(0x651cb852); // flogb z18.s, p6/m, z2.s + // vl128 state = 0x5f5b051d + __ dci(0x651eb956); // flogb z22.d, p6/m, z10.d + // vl128 state = 0x7a17cdd1 + __ dci(0x651eb11e); // flogb z30.d, p4/m, z8.d + // vl128 state = 0x7367f8ec + __ dci(0x651ab016); // flogb z22.h, p4/m, z0.h + // vl128 state = 0x8e1bfb06 + __ dci(0x651ab014); // flogb z20.h, p4/m, z0.h + // vl128 state = 0x2bcfa0f0 + __ dci(0x651aa81c); // flogb z28.h, p2/m, z0.h + // vl128 state = 0xeb9615e8 + __ dci(0x651aa80c); // flogb z12.h, p2/m, z0.h + // vl128 state = 0x5b55f5cd + __ dci(0x651aa808); // flogb z8.h, p2/m, z0.h + // vl128 state = 0xdd1718f2 + __ dci(0x651aa20a); // flogb z10.h, p0/m, z16.h + // vl128 state = 0x205e88ed + __ dci(0x651ab24e); // flogb z14.h, p4/m, z18.h + // vl128 state = 0x1c9f2035 + __ dci(0x651ab36f); // flogb z15.h, p4/m, z27.h + // vl128 state = 0xea22efaf + __ dci(0x651ab36b); // flogb 
z11.h, p4/m, z27.h + // vl128 state = 0x0cd0b8cd + __ dci(0x651abb29); // flogb z9.h, p6/m, z25.h + // vl128 state = 0xa1a017d1 + __ dci(0x651abb2d); // flogb z13.h, p6/m, z25.h + // vl128 state = 0x37d033d2 + __ dci(0x651aba0c); // flogb z12.h, p6/m, z16.h + // vl128 state = 0x971bde83 + __ dci(0x651cba1c); // flogb z28.s, p6/m, z16.s + // vl128 state = 0xb6b23bc2 + __ dci(0x651cba1d); // flogb z29.s, p6/m, z16.s + // vl128 state = 0x1af298e0 + __ dci(0x651cba15); // flogb z21.s, p6/m, z16.s + // vl128 state = 0x077a2869 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x077a2869, + 0xde5bc452, + 0xe80f0bc6, + 0x1c078cf2, + 0x66064034, + 0xa9f5264d, + 0xb19b24c1, + 0xb394864c, + 0x42991ea7, + 0xcf33094e, + 0xc4656d85, + 0x4cfa5b7e, + 0xbb7c121f, + 0xd2e8c839, + 0x028134cf, + 0x2f3e9779, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_fp_pair) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm, kFpInputSet); + // state = 0x1e5cbcac + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x64d591aa); // fminnmp z10.d, p4/m, z10.d, z13.d + // vl128 state = 0x02a0f18c + __ dci(0x64d59dab); // fminnmp z11.d, p7/m, z11.d, z13.d + // vl128 state = 0xd6d0a87f + __ dci(0x64d59d7b); // fminnmp z27.d, p7/m, z27.d, z11.d + // vl128 state = 0x364f93b4 + __ dci(0x64d59c2b); // fminnmp z11.d, p7/m, z11.d, z1.d + // vl128 state = 0xc7ed7476 + __ dci(0x64d59f23); // fminnmp z3.d, p7/m, z3.d, z25.d + // vl128 state = 0x7a1ec868 + __ dci(0x64d59f22); // fminnmp z2.d, p7/m, z2.d, z25.d + // vl128 state = 0x862a3f3d + __ dci(0x64d49fa0); // fmaxnmp z0.d, p7/m, z0.d, z29.d + // vl128 state = 0x11f71743 + __ dci(0x64d49fa8); // fmaxnmp 
z8.d, p7/m, z8.d, z29.d + // vl128 state = 0x302e45cd + __ dci(0x64d49fa9); // fmaxnmp z9.d, p7/m, z9.d, z29.d + // vl128 state = 0x11cca180 + __ dci(0x64d68fb9); // fmaxp z25.d, p3/m, z25.d, z29.d + // vl128 state = 0xee6b2d42 + __ dci(0x64d68fb8); // fmaxp z24.d, p3/m, z24.d, z29.d + // vl128 state = 0x060efb2c + __ dci(0x64d49fba); // fmaxnmp z26.d, p7/m, z26.d, z29.d + // vl128 state = 0x4f4232ac + __ dci(0x649497b2); // fmaxnmp z18.s, p5/m, z18.s, z29.s + // vl128 state = 0xe3e04479 + __ dci(0x649096b6); // faddp z22.s, p5/m, z22.s, z21.s + // vl128 state = 0x2a407146 + __ dci(0x64909237); // faddp z23.s, p4/m, z23.s, z17.s + // vl128 state = 0x6d0b2bb8 + __ dci(0x64d09027); // faddp z7.d, p4/m, z7.d, z1.d + // vl128 state = 0x5e7d175f + __ dci(0x64509006); // faddp z6.h, p4/m, z6.h, z0.h + // vl128 state = 0xa0a4cd20 + __ dci(0x64d0940e); // faddp z14.d, p5/m, z14.d, z0.d + // vl128 state = 0xf66b9cde + __ dci(0x64d09c4f); // faddp z15.d, p7/m, z15.d, z2.d + // vl128 state = 0x5a2d08c9 + __ dci(0x64d09c5f); // faddp z31.d, p7/m, z31.d, z2.d + // vl128 state = 0x2e390409 + __ dci(0x64d09c57); // faddp z23.d, p7/m, z23.d, z2.d + // vl128 state = 0xfb4af476 + __ dci(0x64d09c56); // faddp z22.d, p7/m, z22.d, z2.d + // vl128 state = 0x8d8c621b + __ dci(0x64d08e5e); // faddp z30.d, p3/m, z30.d, z18.d + // vl128 state = 0xba8962e6 + __ dci(0x64d0845c); // faddp z28.d, p1/m, z28.d, z2.d + // vl128 state = 0x224654c6 + __ dci(0x64d0845d); // faddp z29.d, p1/m, z29.d, z2.d + // vl128 state = 0xef608134 + __ dci(0x64d08e4d); // faddp z13.d, p3/m, z13.d, z18.d + // vl128 state = 0x5adedbf3 + __ dci(0x64908645); // faddp z5.s, p1/m, z5.s, z18.s + // vl128 state = 0x04b4f366 + __ dci(0x64908a4d); // faddp z13.s, p2/m, z13.s, z18.s + // vl128 state = 0xf0a7482a + __ dci(0x64d08245); // faddp z5.d, p0/m, z5.d, z18.d + // vl128 state = 0x0f2ccd61 + __ dci(0x64909255); // faddp z21.s, p4/m, z21.s, z18.s + // vl128 state = 0x7665491f + __ dci(0x649096c5); // faddp z5.s, p5/m, 
z5.s, z22.s + // vl128 state = 0xc3b53fd3 + __ dci(0x649492c1); // fmaxnmp z1.s, p4/m, z1.s, z22.s + // vl128 state = 0x589fd64a + __ dci(0x649096d1); // faddp z17.s, p5/m, z17.s, z22.s + // vl128 state = 0x5a0d0d52 + __ dci(0x649096d5); // faddp z21.s, p5/m, z21.s, z22.s + // vl128 state = 0xba57cd51 + __ dci(0x649096d4); // faddp z20.s, p5/m, z20.s, z22.s + // vl128 state = 0xa5d7b29d + __ dci(0x649093d0); // faddp z16.s, p4/m, z16.s, z30.s + // vl128 state = 0xa62cce9e + __ dci(0x64909318); // faddp z24.s, p4/m, z24.s, z24.s + // vl128 state = 0x8cc209c7 + __ dci(0x64909008); // faddp z8.s, p4/m, z8.s, z0.s + // vl128 state = 0x56a9af04 + __ dci(0x64969000); // fmaxp z0.s, p4/m, z0.s, z0.s + // vl128 state = 0xc45f824a + __ dci(0x64569004); // fmaxp z4.h, p4/m, z4.h, z0.h + // vl128 state = 0x82da5cb7 + __ dci(0x64569000); // fmaxp z0.h, p4/m, z0.h, z0.h + // vl128 state = 0xa9fff0bf + __ dci(0x64569001); // fmaxp z1.h, p4/m, z1.h, z0.h + // vl128 state = 0x71c2e09a + __ dci(0x64569605); // fmaxp z5.h, p5/m, z5.h, z16.h + // vl128 state = 0xe50c8b49 + __ dci(0x64579624); // fminp z4.h, p5/m, z4.h, z17.h + // vl128 state = 0x4f3817cb + __ dci(0x6457962c); // fminp z12.h, p5/m, z12.h, z17.h + // vl128 state = 0x5a773e57 + __ dci(0x64d5963c); // fminnmp z28.d, p5/m, z28.d, z17.d + // vl128 state = 0xa5c5e37c + __ dci(0x64d7943e); // fminp z30.d, p5/m, z30.d, z1.d + // vl128 state = 0xc778f8a3 + __ dci(0x6457953a); // fminp z26.h, p5/m, z26.h, z9.h + // vl128 state = 0x01abc4af + __ dci(0x6457952a); // fminp z10.h, p5/m, z10.h, z9.h + // vl128 state = 0x45483a17 + __ dci(0x64579d7a); // fminp z26.h, p7/m, z26.h, z11.h + // vl128 state = 0x355b08b3 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x355b08b3, + 0x8f7890cd, + 0x5dddb069, + 0x030a5f52, + 0xc569c150, + 0x060423ba, + 0x5d729bd0, + 
0x079b4f8b, + 0x06e75e58, + 0x6f631884, + 0xddc735f0, + 0x7213b8e2, + 0x8cbf507c, + 0x40654268, + 0x3cd7ad6c, + 0xfba0ee9e, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_fmlal_fmlsl_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x64a94f15); // fmlalt z21.s, z24.h, z1.h[3] + // vl128 state = 0x0895849b + __ dci(0x64ab4f9d); // fmlalt z29.s, z28.h, z3.h[3] + // vl128 state = 0x6e0cf3fe + __ dci(0x64a74f9c); // fmlalt z28.s, z28.h, z7.h[1] + // vl128 state = 0x482b4f57 + __ dci(0x64a74dde); // fmlalt z30.s, z14.h, z7.h[1] + // vl128 state = 0xf047791e + __ dci(0x64a74cee); // fmlalt z14.s, z7.h, z7.h[1] + // vl128 state = 0xde33332c + __ dci(0x64a648ef); // fmlalb z15.s, z7.h, z6.h[1] + // vl128 state = 0xf7148941 + __ dci(0x64a648ee); // fmlalb z14.s, z7.h, z6.h[1] + // vl128 state = 0x69f23fcb + __ dci(0x64b649ea); // fmlalb z10.s, z15.h, z6.h[5] + // vl128 state = 0x979eea1a + __ dci(0x64b649ee); // fmlalb z14.s, z15.h, z6.h[5] + // vl128 state = 0x522917a9 + __ dci(0x64b649e6); // fmlalb z6.s, z15.h, z6.h[5] + // vl128 state = 0x7d773525 + __ dci(0x64b64ba2); // fmlalb z2.s, z29.h, z6.h[5] + // vl128 state = 0x220960c6 + __ dci(0x64b46baa); // fmlslb z10.s, z29.h, z4.h[5] + // vl128 state = 0x2c8e384a + __ dci(0x64b46dab); // fmlslt z11.s, z13.h, z4.h[5] + // vl128 state = 0xa592cde1 + __ dci(0x64b467bb); // fmlslt z27.s, z29.h, z4.h[4] + // vl128 state = 0xba31bd61 + __ dci(0x64b665b3); // fmlslt z19.s, z13.h, z6.h[4] + // vl128 state = 0x75dade04 + __ dci(0x64b663bb); // fmlslb z27.s, z29.h, z6.h[4] + // vl128 state = 0xa7358466 + __ dci(0x64a662bf); // fmlslb z31.s, z21.h, z6.h[0] + // vl128 state = 0x6125ca9d + __ dci(0x64a7623e); // fmlslb z30.s, z17.h, z7.h[0] + // vl128 state = 
0x4b1cda83 + __ dci(0x64a7462e); // fmlalt z14.s, z17.h, z7.h[0] + // vl128 state = 0x00d73a44 + __ dci(0x64a6662f); // fmlslt z15.s, z17.h, z6.h[0] + // vl128 state = 0xc5ea9f30 + __ dci(0x64a666ed); // fmlslt z13.s, z23.h, z6.h[0] + // vl128 state = 0xe17ba118 + __ dci(0x64a26eec); // fmlslt z12.s, z23.h, z2.h[1] + // vl128 state = 0xd1962c7a + __ dci(0x64a26cbc); // fmlslt z28.s, z5.h, z2.h[1] + // vl128 state = 0xde6f1ace + __ dci(0x64a26cb4); // fmlslt z20.s, z5.h, z2.h[1] + // vl128 state = 0x10d69920 + __ dci(0x64a26cbc); // fmlslt z28.s, z5.h, z2.h[1] + // vl128 state = 0x8d190aec + __ dci(0x64a26cd8); // fmlslt z24.s, z6.h, z2.h[1] + // vl128 state = 0x432fdda3 + __ dci(0x64a26c1a); // fmlslt z26.s, z0.h, z2.h[1] + // vl128 state = 0x9ababf0a + __ dci(0x64a24d1e); // fmlalt z30.s, z8.h, z2.h[1] + // vl128 state = 0x609040ae + __ dci(0x64a24d1c); // fmlalt z28.s, z8.h, z2.h[1] + // vl128 state = 0x0a047710 + __ dci(0x64a24d1e); // fmlalt z30.s, z8.h, z2.h[1] + // vl128 state = 0xf273945a + __ dci(0x64a0490e); // fmlalb z14.s, z8.h, z0.h[1] + // vl128 state = 0x3a5456f1 + __ dci(0x64a0490c); // fmlalb z12.s, z8.h, z0.h[1] + // vl128 state = 0xdb948daf + __ dci(0x64b04b04); // fmlalb z4.s, z24.h, z0.h[5] + // vl128 state = 0xd2eae2af + __ dci(0x64b04b06); // fmlalb z6.s, z24.h, z0.h[5] + // vl128 state = 0x26627a2c + __ dci(0x64b04b07); // fmlalb z7.s, z24.h, z0.h[5] + // vl128 state = 0x2841173d + __ dci(0x64b84b26); // fmlalb z6.s, z25.h, z0.h[7] + // vl128 state = 0x9b52bcc6 + __ dci(0x64ba4f27); // fmlalt z7.s, z25.h, z2.h[7] + // vl128 state = 0x813bbabe + __ dci(0x64ba4923); // fmlalb z3.s, z9.h, z2.h[7] + // vl128 state = 0xbb608dad + __ dci(0x64b84d22); // fmlalt z2.s, z9.h, z0.h[7] + // vl128 state = 0xf4d84ed6 + __ dci(0x64b84d23); // fmlalt z3.s, z9.h, z0.h[7] + // vl128 state = 0x1cc0784e + __ dci(0x64bc4527); // fmlalt z7.s, z9.h, z4.h[6] + // vl128 state = 0x4eece4b7 + __ dci(0x64bc6737); // fmlslt z23.s, z25.h, z4.h[6] + // vl128 state = 
0x00dacf34 + __ dci(0x64bc6fa7); // fmlslt z7.s, z29.h, z4.h[7] + // vl128 state = 0x597e23d4 + __ dci(0x64bc6e25); // fmlslt z5.s, z17.h, z4.h[7] + // vl128 state = 0xa66b843c + __ dci(0x64be6f2d); // fmlslt z13.s, z25.h, z6.h[7] + // vl128 state = 0xb595ec08 + __ dci(0x64be6765); // fmlslt z5.s, z27.h, z6.h[6] + // vl128 state = 0xd6c3af0a + __ dci(0x64be662d); // fmlslt z13.s, z17.h, z6.h[6] + // vl128 state = 0x864f26a8 + __ dci(0x64bf6225); // fmlslb z5.s, z17.h, z7.h[6] + // vl128 state = 0xb969be4d + __ dci(0x64bb626d); // fmlslb z13.s, z19.h, z3.h[6] + // vl128 state = 0x73329b58 + __ dci(0x64b9622c); // fmlslb z12.s, z17.h, z1.h[6] + // vl128 state = 0xfb7e2da2 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xfb7e2da2, + 0x34ad546c, + 0xd914c0d4, + 0xc173287c, + 0x07db96b2, + 0xab5ece8c, + 0xcda13318, + 0x6e62dc3f, + 0x0268d9b4, + 0x15118567, + 0xf55fb24f, + 0xc4ab4b56, + 0x5911f225, + 0x6d9c320c, + 0xc69bdedf, + 0x1635a43f, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_fp_convert) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x640ab3ee); // fcvtxnt z14.s, p4/m, z31.d + // vl128 state = 0x3ea71f7a + __ dci(0x64caa9e0); // fcvtnt z0.s, p2/m, z15.d + // vl128 state = 0xe9d750a1 + __ dci(0x64cab83d); // fcvtnt z29.s, p6/m, z1.d + // vl128 state = 0x9ce43257 + __ dci(0x650aad62); // fcvtx z2.s, p3/m, z11.d + // vl128 state = 0x60283e22 + __ dci(0x64cbb42b); // fcvtlt z11.d, p5/m, z1.s + // vl128 state = 0xfbecbe4a + __ dci(0x6488ba54); // fcvtnt z20.h, p6/m, z18.s + // vl128 state = 0xbb81cc05 + __ dci(0x64cbb730); // 
fcvtlt z16.d, p5/m, z25.s + // vl128 state = 0xd9cebdf5 + __ dci(0x640aa5e4); // fcvtxnt z4.s, p1/m, z15.d + // vl128 state = 0x9dba64db + __ dci(0x650aa715); // fcvtx z21.s, p1/m, z24.d + // vl128 state = 0x0e68fab9 + __ dci(0x64cabe86); // fcvtnt z6.s, p7/m, z20.d + // vl128 state = 0x5936ac32 + __ dci(0x64cba075); // fcvtlt z21.d, p0/m, z3.s + // vl128 state = 0x2eb8a37b + __ dci(0x6488b3c5); // fcvtnt z5.h, p4/m, z30.s + // vl128 state = 0x9f471340 + __ dci(0x6489b24a); // fcvtlt z10.s, p4/m, z18.h + // vl128 state = 0xcf5e5808 + __ dci(0x64cbb514); // fcvtlt z20.d, p5/m, z8.s + // vl128 state = 0x870c5b85 + __ dci(0x650ab090); // fcvtx z16.s, p4/m, z4.d + // vl128 state = 0x305da0a0 + __ dci(0x64cbb2d3); // fcvtlt z19.d, p4/m, z22.s + // vl128 state = 0x8eb1b5fc + __ dci(0x64cbb093); // fcvtlt z19.d, p4/m, z4.s + // vl128 state = 0x3c070332 + __ dci(0x6488b9b8); // fcvtnt z24.h, p6/m, z13.s + // vl128 state = 0xe0fc3455 + __ dci(0x650aa64d); // fcvtx z13.s, p1/m, z18.d + // vl128 state = 0x65556c34 + __ dci(0x6488b2d7); // fcvtnt z23.h, p4/m, z22.s + // vl128 state = 0xc9ccae47 + __ dci(0x650ab36d); // fcvtx z13.s, p4/m, z27.d + // vl128 state = 0x31d942a1 + __ dci(0x650aba2c); // fcvtx z12.s, p6/m, z17.d + // vl128 state = 0x27497e26 + __ dci(0x650aa377); // fcvtx z23.s, p0/m, z27.d + // vl128 state = 0xbe0a7446 + __ dci(0x6489a3a5); // fcvtlt z5.s, p0/m, z29.h + // vl128 state = 0x454c62cc + __ dci(0x64cabeb9); // fcvtnt z25.s, p7/m, z21.d + // vl128 state = 0x808a014f + __ dci(0x6489b4c2); // fcvtlt z2.s, p5/m, z6.h + // vl128 state = 0x55ae2250 + __ dci(0x64cba246); // fcvtlt z6.d, p0/m, z18.s + // vl128 state = 0x7ce05c24 + __ dci(0x650ab2a6); // fcvtx z6.s, p4/m, z21.d + // vl128 state = 0xa26121f5 + __ dci(0x64cbb239); // fcvtlt z25.d, p4/m, z17.s + // vl128 state = 0xb40c58e1 + __ dci(0x64cabdd9); // fcvtnt z25.s, p7/m, z14.d + // vl128 state = 0xf5077a54 + __ dci(0x650ab75a); // fcvtx z26.s, p5/m, z26.d + // vl128 state = 0x95b006de + __ 
dci(0x650aa08b); // fcvtx z11.s, p0/m, z4.d + // vl128 state = 0x9ca5060c + __ dci(0x640aafd3); // fcvtxnt z19.s, p3/m, z30.d + // vl128 state = 0x85c89705 + __ dci(0x64caaf3a); // fcvtnt z26.s, p3/m, z25.d + // vl128 state = 0x6b6aa4f9 + __ dci(0x640abda1); // fcvtxnt z1.s, p7/m, z13.d + // vl128 state = 0x769cf76e + __ dci(0x6489a6f9); // fcvtlt z25.s, p1/m, z23.h + // vl128 state = 0x0a291b3b + __ dci(0x6489b38d); // fcvtlt z13.s, p4/m, z28.h + // vl128 state = 0x6b72e558 + __ dci(0x650aaf63); // fcvtx z3.s, p3/m, z27.d + // vl128 state = 0xf4a004e0 + __ dci(0x6488bfa4); // fcvtnt z4.h, p7/m, z29.s + // vl128 state = 0xe01c349e + __ dci(0x6489a6ee); // fcvtlt z14.s, p1/m, z23.h + // vl128 state = 0x3b06da53 + __ dci(0x64cabbf8); // fcvtnt z24.s, p6/m, z31.d + // vl128 state = 0xc60fbbf0 + __ dci(0x6489bc7f); // fcvtlt z31.s, p7/m, z3.h + // vl128 state = 0x8b281c78 + __ dci(0x64caaf1f); // fcvtnt z31.s, p3/m, z24.d + // vl128 state = 0x0f17afbb + __ dci(0x650aac71); // fcvtx z17.s, p3/m, z3.d + // vl128 state = 0xce0ac3e1 + __ dci(0x650aa1df); // fcvtx z31.s, p0/m, z14.d + // vl128 state = 0x71ba2085 + __ dci(0x650aaf9f); // fcvtx z31.s, p3/m, z28.d + // vl128 state = 0xe42caea0 + __ dci(0x640abff9); // fcvtxnt z25.s, p7/m, z31.d + // vl128 state = 0xec3c032c + __ dci(0x6489b8e5); // fcvtlt z5.s, p6/m, z7.h + // vl128 state = 0xe41850f7 + __ dci(0x640aa1a1); // fcvtxnt z1.s, p0/m, z13.d + // vl128 state = 0xaf3944b4 + __ dci(0x6488bf41); // fcvtnt z1.h, p7/m, z26.s + // vl128 state = 0xdffd02bd + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xdffd02bd, + 0x03d1f711, + 0x41cf3358, + 0xa351d0f6, + 0xffba25ff, + 0x14092947, + 0x26b194fe, + 0x42acd8a3, + 0xc0498960, + 0xcccf1171, + 0x8dca76ed, + 0xefbda194, + 0xcf04a23d, + 0x91e2629f, + 0xf05e8f52, + 0x4994ad4a, + }; + 
ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_saturating_multiply_add_high_indexed) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 40 * kInstructionSize); + __ dci(0x442b1100); // sqrdmlah z0.h, z8.h, z3.h[1] + // vl128 state = 0xb012d377 + __ dci(0x44211108); // sqrdmlah z8.h, z8.h, z1.h[0] + // vl128 state = 0xae399e50 + __ dci(0x4421110c); // sqrdmlah z12.h, z8.h, z1.h[0] + // vl128 state = 0x1a46b700 + __ dci(0x44291188); // sqrdmlah z8.h, z12.h, z1.h[1] + // vl128 state = 0x7525090a + __ dci(0x442811a9); // sqrdmlah z9.h, z13.h, z0.h[1] + // vl128 state = 0xf2907eb8 + __ dci(0x442c11eb); // sqrdmlah z11.h, z15.h, z4.h[1] + // vl128 state = 0x65a71d51 + __ dci(0x442c11e3); // sqrdmlah z3.h, z15.h, z4.h[1] + // vl128 state = 0x8b30e19b + __ dci(0x442413e1); // sqrdmlah z1.h, z31.h, z4.h[0] + // vl128 state = 0x448e4c0f + __ dci(0x44a413a0); // sqrdmlah z0.s, z29.s, z4.s[0] + // vl128 state = 0x1745e0db + __ dci(0x44241321); // sqrdmlah z1.h, z25.h, z4.h[0] + // vl128 state = 0xe07b491b + __ dci(0x44a413a5); // sqrdmlah z5.s, z29.s, z4.s[0] + // vl128 state = 0xad39c91c + __ dci(0x44e41327); // sqrdmlah z7.d, z25.d, z4.d[0] + // vl128 state = 0xd327dc1c + __ dci(0x44e4132f); // sqrdmlah z15.d, z25.d, z4.d[0] + // vl128 state = 0x8da341ca + __ dci(0x44e5130b); // sqrdmlah z11.d, z24.d, z5.d[0] + // vl128 state = 0x4dbd3ee1 + __ dci(0x44e3130a); // sqrdmlah z10.d, z24.d, z3.d[0] + // vl128 state = 0x71452896 + __ dci(0x44e3131a); // sqrdmlah z26.d, z24.d, z3.d[0] + // vl128 state = 0x4d6d8b90 + __ dci(0x4463135e); // sqrdmlah z30.h, z26.h, z3.h[4] + // vl128 state = 0x0b53f7b4 + __ dci(0x44e7135c); // sqrdmlah z28.d, z26.d, z7.d[0] + // vl128 state = 0x78ab2bb9 + __ dci(0x44e7134c); // sqrdmlah z12.d, z26.d, z7.d[0] + // vl128 state = 
0x3773b9e2 + __ dci(0x44e51144); // sqrdmlah z4.d, z10.d, z5.d[0] + // vl128 state = 0x8f8883da + __ dci(0x44e411c0); // sqrdmlah z0.d, z14.d, z4.d[0] + // vl128 state = 0xa27ef92f + __ dci(0x44ec15c4); // sqrdmlsh z4.d, z14.d, z12.d[0] + // vl128 state = 0x6cea3cee + __ dci(0x44ec14e0); // sqrdmlsh z0.d, z7.d, z12.d[0] + // vl128 state = 0xb5e40d5f + __ dci(0x44ee16f0); // sqrdmlsh z16.d, z23.d, z14.d[0] + // vl128 state = 0xacf903eb + __ dci(0x44ea16d4); // sqrdmlsh z20.d, z22.d, z10.d[0] + // vl128 state = 0x698246a6 + __ dci(0x44ea16d0); // sqrdmlsh z16.d, z22.d, z10.d[0] + // vl128 state = 0x58015eeb + __ dci(0x44ea16d1); // sqrdmlsh z17.d, z22.d, z10.d[0] + // vl128 state = 0xdbf1d9a6 + __ dci(0x44ab16d3); // sqrdmlsh z19.s, z22.s, z3.s[1] + // vl128 state = 0xbde312bb + __ dci(0x44aa17d1); // sqrdmlsh z17.s, z30.s, z2.s[1] + // vl128 state = 0xc033b9a1 + __ dci(0x44aa1650); // sqrdmlsh z16.s, z18.s, z2.s[1] + // vl128 state = 0x0e3b4c59 + __ dci(0x44aa1632); // sqrdmlsh z18.s, z17.s, z2.s[1] + // vl128 state = 0x6f849e01 + __ dci(0x44aa1710); // sqrdmlsh z16.s, z24.s, z2.s[1] + // vl128 state = 0x701e7316 + __ dci(0x44aa1711); // sqrdmlsh z17.s, z24.s, z2.s[1] + // vl128 state = 0xbfbc7895 + __ dci(0x44a91715); // sqrdmlsh z21.s, z24.s, z1.s[1] + // vl128 state = 0x2307c6f3 + __ dci(0x44a91697); // sqrdmlsh z23.s, z20.s, z1.s[1] + // vl128 state = 0x78db6627 + __ dci(0x44a91696); // sqrdmlsh z22.s, z20.s, z1.s[1] + // vl128 state = 0x37d25a35 + __ dci(0x44a816de); // sqrdmlsh z30.s, z22.s, z0.s[1] + // vl128 state = 0xf611db46 + __ dci(0x44ab16dc); // sqrdmlsh z28.s, z22.s, z3.s[1] + // vl128 state = 0x699a840f + __ dci(0x44af165d); // sqrdmlsh z29.s, z18.s, z7.s[1] + // vl128 state = 0x0b5d451f + __ dci(0x44af16f5); // sqrdmlsh z21.s, z23.s, z7.s[1] + // vl128 state = 0xe49e3b59 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + 
RUN(); + uint32_t expected_hashes[] = { + 0xe49e3b59, + 0xce0062c7, + 0xf796ec27, + 0x1f952649, + 0x4e4354e6, + 0x90cb0c51, + 0xf0688aee, + 0xae9de352, + 0x652f0c0d, + 0x0000db74, + 0xdc23fff7, + 0x228c116c, + 0x8477dd7c, + 0x08377c46, + 0x6e05a40f, + 0x874126fb, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_sat_double_mul_high_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x447bf609); // sqrdmulh z9.h, z16.h, z3.h[7] + // vl128 state = 0xacad7d7c + __ dci(0x447bf601); // sqrdmulh z1.h, z16.h, z3.h[7] + // vl128 state = 0xd6a976fe + __ dci(0x447bf600); // sqrdmulh z0.h, z16.h, z3.h[7] + // vl128 state = 0x959d4287 + __ dci(0x446bf710); // sqrdmulh z16.h, z24.h, z3.h[5] + // vl128 state = 0x88b70b0e + __ dci(0x446af612); // sqrdmulh z18.h, z16.h, z2.h[5] + // vl128 state = 0xea48068a + __ dci(0x442af636); // sqrdmulh z22.h, z17.h, z2.h[1] + // vl128 state = 0x22135bae + __ dci(0x442af626); // sqrdmulh z6.h, z17.h, z2.h[1] + // vl128 state = 0x1ed137a8 + __ dci(0x442af624); // sqrdmulh z4.h, z17.h, z2.h[1] + // vl128 state = 0x37aa44d4 + __ dci(0x4420f625); // sqrdmulh z5.h, z17.h, z0.h[0] + // vl128 state = 0x9747863a + __ dci(0x4460f604); // sqrdmulh z4.h, z16.h, z0.h[4] + // vl128 state = 0xf6487f4b + __ dci(0x4460f605); // sqrdmulh z5.h, z16.h, z0.h[4] + // vl128 state = 0xb85302a6 + __ dci(0x4420f641); // sqrdmulh z1.h, z18.h, z0.h[0] + // vl128 state = 0xfc85ce98 + __ dci(0x4424f669); // sqrdmulh z9.h, z19.h, z4.h[0] + // vl128 state = 0xf0b36dd3 + __ dci(0x4460f668); // sqrdmulh z8.h, z19.h, z0.h[4] + // vl128 state = 0x227fe9fe + __ dci(0x4462f6f8); // sqrdmulh z24.h, z23.h, z2.h[4] + // vl128 state = 0x7f4d89ab + __ dci(0x4462f6f0); // sqrdmulh z16.h, z23.h, z2.h[4] + // vl128 state = 
0x61520386 + __ dci(0x4472f6d1); // sqrdmulh z17.h, z22.h, z2.h[6] + // vl128 state = 0x34d07c81 + __ dci(0x4472f250); // sqdmulh z16.h, z18.h, z2.h[6] + // vl128 state = 0x74313b89 + __ dci(0x44b2f254); // sqdmulh z20.s, z18.s, z2.s[2] + // vl128 state = 0x7acc9692 + __ dci(0x44e2f250); // sqdmulh z16.d, z18.d, z2.d[0] + // vl128 state = 0x3a1f908e + __ dci(0x44e4f251); // sqdmulh z17.d, z18.d, z4.d[0] + // vl128 state = 0xd2ae3642 + __ dci(0x44e0f650); // sqrdmulh z16.d, z18.d, z0.d[0] + // vl128 state = 0x74da2dcc + __ dci(0x44f8f640); // sqrdmulh z0.d, z18.d, z8.d[1] + // vl128 state = 0x0273639a + __ dci(0x44f9f742); // sqrdmulh z2.d, z26.d, z9.d[1] + // vl128 state = 0x9c5062c9 + __ dci(0x44f9f7e6); // sqrdmulh z6.d, z31.d, z9.d[1] + // vl128 state = 0x095e8fd7 + __ dci(0x44fdf7ae); // sqrdmulh z14.d, z29.d, z13.d[1] + // vl128 state = 0x4ab7c261 + __ dci(0x44fdf7af); // sqrdmulh z15.d, z29.d, z13.d[1] + // vl128 state = 0x7913f02e + __ dci(0x44f9f7ed); // sqrdmulh z13.d, z31.d, z9.d[1] + // vl128 state = 0xbbffd120 + __ dci(0x44f9f7e5); // sqrdmulh z5.d, z31.d, z9.d[1] + // vl128 state = 0xc9cc793f + __ dci(0x44f5f7e4); // sqrdmulh z4.d, z31.d, z5.d[1] + // vl128 state = 0xc7cc2e4b + __ dci(0x44e5f3e0); // sqdmulh z0.d, z31.d, z5.d[0] + // vl128 state = 0x8a4efda7 + __ dci(0x44e4f364); // sqdmulh z4.d, z27.d, z4.d[0] + // vl128 state = 0xfa30239a + __ dci(0x44edf366); // sqdmulh z6.d, z27.d, z13.d[0] + // vl128 state = 0x9c538671 + __ dci(0x44adf322); // sqdmulh z2.s, z25.s, z5.s[1] + // vl128 state = 0xafb03157 + __ dci(0x44adf263); // sqdmulh z3.s, z19.s, z5.s[1] + // vl128 state = 0x6ea1e1ff + __ dci(0x44bdf22b); // sqdmulh z11.s, z17.s, z5.s[3] + // vl128 state = 0x0040a3a0 + __ dci(0x44adf62a); // sqrdmulh z10.s, z17.s, z5.s[1] + // vl128 state = 0x8b3e6419 + __ dci(0x44adf622); // sqrdmulh z2.s, z17.s, z5.s[1] + // vl128 state = 0x579bf738 + __ dci(0x44abf632); // sqrdmulh z18.s, z17.s, z3.s[1] + // vl128 state = 0x2678c680 + __ dci(0x44a9f6ba); // 
sqrdmulh z26.s, z21.s, z1.s[1] + // vl128 state = 0xee25a322 + __ dci(0x44a9f6aa); // sqrdmulh z10.s, z21.s, z1.s[1] + // vl128 state = 0x99cfcf9f + __ dci(0x44b1f6ab); // sqrdmulh z11.s, z21.s, z1.s[2] + // vl128 state = 0xa6785a38 + __ dci(0x44b1f0bb); // sqdmulh z27.s, z5.s, z1.s[2] + // vl128 state = 0xfc822233 + __ dci(0x4439f0bf); // sqdmulh z31.h, z5.h, z1.h[3] + // vl128 state = 0x322d49df + __ dci(0x4433f0be); // sqdmulh z30.h, z5.h, z3.h[2] + // vl128 state = 0xbf6733d2 + __ dci(0x4433f0d6); // sqdmulh z22.h, z6.h, z3.h[2] + // vl128 state = 0x99f11483 + __ dci(0x4437f2d7); // sqdmulh z23.h, z22.h, z7.h[2] + // vl128 state = 0x9c146ede + __ dci(0x4426f2d6); // sqdmulh z22.h, z22.h, z6.h[0] + // vl128 state = 0xc089284f + __ dci(0x44a6f0de); // sqdmulh z30.s, z6.s, z6.s[0] + // vl128 state = 0xe962a269 + __ dci(0x44a4f04e); // sqdmulh z14.s, z2.s, z4.s[0] + // vl128 state = 0xaea2f35e + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xaea2f35e, + 0xb4e17c50, + 0x97dfb966, + 0x070d3c78, + 0x5b2f880d, + 0x8e643be0, + 0x4d7f006b, + 0xfbd08185, + 0x4960a97d, + 0x1e85903f, + 0x443b62e4, + 0xf196453a, + 0x50dae6ef, + 0x0e4bb245, + 0x69d661ab, + 0x7d6fb839, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(sve2_extract) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 60 * kInstructionSize); + __ dci(0x056a1008); // ext z8.b, {z0.b, z1.b}, #84 + // vl128 state = 0x06ae6d5d + __ dci(0x05601418); // ext z24.b, {z0.b, z1.b}, #5 + // vl128 state = 0x3b73c922 + __ dci(0x05601708); // ext z8.b, {z24.b, z25.b}, #5 + // vl128 state = 0xc3526a3d + __ dci(0x05601d0c); // ext z12.b, 
{z8.b, z9.b}, #7 + // vl128 state = 0xbde17731 + __ dci(0x05600c1c); // ext z28.b, {z0.b, z1.b}, #3 + // vl128 state = 0x9ac72141 + __ dci(0x05600c58); // ext z24.b, {z2.b, z3.b}, #3 + // vl128 state = 0xccecefc0 + __ dci(0x05600410); // ext z16.b, {z0.b, z1.b}, #1 + // vl128 state = 0xe49d5f89 + __ dci(0x05600438); // ext z24.b, {z1.b, z2.b}, #1 + // vl128 state = 0x9967df9d + __ dci(0x0560067a); // ext z26.b, {z19.b, z20.b}, #1 + // vl128 state = 0x110a8b46 + __ dci(0x05601478); // ext z24.b, {z3.b, z4.b}, #5 + // vl128 state = 0x558f95f2 + __ dci(0x0560117c); // ext z28.b, {z11.b, z12.b}, #4 + // vl128 state = 0x18d0f048 + __ dci(0x0560157e); // ext z30.b, {z11.b, z12.b}, #5 + // vl128 state = 0x1719547f + __ dci(0x05601c7a); // ext z26.b, {z3.b, z4.b}, #7 + // vl128 state = 0x600cfa8a + __ dci(0x0560187e); // ext z30.b, {z3.b, z4.b}, #6 + // vl128 state = 0xc93e431e + __ dci(0x05601876); // ext z22.b, {z3.b, z4.b}, #6 + // vl128 state = 0x5be7af00 + __ dci(0x05601c26); // ext z6.b, {z1.b, z2.b}, #7 + // vl128 state = 0xd3d69d02 + __ dci(0x05601c2e); // ext z14.b, {z1.b, z2.b}, #7 + // vl128 state = 0x1d88c27b + __ dci(0x05601d3e); // ext z30.b, {z9.b, z10.b}, #7 + // vl128 state = 0x56f91523 + __ dci(0x05601dae); // ext z14.b, {z13.b, z14.b}, #7 + // vl128 state = 0xbc175582 + __ dci(0x056015ef); // ext z15.b, {z15.b, z16.b}, #5 + // vl128 state = 0x9289a9ba + __ dci(0x0560157f); // ext z31.b, {z11.b, z12.b}, #5 + // vl128 state = 0x46be3725 + __ dci(0x0560157e); // ext z30.b, {z11.b, z12.b}, #5 + // vl128 state = 0xa4fd59e9 + __ dci(0x0560156e); // ext z14.b, {z11.b, z12.b}, #5 + // vl128 state = 0x88b9ba85 + __ dci(0x05601566); // ext z6.b, {z11.b, z12.b}, #5 + // vl128 state = 0x7f3b2a36 + __ dci(0x056017e4); // ext z4.b, {z31.b, z0.b}, #5 + // vl128 state = 0xa71b8fa9 + __ dci(0x05601f74); // ext z20.b, {z27.b, z28.b}, #7 + // vl128 state = 0x89dcdeac + __ dci(0x05601f44); // ext z4.b, {z26.b, z27.b}, #7 + // vl128 state = 0xa877313f + __ dci(0x05601e45); 
// ext z5.b, {z18.b, z19.b}, #7 + // vl128 state = 0x6181834a + __ dci(0x05601255); // ext z21.b, {z18.b, z19.b}, #4 + // vl128 state = 0x7c3595cd + __ dci(0x05701a51); // ext z17.b, {z18.b, z19.b}, #134 + // vl128 state = 0x10fdfe4d + __ dci(0x05701ad3); // ext z19.b, {z22.b, z23.b}, #134 + // vl128 state = 0x08e923c5 + __ dci(0x05701ad1); // ext z17.b, {z22.b, z23.b}, #134 + // vl128 state = 0xefb2c9e9 + __ dci(0x05701b41); // ext z1.b, {z26.b, z27.b}, #134 + // vl128 state = 0xd5dccda9 + __ dci(0x05701b40); // ext z0.b, {z26.b, z27.b}, #134 + // vl128 state = 0xd424c039 + __ dci(0x05701bd0); // ext z16.b, {z30.b, z31.b}, #134 + // vl128 state = 0xd914c077 + __ dci(0x057013d8); // ext z24.b, {z30.b, z31.b}, #132 + // vl128 state = 0x32459b3a + __ dci(0x05701259); // ext z25.b, {z18.b, z19.b}, #132 + // vl128 state = 0x422ed7bf + __ dci(0x0570125d); // ext z29.b, {z18.b, z19.b}, #132 + // vl128 state = 0x6bfc46ef + __ dci(0x05700215); // ext z21.b, {z16.b, z17.b}, #128 + // vl128 state = 0xc53b85ed + __ dci(0x0560021d); // ext z29.b, {z16.b, z17.b}, #0 + // vl128 state = 0xd391e5ec + __ dci(0x0570121c); // ext z28.b, {z16.b, z17.b}, #132 + // vl128 state = 0x7990c1d7 + __ dci(0x0570030c); // ext z12.b, {z24.b, z25.b}, #128 + // vl128 state = 0xca0d3db8 + __ dci(0x05700b88); // ext z8.b, {z28.b, z29.b}, #130 + // vl128 state = 0xe5c71442 + __ dci(0x05600b0c); // ext z12.b, {z24.b, z25.b}, #2 + // vl128 state = 0x68510d62 + __ dci(0x05600f1c); // ext z28.b, {z24.b, z25.b}, #3 + // vl128 state = 0x77f9f046 + __ dci(0x05600e14); // ext z20.b, {z16.b, z17.b}, #3 + // vl128 state = 0x7068dedf + __ dci(0x05600604); // ext z4.b, {z16.b, z17.b}, #1 + // vl128 state = 0x8b70c406 + __ dci(0x05600406); // ext z6.b, {z0.b, z1.b}, #1 + // vl128 state = 0x10e6b48c + __ dci(0x05600056); // ext z22.b, {z2.b, z3.b}, #0 + // vl128 state = 0xe1294d7a + __ dci(0x05600052); // ext z18.b, {z2.b, z3.b}, #0 + // vl128 state = 0x0762bbb0 + __ dci(0x056000d6); // ext z22.b, {z6.b, z7.b}, #0 
+ // vl128 state = 0x58be0ba4 + __ dci(0x057008de); // ext z30.b, {z6.b, z7.b}, #130 + // vl128 state = 0x8a2018e9 + __ dci(0x0570085a); // ext z26.b, {z2.b, z3.b}, #130 + // vl128 state = 0xb019b7e0 + __ dci(0x057009d2); // ext z18.b, {z14.b, z15.b}, #130 + // vl128 state = 0x9e6e14ed + __ dci(0x057008fa); // ext z26.b, {z7.b, z8.b}, #130 + // vl128 state = 0x4cf64d22 + __ dci(0x057008f2); // ext z18.b, {z7.b, z8.b}, #130 + // vl128 state = 0x048c30f9 + __ dci(0x057002f3); // ext z19.b, {z23.b, z24.b}, #128 + // vl128 state = 0x2d7eb43b + __ dci(0x057006a3); // ext z3.b, {z21.b, z22.b}, #129 + // vl128 state = 0xa37aeb5e + __ dci(0x05700687); // ext z7.b, {z20.b, z21.b}, #129 + // vl128 state = 0xd8d7cdc7 + __ dci(0x056006b7); // ext z23.b, {z21.b, z22.b}, #1 + // vl128 state = 0x2480e1d4 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x2480e1d4, + 0x4dc42cc5, + 0x7ac24121, + 0x9eaf5c98, + 0x1b7b35dc, + 0x1b1035fc, + 0xe15f6899, + 0xaad14717, + 0x3327c3fc, + 0x7f349408, + 0x2d865b00, + 0x9819cd29, + 0x7f64cace, + 0x3751e2c1, + 0x7e60fc24, + 0xc6b308fc, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +} // namespace aarch64 +} // namespace vixl diff --git a/test/aarch64/test-trace-aarch64.cc b/test/aarch64/test-trace-aarch64.cc index a0a2172f..27e71d57 100644 --- a/test/aarch64/test-trace-aarch64.cc +++ b/test/aarch64/test-trace-aarch64.cc @@ -2881,6 +2881,55 @@ static void GenerateTestSequenceSVE(MacroAssembler* masm) { SVEMemOperand(x0, 4, SVE_MUL_VL)); } +static void GenerateTestSequenceAtomics(MacroAssembler* masm) { + ExactAssemblyScope guard(masm, + masm->GetBuffer()->GetRemainingBytes(), + ExactAssemblyScope::kMaximumSize); + CPUFeaturesScope feature_guard(masm, CPUFeatures::kAtomics); + __ sub(sp, sp, 16); // Claim some working space on the 
stack. + __ mov(x0, 0x5555555555555555); + __ str(x0, MemOperand(sp)); // Initialise working space. + +#define INST_LIST(OP) \ + __ ld##OP##b(w0, w0, MemOperand(sp)); \ + __ ld##OP##ab(w0, w1, MemOperand(sp)); \ + __ ld##OP##lb(w0, w2, MemOperand(sp)); \ + __ ld##OP##alb(w0, w3, MemOperand(sp)); \ + __ ld##OP##h(w0, w0, MemOperand(sp)); \ + __ ld##OP##ah(w0, w1, MemOperand(sp)); \ + __ ld##OP##lh(w0, w2, MemOperand(sp)); \ + __ ld##OP##alh(w0, w3, MemOperand(sp)); \ + __ ld##OP(w0, w0, MemOperand(sp)); \ + __ ld##OP##a(w0, w1, MemOperand(sp)); \ + __ ld##OP##l(w0, w2, MemOperand(sp)); \ + __ ld##OP##al(w0, w3, MemOperand(sp)); \ + __ ld##OP(x0, x0, MemOperand(sp)); \ + __ ld##OP##a(x0, x1, MemOperand(sp)); \ + __ ld##OP##l(x0, x2, MemOperand(sp)); \ + __ ld##OP##al(x0, x3, MemOperand(sp)); \ + __ st##OP##b(w0, MemOperand(sp)); \ + __ st##OP##lb(w0, MemOperand(sp)); \ + __ st##OP##h(w0, MemOperand(sp)); \ + __ st##OP##lh(w0, MemOperand(sp)); \ + __ st##OP(w0, MemOperand(sp)); \ + __ st##OP##l(w0, MemOperand(sp)); \ + __ st##OP(x0, MemOperand(sp)); \ + __ st##OP##l(x0, MemOperand(sp)); + + INST_LIST(add); + INST_LIST(set); + INST_LIST(eor); + INST_LIST(smin); + INST_LIST(smax); + INST_LIST(umin); + INST_LIST(umax); + INST_LIST(clr); + +#undef INST_LIST + + __ add(sp, sp, 16); // Restore stack pointer. +} + static void MaskAddresses(const char* trace) { #define VIXL_COLOUR "(\x1b\\[[01];([0-9][0-9])?m)?" // All patterns are replaced with "$1~~~~~~~~~~~~~~~~". 
@@ -3036,6 +3085,7 @@ static void TraceTestHelper(bool coloured_trace, GenerateTestSequenceNEON(&masm); GenerateTestSequenceNEONFP(&masm); GenerateTestSequenceSVE(&masm); + GenerateTestSequenceAtomics(&masm); masm.Ret(); masm.FinalizeCode(); @@ -3127,6 +3177,7 @@ static void PrintDisassemblerTestHelper(const char* prefix, GenerateTestSequenceNEON(&masm); GenerateTestSequenceNEONFP(&masm); GenerateTestSequenceSVE(&masm); + GenerateTestSequenceAtomics(&masm); masm.FinalizeCode(); Decoder decoder; diff --git a/test/aarch64/test-utils-aarch64.cc b/test/aarch64/test-utils-aarch64.cc index 5e6a3519..76e7eae0 100644 --- a/test/aarch64/test-utils-aarch64.cc +++ b/test/aarch64/test-utils-aarch64.cc @@ -30,6 +30,7 @@ #include "test-runner.h" #include "test-utils-aarch64.h" +#include "../test/aarch64/test-simulator-inputs-aarch64.h" #include "aarch64/cpu-aarch64.h" #include "aarch64/disasm-aarch64.h" #include "aarch64/macro-assembler-aarch64.h" @@ -778,5 +779,240 @@ bool CanRun(const CPUFeatures& required, bool* queried_can_run) { #endif } +// Note that the function assumes p0, p1, p2 and p3 are set to all true in b-, +// h-, s- and d-lane sizes respectively, and p4, p5 are clobberred as a temp +// predicate. +template <typename T, size_t N> +void SetFpData(MacroAssembler* masm, + int esize, + const T (&values)[N], + uint64_t lcg_mult) { + uint64_t a = 0; + uint64_t b = lcg_mult; + // Be used to populate the assigned element slots of register based on the + // type of floating point. + __ Pfalse(p5.VnB()); + switch (esize) { + case kHRegSize: + a = Float16ToRawbits(Float16(1.5)); + // Pick a convenient number within largest normal half-precision floating + // point. + b = Float16ToRawbits(Float16(lcg_mult % 1024)); + // Step 1: Set fp16 numbers to the undefined registers. 
+ // p4< 15:0>: 0b0101010101010101 + // z{code}<127:0>: 0xHHHHHHHHHHHHHHHH + __ Zip1(p4.VnB(), p0.VnB(), p5.VnB()); + break; + case kSRegSize: + a = FloatToRawbits(1.5); + b = FloatToRawbits(lcg_mult); + // Step 2: Set fp32 numbers to register on top of fp16 initialized. + // p4< 15:0>: 0b0000000100000001 + // z{code}<127:0>: 0xHHHHSSSSHHHHSSSS + __ Zip1(p4.VnS(), p2.VnS(), p5.VnS()); + break; + case kDRegSize: + a = DoubleToRawbits(1.5); + b = DoubleToRawbits(lcg_mult); + // Step 3: Set fp64 numbers to register on top of both fp16 and fp 32 + // initialized. + // p4< 15:0>: 0b0000000000000001 + // z{code}<127:0>: 0xHHHHSSSSDDDDDDDD + __ Zip1(p4.VnD(), p3.VnD(), p5.VnD()); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + __ Dup(z30.WithLaneSize(esize), a); + __ Dup(z31.WithLaneSize(esize), b); + + for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) { + // As floating point operations on random values have a tendency to + // converge on special-case numbers like NaNs, adopt normal floating point + // values be the seed instead. + InsrHelper(masm, z0.WithLaneSize(esize), values); + } + + __ Fmla(z0.WithLaneSize(esize), + p4.Merging(), + z30.WithLaneSize(esize), + z0.WithLaneSize(esize), + z31.WithLaneSize(esize), + FastNaNPropagation); + + for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) { + __ Fmla(ZRegister(i).WithLaneSize(esize), + p4.Merging(), + z30.WithLaneSize(esize), + ZRegister(i - 1).WithLaneSize(esize), + z31.WithLaneSize(esize), + FastNaNPropagation); + } + + __ Fmul(z31.WithLaneSize(esize), + p4.Merging(), + z31.WithLaneSize(esize), + z30.WithLaneSize(esize), + FastNaNPropagation); + __ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1); +} + +// Set z0 - z31 to some normal floating point data. 
+void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) { + // Initialise each Z registers to a mixture of fp16/32/64 values as following + // pattern: + // z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64 repeatedly throughout the + // register. + // + // For example: + // z{code}<2047:1920>: 0x{< fp64 >< fp32 ><fp16><fp16>} + // ... + // z{code}< 127: 0>: 0x{< fp64 >< fp32 ><fp16><fp16>} + // + // In current manner, in order to make a desired mixture, each part of + // initialization have to be called in the following order. + SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult); + SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult); + SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult); +} + +void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) { + USE(input_set); + uint64_t lcg_mult = 6364136223846793005; + + // Set x0 - x30 to pseudo-random data. + __ Mov(x29, 1); // LCG increment. + __ Mov(x30, lcg_mult); + __ Mov(x0, 42); // LCG seed. + + __ Cmn(x0, 0); // Clear NZCV flags for later. + + __ Madd(x0, x0, x30, x29); // First pseudo-random number. + + // Registers 1 - 29. + for (unsigned i = 1; i < 30; i++) { + __ Madd(XRegister(i), XRegister(i - 1), x30, x29); + } + __ Mul(x30, x29, x30); + __ Add(x30, x30, 1); + + + // Set first four predicate registers to true for increasing lane sizes. + __ Ptrue(p0.VnB()); + __ Ptrue(p1.VnH()); + __ Ptrue(p2.VnS()); + __ Ptrue(p3.VnD()); + + // Set z0 - z31 to pseudo-random data. + if (input_set == kIntInputSet) { + __ Dup(z30.VnD(), 1); + __ Dup(z31.VnD(), lcg_mult); + __ Index(z0.VnB(), -16, 13); // LCG seeds. 
+ + __ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD()); + for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) { + __ Mla(ZRegister(i).VnD(), + p0.Merging(), + z30.VnD(), + ZRegister(i - 1).VnD(), + z31.VnD()); + } + __ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD()); + __ Add(z31.VnD(), z31.VnD(), 1); + + } else { + VIXL_ASSERT(input_set == kFpInputSet); + InitialiseRegisterFp(masm, lcg_mult); + } + + // Set remaining predicate registers based on earlier pseudo-random data. + for (unsigned i = 4; i < kNumberOfPRegisters; i++) { + __ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0); + } + for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) { + __ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB()); + __ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB()); + __ Mov(PRegister(i), p0); + } + __ Ptrue(p0.VnB()); + + // At this point, only sp and a few status registers are undefined. These + // must be ignored when computing the state hash. +} + +void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) { + // Use explicit registers, to avoid hash order varying if + // UseScratchRegisterScope changes. + UseScratchRegisterScope temps(masm); + temps.ExcludeAll(); + Register t0 = w0; + Register t1 = x1; + + // Compute hash of x0 - x30. + __ Push(t0.X(), t1); + __ Crc32x(t0, wzr, t0.X()); + for (unsigned i = 0; i < kNumberOfRegisters; i++) { + if (i == xzr.GetCode()) continue; // Skip sp. + if (t0.Is(WRegister(i))) continue; // Skip t0, as it's already hashed. + __ Crc32x(t0, t0, XRegister(i)); + } + + // Hash the status flags. + __ Mrs(t1, NZCV); + __ Crc32x(t0, t0, t1); + + // Acquire another temp, as integer registers have been hashed already. + __ Push(x30, xzr); + Register t2 = x30; + + // Compute hash of all bits in z0 - z31. This implies different hashes are + // produced for machines of different vector length. 
+ for (unsigned i = 0; i < kNumberOfZRegisters; i++) { + __ Rdvl(t2, 1); + __ Lsr(t2, t2, 4); + Label vl_loop; + __ Bind(&vl_loop); + __ Umov(t1, VRegister(i).V2D(), 0); + __ Crc32x(t0, t0, t1); + __ Umov(t1, VRegister(i).V2D(), 1); + __ Crc32x(t0, t0, t1); + __ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16); + __ Sub(t2, t2, 1); + __ Cbnz(t2, &vl_loop); + } + + // Hash predicate registers. For simplicity, this writes the predicate + // registers to a zero-initialised area of stack of the maximum size required + // for P registers. It then computes a hash of that entire stack area. + unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes; + + // Zero claimed stack area. + for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) { + __ Push(xzr, xzr); + } + + // Store all P registers to the stack. + __ Mov(t1, sp); + for (unsigned i = 0; i < kNumberOfPRegisters; i++) { + __ Str(PRegister(i), SVEMemOperand(t1)); + __ Add(t1, t1, kPRegMaxSizeInBytes); + } + + // Hash the entire stack area. + for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) { + __ Pop(t1, t2); + __ Crc32x(t0, t0, t1); + __ Crc32x(t0, t0, t2); + } + + __ Mov(t1, reinterpret_cast<uint64_t>(dst)); + __ Str(t0, MemOperand(t1)); + + __ Pop(xzr, x30); + __ Pop(t1, t0.X()); +} + } // namespace aarch64 } // namespace vixl diff --git a/test/aarch64/test-utils-aarch64.h b/test/aarch64/test-utils-aarch64.h index 554cd83f..b1c28986 100644 --- a/test/aarch64/test-utils-aarch64.h +++ b/test/aarch64/test-utils-aarch64.h @@ -583,6 +583,86 @@ bool CanRun(const CPUFeatures& required, bool* queried_can_run = NULL); // we need to enable it in the infrastructure code for each test. static const CPUFeatures kInfrastructureCPUFeatures(CPUFeatures::kNEON); +enum InputSet { + kIntInputSet = 0, + kFpInputSet, +}; + +// Initialise CPU registers to a predictable, non-zero set of values. 
This +// sets core, vector, predicate and flag registers, though leaves the stack +// pointer at its original value. +void SetInitialMachineState(MacroAssembler* masm, + InputSet input_set = kIntInputSet); + +// Compute a CRC32 hash of the machine state, and store it to dst. The hash +// covers core (not sp), vector (lower 128 bits), predicate (lower 16 bits) +// and flag registers. +void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst); + +// The TEST_SVE macro works just like the usual TEST macro, but the resulting +// function receives a `const Test& config` argument, to allow it to query the +// vector length. +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + +#define TEST_SVE_INNER(type, name) \ + void Test##name(Test* config); \ + Test* test_##name##_list[] = \ + {Test::MakeSVETest(128, \ + "AARCH64_" type "_" #name "_vl128", \ + &Test##name), \ + Test::MakeSVETest(384, \ + "AARCH64_" type "_" #name "_vl384", \ + &Test##name), \ + Test::MakeSVETest(2048, \ + "AARCH64_" type "_" #name "_vl2048", \ + &Test##name)}; \ + void Test##name(Test* config) + +#define SVE_SETUP_WITH_FEATURES(...) \ + SETUP_WITH_FEATURES(__VA_ARGS__); \ + simulator.SetVectorLengthInBits(config->sve_vl_in_bits()) + +#else +// Otherwise, just use whatever the hardware provides. +static const int kSVEVectorLengthInBits = + CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE) + ? CPU::ReadSVEVectorLengthInBits() + : kZRegMinSize; + +#define TEST_SVE_INNER(type, name) \ + void Test##name(Test* config); \ + Test* test_##name##_vlauto = \ + Test::MakeSVETest(kSVEVectorLengthInBits, \ + "AARCH64_" type "_" #name "_vlauto", \ + &Test##name); \ + void Test##name(Test* config) + +#define SVE_SETUP_WITH_FEATURES(...) \ + SETUP_WITH_FEATURES(__VA_ARGS__); \ + USE(config) + +#endif + +// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This +// is optimised for call-site clarity, not generated code quality, so it doesn't +// exist in the MacroAssembler itself. 
+// +// Usage: +// +// int values[] = { 42, 43, 44 }; +// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 } +// +// The rightmost (highest-indexed) array element maps to the lowest-numbered +// lane. +template <typename T, size_t N> +void InsrHelper(MacroAssembler* masm, + const ZRegister& zdn, + const T (&values)[N]) { + for (size_t i = 0; i < N; i++) { + masm->Insr(zdn, values[i]); + } +} + } // namespace aarch64 } // namespace vixl diff --git a/test/test-donkey.cc b/test/test-donkey.cc new file mode 100644 index 00000000..250fa5d3 --- /dev/null +++ b/test/test-donkey.cc @@ -0,0 +1,327 @@ +// Copyright 2020, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <regex> +#include <set> + +#include "aarch64/test-utils-aarch64.h" + +using namespace vixl; +using namespace vixl::aarch64; + +#define __ masm-> + +class InstructionReporter : public DecoderVisitor { + public: + InstructionReporter() : DecoderVisitor(kNonConstVisitor) {} + + void Visit(Metadata *metadata, const Instruction *instr) VIXL_OVERRIDE { + USE(instr); + instr_form_ = (*metadata)["form"]; + } + + std::string MoveForm() { return std::move(instr_form_); } + + private: + std::string instr_form_; +}; + +Instr Mutate(Instr base) { + Instr result = base; + while ((result == base) || (result == 0)) { + // Flip two bits somewhere in the most-significant 27. + for (int i = 0; i < 2; i++) { + uint32_t pos = 5 + ((lrand48() >> 20) % 27); + result = result ^ (1 << pos); + } + + // Always flip one of the low five bits, as that's where the destination + // register is often encoded. 
+ uint32_t dst_pos = (lrand48() >> 20) % 5; + result = result ^ (1 << dst_pos); + } + return result; +} + +#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64 +int main(void) { + printf("Test donkey requires a simulator build to be useful.\n"); + return 0; +} +#else +int main(int argc, char **argv) { + if ((argc < 3) || (argc > 5)) { + printf( + "Usage: test-donkey <instruction form regex> <number of instructions " + "to emit in test> <encoding generation manner> <input data type>\n" + " regex - ECMAScript (C++11) regular expression to match instruction " + "form\n" + " encoding=random - use rng only to select new instructions\n" + " (can take longer, but gives better coverage for disparate " + "encodings)\n" + " encoding=`initial hex` - hex encoding of first instruction in test, " + "eg. 1234abcd\n" + " input data type - used to specify the data type of generating " + "input, e.g. input=fp, default set to integer type\n" + " command examples :\n" + " ./test-donkey \"fml[as]l[bt]\" 50 encoding=random input=fp\n" + " ./test-donkey \"fml[as]l[bt]\" 30 input=int\n"); + exit(1); + } + + // Use LC-RNG only to select instructions. + bool random_only = false; + + std::string target_re = argv[1]; + uint32_t count = static_cast<uint32_t>(strtoul(argv[2], NULL, 10)); + uint32_t cmdline_encoding = 0; + InputSet input_set = kIntInputSet; + if (argc > 3) { + // The arguments of instruction pattern and the number of generating + // instructions are processed. 
+ int32_t i = 3; + std::string argv_s(argv[i]); + if (argv_s.find("encoding=") != std::string::npos) { + char *c = argv[i]; + c += 9; + if (strcmp(c, "random") == 0) { + random_only = true; + } else { + cmdline_encoding = static_cast<uint32_t>(strtoul(c, NULL, 16)); + } + i++; + } + + if ((argc > 4) || (i == 3)) { + argv_s = std::string(argv[i]); + if (argv_s.find("input=") != std::string::npos) { + char *c = argv[i]; + c += 6; + if (strcmp(c, "fp") == 0) { + input_set = kFpInputSet; + } else { + VIXL_ASSERT(strcmp(c, "int") == 0); + } + i++; + } + } + + // Ensure all arguments have been processed. + VIXL_ASSERT(argc == i); + } + + srand48(42); + + MacroAssembler masm; + masm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); + + std::map<int, Simulator *> sim_vl; + for (int i = 128; i <= 2048; i += 128) { + sim_vl[i] = new Simulator(new Decoder()); + sim_vl[i]->SetVectorLengthInBits(i); + } + + char buffer[256]; + Decoder trial_decoder; + Disassembler disasm(buffer, sizeof(buffer)); + InstructionReporter reporter; + trial_decoder.AppendVisitor(&reporter); + trial_decoder.AppendVisitor(&disasm); + + using InstrData = struct { + Instr inst; + std::string disasm; + uint32_t state_hash; + }; + std::vector<InstrData> useful_insts; + + // Seen states are only considered for vl128. It's assumed that a new state + // for vl128 implies a new state for all other vls. + std::set<uint32_t> seen_states; + uint32_t state_hash; + + std::map<int, uint32_t> initial_state_vl; + std::map<int, uint32_t> state_hash_vl; + + // Compute hash of the initial state of the machine. 
+ Label test; + masm.Bind(&test); + masm.PushCalleeSavedRegisters(); + SetInitialMachineState(&masm, input_set); + ComputeMachineStateHash(&masm, &state_hash); + masm.PopCalleeSavedRegisters(); + masm.Ret(); + masm.FinalizeCode(); + masm.GetBuffer()->SetExecutable(); + + for (std::pair<int, Simulator *> s : sim_vl) { + s.second->RunFrom(masm.GetLabelAddress<Instruction *>(&test)); + initial_state_vl[s.first] = state_hash; + if (s.first == 128) seen_states.insert(state_hash); + } + + masm.GetBuffer()->SetWritable(); + masm.Reset(); + + // Count number of failed instructions, in order to allow changing instruction + // candidate strategy. + int miss_count = 0; + + while (useful_insts.size() < count) { + miss_count++; + + Instr inst; + if (cmdline_encoding != 0) { + // Initial instruction encoding supplied on the command line. + inst = cmdline_encoding; + cmdline_encoding = 0; + } else if (useful_insts.empty() || random_only || (miss_count > 10000)) { + // LCG-random instruction. + inst = static_cast<Instr>(mrand48()); + } else { + // Instruction based on mutation of last successful instruction. + inst = Mutate(useful_insts.back().inst); + } + + trial_decoder.Decode(reinterpret_cast<Instruction *>(&inst)); + if (std::regex_search(reporter.MoveForm(), std::regex(target_re))) { + // Disallow "unimplemented" instructions. + std::string buffer_s(buffer); + if (buffer_s.find("unimplemented") != std::string::npos) continue; + + // Disallow instructions with "sp" in their arguments, as we don't support + // instructions operating on memory, and the OS expects sp to be valid for + // signal handlers, etc. + size_t space = buffer_s.find(' '); + if ((space != std::string::npos) && + (buffer_s.substr(space).find("sp") != std::string::npos)) + continue; + + fprintf(stderr, "Trying 0x%08x (%s)\n", inst, buffer); + + // TODO: factorise this code into a CalculateState helper function. + + // Initialise the machine to a known state. 
+ masm.PushCalleeSavedRegisters(); + SetInitialMachineState(&masm, input_set); + + { + ExactAssemblyScope scope(&masm, + (useful_insts.size() + 1) * kInstructionSize); + + // Emit any instructions already found to move the state to somewhere + // new. + for (const InstrData &i : useful_insts) { + masm.dci(i.inst); + } + + // Try a new instruction. + masm.dci(inst); + } + + // Compute the new state of the machine. + ComputeMachineStateHash(&masm, &state_hash); + masm.PopCalleeSavedRegisters(); + masm.Ret(); + masm.FinalizeCode(); + masm.GetBuffer()->SetExecutable(); + + // Try the new instruction for VL128. + sim_vl[128]->RunFrom(masm.GetLabelAddress<Instruction *>(&test)); + state_hash_vl[128] = state_hash; + + if (seen_states.count(state_hash_vl[128]) == 0) { + // A new state! Run for all VLs, record it, add the instruction to the + // list of useful ones. + + for (std::pair<int, Simulator *> s : sim_vl) { + if (s.first == 128) continue; + s.second->RunFrom(masm.GetLabelAddress<Instruction *>(&test)); + state_hash_vl[s.first] = state_hash; + } + + seen_states.insert(state_hash_vl[128]); + useful_insts.push_back({inst, buffer, state_hash_vl[128]}); + miss_count = 0; + } else { + // Machine already reached here. Probably not an interesting + // instruction. NB. it's possible for an instruction to reach the same + // machine state as two or more others, but for these purposes, let's + // call that not useful. + fprintf(stderr, + "Already reached state 0x%08x, skipping 0x%08x, miss_count " + "%d\n", + state_hash_vl[128], + inst, + miss_count); + } + + // Restart generation. + masm.GetBuffer()->SetWritable(); + masm.Reset(); + } + } + + // Emit test case based on identified instructions and associated hashes. + printf("TEST_SVE(sve2_%s) {\n", target_re.c_str()); + printf( + " SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2, " + "CPUFeatures::kNEON, " + "CPUFeatures::kCRC32);\n"); + printf(" START();\n\n"); + printf((input_set == kFpInputSet) + ? 
" SetInitialMachineState(&masm, kFpInputSet);\n" + : " SetInitialMachineState(&masm);\n"); + printf(" // state = 0x%08x\n\n", initial_state_vl[128]); + + printf(" {\n"); + printf(" ExactAssemblyScope scope(&masm, %lu * kInstructionSize);\n", + useful_insts.size()); + for (InstrData &i : useful_insts) { + printf(" __ dci(0x%08x); // %s\n", i.inst, i.disasm.c_str()); + printf(" // vl128 state = 0x%08x\n", i.state_hash); + } + printf(" }\n\n"); + printf(" uint32_t state;\n"); + printf(" ComputeMachineStateHash(&masm, &state);\n"); + printf(" __ Mov(x0, reinterpret_cast<uint64_t>(&state));\n"); + printf(" __ Ldr(w0, MemOperand(x0));\n\n"); + printf(" END();\n"); + printf(" if (CAN_RUN()) {\n"); + printf(" RUN();\n"); + printf(" uint32_t expected_hashes[] = {\n"); + for (std::pair<int, uint32_t> h : state_hash_vl) { + printf(" 0x%08x,\n", h.second); + } + printf(" };\n"); + printf( + " ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - " + "1], x0);\n"); + printf(" }\n}\n"); + + return 0; +} +#endif diff --git a/test/test-pool-manager.cc b/test/test-pool-manager.cc index 0073d4f6..df2f32b9 100644 --- a/test/test-pool-manager.cc +++ b/test/test-pool-manager.cc @@ -421,9 +421,9 @@ TEST(FuzzObjectDeletedWhenPlaced) { // Remove bound objects. for (std::vector<TestObject *>::iterator iter = objects.begin(); iter != objects.end();) { - TestObject *object = *iter; - if (object->IsBound()) { - delete object; + TestObject *obj = *iter; + if (obj->IsBound()) { + delete obj; iter = objects.erase(iter); } else { ++iter; @@ -494,7 +494,7 @@ TEST(FuzzObjectUpdatedWhenPlaced) { // Pick another random label to bind. const int kProbabilityToBind = 20; if ((Random() % 100) < kProbabilityToBind) { - TestBranchObject *object = objects[RandomObjectID(objects.size())]; + TestBranchObject *object2 = objects[RandomObjectID(objects.size())]; // Binding can cause the pool emission, so check if we need to emit // the pools. 
The actual backends will know the max alignment we // might need here, so can simplify the check (won't need to check @@ -503,15 +503,15 @@ TEST(FuzzObjectUpdatedWhenPlaced) { if (pool_manager.MustEmit(pc, max_padding)) { pc = pool_manager.Emit(&masm, pc, max_padding); } - pc = pool_manager.Bind(&masm, object, pc); + pc = pool_manager.Bind(&masm, object2, pc); } // Remove bound objects. for (std::vector<TestBranchObject *>::iterator iter = objects.begin(); iter != objects.end();) { - TestBranchObject *object = *iter; - if (object->IsBound()) { - delete object; + TestBranchObject *obj = *iter; + if (obj->IsBound()) { + delete obj; iter = objects.erase(iter); } else { ++iter; @@ -818,9 +818,9 @@ TEST(MustEmitNewReferenceDueToSizeOfObject) { { // If the object is smaller, we can emit the reference. TestObject smaller_object(kBigObjectSize - 4, 1); - ForwardReference<int32_t> temp_ref(pc, kBranchSize, pc, pc + kPoolSize); + ForwardReference<int32_t> temp_ref2(pc, kBranchSize, pc, pc + kPoolSize); VIXL_ASSERT( - !pool_manager.MustEmit(pc, kBranchSize, &temp_ref, &smaller_object)); + !pool_manager.MustEmit(pc, kBranchSize, &temp_ref2, &smaller_object)); // If the reference is going to be added after the current objects in the // pool, we can still emit it. diff --git a/test/test-runner.h b/test/test-runner.h index ffc8c2ad..bb72ce67 100644 --- a/test/test-runner.h +++ b/test/test-runner.h @@ -32,9 +32,12 @@ namespace vixl { -// Each actual test is represented by a Test instance. +// Each test is represented by a Test instance. // Tests are appended to a static linked list upon creation. class Test { + typedef void(TestFunction)(); + typedef void(TestFunctionWithConfig)(Test* config); + public: // Most tests require no per-test configuration, and so take no arguments. A // few tests require dynamic configuration, and are passed a `Test` object. 
@@ -52,6 +55,17 @@ class Test { last_ = this; } + static Test* MakeSVETest(int vl, + const char* name, + TestFunctionWithConfig* fn) { + // We never free this memory, but we need it to live for as long as the + // static + // linked list of tests, and this is the easiest way to do it. + Test* test = new Test(name, fn); + test->set_sve_vl_in_bits(vl); + return test; + } + const char* name() { return name_; } void run(); @@ -98,9 +112,6 @@ class Test { generate_test_trace_ = value; } - typedef void(TestFunction)(); - typedef void(TestFunctionWithConfig)(Test* config); - private: const char* name_; diff --git a/test/test-trace-reference/log-all b/test/test-trace-reference/log-all index 7f4dc150..9e9904ab 100644 --- a/test/test-trace-reference/log-all +++ b/test/test-trace-reference/log-all @@ -1420,9 +1420,9 @@ 0x~~~~~~~~~~~~~~~~ 9e42d90f scvtf d15, x8, #10 # d15: 0x0000000000000000 0x~~~~~~~~~~~~~~~~ 5e21d887 scvtf s7, s4 -# v7: 0x00000000000000007ff000004e81442e +# v7: 0x0000000000000000000000004e81442e 0x~~~~~~~~~~~~~~~~ 5f32e5e8 scvtf s8, s15, #14 -# v8: 0x0000000000000000c004000000000000 +# v8: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 1e22015d scvtf s29, w10 # s29: 0x00000000 0x~~~~~~~~~~~~~~~~ 1e02d6af scvtf s15, w21, #11 @@ -1444,9 +1444,9 @@ 0x~~~~~~~~~~~~~~~~ 9e4377db ucvtf d27, x30, #35 # d27: 0x0000000000000000 0x~~~~~~~~~~~~~~~~ 7e21d8ab ucvtf s11, s5 -# v11: 0x0000000000000000400000004f7fe000 +# v11: 0x0000000000000000000000004f7fe000 0x~~~~~~~~~~~~~~~~ 7f32e6e0 ucvtf s0, s23, #14 -# v0: 0x000000000000000043d21c00480a8294 +# v0: 0x000000000000000000000000480a8294 0x~~~~~~~~~~~~~~~~ 1e230274 ucvtf s20, w19 # s20: 0x00000000 0x~~~~~~~~~~~~~~~~ 1e03bad5 ucvtf s21, w22, #18 @@ -1456,9 +1456,9 @@ 0x~~~~~~~~~~~~~~~~ 9e03ac47 ucvtf s7, x2, #21 # s7: 0x38ff0000 0x~~~~~~~~~~~~~~~~ 5ee0b813 abs d19, d0 -# v19: 0x000000000000000043d21c00480a8294 +# v19: 0x000000000000000000000000480a8294 0x~~~~~~~~~~~~~~~~ 4e20b970 abs v16.16b, v11.16b -# v16: 
0x0000000000000000400000004f7f2000 +# v16: 0x0000000000000000000000004f7f2000 0x~~~~~~~~~~~~~~~~ 4ee0bbe0 abs v0.2d, v31.2d # v0: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0ea0bb3b abs v27.2s, v25.2s @@ -1478,7 +1478,7 @@ 0x~~~~~~~~~~~~~~~~ 4eee87ea add v10.2d, v31.2d, v14.2d # v10: 0x495000018a83940168a6954c14cfd693 0x~~~~~~~~~~~~~~~~ 0eb385cf add v15.2s, v14.2s, v19.2s -# v15: 0x00000000000000006328b14b89d7c527 +# v15: 0x00000000000000001f56954b89d7c527 0x~~~~~~~~~~~~~~~~ 0e7186fb add v27.4h, v23.4h, v17.4h # v27: 0x0000000000000000495000000a029400 0x~~~~~~~~~~~~~~~~ 4ebd8799 add v25.4s, v28.4s, v29.4s @@ -1488,7 +1488,7 @@ 0x~~~~~~~~~~~~~~~~ 4e618444 add v4.8h, v2.8h, v1.8h # v4: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0eaf41ca addhn v10.2s, v14.2d, v15.2d -# v10: 0x000000000000000000000000827f4696 +# v10: 0x0000000000000000000000003ead2a96 0x~~~~~~~~~~~~~~~~ 0e7a43ca addhn v10.4h, v30.4s, v26.4s # v10: 0x00000000000000000000000000003c7f 0x~~~~~~~~~~~~~~~~ 0e36419f addhn v31.8b, v12.8h, v22.8h @@ -1500,15 +1500,15 @@ 0x~~~~~~~~~~~~~~~~ 4e7140ff addhn2 v31.8h, v7.4s, v17.4s # v31: 0x000000000000b87f000000000000ffff 0x~~~~~~~~~~~~~~~~ 5ef1ba6e addp d14, v19.2d -# v14: 0x000000000000000043d21c00480a8294 +# v14: 0x000000000000000000000000480a8294 0x~~~~~~~~~~~~~~~~ 4e3cbd03 addp v3.16b, v8.16b, v28.16b -# v3: 0x0000000020febf0000000000c4000000 +# v3: 0x0000000020febf000000000000000000 0x~~~~~~~~~~~~~~~~ 4ef1bca8 addp v8.2d, v5.2d, v17.2d # v8: 0x000000007f8000003effffffffe00000 0x~~~~~~~~~~~~~~~~ 0ebabfd6 addp v22.2s, v30.2s, v26.2s # v22: 0x00000000000000003c7fffff00000000 0x~~~~~~~~~~~~~~~~ 0e6ebf1d addp v29.4h, v24.4h, v14.4h -# v29: 0x00000000000000005fd2ca9e00000000 +# v29: 0x00000000000000000000ca9e00000000 0x~~~~~~~~~~~~~~~~ 4eb8bf5e addp v30.4s, v26.4s, v24.4s # v30: 0x0000000000000000000000003c7fffff 0x~~~~~~~~~~~~~~~~ 0e27bf4c addp v12.8b, v26.8b, v7.8b @@ -1522,7 +1522,7 @@ 0x~~~~~~~~~~~~~~~~ 0e71bbdb addv h27, v30.4h # v27: 
0x00000000000000000000000000003c7e 0x~~~~~~~~~~~~~~~~ 4e71b9d3 addv h19, v14.8h -# v19: 0x00000000000000000000000000002a70 +# v19: 0x0000000000000000000000000000ca9e 0x~~~~~~~~~~~~~~~~ 4eb1bb6e addv s14, v27.4s # v14: 0x00000000000000000000000000003c7e 0x~~~~~~~~~~~~~~~~ 4e3b1d0a and v10.16b, v8.16b, v27.16b @@ -1530,13 +1530,13 @@ 0x~~~~~~~~~~~~~~~~ 0e301c25 and v5.8b, v1.8b, v16.8b # v5: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4e781c7a bic v26.16b, v3.16b, v24.16b -# v26: 0x0000000020febf0000000000c4000000 +# v26: 0x0000000020febf000000000000000000 0x~~~~~~~~~~~~~~~~ 2f075487 bic v7.2s, #0xe4, lsl #16 # v7: 0x000000000000000000000000381b0000 0x~~~~~~~~~~~~~~~~ 2f01b47c bic v28.4h, #0x23, lsl #8 # v28: 0x000000000000000040dfdcffdcc00000 0x~~~~~~~~~~~~~~~~ 6f05159d bic v29.4s, #0xac, lsl #0 -# v29: 0x00000000000000005fd2ca1200000000 +# v29: 0x00000000000000000000ca1200000000 0x~~~~~~~~~~~~~~~~ 0e751fec bic v12.8b, v31.8b, v21.8b # v12: 0x0000000000000000000000000000ffff 0x~~~~~~~~~~~~~~~~ 6f049712 bic v18.8h, #0x98, lsl #0 @@ -1546,13 +1546,13 @@ 0x~~~~~~~~~~~~~~~~ 2efb1ee2 bif v2.8b, v23.8b, v27.8b # v2: 0x0000000000000000495000008a828000 0x~~~~~~~~~~~~~~~~ 6ead1c68 bit v8.16b, v3.16b, v13.16b -# v8: 0x000000007f8000003effffffcc000000 +# v8: 0x000000007f8000003effffffc8000000 0x~~~~~~~~~~~~~~~~ 2eb71ca5 bit v5.8b, v5.8b, v23.8b # v5: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e771fe9 bsl v9.16b, v31.16b, v23.16b # v9: 0x0100000008009801010000000800dc00 0x~~~~~~~~~~~~~~~~ 2e631cee bsl v14.8b, v7.8b, v3.8b -# v14: 0x000000000000000000000000c4000000 +# v14: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4e2048bd cls v29.16b, v5.16b # v29: 0x07070707070707070707070707070707 0x~~~~~~~~~~~~~~~~ 0ea04815 cls v21.2s, v0.2s @@ -1564,7 +1564,7 @@ 0x~~~~~~~~~~~~~~~~ 0e204893 cls v19.8b, v4.8b # v19: 0x00000000000000000707070707070707 0x~~~~~~~~~~~~~~~~ 4e6049cf cls v15.8h, v14.8h -# v15: 0x000f000f000f000f000f000f0001000f +# v15: 
0x000f000f000f000f000f000f000f000f 0x~~~~~~~~~~~~~~~~ 6e204881 clz v1.16b, v4.16b # v1: 0x08080808080808080808080808080808 0x~~~~~~~~~~~~~~~~ 2ea04a3b clz v27.2s, v17.2s @@ -1572,17 +1572,17 @@ 0x~~~~~~~~~~~~~~~~ 2e604929 clz v9.4h, v9.4h # v9: 0x00000000000000000007001000040000 0x~~~~~~~~~~~~~~~~ 6ea049ff clz v31.4s, v15.4s -# v31: 0x0000000c0000000c0000000c0000000f +# v31: 0x0000000c0000000c0000000c0000000c 0x~~~~~~~~~~~~~~~~ 2e204a6e clz v14.8b, v19.8b # v14: 0x00000000000000000505050505050505 0x~~~~~~~~~~~~~~~~ 6e604966 clz v6.8h, v11.8h -# v6: 0x00100010001000100001001000010000 +# v6: 0x00100010001000100010001000010000 0x~~~~~~~~~~~~~~~~ 7efd8cb2 cmeq d18, d5, d29 # v18: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 5ee09bee cmeq d14, d31, #0 # v14: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e368c73 cmeq v19.16b, v3.16b, v22.16b -# v19: 0xffffffff000000ff0000000000ffffff +# v19: 0xffffffff000000ff00000000ffffffff 0x~~~~~~~~~~~~~~~~ 4e20992f cmeq v15.16b, v9.16b, #0 # v15: 0xffffffffffffffffff00ff00ff00ffff 0x~~~~~~~~~~~~~~~~ 6eea8e0c cmeq v12.2d, v16.2d, v10.2d @@ -1622,15 +1622,15 @@ 0x~~~~~~~~~~~~~~~~ 6ee08ae6 cmge v6.2d, v23.2d, #0 # v6: 0xffffffffffffffffffffffffffffffff 0x~~~~~~~~~~~~~~~~ 0ea33ed9 cmge v25.2s, v22.2s, v3.2s -# v25: 0x000000000000000000000000ffffffff +# v25: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 2ea08975 cmge v21.2s, v11.2s, #0 # v21: 0x0000000000000000ffffffffffffffff 0x~~~~~~~~~~~~~~~~ 0e6c3c70 cmge v16.4h, v3.4h, v12.4h -# v16: 0x0000000000000000ffffffff0000ffff +# v16: 0x0000000000000000ffffffffffffffff 0x~~~~~~~~~~~~~~~~ 2e608937 cmge v23.4h, v9.4h, #0 # v23: 0x0000000000000000ffffffffffffffff 0x~~~~~~~~~~~~~~~~ 4eab3c47 cmge v7.4s, v2.4s, v11.4s -# v7: 0xffffffffffffffff0000000000000000 +# v7: 0xffffffffffffffffffffffff00000000 0x~~~~~~~~~~~~~~~~ 6ea08ac0 cmge v0.4s, v22.4s, #0 # v0: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0e293fca cmge v10.8b, v30.8b, v9.8b @@ -1662,11 +1662,11 @@ 
0x~~~~~~~~~~~~~~~~ 0e608876 cmgt v22.4h, v3.4h, #0 # v22: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4ebb3565 cmgt v5.4s, v11.4s, v27.4s -# v5: 0x0000000000000000ffffffffffffffff +# v5: 0x000000000000000000000000ffffffff 0x~~~~~~~~~~~~~~~~ 4ea08a8d cmgt v13.4s, v20.4s, #0 # v13: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0e2737fb cmgt v27.8b, v31.8b, v7.8b -# v27: 0x0000000000000000000000ff000000ff +# v27: 0x0000000000000000ffffffff000000ff 0x~~~~~~~~~~~~~~~~ 0e208805 cmgt v5.8b, v0.8b, #0 # v5: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4e6d3796 cmgt v22.8h, v28.8h, v13.8h @@ -1688,7 +1688,7 @@ 0x~~~~~~~~~~~~~~~~ 2e3c3707 cmhi v7.8b, v24.8b, v28.8b # v7: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e79354b cmhi v11.8h, v10.8h, v25.8h -# v11: 0x0000000000000000ffffffff00000000 +# v11: 0x0000000000000000ffffffffffff0000 0x~~~~~~~~~~~~~~~~ 7ef13d81 cmhs d1, d12, d17 # v1: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e3e3f35 cmhs v21.16b, v25.16b, v30.16b @@ -1730,11 +1730,11 @@ 0x~~~~~~~~~~~~~~~~ 0ea0ab99 cmlt v25.2s, v28.2s, #0 # v25: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0e60a960 cmlt v0.4h, v11.4h, #0 -# v0: 0x0000000000000000ffffffff00000000 +# v0: 0x0000000000000000ffffffffffff0000 0x~~~~~~~~~~~~~~~~ 4ea0a8b8 cmlt v24.4s, v5.4s, #0 # v24: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0e20a97a cmlt v26.8b, v11.8b, #0 -# v26: 0x0000000000000000ffffffff00000000 +# v26: 0x0000000000000000ffffffffffff0000 0x~~~~~~~~~~~~~~~~ 4e60aaa1 cmlt v1.8h, v21.8h, #0 # v1: 0x0000000000000000ffffffffffffffff 0x~~~~~~~~~~~~~~~~ 5efe8efc cmtst d28, d23, d30 @@ -9708,7 +9708,7 @@ 0x~~~~~~~~~~~~~~~~ 0e61682b fcvtn v11.2s, v1.2d # v11: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0e216848 fcvtn v8.4h, v2.4s -# v8: 0x37a00000000000000000000000000000 +# v8: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4e616bb8 fcvtn2 v24.4s, v29.2d # v24: 0x0000000000000000377f0000377f0000 0x~~~~~~~~~~~~~~~~ 
4e216944 fcvtn2 v4.8h, v10.4s @@ -9766,41 +9766,41 @@ 0x~~~~~~~~~~~~~~~~ 6f2efed3 fcvtzu v19.4s, v22.4s, #18 # v19: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e6ffd0f fdiv v15.2d, v8.2d, v15.2d -# v15: 0x7ff00000000000007ff8000000000000 +# v15: 0x7ff80000000000007ff8000000000000 0x~~~~~~~~~~~~~~~~ 2e3afd2c fdiv v12.2s, v9.2s, v26.2s # v12: 0x00000000000000007fc000007fc00000 0x~~~~~~~~~~~~~~~~ 6e33fed3 fdiv v19.4s, v22.4s, v19.4s # v19: 0xffffffffffffffffffffffffffffffff 0x~~~~~~~~~~~~~~~~ 4e68f4f3 fmax v19.2d, v7.2d, v8.2d -# v19: 0x37a00000000000000000000000000000 +# v19: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 0e3df599 fmax v25.2s, v12.2s, v29.2s # v25: 0x00000000000000007fc000007fc00000 0x~~~~~~~~~~~~~~~~ 4e25f5e6 fmax v6.4s, v15.4s, v5.4s -# v6: 0x7ff00000000000007ff8000000000000 +# v6: 0x7ff80000000000007ff8000000000000 0x~~~~~~~~~~~~~~~~ 4e74c510 fmaxnm v16.2d, v8.2d, v20.2d -# v16: 0x37a0000000000000000000000180fe00 +# v16: 0x0000000000000000000000000180fe00 0x~~~~~~~~~~~~~~~~ 0e39c74f fmaxnm v15.2s, v26.2s, v25.2s # v15: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4e30c5d7 fmaxnm v23.4s, v14.4s, v16.4s -# v23: 0x37a0000000000000000000000180fe00 +# v23: 0x0000000000000000000000000180fe00 0x~~~~~~~~~~~~~~~~ 7e70ca66 fmaxnmp d6, v19.2d -# v6: 0x000000000000000037a0000000000000 +# v6: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 7e30cb5b fmaxnmp s27, v26.2s # v27: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e77c588 fmaxnmp v8.2d, v12.2d, v23.2d -# v8: 0x37a00000000000007fc000007fc00000 +# v8: 0x000000000180fe007fc000007fc00000 0x~~~~~~~~~~~~~~~~ 2e36c72d fmaxnmp v13.2s, v25.2s, v22.2s # v13: 0x0000000000000000ffffffff7fc00000 0x~~~~~~~~~~~~~~~~ 6e31c56f fmaxnmp v15.4s, v11.4s, v17.4s # v15: 0xffffffff000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e30ca7b fmaxnmv s27, v19.4s -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 7e70f9d4 fmaxp d20, v14.2d # 
v20: 0x0000000000000000ffffffff00000000 0x~~~~~~~~~~~~~~~~ 7e30f852 fmaxp s18, v2.2s # v18: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e7ff6e9 fmaxp v9.2d, v23.2d, v31.2d -# v9: 0x00000000ffffffff37a0000000000000 +# v9: 0x00000000ffffffff000000000180fe00 0x~~~~~~~~~~~~~~~~ 2e3ff6c7 fmaxp v7.2s, v22.2s, v31.2s # v7: 0x0000000000000000ffffffffffffffff 0x~~~~~~~~~~~~~~~~ 6e3df4f2 fmaxp v18.4s, v7.4s, v29.4s @@ -9843,7 +9843,7 @@ # v0: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s # v25: 0x0000000000000000000000007fc00000 -0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] +0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] # v23: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] # v23: 0x00000000000000000000000000000000 @@ -9858,9 +9858,9 @@ 0x~~~~~~~~~~~~~~~~ 4e2bcd70 fmla v16.4s, v11.4s, v11.4s # v16: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4f891afb fmla v27.4s, v23.4s, v9.s[2] -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 5fc653db fmls d27, d30, v6.d[0] -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 5f825215 fmls s21, s16, v2.s[0] # v21: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4ef5ce65 fmls v5.2d, v19.2d, v21.2d @@ -9872,7 +9872,7 @@ 0x~~~~~~~~~~~~~~~~ 0fab5243 fmls v3.2s, v18.2s, v11.s[1] # v3: 0x0000000000000000000000007fffffff 0x~~~~~~~~~~~~~~~~ 4ebeccbb fmls v27.4s, v5.4s, v30.4s -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4fa45a9a fmls v26.4s, v20.4s, v4.s[3] # v26: 0x00000000000000007fffffff00000000 0x~~~~~~~~~~~~~~~~ 6f06f6ce fmov v14.2d, #0xd6 (-0.3438) @@ -9885,13 +9885,13 @@ # v28: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] # x18: 0x0000000000000000 -0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] +0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, 
d4, v1.d[1] # v12: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] # v30: 0x000000000000000000000000ffffffff 0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d # v25: 0x00000000000000000000000000000000 -0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] +0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] # v10: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s # v7: 0x00000000000000000000000000000000 @@ -9901,7 +9901,7 @@ # v5: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] # v11: 0x7fc000007fc000007fc000007fffffff -0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] +0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] # v28: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] # v25: 0x00000000000000000000000000000000 @@ -9928,7 +9928,7 @@ 0x~~~~~~~~~~~~~~~~ 0ea1daca frecpe v10.2s, v22.2s # v10: 0x00000000000000007f8000007f800000 0x~~~~~~~~~~~~~~~~ 4ea1d8c5 frecpe v5.4s, v6.4s -# v5: 0x7f8000007f800000474c80007f800000 +# v5: 0x7f8000007f8000007f8000007f800000 0x~~~~~~~~~~~~~~~~ 4e7afcf6 frecps v22.2d, v7.2d, v26.2d # v22: 0x40000000000000004000000000000000 0x~~~~~~~~~~~~~~~~ 0e22ff7f frecps v31.2s, v27.2s, v2.2s @@ -10020,7 +10020,7 @@ 0x~~~~~~~~~~~~~~~~ 2e21d88b ucvtf v11.2s, v4.2s # v11: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 2f27e47d ucvtf v29.2s, v3.2s, #25 -# v29: 0x7fc000007fc000000000000000000000 +# v29: 0x00000000000000000000000000000000 0x~~~~~~~~~~~~~~~~ 6e21daf6 ucvtf v22.4s, v23.4s # v22: 0x4effe000000000004e001a4000000000 0x~~~~~~~~~~~~~~~~ 6f27e532 ucvtf v18.4s, v9.4s, #25 @@ -11173,10 +11173,10 @@ 0x~~~~~~~~~~~~~~~~ e551ec06 st3w {z6.s, z7.s, z8.s}, p3, [x0, #3, mul vl] # z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000) # z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000) -# z8<127:0>: 0x37a00000000000007fc000007fc00000 
(0.000, 0.000, ..., 0.000) +# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000) # ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ -# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ -# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ +# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ # z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) # z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) # z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) @@ -11198,10 +11198,10 @@ # ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ 0x~~~~~~~~~~~~~~~~ e5c27007 st3d {z7.d, z8.d, z9.d}, p4, [x0, x2, lsl #3] # z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000) -# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000) +# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000) # z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ -# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ # z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) @@ -11223,9 +11223,9 @@ # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ 0x~~~~~~~~~~~~~~~~ a441f418 ld3b {z24.b, z25.b, z26.b}, p5/z, [x0, #3, mul vl] -# z24<127:0>: 0x0000000000000000000000ff00000000 -# z25<127:0>: 0xa000000000000000000000ffc0000000 -# 
z26<127:0>: 0x3700000000000000000000007f000000 +# z24<127:0>: 0x00000000fe000000000000ff00000000 +# z25<127:0>: 0x0000000080000000000000ffc0000000 +# z26<127:0>: 0x0000000001000000000000007f000000 # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ @@ -11236,10 +11236,10 @@ # ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ -# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ +# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ -# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~ +# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # z24<255:128>: 0x00000000000000000000000000000000 # z25<255:128>: 0x00000000000000000000000000000000 # z26<255:128>: 0x00000000000000000000000000000000 @@ -11366,11 +11366,11 @@ 0x~~~~~~~~~~~~~~~~ a541f81a ld3w {z26.s, z27.s, z28.s}, p6/z, [x0, #3, mul vl] # z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000) # z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000) -# z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000) +# z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000) # ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~ -# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ -# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ +# ╙───────────────────────── 
0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) # z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) # z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) @@ -11401,10 +11401,10 @@ # ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ 0x~~~~~~~~~~~~~~~~ a5c1f41b ld3d {z27.d, z28.d, z29.d}, p5/z, [x0, #3, mul vl] # z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000) -# z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000) -# z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000) +# z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000) +# z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000) # ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ -# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ # z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) @@ -11856,5 +11856,685 @@ # z31<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16) +# sp: 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555 +# x0: 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp] +# x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] +# w0: 0x00000055 +# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 
38a003e1 ldaddab w0, w1, [sp] +# w1: 0x000000aa +# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] +# w2: 0x000000ff +# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] +# w3: 0x00000054 +# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] +# w0: 0x000055a9 +# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] +# w1: 0x000055fe +# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] +# w2: 0x0000aba7 +# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] +# w3: 0x00000150 +# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] +# w0: 0x555556f9 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] +# w1: 0x5555aca2 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] +# w2: 0xaaab039b <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] +# w3: 0x00005a94 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 
f8e003e3 ldaddal x0, x3, [sp] +# x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] +# w0: 0x00000095 +# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] +# w3: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] +# w0: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] 
+# w1: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] +# w2: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] +# w3: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] +# w0: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] +# w1: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] +# w2: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] +# w3: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] +# x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] +# wzr: 
0x00000000 +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] +# w1: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] +# w3: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] +# w0: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] +# w1: 0x00004600 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] +# w3: 0x00004600 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] +# w0: 0x0003009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] +# w1: 0x00034600 <- 
0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] +# w2: 0x0000469d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] +# w3: 0x00034600 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] +# x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ 
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] +# w0: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] +# w1: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] +# w2: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] +# w3: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] +# w1: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] +# w2: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] +# w3: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] +# w0: 0x0003ff9d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# 
x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] +# w0: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] +# w1: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] +# w2: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] +# w3: 0xffffff9d +# ╙─ 0x9d <- 
0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] +# wzr: 0x00000000 +# 
╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] +# w3: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] +# w3: 0x0000009d +# ╙─ 0x009d 
<- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# 
x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] +# w3: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 
0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] +# w1: 0x00000000 +# ╙─ 0x00 <- 
0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] +# w2: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] +# w3: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] +# w0: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] +# w1: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] +# w2: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] +# w3: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] +# w0: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] +# w1: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] +# w2: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] +# w3: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] +# x3: 0x0000000000000000 <- 
0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] +# wzr: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16) +# sp: 0x~~~~~~~~~~~~~~~~ 0x~~~~~~~~~~~~~~~~ d65f03c0 ret # Branch to 0x~~~~~~~~~~~~~~~~. 
diff --git a/test/test-trace-reference/log-all-colour b/test/test-trace-reference/log-all-colour index a844829f..bf5ec20f 100644 --- a/test/test-trace-reference/log-all-colour +++ b/test/test-trace-reference/log-all-colour @@ -1420,9 +1420,9 @@ 0x~~~~~~~~~~~~~~~~ 9e42d90f scvtf d15, x8, #10 # [1;35m d15: [0;35m 0x0000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 5e21d887 scvtf s7, s4 -# [1;35m v7: [0;35m0x00000000000000007ff000004e81442e[0;m +# [1;35m v7: [0;35m0x0000000000000000000000004e81442e[0;m 0x~~~~~~~~~~~~~~~~ 5f32e5e8 scvtf s8, s15, #14 -# [1;35m v8: [0;35m0x0000000000000000c004000000000000[0;m +# [1;35m v8: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 1e22015d scvtf s29, w10 # [1;35m s29: [0;35m 0x00000000[0;m 0x~~~~~~~~~~~~~~~~ 1e02d6af scvtf s15, w21, #11 @@ -1444,9 +1444,9 @@ 0x~~~~~~~~~~~~~~~~ 9e4377db ucvtf d27, x30, #35 # [1;35m d27: [0;35m 0x0000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 7e21d8ab ucvtf s11, s5 -# [1;35m v11: [0;35m0x0000000000000000400000004f7fe000[0;m +# [1;35m v11: [0;35m0x0000000000000000000000004f7fe000[0;m 0x~~~~~~~~~~~~~~~~ 7f32e6e0 ucvtf s0, s23, #14 -# [1;35m v0: [0;35m0x000000000000000043d21c00480a8294[0;m +# [1;35m v0: [0;35m0x000000000000000000000000480a8294[0;m 0x~~~~~~~~~~~~~~~~ 1e230274 ucvtf s20, w19 # [1;35m s20: [0;35m 0x00000000[0;m 0x~~~~~~~~~~~~~~~~ 1e03bad5 ucvtf s21, w22, #18 @@ -1456,9 +1456,9 @@ 0x~~~~~~~~~~~~~~~~ 9e03ac47 ucvtf s7, x2, #21 # [1;35m s7: [0;35m 0x38ff0000[0;m 0x~~~~~~~~~~~~~~~~ 5ee0b813 abs d19, d0 -# [1;35m v19: [0;35m0x000000000000000043d21c00480a8294[0;m +# [1;35m v19: [0;35m0x000000000000000000000000480a8294[0;m 0x~~~~~~~~~~~~~~~~ 4e20b970 abs v16.16b, v11.16b -# [1;35m v16: [0;35m0x0000000000000000400000004f7f2000[0;m +# [1;35m v16: [0;35m0x0000000000000000000000004f7f2000[0;m 0x~~~~~~~~~~~~~~~~ 4ee0bbe0 abs v0.2d, v31.2d # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0ea0bb3b abs v27.2s, v25.2s @@ -1478,7 +1478,7 @@ 0x~~~~~~~~~~~~~~~~ 4eee87ea add v10.2d, 
v31.2d, v14.2d # [1;35m v10: [0;35m0x495000018a83940168a6954c14cfd693[0;m 0x~~~~~~~~~~~~~~~~ 0eb385cf add v15.2s, v14.2s, v19.2s -# [1;35m v15: [0;35m0x00000000000000006328b14b89d7c527[0;m +# [1;35m v15: [0;35m0x00000000000000001f56954b89d7c527[0;m 0x~~~~~~~~~~~~~~~~ 0e7186fb add v27.4h, v23.4h, v17.4h # [1;35m v27: [0;35m0x0000000000000000495000000a029400[0;m 0x~~~~~~~~~~~~~~~~ 4ebd8799 add v25.4s, v28.4s, v29.4s @@ -1488,7 +1488,7 @@ 0x~~~~~~~~~~~~~~~~ 4e618444 add v4.8h, v2.8h, v1.8h # [1;35m v4: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0eaf41ca addhn v10.2s, v14.2d, v15.2d -# [1;35m v10: [0;35m0x000000000000000000000000827f4696[0;m +# [1;35m v10: [0;35m0x0000000000000000000000003ead2a96[0;m 0x~~~~~~~~~~~~~~~~ 0e7a43ca addhn v10.4h, v30.4s, v26.4s # [1;35m v10: [0;35m0x00000000000000000000000000003c7f[0;m 0x~~~~~~~~~~~~~~~~ 0e36419f addhn v31.8b, v12.8h, v22.8h @@ -1500,15 +1500,15 @@ 0x~~~~~~~~~~~~~~~~ 4e7140ff addhn2 v31.8h, v7.4s, v17.4s # [1;35m v31: [0;35m0x000000000000b87f000000000000ffff[0;m 0x~~~~~~~~~~~~~~~~ 5ef1ba6e addp d14, v19.2d -# [1;35m v14: [0;35m0x000000000000000043d21c00480a8294[0;m +# [1;35m v14: [0;35m0x000000000000000000000000480a8294[0;m 0x~~~~~~~~~~~~~~~~ 4e3cbd03 addp v3.16b, v8.16b, v28.16b -# [1;35m v3: [0;35m0x0000000020febf0000000000c4000000[0;m +# [1;35m v3: [0;35m0x0000000020febf000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4ef1bca8 addp v8.2d, v5.2d, v17.2d # [1;35m v8: [0;35m0x000000007f8000003effffffffe00000[0;m 0x~~~~~~~~~~~~~~~~ 0ebabfd6 addp v22.2s, v30.2s, v26.2s # [1;35m v22: [0;35m0x00000000000000003c7fffff00000000[0;m 0x~~~~~~~~~~~~~~~~ 0e6ebf1d addp v29.4h, v24.4h, v14.4h -# [1;35m v29: [0;35m0x00000000000000005fd2ca9e00000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000ca9e00000000[0;m 0x~~~~~~~~~~~~~~~~ 4eb8bf5e addp v30.4s, v26.4s, v24.4s # [1;35m v30: [0;35m0x0000000000000000000000003c7fffff[0;m 0x~~~~~~~~~~~~~~~~ 0e27bf4c addp v12.8b, v26.8b, v7.8b @@ -1522,7 +1522,7 @@ 0x~~~~~~~~~~~~~~~~ 
0e71bbdb addv h27, v30.4h # [1;35m v27: [0;35m0x00000000000000000000000000003c7e[0;m 0x~~~~~~~~~~~~~~~~ 4e71b9d3 addv h19, v14.8h -# [1;35m v19: [0;35m0x00000000000000000000000000002a70[0;m +# [1;35m v19: [0;35m0x0000000000000000000000000000ca9e[0;m 0x~~~~~~~~~~~~~~~~ 4eb1bb6e addv s14, v27.4s # [1;35m v14: [0;35m0x00000000000000000000000000003c7e[0;m 0x~~~~~~~~~~~~~~~~ 4e3b1d0a and v10.16b, v8.16b, v27.16b @@ -1530,13 +1530,13 @@ 0x~~~~~~~~~~~~~~~~ 0e301c25 and v5.8b, v1.8b, v16.8b # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4e781c7a bic v26.16b, v3.16b, v24.16b -# [1;35m v26: [0;35m0x0000000020febf0000000000c4000000[0;m +# [1;35m v26: [0;35m0x0000000020febf000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 2f075487 bic v7.2s, #0xe4, lsl #16 # [1;35m v7: [0;35m0x000000000000000000000000381b0000[0;m 0x~~~~~~~~~~~~~~~~ 2f01b47c bic v28.4h, #0x23, lsl #8 # [1;35m v28: [0;35m0x000000000000000040dfdcffdcc00000[0;m 0x~~~~~~~~~~~~~~~~ 6f05159d bic v29.4s, #0xac, lsl #0 -# [1;35m v29: [0;35m0x00000000000000005fd2ca1200000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000ca1200000000[0;m 0x~~~~~~~~~~~~~~~~ 0e751fec bic v12.8b, v31.8b, v21.8b # [1;35m v12: [0;35m0x0000000000000000000000000000ffff[0;m 0x~~~~~~~~~~~~~~~~ 6f049712 bic v18.8h, #0x98, lsl #0 @@ -1546,13 +1546,13 @@ 0x~~~~~~~~~~~~~~~~ 2efb1ee2 bif v2.8b, v23.8b, v27.8b # [1;35m v2: [0;35m0x0000000000000000495000008a828000[0;m 0x~~~~~~~~~~~~~~~~ 6ead1c68 bit v8.16b, v3.16b, v13.16b -# [1;35m v8: [0;35m0x000000007f8000003effffffcc000000[0;m +# [1;35m v8: [0;35m0x000000007f8000003effffffc8000000[0;m 0x~~~~~~~~~~~~~~~~ 2eb71ca5 bit v5.8b, v5.8b, v23.8b # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e771fe9 bsl v9.16b, v31.16b, v23.16b # [1;35m v9: [0;35m0x0100000008009801010000000800dc00[0;m 0x~~~~~~~~~~~~~~~~ 2e631cee bsl v14.8b, v7.8b, v3.8b -# [1;35m v14: [0;35m0x000000000000000000000000c4000000[0;m +# [1;35m v14: [0;35m0x00000000000000000000000000000000[0;m 
0x~~~~~~~~~~~~~~~~ 4e2048bd cls v29.16b, v5.16b # [1;35m v29: [0;35m0x07070707070707070707070707070707[0;m 0x~~~~~~~~~~~~~~~~ 0ea04815 cls v21.2s, v0.2s @@ -1564,7 +1564,7 @@ 0x~~~~~~~~~~~~~~~~ 0e204893 cls v19.8b, v4.8b # [1;35m v19: [0;35m0x00000000000000000707070707070707[0;m 0x~~~~~~~~~~~~~~~~ 4e6049cf cls v15.8h, v14.8h -# [1;35m v15: [0;35m0x000f000f000f000f000f000f0001000f[0;m +# [1;35m v15: [0;35m0x000f000f000f000f000f000f000f000f[0;m 0x~~~~~~~~~~~~~~~~ 6e204881 clz v1.16b, v4.16b # [1;35m v1: [0;35m0x08080808080808080808080808080808[0;m 0x~~~~~~~~~~~~~~~~ 2ea04a3b clz v27.2s, v17.2s @@ -1572,17 +1572,17 @@ 0x~~~~~~~~~~~~~~~~ 2e604929 clz v9.4h, v9.4h # [1;35m v9: [0;35m0x00000000000000000007001000040000[0;m 0x~~~~~~~~~~~~~~~~ 6ea049ff clz v31.4s, v15.4s -# [1;35m v31: [0;35m0x0000000c0000000c0000000c0000000f[0;m +# [1;35m v31: [0;35m0x0000000c0000000c0000000c0000000c[0;m 0x~~~~~~~~~~~~~~~~ 2e204a6e clz v14.8b, v19.8b # [1;35m v14: [0;35m0x00000000000000000505050505050505[0;m 0x~~~~~~~~~~~~~~~~ 6e604966 clz v6.8h, v11.8h -# [1;35m v6: [0;35m0x00100010001000100001001000010000[0;m +# [1;35m v6: [0;35m0x00100010001000100010001000010000[0;m 0x~~~~~~~~~~~~~~~~ 7efd8cb2 cmeq d18, d5, d29 # [1;35m v18: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 5ee09bee cmeq d14, d31, #0 # [1;35m v14: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e368c73 cmeq v19.16b, v3.16b, v22.16b -# [1;35m v19: [0;35m0xffffffff000000ff0000000000ffffff[0;m +# [1;35m v19: [0;35m0xffffffff000000ff00000000ffffffff[0;m 0x~~~~~~~~~~~~~~~~ 4e20992f cmeq v15.16b, v9.16b, #0 # [1;35m v15: [0;35m0xffffffffffffffffff00ff00ff00ffff[0;m 0x~~~~~~~~~~~~~~~~ 6eea8e0c cmeq v12.2d, v16.2d, v10.2d @@ -1622,15 +1622,15 @@ 0x~~~~~~~~~~~~~~~~ 6ee08ae6 cmge v6.2d, v23.2d, #0 # [1;35m v6: [0;35m0xffffffffffffffffffffffffffffffff[0;m 0x~~~~~~~~~~~~~~~~ 0ea33ed9 cmge v25.2s, v22.2s, v3.2s -# [1;35m v25: [0;35m0x000000000000000000000000ffffffff[0;m +# [1;35m v25: 
[0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 2ea08975 cmge v21.2s, v11.2s, #0 # [1;35m v21: [0;35m0x0000000000000000ffffffffffffffff[0;m 0x~~~~~~~~~~~~~~~~ 0e6c3c70 cmge v16.4h, v3.4h, v12.4h -# [1;35m v16: [0;35m0x0000000000000000ffffffff0000ffff[0;m +# [1;35m v16: [0;35m0x0000000000000000ffffffffffffffff[0;m 0x~~~~~~~~~~~~~~~~ 2e608937 cmge v23.4h, v9.4h, #0 # [1;35m v23: [0;35m0x0000000000000000ffffffffffffffff[0;m 0x~~~~~~~~~~~~~~~~ 4eab3c47 cmge v7.4s, v2.4s, v11.4s -# [1;35m v7: [0;35m0xffffffffffffffff0000000000000000[0;m +# [1;35m v7: [0;35m0xffffffffffffffffffffffff00000000[0;m 0x~~~~~~~~~~~~~~~~ 6ea08ac0 cmge v0.4s, v22.4s, #0 # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0e293fca cmge v10.8b, v30.8b, v9.8b @@ -1662,11 +1662,11 @@ 0x~~~~~~~~~~~~~~~~ 0e608876 cmgt v22.4h, v3.4h, #0 # [1;35m v22: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4ebb3565 cmgt v5.4s, v11.4s, v27.4s -# [1;35m v5: [0;35m0x0000000000000000ffffffffffffffff[0;m +# [1;35m v5: [0;35m0x000000000000000000000000ffffffff[0;m 0x~~~~~~~~~~~~~~~~ 4ea08a8d cmgt v13.4s, v20.4s, #0 # [1;35m v13: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0e2737fb cmgt v27.8b, v31.8b, v7.8b -# [1;35m v27: [0;35m0x0000000000000000000000ff000000ff[0;m +# [1;35m v27: [0;35m0x0000000000000000ffffffff000000ff[0;m 0x~~~~~~~~~~~~~~~~ 0e208805 cmgt v5.8b, v0.8b, #0 # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4e6d3796 cmgt v22.8h, v28.8h, v13.8h @@ -1688,7 +1688,7 @@ 0x~~~~~~~~~~~~~~~~ 2e3c3707 cmhi v7.8b, v24.8b, v28.8b # [1;35m v7: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e79354b cmhi v11.8h, v10.8h, v25.8h -# [1;35m v11: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v11: [0;35m0x0000000000000000ffffffffffff0000[0;m 0x~~~~~~~~~~~~~~~~ 7ef13d81 cmhs d1, d12, d17 # [1;35m v1: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e3e3f35 cmhs v21.16b, v25.16b, 
v30.16b @@ -1730,11 +1730,11 @@ 0x~~~~~~~~~~~~~~~~ 0ea0ab99 cmlt v25.2s, v28.2s, #0 # [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0e60a960 cmlt v0.4h, v11.4h, #0 -# [1;35m v0: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v0: [0;35m0x0000000000000000ffffffffffff0000[0;m 0x~~~~~~~~~~~~~~~~ 4ea0a8b8 cmlt v24.4s, v5.4s, #0 # [1;35m v24: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0e20a97a cmlt v26.8b, v11.8b, #0 -# [1;35m v26: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v26: [0;35m0x0000000000000000ffffffffffff0000[0;m 0x~~~~~~~~~~~~~~~~ 4e60aaa1 cmlt v1.8h, v21.8h, #0 # [1;35m v1: [0;35m0x0000000000000000ffffffffffffffff[0;m 0x~~~~~~~~~~~~~~~~ 5efe8efc cmtst d28, d23, d30 @@ -9708,7 +9708,7 @@ 0x~~~~~~~~~~~~~~~~ 0e61682b fcvtn v11.2s, v1.2d # [1;35m v11: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0e216848 fcvtn v8.4h, v2.4s -# [1;35m v8: [0;35m0x37a00000000000000000000000000000[0;m +# [1;35m v8: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4e616bb8 fcvtn2 v24.4s, v29.2d # [1;35m v24: [0;35m0x0000000000000000377f0000377f0000[0;m 0x~~~~~~~~~~~~~~~~ 4e216944 fcvtn2 v4.8h, v10.4s @@ -9766,41 +9766,41 @@ 0x~~~~~~~~~~~~~~~~ 6f2efed3 fcvtzu v19.4s, v22.4s, #18 # [1;35m v19: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e6ffd0f fdiv v15.2d, v8.2d, v15.2d -# [1;35m v15: [0;35m0x7ff00000000000007ff8000000000000[0;m +# [1;35m v15: [0;35m0x7ff80000000000007ff8000000000000[0;m 0x~~~~~~~~~~~~~~~~ 2e3afd2c fdiv v12.2s, v9.2s, v26.2s # [1;35m v12: [0;35m0x00000000000000007fc000007fc00000[0;m 0x~~~~~~~~~~~~~~~~ 6e33fed3 fdiv v19.4s, v22.4s, v19.4s # [1;35m v19: [0;35m0xffffffffffffffffffffffffffffffff[0;m 0x~~~~~~~~~~~~~~~~ 4e68f4f3 fmax v19.2d, v7.2d, v8.2d -# [1;35m v19: [0;35m0x37a00000000000000000000000000000[0;m +# [1;35m v19: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0e3df599 fmax v25.2s, v12.2s, v29.2s # [1;35m v25: 
[0;35m0x00000000000000007fc000007fc00000[0;m 0x~~~~~~~~~~~~~~~~ 4e25f5e6 fmax v6.4s, v15.4s, v5.4s -# [1;35m v6: [0;35m0x7ff00000000000007ff8000000000000[0;m +# [1;35m v6: [0;35m0x7ff80000000000007ff8000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4e74c510 fmaxnm v16.2d, v8.2d, v20.2d -# [1;35m v16: [0;35m0x37a0000000000000000000000180fe00[0;m +# [1;35m v16: [0;35m0x0000000000000000000000000180fe00[0;m 0x~~~~~~~~~~~~~~~~ 0e39c74f fmaxnm v15.2s, v26.2s, v25.2s # [1;35m v15: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4e30c5d7 fmaxnm v23.4s, v14.4s, v16.4s -# [1;35m v23: [0;35m0x37a0000000000000000000000180fe00[0;m +# [1;35m v23: [0;35m0x0000000000000000000000000180fe00[0;m 0x~~~~~~~~~~~~~~~~ 7e70ca66 fmaxnmp d6, v19.2d -# [1;35m v6: [0;35m0x000000000000000037a0000000000000[0;m +# [1;35m v6: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 7e30cb5b fmaxnmp s27, v26.2s # [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e77c588 fmaxnmp v8.2d, v12.2d, v23.2d -# [1;35m v8: [0;35m0x37a00000000000007fc000007fc00000[0;m +# [1;35m v8: [0;35m0x000000000180fe007fc000007fc00000[0;m 0x~~~~~~~~~~~~~~~~ 2e36c72d fmaxnmp v13.2s, v25.2s, v22.2s # [1;35m v13: [0;35m0x0000000000000000ffffffff7fc00000[0;m 0x~~~~~~~~~~~~~~~~ 6e31c56f fmaxnmp v15.4s, v11.4s, v17.4s # [1;35m v15: [0;35m0xffffffff000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e30ca7b fmaxnmv s27, v19.4s -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 7e70f9d4 fmaxp d20, v14.2d # [1;35m v20: [0;35m0x0000000000000000ffffffff00000000[0;m 0x~~~~~~~~~~~~~~~~ 7e30f852 fmaxp s18, v2.2s # [1;35m v18: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e7ff6e9 fmaxp v9.2d, v23.2d, v31.2d -# [1;35m v9: [0;35m0x00000000ffffffff37a0000000000000[0;m +# [1;35m v9: [0;35m0x00000000ffffffff000000000180fe00[0;m 0x~~~~~~~~~~~~~~~~ 2e3ff6c7 fmaxp v7.2s, v22.2s, v31.2s # [1;35m v7: 
[0;35m0x0000000000000000ffffffffffffffff[0;m 0x~~~~~~~~~~~~~~~~ 6e3df4f2 fmaxp v18.4s, v7.4s, v29.4s @@ -9843,7 +9843,7 @@ # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s # [1;35m v25: [0;35m0x0000000000000000000000007fc00000[0;m -0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] +0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] # [1;35m v23: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] # [1;35m v23: [0;35m0x00000000000000000000000000000000[0;m @@ -9858,9 +9858,9 @@ 0x~~~~~~~~~~~~~~~~ 4e2bcd70 fmla v16.4s, v11.4s, v11.4s # [1;35m v16: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4f891afb fmla v27.4s, v23.4s, v9.s[2] -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 5fc653db fmls d27, d30, v6.d[0] -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 5f825215 fmls s21, s16, v2.s[0] # [1;35m v21: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4ef5ce65 fmls v5.2d, v19.2d, v21.2d @@ -9872,7 +9872,7 @@ 0x~~~~~~~~~~~~~~~~ 0fab5243 fmls v3.2s, v18.2s, v11.s[1] # [1;35m v3: [0;35m0x0000000000000000000000007fffffff[0;m 0x~~~~~~~~~~~~~~~~ 4ebeccbb fmls v27.4s, v5.4s, v30.4s -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4fa45a9a fmls v26.4s, v20.4s, v4.s[3] # [1;35m v26: [0;35m0x00000000000000007fffffff00000000[0;m 0x~~~~~~~~~~~~~~~~ 6f06f6ce fmov v14.2d, #0xd6 (-0.3438) @@ -9885,13 +9885,13 @@ # [1;35m v28: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] # [1;36m x18: [0;36m0x0000000000000000[0;m -0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] +0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] # [1;35m v12: 
[0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] # [1;35m v30: [0;35m0x000000000000000000000000ffffffff[0;m 0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d # [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m -0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] +0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] # [1;35m v10: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s # [1;35m v7: [0;35m0x00000000000000000000000000000000[0;m @@ -9901,7 +9901,7 @@ # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] # [1;35m v11: [0;35m0x7fc000007fc000007fc000007fffffff[0;m -0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] +0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] # [1;35m v28: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] # [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m @@ -9928,7 +9928,7 @@ 0x~~~~~~~~~~~~~~~~ 0ea1daca frecpe v10.2s, v22.2s # [1;35m v10: [0;35m0x00000000000000007f8000007f800000[0;m 0x~~~~~~~~~~~~~~~~ 4ea1d8c5 frecpe v5.4s, v6.4s -# [1;35m v5: [0;35m0x7f8000007f800000474c80007f800000[0;m +# [1;35m v5: [0;35m0x7f8000007f8000007f8000007f800000[0;m 0x~~~~~~~~~~~~~~~~ 4e7afcf6 frecps v22.2d, v7.2d, v26.2d # [1;35m v22: [0;35m0x40000000000000004000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 0e22ff7f frecps v31.2s, v27.2s, v2.2s @@ -10020,7 +10020,7 @@ 0x~~~~~~~~~~~~~~~~ 2e21d88b ucvtf v11.2s, v4.2s # [1;35m v11: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 2f27e47d ucvtf v29.2s, v3.2s, #25 -# [1;35m v29: [0;35m0x7fc000007fc000000000000000000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000000000000000[0;m 0x~~~~~~~~~~~~~~~~ 6e21daf6 ucvtf v22.4s, v23.4s # [1;35m v22: [0;35m0x4effe000000000004e001a4000000000[0;m 0x~~~~~~~~~~~~~~~~ 6f27e532 ucvtf v18.4s, v9.4s, #25 @@ -11173,10 +11173,10 @@ 
0x~~~~~~~~~~~~~~~~ e551ec06 st3w {z6.s, z7.s, z8.s}, p3, [x0, #3, mul vl] # [1;35m z6<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) # [1;35m z7<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) -# [1;35m z8<127:0>: [0;35m0x37a00000000000007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) +# [1;35m z8<127:0>: [0;35m0x000000000180fe007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) # ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ╙───────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────────────── 0x37a00000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z6<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z7<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z8<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) @@ -11198,10 +11198,10 @@ # ╙───────────────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m 0x~~~~~~~~~~~~~~~~ e5c27007 st3d {z7.d, z8.d, z9.d}, p4, [x0, x2, lsl #3] # [1;35m z7<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z8<127:0>: [0;35m0x37a00000000000007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z8<127:0>: [0;35m0x000000000180fe007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z9<127:0>: [0;35m0x43dfe000001fe0000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 
0x0000000000000000'7fc000007fc00000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z7<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z8<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z9<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) @@ -11223,9 +11223,9 @@ # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m 0x~~~~~~~~~~~~~~~~ a441f418 ld3b {z24.b, z25.b, z26.b}, p5/z, [x0, #3, mul vl] -# [1;35m z24<127:0>: [0;35m0x0000000000000000000000ff00000000[0;m -# [1;35m z25<127:0>: [0;35m0xa000000000000000000000ffc0000000[0;m -# [1;35m z26<127:0>: [0;35m0x3700000000000000000000007f000000[0;m +# [1;35m z24<127:0>: [0;35m0x00000000fe000000000000ff00000000[0;m +# [1;35m z25<127:0>: [0;35m0x0000000080000000000000ffc0000000[0;m +# [1;35m z26<127:0>: [0;35m0x0000000001000000000000007f000000[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m @@ -11236,10 +11236,10 @@ # ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ╙───────────────────────── 0x00'00'00 <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ╙───────────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙─────────────────────────────── 0x37'a0'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙─────────────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z24<255:128>: [0;35m0x00000000000000000000000000000000[0;m # [1;35m z25<255:128>: [0;35m0x00000000000000000000000000000000[0;m # [1;35m z26<255:128>: [0;35m0x00000000000000000000000000000000[0;m @@ -11366,11 +11366,11 @@ 0x~~~~~~~~~~~~~~~~ a541f81a ld3w {z26.s, z27.s, z28.s}, p6/z, [x0, #3, mul vl] # [1;35m z26<127:0>: [0;35m0x00000000000000000000ffff00000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) # [1;35m z27<127:0>: [0;35m0x00000000000000007fff000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) -# [1;35m z28<127:0>: [0;35m0x37a0000000000000000000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) +# [1;35m z28<127:0>: [0;35m0x000000000180fe00000000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) # ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ╙───────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────────────── 0x37a00000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z26<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z27<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z28<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, 
[0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) @@ -11401,10 +11401,10 @@ # ╙───────────────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m 0x~~~~~~~~~~~~~~~~ a5c1f41b ld3d {z27.d, z28.d, z29.d}, p5/z, [x0, #3, mul vl] # [1;35m z27<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z28<127:0>: [0;35m0x00000000000000000000ffff7fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z29<127:0>: [0;35m0x37a0000000000000000000007fff0000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z28<127:0>: [0;35m0x000000000180fe000000ffff7fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z29<127:0>: [0;35m0x0000000000000000000000007fff0000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z27<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z28<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z29<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) @@ -11856,5 +11856,685 @@ # [1;35m z31<639:512>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16) +# [1;36m sp: [0;36m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555 +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp] +# [1;36m x0<63:0>: 
[0;36m0x5555555555555555[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] +# [1;36m w0: [0;36m 0x00000055[0;m +# ╙─ 0x55 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] +# [1;36m w1: [0;36m 0x000000aa[0;m +# ╙─ 0xaa <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] +# [1;36m w2: [0;36m 0x000000ff[0;m +# ╙─ 0xff <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] +# [1;36m w3: [0;36m 0x00000054[0;m +# ╙─ 0x54 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] +# [1;36m w0: [0;36m 0x000055a9[0;m +# ╙─ 0x55a9 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] +# [1;36m w1: [0;36m 0x000055fe[0;m +# ╙─ 0x55fe <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000aba7[0;m +# ╙─ 0xaba7 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] +# [1;36m w3: [0;36m 0x00000150[0;m +# ╙─ 0x0150 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] +# [1;36m w0: [0;36m 0x555556f9[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] +# [1;36m w1: [0;36m 0x5555aca2[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m 
-> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] +# [1;36m w2: [0;36m 0xaaab039b[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] +# [1;36m w3: [0;36m 0x00005a94[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] +# [1;36m x3: [0;36m0x0000000055566ba0[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x2d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x8d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0xba <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x8d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x1d47 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0xb18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0xced4 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0xb18d[0;m -> 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x5555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x5555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] +# [1;36m w0: [0;36m 0x00000095[0;m +# ╙─ 0x95 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m 
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] +# [1;36m w0: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] +# [1;36m w1: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] +# [1;36m w2: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] +# [1;36m w3: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] +# [1;36m x3: [0;36m0x000000000003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m 
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] 
+# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] +# [1;36m w1: [0;36m 0x00004600[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] +# [1;36m w3: [0;36m 0x00004600[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] +# [1;36m w0: [0;36m 0x0003009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] +# [1;36m w1: [0;36m 0x00034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] +# [1;36m w3: [0;36m 0x00034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] +# [1;36m x0: 
[0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] +# [1;36m x3: [0;36m0x0000000000034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] +# [1;36m xzr: 
[0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] +# [1;36m w0: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] +# [1;36m 
w0: [0;36m 0x0003ff9d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] +# [1;36m wzr: [0;36m 
0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] +# [1;36m w0: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d 
<- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# 
[1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m 
x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m 
x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: 
[0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] +# [1;36m w2: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] +# [1;36m w0: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] +# [1;36m w2: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] +# [1;36m w0: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] +# [1;36m w1: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] +# [1;36m w2: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] +# [1;36m w3: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m 
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] +# [1;36m x3: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x00[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x00[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 
f86013ff stclrl x0, [sp] +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16) +# [1;36m sp: [0;36m0x~~~~~~~~~~~~~~~~[0;m 0x~~~~~~~~~~~~~~~~ d65f03c0 ret # [0;30m[43mBranch[0;m to 0x~~~~~~~~~~~~~~~~. diff --git a/test/test-trace-reference/log-branch b/test/test-trace-reference/log-branch index fff3143f..0491d505 100644 --- a/test/test-trace-reference/log-branch +++ b/test/test-trace-reference/log-branch @@ -2934,10 +2934,10 @@ # ╙───────────────────────────── 0x0000'0000'0000 -> 0x~~~~~~~~~~~~~~~~ # z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000) # z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000) -# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000) +# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000) # ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ -# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ -# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ +# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ # z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) # z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) # z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) @@ -2958,10 +2958,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ # ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ # z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000) -# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000) +# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000) # z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000) # ║ 
╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ -# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ # z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) @@ -3188,3 +3188,196 @@ # z5<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 
0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 
0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 
0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 
0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ diff --git a/test/test-trace-reference/log-branch-colour b/test/test-trace-reference/log-branch-colour index 92faa1fd..7caf1a45 100644 --- a/test/test-trace-reference/log-branch-colour +++ b/test/test-trace-reference/log-branch-colour @@ -2934,10 +2934,10 @@ # ╙───────────────────────────── 0x0000'0000'0000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z6<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., 
[0;35m0.000[0;m) # [1;35m z7<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) -# [1;35m z8<127:0>: [0;35m0x37a00000000000007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) +# [1;35m z8<127:0>: [0;35m0x000000000180fe007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) # ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ╙───────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────────────── 0x37a00000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z6<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z7<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z8<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) @@ -2958,10 +2958,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z7<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z8<127:0>: [0;35m0x37a00000000000007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z8<127:0>: [0;35m0x000000000180fe007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z9<127:0>: [0;35m0x43dfe000001fe0000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────── 
0x43dfe000001fe000'000000000180fe00'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z7<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z8<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z9<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) @@ -3188,3 +3188,196 @@ # [1;35m z5<639:512>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x5555555555555555[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m 
+# [1;36m x0<7:0>: [0;36m 0x8d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x8d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0xb18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0xb18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x5555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x5555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# 
[1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# 
[1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: 
[0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x00[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x00[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m diff --git a/test/test-trace-reference/log-cpufeatures b/test/test-trace-reference/log-cpufeatures index 804c06f5..795d3580 100644 --- a/test/test-trace-reference/log-cpufeatures +++ b/test/test-trace-reference/log-cpufeatures @@ -2292,7 +2292,7 @@ 0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s // Needs: FP, NEON -0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] // Needs: FP, NEON +0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] // Needs: FP, NEON @@ -2313,15 +2313,15 @@ 0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] // Needs: FP, NEON -0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] // 
Needs: FP, NEON +0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d // Needs: FP, NEON -0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] // Needs: FP, NEON +0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] // Needs: FP, NEON -0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] // Needs: FP, NEON +0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d // Needs: FP, NEON 0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] // Needs: FP, NEON @@ -2449,3 +2449,199 @@ 0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] // Needs: SVE 0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] // Needs: SVE 0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] // Needs: SVE +0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16) +0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555 +0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp] +0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] // Needs: Atomics 
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] // Needs: Atomics 
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 
786023ff steorlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a043e1 
ldsmaxab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] // Needs: Atomics 
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] 
// Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] // Needs: 
Atomics +0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] // Needs: Atomics +0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16) diff --git a/test/test-trace-reference/log-cpufeatures-colour b/test/test-trace-reference/log-cpufeatures-colour index 58f04790..170f34d5 100644 --- a/test/test-trace-reference/log-cpufeatures-colour +++ b/test/test-trace-reference/log-cpufeatures-colour @@ -2292,7 +2292,7 @@ 0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s [1;35mFP, NEON[0;m -0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] [1;35mFP, NEON[0;m +0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] [1;35mFP, NEON[0;m @@ -2313,15 +2313,15 @@ 0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] [1;35mFP, NEON[0;m -0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] [1;35mFP, NEON[0;m +0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d [1;35mFP, NEON[0;m -0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] [1;35mFP, NEON[0;m +0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 0f849a01 
fmul v1.2s, v16.2s, v4.s[2] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] [1;35mFP, NEON[0;m -0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] [1;35mFP, NEON[0;m +0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d [1;35mFP, NEON[0;m 0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] [1;35mFP, NEON[0;m @@ -2449,3 +2449,199 @@ 0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] [1;35mSVE[0;m 0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] [1;35mSVE[0;m 0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] [1;35mSVE[0;m +0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16) +0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555 +0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp] +0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] [1;35mAtomics[0;m 
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 
b82033ff stset w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, 
[sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 
b82043e0 ldsmax w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] [1;35mAtomics[0;m 
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] 
[1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] [1;35mAtomics[0;m +0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16) diff --git a/test/test-trace-reference/log-cpufeatures-custom 
b/test/test-trace-reference/log-cpufeatures-custom index 1e572f28..3975ec9d 100644 --- a/test/test-trace-reference/log-cpufeatures-custom +++ b/test/test-trace-reference/log-cpufeatures-custom @@ -2292,7 +2292,7 @@ 0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s ### {FP, NEON} ### -0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] ### {FP, NEON} ### +0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] ### {FP, NEON} ### @@ -2313,15 +2313,15 @@ 0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] ### {FP, NEON} ### -0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] ### {FP, NEON} ### +0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d ### {FP, NEON} ### -0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] ### {FP, NEON} ### +0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] ### {FP, NEON} ### -0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] ### {FP, NEON} ### +0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 
4e68df9f fmulx v31.2d, v28.2d, v8.2d ### {FP, NEON} ### 0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] ### {FP, NEON} ### @@ -2449,3 +2449,199 @@ 0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] ### {SVE} ### 0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] ### {SVE} ### 0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] ### {SVE} ### +0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16) +0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555 +0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp] +0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] ### {Atomics} 
### +0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 
38e023e3 ldeoralb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 
b8a053e1 ldsmina w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] ### {Atomics} ### 
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] ### {Atomics} ### 
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] ### {Atomics} 
### +0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] ### {Atomics} ### +0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16) diff --git a/test/test-trace-reference/log-disasm b/test/test-trace-reference/log-disasm index 583328a3..53f8f010 100644 --- a/test/test-trace-reference/log-disasm +++ b/test/test-trace-reference/log-disasm @@ -2293,7 +2293,7 @@ 0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s 0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s 0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s -0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] +0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] 0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] 0x~~~~~~~~~~~~~~~~ 
4e66cd71 fmla v17.2d, v11.2d, v6.2d 0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] @@ -2314,15 +2314,15 @@ 0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) 0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] -0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] +0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] 0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] 0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d -0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] +0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] 0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s 0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2] 0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s 0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] -0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] +0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] 0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] 0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d 0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] @@ -2450,4 +2450,200 @@ 0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] 0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] 0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] +0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16) +0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555 +0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp] +0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a003e1 
ldadda w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] +0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] +0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] +0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] +0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] +0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 
38e023e3 ldeoralb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] +0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] +0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] +0x~~~~~~~~~~~~~~~~ 
b82053ff stsmin w0, [sp] +0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] +0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] +0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] +0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] 
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] +0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] +0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] +0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] +0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] 
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] +0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] +0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] +0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16) 0x~~~~~~~~~~~~~~~~ d65f03c0 ret diff --git a/test/test-trace-reference/log-disasm-colour b/test/test-trace-reference/log-disasm-colour index 583328a3..53f8f010 100644 --- a/test/test-trace-reference/log-disasm-colour +++ b/test/test-trace-reference/log-disasm-colour @@ -2293,7 +2293,7 @@ 0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s 0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s 0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s -0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] +0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] 0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] 0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d 0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] @@ -2314,15 +2314,15 @@ 0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) 0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] -0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] +0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] 0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] 0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d 
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] +0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] 0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s 0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2] 0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s 0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] -0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] +0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] 0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] 0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d 0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] @@ -2450,4 +2450,200 @@ 0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] 0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] 0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] +0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16) +0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555 +0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp] +0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786003ff 
staddlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] +0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] +0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] +0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] +0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] +0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] 
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] +0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] +0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] +0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] +0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] 
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] +0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] +0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] +0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, 
[sp] +0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] +0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] +0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] +0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] +0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] +0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] +0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] +0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] +0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] +0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] +0x~~~~~~~~~~~~~~~~ f8e013e3 
ldclral x0, x3, [sp] +0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] +0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] +0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] +0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] +0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] +0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] +0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] +0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] +0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16) 0x~~~~~~~~~~~~~~~~ d65f03c0 ret diff --git a/test/test-trace-reference/log-regs b/test/test-trace-reference/log-regs index 0894e3d3..c350f523 100644 --- a/test/test-trace-reference/log-regs +++ b/test/test-trace-reference/log-regs @@ -792,3 +792,294 @@ # x1: 0x~~~~~~~~~~~~~~~~ # lr: 0x0000000000000000 # x18: 0x0000000000000000 +# sp: 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000055 +# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x000000aa +# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x000000ff +# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000054 +# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x000055a9 +# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x000055fe +# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000aba7 +# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000150 +# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x555556f9 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x5555aca2 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xaaab039b <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00005a94 <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000095 +# ╙─ 
0x95 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00004600 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00004600 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0003009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00034600 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000469d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00034600 <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x00 <- 
0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0003ff9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 
0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x9d <- 
0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# 
wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# sp: 0x~~~~~~~~~~~~~~~~ diff --git a/test/test-trace-reference/log-regs-colour b/test/test-trace-reference/log-regs-colour index 0df9f2e5..55839980 100644 --- a/test/test-trace-reference/log-regs-colour +++ b/test/test-trace-reference/log-regs-colour @@ -792,3 +792,294 @@ # [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m # [1;36m lr: [0;36m0x0000000000000000[0;m # [1;36m x18: [0;36m0x0000000000000000[0;m +# [1;36m sp: [0;36m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000055[0;m +# ╙─ 0x55 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x000000aa[0;m +# ╙─ 0xaa <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x000000ff[0;m +# ╙─ 0xff <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000054[0;m +# ╙─ 0x54 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x000055a9[0;m +# ╙─ 0x55a9 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x000055fe[0;m +# ╙─ 0x55fe <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000aba7[0;m +# ╙─ 0xaba7 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000150[0;m +# ╙─ 0x0150 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x555556f9[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x5555aca2[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xaaab039b[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00005a94[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x0000000055566ba0[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x2d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0xba <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# 
[1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x1d47 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0xced4 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000095[0;m +# ╙─ 0x95 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m 
wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00004600[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00004600[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0003009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x0000000000034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m 
<- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0003ff9d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m sp: [0;36m0x~~~~~~~~~~~~~~~~[0;m diff --git a/test/test-trace-reference/log-state b/test/test-trace-reference/log-state index 8113a01d..0b5069dc 100644 --- a/test/test-trace-reference/log-state +++ b/test/test-trace-reference/log-state @@ -861,8 +861,8 @@ # d5: 0x3f90000000000000 # d16: 0x0000000000000000 # d15: 0x0000000000000000 -# v7: 0x00000000000000007ff000004e81442e -# v8: 0x0000000000000000c004000000000000 +# v7: 0x0000000000000000000000004e81442e +# v8: 0x00000000000000000000000000000000 # s29: 0x00000000 # s15: 0x49800000 # s27: 0x00000000 @@ -873,14 +873,14 @@ # d3: 0x0000000000000000 # d28: 0x41dfffffffc00000 # d27: 0x0000000000000000 -# v11: 
0x0000000000000000400000004f7fe000 -# v0: 0x000000000000000043d21c00480a8294 +# v11: 0x0000000000000000000000004f7fe000 +# v0: 0x000000000000000000000000480a8294 # s20: 0x00000000 # s21: 0x00000000 # s6: 0x5f000000 # s7: 0x38ff0000 -# v19: 0x000000000000000043d21c00480a8294 -# v16: 0x0000000000000000400000004f7f2000 +# v19: 0x000000000000000000000000480a8294 +# v16: 0x0000000000000000000000004f7f2000 # v0: 0x00000000000000000000000000000000 # v27: 0x00000000000000000000000000000000 # v21: 0x00000000000000000000000000000000 @@ -890,59 +890,59 @@ # v10: 0x00000000000000003f0000007f600000 # v31: 0x495000018a83940149500000d3029400 # v10: 0x495000018a83940168a6954c14cfd693 -# v15: 0x00000000000000006328b14b89d7c527 +# v15: 0x00000000000000001f56954b89d7c527 # v27: 0x0000000000000000495000000a029400 # v25: 0x000000000000000041efffffffc00000 # v13: 0x00000000000000000000000037feffff # v4: 0x00000000000000000000000000000000 -# v10: 0x000000000000000000000000827f4696 +# v10: 0x0000000000000000000000003ead2a96 # v10: 0x00000000000000000000000000003c7f # v31: 0x0000000000000000000000000000ffff # v16: 0x00000000000000000000000000000000 # v0: 0x00000000000000000000000000000000 # v31: 0x000000000000b87f000000000000ffff -# v14: 0x000000000000000043d21c00480a8294 -# v3: 0x0000000020febf0000000000c4000000 +# v14: 0x000000000000000000000000480a8294 +# v3: 0x0000000020febf000000000000000000 # v8: 0x000000007f8000003effffffffe00000 # v22: 0x00000000000000003c7fffff00000000 -# v29: 0x00000000000000005fd2ca9e00000000 +# v29: 0x00000000000000000000ca9e00000000 # v30: 0x0000000000000000000000003c7fffff # v12: 0x0000000000000000000037000000bbfe # v17: 0x000000003700bbfe00007f803efeffe0 # v27: 0x00000000000000000000000000000075 # v12: 0x00000000000000000000000000000000 # v27: 0x00000000000000000000000000003c7e -# v19: 0x00000000000000000000000000002a70 +# v19: 0x0000000000000000000000000000ca9e # v14: 0x00000000000000000000000000003c7e # v10: 0x00000000000000000000000000000000 # v5: 
0x00000000000000000000000000000000 -# v26: 0x0000000020febf0000000000c4000000 +# v26: 0x0000000020febf000000000000000000 # v7: 0x000000000000000000000000381b0000 # v28: 0x000000000000000040dfdcffdcc00000 -# v29: 0x00000000000000005fd2ca1200000000 +# v29: 0x00000000000000000000ca1200000000 # v12: 0x0000000000000000000000000000ffff # v18: 0x000000000000000000000000ff67ff67 # v12: 0x00000000007ebf000000000000000000 # v2: 0x0000000000000000495000008a828000 -# v8: 0x000000007f8000003effffffcc000000 +# v8: 0x000000007f8000003effffffc8000000 # v5: 0x00000000000000000000000000000000 # v9: 0x0100000008009801010000000800dc00 -# v14: 0x000000000000000000000000c4000000 +# v14: 0x00000000000000000000000000000000 # v29: 0x07070707070707070707070707070707 # v21: 0x00000000000000000000001f0000001f # v1: 0x0000000000000000000f000f000f000f # v27: 0x0000001f0000001f0000001f0000001f # v19: 0x00000000000000000707070707070707 -# v15: 0x000f000f000f000f000f000f0001000f +# v15: 0x000f000f000f000f000f000f000f000f # v1: 0x08080808080808080808080808080808 # v27: 0x00000000000000000000001100000002 # v9: 0x00000000000000000007001000040000 -# v31: 0x0000000c0000000c0000000c0000000f +# v31: 0x0000000c0000000c0000000c0000000c # v14: 0x00000000000000000505050505050505 -# v6: 0x00100010001000100001001000010000 +# v6: 0x00100010001000100010001000010000 # v18: 0x00000000000000000000000000000000 # v14: 0x00000000000000000000000000000000 -# v19: 0xffffffff000000ff0000000000ffffff +# v19: 0xffffffff000000ff00000000ffffffff # v15: 0xffffffffffffffffff00ff00ff00ffff # v12: 0xffffffffffffffffffffffffffffffff # v8: 0xffffffffffffffff0000000000000000 @@ -962,11 +962,11 @@ # v22: 0xffffffffffffffffffffffffffff0000 # v28: 0x0000000000000000ffffffffffffffff # v6: 0xffffffffffffffffffffffffffffffff -# v25: 0x000000000000000000000000ffffffff +# v25: 0x00000000000000000000000000000000 # v21: 0x0000000000000000ffffffffffffffff -# v16: 0x0000000000000000ffffffff0000ffff +# v16: 0x0000000000000000ffffffffffffffff # 
v23: 0x0000000000000000ffffffffffffffff -# v7: 0xffffffffffffffff0000000000000000 +# v7: 0xffffffffffffffffffffffff00000000 # v0: 0x00000000000000000000000000000000 # v10: 0x0000000000000000ff00ff00ffff0000 # v21: 0x0000000000000000ffffffffffffffff @@ -982,9 +982,9 @@ # v12: 0x00000000000000000000000000000000 # v28: 0x0000000000000000000000000000ffff # v22: 0x00000000000000000000000000000000 -# v5: 0x0000000000000000ffffffffffffffff +# v5: 0x000000000000000000000000ffffffff # v13: 0x00000000000000000000000000000000 -# v27: 0x0000000000000000000000ff000000ff +# v27: 0x0000000000000000ffffffff000000ff # v5: 0x00000000000000000000000000000000 # v22: 0x00000000000000000000000000000000 # v6: 0x00000000000000000000000000000000 @@ -995,7 +995,7 @@ # v31: 0x00000000000000000000000000000000 # v9: 0x00000000000000000000000000000000 # v7: 0x00000000000000000000000000000000 -# v11: 0x0000000000000000ffffffff00000000 +# v11: 0x0000000000000000ffffffffffff0000 # v1: 0x00000000000000000000000000000000 # v21: 0xffffffffffffffffffffffffffffffff # v8: 0xffffffffffffffffffffffffffffffff @@ -1016,9 +1016,9 @@ # v7: 0x0000000000000000ffffffffffffffff # v7: 0x0000000000000000ffffffffffffffff # v25: 0x00000000000000000000000000000000 -# v0: 0x0000000000000000ffffffff00000000 +# v0: 0x0000000000000000ffffffffffff0000 # v24: 0x00000000000000000000000000000000 -# v26: 0x0000000000000000ffffffff00000000 +# v26: 0x0000000000000000ffffffffffff0000 # v1: 0x0000000000000000ffffffffffffffff # v28: 0x0000000000000000ffffffffffffffff # v26: 0x00000000000000000000000000000000 @@ -5088,7 +5088,7 @@ # v26: 0x00000000000000000000000000000000 # v21: 0x00000000000000000000000000000000 # v11: 0x00000000000000000000000000000000 -# v8: 0x37a00000000000000000000000000000 +# v8: 0x00000000000000000000000000000000 # v24: 0x0000000000000000377f0000377f0000 # v4: 0x0000000000000000ffffffffffffffff # v25: 0x00000000000000000000000000000000 @@ -5117,24 +5117,24 @@ # v9: 0x00000000000000000000000000000000 # v30: 
0x00000000000000000000000000000000 # v19: 0x00000000000000000000000000000000 -# v15: 0x7ff00000000000007ff8000000000000 +# v15: 0x7ff80000000000007ff8000000000000 # v12: 0x00000000000000007fc000007fc00000 # v19: 0xffffffffffffffffffffffffffffffff -# v19: 0x37a00000000000000000000000000000 +# v19: 0x00000000000000000000000000000000 # v25: 0x00000000000000007fc000007fc00000 -# v6: 0x7ff00000000000007ff8000000000000 -# v16: 0x37a0000000000000000000000180fe00 +# v6: 0x7ff80000000000007ff8000000000000 +# v16: 0x0000000000000000000000000180fe00 # v15: 0x00000000000000000000000000000000 -# v23: 0x37a0000000000000000000000180fe00 -# v6: 0x000000000000000037a0000000000000 +# v23: 0x0000000000000000000000000180fe00 +# v6: 0x00000000000000000000000000000000 # v27: 0x00000000000000000000000000000000 -# v8: 0x37a00000000000007fc000007fc00000 +# v8: 0x000000000180fe007fc000007fc00000 # v13: 0x0000000000000000ffffffff7fc00000 # v15: 0xffffffff000000000000000000000000 -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 # v20: 0x0000000000000000ffffffff00000000 # v18: 0x00000000000000000000000000000000 -# v9: 0x00000000ffffffff37a0000000000000 +# v9: 0x00000000ffffffff000000000180fe00 # v7: 0x0000000000000000ffffffffffffffff # v18: 0x000000000000000000000000ffffffff # v31: 0x00000000000000000000000000000000 @@ -5163,14 +5163,14 @@ # v19: 0x00000000000000007fc000007fc00000 # v24: 0x00000000000000000000000000000000 # v16: 0x00000000000000000000000000000000 -# v27: 0x00000000000000000000000037a00000 -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 +# v27: 0x00000000000000000000000000000000 # v21: 0x00000000000000000000000000000000 # v5: 0x00000000000000000000000000000000 # v18: 0x000000000000000000000000ffffffff # v5: 0x00000000000000000000000000000000 # v3: 0x0000000000000000000000007fffffff -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 # v26: 
0x00000000000000007fffffff00000000 # v14: 0xbfd6000000000000bfd6000000000000 # v26: 0x00000000000000003f6800003f680000 @@ -5198,7 +5198,7 @@ # v5: 0x80000000800000008000000080000000 # v18: 0x7ff00000000000007ff0000000000000 # v10: 0x00000000000000007f8000007f800000 -# v5: 0x7f8000007f800000474c80007f800000 +# v5: 0x7f8000007f8000007f8000007f800000 # v22: 0x40000000000000004000000000000000 # v31: 0x00000000000000004000000040000000 # v18: 0x40000000400000004000000040000000 @@ -5244,7 +5244,7 @@ # v9: 0x43dfe000001fe0000000000000000000 # v26: 0x000000000000000040fff00000200000 # v11: 0x00000000000000000000000000000000 -# v29: 0x7fc000007fc000000000000000000000 +# v29: 0x00000000000000000000000000000000 # v22: 0x4effe000000000004e001a4000000000 # v18: 0x4207bfc03d7f00000000000000000000 # p8<15:0>: 0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 <- 0x~~~~~~~~~~~~~~~~ @@ -5795,9 +5795,9 @@ # z24<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ # ╙───────────────── 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ -# z24<127:0>: 0x0000000000000000000000ff00000000 -# z25<127:0>: 0xa000000000000000000000ffc0000000 -# z26<127:0>: 0x3700000000000000000000007f000000 +# z24<127:0>: 0x00000000fe000000000000ff00000000 +# z25<127:0>: 0x0000000080000000000000ffc0000000 +# z26<127:0>: 0x0000000001000000000000007f000000 # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ @@ -5808,10 +5808,10 @@ # ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ -# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ +# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ╙───────────────────────── 0x00'00'00 <- 
0x~~~~~~~~~~~~~~~~ # ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ -# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~ +# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # z24<255:128>: 0x00000000000000000000000000000000 # z25<255:128>: 0x00000000000000000000000000000000 # z26<255:128>: 0x00000000000000000000000000000000 @@ -5936,11 +5936,11 @@ # ╙───────────────────────────── 0xedec'ebea'e9e8 <- 0x~~~~~~~~~~~~~~~~ # z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000) # z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000) -# z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000) +# z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000) # ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~ -# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ -# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ +# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) # z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) # z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) @@ -5970,10 +5970,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000) -# z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000) -# z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000) +# z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000) +# z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000) # ║ ╙─ 
0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ -# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ # z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) @@ -6211,3 +6211,294 @@ # z31<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# sp: 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000055 +# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x000000aa +# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x000000ff +# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000054 +# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x000055a9 +# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x000055fe +# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000aba7 +# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000150 +# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x555556f9 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x5555aca2 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xaaab039b <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00005a94 <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000095 
+# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0003469d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000469d +# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00004600 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00004600 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0003009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00034600 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000469d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00034600 <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x00 <- 
0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0xffffff9d +# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0003ff9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0xffffff9d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 
0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x9d <- 
0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x0000009d +# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# w0: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# w1: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# w2: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# w3: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~ +# x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 +# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~ +# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# 
wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# sp: 0x~~~~~~~~~~~~~~~~ diff --git a/test/test-trace-reference/log-state-colour b/test/test-trace-reference/log-state-colour index 0f1b75e4..3234c7a8 100644 --- a/test/test-trace-reference/log-state-colour +++ b/test/test-trace-reference/log-state-colour @@ -861,8 +861,8 @@ # [1;35m d5: [0;35m 0x3f90000000000000[0;m # [1;35m d16: [0;35m 0x0000000000000000[0;m # [1;35m d15: [0;35m 0x0000000000000000[0;m -# [1;35m v7: [0;35m0x00000000000000007ff000004e81442e[0;m -# [1;35m v8: [0;35m0x0000000000000000c004000000000000[0;m +# [1;35m v7: [0;35m0x0000000000000000000000004e81442e[0;m +# [1;35m v8: [0;35m0x00000000000000000000000000000000[0;m # [1;35m s29: [0;35m 0x00000000[0;m # [1;35m s15: [0;35m 0x49800000[0;m # [1;35m s27: [0;35m 0x00000000[0;m @@ -873,14 +873,14 @@ # [1;35m d3: [0;35m 0x0000000000000000[0;m # [1;35m d28: [0;35m 0x41dfffffffc00000[0;m # [1;35m d27: [0;35m 0x0000000000000000[0;m -# [1;35m v11: [0;35m0x0000000000000000400000004f7fe000[0;m -# [1;35m v0: [0;35m0x000000000000000043d21c00480a8294[0;m +# [1;35m v11: [0;35m0x0000000000000000000000004f7fe000[0;m +# [1;35m v0: [0;35m0x000000000000000000000000480a8294[0;m # [1;35m s20: [0;35m 0x00000000[0;m # [1;35m s21: [0;35m 0x00000000[0;m # [1;35m s6: [0;35m 0x5f000000[0;m # [1;35m s7: [0;35m 0x38ff0000[0;m -# [1;35m v19: [0;35m0x000000000000000043d21c00480a8294[0;m -# [1;35m v16: [0;35m0x0000000000000000400000004f7f2000[0;m +# [1;35m v19: [0;35m0x000000000000000000000000480a8294[0;m +# [1;35m v16: [0;35m0x0000000000000000000000004f7f2000[0;m # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x00000000000000000000000000000000[0;m @@ -890,59 +890,59 @@ # [1;35m v10: [0;35m0x00000000000000003f0000007f600000[0;m # [1;35m v31: [0;35m0x495000018a83940149500000d3029400[0;m # [1;35m v10: 
[0;35m0x495000018a83940168a6954c14cfd693[0;m -# [1;35m v15: [0;35m0x00000000000000006328b14b89d7c527[0;m +# [1;35m v15: [0;35m0x00000000000000001f56954b89d7c527[0;m # [1;35m v27: [0;35m0x0000000000000000495000000a029400[0;m # [1;35m v25: [0;35m0x000000000000000041efffffffc00000[0;m # [1;35m v13: [0;35m0x00000000000000000000000037feffff[0;m # [1;35m v4: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v10: [0;35m0x000000000000000000000000827f4696[0;m +# [1;35m v10: [0;35m0x0000000000000000000000003ead2a96[0;m # [1;35m v10: [0;35m0x00000000000000000000000000003c7f[0;m # [1;35m v31: [0;35m0x0000000000000000000000000000ffff[0;m # [1;35m v16: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v31: [0;35m0x000000000000b87f000000000000ffff[0;m -# [1;35m v14: [0;35m0x000000000000000043d21c00480a8294[0;m -# [1;35m v3: [0;35m0x0000000020febf0000000000c4000000[0;m +# [1;35m v14: [0;35m0x000000000000000000000000480a8294[0;m +# [1;35m v3: [0;35m0x0000000020febf000000000000000000[0;m # [1;35m v8: [0;35m0x000000007f8000003effffffffe00000[0;m # [1;35m v22: [0;35m0x00000000000000003c7fffff00000000[0;m -# [1;35m v29: [0;35m0x00000000000000005fd2ca9e00000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000ca9e00000000[0;m # [1;35m v30: [0;35m0x0000000000000000000000003c7fffff[0;m # [1;35m v12: [0;35m0x0000000000000000000037000000bbfe[0;m # [1;35m v17: [0;35m0x000000003700bbfe00007f803efeffe0[0;m # [1;35m v27: [0;35m0x00000000000000000000000000000075[0;m # [1;35m v12: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v27: [0;35m0x00000000000000000000000000003c7e[0;m -# [1;35m v19: [0;35m0x00000000000000000000000000002a70[0;m +# [1;35m v19: [0;35m0x0000000000000000000000000000ca9e[0;m # [1;35m v14: [0;35m0x00000000000000000000000000003c7e[0;m # [1;35m v10: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v26: [0;35m0x0000000020febf0000000000c4000000[0;m 
+# [1;35m v26: [0;35m0x0000000020febf000000000000000000[0;m # [1;35m v7: [0;35m0x000000000000000000000000381b0000[0;m # [1;35m v28: [0;35m0x000000000000000040dfdcffdcc00000[0;m -# [1;35m v29: [0;35m0x00000000000000005fd2ca1200000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000ca1200000000[0;m # [1;35m v12: [0;35m0x0000000000000000000000000000ffff[0;m # [1;35m v18: [0;35m0x000000000000000000000000ff67ff67[0;m # [1;35m v12: [0;35m0x00000000007ebf000000000000000000[0;m # [1;35m v2: [0;35m0x0000000000000000495000008a828000[0;m -# [1;35m v8: [0;35m0x000000007f8000003effffffcc000000[0;m +# [1;35m v8: [0;35m0x000000007f8000003effffffc8000000[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v9: [0;35m0x0100000008009801010000000800dc00[0;m -# [1;35m v14: [0;35m0x000000000000000000000000c4000000[0;m +# [1;35m v14: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v29: [0;35m0x07070707070707070707070707070707[0;m # [1;35m v21: [0;35m0x00000000000000000000001f0000001f[0;m # [1;35m v1: [0;35m0x0000000000000000000f000f000f000f[0;m # [1;35m v27: [0;35m0x0000001f0000001f0000001f0000001f[0;m # [1;35m v19: [0;35m0x00000000000000000707070707070707[0;m -# [1;35m v15: [0;35m0x000f000f000f000f000f000f0001000f[0;m +# [1;35m v15: [0;35m0x000f000f000f000f000f000f000f000f[0;m # [1;35m v1: [0;35m0x08080808080808080808080808080808[0;m # [1;35m v27: [0;35m0x00000000000000000000001100000002[0;m # [1;35m v9: [0;35m0x00000000000000000007001000040000[0;m -# [1;35m v31: [0;35m0x0000000c0000000c0000000c0000000f[0;m +# [1;35m v31: [0;35m0x0000000c0000000c0000000c0000000c[0;m # [1;35m v14: [0;35m0x00000000000000000505050505050505[0;m -# [1;35m v6: [0;35m0x00100010001000100001001000010000[0;m +# [1;35m v6: [0;35m0x00100010001000100010001000010000[0;m # [1;35m v18: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v14: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v19: [0;35m0xffffffff000000ff0000000000ffffff[0;m +# [1;35m v19: 
[0;35m0xffffffff000000ff00000000ffffffff[0;m # [1;35m v15: [0;35m0xffffffffffffffffff00ff00ff00ffff[0;m # [1;35m v12: [0;35m0xffffffffffffffffffffffffffffffff[0;m # [1;35m v8: [0;35m0xffffffffffffffff0000000000000000[0;m @@ -962,11 +962,11 @@ # [1;35m v22: [0;35m0xffffffffffffffffffffffffffff0000[0;m # [1;35m v28: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v6: [0;35m0xffffffffffffffffffffffffffffffff[0;m -# [1;35m v25: [0;35m0x000000000000000000000000ffffffff[0;m +# [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x0000000000000000ffffffffffffffff[0;m -# [1;35m v16: [0;35m0x0000000000000000ffffffff0000ffff[0;m +# [1;35m v16: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v23: [0;35m0x0000000000000000ffffffffffffffff[0;m -# [1;35m v7: [0;35m0xffffffffffffffff0000000000000000[0;m +# [1;35m v7: [0;35m0xffffffffffffffffffffffff00000000[0;m # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v10: [0;35m0x0000000000000000ff00ff00ffff0000[0;m # [1;35m v21: [0;35m0x0000000000000000ffffffffffffffff[0;m @@ -982,9 +982,9 @@ # [1;35m v12: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v28: [0;35m0x0000000000000000000000000000ffff[0;m # [1;35m v22: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v5: [0;35m0x0000000000000000ffffffffffffffff[0;m +# [1;35m v5: [0;35m0x000000000000000000000000ffffffff[0;m # [1;35m v13: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v27: [0;35m0x0000000000000000000000ff000000ff[0;m +# [1;35m v27: [0;35m0x0000000000000000ffffffff000000ff[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v22: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v6: [0;35m0x00000000000000000000000000000000[0;m @@ -995,7 +995,7 @@ # [1;35m v31: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v9: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v7: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v11: [0;35m0x0000000000000000ffffffff00000000[0;m +# 
[1;35m v11: [0;35m0x0000000000000000ffffffffffff0000[0;m # [1;35m v1: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0xffffffffffffffffffffffffffffffff[0;m # [1;35m v8: [0;35m0xffffffffffffffffffffffffffffffff[0;m @@ -1016,9 +1016,9 @@ # [1;35m v7: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v7: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v0: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v0: [0;35m0x0000000000000000ffffffffffff0000[0;m # [1;35m v24: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v26: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v26: [0;35m0x0000000000000000ffffffffffff0000[0;m # [1;35m v1: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v28: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v26: [0;35m0x00000000000000000000000000000000[0;m @@ -5088,7 +5088,7 @@ # [1;35m v26: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v11: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v8: [0;35m0x37a00000000000000000000000000000[0;m +# [1;35m v8: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v24: [0;35m0x0000000000000000377f0000377f0000[0;m # [1;35m v4: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m @@ -5117,24 +5117,24 @@ # [1;35m v9: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v30: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v19: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v15: [0;35m0x7ff00000000000007ff8000000000000[0;m +# [1;35m v15: [0;35m0x7ff80000000000007ff8000000000000[0;m # [1;35m v12: [0;35m0x00000000000000007fc000007fc00000[0;m # [1;35m v19: [0;35m0xffffffffffffffffffffffffffffffff[0;m -# [1;35m v19: [0;35m0x37a00000000000000000000000000000[0;m +# [1;35m v19: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v25: 
[0;35m0x00000000000000007fc000007fc00000[0;m -# [1;35m v6: [0;35m0x7ff00000000000007ff8000000000000[0;m -# [1;35m v16: [0;35m0x37a0000000000000000000000180fe00[0;m +# [1;35m v6: [0;35m0x7ff80000000000007ff8000000000000[0;m +# [1;35m v16: [0;35m0x0000000000000000000000000180fe00[0;m # [1;35m v15: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v23: [0;35m0x37a0000000000000000000000180fe00[0;m -# [1;35m v6: [0;35m0x000000000000000037a0000000000000[0;m +# [1;35m v23: [0;35m0x0000000000000000000000000180fe00[0;m +# [1;35m v6: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v8: [0;35m0x37a00000000000007fc000007fc00000[0;m +# [1;35m v8: [0;35m0x000000000180fe007fc000007fc00000[0;m # [1;35m v13: [0;35m0x0000000000000000ffffffff7fc00000[0;m # [1;35m v15: [0;35m0xffffffff000000000000000000000000[0;m -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v20: [0;35m0x0000000000000000ffffffff00000000[0;m # [1;35m v18: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v9: [0;35m0x00000000ffffffff37a0000000000000[0;m +# [1;35m v9: [0;35m0x00000000ffffffff000000000180fe00[0;m # [1;35m v7: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v18: [0;35m0x000000000000000000000000ffffffff[0;m # [1;35m v31: [0;35m0x00000000000000000000000000000000[0;m @@ -5163,14 +5163,14 @@ # [1;35m v19: [0;35m0x00000000000000007fc000007fc00000[0;m # [1;35m v24: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v16: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v18: 
[0;35m0x000000000000000000000000ffffffff[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v3: [0;35m0x0000000000000000000000007fffffff[0;m -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v26: [0;35m0x00000000000000007fffffff00000000[0;m # [1;35m v14: [0;35m0xbfd6000000000000bfd6000000000000[0;m # [1;35m v26: [0;35m0x00000000000000003f6800003f680000[0;m @@ -5198,7 +5198,7 @@ # [1;35m v5: [0;35m0x80000000800000008000000080000000[0;m # [1;35m v18: [0;35m0x7ff00000000000007ff0000000000000[0;m # [1;35m v10: [0;35m0x00000000000000007f8000007f800000[0;m -# [1;35m v5: [0;35m0x7f8000007f800000474c80007f800000[0;m +# [1;35m v5: [0;35m0x7f8000007f8000007f8000007f800000[0;m # [1;35m v22: [0;35m0x40000000000000004000000000000000[0;m # [1;35m v31: [0;35m0x00000000000000004000000040000000[0;m # [1;35m v18: [0;35m0x40000000400000004000000040000000[0;m @@ -5244,7 +5244,7 @@ # [1;35m v9: [0;35m0x43dfe000001fe0000000000000000000[0;m # [1;35m v26: [0;35m0x000000000000000040fff00000200000[0;m # [1;35m v11: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v29: [0;35m0x7fc000007fc000000000000000000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v22: [0;35m0x4effe000000000004e001a4000000000[0;m # [1;35m v18: [0;35m0x4207bfc03d7f00000000000000000000[0;m # [1;32m p8<15:0>: [0;32m0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m @@ -5795,9 +5795,9 @@ # [1;35m z24<639:512>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────── 0x0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# [1;35m z24<127:0>: [0;35m0x0000000000000000000000ff00000000[0;m -# [1;35m z25<127:0>: [0;35m0xa000000000000000000000ffc0000000[0;m -# [1;35m z26<127:0>: [0;35m0x3700000000000000000000007f000000[0;m +# [1;35m z24<127:0>: 
[0;35m0x00000000fe000000000000ff00000000[0;m +# [1;35m z25<127:0>: [0;35m0x0000000080000000000000ffc0000000[0;m +# [1;35m z26<127:0>: [0;35m0x0000000001000000000000007f000000[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m @@ -5808,10 +5808,10 @@ # ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ╙───────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ╙───────────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙─────────────────────────────── 0x37'a0'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙─────────────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z24<255:128>: [0;35m0x00000000000000000000000000000000[0;m # [1;35m z25<255:128>: [0;35m0x00000000000000000000000000000000[0;m # [1;35m z26<255:128>: [0;35m0x00000000000000000000000000000000[0;m @@ -5936,11 +5936,11 @@ # ╙───────────────────────────── 0xedec'ebea'e9e8 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z26<127:0>: [0;35m0x00000000000000000000ffff00000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) # [1;35m z27<127:0>: [0;35m0x00000000000000007fff000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) -# [1;35m z28<127:0>: [0;35m0x37a0000000000000000000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) +# [1;35m z28<127:0>: [0;35m0x000000000180fe00000000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) # ║ ║ ║ ╙─ 
0x7fc00000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ╙───────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────────────── 0x37a00000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z26<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z27<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z28<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) @@ -5970,10 +5970,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z27<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z28<127:0>: [0;35m0x00000000000000000000ffff7fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z29<127:0>: [0;35m0x37a0000000000000000000007fff0000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z28<127:0>: [0;35m0x000000000180fe000000ffff7fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z29<127:0>: [0;35m0x0000000000000000000000007fff0000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z27<255:128>: [0;35m0x00000000000000000000000000000000[0;m 
([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z28<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z29<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) @@ -6211,3 +6211,294 @@ # [1;35m z31<639:512>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m sp: [0;36m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000055[0;m +# ╙─ 0x55 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x000000aa[0;m +# ╙─ 0xaa <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x000000ff[0;m +# ╙─ 0xff <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000054[0;m +# ╙─ 0x54 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x000055a9[0;m +# ╙─ 0x55a9 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x000055fe[0;m +# ╙─ 0x55fe <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000aba7[0;m +# ╙─ 0xaba7 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000150[0;m +# ╙─ 0x0150 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x555556f9[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x5555aca2[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xaaab039b[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00005a94[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x0000000055566ba0[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x2d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 
0x00000000[0;m +# ╙─ 0xba <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x1d47 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0xced4 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000095[0;m +# ╙─ 0x95 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000003469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 
0x00000000[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000469d[0;m +# ╙─ 0x469d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00004600[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00004600[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0003009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000469d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x0000000000034600[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x4600 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0xff9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0003ff9d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0xffffff9d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x0000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x000000000000009d[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x009d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x0000009d[0;m +# ╙─ 0x9d <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w0: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w1: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w2: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m w3: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x1: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x2: [0;36m0x~~~~~~~~~~~~~~~~[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x3: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m +# ╙─ 0x0000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m wzr: [0;36m 0x00000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m xzr: [0;36m0x0000000000000000[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m sp: [0;36m0x~~~~~~~~~~~~~~~~[0;m diff --git a/test/test-trace-reference/log-vregs b/test/test-trace-reference/log-vregs index 60df96af..72a2d0ef 100644 --- a/test/test-trace-reference/log-vregs +++ b/test/test-trace-reference/log-vregs @@ -368,8 +368,8 @@ # d5: 0x3f90000000000000 # d16: 0x0000000000000000 # d15: 0x0000000000000000 -# v7: 0x00000000000000007ff000004e81442e -# v8: 0x0000000000000000c004000000000000 +# v7: 0x0000000000000000000000004e81442e +# v8: 0x00000000000000000000000000000000 # s29: 0x00000000 # s15: 0x49800000 # s27: 0x00000000 @@ -380,14 +380,14 @@ # d3: 0x0000000000000000 # d28: 
0x41dfffffffc00000 # d27: 0x0000000000000000 -# v11: 0x0000000000000000400000004f7fe000 -# v0: 0x000000000000000043d21c00480a8294 +# v11: 0x0000000000000000000000004f7fe000 +# v0: 0x000000000000000000000000480a8294 # s20: 0x00000000 # s21: 0x00000000 # s6: 0x5f000000 # s7: 0x38ff0000 -# v19: 0x000000000000000043d21c00480a8294 -# v16: 0x0000000000000000400000004f7f2000 +# v19: 0x000000000000000000000000480a8294 +# v16: 0x0000000000000000000000004f7f2000 # v0: 0x00000000000000000000000000000000 # v27: 0x00000000000000000000000000000000 # v21: 0x00000000000000000000000000000000 @@ -397,59 +397,59 @@ # v10: 0x00000000000000003f0000007f600000 # v31: 0x495000018a83940149500000d3029400 # v10: 0x495000018a83940168a6954c14cfd693 -# v15: 0x00000000000000006328b14b89d7c527 +# v15: 0x00000000000000001f56954b89d7c527 # v27: 0x0000000000000000495000000a029400 # v25: 0x000000000000000041efffffffc00000 # v13: 0x00000000000000000000000037feffff # v4: 0x00000000000000000000000000000000 -# v10: 0x000000000000000000000000827f4696 +# v10: 0x0000000000000000000000003ead2a96 # v10: 0x00000000000000000000000000003c7f # v31: 0x0000000000000000000000000000ffff # v16: 0x00000000000000000000000000000000 # v0: 0x00000000000000000000000000000000 # v31: 0x000000000000b87f000000000000ffff -# v14: 0x000000000000000043d21c00480a8294 -# v3: 0x0000000020febf0000000000c4000000 +# v14: 0x000000000000000000000000480a8294 +# v3: 0x0000000020febf000000000000000000 # v8: 0x000000007f8000003effffffffe00000 # v22: 0x00000000000000003c7fffff00000000 -# v29: 0x00000000000000005fd2ca9e00000000 +# v29: 0x00000000000000000000ca9e00000000 # v30: 0x0000000000000000000000003c7fffff # v12: 0x0000000000000000000037000000bbfe # v17: 0x000000003700bbfe00007f803efeffe0 # v27: 0x00000000000000000000000000000075 # v12: 0x00000000000000000000000000000000 # v27: 0x00000000000000000000000000003c7e -# v19: 0x00000000000000000000000000002a70 +# v19: 0x0000000000000000000000000000ca9e # v14: 0x00000000000000000000000000003c7e # 
v10: 0x00000000000000000000000000000000 # v5: 0x00000000000000000000000000000000 -# v26: 0x0000000020febf0000000000c4000000 +# v26: 0x0000000020febf000000000000000000 # v7: 0x000000000000000000000000381b0000 # v28: 0x000000000000000040dfdcffdcc00000 -# v29: 0x00000000000000005fd2ca1200000000 +# v29: 0x00000000000000000000ca1200000000 # v12: 0x0000000000000000000000000000ffff # v18: 0x000000000000000000000000ff67ff67 # v12: 0x00000000007ebf000000000000000000 # v2: 0x0000000000000000495000008a828000 -# v8: 0x000000007f8000003effffffcc000000 +# v8: 0x000000007f8000003effffffc8000000 # v5: 0x00000000000000000000000000000000 # v9: 0x0100000008009801010000000800dc00 -# v14: 0x000000000000000000000000c4000000 +# v14: 0x00000000000000000000000000000000 # v29: 0x07070707070707070707070707070707 # v21: 0x00000000000000000000001f0000001f # v1: 0x0000000000000000000f000f000f000f # v27: 0x0000001f0000001f0000001f0000001f # v19: 0x00000000000000000707070707070707 -# v15: 0x000f000f000f000f000f000f0001000f +# v15: 0x000f000f000f000f000f000f000f000f # v1: 0x08080808080808080808080808080808 # v27: 0x00000000000000000000001100000002 # v9: 0x00000000000000000007001000040000 -# v31: 0x0000000c0000000c0000000c0000000f +# v31: 0x0000000c0000000c0000000c0000000c # v14: 0x00000000000000000505050505050505 -# v6: 0x00100010001000100001001000010000 +# v6: 0x00100010001000100010001000010000 # v18: 0x00000000000000000000000000000000 # v14: 0x00000000000000000000000000000000 -# v19: 0xffffffff000000ff0000000000ffffff +# v19: 0xffffffff000000ff00000000ffffffff # v15: 0xffffffffffffffffff00ff00ff00ffff # v12: 0xffffffffffffffffffffffffffffffff # v8: 0xffffffffffffffff0000000000000000 @@ -469,11 +469,11 @@ # v22: 0xffffffffffffffffffffffffffff0000 # v28: 0x0000000000000000ffffffffffffffff # v6: 0xffffffffffffffffffffffffffffffff -# v25: 0x000000000000000000000000ffffffff +# v25: 0x00000000000000000000000000000000 # v21: 0x0000000000000000ffffffffffffffff -# v16: 0x0000000000000000ffffffff0000ffff 
+# v16: 0x0000000000000000ffffffffffffffff # v23: 0x0000000000000000ffffffffffffffff -# v7: 0xffffffffffffffff0000000000000000 +# v7: 0xffffffffffffffffffffffff00000000 # v0: 0x00000000000000000000000000000000 # v10: 0x0000000000000000ff00ff00ffff0000 # v21: 0x0000000000000000ffffffffffffffff @@ -489,9 +489,9 @@ # v12: 0x00000000000000000000000000000000 # v28: 0x0000000000000000000000000000ffff # v22: 0x00000000000000000000000000000000 -# v5: 0x0000000000000000ffffffffffffffff +# v5: 0x000000000000000000000000ffffffff # v13: 0x00000000000000000000000000000000 -# v27: 0x0000000000000000000000ff000000ff +# v27: 0x0000000000000000ffffffff000000ff # v5: 0x00000000000000000000000000000000 # v22: 0x00000000000000000000000000000000 # v6: 0x00000000000000000000000000000000 @@ -502,7 +502,7 @@ # v31: 0x00000000000000000000000000000000 # v9: 0x00000000000000000000000000000000 # v7: 0x00000000000000000000000000000000 -# v11: 0x0000000000000000ffffffff00000000 +# v11: 0x0000000000000000ffffffffffff0000 # v1: 0x00000000000000000000000000000000 # v21: 0xffffffffffffffffffffffffffffffff # v8: 0xffffffffffffffffffffffffffffffff @@ -523,9 +523,9 @@ # v7: 0x0000000000000000ffffffffffffffff # v7: 0x0000000000000000ffffffffffffffff # v25: 0x00000000000000000000000000000000 -# v0: 0x0000000000000000ffffffff00000000 +# v0: 0x0000000000000000ffffffffffff0000 # v24: 0x00000000000000000000000000000000 -# v26: 0x0000000000000000ffffffff00000000 +# v26: 0x0000000000000000ffffffffffff0000 # v1: 0x0000000000000000ffffffffffffffff # v28: 0x0000000000000000ffffffffffffffff # v26: 0x00000000000000000000000000000000 @@ -4247,7 +4247,7 @@ # v26: 0x00000000000000000000000000000000 # v21: 0x00000000000000000000000000000000 # v11: 0x00000000000000000000000000000000 -# v8: 0x37a00000000000000000000000000000 +# v8: 0x00000000000000000000000000000000 # v24: 0x0000000000000000377f0000377f0000 # v4: 0x0000000000000000ffffffffffffffff # v25: 0x00000000000000000000000000000000 @@ -4276,24 +4276,24 @@ # v9: 
0x00000000000000000000000000000000 # v30: 0x00000000000000000000000000000000 # v19: 0x00000000000000000000000000000000 -# v15: 0x7ff00000000000007ff8000000000000 +# v15: 0x7ff80000000000007ff8000000000000 # v12: 0x00000000000000007fc000007fc00000 # v19: 0xffffffffffffffffffffffffffffffff -# v19: 0x37a00000000000000000000000000000 +# v19: 0x00000000000000000000000000000000 # v25: 0x00000000000000007fc000007fc00000 -# v6: 0x7ff00000000000007ff8000000000000 -# v16: 0x37a0000000000000000000000180fe00 +# v6: 0x7ff80000000000007ff8000000000000 +# v16: 0x0000000000000000000000000180fe00 # v15: 0x00000000000000000000000000000000 -# v23: 0x37a0000000000000000000000180fe00 -# v6: 0x000000000000000037a0000000000000 +# v23: 0x0000000000000000000000000180fe00 +# v6: 0x00000000000000000000000000000000 # v27: 0x00000000000000000000000000000000 -# v8: 0x37a00000000000007fc000007fc00000 +# v8: 0x000000000180fe007fc000007fc00000 # v13: 0x0000000000000000ffffffff7fc00000 # v15: 0xffffffff000000000000000000000000 -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 # v20: 0x0000000000000000ffffffff00000000 # v18: 0x00000000000000000000000000000000 -# v9: 0x00000000ffffffff37a0000000000000 +# v9: 0x00000000ffffffff000000000180fe00 # v7: 0x0000000000000000ffffffffffffffff # v18: 0x000000000000000000000000ffffffff # v31: 0x00000000000000000000000000000000 @@ -4322,14 +4322,14 @@ # v19: 0x00000000000000007fc000007fc00000 # v24: 0x00000000000000000000000000000000 # v16: 0x00000000000000000000000000000000 -# v27: 0x00000000000000000000000037a00000 -# v27: 0x00000000000000000000000037a00000 +# v27: 0x00000000000000000000000000000000 +# v27: 0x00000000000000000000000000000000 # v21: 0x00000000000000000000000000000000 # v5: 0x00000000000000000000000000000000 # v18: 0x000000000000000000000000ffffffff # v5: 0x00000000000000000000000000000000 # v3: 0x0000000000000000000000007fffffff -# v27: 0x00000000000000000000000037a00000 +# v27: 
0x00000000000000000000000000000000 # v26: 0x00000000000000007fffffff00000000 # v14: 0xbfd6000000000000bfd6000000000000 # v26: 0x00000000000000003f6800003f680000 @@ -4356,7 +4356,7 @@ # v5: 0x80000000800000008000000080000000 # v18: 0x7ff00000000000007ff0000000000000 # v10: 0x00000000000000007f8000007f800000 -# v5: 0x7f8000007f800000474c80007f800000 +# v5: 0x7f8000007f8000007f8000007f800000 # v22: 0x40000000000000004000000000000000 # v31: 0x00000000000000004000000040000000 # v18: 0x40000000400000004000000040000000 @@ -4402,7 +4402,7 @@ # v9: 0x43dfe000001fe0000000000000000000 # v26: 0x000000000000000040fff00000200000 # v11: 0x00000000000000000000000000000000 -# v29: 0x7fc000007fc000000000000000000000 +# v29: 0x00000000000000000000000000000000 # v22: 0x4effe000000000004e001a4000000000 # v18: 0x4207bfc03d7f00000000000000000000 # p8<15:0>: 0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 <- 0x~~~~~~~~~~~~~~~~ @@ -4953,9 +4953,9 @@ # z24<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ # ╙───────────────── 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ -# z24<127:0>: 0x0000000000000000000000ff00000000 -# z25<127:0>: 0xa000000000000000000000ffc0000000 -# z26<127:0>: 0x3700000000000000000000007f000000 +# z24<127:0>: 0x00000000fe000000000000ff00000000 +# z25<127:0>: 0x0000000080000000000000ffc0000000 +# z26<127:0>: 0x0000000001000000000000007f000000 # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ @@ -4966,10 +4966,10 @@ # ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ -# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ +# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~ # ║ ║ 
╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ -# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~ +# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~ # z24<255:128>: 0x00000000000000000000000000000000 # z25<255:128>: 0x00000000000000000000000000000000 # z26<255:128>: 0x00000000000000000000000000000000 @@ -5094,11 +5094,11 @@ # ╙───────────────────────────── 0xedec'ebea'e9e8 <- 0x~~~~~~~~~~~~~~~~ # z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000) # z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000) -# z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000) +# z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000) # ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~ -# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ -# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ +# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) # z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) # z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000) @@ -5128,10 +5128,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~ # z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000) -# z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000) -# z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000) +# z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000) +# z29<127:0>: 
0x0000000000000000000000007fff0000 (0.000, 0.000) # ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ -# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ +# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~ # z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) diff --git a/test/test-trace-reference/log-vregs-colour b/test/test-trace-reference/log-vregs-colour index 31d1c24c..d15dcfec 100644 --- a/test/test-trace-reference/log-vregs-colour +++ b/test/test-trace-reference/log-vregs-colour @@ -368,8 +368,8 @@ # [1;35m d5: [0;35m 0x3f90000000000000[0;m # [1;35m d16: [0;35m 0x0000000000000000[0;m # [1;35m d15: [0;35m 0x0000000000000000[0;m -# [1;35m v7: [0;35m0x00000000000000007ff000004e81442e[0;m -# [1;35m v8: [0;35m0x0000000000000000c004000000000000[0;m +# [1;35m v7: [0;35m0x0000000000000000000000004e81442e[0;m +# [1;35m v8: [0;35m0x00000000000000000000000000000000[0;m # [1;35m s29: [0;35m 0x00000000[0;m # [1;35m s15: [0;35m 0x49800000[0;m # [1;35m s27: [0;35m 0x00000000[0;m @@ -380,14 +380,14 @@ # [1;35m d3: [0;35m 0x0000000000000000[0;m # [1;35m d28: [0;35m 0x41dfffffffc00000[0;m # [1;35m d27: [0;35m 0x0000000000000000[0;m -# [1;35m v11: [0;35m0x0000000000000000400000004f7fe000[0;m -# [1;35m v0: [0;35m0x000000000000000043d21c00480a8294[0;m +# [1;35m v11: [0;35m0x0000000000000000000000004f7fe000[0;m +# [1;35m v0: [0;35m0x000000000000000000000000480a8294[0;m # [1;35m s20: [0;35m 0x00000000[0;m # [1;35m s21: [0;35m 0x00000000[0;m # [1;35m s6: [0;35m 0x5f000000[0;m # [1;35m s7: [0;35m 0x38ff0000[0;m -# [1;35m v19: [0;35m0x000000000000000043d21c00480a8294[0;m -# [1;35m v16: [0;35m0x0000000000000000400000004f7f2000[0;m +# [1;35m v19: [0;35m0x000000000000000000000000480a8294[0;m +# [1;35m v16: 
[0;35m0x0000000000000000000000004f7f2000[0;m # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x00000000000000000000000000000000[0;m @@ -397,59 +397,59 @@ # [1;35m v10: [0;35m0x00000000000000003f0000007f600000[0;m # [1;35m v31: [0;35m0x495000018a83940149500000d3029400[0;m # [1;35m v10: [0;35m0x495000018a83940168a6954c14cfd693[0;m -# [1;35m v15: [0;35m0x00000000000000006328b14b89d7c527[0;m +# [1;35m v15: [0;35m0x00000000000000001f56954b89d7c527[0;m # [1;35m v27: [0;35m0x0000000000000000495000000a029400[0;m # [1;35m v25: [0;35m0x000000000000000041efffffffc00000[0;m # [1;35m v13: [0;35m0x00000000000000000000000037feffff[0;m # [1;35m v4: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v10: [0;35m0x000000000000000000000000827f4696[0;m +# [1;35m v10: [0;35m0x0000000000000000000000003ead2a96[0;m # [1;35m v10: [0;35m0x00000000000000000000000000003c7f[0;m # [1;35m v31: [0;35m0x0000000000000000000000000000ffff[0;m # [1;35m v16: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v31: [0;35m0x000000000000b87f000000000000ffff[0;m -# [1;35m v14: [0;35m0x000000000000000043d21c00480a8294[0;m -# [1;35m v3: [0;35m0x0000000020febf0000000000c4000000[0;m +# [1;35m v14: [0;35m0x000000000000000000000000480a8294[0;m +# [1;35m v3: [0;35m0x0000000020febf000000000000000000[0;m # [1;35m v8: [0;35m0x000000007f8000003effffffffe00000[0;m # [1;35m v22: [0;35m0x00000000000000003c7fffff00000000[0;m -# [1;35m v29: [0;35m0x00000000000000005fd2ca9e00000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000ca9e00000000[0;m # [1;35m v30: [0;35m0x0000000000000000000000003c7fffff[0;m # [1;35m v12: [0;35m0x0000000000000000000037000000bbfe[0;m # [1;35m v17: [0;35m0x000000003700bbfe00007f803efeffe0[0;m # [1;35m v27: [0;35m0x00000000000000000000000000000075[0;m # [1;35m v12: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v27: 
[0;35m0x00000000000000000000000000003c7e[0;m -# [1;35m v19: [0;35m0x00000000000000000000000000002a70[0;m +# [1;35m v19: [0;35m0x0000000000000000000000000000ca9e[0;m # [1;35m v14: [0;35m0x00000000000000000000000000003c7e[0;m # [1;35m v10: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v26: [0;35m0x0000000020febf0000000000c4000000[0;m +# [1;35m v26: [0;35m0x0000000020febf000000000000000000[0;m # [1;35m v7: [0;35m0x000000000000000000000000381b0000[0;m # [1;35m v28: [0;35m0x000000000000000040dfdcffdcc00000[0;m -# [1;35m v29: [0;35m0x00000000000000005fd2ca1200000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000ca1200000000[0;m # [1;35m v12: [0;35m0x0000000000000000000000000000ffff[0;m # [1;35m v18: [0;35m0x000000000000000000000000ff67ff67[0;m # [1;35m v12: [0;35m0x00000000007ebf000000000000000000[0;m # [1;35m v2: [0;35m0x0000000000000000495000008a828000[0;m -# [1;35m v8: [0;35m0x000000007f8000003effffffcc000000[0;m +# [1;35m v8: [0;35m0x000000007f8000003effffffc8000000[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v9: [0;35m0x0100000008009801010000000800dc00[0;m -# [1;35m v14: [0;35m0x000000000000000000000000c4000000[0;m +# [1;35m v14: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v29: [0;35m0x07070707070707070707070707070707[0;m # [1;35m v21: [0;35m0x00000000000000000000001f0000001f[0;m # [1;35m v1: [0;35m0x0000000000000000000f000f000f000f[0;m # [1;35m v27: [0;35m0x0000001f0000001f0000001f0000001f[0;m # [1;35m v19: [0;35m0x00000000000000000707070707070707[0;m -# [1;35m v15: [0;35m0x000f000f000f000f000f000f0001000f[0;m +# [1;35m v15: [0;35m0x000f000f000f000f000f000f000f000f[0;m # [1;35m v1: [0;35m0x08080808080808080808080808080808[0;m # [1;35m v27: [0;35m0x00000000000000000000001100000002[0;m # [1;35m v9: [0;35m0x00000000000000000007001000040000[0;m -# [1;35m v31: [0;35m0x0000000c0000000c0000000c0000000f[0;m +# [1;35m v31: [0;35m0x0000000c0000000c0000000c0000000c[0;m # 
[1;35m v14: [0;35m0x00000000000000000505050505050505[0;m -# [1;35m v6: [0;35m0x00100010001000100001001000010000[0;m +# [1;35m v6: [0;35m0x00100010001000100010001000010000[0;m # [1;35m v18: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v14: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v19: [0;35m0xffffffff000000ff0000000000ffffff[0;m +# [1;35m v19: [0;35m0xffffffff000000ff00000000ffffffff[0;m # [1;35m v15: [0;35m0xffffffffffffffffff00ff00ff00ffff[0;m # [1;35m v12: [0;35m0xffffffffffffffffffffffffffffffff[0;m # [1;35m v8: [0;35m0xffffffffffffffff0000000000000000[0;m @@ -469,11 +469,11 @@ # [1;35m v22: [0;35m0xffffffffffffffffffffffffffff0000[0;m # [1;35m v28: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v6: [0;35m0xffffffffffffffffffffffffffffffff[0;m -# [1;35m v25: [0;35m0x000000000000000000000000ffffffff[0;m +# [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x0000000000000000ffffffffffffffff[0;m -# [1;35m v16: [0;35m0x0000000000000000ffffffff0000ffff[0;m +# [1;35m v16: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v23: [0;35m0x0000000000000000ffffffffffffffff[0;m -# [1;35m v7: [0;35m0xffffffffffffffff0000000000000000[0;m +# [1;35m v7: [0;35m0xffffffffffffffffffffffff00000000[0;m # [1;35m v0: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v10: [0;35m0x0000000000000000ff00ff00ffff0000[0;m # [1;35m v21: [0;35m0x0000000000000000ffffffffffffffff[0;m @@ -489,9 +489,9 @@ # [1;35m v12: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v28: [0;35m0x0000000000000000000000000000ffff[0;m # [1;35m v22: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v5: [0;35m0x0000000000000000ffffffffffffffff[0;m +# [1;35m v5: [0;35m0x000000000000000000000000ffffffff[0;m # [1;35m v13: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v27: [0;35m0x0000000000000000000000ff000000ff[0;m +# [1;35m v27: [0;35m0x0000000000000000ffffffff000000ff[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m 
v22: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v6: [0;35m0x00000000000000000000000000000000[0;m @@ -502,7 +502,7 @@ # [1;35m v31: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v9: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v7: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v11: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v11: [0;35m0x0000000000000000ffffffffffff0000[0;m # [1;35m v1: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0xffffffffffffffffffffffffffffffff[0;m # [1;35m v8: [0;35m0xffffffffffffffffffffffffffffffff[0;m @@ -523,9 +523,9 @@ # [1;35m v7: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v7: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v0: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v0: [0;35m0x0000000000000000ffffffffffff0000[0;m # [1;35m v24: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v26: [0;35m0x0000000000000000ffffffff00000000[0;m +# [1;35m v26: [0;35m0x0000000000000000ffffffffffff0000[0;m # [1;35m v1: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v28: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v26: [0;35m0x00000000000000000000000000000000[0;m @@ -4247,7 +4247,7 @@ # [1;35m v26: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v11: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v8: [0;35m0x37a00000000000000000000000000000[0;m +# [1;35m v8: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v24: [0;35m0x0000000000000000377f0000377f0000[0;m # [1;35m v4: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v25: [0;35m0x00000000000000000000000000000000[0;m @@ -4276,24 +4276,24 @@ # [1;35m v9: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v30: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v19: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v15: 
[0;35m0x7ff00000000000007ff8000000000000[0;m +# [1;35m v15: [0;35m0x7ff80000000000007ff8000000000000[0;m # [1;35m v12: [0;35m0x00000000000000007fc000007fc00000[0;m # [1;35m v19: [0;35m0xffffffffffffffffffffffffffffffff[0;m -# [1;35m v19: [0;35m0x37a00000000000000000000000000000[0;m +# [1;35m v19: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v25: [0;35m0x00000000000000007fc000007fc00000[0;m -# [1;35m v6: [0;35m0x7ff00000000000007ff8000000000000[0;m -# [1;35m v16: [0;35m0x37a0000000000000000000000180fe00[0;m +# [1;35m v6: [0;35m0x7ff80000000000007ff8000000000000[0;m +# [1;35m v16: [0;35m0x0000000000000000000000000180fe00[0;m # [1;35m v15: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v23: [0;35m0x37a0000000000000000000000180fe00[0;m -# [1;35m v6: [0;35m0x000000000000000037a0000000000000[0;m +# [1;35m v23: [0;35m0x0000000000000000000000000180fe00[0;m +# [1;35m v6: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v8: [0;35m0x37a00000000000007fc000007fc00000[0;m +# [1;35m v8: [0;35m0x000000000180fe007fc000007fc00000[0;m # [1;35m v13: [0;35m0x0000000000000000ffffffff7fc00000[0;m # [1;35m v15: [0;35m0xffffffff000000000000000000000000[0;m -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v20: [0;35m0x0000000000000000ffffffff00000000[0;m # [1;35m v18: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v9: [0;35m0x00000000ffffffff37a0000000000000[0;m +# [1;35m v9: [0;35m0x00000000ffffffff000000000180fe00[0;m # [1;35m v7: [0;35m0x0000000000000000ffffffffffffffff[0;m # [1;35m v18: [0;35m0x000000000000000000000000ffffffff[0;m # [1;35m v31: [0;35m0x00000000000000000000000000000000[0;m @@ -4322,14 +4322,14 @@ # [1;35m v19: [0;35m0x00000000000000007fc000007fc00000[0;m # [1;35m v24: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v16: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v27: 
[0;35m0x00000000000000000000000037a00000[0;m -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v21: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v18: [0;35m0x000000000000000000000000ffffffff[0;m # [1;35m v5: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v3: [0;35m0x0000000000000000000000007fffffff[0;m -# [1;35m v27: [0;35m0x00000000000000000000000037a00000[0;m +# [1;35m v27: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v26: [0;35m0x00000000000000007fffffff00000000[0;m # [1;35m v14: [0;35m0xbfd6000000000000bfd6000000000000[0;m # [1;35m v26: [0;35m0x00000000000000003f6800003f680000[0;m @@ -4356,7 +4356,7 @@ # [1;35m v5: [0;35m0x80000000800000008000000080000000[0;m # [1;35m v18: [0;35m0x7ff00000000000007ff0000000000000[0;m # [1;35m v10: [0;35m0x00000000000000007f8000007f800000[0;m -# [1;35m v5: [0;35m0x7f8000007f800000474c80007f800000[0;m +# [1;35m v5: [0;35m0x7f8000007f8000007f8000007f800000[0;m # [1;35m v22: [0;35m0x40000000000000004000000000000000[0;m # [1;35m v31: [0;35m0x00000000000000004000000040000000[0;m # [1;35m v18: [0;35m0x40000000400000004000000040000000[0;m @@ -4402,7 +4402,7 @@ # [1;35m v9: [0;35m0x43dfe000001fe0000000000000000000[0;m # [1;35m v26: [0;35m0x000000000000000040fff00000200000[0;m # [1;35m v11: [0;35m0x00000000000000000000000000000000[0;m -# [1;35m v29: [0;35m0x7fc000007fc000000000000000000000[0;m +# [1;35m v29: [0;35m0x00000000000000000000000000000000[0;m # [1;35m v22: [0;35m0x4effe000000000004e001a4000000000[0;m # [1;35m v18: [0;35m0x4207bfc03d7f00000000000000000000[0;m # [1;32m p8<15:0>: [0;32m0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1[0;m <- [1;34m0x~~~~~~~~~~~~~~~~[0;m @@ -4953,9 +4953,9 @@ # [1;35m z24<639:512>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 
0x0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────── 0x0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# [1;35m z24<127:0>: [0;35m0x0000000000000000000000ff00000000[0;m -# [1;35m z25<127:0>: [0;35m0xa000000000000000000000ffc0000000[0;m -# [1;35m z26<127:0>: [0;35m0x3700000000000000000000007f000000[0;m +# [1;35m z24<127:0>: [0;35m0x00000000fe000000000000ff00000000[0;m +# [1;35m z25<127:0>: [0;35m0x0000000080000000000000ffc0000000[0;m +# [1;35m z26<127:0>: [0;35m0x0000000001000000000000007f000000[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m @@ -4966,10 +4966,10 @@ # ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ╙───────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ╙───────────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙─────────────────────────────── 0x37'a0'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙─────────────────────────────── 0x00'00'00 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z24<255:128>: [0;35m0x00000000000000000000000000000000[0;m # [1;35m z25<255:128>: [0;35m0x00000000000000000000000000000000[0;m # [1;35m z26<255:128>: [0;35m0x00000000000000000000000000000000[0;m @@ -5094,11 +5094,11 @@ # ╙───────────────────────────── 0xedec'ebea'e9e8 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z26<127:0>: [0;35m0x00000000000000000000ffff00000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) # [1;35m z27<127:0>: 
[0;35m0x00000000000000007fff000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) -# [1;35m z28<127:0>: [0;35m0x37a0000000000000000000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) +# [1;35m z28<127:0>: [0;35m0x000000000180fe00000000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m9.183e-41[0;m, [0;35m0.000[0;m) # ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ╙───────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────────────── 0x37a00000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z26<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z27<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z28<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m, [0;35m0.000[0;m) @@ -5128,10 +5128,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────────────── 0x00000000'00000000'00000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z27<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z28<127:0>: [0;35m0x00000000000000000000ffff7fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z29<127:0>: [0;35m0x37a0000000000000000000007fff0000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z28<127:0>: [0;35m0x000000000180fe000000ffff7fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z29<127:0>: [0;35m0x0000000000000000000000007fff0000[0;m 
([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z27<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z28<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z29<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) diff --git a/test/test-trace-reference/log-write b/test/test-trace-reference/log-write index fff3143f..0491d505 100644 --- a/test/test-trace-reference/log-write +++ b/test/test-trace-reference/log-write @@ -2934,10 +2934,10 @@ # ╙───────────────────────────── 0x0000'0000'0000 -> 0x~~~~~~~~~~~~~~~~ # z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000) # z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000) -# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000) +# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000) # ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ -# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ -# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ +# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ # z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) # z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) # z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000) @@ -2958,10 +2958,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ # ╙───────────────────────── 
0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~ # z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000) -# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000) +# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000) # z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ -# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ # z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) # z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000) @@ -3188,3 +3188,196 @@ # z5<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0xb18d -> 
0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 
0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# 
x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 
0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~ +# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ +# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~ diff --git a/test/test-trace-reference/log-write-colour b/test/test-trace-reference/log-write-colour index 92faa1fd..7caf1a45 100644 --- 
a/test/test-trace-reference/log-write-colour +++ b/test/test-trace-reference/log-write-colour @@ -2934,10 +2934,10 @@ # ╙───────────────────────────── 0x0000'0000'0000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z6<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) # [1;35m z7<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) -# [1;35m z8<127:0>: [0;35m0x37a00000000000007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) +# [1;35m z8<127:0>: [0;35m0x000000000180fe007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m, ..., [0;35m0.000[0;m) # ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ║ ╙───────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────────────── 0x37a00000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z6<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z7<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z8<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, ..., [0;35m0.000[0;m, [0;35m0.000[0;m) @@ -2958,10 +2958,10 @@ # ║ ╙───────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────────────── 0x00000000'00000000'00000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z7<127:0>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) -# [1;35m z8<127:0>: [0;35m0x37a00000000000007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) +# [1;35m z8<127:0>: [0;35m0x000000000180fe007fc000007fc00000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m 
z9<127:0>: [0;35m0x43dfe000001fe0000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m -# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # [1;35m z7<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z8<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # [1;35m z9<255:128>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) @@ -3188,3 +3188,196 @@ # [1;35m z5<639:512>: [0;35m0x00000000000000000000000000000000[0;m ([0;35m0.000[0;m, [0;35m0.000[0;m) # ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m # ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x5555555555555555[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x55[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x55a9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x555556f9[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m 
x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x8d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x8d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0xb18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0xb18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x5555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x5555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x555555555555b18d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x95[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m 
x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000003469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000469d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0003ff9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m 
+# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: 
[0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> 
[1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x0000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x000000000000009d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x9d[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# 
[1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x00[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<7:0>: [0;36m 0x00[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<15:0>: [0;36m 0x0000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<31:0>: [0;36m 0x00000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m +# [1;36m x0<63:0>: [0;36m0x0000000000000000[0;m -> [1;34m0x~~~~~~~~~~~~~~~~[0;m diff --git a/tools/check_recent_coverage.sh b/tools/check_recent_coverage.sh new file mode 100755 index 00000000..f6168a2e --- /dev/null +++ b/tools/check_recent_coverage.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright 2021, VIXL authors +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# * Neither the name of ARM Limited nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This code coverage script assumes a Linux-like environment, and has been +# tested on Ubuntu 18.04. + +COVERAGELOG="tools/code_coverage.log" +MONTHSECONDS=$(( 60*60*24*30 )) + +if [ ! -f "$COVERAGELOG" ]; then + echo "No code coverage log found." + echo "Run tools/code_coverage.sh to generate one." + exit 2; +fi + +LASTCOMMIT=`git log -1 --date=format:%s | grep -P "^Date:" | grep -Po "\d+"` +LASTCOVERAGE=`tail -n1 $COVERAGELOG | cut -d' ' -f1` + +d=$(( $LASTCOMMIT - $LASTCOVERAGE )) +if (( d < $MONTHSECONDS )); then + exit 0; +fi + +echo "Code coverage record too old." +echo "Run tools/code_coverage.sh to generate a newer one." 
+exit 1; diff --git a/tools/clang_tidy.py b/tools/clang_tidy.py index 7ba18746..8607547b 100755 --- a/tools/clang_tidy.py +++ b/tools/clang_tidy.py @@ -143,7 +143,7 @@ def ClangTidyFiles(files, clang_tidy, jobs = 1, progress_prefix = ''): return -1 opts = ['--', '-DVIXL_INCLUDE_TARGET_AARCH64', '-DVIXL_CODE_BUFFER_MALLOC', - '-DVIXL_DEBUG','-DVIXL_INCLUDE_SIMLUATOR_AARCH64', + '-DVIXL_DEBUG','-DVIXL_INCLUDE_SIMULATOR_AARCH64', '-DVIXL_INCLUDE_TARGET_A32','-DVIXL_INCLUDE_TARGET_T32', '-DVIXL_INCLUDE_TARGET_A64'] opts += ['-I%s' % config.dir_src_vixl] diff --git a/tools/code_coverage.log b/tools/code_coverage.log new file mode 100644 index 00000000..f19570a8 --- /dev/null +++ b/tools/code_coverage.log @@ -0,0 +1,10 @@ +1624976463 83.00% 97.44% 95.16% +1628075147 83.04% 97.52% 95.33% +1633016028 83.00% 97.52% 95.32% +1636647628 82.97% 97.54% 95.28% +1639684221 82.92% 97.51% 94.06% +1642688881 82.94% 97.51% 95.27% +1646150629 82.94% 97.51% 95.36% +1647535694 82.93% 97.52% 95.36% +1651138061 82.94% 97.52% 95.36% +1653484786 82.79% 97.46% 95.51% diff --git a/tools/code_coverage.sh b/tools/code_coverage.sh new file mode 100755 index 00000000..5525bb0e --- /dev/null +++ b/tools/code_coverage.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# Copyright 2021, VIXL authors +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of ARM Limited nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This code coverage script assumes a Linux-like environment, and has been +# tested on Ubuntu 18.04. + +if ! hash pv 2>/dev/null ; then + echo "This script requires 'pv'" + echo "On Ubuntu, install it with 'sudo apt-get install pv'" + exit 1; +fi + +export CXX=clang++ +export LLVM_PROFILE_FILE=$(mktemp) +PROFDATA=$(mktemp) +BUILDDIR="obj/target_a64/mode_debug/symbols_on/compiler_clang++/std_c++14/simulator_aarch64/negative_testing_off/code_buffer_allocator_mmap" +RUNNER="$BUILDDIR/test/test-runner" + +# Build with code coverage instrumentation enabled. +scons mode=debug coverage=on target=a64 all -j8 + +if [ ! -f "$RUNNER" ]; then + echo "$RUNNER not found." + echo "No test-runner for profiling." + exit 1; +fi + +# Count the number of tests. +tests=`$RUNNER --list | wc -l` + +# Generate a raw profile for a run using all tests. +echo "Running $tests tests. This may take a while..." +$RUNNER --run-all 2>&1 | grep -P "^Running [A-Z0-9]{3,}_" | pv -lbp -w 40 -s $tests >/dev/null + +# Process the raw profile data for reporting. 
+llvm-profdata merge -sparse $LLVM_PROFILE_FILE -o $PROFDATA + +# Print a coverage report for the source files in src/ +REPORT="llvm-cov report $RUNNER -instr-profile=$PROFDATA $BUILDDIR/src/" +eval $REPORT + +# Log the report summary line. +eval $REPORT | tail -n1 | tr -s " " | cut -d" " -f4,7,10 | xargs -i printf "%s %s\n" `date +%s` {} >>tools/code_coverage.log + +# Clean up. +rm -f $LLVM_PROFILE_FILE +rm -f $PROFDATA diff --git a/tools/make_instruction_doc_aarch64.pl b/tools/make_instruction_doc_aarch64.pl index 4ecb9c69..9ff32f8d 100755 --- a/tools/make_instruction_doc_aarch64.pl +++ b/tools/make_instruction_doc_aarch64.pl @@ -35,6 +35,9 @@ my $hfile = "src/aarch64/assembler-aarch64.h"; # Extra pseudo instructions added to AArch64. my @extras = qw/bind debug dci dc32 dc64 place/; +# SVE instructions that can't be inferred from their argument types. +my @sves = qw/addvl addpl rdvl cntb cnth cntw cntd ctermeq ctermne setffr/; + my %inst = (); # Global hash of instructions. # Set record separator to one or more consecutive new lines. This causes $_ to @@ -45,7 +48,7 @@ open(IN, "<$hfile") or die("Can't open header file $hfile.\n"); while(<IN>) { # Find a function formatted like an instruction. - if(my($t) = /^ ((?:void|inline void) [a-z][a-z0-9]{0,8}_?)\(/mgp) + if(my($t) = /^ ((?:void|inline void) [a-z][a-z0-9]{0,9}_?)\(/mgp) { # Everything before the function match, ie. the comments. my $before = ${^PREMATCH}; @@ -55,7 +58,7 @@ while(<IN>) my $after = ${^POSTMATCH}; # Extract the instruction. - my($i) = $t =~ /(?:void|inline void) ([a-z][a-z0-9]{0,8})/; + my($i) = $t =~ /(?:void|inline void) ([a-z][a-z0-9]{0,9})/; # Extract the comment from before the function. Drop comment characters # and format the architecture version suffix, if present. @@ -76,7 +79,13 @@ while(<IN>) # Establish the type of the instruction. 
my $type = 'integer'; - ($p =~ /VRegister/) and $type = 'float'; + if ($p =~ /([PZ]Register|SVEMemOperand)/) { + $type = 'sve'; + } elsif ($i =~ /[su]?q?(inc|dec)[bhwd]/ || $i ~~ @sves) { + $type = 'sve'; + } elsif ($p =~ /VRegister/) { + $type = 'float'; + } ($i ~~ @extras) and $type = 'pseudo'; # Special case to distinguish dc() the data constant placing function from @@ -89,10 +98,13 @@ while(<IN>) $inst{$p}->{'type'} = $type; $inst{$p}->{'mnemonic'} = $i; $inst{$p}->{'description'} = $d; + $inst{$p}->{'initial'} = substr($i, 0, 1); } } close(IN); +my $links = get_links_list(\%inst); + print <<HEADER; VIXL Supported Instruction List =============================== @@ -102,12 +114,45 @@ disassembler and simulator. The simulator may not support all floating point operations to the precision required by AArch64 - please check the simulator source code for details. +#### AAch64 integer instructions #### +$links->{'integer'} + +#### AArch64 floating point and NEON instructions #### +$links->{'float'} + +#### AArch64 Scalable Vector Extension (SVE) instructions #### +$links->{'sve'} + +#### Additional or pseudo instructions #### +$links->{'pseudo'} + +___ + HEADER print describe_insts('AArch64 integer instructions', 'integer'); print describe_insts('AArch64 floating point and NEON instructions', 'float'); +print describe_insts('AArch64 Scalable Vector Extension (SVE) instructions', 'sve'); print describe_insts('Additional or pseudo instructions', 'pseudo'); +# Get a hash of links to each initialed section of the document, keyed by type. 
+sub get_links_list { + my $insts = shift; + my %initials; + foreach my $i (sort(keys(%$insts))) { + my $inst = $insts->{$i}; + $initials{$inst->{type}}->{$inst->{initial}}++; + } + my %result; + foreach my $t (keys(%initials)) { + foreach my $i (sort(keys(%{$initials{$t}}))) { + push(@{$result{$t}}, "[$i](#$t-$i)"); + } + $result{$t} = join(' ', @{$result{$t}}); + } + return \%result; +} + # Sort instructions by mnemonic and then description. sub inst_sort { @@ -125,9 +170,14 @@ sub describe_insts $result .= '-' x length($title); $result .= "\n\n"; + my $last_initial = ''; foreach my $i (sort inst_sort keys(%inst)) { next if($inst{$i}->{'type'} ne $type); + unless ($last_initial eq $inst{$i}->{'initial'}) { + $last_initial = $inst{$i}->{'initial'}; + $result .= sprintf("<a id=\"%s-%s\">\n\n", lc($type), $last_initial); + } $result .= sprintf("### %s ###\n\n%s\n\n", uc($inst{$i}->{'mnemonic'}), $inst{$i}->{'description'}); diff --git a/tools/test.py b/tools/test.py index 9a081d0c..75c700d3 100755 --- a/tools/test.py +++ b/tools/test.py @@ -168,6 +168,8 @@ def BuildOptions(): help='Do not run clang-tidy.') general_arguments.add_argument('--notest', action='store_true', help='Do not run tests.') + general_arguments.add_argument('--nocheck-code-coverage', action='store_true', + help='Do not check code coverage results log.') general_arguments.add_argument('--fail-early', action='store_true', help='Exit as soon as a test fails.') general_arguments.add_argument( @@ -273,6 +275,10 @@ def RunClangTidy(clang_path, jobs): jobs = jobs, progress_prefix = 'clang-tidy: ') +def CheckCodeCoverage(): + command = ['tools/check_recent_coverage.sh'] + return RunCommand(command) + def BuildAll(build_options, jobs, environment_options): scons_command = ['scons', '-C', dir_root, 'all', '-j', str(jobs)] if util.IsCommandAvailable('ccache'): @@ -359,6 +365,9 @@ if __name__ == '__main__': if args.under_valgrind: util.require_program('valgrind') + if not args.nocheck_code_coverage: + 
rc.Combine(CheckCodeCoverage()) + tests = test_runner.TestQueue() if not args.nolint and not args.dry_run: rc.Combine(RunLinter(args.jobs)) |