| field | value | date |
|---|---|---|
| author | Martyn Capewell <martyn.capewell@arm.com> | 2020-06-05 18:20:11 +0100 |
| committer | TatWai Chong <tatwai.chong@arm.com> | 2020-06-22 12:51:34 -0700 |
| commit | a5112344aa6a2c562379ec67398a6719360965bf | |
| tree | 4ea88d7d4a0701721cb5702f510d04af85fe8f66 /src | |
| parent | cd3f6c5ec96ff6d8240a07e7084ae5de700dc9c7 | |
[sve] Complete remaining gather loads.
Implement the remaining 64-bit gather loads, including the unpacked 32-bit,
unscaled 64-bit, and scaled offset forms.
Change-Id: I208de1fabfe40f7095f9848c3ebf9de82a5f7416
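In effect, the dedicated four-register gather-load overloads are folded into the existing `SVEMemOperand`-based overloads. A minimal sketch of the migration, with illustrative register choices (`assm` is a plain `Assembler`; the `SVEMemOperand` constructor shape is taken from the code below):

```cpp
// Before: dedicated scalar-plus-vector-index overload (removed below).
//   assm.ld1d(z0.VnD(), p0.Zeroing(), x0, z1.VnD());
// After: the single SVEMemOperand overload covers every gather form.
assm.ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), LSL, 3));
```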
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/aarch64/assembler-aarch64.h | 42 |
| -rw-r--r-- | src/aarch64/assembler-sve-aarch64.cc | 161 |
| -rw-r--r-- | src/aarch64/disasm-aarch64.cc | 20 |
| -rw-r--r-- | src/aarch64/macro-assembler-sve-aarch64.cc | 1 |
| -rw-r--r-- | src/aarch64/operands-aarch64.h | 1 |
| -rw-r--r-- | src/aarch64/simulator-aarch64.cc | 38 |

6 files changed, 33 insertions(+), 230 deletions(-)
```diff
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 312acb53..74e02121 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -4586,30 +4586,6 @@ class Assembler : public vixl::internal::AssemblerBase {
 
   // TODO: Merge other loads into the SVEMemOperand versions.
 
-  // Gather load unsigned bytes to vector (vector index).
-  void ld1b(const ZRegister& zt,
-            const PRegisterZ& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
-  // Gather load doublewords to vector (vector index).
-  void ld1d(const ZRegister& zt,
-            const PRegisterZ& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
-  // Gather load unsigned halfwords to vector (vector index).
-  void ld1h(const ZRegister& zt,
-            const PRegisterZ& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
-  // Gather load unsigned words to vector (vector index).
-  void ld1w(const ZRegister& zt,
-            const PRegisterZ& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
   // Load and broadcast unsigned byte to vector.
   void ld1rb(const ZRegister& zt,
              const PRegisterZ& pg,
@@ -4682,24 +4658,6 @@ class Assembler : public vixl::internal::AssemblerBase {
 
   // TODO: Merge other loads into the SVEMemOperand versions.
 
-  // Gather load signed bytes to vector (vector index).
-  void ld1sb(const ZRegister& zt,
-             const PRegisterZ& pg,
-             const Register& xn,
-             const ZRegister& zm);
-
-  // Gather load signed halfwords to vector (vector index).
-  void ld1sh(const ZRegister& zt,
-             const PRegisterZ& pg,
-             const Register& xn,
-             const ZRegister& zm);
-
-  // Gather load signed words to vector (vector index).
-  void ld1sw(const ZRegister& zt,
-             const PRegisterZ& pg,
-             const Register& xn,
-             const ZRegister& zm);
-
   // Contiguous load two-byte structures to two vectors.
   void ld2b(const ZRegister& zt1,
             const ZRegister& zt2,
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
index b6e1c8d9..8361193e 100644
--- a/src/aarch64/assembler-sve-aarch64.cc
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -3948,19 +3948,26 @@ void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
         break;
     }
   } else if (zt.IsLaneSizeD()) {
-    if (mod == NO_SVE_OFFSET_MODIFIER) {
-      op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed;
-    } else if (mod == SVE_LSL) {
-      op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed;
-    } else {
-      VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW));
-      unsigned shift_amount = addr.GetShiftAmount();
-      if (shift_amount == 0) {
-        op = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
-      } else {
-        VIXL_ASSERT(shift_amount == msize_in_bytes_log2);
-        op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed;
+    switch (mod) {
+      case NO_SVE_OFFSET_MODIFIER:
+        op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed;
+        break;
+      case SVE_LSL:
+        op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed;
+        break;
+      case SVE_UXTW:
+      case SVE_SXTW: {
+        unsigned shift_amount = addr.GetShiftAmount();
+        if (shift_amount == 0) {
+          op = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
+        } else {
+          VIXL_ASSERT(shift_amount == msize_in_bytes_log2);
+          op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed;
+        }
+        break;
       }
+      default:
+        VIXL_UNIMPLEMENTED();
     }
   }
 } else {
```
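The four `op` choices above correspond to the four D-lane addressing forms. In MacroAssembler terms they would be written roughly as follows (illustrative registers; constructor shapes inferred from `GetSVEOffsetModifierFor` further down):

```cpp
// [x0, z1.d]          -> ScalarPlus64BitUnscaledOffsets
masm.Ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD()));
// [x0, z1.d, lsl #3]  -> ScalarPlus64BitScaledOffsets
masm.Ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), LSL, 3));
// [x0, z1.d, sxtw]    -> ScalarPlusUnpacked32BitUnscaledOffsets
masm.Ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), SXTW));
// [x0, z1.d, sxtw #3] -> ScalarPlus32BitUnpackedScaledOffsets
masm.Ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), SXTW, 3));
```

The removed four-register implementations in assembler-sve-aarch64.cc follow.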
```diff
@@ -4198,136 +4205,6 @@ void Assembler::ldr(const CPURegister& rt, const SVEMemOperand& addr) {
 
 // SVEMem64BitGather.
 
 // This prototype maps to 3 instruction encodings:
-// LD1B_z_p_bz_d_64_unscaled
-// LD1B_z_p_bz_d_x32_unscaled
-void Assembler::ld1b(const ZRegister& zt,
-                     const PRegisterZ& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
-  // 1100 0100 010. .... 110. .... .... ....
-  // msz<24:23> = 00 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
-  // | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(LD1B_z_p_bz_d_64_unscaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 4 instruction encodings:
-// LD1D_z_p_bz_d_64_scaled
-// LD1D_z_p_bz_d_64_unscaled
-// LD1D_z_p_bz_d_x32_scaled
-// LD1D_z_p_bz_d_x32_unscaled
-void Assembler::ld1d(const ZRegister& zt,
-                     const PRegisterZ& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]
-  // 1100 0101 111. .... 110. .... .... ....
-  // msz<24:23> = 11 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
-  // | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(LD1D_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-// LD1H_z_p_bz_d_64_scaled
-// LD1H_z_p_bz_d_64_unscaled
-// LD1H_z_p_bz_d_x32_scaled
-// LD1H_z_p_bz_d_x32_unscaled
-void Assembler::ld1h(const ZRegister& zt,
-                     const PRegisterZ& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
-  // 1100 0100 111. .... 110. .... .... ....
-  // msz<24:23> = 01 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
-  // | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(LD1H_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 3 instruction encodings:
-// LD1SB_z_p_bz_d_64_unscaled
-// LD1SB_z_p_bz_d_x32_unscaled
-void Assembler::ld1sb(const ZRegister& zt,
-                      const PRegisterZ& pg,
-                      const Register& xn,
-                      const ZRegister& zm) {
-  // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
-  // 1100 0100 010. .... 100. .... .... ....
-  // msz<24:23> = 00 | Zm<20:16> | U<14> = 0 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
-  // | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(LD1SB_z_p_bz_d_64_unscaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) |
-       Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-// LD1SH_z_p_bz_d_64_scaled
-// LD1SH_z_p_bz_d_64_unscaled
-// LD1SH_z_p_bz_d_x32_scaled
-// LD1SH_z_p_bz_d_x32_unscaled
-void Assembler::ld1sh(const ZRegister& zt,
-                      const PRegisterZ& pg,
-                      const Register& xn,
-                      const ZRegister& zm) {
-  // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
-  // 1100 0100 111. .... 100. .... .... ....
-  // msz<24:23> = 01 | Zm<20:16> | U<14> = 0 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
-  // | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(LD1SH_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 4 instruction encodings:
-// LD1SW_z_p_bz_d_64_scaled
-// LD1SW_z_p_bz_d_64_unscaled
-// LD1SW_z_p_bz_d_x32_scaled
-// LD1SW_z_p_bz_d_x32_unscaled
-void Assembler::ld1sw(const ZRegister& zt,
-                      const PRegisterZ& pg,
-                      const Register& xn,
-                      const ZRegister& zm) {
-  // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
-  // 1100 0101 011. .... 100. .... .... ....
-  // msz<24:23> = 10 | Zm<20:16> | U<14> = 0 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
-  // | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(LD1SW_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-// LD1W_z_p_bz_d_64_scaled
-// LD1W_z_p_bz_d_64_unscaled
-// LD1W_z_p_bz_d_x32_scaled
-// LD1W_z_p_bz_d_x32_unscaled
-void Assembler::ld1w(const ZRegister& zt,
-                     const PRegisterZ& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
-  // 1100 0101 011. .... 110. .... .... ....
-  // msz<24:23> = 10 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
-  // | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(LD1W_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 3 instruction encodings:
 // LDFF1B_z_p_bz_d_64_unscaled
 // LDFF1B_z_p_bz_d_x32_unscaled
 void Assembler::ldff1b(const ZRegister& zt,
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index c503628b..9549f1cb 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -5218,9 +5218,9 @@ void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm(
 void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
     const Instruction *instr) {
   const char *mnemonic = "unimplemented";
-  const char *form = (instr->ExtractBit(22) == 0)
-                         ? "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, uxtw #'u2423]"
-                         : "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, sxtw #'u2423]";
+  const char *form = "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, ";
+  const char *suffix =
+      (instr->ExtractBit(22) == 0) ? "uxtw #'u2423]" : "sxtw #'u2423]";
 
   switch (instr->Mask(
       SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
@@ -5255,9 +5255,11 @@ void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
       mnemonic = "ldff1w";
       break;
     default:
+      form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)";
+      suffix = NULL;
       break;
   }
-  Format(instr, mnemonic, form);
+  Format(instr, mnemonic, form, suffix);
 }
 
 void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
@@ -5297,6 +5299,7 @@ void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
       mnemonic = "ldff1w";
       break;
     default:
+      form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)";
      break;
   }
   Format(instr, mnemonic, form);
```
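With the operand pattern split into a common prefix and an extend-dependent suffix, the disassembly for these encodings comes out in this shape (hand-written samples after substitution of the 'Zt, 'Pgl, 'Xns and 'Zm placeholders; not tool output):

```
ld1d {z0.d}, p0/z, [x0, z1.d, sxtw #3]
ldff1h {z2.d}, p1/z, [x3, z4.d, uxtw #1]
```

The same prefix/suffix split is applied to the unscaled-offset visitor next.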
"uxtw]" : "sxtw]"; switch (instr->Mask( SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { @@ -5424,9 +5426,11 @@ void Disassembler:: mnemonic = "ldff1w"; break; default: + form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)"; + suffix = NULL; break; } - Format(instr, mnemonic, form); + Format(instr, mnemonic, form, suffix); } void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm( diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc index 5052b61b..b107f132 100644 --- a/src/aarch64/macro-assembler-sve-aarch64.cc +++ b/src/aarch64/macro-assembler-sve-aarch64.cc @@ -1210,6 +1210,7 @@ void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2, } if (addr.IsScalarPlusVector()) { + VIXL_ASSERT(addr.IsScatterGather()); SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); return; diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h index c46f1dcb..ad03a9ee 100644 --- a/src/aarch64/operands-aarch64.h +++ b/src/aarch64/operands-aarch64.h @@ -663,6 +663,7 @@ class SVEMemOperand { // Allow standard `Shift` and `Extend` arguments to be used. SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) { if (shift == LSL) return SVE_LSL; + if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER; // SVE does not accept any other shift. VIXL_UNIMPLEMENTED(); return NO_SVE_OFFSET_MODIFIER; diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index d0fa2b90..1a05462d 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -9779,24 +9779,13 @@ void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) { void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( const Instruction* instr) { - USE(instr); switch (instr->Mask( SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { case LD1D_z_p_bz_d_x32_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1H_z_p_bz_d_x32_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1SH_z_p_bz_d_x32_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1SW_z_p_bz_d_x32_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1W_z_p_bz_d_x32_scaled: - VIXL_UNIMPLEMENTED(); - break; case LDFF1H_z_p_bz_d_x32_scaled: case LDFF1W_z_p_bz_d_x32_scaled: case LDFF1D_z_p_bz_d_x32_scaled: @@ -9814,23 +9803,12 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( const Instruction* instr) { - USE(instr); switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { case LD1D_z_p_bz_d_64_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1H_z_p_bz_d_64_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1SH_z_p_bz_d_64_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1SW_z_p_bz_d_64_scaled: - VIXL_UNIMPLEMENTED(); - break; case LD1W_z_p_bz_d_64_scaled: - VIXL_UNIMPLEMENTED(); - break; case LDFF1H_z_p_bz_d_64_scaled: case LDFF1W_z_p_bz_d_64_scaled: case LDFF1D_z_p_bz_d_64_scaled: @@ -9847,7 +9825,6 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( const Instruction* instr) { - USE(instr); switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { case LD1B_z_p_bz_d_64_unscaled: case LD1D_z_p_bz_d_64_unscaled: @@ -9876,30 +9853,15 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( 
```diff
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index d0fa2b90..1a05462d 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -9779,24 +9779,13 @@ void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
 
 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
     const Instruction* instr) {
-  USE(instr);
   switch (instr->Mask(
       SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
     case LD1D_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1H_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1SH_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1SW_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1W_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LDFF1H_z_p_bz_d_x32_scaled:
     case LDFF1W_z_p_bz_d_x32_scaled:
     case LDFF1D_z_p_bz_d_x32_scaled:
@@ -9814,23 +9803,12 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
 
 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
     const Instruction* instr) {
-  USE(instr);
   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
     case LD1D_z_p_bz_d_64_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1H_z_p_bz_d_64_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1SH_z_p_bz_d_64_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1SW_z_p_bz_d_64_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1W_z_p_bz_d_64_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LDFF1H_z_p_bz_d_64_scaled:
     case LDFF1W_z_p_bz_d_64_scaled:
     case LDFF1D_z_p_bz_d_64_scaled:
@@ -9847,7 +9825,6 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
 
 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
     const Instruction* instr) {
-  USE(instr);
   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
     case LD1B_z_p_bz_d_64_unscaled:
     case LD1D_z_p_bz_d_64_unscaled:
@@ -9876,30 +9853,15 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
 
 void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
     const Instruction* instr) {
-  USE(instr);
   switch (instr->Mask(
       SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
     case LD1B_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1D_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1H_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1SB_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1SH_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1SW_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LD1W_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case LDFF1B_z_p_bz_d_x32_unscaled:
     case LDFF1H_z_p_bz_d_x32_unscaled:
     case LDFF1W_z_p_bz_d_x32_unscaled:
```
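A minimal harness to exercise one of the newly simulated forms, assuming VIXL's usual simulator setup (`GetCPUFeatures`, `WriteXRegister`, and `RunFrom` as used in the VIXL examples; the simulator feature accessor in particular may differ by version):

```cpp
#include <cstdint>

#include "aarch64/decoder-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

using namespace vixl::aarch64;

int main() {
  MacroAssembler masm;
  masm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);

  masm.Ptrue(p0.VnD());        // All D lanes active.
  masm.Index(z1.VnD(), 0, 1);  // z1.d = {0, 1, 2, ...}: element indices.
  // Gather doublewords from x0 + (z1.d << 3): 64-bit scaled offsets.
  masm.Ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), LSL, 3));
  masm.Ret();
  masm.FinalizeCode();

  uint64_t data[32];
  for (int i = 0; i < 32; i++) data[i] = 0x1122334455660000 + i;

  Decoder decoder;
  Simulator simulator(&decoder);
  simulator.GetCPUFeatures()->Combine(CPUFeatures::kSVE);  // Assumed accessor.
  simulator.WriteXRegister(0, reinterpret_cast<uint64_t>(data));
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>());
  // z0.d now holds data[0], data[1], ... in consecutive active lanes.
  return 0;
}
```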