author | TatWai Chong <tatwai.chong@arm.com> | 2020-06-11 00:09:20 -0700
---|---|---
committer | TatWai Chong <tatwai.chong@arm.com> | 2020-06-23 06:12:50 -0700
commit | 5f3928c0ca5971252d92c874686261a9c706dce7 (patch) |
tree | 3dc0eb222188a3de9a7f0bdf9c6f392d64126d71 /src |
parent | fa098bcfbca210b2b10eab10de19369e822654e2 (diff) |
download | vixl-5f3928c0ca5971252d92c874686261a9c706dce7.tar.gz |
[sve] Implement 32-bit scatter store (scalar plus vector mode).
Include st1b, st1h, st1w and st1d.
Change-Id: I868c47a984723ea94ddc78f65f8c68680209c19c
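With this change the four dedicated scalar-plus-vector `st1b`/`st1d`/`st1h`/`st1w` overloads are removed and scatter stores go through the existing `SVEMemOperand` entry points, so the offset extend mode and scale become explicit at the call site. A minimal usage sketch, assuming a configured MacroAssembler with SVE enabled; the register and offset choices here are illustrative, not part of the patch:

```cpp
#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Illustrative only: emits two of the newly supported 32-bit scatter
// stores. Assumes `masm` targets a CPU with CPUFeatures::kSVE.
void EmitScatterStores(MacroAssembler* masm) {
  // Store the low word of each active S lane of z0 to
  // x0 + (zero-extended z1 lane << 2), i.e. scaled 32-bit offsets.
  masm->St1w(z0.VnS(), p0, SVEMemOperand(x0, z1.VnS(), SVE_UXTW, 2));
  // Unscaled form: sign-extended 32-bit offsets, halfword stores.
  masm->St1h(z2.VnS(), p1, SVEMemOperand(x1, z3.VnS(), SVE_SXTW));
}
```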
Diffstat (limited to 'src')
-rw-r--r-- | src/aarch64/assembler-aarch64.h | 31
-rw-r--r-- | src/aarch64/assembler-sve-aarch64.cc | 160
-rw-r--r-- | src/aarch64/disasm-aarch64.cc | 46
-rw-r--r-- | src/aarch64/simulator-aarch64.cc | 96
4 files changed, 134 insertions(+), 199 deletions(-)
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 74e02121..b575f454 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -5470,32 +5470,6 @@ class Assembler : public vixl::internal::AssemblerBase {
             const PRegister& pg,
             const SVEMemOperand& addr);
 
-  // TODO: Merge other stores into the SVEMemOperand versions.
-
-  // Scatter store bytes from a vector (vector index).
-  void st1b(const ZRegister& zt,
-            const PRegister& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
-  // Scatter store doublewords from a vector (vector index).
-  void st1d(const ZRegister& zt,
-            const PRegister& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
-  // Scatter store halfwords from a vector (vector index).
-  void st1h(const ZRegister& zt,
-            const PRegister& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
-  // Scatter store words from a vector (vector index).
-  void st1w(const ZRegister& zt,
-            const PRegister& pg,
-            const Register& xn,
-            const ZRegister& zm);
-
   // Contiguous store two-byte structures from two vectors.
   void st2b(const ZRegister& zt1,
             const ZRegister& zt2,
@@ -6629,9 +6603,12 @@ class Assembler : public vixl::internal::AssemblerBase {
                         const MemOperand& addr,
                         Instr op);
 
+  // Set `is_load` to false in default as it's only used in the
+  // scalar-plus-vector form.
   Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                             int num_regs,
-                            const SVEMemOperand& addr);
+                            const SVEMemOperand& addr,
+                            bool is_load = false);
 
   // E.g. st1b, st1h, ...
   // This supports both contiguous and scatter stores.
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
index 6343b6bf..158ae19f 100644
--- a/src/aarch64/assembler-sve-aarch64.cc
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -3926,26 +3926,34 @@ void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
         op = SVE64BitScatterStore_VectorPlusImmFixed;
       }
     }
-  } else if (addr.IsScalarPlusVector()) {
+  } else {
+    VIXL_ASSERT(addr.IsScalarPlusVector());
     VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorOffset()));
     SVEOffsetModifier mod = addr.GetOffsetModifier();
     if (zt.IsLaneSizeS()) {
       VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW));
-      switch (addr.GetShiftAmount()) {
-        case 0:
+      unsigned shift_amount = addr.GetShiftAmount();
+      if (shift_amount == 0) {
+        if (is_load) {
           op = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed;
-          break;
-        case 1:
-          VIXL_ASSERT(msize_in_bytes_log2 == kHRegSizeInBytesLog2);
+        } else {
+          op = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed;
+        }
+      } else if (shift_amount == 1) {
+        VIXL_ASSERT(msize_in_bytes_log2 == kHRegSizeInBytesLog2);
+        if (is_load) {
           op = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed;
-          break;
-        case 2:
-          VIXL_ASSERT(msize_in_bytes_log2 == kSRegSizeInBytesLog2);
+        } else {
+          op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed;
+        }
+      } else {
+        VIXL_ASSERT(shift_amount == 2);
+        VIXL_ASSERT(msize_in_bytes_log2 == kSRegSizeInBytesLog2);
+        if (is_load) {
           op = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed;
-          break;
-        default:
-          VIXL_UNIMPLEMENTED();
-          break;
+        } else {
+          op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed;
+        }
       }
     } else if (zt.IsLaneSizeD()) {
       switch (mod) {
@@ -3967,10 +3975,21 @@ void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
         case SVE_SXTW: {
           unsigned shift_amount = addr.GetShiftAmount();
           if (shift_amount == 0) {
-            op = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
+            if (is_load) {
+              op =
+                  SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
+            } else {
+              op =
+                  SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
+            }
           } else {
             VIXL_ASSERT(shift_amount == msize_in_bytes_log2);
-            op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed;
+            if (is_load) {
+              op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed;
+            } else {
+              op =
+                  SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed;
+            }
           }
           break;
         }
@@ -3978,33 +3997,13 @@
         default:
           VIXL_UNIMPLEMENTED();
       }
     }
-  } else {
-    // All gather loads are either vector-plus-immediate or scalar-plus-vector.
-    VIXL_UNREACHABLE();
   }
 
-  if ((op == SVE32BitGatherLoad_VectorPlusImmFixed) ||
-      (op == SVE64BitGatherLoad_VectorPlusImmFixed) ||
-      (op == SVE32BitScatterStore_VectorPlusImmFixed) ||
-      (op == SVE64BitScatterStore_VectorPlusImmFixed) ||
-      (op == SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed) ||
-      (op == SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed) ||
-      (op == SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed) ||
-      (op == SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed) ||
-      (op == SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed) ||
-      (op == SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed) ||
-      (op == SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed) ||
-      (op == SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed) ||
-      (op == SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed)) {
-    Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr);
-    Instr msz = ImmUnsignedField<24, 23>(msize_in_bytes_log2);
-    Instr u = (!is_load || is_signed) ? 0 : (1 << 14);
-    Instr ff = is_first_fault ? (1 << 13) : 0;
-    Emit(op | mem_op | msz | u | ff | Rt(zt) | PgLow8(pg));
-  } else {
-    // Other groups are encoded slightly differently.
-    VIXL_UNIMPLEMENTED();
-  }
+  Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr, is_load);
+  Instr msz = ImmUnsignedField<24, 23>(msize_in_bytes_log2);
+  Instr u = (!is_load || is_signed) ? 0 : (1 << 14);
+  Instr ff = is_first_fault ? (1 << 13) : 0;
+  Emit(op | mem_op | msz | u | ff | Rt(zt) | PgLow8(pg));
 }
 
 void Assembler::SVELd234Helper(int num_regs,
@@ -4943,7 +4942,8 @@ void Assembler::ldnt1w(const ZRegister& zt,
 
 Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                                      int num_regs,
-                                     const SVEMemOperand& addr) {
+                                     const SVEMemOperand& addr,
+                                     bool is_load) {
   VIXL_ASSERT((num_regs >= 1) && (num_regs <= 4));
 
   Instr op = 0xfffffff;
@@ -4972,7 +4972,8 @@ Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2,
     Register xn = addr.GetScalarBase();
     ZRegister zm = addr.GetVectorOffset();
    SVEOffsetModifier mod = addr.GetOffsetModifier();
-    Instr xs = (mod == SVE_SXTW) ? (1 << 22) : 0;
+    Instr modifier_bit = 1 << (is_load ? 22 : 14);
+    Instr xs = (mod == SVE_SXTW) ? modifier_bit : 0;
     VIXL_ASSERT(num_regs == 1);
 
     if (mod == SVE_LSL) {
@@ -5113,81 +5114,6 @@ VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST2)
 VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST3)
 VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST4)
 
-// This prototype maps to 3 instruction encodings:
-//  ST1B_z_p_bz_d_64_unscaled
-//  ST1B_z_p_bz_d_x32_unscaled
-//  ST1B_z_p_bz_s_x32_unscaled
-void Assembler::st1b(const ZRegister& zt,
-                     const PRegister& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
-  //  1110 0100 000. .... 101. .... .... ....
-  //  msz<24:23> = 00 | Zm<20:16> | Pg<12:10> | Rn<9:5> | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(ST1B_z_p_bz_d_64_unscaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 4 instruction encodings:
-//  ST1D_z_p_bz_d_64_scaled
-//  ST1D_z_p_bz_d_64_unscaled
-//  ST1D_z_p_bz_d_x32_scaled
-//  ST1D_z_p_bz_d_x32_unscaled
-void Assembler::st1d(const ZRegister& zt,
-                     const PRegister& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3]
-  //  1110 0101 101. .... 101. .... .... ....
-  //  msz<24:23> = 11 | Zm<20:16> | Pg<12:10> | Rn<9:5> | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(ST1D_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-//  ST1H_z_p_bz_d_64_scaled
-//  ST1H_z_p_bz_d_64_unscaled
-//  ST1H_z_p_bz_d_x32_scaled
-//  ST1H_z_p_bz_d_x32_unscaled
-//  ST1H_z_p_bz_s_x32_scaled
-//  ST1H_z_p_bz_s_x32_unscaled
-void Assembler::st1h(const ZRegister& zt,
-                     const PRegister& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1]
-  //  1110 0100 101. .... 101. .... .... ....
-  //  msz<24:23> = 01 | Zm<20:16> | Pg<12:10> | Rn<9:5> | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(ST1H_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-//  ST1W_z_p_bz_d_64_scaled
-//  ST1W_z_p_bz_d_64_unscaled
-//  ST1W_z_p_bz_d_x32_scaled
-//  ST1W_z_p_bz_d_x32_unscaled
-//  ST1W_z_p_bz_s_x32_scaled
-//  ST1W_z_p_bz_s_x32_unscaled
-void Assembler::st1w(const ZRegister& zt,
-                     const PRegister& pg,
-                     const Register& xn,
-                     const ZRegister& zm) {
-  // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2]
-  //  1110 0101 001. .... 101. .... .... ....
-  //  msz<24:23> = 10 | Zm<20:16> | Pg<12:10> | Rn<9:5> | Zt<4:0>
-
-  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
-  Emit(ST1W_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
 void Assembler::stnt1b(const ZRegister& zt,
                        const PRegister& pg,
                        const SVEMemOperand& addr) {
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 8b0e19a4..12f21762 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -5142,49 +5142,46 @@ void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm(
 void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
     const Instruction *instr) {
   const char *mnemonic = "unimplemented";
-  const char *form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)";
+  const char *form = "{ 'Zt.s }, 'Pgl, ['Xns, 'Zm.s, ";
+  const char *suffix =
+      (instr->ExtractBit(14) == 0) ? "uxtw #'u2423]" : "sxtw #'u2423]";
 
   switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
-    // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
     case ST1H_z_p_bz_s_x32_scaled:
       mnemonic = "st1h";
-      form = "{ 'Zt.s }, 'Pgl, ['Xns, 'Zm.s, <mod> #1]";
       break;
-    // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
     case ST1W_z_p_bz_s_x32_scaled:
       mnemonic = "st1w";
-      form = "{ 'Zt.s }, 'Pgl, ['Xns, 'Zm.s, <mod> #2]";
       break;
     default:
+      form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)";
      break;
   }
-  Format(instr, mnemonic, form);
+  Format(instr, mnemonic, form, suffix);
 }
 
 void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
     const Instruction *instr) {
   const char *mnemonic = "unimplemented";
-  // { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
-  const char *form = "{ 'Zt.s }, 'Pgl, ['Xns, 'Zm.s, <mod>]";
+  const char *form = "{ 'Zt.s }, 'Pgl, ['Xns, 'Zm.s, ";
+  const char *suffix = (instr->ExtractBit(14) == 0) ? "uxtw]" : "sxtw]";
 
   switch (
       instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
-    // ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
     case ST1B_z_p_bz_s_x32_unscaled:
       mnemonic = "st1b";
       break;
-    // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
     case ST1H_z_p_bz_s_x32_unscaled:
       mnemonic = "st1h";
       break;
-    // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
     case ST1W_z_p_bz_s_x32_unscaled:
      mnemonic = "st1w";
       break;
     default:
+      form = "(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)";
       break;
   }
-  Format(instr, mnemonic, form);
+  Format(instr, mnemonic, form, suffix);
 }
 
 void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm(
@@ -5643,61 +5640,54 @@ void Disassembler::
     VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
         const Instruction *instr) {
   const char *mnemonic = "unimplemented";
-  const char *form =
-      "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)";
+  const char *form = "{ 'Zt.d }, 'Pgl, ['Xns, 'Zm.d, ";
+  const char *suffix =
+      (instr->ExtractBit(14) == 0) ? "uxtw #'u2423]" : "sxtw #'u2423]";
 
   switch (instr->Mask(
       SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
-    // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
     case ST1D_z_p_bz_d_x32_scaled:
       mnemonic = "st1d";
-      form = "{ 'Zt.d }, 'Pgl, ['Xns, 'Zm.d, <mod> #3]";
       break;
-    // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
     case ST1H_z_p_bz_d_x32_scaled:
       mnemonic = "st1h";
-      form = "{ 'Zt.d }, 'Pgl, ['Xns, 'Zm.d, <mod> #1]";
       break;
-    // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
     case ST1W_z_p_bz_d_x32_scaled:
       mnemonic = "st1w";
-      form = "{ 'Zt.d }, 'Pgl, ['Xns, 'Zm.d, <mod> #2]";
       break;
     default:
+      form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)";
       break;
   }
-  Format(instr, mnemonic, form);
+  Format(instr, mnemonic, form, suffix);
 }
 
 void Disassembler::
     VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
         const Instruction *instr) {
   const char *mnemonic = "unimplemented";
-  // { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
-  const char *form = "{ 'Zt.d }, 'Pgl, ['Xns, 'Zm.d, <mod>]";
+  const char *form = "{ 'Zt.d }, 'Pgl, ['Xns, 'Zm.d, ";
+  const char *suffix = (instr->ExtractBit(14) == 0) ? "uxtw]" : "sxtw]";
 
   switch (instr->Mask(
       SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
-    // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
     case ST1B_z_p_bz_d_x32_unscaled:
       mnemonic = "st1b";
       break;
-    // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
     case ST1D_z_p_bz_d_x32_unscaled:
       mnemonic = "st1d";
       break;
-    // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
     case ST1H_z_p_bz_d_x32_unscaled:
       mnemonic = "st1h";
       break;
-    // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
     case ST1W_z_p_bz_d_x32_unscaled:
       mnemonic = "st1w";
       break;
     default:
+      form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets)";
       break;
   }
-  Format(instr, mnemonic, form);
+  Format(instr, mnemonic, form, suffix);
 }
 
 void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm(
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index f4f0d8aa..60e1bd41 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -10271,11 +10271,25 @@ void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
   USE(instr);
   switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
     case ST1H_z_p_bz_s_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
-    case ST1W_z_p_bz_s_x32_scaled:
-      VIXL_UNIMPLEMENTED();
+    case ST1W_z_p_bz_s_x32_scaled: {
+      unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+      VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+      int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
+      uint64_t base = ReadXRegister(instr->GetRn());
+      SVEOffsetModifier mod =
+          (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+      LogicSVEAddressVector addr(base,
+                                 &ReadVRegister(instr->GetRm()),
+                                 kFormatVnS,
+                                 mod,
+                                 scale);
+      addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+      SVEStructuredStoreHelper(kFormatVnS,
+                               ReadPRegister(instr->GetPgLow8()),
+                               instr->GetRt(),
+                               addr);
       break;
+    }
     default:
       VIXL_UNIMPLEMENTED();
       break;
@@ -10288,14 +10302,24 @@ void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
   switch (
       instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
     case ST1B_z_p_bz_s_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case ST1H_z_p_bz_s_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
-    case ST1W_z_p_bz_s_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
+    case ST1W_z_p_bz_s_x32_unscaled: {
+      unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+      VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+      uint64_t base = ReadXRegister(instr->GetRn());
+      SVEOffsetModifier mod =
+          (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+      LogicSVEAddressVector addr(base,
+                                 &ReadVRegister(instr->GetRm()),
+                                 kFormatVnS,
+                                 mod);
+      addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+      SVEStructuredStoreHelper(kFormatVnS,
+                               ReadPRegister(instr->GetPgLow8()),
+                               instr->GetRt(),
+                               addr);
       break;
+    }
     default:
       VIXL_UNIMPLEMENTED();
       break;
@@ -10368,13 +10392,11 @@ void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
     case ST1W_z_p_bz_d_64_unscaled: {
       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
-      int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
       uint64_t base = ReadXRegister(instr->GetRn());
       LogicSVEAddressVector addr(base,
                                  &ReadVRegister(instr->GetRm()),
                                  kFormatVnD,
-                                 SVE_LSL,
-                                 scale);
+                                 NO_SVE_OFFSET_MODIFIER);
       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
       SVEStructuredStoreHelper(kFormatVnD,
                                ReadPRegister(instr->GetPgLow8()),
@@ -10394,14 +10416,26 @@ void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
   switch (instr->Mask(
       SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
     case ST1D_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case ST1H_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
-      break;
-    case ST1W_z_p_bz_d_x32_scaled:
-      VIXL_UNIMPLEMENTED();
+    case ST1W_z_p_bz_d_x32_scaled: {
+      unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+      VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+      int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
+      uint64_t base = ReadXRegister(instr->GetRn());
+      SVEOffsetModifier mod =
+          (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+      LogicSVEAddressVector addr(base,
+                                 &ReadVRegister(instr->GetRm()),
+                                 kFormatVnD,
+                                 mod,
+                                 scale);
+      addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+      SVEStructuredStoreHelper(kFormatVnD,
+                               ReadPRegister(instr->GetPgLow8()),
+                               instr->GetRt(),
+                               addr);
       break;
+    }
     default:
       VIXL_UNIMPLEMENTED();
       break;
@@ -10415,17 +10449,25 @@ void Simulator::
   switch (instr->Mask(
       SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
     case ST1B_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case ST1D_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
     case ST1H_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
-      break;
-    case ST1W_z_p_bz_d_x32_unscaled:
-      VIXL_UNIMPLEMENTED();
+    case ST1W_z_p_bz_d_x32_unscaled: {
+      unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+      VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+      uint64_t base = ReadXRegister(instr->GetRn());
+      SVEOffsetModifier mod =
+          (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+      LogicSVEAddressVector addr(base,
+                                 &ReadVRegister(instr->GetRm()),
+                                 kFormatVnD,
+                                 mod);
+      addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+      SVEStructuredStoreHelper(kFormatVnD,
+                               ReadPRegister(instr->GetPgLow8()),
+                               instr->GetRt(),
+                               addr);
       break;
+    }
     default:
       VIXL_UNIMPLEMENTED();
       break;
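A detail worth noting from the helper changes above: in scalar-plus-vector addressing, the offset-extend selector (SXTW vs. UXTW) sits at bit 22 in gather-load encodings but at bit 14 in scatter-store encodings, which is why `SVEMemOperandHelper` grows an `is_load` parameter and why the disassembler and simulator read `ExtractBit(14)` for stores. A standalone sketch of that bit placement (the function name is illustrative, not VIXL's):

```cpp
#include <cstdint>

// Mirrors `Instr modifier_bit = 1 << (is_load ? 22 : 14)` from the
// patch: builds the xs field for a scalar-plus-vector SVE access.
// A zero return encodes UXTW; the set bit encodes SXTW.
uint32_t OffsetExtendField(bool is_load, bool is_sxtw) {
  const uint32_t modifier_bit = uint32_t{1} << (is_load ? 22 : 14);
  return is_sxtw ? modifier_bit : 0;
}
```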