author     Martyn Capewell <martyn.capewell@arm.com>  2020-06-05 18:20:11 +0100
committer  TatWai Chong <tatwai.chong@arm.com>        2020-06-22 12:51:34 -0700
commit     a5112344aa6a2c562379ec67398a6719360965bf (patch)
tree       4ea88d7d4a0701721cb5702f510d04af85fe8f66 /src
parent     cd3f6c5ec96ff6d8240a07e7084ae5de700dc9c7 (diff)
[sve] Complete remaining gather loads.
Implement remaining 64-bit gather loads, including the unpacked, unscaled, and scaled offset forms.

Change-Id: I208de1fabfe40f7095f9848c3ebf9de82a5f7416
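For reference, these scalar-plus-vector forms are reached through the SVEMemOperand-based load interface rather than the removed (xn, zm) overloads. A minimal usage sketch, not part of the patch; the register aliases (x0, z0, z1, p0) and the MacroAssembler/SVEMemOperand overloads used here are assumed from the VIXL AArch64 API:

#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Emits one gather load for each D-lane addressing form this commit completes.
void GenerateGatherLoads(MacroAssembler* masm) {
  // 64-bit unscaled offsets: [x0, z1.d]
  masm->Ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD()));
  // 64-bit scaled offsets: [x0, z1.d, lsl #3]
  masm->Ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), LSL, 3));
  // Unpacked 32-bit unscaled offsets: [x0, z1.d, sxtw]
  masm->Ld1sw(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), SXTW));
  // Unpacked 32-bit scaled offsets: [x0, z1.d, uxtw #1]
  masm->Ld1h(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), UXTW, 1));
}

The selection between the corresponding instruction encodings is performed by the SVEScatterGatherHelper change below.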
Diffstat (limited to 'src')
-rw-r--r--  src/aarch64/assembler-aarch64.h              42
-rw-r--r--  src/aarch64/assembler-sve-aarch64.cc        161
-rw-r--r--  src/aarch64/disasm-aarch64.cc                20
-rw-r--r--  src/aarch64/macro-assembler-sve-aarch64.cc    1
-rw-r--r--  src/aarch64/operands-aarch64.h                1
-rw-r--r--  src/aarch64/simulator-aarch64.cc             38
6 files changed, 33 insertions, 230 deletions
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 312acb53..74e02121 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -4586,30 +4586,6 @@ class Assembler : public vixl::internal::AssemblerBase {
// TODO: Merge other loads into the SVEMemOperand versions.
- // Gather load unsigned bytes to vector (vector index).
- void ld1b(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm);
-
- // Gather load doublewords to vector (vector index).
- void ld1d(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm);
-
- // Gather load unsigned halfwords to vector (vector index).
- void ld1h(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm);
-
- // Gather load unsigned words to vector (vector index).
- void ld1w(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm);
-
// Load and broadcast unsigned byte to vector.
void ld1rb(const ZRegister& zt,
const PRegisterZ& pg,
@@ -4682,24 +4658,6 @@ class Assembler : public vixl::internal::AssemblerBase {
// TODO: Merge other loads into the SVEMemOperand versions.
- // Gather load signed bytes to vector (vector index).
- void ld1sb(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm);
-
- // Gather load signed halfwords to vector (vector index).
- void ld1sh(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm);
-
- // Gather load signed words to vector (vector index).
- void ld1sw(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm);
-
// Contiguous load two-byte structures to two vectors.
void ld2b(const ZRegister& zt1,
const ZRegister& zt2,
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
index b6e1c8d9..8361193e 100644
--- a/src/aarch64/assembler-sve-aarch64.cc
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -3948,19 +3948,26 @@ void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
break;
}
} else if (zt.IsLaneSizeD()) {
- if (mod == NO_SVE_OFFSET_MODIFIER) {
- op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed;
- } else if (mod == SVE_LSL) {
- op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed;
- } else {
- VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW));
- unsigned shift_amount = addr.GetShiftAmount();
- if (shift_amount == 0) {
- op = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
- } else {
- VIXL_ASSERT(shift_amount == msize_in_bytes_log2);
- op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed;
+ switch (mod) {
+ case NO_SVE_OFFSET_MODIFIER:
+ op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed;
+ break;
+ case SVE_LSL:
+ op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed;
+ break;
+ case SVE_UXTW:
+ case SVE_SXTW: {
+ unsigned shift_amount = addr.GetShiftAmount();
+ if (shift_amount == 0) {
+ op = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
+ } else {
+ VIXL_ASSERT(shift_amount == msize_in_bytes_log2);
+ op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed;
+ }
+ break;
}
+ default:
+ VIXL_UNIMPLEMENTED();
}
}
} else {
@@ -4198,136 +4205,6 @@ void Assembler::ldr(const CPURegister& rt, const SVEMemOperand& addr) {
// SVEMem64BitGather.
// This prototype maps to 3 instruction encodings:
-// LD1B_z_p_bz_d_64_unscaled
-// LD1B_z_p_bz_d_x32_unscaled
-void Assembler::ld1b(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm) {
- // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
- // 1100 0100 010. .... 110. .... .... ....
- // msz<24:23> = 00 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
- // | Zt<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
- Emit(LD1B_z_p_bz_d_64_unscaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 4 instruction encodings:
-// LD1D_z_p_bz_d_64_scaled
-// LD1D_z_p_bz_d_64_unscaled
-// LD1D_z_p_bz_d_x32_scaled
-// LD1D_z_p_bz_d_x32_unscaled
-void Assembler::ld1d(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm) {
- // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]
- // 1100 0101 111. .... 110. .... .... ....
- // msz<24:23> = 11 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
- // | Zt<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
- Emit(LD1D_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-// LD1H_z_p_bz_d_64_scaled
-// LD1H_z_p_bz_d_64_unscaled
-// LD1H_z_p_bz_d_x32_scaled
-// LD1H_z_p_bz_d_x32_unscaled
-void Assembler::ld1h(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm) {
- // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
- // 1100 0100 111. .... 110. .... .... ....
- // msz<24:23> = 01 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
- // | Zt<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
- Emit(LD1H_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 3 instruction encodings:
-// LD1SB_z_p_bz_d_64_unscaled
-// LD1SB_z_p_bz_d_x32_unscaled
-void Assembler::ld1sb(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm) {
- // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
- // 1100 0100 010. .... 100. .... .... ....
- // msz<24:23> = 00 | Zm<20:16> | U<14> = 0 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
- // | Zt<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
- Emit(LD1SB_z_p_bz_d_64_unscaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) |
- Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-// LD1SH_z_p_bz_d_64_scaled
-// LD1SH_z_p_bz_d_64_unscaled
-// LD1SH_z_p_bz_d_x32_scaled
-// LD1SH_z_p_bz_d_x32_unscaled
-void Assembler::ld1sh(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm) {
- // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
- // 1100 0100 111. .... 100. .... .... ....
- // msz<24:23> = 01 | Zm<20:16> | U<14> = 0 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
- // | Zt<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
- Emit(LD1SH_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 4 instruction encodings:
-// LD1SW_z_p_bz_d_64_scaled
-// LD1SW_z_p_bz_d_64_unscaled
-// LD1SW_z_p_bz_d_x32_scaled
-// LD1SW_z_p_bz_d_x32_unscaled
-void Assembler::ld1sw(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm) {
- // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
- // 1100 0101 011. .... 100. .... .... ....
- // msz<24:23> = 10 | Zm<20:16> | U<14> = 0 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
- // | Zt<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
- Emit(LD1SW_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 6 instruction encodings:
-// LD1W_z_p_bz_d_64_scaled
-// LD1W_z_p_bz_d_64_unscaled
-// LD1W_z_p_bz_d_x32_scaled
-// LD1W_z_p_bz_d_x32_unscaled
-void Assembler::ld1w(const ZRegister& zt,
- const PRegisterZ& pg,
- const Register& xn,
- const ZRegister& zm) {
- // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
- // 1100 0101 011. .... 110. .... .... ....
- // msz<24:23> = 10 | Zm<20:16> | U<14> = 1 | ff<13> = 0 | Pg<12:10> | Rn<9:5>
- // | Zt<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
-
- Emit(LD1W_z_p_bz_d_64_scaled | Rt(zt) | Rx<12, 10>(pg) | RnSP(xn) | Rm(zm));
-}
-
-// This prototype maps to 3 instruction encodings:
// LDFF1B_z_p_bz_d_64_unscaled
// LDFF1B_z_p_bz_d_x32_unscaled
void Assembler::ldff1b(const ZRegister& zt,
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index c503628b..9549f1cb 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -5218,9 +5218,9 @@ void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm(
void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
const Instruction *instr) {
const char *mnemonic = "unimplemented";
- const char *form = (instr->ExtractBit(22) == 0)
- ? "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, uxtw #'u2423]"
- : "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, sxtw #'u2423]";
+ const char *form = "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, ";
+ const char *suffix =
+ (instr->ExtractBit(22) == 0) ? "uxtw #'u2423]" : "sxtw #'u2423]";
switch (instr->Mask(
SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
@@ -5255,9 +5255,11 @@ void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
mnemonic = "ldff1w";
break;
default:
+ form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)";
+ suffix = NULL;
break;
}
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic, form, suffix);
}
void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
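Splitting the operand text into a shared prefix (form) and a bit-22-selected suffix lets the unallocated default case swap in a placeholder form and drop the suffix, instead of duplicating the whole uxtw/sxtw string pair. A standalone sketch of the composition, illustrative only and not VIXL code:

#include <cstdio>

// Composes the operand text for this encoding group from a fixed prefix plus
// an extend suffix chosen by the sign-extension bit, with the scale appended.
void PrintGatherOperands(bool sign_extend, unsigned scale) {
  const char* form = "{ z0.d }, p0/z, [x0, z1.d, ";
  const char* suffix = sign_extend ? "sxtw" : "uxtw";
  std::printf("%s%s #%u]\n", form, suffix, scale);
}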
@@ -5297,6 +5299,7 @@ void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
mnemonic = "ldff1w";
break;
default:
+ form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)";
break;
}
Format(instr, mnemonic, form);
@@ -5375,9 +5378,8 @@ void Disassembler::
VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
const Instruction *instr) {
const char *mnemonic = "unimplemented";
- const char *form = (instr->ExtractBit(22) == 0)
- ? "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, uxtw]"
- : "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, sxtw]";
+ const char *form = "{ 'Zt.d }, 'Pgl/z, ['Xns, 'Zm.d, ";
+ const char *suffix = (instr->ExtractBit(22) == 0) ? "uxtw]" : "sxtw]";
switch (instr->Mask(
SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
@@ -5424,9 +5426,11 @@ void Disassembler::
mnemonic = "ldff1w";
break;
default:
+ form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)";
+ suffix = NULL;
break;
}
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic, form, suffix);
}
void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm(
diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc
index 5052b61b..b107f132 100644
--- a/src/aarch64/macro-assembler-sve-aarch64.cc
+++ b/src/aarch64/macro-assembler-sve-aarch64.cc
@@ -1210,6 +1210,7 @@ void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
}
if (addr.IsScalarPlusVector()) {
+ VIXL_ASSERT(addr.IsScatterGather());
SingleEmissionCheckScope guard(this);
(this->*fn)(zt, pg, addr);
return;
diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h
index c46f1dcb..ad03a9ee 100644
--- a/src/aarch64/operands-aarch64.h
+++ b/src/aarch64/operands-aarch64.h
@@ -663,6 +663,7 @@ class SVEMemOperand {
// Allow standard `Shift` and `Extend` arguments to be used.
SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) {
if (shift == LSL) return SVE_LSL;
+ if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER;
// SVE does not accept any other shift.
VIXL_UNIMPLEMENTED();
return NO_SVE_OFFSET_MODIFIER;
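With NO_SHIFT mapped explicitly, an offset that carries no shift no longer trips the VIXL_UNIMPLEMENTED() path. A standalone restatement of the mapping, using local stand-in enums rather than the VIXL declarations, to make the accepted inputs concrete:

#include <cassert>

// Local stand-ins for the VIXL enums; only the values needed here.
enum Shift { NO_SHIFT, LSL, LSR };
enum SVEOffsetModifier { NO_SVE_OFFSET_MODIFIER, SVE_LSL };

SVEOffsetModifier GetModifierFor(Shift shift) {
  if (shift == LSL) return SVE_LSL;
  if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER;  // the case this patch adds
  assert(false && "SVE does not accept any other shift");  // mirrors VIXL_UNIMPLEMENTED()
  return NO_SVE_OFFSET_MODIFIER;
}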
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index d0fa2b90..1a05462d 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -9779,24 +9779,13 @@ void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
const Instruction* instr) {
- USE(instr);
switch (instr->Mask(
SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
case LD1D_z_p_bz_d_x32_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1H_z_p_bz_d_x32_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1SH_z_p_bz_d_x32_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1SW_z_p_bz_d_x32_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1W_z_p_bz_d_x32_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LDFF1H_z_p_bz_d_x32_scaled:
case LDFF1W_z_p_bz_d_x32_scaled:
case LDFF1D_z_p_bz_d_x32_scaled:
@@ -9814,23 +9803,12 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
const Instruction* instr) {
- USE(instr);
switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
case LD1D_z_p_bz_d_64_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1H_z_p_bz_d_64_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1SH_z_p_bz_d_64_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1SW_z_p_bz_d_64_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1W_z_p_bz_d_64_scaled:
- VIXL_UNIMPLEMENTED();
- break;
case LDFF1H_z_p_bz_d_64_scaled:
case LDFF1W_z_p_bz_d_64_scaled:
case LDFF1D_z_p_bz_d_64_scaled:
@@ -9847,7 +9825,6 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
const Instruction* instr) {
- USE(instr);
switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
case LD1B_z_p_bz_d_64_unscaled:
case LD1D_z_p_bz_d_64_unscaled:
@@ -9876,30 +9853,15 @@ void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
const Instruction* instr) {
- USE(instr);
switch (instr->Mask(
SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
case LD1B_z_p_bz_d_x32_unscaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1D_z_p_bz_d_x32_unscaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1H_z_p_bz_d_x32_unscaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1SB_z_p_bz_d_x32_unscaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1SH_z_p_bz_d_x32_unscaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1SW_z_p_bz_d_x32_unscaled:
- VIXL_UNIMPLEMENTED();
- break;
case LD1W_z_p_bz_d_x32_unscaled:
- VIXL_UNIMPLEMENTED();
- break;
case LDFF1B_z_p_bz_d_x32_unscaled:
case LDFF1H_z_p_bz_d_x32_unscaled:
case LDFF1W_z_p_bz_d_x32_unscaled: