diff options
author | Martyn Capewell <martyn.capewell@arm.com> | 2020-07-02 15:47:54 +0100 |
---|---|---|
committer | Martyn Capewell <martyn.capewell@arm.com> | 2020-07-13 10:05:58 +0100 |
commit | 4635261c9edcf4b056f9e1b26052b5612dee61ba (patch) | |
tree | fb7a3744324a31bb56d24008269965ce257795a9 /test | |
parent | 3eb24e95aabe254788d21fd8c647964ec93aca67 (diff) | |
download | vixl-4635261c9edcf4b056f9e1b26052b5612dee61ba.tar.gz |
Use segments in SVE indexed fmul simulation
The value used for the second operand in indexed multiplies differs for each
segment (128-bit part) of a vector, but the simulator wasn't doing this for
FP multiplies. Fix and update the tests.
Change-Id: I9cc37ebef9d216243a23bedebea256826e1016cb
Diffstat (limited to 'test')
-rw-r--r-- | test/aarch64/test-assembler-sve-aarch64.cc | 69 |
1 files changed, 42 insertions, 27 deletions
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc index ba67400a..84b27372 100644 --- a/test/aarch64/test-assembler-sve-aarch64.cc +++ b/test/aarch64/test-assembler-sve-aarch64.cc @@ -14825,6 +14825,22 @@ TEST_SVE(sve_fcmla) { } } +// Create a pattern in dst where the value of each element in src is incremented +// by the segment number. This allows varying a short input by a predictable +// pattern for each segment. +static void FPSegmentPatternHelper(MacroAssembler* masm, + const ZRegister& dst, + const PRegisterM& ptrue, + const ZRegister& src) { + VIXL_ASSERT(AreSameLaneSize(dst, src)); + UseScratchRegisterScope temps(masm); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(dst); + masm->Index(ztmp, 0, 1); + masm->Asr(ztmp, ztmp, kQRegSizeInBytesLog2 - dst.GetLaneSizeInBytesLog2()); + masm->Scvtf(ztmp, ptrue, ztmp); + masm->Fadd(dst, src, ztmp); +} + TEST_SVE(sve_fpmul_index) { SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE); START(); @@ -14832,9 +14848,14 @@ TEST_SVE(sve_fpmul_index) { uint64_t in0[] = {0x3ff000003f803c00, 0xbff00000bf80bc00}; uint64_t in1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76}; - InsrHelper(&masm, z0.VnD(), in0); + __ Ptrue(p0.VnB()); + // Repeat indexed vector across up to 2048-bit VL. + for (size_t i = 0; i < (kZRegMaxSize / kDRegSize); i++) { + InsrHelper(&masm, z25.VnD(), in0); + } InsrHelper(&masm, z1.VnD(), in1); + FPSegmentPatternHelper(&masm, z0.VnH(), p0.Merging(), z25.VnH()); __ Fmul(z2.VnH(), z1.VnH(), z0.VnH(), 0); __ Fmul(z3.VnH(), z1.VnH(), z0.VnH(), 1); __ Fmul(z4.VnH(), z1.VnH(), z0.VnH(), 4); @@ -14849,27 +14870,37 @@ TEST_SVE(sve_fpmul_index) { __ Fmul(z11.VnD(), z1.VnD(), z0.VnD(), 1); // Compute the results using other instructions. - __ Dup(z12.VnH(), z0.VnH(), 0); + __ Dup(z12.VnH(), z25.VnH(), 0); + FPSegmentPatternHelper(&masm, z12.VnH(), p0.Merging(), z12.VnH()); __ Fmul(z12.VnH(), z1.VnH(), z12.VnH()); - __ Dup(z13.VnH(), z0.VnH(), 1); + __ Dup(z13.VnH(), z25.VnH(), 1); + FPSegmentPatternHelper(&masm, z13.VnH(), p0.Merging(), z13.VnH()); __ Fmul(z13.VnH(), z1.VnH(), z13.VnH()); - __ Dup(z14.VnH(), z0.VnH(), 4); + __ Dup(z14.VnH(), z25.VnH(), 4); + FPSegmentPatternHelper(&masm, z14.VnH(), p0.Merging(), z14.VnH()); __ Fmul(z14.VnH(), z1.VnH(), z14.VnH()); - __ Dup(z15.VnH(), z0.VnH(), 7); + __ Dup(z15.VnH(), z25.VnH(), 7); + FPSegmentPatternHelper(&masm, z15.VnH(), p0.Merging(), z15.VnH()); __ Fmul(z15.VnH(), z1.VnH(), z15.VnH()); - __ Dup(z16.VnS(), z0.VnS(), 0); + __ Dup(z16.VnS(), z25.VnS(), 0); + FPSegmentPatternHelper(&masm, z16.VnH(), p0.Merging(), z16.VnH()); __ Fmul(z16.VnS(), z1.VnS(), z16.VnS()); - __ Dup(z17.VnS(), z0.VnS(), 1); + __ Dup(z17.VnS(), z25.VnS(), 1); + FPSegmentPatternHelper(&masm, z17.VnH(), p0.Merging(), z17.VnH()); __ Fmul(z17.VnS(), z1.VnS(), z17.VnS()); - __ Dup(z18.VnS(), z0.VnS(), 2); + __ Dup(z18.VnS(), z25.VnS(), 2); + FPSegmentPatternHelper(&masm, z18.VnH(), p0.Merging(), z18.VnH()); __ Fmul(z18.VnS(), z1.VnS(), z18.VnS()); - __ Dup(z19.VnS(), z0.VnS(), 3); + __ Dup(z19.VnS(), z25.VnS(), 3); + FPSegmentPatternHelper(&masm, z19.VnH(), p0.Merging(), z19.VnH()); __ Fmul(z19.VnS(), z1.VnS(), z19.VnS()); - __ Dup(z20.VnD(), z0.VnD(), 0); + __ Dup(z20.VnD(), z25.VnD(), 0); + FPSegmentPatternHelper(&masm, z20.VnH(), p0.Merging(), z20.VnH()); __ Fmul(z20.VnD(), z1.VnD(), z20.VnD()); - __ Dup(z21.VnD(), z0.VnD(), 1); + __ Dup(z21.VnD(), z25.VnD(), 1); + FPSegmentPatternHelper(&masm, z21.VnH(), p0.Merging(), z21.VnH()); __ Fmul(z21.VnD(), z1.VnD(), z21.VnD()); END(); @@ -16962,22 +16993,6 @@ TEST_SVE(sve_fnmls_fnmsb) { fnmls_result_d); } -// Create a pattern in dst where the value of each element in src is incremented -// by the segment number. This allows varying a short input by a predictable -// pattern for each segment. -static void FPSegmentPatternHelper(MacroAssembler* masm, - const ZRegister& dst, - const PRegisterM& ptrue, - const ZRegister& src) { - VIXL_ASSERT(AreSameLaneSize(dst, src)); - UseScratchRegisterScope temps(masm); - ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(dst); - masm->Index(ztmp, 0, 1); - masm->Asr(ztmp, ztmp, kQRegSizeInBytesLog2 - dst.GetLaneSizeInBytesLog2()); - masm->Scvtf(ztmp, ptrue, ztmp); - masm->Fadd(dst, src, ztmp); -} - typedef void (MacroAssembler::*FPMulAccIdxFn)(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, |