diff options
author | Carey Williams <carey.williams@arm.com> | 2018-04-10 11:58:07 +0100 |
---|---|---|
committer | Carey Williams <carey.williams@arm.com> | 2018-04-23 16:45:15 +0100 |
commit | d8bb357091538ce9c84518eea4bc0792e10c417a (patch) | |
tree | d35112f8a3947d78228a0aeca11fa6c44aecddf2 /test/aarch64 | |
parent | 2c3cebbd93cc0db27e1e55740bd89a8045148d47 (diff) | |
download | vixl-d8bb357091538ce9c84518eea4bc0792e10c417a.tar.gz |
Initial Support + FMOV [FP16]
This patch adds the basic groundwork for the v8.2 half precision extension.
This includes an F16 wrapper class, the fmov instruction support, tests and some
related trace/logging support.
The trace additions in the simulator are "disabled" as they currently cause the trace tests to fail.
These need to be re-generated before we can enable their half precision support.
This should be handled under a separate patch.
Change-Id: I431558a21649a612ba31f91cc4ab8eba9b657947
Diffstat (limited to 'test/aarch64')
-rw-r--r-- | test/aarch64/test-assembler-aarch64.cc | 80 | ||||
-rw-r--r-- | test/aarch64/test-disasm-aarch64.cc | 9 | ||||
-rw-r--r-- | test/aarch64/test-utils-aarch64.cc | 55 | ||||
-rw-r--r-- | test/aarch64/test-utils-aarch64.h | 16 |
4 files changed, 149 insertions, 11 deletions
diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc index 65dbed9b..a399a936 100644 --- a/test/aarch64/test-assembler-aarch64.cc +++ b/test/aarch64/test-assembler-aarch64.cc @@ -272,6 +272,9 @@ namespace aarch64 { #define ASSERT_EQUAL_REGISTERS(expected) \ VIXL_CHECK(EqualRegisters(&expected, &core)) +#define ASSERT_EQUAL_FP16(expected, result) \ + VIXL_CHECK(EqualFP16(expected, &core, result)) + #define ASSERT_EQUAL_32(expected, result) \ VIXL_CHECK(Equal32(static_cast<uint32_t>(expected), &core, result)) @@ -9850,26 +9853,74 @@ TEST(fmov_imm) { SETUP(); START(); - __ Fmov(s11, 1.0); - __ Fmov(d22, -13.0); __ Fmov(s1, 255.0); __ Fmov(d2, 12.34567); __ Fmov(s3, 0.0); __ Fmov(d4, 0.0); __ Fmov(s5, kFP32PositiveInfinity); __ Fmov(d6, kFP64NegativeInfinity); + __ Fmov(h7, F16::FromRawbits(0x6400U)); + __ Fmov(h8, F16::FromRawbits(kFP16PositiveInfinity)); + __ Fmov(s11, 1.0); + __ Fmov(h12, F16::FromRawbits(0x7BFF)); + __ Fmov(h13, F16::FromRawbits(0x57F2)); + __ Fmov(d22, -13.0); + __ Fmov(h23, F16::FromRawbits(0xC500U)); + __ Fmov(h24, F16(-5.0)); + __ Fmov(h25, F16(2049.0)); + __ Fmov(h21, F16::FromRawbits(0x6404U)); + __ Fmov(h26, F16::FromRawbits(0x0U)); + __ Fmov(h27, F16::FromRawbits(0x7e00U)); END(); - +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 RUN(); - ASSERT_EQUAL_FP32(1.0, s11); - ASSERT_EQUAL_FP64(-13.0, d22); ASSERT_EQUAL_FP32(255.0, s1); ASSERT_EQUAL_FP64(12.34567, d2); ASSERT_EQUAL_FP32(0.0, s3); ASSERT_EQUAL_FP64(0.0, d4); ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5); ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d6); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6400U), h7); + ASSERT_EQUAL_FP16(F16::FromRawbits(kFP16PositiveInfinity), h8); + ASSERT_EQUAL_FP32(1.0, s11); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x7BFF), h12); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x57F2U), h13); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6404), h21); + ASSERT_EQUAL_FP64(-13.0, d22); + ASSERT_EQUAL_FP16(F16(-5.0), h23); + 
ASSERT_EQUAL_FP16(F16::FromRawbits(0xC500), h24); + // 2049 is unrepresentable. + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6800), h25); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x0), h26); + // NaN check. + ASSERT_EQUAL_FP16(F16::FromRawbits(0x7e00), h27); +#endif + + TEARDOWN(); +} + + +TEST(fmov_vec_imm) { + SETUP(); + + START(); + + __ Fmov(v0.V2S(), 20.0); + __ Fmov(v1.V4S(), 1024.0); + + __ Fmov(v2.V4H(), F16::FromRawbits(0xC500U)); + __ Fmov(v3.V8H(), F16::FromRawbits(0x4A80U)); + + END(); +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + RUN(); + + ASSERT_EQUAL_64(0x41A0000041A00000, d0); + ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1); + ASSERT_EQUAL_64(0xC500C500C500C500, d2); + ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3); +#endif TEARDOWN(); } @@ -9879,6 +9930,15 @@ TEST(fmov_reg) { SETUP(); START(); + + __ Fmov(h3, F16::FromRawbits(0xCA80U)); + __ Fmov(h7, h3); + __ Fmov(h8, -5.0); + __ Fmov(w3, h8); + __ Fmov(h9, w3); + __ Fmov(h8, F16(1024.0)); + __ Fmov(x4, h8); + __ Fmov(h10, x4); __ Fmov(s20, 1.0); __ Fmov(w10, s20); __ Fmov(s30, w10); @@ -9889,15 +9949,19 @@ TEST(fmov_reg) { __ Fmov(d4, d1); __ Fmov(d6, RawbitsToDouble(0x0123456789abcdef)); __ Fmov(s6, s6); - __ Fmov(d0, 0.0); __ Fmov(v0.D(), 1, x1); __ Fmov(x2, v0.D(), 1); END(); - +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 RUN(); + ASSERT_EQUAL_FP16(F16::FromRawbits(0xCA80U), h7); + ASSERT_EQUAL_FP16(F16::FromRawbits(0xC500U), h9); + ASSERT_EQUAL_32(0x0000C500, w3); + ASSERT_EQUAL_64(0x0000000000006400, x4); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6400), h10); ASSERT_EQUAL_32(FloatToRawbits(1.0), w10); ASSERT_EQUAL_FP32(1.0, s30); ASSERT_EQUAL_FP32(1.0, s5); @@ -9907,6 +9971,8 @@ TEST(fmov_reg) { ASSERT_EQUAL_FP32(RawbitsToFloat(0x89abcdef), s6); ASSERT_EQUAL_128(DoubleToRawbits(-13.0), 0x0000000000000000, q0); ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x2); +#endif + + TEARDOWN(); } diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc index 2cd0a60b..d7abd8cf 
100644 --- a/test/aarch64/test-disasm-aarch64.cc +++ b/test/aarch64/test-disasm-aarch64.cc @@ -2594,6 +2594,8 @@ TEST(cond_cmp_macro) { TEST(fmov_imm) { SETUP(); + COMPARE(fmov(h2, F16(-5.0)), "fmov h2, #0x94 (-5.0000)"); + COMPARE(fmov(h30, F16(29.0)), "fmov h30, #0x3d (29.0000)"); COMPARE(fmov(s0, 1.0f), "fmov s0, #0x70 (1.0000)"); COMPARE(fmov(s31, -13.0f), "fmov s31, #0xaa (-13.0000)"); COMPARE(fmov(d1, 1.0), "fmov d1, #0x70 (1.0000)"); @@ -5776,9 +5778,10 @@ TEST(neon_modimm) { COMPARE_MACRO(Fmov(v31.V4S(), -13.0f), "fmov v31.4s, #0xaa (-13.0000)"); COMPARE_MACRO(Fmov(v1.V2D(), 1.0), "fmov v1.2d, #0x70 (1.0000)"); COMPARE_MACRO(Fmov(v29.V2D(), -13.0), "fmov v29.2d, #0xaa (-13.0000)"); - - // An unallocated form of fmov. - COMPARE(dci(0x2f07ffff), "unallocated (NEONModifiedImmediate)"); + COMPARE_MACRO(Fmov(v0.V4H(), F16(-5.0f)), "fmov v0.4h, #0x94 (-5.0000)"); + COMPARE_MACRO(Fmov(v31.V8H(), F16(29.0f)), "fmov v31.8h, #0x3d (29.0000)"); + COMPARE_MACRO(Fmov(v0.V4H(), F16(-5.0)), "fmov v0.4h, #0x94 (-5.0000)"); + COMPARE_MACRO(Fmov(v31.V8H(), F16(29.0)), "fmov v31.8h, #0x3d (29.0000)"); CLEANUP(); } diff --git a/test/aarch64/test-utils-aarch64.cc b/test/aarch64/test-utils-aarch64.cc index eafe2730..cce728a2 100644 --- a/test/aarch64/test-utils-aarch64.cc +++ b/test/aarch64/test-utils-aarch64.cc @@ -33,6 +33,7 @@ #include "aarch64/disasm-aarch64.h" #include "aarch64/macro-assembler-aarch64.h" #include "aarch64/simulator-aarch64.h" +#include "aarch64/utils-aarch64.h" #define __ masm-> @@ -87,6 +88,30 @@ bool Equal128(vec128_t expected, const RegisterDump*, vec128_t result) { } +bool EqualFP16(F16 expected, const RegisterDump*, F16 result) { + uint16_t e_rawbits = expected.ToRawbits(); + uint16_t r_rawbits = result.ToRawbits(); + if (e_rawbits == r_rawbits) { + return true; + } else { + if (IsNaN(e_rawbits) || (e_rawbits == 0x0)) { + printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n", + e_rawbits, + r_rawbits); + } else { + printf("Expected %.6f (16 bit): 
(0x%04" PRIx16 + ")\t " + "Found %.6f (0x%04" PRIx16 ")\n", + FPToFloat(e_rawbits, kIgnoreDefaultNaN), + e_rawbits, + FPToFloat(r_rawbits, kIgnoreDefaultNaN), + r_rawbits); + } + return false; + } +} + + bool EqualFP32(float expected, const RegisterDump*, float result) { if (FloatToRawbits(expected) == FloatToRawbits(result)) { return true; @@ -165,6 +190,24 @@ bool Equal128(uint64_t expected_h, } +bool EqualFP16(F16 expected, + const RegisterDump* core, + const FPRegister& fpreg) { + VIXL_ASSERT(fpreg.Is16Bits()); + // Retrieve the corresponding D register so we can check that the upper part + // was properly cleared. + uint64_t result_64 = core->dreg_bits(fpreg.GetCode()); + if ((result_64 & 0xffffffffffff0000) != 0) { + printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n", + expected.ToRawbits(), + FPToFloat(expected.ToRawbits(), kIgnoreDefaultNaN), + result_64); + return false; + } + return EqualFP16(expected, core, core->hreg(fpreg.GetCode())); +} + + bool EqualFP32(float expected, const RegisterDump* core, const FPRegister& fpreg) { @@ -407,6 +450,7 @@ void RegisterDump::Dump(MacroAssembler* masm) { const int w_offset = offsetof(dump_t, w_); const int d_offset = offsetof(dump_t, d_); const int s_offset = offsetof(dump_t, s_); + const int h_offset = offsetof(dump_t, h_); const int q_offset = offsetof(dump_t, q_); const int sp_offset = offsetof(dump_t, sp_); const int wsp_offset = offsetof(dump_t, wsp_); @@ -458,6 +502,17 @@ void RegisterDump::Dump(MacroAssembler* masm) { MemOperand(dump, i * kSRegSizeInBytes)); } +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + // Dump H registers. Note: Stp does not support 16 bits. + __ Add(dump, dump_base, h_offset); + for (unsigned i = 0; i < kNumberOfFPRegisters; i++) { + __ Str(FPRegister::GetHRegFromCode(i), + MemOperand(dump, i * kHRegSizeInBytes)); + } +#else + USE(h_offset); +#endif + + // Dump Q registers. 
__ Add(dump, dump_base, q_offset); for (unsigned i = 0; i < kNumberOfVRegisters; i += 2) { diff --git a/test/aarch64/test-utils-aarch64.h b/test/aarch64/test-utils-aarch64.h index f12bdb0e..20449e76 100644 --- a/test/aarch64/test-utils-aarch64.h +++ b/test/aarch64/test-utils-aarch64.h @@ -61,6 +61,7 @@ class RegisterDump { RegisterDump() : completed_(false) { VIXL_ASSERT(sizeof(dump_.d_[0]) == kDRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.s_[0]) == kSRegSizeInBytes); + VIXL_ASSERT(sizeof(dump_.h_[0]) == kHRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.d_[0]) == kXRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.s_[0]) == kWRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.x_[0]) == kXRegSizeInBytes); @@ -95,11 +96,20 @@ class RegisterDump { } // FPRegister accessors. + inline uint16_t hreg_bits(unsigned code) const { + VIXL_ASSERT(FPRegAliasesMatch(code)); + return dump_.h_[code]; + } + inline uint32_t sreg_bits(unsigned code) const { VIXL_ASSERT(FPRegAliasesMatch(code)); return dump_.s_[code]; } + inline F16 hreg(unsigned code) const { + return F16::FromRawbits(hreg_bits(code)); + } + inline float sreg(unsigned code) const { return RawbitsToFloat(sreg_bits(code)); } @@ -158,7 +168,8 @@ class RegisterDump { bool FPRegAliasesMatch(unsigned code) const { VIXL_ASSERT(IsComplete()); VIXL_ASSERT(code < kNumberOfFPRegisters); - return (dump_.d_[code] & kSRegMask) == dump_.s_[code]; + return (((dump_.d_[code] & kSRegMask) == dump_.s_[code]) || + ((dump_.s_[code] & kHRegMask) == dump_.h_[code])); } // Store all the dumped elements in a simple struct so the implementation can @@ -171,6 +182,7 @@ class RegisterDump { // Floating-point registers, as raw bits. uint64_t d_[kNumberOfFPRegisters]; uint32_t s_[kNumberOfFPRegisters]; + uint16_t h_[kNumberOfFPRegisters]; // Vector registers. 
vec128_t q_[kNumberOfVRegisters]; @@ -193,6 +205,7 @@ class RegisterDump { bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result); bool Equal64(uint64_t expected, const RegisterDump*, uint64_t result); +bool EqualFP16(F16 expected, const RegisterDump*, uint16_t result); bool EqualFP32(float expected, const RegisterDump*, float result); bool EqualFP64(double expected, const RegisterDump*, double result); @@ -202,6 +215,7 @@ bool Equal64(uint64_t expected, const RegisterDump* core, const VRegister& vreg); +bool EqualFP16(F16 expected, const RegisterDump* core, const FPRegister& fpreg); bool EqualFP32(float expected, const RegisterDump* core, const FPRegister& fpreg); |