diff options
author | Carey Williams <carey.williams@arm.com> | 2018-04-10 11:58:07 +0100 |
---|---|---|
committer | Carey Williams <carey.williams@arm.com> | 2018-04-23 16:45:15 +0100 |
commit | d8bb357091538ce9c84518eea4bc0792e10c417a (patch) | |
tree | d35112f8a3947d78228a0aeca11fa6c44aecddf2 /test/aarch64 | |
parent | 2c3cebbd93cc0db27e1e55740bd89a8045148d47 (diff) | |
download | vixl-d8bb357091538ce9c84518eea4bc0792e10c417a.tar.gz |
Initial Support + FMOV [FP16]
This patch adds the basic groundwork for the v8.2 half precision extension.
This includes an F16 wrapper class, the fmov instruction support, tests and some
related trace/logging support.
The trace additions in the simulator are "disabled" as they currently cause the trace tests to fail.
These need to be re-generated before we can enable their half precision support.
This should be handled under a separate patch.
Change-Id: I431558a21649a612ba31f91cc4ab8eba9b657947
Diffstat (limited to 'test/aarch64')
-rw-r--r-- | test/aarch64/test-assembler-aarch64.cc | 80 | ||||
-rw-r--r-- | test/aarch64/test-disasm-aarch64.cc | 9 | ||||
-rw-r--r-- | test/aarch64/test-utils-aarch64.cc | 55 | ||||
-rw-r--r-- | test/aarch64/test-utils-aarch64.h | 16 |
4 files changed, 149 insertions, 11 deletions
diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc index 65dbed9b..a399a936 100644 --- a/test/aarch64/test-assembler-aarch64.cc +++ b/test/aarch64/test-assembler-aarch64.cc @@ -272,6 +272,9 @@ namespace aarch64 { #define ASSERT_EQUAL_REGISTERS(expected) \ VIXL_CHECK(EqualRegisters(&expected, &core)) +#define ASSERT_EQUAL_FP16(expected, result) \ + VIXL_CHECK(EqualFP16(expected, &core, result)) + #define ASSERT_EQUAL_32(expected, result) \ VIXL_CHECK(Equal32(static_cast<uint32_t>(expected), &core, result)) @@ -9850,26 +9853,74 @@ TEST(fmov_imm) { SETUP(); START(); - __ Fmov(s11, 1.0); - __ Fmov(d22, -13.0); __ Fmov(s1, 255.0); __ Fmov(d2, 12.34567); __ Fmov(s3, 0.0); __ Fmov(d4, 0.0); __ Fmov(s5, kFP32PositiveInfinity); __ Fmov(d6, kFP64NegativeInfinity); + __ Fmov(h7, F16::FromRawbits(0x6400U)); + __ Fmov(h8, F16::FromRawbits(kFP16PositiveInfinity)); + __ Fmov(s11, 1.0); + __ Fmov(h12, F16::FromRawbits(0x7BFF)); + __ Fmov(h13, F16::FromRawbits(0x57F2)); + __ Fmov(d22, -13.0); + __ Fmov(h23, F16::FromRawbits(0xC500U)); + __ Fmov(h24, F16(-5.0)); + __ Fmov(h25, F16(2049.0)); + __ Fmov(h21, F16::FromRawbits(0x6404U)); + __ Fmov(h26, F16::FromRawbits(0x0U)); + __ Fmov(h27, F16::FromRawbits(0x7e00U)); END(); - +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 RUN(); - ASSERT_EQUAL_FP32(1.0, s11); - ASSERT_EQUAL_FP64(-13.0, d22); ASSERT_EQUAL_FP32(255.0, s1); ASSERT_EQUAL_FP64(12.34567, d2); ASSERT_EQUAL_FP32(0.0, s3); ASSERT_EQUAL_FP64(0.0, d4); ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5); ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d6); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6400U), h7); + ASSERT_EQUAL_FP16(F16::FromRawbits(kFP16PositiveInfinity), h8); + ASSERT_EQUAL_FP32(1.0, s11); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x7BFF), h12); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x57F2U), h13); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6404), h21); + ASSERT_EQUAL_FP64(-13.0, d22); + ASSERT_EQUAL_FP16(F16(-5.0), h23); + 
ASSERT_EQUAL_FP16(F16::FromRawbits(0xC500), h24); + // 2049 is unrepresentable. + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6800), h25); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x0), h26); + // NaN check. + ASSERT_EQUAL_FP16(F16::FromRawbits(0x7e00), h27); +#endif + + TEARDOWN(); +} + + +TEST(fmov_vec_imm) { + SETUP(); + + START(); + + __ Fmov(v0.V2S(), 20.0); + __ Fmov(v1.V4S(), 1024.0); + + __ Fmov(v2.V4H(), F16::FromRawbits(0xC500U)); + __ Fmov(v3.V8H(), F16::FromRawbits(0x4A80U)); + + END(); +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + RUN(); + + ASSERT_EQUAL_64(0x41A0000041A00000, d0); + ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1); + ASSERT_EQUAL_64(0xC500C500C500C500, d2); + ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3); +#endif TEARDOWN(); } @@ -9879,6 +9930,15 @@ TEST(fmov_reg) { SETUP(); START(); + + __ Fmov(h3, F16::FromRawbits(0xCA80U)); + __ Fmov(h7, h3); + __ Fmov(h8, -5.0); + __ Fmov(w3, h8); + __ Fmov(h9, w3); + __ Fmov(h8, F16(1024.0)); + __ Fmov(x4, h8); + __ Fmov(h10, x4); __ Fmov(s20, 1.0); __ Fmov(w10, s20); __ Fmov(s30, w10); @@ -9889,15 +9949,19 @@ TEST(fmov_reg) { __ Fmov(d4, d1); __ Fmov(d6, RawbitsToDouble(0x0123456789abcdef)); __ Fmov(s6, s6); - __ Fmov(d0, 0.0); __ Fmov(v0.D(), 1, x1); __ Fmov(x2, v0.D(), 1); END(); - +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 RUN(); + ASSERT_EQUAL_FP16(F16::FromRawbits(0xCA80U), h7); + ASSERT_EQUAL_FP16(F16::FromRawbits(0xC500U), h9); + ASSERT_EQUAL_32(0x0000C500, w3); + ASSERT_EQUAL_64(0x0000000000006400, x4); + ASSERT_EQUAL_FP16(F16::FromRawbits(0x6400), h10); ASSERT_EQUAL_32(FloatToRawbits(1.0), w10); ASSERT_EQUAL_FP32(1.0, s30); ASSERT_EQUAL_FP32(1.0, s5); @@ -9907,6 +9971,8 @@ TEST(fmov_reg) { ASSERT_EQUAL_FP32(RawbitsToFloat(0x89abcdef), s6); ASSERT_EQUAL_128(DoubleToRawbits(-13.0), 0x0000000000000000, q0); ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x2); +#endif + + TEARDOWN(); } diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc index 2cd0a60b..d7abd8cf 
100644 --- a/test/aarch64/test-disasm-aarch64.cc +++ b/test/aarch64/test-disasm-aarch64.cc @@ -2594,6 +2594,8 @@ TEST(cond_cmp_macro) { TEST(fmov_imm) { SETUP(); + COMPARE(fmov(h2, F16(-5.0)), "fmov h2, #0x94 (-5.0000)"); + COMPARE(fmov(h30, F16(29.0)), "fmov h30, #0x3d (29.0000)"); COMPARE(fmov(s0, 1.0f), "fmov s0, #0x70 (1.0000)"); COMPARE(fmov(s31, -13.0f), "fmov s31, #0xaa (-13.0000)"); COMPARE(fmov(d1, 1.0), "fmov d1, #0x70 (1.0000)"); @@ -5776,9 +5778,10 @@ TEST(neon_modimm) { COMPARE_MACRO(Fmov(v31.V4S(), -13.0f), "fmov v31.4s, #0xaa (-13.0000)"); COMPARE_MACRO(Fmov(v1.V2D(), 1.0), "fmov v1.2d, #0x70 (1.0000)"); COMPARE_MACRO(Fmov(v29.V2D(), -13.0), "fmov v29.2d, #0xaa (-13.0000)"); - - // An unallocated form of fmov. - COMPARE(dci(0x2f07ffff), "unallocated (NEONModifiedImmediate)"); + COMPARE_MACRO(Fmov(v0.V4H(), F16(-5.0f)), "fmov v0.4h, #0x94 (-5.0000)"); + COMPARE_MACRO(Fmov(v31.V8H(), F16(29.0f)), "fmov v31.8h, #0x3d (29.0000)"); + COMPARE_MACRO(Fmov(v0.V4H(), F16(-5.0)), "fmov v0.4h, #0x94 (-5.0000)"); + COMPARE_MACRO(Fmov(v31.V8H(), F16(29.0)), "fmov v31.8h, #0x3d (29.0000)"); CLEANUP(); } diff --git a/test/aarch64/test-utils-aarch64.cc b/test/aarch64/test-utils-aarch64.cc index eafe2730..cce728a2 100644 --- a/test/aarch64/test-utils-aarch64.cc +++ b/test/aarch64/test-utils-aarch64.cc @@ -33,6 +33,7 @@ #include "aarch64/disasm-aarch64.h" #include "aarch64/macro-assembler-aarch64.h" #include "aarch64/simulator-aarch64.h" +#include "aarch64/utils-aarch64.h" #define __ masm-> @@ -87,6 +88,30 @@ bool Equal128(vec128_t expected, const RegisterDump*, vec128_t result) { } +bool EqualFP16(F16 expected, const RegisterDump*, F16 result) { + uint16_t e_rawbits = expected.ToRawbits(); + uint16_t r_rawbits = result.ToRawbits(); + if (e_rawbits == r_rawbits) { + return true; + } else { + if (IsNaN(e_rawbits) || (e_rawbits == 0x0)) { + printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n", + e_rawbits, + r_rawbits); + } else { + printf("Expected %.6f (16 bit): 
(0x%04" PRIx16 + ")\t " + "Found %.6f (0x%04" PRIx16 ")\n", + FPToFloat(e_rawbits, kIgnoreDefaultNaN), + e_rawbits, + FPToFloat(r_rawbits, kIgnoreDefaultNaN), + r_rawbits); + } + return false; + } +} + + bool EqualFP32(float expected, const RegisterDump*, float result) { if (FloatToRawbits(expected) == FloatToRawbits(result)) { return true; @@ -165,6 +190,24 @@ bool Equal128(uint64_t expected_h, } +bool EqualFP16(F16 expected, + const RegisterDump* core, + const FPRegister& fpreg) { + VIXL_ASSERT(fpreg.Is16Bits()); + // Retrieve the corresponding D register so we can check that the upper part + // was properly cleared. + uint64_t result_64 = core->dreg_bits(fpreg.GetCode()); + if ((result_64 & 0xffffffffffff0000) != 0) { + printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n", + expected.ToRawbits(), + FPToFloat(expected.ToRawbits(), kIgnoreDefaultNaN), + result_64); + return false; + } + return EqualFP16(expected, core, core->hreg(fpreg.GetCode())); +} + + bool EqualFP32(float expected, const RegisterDump* core, const FPRegister& fpreg) { @@ -407,6 +450,7 @@ void RegisterDump::Dump(MacroAssembler* masm) { const int w_offset = offsetof(dump_t, w_); const int d_offset = offsetof(dump_t, d_); const int s_offset = offsetof(dump_t, s_); + const int h_offset = offsetof(dump_t, h_); const int q_offset = offsetof(dump_t, q_); const int sp_offset = offsetof(dump_t, sp_); const int wsp_offset = offsetof(dump_t, wsp_); @@ -458,6 +502,17 @@ void RegisterDump::Dump(MacroAssembler* masm) { MemOperand(dump, i * kSRegSizeInBytes)); } +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + // Dump H registers. Note: Stp does not support 16 bits. + __ Add(dump, dump_base, h_offset); + for (unsigned i = 0; i < kNumberOfFPRegisters; i++) { + __ Str(FPRegister::GetHRegFromCode(i), + MemOperand(dump, i * kHRegSizeInBytes)); + } +#else + USE(h_offset); +#endif + + // Dump Q registers. 
__ Add(dump, dump_base, q_offset); for (unsigned i = 0; i < kNumberOfVRegisters; i += 2) { diff --git a/test/aarch64/test-utils-aarch64.h b/test/aarch64/test-utils-aarch64.h index f12bdb0e..20449e76 100644 --- a/test/aarch64/test-utils-aarch64.h +++ b/test/aarch64/test-utils-aarch64.h @@ -61,6 +61,7 @@ class RegisterDump { RegisterDump() : completed_(false) { VIXL_ASSERT(sizeof(dump_.d_[0]) == kDRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.s_[0]) == kSRegSizeInBytes); + VIXL_ASSERT(sizeof(dump_.h_[0]) == kHRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.d_[0]) == kXRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.s_[0]) == kWRegSizeInBytes); VIXL_ASSERT(sizeof(dump_.x_[0]) == kXRegSizeInBytes); @@ -95,11 +96,20 @@ class RegisterDump { } // FPRegister accessors. + inline uint16_t hreg_bits(unsigned code) const { + VIXL_ASSERT(FPRegAliasesMatch(code)); + return dump_.h_[code]; + } + inline uint32_t sreg_bits(unsigned code) const { VIXL_ASSERT(FPRegAliasesMatch(code)); return dump_.s_[code]; } + inline F16 hreg(unsigned code) const { + return F16::FromRawbits(hreg_bits(code)); + } + inline float sreg(unsigned code) const { return RawbitsToFloat(sreg_bits(code)); } @@ -158,7 +168,8 @@ class RegisterDump { bool FPRegAliasesMatch(unsigned code) const { VIXL_ASSERT(IsComplete()); VIXL_ASSERT(code < kNumberOfFPRegisters); - return (dump_.d_[code] & kSRegMask) == dump_.s_[code]; + return (((dump_.d_[code] & kSRegMask) == dump_.s_[code]) || + ((dump_.s_[code] & kHRegMask) == dump_.h_[code])); } // Store all the dumped elements in a simple struct so the implementation can @@ -171,6 +182,7 @@ class RegisterDump { // Floating-point registers, as raw bits. uint64_t d_[kNumberOfFPRegisters]; uint32_t s_[kNumberOfFPRegisters]; + uint16_t h_[kNumberOfFPRegisters]; // Vector registers. 
vec128_t q_[kNumberOfVRegisters]; @@ -193,6 +205,7 @@ class RegisterDump { bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result); bool Equal64(uint64_t expected, const RegisterDump*, uint64_t result); +bool EqualFP16(F16 expected, const RegisterDump*, uint16_t result); bool EqualFP32(float expected, const RegisterDump*, float result); bool EqualFP64(double expected, const RegisterDump*, double result); @@ -202,6 +215,7 @@ bool Equal64(uint64_t expected, const RegisterDump* core, const VRegister& vreg); +bool EqualFP16(F16 expected, const RegisterDump* core, const FPRegister& fpreg); bool EqualFP32(float expected, const RegisterDump* core, const FPRegister& fpreg); |