Snap for 9990577 from 1bf3e03bcb66e3a87a0bc3c69affd8ee56a45a00 to udc-release android-vts-14.0_r4 android-vts-14.0_r3 android-vts-14.0_r2 android-vts-14.0_r1 android-security-14.0.0_r9 android-security-14.0.0_r8 android-security-14.0.0_r7 android-security-14.0.0_r6 android-security-14.0.0_r5 android-security-14.0.0_r4 android-security-14.0.0_r3 android-security-14.0.0_r2 android-security-14.0.0_r1 android-platform-14.0.0_r8 android-platform-14.0.0_r7 android-platform-14.0.0_r6 android-platform-14.0.0_r5 android-platform-14.0.0_r4 android-platform-14.0.0_r3 android-platform-14.0.0_r2 android-platform-14.0.0_r1 android-cts-14.0_r4 android-cts-14.0_r3 android-cts-14.0_r2 android-cts-14.0_r1 android-14.0.0_r28 android-14.0.0_r2 android-14.0.0_r15 android-14.0.0_r14 android-14.0.0_r13 android-14.0.0_r1 android14-tests-release android14-security-release android14-s2-release android14-s1-release android14-release android14-platform-release

Change-Id: I28d0860d6f3122d1425a55e2224d9e7cbc5dd0e7
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-04-22 01:27:17 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-04-22 01:27:17 +0000
commit: 65790b034e7d97022c7c8ea9a6e930333d3c1fd9 (patch)
tree: a076320347f30694dd7f815ea3c7ef0519645811
parent: 722006fdeb16a49976325a0d74ab662992adba1a (diff)
parent: 1bf3e03bcb66e3a87a0bc3c69affd8ee56a45a00 (diff)
download: binary_translation-65790b034e7d97022c7c8ea9a6e930333d3c1fd9.tar.gz
10 files changed, 633 insertions, 108 deletions
diff --git a/base/include/berberis/base/dependent_false.h b/base/include/berberis/base/dependent_false.h
new file mode 100644
index 00000000..f01a48e0
--- /dev/null
+++ b/base/include/berberis/base/dependent_false.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_BASE_DEPENDENT_FALSE_H_
+#define BERBERIS_BASE_DEPENDENT_FALSE_H_
+
+#include <type_traits>
+
+namespace berberis {
+
+template <typename T>  // T is never used; it only makes the constant below type-dependent.
+inline constexpr bool kDependentTypeFalse = false;  // Evaluates to false for every T.
+
+template <auto T>  // T is a non-type (value) parameter here; it is never used.
+inline constexpr bool kDependentValueFalse = false;  // Evaluates to false for every value T.
+
+}  // namespace berberis
+
+#endif  // BERBERIS_BASE_DEPENDENT_FALSE_H_
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index dc8303d1..9f785732 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -194,6 +194,17 @@ class Decoder {
     kMaxAmoOpcode = 0b11111'111,
   };
 
+  enum class OpFpOpcode {  // 5-bit opcode that DecodeOpFp() reads from instruction bits 27..31.
+    // Bit #2 = 1 means rm is an opcode extension.
+    // Bit #3 = 1 means rs2 is an opcode extension.
+    // Bits #4, #1, and #0 - actual opcode.
+    kFAdd = 0b0'0'0'00,  // Literals are grouped as bit4'bit3'bit2'bits1..0.
+    kFSub = 0b0'0'0'01,
+    kFMul = 0b0'0'0'10,
+    kFDiv = 0b0'0'0'11,
+    kMaxOpFpOpcode = 0b1'1'1'11,  // All five opcode bits set.
+  };
+
   enum class LoadOpcode {
     kLb = 0b000,
     kLh = 0b001,
@@ -277,6 +288,14 @@ class Decoder {
     kMaxCsrRegister = 0b11'11'1111'1111,
   };
 
+  enum class FloatSize {  // 2-bit operand-size field that DecodeOpFp() reads from bits 25..26.
+    kFloat = 0b00,
+    kDouble = 0b01,
+    kHalf = 0b10,
+    kQuad = 0b11,
+    kMaxFloatSize = 0b11,  // Same value as kQuad: both field bits set.
+  };
+
   struct AmoArgs {
     AmoOpcode opcode;
     uint8_t dst;
@@ -379,6 +398,15 @@ class Decoder {
   using StoreArgs = StoreArgsTemplate<StoreOpcode>;
   using StoreFpArgs = StoreArgsTemplate<StoreFpOpcode>;
 
+  struct OpFpArgs {  // Fields of one OP-FP instruction, as filled in by DecodeOpFp().
+    OpFpOpcode opcode;  // Instruction bits 27..31.
+    FloatSize float_size;  // Instruction bits 25..26.
+    uint8_t dst;  // Destination register number, instruction bits 7..11.
+    uint8_t src1;  // First source register number, instruction bits 15..19.
+    uint8_t src2;  // Second source register number, instruction bits 20..24.
+    uint8_t rm;  // Instruction bits 12..14; passed through to the consumer undecoded.
+  };
+
   struct BranchArgs {
     BranchOpcode opcode;
     uint8_t src1;
@@ -430,13 +458,19 @@ class Decoder {
         DecodeCAddi();
         break;
       case CompressedOpcode::kFld:
-        DecodeCFld();
+        DecodeCompressedLoadStore<LoadFpOpcode::kFld>();
         break;
       case CompressedOpcode::kLw:
-        DecodeCLw();
+        DecodeCompressedLoadStore<LoadOpcode::kLw>();
         break;
       case CompressedOpcode::kLd:
-        DecodeCLd();
+        DecodeCompressedLoadStore<LoadOpcode::kLd>();
+        break;
+      case CompressedOpcode::kFsd:
+        DecodeCompressedLoadStore<StoreFpOpcode::kFsd>();
+        break;
+      case CompressedOpcode::kSd:
+        DecodeCompressedLoadStore<StoreOpcode::kSd>();
         break;
       default:
         insn_consumer_->Unimplemented();
@@ -444,50 +478,37 @@ class Decoder {
     return 2;
   }
 
-  void DecodeCLd() {
+  template <auto opcode>  // The opcode's type selects between the Store() and Load() paths.
+  void DecodeCompressedLoadStore() {  // Decodes one compressed load/store and reports it.
     uint8_t low_imm = GetBits<uint8_t, 5, 2>();
     uint8_t high_imm = GetBits<uint8_t, 10, 3>();
-    uint8_t imm = (low_imm << 6 | high_imm << 3);
-    uint8_t rd = GetBits<uint8_t, 2, 3>();
-    uint8_t rs = GetBits<uint8_t, 7, 3>();
-    const LoadArgs args = {
-        .opcode = LoadOpcode::kLd,
-        .dst = uint8_t(8 + rd),
-        .src = uint8_t(8 + rs),
-        .offset = imm,
-    };
-    insn_consumer_->Load(args);
-  }
-
-  void DecodeCLw() {
-    constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};
-    uint8_t low_imm = GetBits<uint8_t, 5, 2>();
-    uint8_t high_imm = GetBits<uint8_t, 10, 3>();
-    uint8_t imm = (kLwLow[low_imm] | high_imm << 3);
-    uint8_t rd = GetBits<uint8_t, 2, 3>();
-    uint8_t rs = GetBits<uint8_t, 7, 3>();
-    const LoadArgs args = {
-        .opcode = LoadOpcode::kLw,
-        .dst = uint8_t(8 + rd),
-        .src = uint8_t(8 + rs),
-        .offset = imm,
-    };
-    insn_consumer_->Load(args);
-  }
-
-  void DecodeCFld() {
-    uint8_t low_imm = GetBits<uint8_t, 5, 2>();
-    uint8_t high_imm = GetBits<uint8_t, 10, 3>();
-    uint8_t imm = (low_imm << 6 | high_imm << 3);
+    uint8_t imm;  // Assigned by exactly one of the two branches below.
+    if constexpr ((uint8_t(opcode) & 1) == 0) {  // Low opcode bit clear: split immediate layout.
+      constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};  // low_imm bit 0 -> 0x40, bit 1 -> 0x04.
+      imm = (kLwLow[low_imm] | high_imm << 3);  // high_imm supplies offset bits 3..5.
+    } else {  // Low opcode bit set: low_imm supplies offset bits 6..7 directly.
+      imm = (low_imm << 6 | high_imm << 3);
+    }
     uint8_t rd = GetBits<uint8_t, 2, 3>();
     uint8_t rs = GetBits<uint8_t, 7, 3>();
-    const LoadFpArgs args = {
-        .opcode = LoadFpOpcode::kFld,
-        .dst = uint8_t(8 + rd),
-        .src = uint8_t(8 + rs),
-        .offset = imm,
-    };
-    insn_consumer_->Load(args);
+    if constexpr (std::is_same_v<decltype(opcode), StoreOpcode> ||
+                  std::is_same_v<decltype(opcode), StoreFpOpcode>) {  // Store opcodes.
+      const StoreArgsTemplate<decltype(opcode)> args = {
+          .opcode = opcode,
+          .src = uint8_t(8 + rs),  // 3-bit register fields select registers 8..15.
+          .offset = imm,
+          .data = uint8_t(8 + rd),  // For stores the rd field names the register being stored.
+      };
+      insn_consumer_->Store(args);
+    } else {  // Any other opcode type is treated as a load.
+      const LoadArgsTemplate<decltype(opcode)> args = {
+          .opcode = opcode,
+          .dst = uint8_t(8 + rd),
+          .src = uint8_t(8 + rs),
+          .offset = imm,
+      };
+      insn_consumer_->Load(args);
+    }
   }
 
   void DecodeCAddi() {
@@ -578,6 +599,9 @@ class Decoder {
       case BaseOpcode::kOpImm32:
         DecodeOp<OpImm32Opcode, ShiftImm32Opcode, 5>();
         break;
+      case BaseOpcode::kOpFp:
+        DecodeOpFp();
+        break;
       case BaseOpcode::kStore:
         DecodeStore<StoreOpcode>();
         break;
@@ -822,6 +846,20 @@ class Decoder {
     insn_consumer_->JumpAndLink(args);
   }
 
+  void DecodeOpFp() {  // Extracts the OP-FP fields and hands them to the consumer's OpFp().
+    uint8_t float_size = GetBits<uint8_t, 25, 2>();
+    uint8_t opcode_bits = GetBits<uint8_t, 27, 5>();
+    const OpFpArgs args = {
+        .opcode = OpFpOpcode(opcode_bits),  // Plain cast: values without an enumerator pass through.
+        .float_size = FloatSize(float_size),  // All four 2-bit values have an enumerator.
+        .dst = GetBits<uint8_t, 7, 5>(),
+        .src1 = GetBits<uint8_t, 15, 5>(),
+        .src2 = GetBits<uint8_t, 20, 5>(),
+        .rm = GetBits<uint8_t, 12, 3>(),  // Forwarded as-is; not interpreted by the decoder.
+    };
+    insn_consumer_->OpFp(args);
+  }
+
   void DecodeSystem() {
     uint8_t low_opcode = GetBits<uint8_t, 12, 2>();
     if (low_opcode == 0b00) {
diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h
index 40b559d3..e320b7ce 100644
--- a/decoder/include/berberis/decoder/riscv64/semantics_player.h
+++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h
@@ -135,6 +135,13 @@ class SemanticsPlayer {
     SetRegOrIgnore(args.dst, result);
   };
 
+  void OpFp(const typename Decoder::OpFpArgs& args) {  // Plays one two-operand OP-FP instruction.
+    FpRegister arg1 = GetFpReg(args.src1);
+    FpRegister arg2 = GetFpReg(args.src2);
+    FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2);
+    SetFpReg(args.dst, result);  // The listener's result goes to the destination FP register.
+  }
+
   void Store(const typename Decoder::StoreArgs& args) {
     Register arg = GetRegOrZero(args.src);
     Register data = GetRegOrZero(args.data);
diff --git a/guest_state/include/berberis/guest_state/guest_state_riscv64.h b/guest_state/include/berberis/guest_state/guest_state_riscv64.h
index 8546311f..82aad665 100644
--- a/guest_state/include/berberis/guest_state/guest_state_riscv64.h
+++ b/guest_state/include/berberis/guest_state/guest_state_riscv64.h
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 
+#include "berberis/base/dependent_false.h"
 #include "berberis/base/macros.h"
 #include "berberis/guest_state/guest_addr.h"
 
@@ -74,6 +75,33 @@ inline void SetFReg(CPUState& state, uint64_t val) {
   state.f[kIndex] = val;
 }
 
+enum class RegisterType {  // Selects which accessors GetReg()/SetReg() forward to.
+  kReg,  // Forwarded to GetXReg()/SetXReg().
+  kFpReg,  // Forwarded to GetFReg()/SetFReg().
+};
+
+template <RegisterType register_type, uint8_t kIndex>  // Register file and register number.
+inline auto GetReg(const CPUState& state) {  // Returns what the selected Get*Reg<kIndex>() returns.
+  if constexpr (register_type == RegisterType::kReg) {
+    return GetXReg<kIndex>(state);
+  } else if constexpr (register_type == RegisterType::kFpReg) {
+    return GetFReg<kIndex>(state);
+  } else {  // Reached only for a RegisterType value not handled above.
+    static_assert(kDependentValueFalse<register_type>, "Unsupported register type");
+  }
+}
+
+template <RegisterType register_type, uint8_t kIndex, typename Register>  // Register is deduced.
+inline auto SetReg(CPUState& state, Register val) {  // Forwards val to the selected Set*Reg<kIndex>().
+  if constexpr (register_type == RegisterType::kReg) {
+    return SetXReg<kIndex>(state, val);
+  } else if constexpr (register_type == RegisterType::kFpReg) {
+    return SetFReg<kIndex>(state, val);
+  } else {  // Reached only for a RegisterType value not handled above.
+    static_assert(kDependentValueFalse<register_type>, "Unsupported register type");
+  }
+}
+
 struct ThreadState {
   CPUState cpu;
 };
diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h
new file mode 100644
index 00000000..dd49f19d
--- /dev/null
+++ b/interpreter/riscv64/fp_regs.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_FP_REGS_H_
+#define BERBERIS_FP_REGS_H_
+
+#include <cstring>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+
+namespace berberis {
+
+template <typename FloatType>  // Primary template: declared only; specializations follow.
+inline FloatType NanUnboxFPRegToFloat(uint64_t arg);  // Converts raw 64-bit register bits to FloatType.
+
+template <>
+inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) {
+  // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on
+  // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e.,
+  // all upper FLEN−n bits are 1. If so, the n least-significant bits of the input are used as the
+  // input value, otherwise the input value is treated as an n-bit canonical NaN.
+  if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) {  // Upper 32 bits are not all ones.
+    return bit_cast<intrinsics::Float32>(0x7fc00000);  // Bit pattern used for improperly boxed input.
+  }
+  intrinsics::Float32 result;
+  memcpy(&result, &arg, sizeof(intrinsics::Float32));  // NOTE(review): low half only if host is little-endian.
+  return result;
+}
+
+template <>
+inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) {  // No boxing check for Float64.
+  return bit_cast<intrinsics::Float64>(arg);  // All 64 bits are reinterpreted unchanged.
+}
+
+template <typename FloatType>  // Primary template: declared only; specializations follow.
+inline uint64_t NanBoxFloatToFPReg(FloatType arg);  // Converts FloatType to raw 64-bit register bits.
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) {
+  return bit_cast<uint32_t>(arg) | 0xffff'ffff'0000'0000;  // Value in the low half, ones in the upper 32 bits.
+}
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) {  // Float64 fills the register; no boxing.
+  return bit_cast<uint64_t>(arg);
+}
+
+}  // namespace berberis
+
+#endif  // BERBERIS_FP_REGS_H_
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index f4e82fea..a8fcbf61 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -28,10 +28,11 @@
 #include "berberis/decoder/riscv64/semantics_player.h"
 #include "berberis/guest_state/guest_addr.h"
 #include "berberis/guest_state/guest_state_riscv64.h"
-#include "berberis/intrinsics/riscv64/guest_fpstate.h"
+#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h"
 #include "berberis/kernel_api/run_guest_syscall.h"
 
 #include "atomics.h"
+#include "fp_regs.h"
 
 namespace berberis {
 
@@ -42,6 +43,8 @@ class Interpreter {
   using Decoder = Decoder<SemanticsPlayer<Interpreter>>;
   using Register = uint64_t;
   using FpRegister = uint64_t;
+  using Float32 = intrinsics::Float32;
+  using Float64 = intrinsics::Float64;
 
   explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {}
 
@@ -352,6 +355,38 @@ class Interpreter {
     return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5);
   }
 
+  FpRegister OpFp(Decoder::OpFpOpcode opcode,  // Dispatches on float_size to the typed OpFp<>().
+                  Decoder::FloatSize float_size,
+                  uint8_t rm,
+                  FpRegister arg1,
+                  FpRegister arg2) {
+    switch (float_size) {
+      case Decoder::FloatSize::kFloat:  // Unbox both operands to Float32, compute, box the result.
+        return NanBoxFloatToFPReg(OpFp<Float32>(
+            opcode, rm, NanUnboxFPRegToFloat<Float32>(arg1), NanUnboxFPRegToFloat<Float32>(arg2)));
+      case Decoder::FloatSize::kDouble:  // Same flow with Float64.
+        return NanBoxFloatToFPReg(OpFp<Float64>(
+            opcode, rm, NanUnboxFPRegToFloat<Float64>(arg1), NanUnboxFPRegToFloat<Float64>(arg2)));
+      default:  // kHalf and kQuad are not handled here.
+        Unimplemented();
+        return {};  // Value-initialized FpRegister, i.e. 0.
+    }
+  }
+
+  // TODO(b/278812060): switch to intrinsics once they become available and stop using
+  // ExecuteFloatOperation directly.
+  template <typename FloatType>
+  FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) {
+    switch (opcode) {
+      case Decoder::OpFpOpcode::kFAdd:  // The only opcode implemented so far.
+        return intrinsics::ExecuteFloatOperation<FloatType>(
+            rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2);
+      default:  // Includes kFSub, kFMul and kFDiv, which have no case here yet.
+        Unimplemented();
+        return {};  // Value-initialized FloatType.
+    }
+  }
+
   Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) {
     switch (opcode) {
       case Decoder::ShiftImmOpcode::kSlli:
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index e35b58f3..b6edfc4f 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -35,28 +35,24 @@ namespace {
 
 class Riscv64InterpreterTest : public ::testing::Test {
  public:
-  void InterpretCLd(uint16_t insn_bytes, uint64_t offset) {
+  template <RegisterType register_type, uint64_t expected_result>  // Data register file; expected memory value.
+  void InterpretCompressedStore(uint16_t insn_bytes, uint64_t offset) {  // Runs one store, then checks store_area_.
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
-    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
-    InterpretInsn(&state_);
-    EXPECT_EQ(GetXReg<8>(state_.cpu), kDataToLoad);
-  }
-
-  void InterpretCLw(uint16_t insn_bytes, uint64_t offset) {
-    auto code_start = ToGuestAddr(&insn_bytes);
-    state_.cpu.insn_addr = code_start;
-    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
+    store_area_ = 0;  // Cleared so a value left by an earlier store cannot satisfy the check.
+    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));  // Base chosen so base + offset == &store_area_.
+    SetReg<register_type, 9>(state_.cpu, kDataToLoad);  // Register 9 holds the value to be stored.
     InterpretInsn(&state_);
-    EXPECT_EQ(GetXReg<8>(state_.cpu), uint64_t(int32_t(kDataToLoad)));
+    EXPECT_EQ(store_area_, expected_result);
   }
 
-  void InterpretCFld(uint16_t insn_bytes, uint64_t offset) {
+  template <RegisterType register_type, uint64_t expected_result>  // Destination register file; expected value.
+  void InterpretCompressedLoad(uint16_t insn_bytes, uint64_t offset) {  // Runs one load, then checks register 9.
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
     InterpretInsn(&state_);
-    EXPECT_EQ(GetFReg<8>(state_.cpu), kDataToLoad);
+    EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result);  // Extra parentheses keep the template comma out of the macro argument list.
   }
 
   void InterpretCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) {
@@ -102,6 +98,17 @@ class Riscv64InterpreterTest : public ::testing::Test {
     }
   }
 
+  void InterpretOpFp(uint32_t insn_bytes,  // Runs insn_bytes once per {arg1, arg2, expected} tuple.
+                     std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
+    for (auto [arg1, arg2, expected_result] : args) {
+      state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);  // Reset before every run.
+      SetFReg<2>(state_.cpu, arg1);  // Operands are raw 64-bit register contents in f2 and f3.
+      SetFReg<3>(state_.cpu, arg2);
+      InterpretInsn(&state_);
+      EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);  // The result is read back from f1.
+    }
+  }
+
   void InterpretFence(uint32_t insn_bytes) {
     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
     InterpretInsn(&state_);
@@ -224,38 +231,6 @@ class Riscv64InterpreterTest : public ::testing::Test {
   ThreadState state_;
 };
 
-TEST_F(Riscv64InterpreterTest, CLd) {
-  union {
-    uint16_t offset;
-    struct {
-      uint8_t : 3;
-      uint8_t i3_i5 : 3;
-      uint8_t i6_i7 : 2;
-    } i_bits;
-  };
-  for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
-    union {
-      int16_t parcel;
-      struct {
-        uint8_t low_opcode : 2;
-        uint8_t rd : 3;
-        uint8_t i6_i7 : 2;
-        uint8_t rs : 3;
-        uint8_t i3_i5 : 3;
-        uint8_t high_opcode : 3;
-      } __attribute__((__packed__));
-    } o_bits = {
-        .low_opcode = 0b00,
-        .rd = 0,
-        .i6_i7 = i_bits.i6_i7,
-        .rs = 0,
-        .i3_i5 = i_bits.i3_i5,
-        .high_opcode = 0b011,
-    };
-    InterpretCLd(o_bits.parcel, offset);
-  }
-}
-
 TEST_F(Riscv64InterpreterTest, CLw) {
   union {
     uint16_t offset;
@@ -280,18 +255,21 @@ TEST_F(Riscv64InterpreterTest, CLw) {
       } __attribute__((__packed__));
     } o_bits = {
         .low_opcode = 0b00,
-        .rd = 0,
+        .rd = 1,
         .i6 = i_bits.i6,
         .i2 = i_bits.i2,
         .rs = 0,
         .i3_i5 = i_bits.i3_i5,
         .high_opcode = 0b010,
     };
-    InterpretCLw(o_bits.parcel, offset);
+    InterpretCompressedLoad<RegisterType::kReg,
+                            static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad))>(o_bits.parcel,
+                                                                                      offset);
   }
 }
 
-TEST_F(Riscv64InterpreterTest, CFld) {
+template <uint16_t opcode, auto execute_instruction_func>
+void TestCompressedLoadOrStore(Riscv64InterpreterTest* that) {
   union {
     uint16_t offset;
     struct {
@@ -303,26 +281,45 @@ TEST_F(Riscv64InterpreterTest, CFld) {
   for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
     union {
       int16_t parcel;
-      struct {
+      struct [[gnu::packed]] {
         uint8_t low_opcode : 2;
         uint8_t rd : 3;
         uint8_t i6_i7 : 2;
         uint8_t rs : 3;
         uint8_t i3_i5 : 3;
         uint8_t high_opcode : 3;
-      } __attribute__((__packed__));
+      };
     } o_bits = {
         .low_opcode = 0b00,
-        .rd = 0,
+        .rd = 1,
         .i6_i7 = i_bits.i6_i7,
         .rs = 0,
         .i3_i5 = i_bits.i3_i5,
-        .high_opcode = 0b001,
+        .high_opcode = 0b000,
     };
-    InterpretCFld(o_bits.parcel, offset);
+    (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
   }
 }
 
+TEST_F(Riscv64InterpreterTest, CompressedLoadAndStores) {  // Each literal sets only its top three bits.
+  // c.Fld
+  TestCompressedLoadOrStore<
+      0b001'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kFpReg, kDataToLoad>>(this);
+  // c.Ld
+  TestCompressedLoadOrStore<
+      0b011'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kReg, kDataToLoad>>(this);
+  // c.Fsd
+  TestCompressedLoadOrStore<
+      0b101'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kFpReg, kDataToLoad>>(this);
+  // c.Sd
+  TestCompressedLoadOrStore<
+      0b111'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kReg, kDataToLoad>>(this);
+}
+
 TEST_F(Riscv64InterpreterTest, CAddi) {
   union {
     int8_t offset;
@@ -633,6 +630,231 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) {
   InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
 }
 
+TEST_F(Riscv64InterpreterTest, OpFpInstructions) {  // Checks 1 + 2 == 3 for both operand sizes.
+  // FAdd.S
+  InterpretOpFp(0x003100d3,  // Size field (bits 25..26) is 0b00.
+                {{bit_cast<uint32_t>(1.0f) | 0xffff'ffff'0000'0000,  // Operands with upper 32 bits all ones.
+                  bit_cast<uint32_t>(2.0f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(3.0f) | 0xffff'ffff'0000'0000}});
+  // FAdd.D
+  InterpretOpFp(0x023100d3,  // Same encoding with the size field set to 0b01.
+                {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}});
+}
+
+TEST_F(Riscv64InterpreterTest, RoundingModeTest) {
+  // FAdd.S
+  InterpretOpFp(0x003100d3,
+                // Test RNE
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003110d3,
+                // Test RTZ
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003120d3,
+                // Test RDN
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003130d3,
+                // Test RUP
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003140d3,
+                // Test RMM
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+
+  // FAdd.D
+  InterpretOpFp(0x023100d3,
+                // Test RNE
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000009)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000009)}});
+  // FAdd.D
+  InterpretOpFp(0x023110d3,
+                // Test RTZ
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000002)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000002)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)}});
+  // FAdd.D
+  InterpretOpFp(0x023120d3,
+                // Test RDN
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000002)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000009)}});
+  // FAdd.D
+  InterpretOpFp(0x023130d3,
+                // Test RUP
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000009)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000002)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)}});
+  // FAdd.D
+  InterpretOpFp(0x023140d3,
+                // Test RMM
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000009)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000009)}});
+}
+
 TEST_F(Riscv64InterpreterTest, LoadInstructions) {
   // Offset is always 8.
   // Lbu
diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
index 30f83e55..694afba9 100644
--- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h
+++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
@@ -64,23 +64,15 @@ class WrappedFloatType {
   explicit constexpr operator uint32_t() const { return value_; }
   explicit constexpr operator int64_t() const { return value_; }
   explicit constexpr operator uint64_t() const { return value_; }
-
-  auto BitCastToIntOfSameSize() {
-    if constexpr (std::is_same_v<BaseType, float>) {
-      return bit_cast<int32_t>(value_);
-    } else {
-      static_assert(std::is_same_v<BaseType, double>, "Only float and double BaseType supported.");
-      return bit_cast<int64_t>(value_);
-    }
-  }
-
-  // Only valid for BaseType==double. Returns the bit representation of the fp value.
   explicit constexpr operator WrappedFloatType<float>() const {
     return WrappedFloatType<float>(value_);
   }
   explicit constexpr operator WrappedFloatType<double>() const {
     return WrappedFloatType<double>(value_);
   }
+#if defined(__i386__) || defined(__x86_64__)
+  explicit constexpr operator long double() const { return value_; }  // Declared on x86 only.
+#endif  // defined(__i386__) || defined(__x86_64__)
   // Note: we don't provide unary operator-.  That's done on purpose: with floats -x and 0.-x
   // produce different results which could be surprising.  Use fneg instead of unary operator-.
   friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2);
diff --git a/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
new file mode 100644
index 00000000..db8ff249
--- /dev/null
+++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+
+#include <limits>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+#include "berberis/intrinsics/riscv64/guest_fpstate.h"  // ScopedRoundingMode
+#include "berberis/intrinsics/type_traits.h"
+
+namespace berberis::intrinsics {
+
+// Executes |operation| with RMM rounding (aka FE_TIESAWAY: to nearest, ties away from zero).
+//
+// x86 architecture doesn't support RMM, but it can be easily emulated since it has support for
+// 80bit floats: if calculations are done with one bit (or more) of extra precision in the
+// FE_TOWARDZERO mode then we can easily adjust fraction part and would only need to remember that
+// this addition may overflow.
+//
+// |operation| is invoked once, with every argument converted to TypeTraits<FloatType>::Wide, and
+// its result is rounded to FloatType precision before being converted back. This function doesn't
+// touch the host rounding mode itself; FE_TOWARDZERO is presumably established by the caller.
+// Only Float32 and Float64 are supported (checked at compile time).
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperationRmm(OperationType operation, Args... args)
+    -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+  // Any other type would match neither branch below and be returned without ties-away rounding.
+  static_assert(std::is_same_v<FloatType, Float32> || std::is_same_v<FloatType, Float64>,
+                "RMM emulation is only implemented for Float32 and Float64");
+  using Wide = typename TypeTraits<FloatType>::Wide;
+  Wide wide_result = operation(Wide(args)...);
+  if constexpr (std::is_same_v<FloatType, Float32>) {
+    // In the 32bit->64bit case everything happens almost automatically: integer addition carries
+    // into the exponent on its own, and clearing the low bits afterwards guarantees that a carry
+    // all the way up yields ±∞ and not NaN.
+    auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result);
+    // ±∞ and NaNs (all-ones exponent) are passed through untouched.
+    if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) {
+      return FloatType(wide_result);
+    }
+    int_result += 0x0000'0000'1000'0000;  // Half of the lowest bit that is kept (bit 29).
+    int_result &= 0xffff'ffff'e000'0000;  // Clear the 29 extra low bits.
+    wide_result = bit_cast<Wide>(int_result);
+  } else if constexpr (std::is_same_v<FloatType, Float64>) {
+    // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not
+    // pseudo-infinity (supported on 8087/80287, but not on modern CPUs).
+    struct {
+      uint64_t significand;
+      uint16_t exponent;
+      uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)];
+    } fp80_parts;
+    static_assert(sizeof fp80_parts == sizeof(Wide));
+    memcpy(&fp80_parts, &wide_result, sizeof(wide_result));
+    // Don't try to round ±∞ and NaNs (all-ones exponent), nor values whose top significand bit is
+    // clear (±0 and 80bit denormals).
+    // NOTE(review): the original comment justified this with "denormals are not supported by
+    // RISC-V". The rounding position below is fixed, so a result that falls into the Float64
+    // subnormal range is not rounded at its own last bit; confirm that this is intended.
+    if ((fp80_parts.exponent & 0x7fff) == 0x7fff ||
+        (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) {
+      return FloatType(wide_result);
+    }
+    fp80_parts.significand += 0x0000'0000'0000'0400;  // Half of the lowest bit that is kept.
+    fp80_parts.significand &= 0xffff'ffff'ffff'f800;  // Clear the 11 extra low bits.
+    // The addition wrapped around: move the carry into the exponent and restore the top bit.
+    if (fp80_parts.significand == 0) {
+      fp80_parts.exponent++;
+      fp80_parts.significand = 0x8000'0000'0000'0000;
+    }
+    memcpy(&wide_result, &fp80_parts, sizeof(wide_result));
+  }
+  return FloatType(wide_result);
+}
+
+// Runs |operation| on |args| using the rounding mode selected by |requested_rm|.
+//
+// When |requested_rm| is FPFlags::DYN the mode from |current_rm| is used instead. If the
+// requested mode maps to a host rounding mode that differs from the one |current_rm| maps to, a
+// ScopedRoundingMode for the requested host mode is held while the operation runs. FPFlags::RMM
+// is always dispatched to ExecuteFloatOperationRmm.
+//
+// Note: first round of rm/frm verification must happen before that function because RISC-V
+// postulates that invalid rm or frm should trigger illegal instruction exception.
+// Here we can assume both rm and frm fields are valid.
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperation(uint8_t requested_rm,
+                                  uint8_t current_rm,
+                                  OperationType operation,
+                                  Args... args)
+    -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+  int requested_host_mode = ToHostRoundingMode(requested_rm);
+  int current_host_mode = ToHostRoundingMode(current_rm);
+  // Performs the operation for the given guest rounding mode; any host mode switch that is
+  // needed has to be in place before this is called.
+  auto run = [&](uint8_t guest_rm) -> FloatType {
+    if (guest_rm == FPFlags::RMM) {
+      return ExecuteFloatOperationRmm<FloatType>(operation, args...);
+    }
+    return operation(args...);
+  };
+  // Dynamic mode: use the current mode as is.
+  if (requested_rm == FPFlags::DYN) {
+    return run(current_rm);
+  }
+  // Static mode which needs a different host rounding mode than the current one.
+  if (requested_host_mode != current_host_mode) {
+    ScopedRoundingMode scoped_rounding_mode{requested_host_mode};
+    return run(requested_rm);
+  }
+  // Static mode which maps to the same host rounding mode as the current one.
+  return run(requested_rm);
+}
+
+}  // namespace berberis::intrinsics
+
+#endif  // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h
index f1f6f75d..2785b92d 100644
--- a/intrinsics/include/berberis/intrinsics/type_traits.h
+++ b/intrinsics/include/berberis/intrinsics/type_traits.h
@@ -93,11 +93,17 @@ struct TypeTraits<int64_t> {
 template <>
 struct TypeTraits<intrinsics::Float32> {
   using Int = int32_t;
+  using Wide = intrinsics::Float64;  // Type used when extra precision is needed.
 };
 
 template <>
 struct TypeTraits<intrinsics::Float64> {
   using Int = int64_t;
+  using Narrow = intrinsics::Float32;
+#if defined(__i386__) || defined(__x86_64__)
+  static_assert(sizeof(long double) > sizeof(intrinsics::Float64));  // Wide must be larger.
+  using Wide = long double;  // Declared on x86 only.
+#endif  // defined(__i386__) || defined(__x86_64__)
 };
 
 #if defined(__x86_64__)
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2023-04-22 01:27:17 +0000
committer	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2023-04-22 01:27:17 +0000
commit	65790b034e7d97022c7c8ea9a6e930333d3c1fd9 (patch)
tree	a076320347f30694dd7f815ea3c7ef0519645811
parent	722006fdeb16a49976325a0d74ab662992adba1a (diff)
parent	1bf3e03bcb66e3a87a0bc3c69affd8ee56a45a00 (diff)
download	binary_translation-65790b034e7d97022c7c8ea9a6e930333d3c1fd9.tar.gz