From 447a9c00cad71c90e76bb476276ac0edc74cf758 Mon Sep 17 00:00:00 2001
From: Victor Khimenko <khim@google.com>
Date: Wed, 19 Apr 2023 22:12:47 +0000
Subject: Merge similar compressed instructions

Bug: 265372622

Test: berberis_host_tests/berberis_host_tests

Change-Id: Ic4c7813319a06afde2425e4acf1fc94c2e4e226b
---
 decoder/include/berberis/decoder/riscv64/decoder.h | 28 +++-------
 interpreter/riscv64/interpreter_test.cc            | 62 +++++++---------------
 2 files changed, 26 insertions(+), 64 deletions(-)
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index dc8303d1..e91ee18f 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -430,13 +430,13 @@ class Decoder {
         DecodeCAddi();
         break;
       case CompressedOpcode::kFld:
-        DecodeCFld();
+        DecodeCLoadStore<LoadFpArgs, LoadFpOpcode::kFld, &InsnConsumer::Load>();
         break;
       case CompressedOpcode::kLw:
         DecodeCLw();
         break;
       case CompressedOpcode::kLd:
-        DecodeCLd();
+        DecodeCLoadStore<LoadArgs, LoadOpcode::kLd, &InsnConsumer::Load>();
         break;
       default:
         insn_consumer_->Unimplemented();
@@ -444,19 +444,20 @@ class Decoder {
     return 2;
   }
 
-  void DecodeCLd() {
+  template <typename Args, auto opcode, void (InsnConsumer::*Op)(const Args&)>
+  void DecodeCLoadStore() {
     uint8_t low_imm = GetBits<uint8_t, 5, 2>();
     uint8_t high_imm = GetBits<uint8_t, 10, 3>();
     uint8_t imm = (low_imm << 6 | high_imm << 3);
     uint8_t rd = GetBits<uint8_t, 2, 3>();
     uint8_t rs = GetBits<uint8_t, 7, 3>();
-    const LoadArgs args = {
-        .opcode = LoadOpcode::kLd,
+    const Args args = {
+        .opcode = opcode,
         .dst = uint8_t(8 + rd),
         .src = uint8_t(8 + rs),
         .offset = imm,
     };
-    insn_consumer_->Load(args);
+    (insn_consumer_->*Op)(args);
   }
 
   void DecodeCLw() {
@@ -475,21 +476,6 @@ class Decoder {
     insn_consumer_->Load(args);
   }
 
-  void DecodeCFld() {
-    uint8_t low_imm = GetBits<uint8_t, 5, 2>();
-    uint8_t high_imm = GetBits<uint8_t, 10, 3>();
-    uint8_t imm = (low_imm << 6 | high_imm << 3);
-    uint8_t rd = GetBits<uint8_t, 2, 3>();
-    uint8_t rs = GetBits<uint8_t, 7, 3>();
-    const LoadFpArgs args = {
-        .opcode = LoadFpOpcode::kFld,
-        .dst = uint8_t(8 + rd),
-        .src = uint8_t(8 + rs),
-        .offset = imm,
-    };
-    insn_consumer_->Load(args);
-  }
-
   void DecodeCAddi() {
     uint8_t low_imm = GetBits<uint8_t, 2, 5>();
     uint8_t high_imm = GetBits<uint8_t, 12, 1>();
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index e35b58f3..c3d2b5f1 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -35,28 +35,28 @@ namespace {
 
 class Riscv64InterpreterTest : public ::testing::Test {
  public:
-  void InterpretCLd(uint16_t insn_bytes, uint64_t offset) {
+  void InterpretCFld(uint16_t insn_bytes, uint64_t offset) {
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
     InterpretInsn(&state_);
-    EXPECT_EQ(GetXReg<8>(state_.cpu), kDataToLoad);
+    EXPECT_EQ(GetFReg<8>(state_.cpu), kDataToLoad);
   }
 
-  void InterpretCLw(uint16_t insn_bytes, uint64_t offset) {
+  void InterpretCLd(uint16_t insn_bytes, uint64_t offset) {
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
     InterpretInsn(&state_);
-    EXPECT_EQ(GetXReg<8>(state_.cpu), uint64_t(int32_t(kDataToLoad)));
+    EXPECT_EQ(GetXReg<8>(state_.cpu), kDataToLoad);
   }
 
-  void InterpretCFld(uint16_t insn_bytes, uint64_t offset) {
+  void InterpretCLw(uint16_t insn_bytes, uint64_t offset) {
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
     InterpretInsn(&state_);
-    EXPECT_EQ(GetFReg<8>(state_.cpu), kDataToLoad);
+    EXPECT_EQ(GetXReg<8>(state_.cpu), uint64_t(int32_t(kDataToLoad)));
   }
 
   void InterpretCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) {
@@ -224,38 +224,6 @@ class Riscv64InterpreterTest : public ::testing::Test {
   ThreadState state_;
 };
 
-TEST_F(Riscv64InterpreterTest, CLd) {
-  union {
-    uint16_t offset;
-    struct {
-      uint8_t : 3;
-      uint8_t i3_i5 : 3;
-      uint8_t i6_i7 : 2;
-    } i_bits;
-  };
-  for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
-    union {
-      int16_t parcel;
-      struct {
-        uint8_t low_opcode : 2;
-        uint8_t rd : 3;
-        uint8_t i6_i7 : 2;
-        uint8_t rs : 3;
-        uint8_t i3_i5 : 3;
-        uint8_t high_opcode : 3;
-      } __attribute__((__packed__));
-    } o_bits = {
-        .low_opcode = 0b00,
-        .rd = 0,
-        .i6_i7 = i_bits.i6_i7,
-        .rs = 0,
-        .i3_i5 = i_bits.i3_i5,
-        .high_opcode = 0b011,
-    };
-    InterpretCLd(o_bits.parcel, offset);
-  }
-}
-
 TEST_F(Riscv64InterpreterTest, CLw) {
   union {
     uint16_t offset;
@@ -291,7 +259,8 @@ TEST_F(Riscv64InterpreterTest, CLw) {
   }
 }
 
-TEST_F(Riscv64InterpreterTest, CFld) {
+template <uint16_t opcode, auto execute_instruction_func>
+void TestCompressedLoadOrStore(Riscv64InterpreterTest* that) {
   union {
     uint16_t offset;
     struct {
@@ -303,26 +272,33 @@ TEST_F(Riscv64InterpreterTest, CFld) {
   for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
     union {
       int16_t parcel;
-      struct {
+      struct [[gnu::packed]] {
         uint8_t low_opcode : 2;
         uint8_t rd : 3;
         uint8_t i6_i7 : 2;
         uint8_t rs : 3;
         uint8_t i3_i5 : 3;
         uint8_t high_opcode : 3;
-      } __attribute__((__packed__));
+      };
     } o_bits = {
         .low_opcode = 0b00,
         .rd = 0,
         .i6_i7 = i_bits.i6_i7,
         .rs = 0,
         .i3_i5 = i_bits.i3_i5,
-        .high_opcode = 0b001,
+        .high_opcode = 0b000,
     };
-    InterpretCFld(o_bits.parcel, offset);
+    (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
   }
 }
 
+TEST_F(Riscv64InterpreterTest, CompressedLoadAndStores) {
+  // c.Fld
+  TestCompressedLoadOrStore<0b001'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCFld>(this);
+  // c.Ld
+  TestCompressedLoadOrStore<0b011'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCLd>(this);
+}
+
 TEST_F(Riscv64InterpreterTest, CAddi) {
   union {
     int8_t offset;
-- 
cgit v1.2.3


From 7358ad077e6046160d27b1dc9096e9d8689ad3b7 Mon Sep 17 00:00:00 2001
From: Gaurav Kumar <gauravroy2599@gmail.com>
Date: Fri, 21 Apr 2023 00:10:05 +0530
Subject: interp: Add c.Fsd and c.Sd compressed instructions.

Bug: 265372622

Test: berberis_host_tests/berberis_host_tests

Change-Id: I3c9bec244db31ea1330144deb4f0815d68cf8fbc
---
 decoder/include/berberis/decoder/riscv64/decoder.h | 36 ++++++++++++++++------
 interpreter/riscv64/interpreter_test.cc            | 30 ++++++++++++++++--
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index e91ee18f..ec51fe37 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -430,13 +430,19 @@ class Decoder {
         DecodeCAddi();
         break;
       case CompressedOpcode::kFld:
-        DecodeCLoadStore<LoadFpArgs, LoadFpOpcode::kFld, &InsnConsumer::Load>();
+        DecodeCLoadStore<LoadFpArgs, LoadFpOpcode::kFld, false>();
         break;
       case CompressedOpcode::kLw:
         DecodeCLw();
         break;
       case CompressedOpcode::kLd:
-        DecodeCLoadStore<LoadArgs, LoadOpcode::kLd, &InsnConsumer::Load>();
+        DecodeCLoadStore<LoadArgs, LoadOpcode::kLd, false>();
+        break;
+      case CompressedOpcode::kFsd:
+        DecodeCLoadStore<StoreFpArgs, StoreFpOpcode::kFsd, true>();
+        break;
+      case CompressedOpcode::kSd:
+        DecodeCLoadStore<StoreArgs, StoreOpcode::kSd, true>();
         break;
       default:
         insn_consumer_->Unimplemented();
@@ -444,20 +450,30 @@ class Decoder {
     return 2;
   }
 
-  template <typename Args, auto opcode, void (InsnConsumer::*Op)(const Args&)>
+  template <typename Args, auto opcode, bool kStore>
   void DecodeCLoadStore() {
     uint8_t low_imm = GetBits<uint8_t, 5, 2>();
     uint8_t high_imm = GetBits<uint8_t, 10, 3>();
     uint8_t imm = (low_imm << 6 | high_imm << 3);
     uint8_t rd = GetBits<uint8_t, 2, 3>();
     uint8_t rs = GetBits<uint8_t, 7, 3>();
-    const Args args = {
-        .opcode = opcode,
-        .dst = uint8_t(8 + rd),
-        .src = uint8_t(8 + rs),
-        .offset = imm,
-    };
-    (insn_consumer_->*Op)(args);
+    if constexpr (kStore) {
+      const Args args = {
+          .opcode = opcode,
+          .src = uint8_t(8 + rs),
+          .offset = imm,
+          .data = uint8_t(8 + rd),
+      };
+      insn_consumer_->Store(args);
+    } else {
+      const Args args = {
+          .opcode = opcode,
+          .dst = uint8_t(8 + rd),
+          .src = uint8_t(8 + rs),
+          .offset = imm,
+      };
+      insn_consumer_->Load(args);
+    }
   }
 
   void DecodeCLw() {
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index c3d2b5f1..742cf028 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -35,12 +35,32 @@ namespace {
 
 class Riscv64InterpreterTest : public ::testing::Test {
  public:
+  void InterpretCSd(uint16_t insn_bytes, uint64_t offset) {
+    auto code_start = ToGuestAddr(&insn_bytes);
+    state_.cpu.insn_addr = code_start;
+    store_area_ = 0;
+    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));
+    SetXReg<9>(state_.cpu, kDataToLoad);
+    InterpretInsn(&state_);
+    EXPECT_EQ(store_area_, kDataToLoad);
+  }
+
+  void InterpretCFsd(uint16_t insn_bytes, uint64_t offset) {
+    auto code_start = ToGuestAddr(&insn_bytes);
+    state_.cpu.insn_addr = code_start;
+    store_area_ = 0;
+    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));
+    SetFReg<9>(state_.cpu, kDataToLoad);
+    InterpretInsn(&state_);
+    EXPECT_EQ(store_area_, kDataToLoad);
+  }
+
   void InterpretCFld(uint16_t insn_bytes, uint64_t offset) {
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
     InterpretInsn(&state_);
-    EXPECT_EQ(GetFReg<8>(state_.cpu), kDataToLoad);
+    EXPECT_EQ(GetFReg<9>(state_.cpu), kDataToLoad);
   }
 
   void InterpretCLd(uint16_t insn_bytes, uint64_t offset) {
@@ -48,7 +68,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
     state_.cpu.insn_addr = code_start;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
     InterpretInsn(&state_);
-    EXPECT_EQ(GetXReg<8>(state_.cpu), kDataToLoad);
+    EXPECT_EQ(GetXReg<9>(state_.cpu), kDataToLoad);
   }
 
   void InterpretCLw(uint16_t insn_bytes, uint64_t offset) {
@@ -282,7 +302,7 @@ void TestCompressedLoadOrStore(Riscv64InterpreterTest* that) {
       };
     } o_bits = {
         .low_opcode = 0b00,
-        .rd = 0,
+        .rd = 1,
         .i6_i7 = i_bits.i6_i7,
         .rs = 0,
         .i3_i5 = i_bits.i3_i5,
@@ -297,6 +317,10 @@ TEST_F(Riscv64InterpreterTest, CompressedLoadAndStores) {
   TestCompressedLoadOrStore<0b001'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCFld>(this);
   // c.Ld
   TestCompressedLoadOrStore<0b011'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCLd>(this);
+  // c.Fsd
+  TestCompressedLoadOrStore<0b101'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCFsd>(this);
+  // c.Sd
+  TestCompressedLoadOrStore<0b111'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCSd>(this);
 }
 
 TEST_F(Riscv64InterpreterTest, CAddi) {
-- 
cgit v1.2.3


From 96dedfd85fa7f3648ed487f910ea48db1f68a5ec Mon Sep 17 00:00:00 2001
From: Victor Khimenko <khim@google.com>
Date: Fri, 21 Apr 2023 00:09:04 +0000
Subject: Merge load/store helper functions.

Bug: 265372622

Test: berberis_host_tests/berberis_host_tests

Change-Id: I1cbab04c23b06c4f4b64c195aeed81ed4b620f94
---
 base/include/berberis/base/dependent_false.h       | 32 ++++++++++++
 decoder/include/berberis/decoder/riscv64/decoder.h | 45 +++++++---------
 .../berberis/guest_state/guest_state_riscv64.h     | 28 ++++++++++
 interpreter/riscv64/interpreter_test.cc            | 60 +++++++++-------------
 4 files changed, 101 insertions(+), 64 deletions(-)
 create mode 100644 base/include/berberis/base/dependent_false.h

diff --git a/base/include/berberis/base/dependent_false.h b/base/include/berberis/base/dependent_false.h
new file mode 100644
index 00000000..f01a48e0
--- /dev/null
+++ b/base/include/berberis/base/dependent_false.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_BASE_DEPENDENT_FALSE_H_
+#define BERBERIS_BASE_DEPENDENT_FALSE_H_
+
+#include <type_traits>
+
+namespace berberis {
+
+template <typename T>
+inline constexpr bool kDependentTypeFalse = false;
+
+template <auto T>
+inline constexpr bool kDependentValueFalse = false;
+
+}  // namespace berberis
+
+#endif  // BERBERIS_BASE_DEPENDENT_FALSE_H_
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index ec51fe37..585b16e8 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -430,19 +430,19 @@ class Decoder {
         DecodeCAddi();
         break;
       case CompressedOpcode::kFld:
-        DecodeCLoadStore<LoadFpArgs, LoadFpOpcode::kFld, false>();
+        DecodeCompressedLoadStore<LoadFpOpcode::kFld>();
         break;
       case CompressedOpcode::kLw:
-        DecodeCLw();
+        DecodeCompressedLoadStore<LoadOpcode::kLw>();
         break;
       case CompressedOpcode::kLd:
-        DecodeCLoadStore<LoadArgs, LoadOpcode::kLd, false>();
+        DecodeCompressedLoadStore<LoadOpcode::kLd>();
         break;
       case CompressedOpcode::kFsd:
-        DecodeCLoadStore<StoreFpArgs, StoreFpOpcode::kFsd, true>();
+        DecodeCompressedLoadStore<StoreFpOpcode::kFsd>();
         break;
       case CompressedOpcode::kSd:
-        DecodeCLoadStore<StoreArgs, StoreOpcode::kSd, true>();
+        DecodeCompressedLoadStore<StoreOpcode::kSd>();
         break;
       default:
         insn_consumer_->Unimplemented();
@@ -450,15 +450,22 @@ class Decoder {
     return 2;
   }
 
-  template <typename Args, auto opcode, bool kStore>
-  void DecodeCLoadStore() {
+  template <auto opcode>
+  void DecodeCompressedLoadStore() {
     uint8_t low_imm = GetBits<uint8_t, 5, 2>();
     uint8_t high_imm = GetBits<uint8_t, 10, 3>();
-    uint8_t imm = (low_imm << 6 | high_imm << 3);
+    uint8_t imm;
+    if constexpr ((uint8_t(opcode) & 1) == 0) {
+      constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};
+      imm = (kLwLow[low_imm] | high_imm << 3);
+    } else {
+      imm = (low_imm << 6 | high_imm << 3);
+    }
     uint8_t rd = GetBits<uint8_t, 2, 3>();
     uint8_t rs = GetBits<uint8_t, 7, 3>();
-    if constexpr (kStore) {
-      const Args args = {
+    if constexpr (std::is_same_v<decltype(opcode), StoreOpcode> ||
+                  std::is_same_v<decltype(opcode), StoreFpOpcode>) {
+      const StoreArgsTemplate<decltype(opcode)> args = {
           .opcode = opcode,
           .src = uint8_t(8 + rs),
           .offset = imm,
@@ -466,7 +473,7 @@ class Decoder {
       };
       insn_consumer_->Store(args);
     } else {
-      const Args args = {
+      const LoadArgsTemplate<decltype(opcode)> args = {
           .opcode = opcode,
           .dst = uint8_t(8 + rd),
           .src = uint8_t(8 + rs),
@@ -476,22 +483,6 @@ class Decoder {
     }
   }
 
-  void DecodeCLw() {
-    constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};
-    uint8_t low_imm = GetBits<uint8_t, 5, 2>();
-    uint8_t high_imm = GetBits<uint8_t, 10, 3>();
-    uint8_t imm = (kLwLow[low_imm] | high_imm << 3);
-    uint8_t rd = GetBits<uint8_t, 2, 3>();
-    uint8_t rs = GetBits<uint8_t, 7, 3>();
-    const LoadArgs args = {
-        .opcode = LoadOpcode::kLw,
-        .dst = uint8_t(8 + rd),
-        .src = uint8_t(8 + rs),
-        .offset = imm,
-    };
-    insn_consumer_->Load(args);
-  }
-
   void DecodeCAddi() {
     uint8_t low_imm = GetBits<uint8_t, 2, 5>();
     uint8_t high_imm = GetBits<uint8_t, 12, 1>();
diff --git a/guest_state/include/berberis/guest_state/guest_state_riscv64.h b/guest_state/include/berberis/guest_state/guest_state_riscv64.h
index 8546311f..82aad665 100644
--- a/guest_state/include/berberis/guest_state/guest_state_riscv64.h
+++ b/guest_state/include/berberis/guest_state/guest_state_riscv64.h
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 
+#include "berberis/base/dependent_false.h"
 #include "berberis/base/macros.h"
 #include "berberis/guest_state/guest_addr.h"
 
@@ -74,6 +75,33 @@ inline void SetFReg(CPUState& state, uint64_t val) {
   state.f[kIndex] = val;
 }
 
+enum class RegisterType {
+  kReg,
+  kFpReg,
+};
+
+template <RegisterType register_type, uint8_t kIndex>
+inline auto GetReg(const CPUState& state) {
+  if constexpr (register_type == RegisterType::kReg) {
+    return GetXReg<kIndex>(state);
+  } else if constexpr (register_type == RegisterType::kFpReg) {
+    return GetFReg<kIndex>(state);
+  } else {
+    static_assert(kDependentValueFalse<register_type>, "Unsupported register type");
+  }
+}
+
+template <RegisterType register_type, uint8_t kIndex, typename Register>
+inline auto SetReg(CPUState& state, Register val) {
+  if constexpr (register_type == RegisterType::kReg) {
+    return SetXReg<kIndex>(state, val);
+  } else if constexpr (register_type == RegisterType::kFpReg) {
+    return SetFReg<kIndex>(state, val);
+  } else {
+    static_assert(kDependentValueFalse<register_type>, "Unsupported register type");
+  }
+}
+
 struct ThreadState {
   CPUState cpu;
 };
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 742cf028..3582e588 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -35,48 +35,24 @@ namespace {
 
 class Riscv64InterpreterTest : public ::testing::Test {
  public:
-  void InterpretCSd(uint16_t insn_bytes, uint64_t offset) {
+  template <RegisterType register_type, uint64_t expected_result>
+  void InterpretCompressedStore(uint16_t insn_bytes, uint64_t offset) {
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
     store_area_ = 0;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));
-    SetXReg<9>(state_.cpu, kDataToLoad);
+    SetReg<register_type, 9>(state_.cpu, kDataToLoad);
     InterpretInsn(&state_);
-    EXPECT_EQ(store_area_, kDataToLoad);
-  }
-
-  void InterpretCFsd(uint16_t insn_bytes, uint64_t offset) {
-    auto code_start = ToGuestAddr(&insn_bytes);
-    state_.cpu.insn_addr = code_start;
-    store_area_ = 0;
-    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));
-    SetFReg<9>(state_.cpu, kDataToLoad);
-    InterpretInsn(&state_);
-    EXPECT_EQ(store_area_, kDataToLoad);
-  }
-
-  void InterpretCFld(uint16_t insn_bytes, uint64_t offset) {
-    auto code_start = ToGuestAddr(&insn_bytes);
-    state_.cpu.insn_addr = code_start;
-    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
-    InterpretInsn(&state_);
-    EXPECT_EQ(GetFReg<9>(state_.cpu), kDataToLoad);
-  }
-
-  void InterpretCLd(uint16_t insn_bytes, uint64_t offset) {
-    auto code_start = ToGuestAddr(&insn_bytes);
-    state_.cpu.insn_addr = code_start;
-    SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
-    InterpretInsn(&state_);
-    EXPECT_EQ(GetXReg<9>(state_.cpu), kDataToLoad);
+    EXPECT_EQ(store_area_, expected_result);
   }
 
-  void InterpretCLw(uint16_t insn_bytes, uint64_t offset) {
+  template <RegisterType register_type, uint64_t expected_result>
+  void InterpretCompressedLoad(uint16_t insn_bytes, uint64_t offset) {
     auto code_start = ToGuestAddr(&insn_bytes);
     state_.cpu.insn_addr = code_start;
     SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
     InterpretInsn(&state_);
-    EXPECT_EQ(GetXReg<8>(state_.cpu), uint64_t(int32_t(kDataToLoad)));
+    EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result);
   }
 
   void InterpretCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) {
@@ -268,14 +244,16 @@ TEST_F(Riscv64InterpreterTest, CLw) {
       } __attribute__((__packed__));
     } o_bits = {
         .low_opcode = 0b00,
-        .rd = 0,
+        .rd = 1,
         .i6 = i_bits.i6,
         .i2 = i_bits.i2,
         .rs = 0,
         .i3_i5 = i_bits.i3_i5,
         .high_opcode = 0b010,
     };
-    InterpretCLw(o_bits.parcel, offset);
+    InterpretCompressedLoad<RegisterType::kReg,
+                            static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad))>(o_bits.parcel,
+                                                                                      offset);
   }
 }
 
@@ -314,13 +292,21 @@ void TestCompressedLoadOrStore(Riscv64InterpreterTest* that) {
 
 TEST_F(Riscv64InterpreterTest, CompressedLoadAndStores) {
   // c.Fld
-  TestCompressedLoadOrStore<0b001'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCFld>(this);
+  TestCompressedLoadOrStore<
+      0b001'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kFpReg, kDataToLoad>>(this);
   // c.Ld
-  TestCompressedLoadOrStore<0b011'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCLd>(this);
+  TestCompressedLoadOrStore<
+      0b011'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kReg, kDataToLoad>>(this);
   // c.Fsd
-  TestCompressedLoadOrStore<0b101'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCFsd>(this);
+  TestCompressedLoadOrStore<
+      0b101'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kFpReg, kDataToLoad>>(this);
   // c.Sd
-  TestCompressedLoadOrStore<0b111'000'000'00'000'00, &Riscv64InterpreterTest::InterpretCSd>(this);
+  TestCompressedLoadOrStore<
+      0b111'000'000'00'000'00,
+      &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kReg, kDataToLoad>>(this);
 }
 
 TEST_F(Riscv64InterpreterTest, CAddi) {
-- 
cgit v1.2.3


From c1a367e7cf18df4f18a91246e25b439ec1457a44 Mon Sep 17 00:00:00 2001
From: Aman <amankumar2198@gmail.com>
Date: Tue, 18 Apr 2023 19:35:45 +0530
Subject: interp: added FAdd instruction.

Bug: 265372622

Test: berberis_host_tests/berberis_host_tests

Change-Id: I546086534d44fc09892d752dd1ed52008822d418
---
 decoder/include/berberis/decoder/riscv64/decoder.h |  45 +++++++++
 .../berberis/decoder/riscv64/semantics_player.h    |   7 ++
 interpreter/riscv64/fp_regs.h                      |  64 +++++++++++++
 interpreter/riscv64/interpreter.cc                 |  37 +++++++-
 interpreter/riscv64/interpreter_test.cc            |  22 +++++
 .../include/berberis/intrinsics/intrinsics_float.h |  14 +--
 .../riscv64_to_x86_64/intrinsics_float.h           | 101 +++++++++++++++++++++
 .../include/berberis/intrinsics/type_traits.h      |   6 ++
 8 files changed, 284 insertions(+), 12 deletions(-)
 create mode 100644 interpreter/riscv64/fp_regs.h
 create mode 100644 intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h

diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index e91ee18f..1fda1316 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -194,6 +194,17 @@ class Decoder {
     kMaxAmoOpcode = 0b11111'111,
   };
 
+  enum class OpFpOpcode {
+    // Bit #2 = 1 means rm is an opcode extension.
+    // Bit #3 = 1 means rs2 is an opcode extension
+    // Bits #4, #1, and #0 - actual opcode.
+    kFAdd = 0b0'0'0'00,
+    kFSub = 0b0'0'0'01,
+    kFMul = 0b0'0'0'10,
+    kFDiv = 0b0'0'0'11,
+    kMaxOpFpOpcode = 0b1'1'1'11,
+  };
+
   enum class LoadOpcode {
     kLb = 0b000,
     kLh = 0b001,
@@ -277,6 +288,14 @@ class Decoder {
     kMaxCsrRegister = 0b11'11'1111'1111,
   };
 
+  enum class FloatSize {
+    kFloat = 0b00,
+    kDouble = 0b01,
+    kHalf = 0b10,
+    kQuad = 0b11,
+    kMaxFloatSize = 0b11,
+  };
+
   struct AmoArgs {
     AmoOpcode opcode;
     uint8_t dst;
@@ -379,6 +398,15 @@ class Decoder {
   using StoreArgs = StoreArgsTemplate<StoreOpcode>;
   using StoreFpArgs = StoreArgsTemplate<StoreFpOpcode>;
 
+  struct OpFpArgs {
+    OpFpOpcode opcode;
+    FloatSize float_size;
+    uint8_t dst;
+    uint8_t src1;
+    uint8_t src2;
+    uint8_t rm;
+  };
+
   struct BranchArgs {
     BranchOpcode opcode;
     uint8_t src1;
@@ -564,6 +592,9 @@ class Decoder {
       case BaseOpcode::kOpImm32:
         DecodeOp<OpImm32Opcode, ShiftImm32Opcode, 5>();
         break;
+      case BaseOpcode::kOpFp:
+        DecodeOpFp();
+        break;
       case BaseOpcode::kStore:
         DecodeStore<StoreOpcode>();
         break;
@@ -808,6 +839,20 @@ class Decoder {
     insn_consumer_->JumpAndLink(args);
   }
 
+  void DecodeOpFp() {
+    uint8_t float_size = GetBits<uint8_t, 25, 2>();
+    uint8_t opcode_bits = GetBits<uint8_t, 27, 5>();
+    const OpFpArgs args = {
+        .opcode = OpFpOpcode(opcode_bits),
+        .float_size = FloatSize(float_size),
+        .dst = GetBits<uint8_t, 7, 5>(),
+        .src1 = GetBits<uint8_t, 15, 5>(),
+        .src2 = GetBits<uint8_t, 20, 5>(),
+        .rm = GetBits<uint8_t, 12, 3>(),
+    };
+    insn_consumer_->OpFp(args);
+  }
+
   void DecodeSystem() {
     uint8_t low_opcode = GetBits<uint8_t, 12, 2>();
     if (low_opcode == 0b00) {
diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h
index 40b559d3..e320b7ce 100644
--- a/decoder/include/berberis/decoder/riscv64/semantics_player.h
+++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h
@@ -135,6 +135,13 @@ class SemanticsPlayer {
     SetRegOrIgnore(args.dst, result);
   };
 
+  void OpFp(const typename Decoder::OpFpArgs& args) {
+    FpRegister arg1 = GetFpReg(args.src1);
+    FpRegister arg2 = GetFpReg(args.src2);
+    FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2);
+    SetFpReg(args.dst, result);
+  }
+
   void Store(const typename Decoder::StoreArgs& args) {
     Register arg = GetRegOrZero(args.src);
     Register data = GetRegOrZero(args.data);
diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h
new file mode 100644
index 00000000..dd49f19d
--- /dev/null
+++ b/interpreter/riscv64/fp_regs.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_FP_REGS_H_
+#define BERBERIS_FP_REGS_H_
+
+#include <cstring>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+
+namespace berberis {
+
+template <typename FloatType>
+inline FloatType NanUnboxFPRegToFloat(uint64_t arg);
+
+template <>
+inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) {
+  // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on
+  // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e.,
+  // all upper FLEN−n bits are 1. If so, the n least-significant bits of the input are used as the
+  // input value, otherwise the input value is treated as an n-bit canonical NaN.
+  if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) {
+    return bit_cast<intrinsics::Float32>(0x7fc00000);
+  }
+  intrinsics::Float32 result;
+  memcpy(&result, &arg, sizeof(intrinsics::Float32));
+  return result;
+}
+
+template <>
+inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) {
+  return bit_cast<intrinsics::Float64>(arg);
+}
+
+template <typename FloatType>
+inline uint64_t NanBoxFloatToFPReg(FloatType arg);
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) {
+  return bit_cast<uint32_t>(arg) | 0xffff'ffff'0000'0000;
+}
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) {
+  return bit_cast<uint64_t>(arg);
+}
+
+}  // namespace berberis
+
+#endif  // BERBERIS_FP_REGS_H_
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index f4e82fea..a8fcbf61 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -28,10 +28,11 @@
 #include "berberis/decoder/riscv64/semantics_player.h"
 #include "berberis/guest_state/guest_addr.h"
 #include "berberis/guest_state/guest_state_riscv64.h"
-#include "berberis/intrinsics/riscv64/guest_fpstate.h"
+#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h"
 #include "berberis/kernel_api/run_guest_syscall.h"
 
 #include "atomics.h"
+#include "fp_regs.h"
 
 namespace berberis {
 
@@ -42,6 +43,8 @@ class Interpreter {
   using Decoder = Decoder<SemanticsPlayer<Interpreter>>;
   using Register = uint64_t;
   using FpRegister = uint64_t;
+  using Float32 = intrinsics::Float32;
+  using Float64 = intrinsics::Float64;
 
   explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {}
 
@@ -352,6 +355,38 @@ class Interpreter {
     return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5);
   }
 
+  FpRegister OpFp(Decoder::OpFpOpcode opcode,
+                  Decoder::FloatSize float_size,
+                  uint8_t rm,
+                  FpRegister arg1,
+                  FpRegister arg2) {
+    switch (float_size) {
+      case Decoder::FloatSize::kFloat:
+        return NanBoxFloatToFPReg(OpFp<Float32>(
+            opcode, rm, NanUnboxFPRegToFloat<Float32>(arg1), NanUnboxFPRegToFloat<Float32>(arg2)));
+      case Decoder::FloatSize::kDouble:
+        return NanBoxFloatToFPReg(OpFp<Float64>(
+            opcode, rm, NanUnboxFPRegToFloat<Float64>(arg1), NanUnboxFPRegToFloat<Float64>(arg2)));
+      default:
+        Unimplemented();
+        return {};
+    }
+  }
+
+  // TODO(b/278812060): switch to intrinsics when they would become available and stop using
+  // ExecuteFloatOperation directly.
+  template <typename FloatType>
+  FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) {
+    switch (opcode) {
+      case Decoder::OpFpOpcode::kFAdd:
+        return intrinsics::ExecuteFloatOperation<FloatType>(
+            rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2);
+      default:
+        Unimplemented();
+        return {};
+    }
+  }
+
   Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) {
     switch (opcode) {
       case Decoder::ShiftImmOpcode::kSlli:
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index c3d2b5f1..0d5c344a 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -102,6 +102,17 @@ class Riscv64InterpreterTest : public ::testing::Test {
     }
   }
 
+  void InterpretOpFp(uint32_t insn_bytes,
+                     std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
+    for (auto [arg1, arg2, expected_result] : args) {
+      state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+      SetFReg<2>(state_.cpu, arg1);
+      SetFReg<3>(state_.cpu, arg2);
+      InterpretInsn(&state_);
+      EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
+    }
+  }
+
   void InterpretFence(uint32_t insn_bytes) {
     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
     InterpretInsn(&state_);
@@ -609,6 +620,17 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) {
   InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
 }
 
+TEST_F(Riscv64InterpreterTest, OpFpInstructions) {
+  // FAdd.S
+  InterpretOpFp(0x003100d3,
+                {{bit_cast<uint32_t>(1.0f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(2.0f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(3.0f) | 0xffff'ffff'0000'0000}});
+  // FAdd.D
+  InterpretOpFp(0x023100d3,
+                {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}});
+}
+
 TEST_F(Riscv64InterpreterTest, LoadInstructions) {
   // Offset is always 8.
   // Lbu
diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
index 30f83e55..694afba9 100644
--- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h
+++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
@@ -64,23 +64,15 @@ class WrappedFloatType {
   explicit constexpr operator uint32_t() const { return value_; }
   explicit constexpr operator int64_t() const { return value_; }
   explicit constexpr operator uint64_t() const { return value_; }
-
-  auto BitCastToIntOfSameSize() {
-    if constexpr (std::is_same_v<BaseType, float>) {
-      return bit_cast<int32_t>(value_);
-    } else {
-      static_assert(std::is_same_v<BaseType, double>, "Only float and double BaseType supported.");
-      return bit_cast<int64_t>(value_);
-    }
-  }
-
-  // Only valid for BaseType==double. Returns the bit representation of the fp value.
   explicit constexpr operator WrappedFloatType<float>() const {
     return WrappedFloatType<float>(value_);
   }
   explicit constexpr operator WrappedFloatType<double>() const {
     return WrappedFloatType<double>(value_);
   }
+#if defined(__i386__) || defined(__x86_64__)
+  explicit constexpr operator long double() const { return value_; }
+#endif
   // Note: we don't provide unary operator-.  That's done on purpose: with floats -x and 0.-x
   // produce different results which could be surprising.  Use fneg instead of unary operator-.
   friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2);
diff --git a/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
new file mode 100644
index 00000000..db8ff249
--- /dev/null
+++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+
+#include <limits>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+#include "berberis/intrinsics/riscv64/guest_fpstate.h"  // ScopedRoundingMode
+#include "berberis/intrinsics/type_traits.h"
+
+namespace berberis::intrinsics {
+
+// x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it
+// has support for 80bit floats: if calculations are done with one bit (or more) of extra precision
+// in the FE_TOWARDZERO mode then we can easily adjust the fraction part and only need to remember
+// that this addition may overflow.
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperationRmm(OperationType operation, Args... args)
+    -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+  using Wide = typename TypeTraits<FloatType>::Wide;
+  Wide wide_result = operation(Wide(args)...);
+  if constexpr (std::is_same_v<FloatType, Float32>) {
+    // In the 32bit->64bit case everything happens almost automatically, we just need to clear low
+    // bits to ensure that we are getting ±∞ and not NaN.
+    auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result);
+    if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) {
+      return FloatType(wide_result);
+    }
+    int_result += 0x0000'0000'1000'0000;
+    int_result &= 0xffff'ffff'e000'0000;
+    wide_result = bit_cast<Wide>(int_result);
+  } else if constexpr (std::is_same_v<FloatType, Float64>) {
+    // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not
+    // pseudo-infinity (supported on 8087/80287, but not on modern CPUs).
+    struct {
+      uint64_t significand;
+      uint16_t exponent;
+      uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)];
+    } fp80_parts;
+    static_assert(sizeof fp80_parts == sizeof(Wide));
+    memcpy(&fp80_parts, &wide_result, sizeof(wide_result));
+    // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V).
+    if ((fp80_parts.exponent & 0x7fff) == 0x7fff ||
+        (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) {
+      return FloatType(wide_result);
+    }
+    fp80_parts.significand += 0x0000'0000'0000'0400;
+    fp80_parts.significand &= 0xffff'ffff'ffff'f800;
+    if (fp80_parts.significand == 0) {
+      fp80_parts.exponent++;
+      fp80_parts.significand = 0x8000'0000'0000'0000;
+    }
+    memcpy(&wide_result, &fp80_parts, sizeof(wide_result));
+  }
+  return FloatType(wide_result);
+}
+
+// Note: first round of rm/frm verification must happen before this function is called because
+// RISC-V postulates that invalid rm or frm should trigger an illegal instruction exception.
+// Here we can assume both rm and frm fields are valid.
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperation(uint8_t requested_rm,
+                                  uint8_t current_rm,
+                                  OperationType operation,
+                                  Args... args)
+    -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+  int host_requested_rm = ToHostRoundingMode(requested_rm);
+  int host_current_rm = ToHostRoundingMode(current_rm);
+  if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) {
+    uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm;
+    if (rm == FPFlags::RMM) {
+      return ExecuteFloatOperationRmm<FloatType>(operation, args...);
+    }
+    return operation(args...);
+  }
+  ScopedRoundingMode scoped_rounding_mode{host_requested_rm};
+  if (requested_rm == FPFlags::RMM) {
+    return ExecuteFloatOperationRmm<FloatType>(operation, args...);
+  }
+  return operation(args...);
+}
+
+}  // namespace berberis::intrinsics
+
+#endif  // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h
index f1f6f75d..2785b92d 100644
--- a/intrinsics/include/berberis/intrinsics/type_traits.h
+++ b/intrinsics/include/berberis/intrinsics/type_traits.h
@@ -93,11 +93,17 @@ struct TypeTraits<int64_t> {
 template <>
 struct TypeTraits<intrinsics::Float32> {
   using Int = int32_t;
+  using Wide = intrinsics::Float64;
 };
 
 template <>
 struct TypeTraits<intrinsics::Float64> {
   using Int = int64_t;
+  using Narrow = intrinsics::Float32;
+#if defined(__i386__) || defined(__x86_64__)
+  static_assert(sizeof(long double) > sizeof(intrinsics::Float64));
+  using Wide = long double;
+#endif
 };
 
 #if defined(__x86_64__)
-- 
cgit v1.2.3


From 77233039396513a90f828585d7ab8c7457216abf Mon Sep 17 00:00:00 2001
From: Aman <amankumar2198@gmail.com>
Date: Wed, 19 Apr 2023 18:10:30 +0530
Subject: interp: Tests for Rounding modes.

Bug: 265372622

Test: berberis_host_tests/berberis_host_tests

Change-Id: Id66918d24e1c6e4ebf4ae5efff4b157a45324867
---
 interpreter/riscv64/interpreter_test.cc | 214 ++++++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+)

diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 0d5c344a..cb5b4083 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -631,6 +631,220 @@ TEST_F(Riscv64InterpreterTest, OpFpInstructions) {
                 {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}});
 }
 
+TEST_F(Riscv64InterpreterTest, RoundingModeTest) {
+  // FAdd.S
+  InterpretOpFp(0x003100d3,
+                // Test RNE
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003110d3,
+                // Test RTZ
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003120d3,
+                // Test RDN
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003130d3,
+                // Test RUP
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}});
+  // FAdd.S
+  InterpretOpFp(0x003140d3,
+                // Test RMM
+                {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000},
+                 {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+                  bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+
+  // FAdd.D
+  InterpretOpFp(0x023100d3,
+                // Test RNE
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000009)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000009)}});
+  // FAdd.D
+  InterpretOpFp(0x023110d3,
+                // Test RTZ
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000002)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000002)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)}});
+  // FAdd.D
+  InterpretOpFp(0x023120d3,
+                // Test RDN
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000002)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000009)}});
+  // FAdd.D
+  InterpretOpFp(0x023130d3,
+                // Test RUP
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000009)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000002)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)}});
+  // FAdd.D
+  InterpretOpFp(0x023140d3,
+                // Test RMM
+                {{bit_cast<uint64_t>(1.0000000000000002),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000004)},
+                 {bit_cast<uint64_t>(1.0000000000000004),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000007)},
+                 {bit_cast<uint64_t>(1.0000000000000007),
+                  bit_cast<uint64_t>(0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(1.0000000000000009)},
+                 {bit_cast<uint64_t>(-1.0000000000000002),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000004)},
+                 {bit_cast<uint64_t>(-1.0000000000000004),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000007)},
+                 {bit_cast<uint64_t>(-1.0000000000000007),
+                  bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+                  bit_cast<uint64_t>(-1.0000000000000009)}});
+}
+
 TEST_F(Riscv64InterpreterTest, LoadInstructions) {
   // Offset is always 8.
   // Lbu
-- 
cgit v1.2.3