diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-04-22 01:27:17 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-04-22 01:27:17 +0000 |
commit | 65790b034e7d97022c7c8ea9a6e930333d3c1fd9 (patch) | |
tree | a076320347f30694dd7f815ea3c7ef0519645811 | |
parent | 722006fdeb16a49976325a0d74ab662992adba1a (diff) | |
parent | 1bf3e03bcb66e3a87a0bc3c69affd8ee56a45a00 (diff) | |
download | binary_translation-65790b034e7d97022c7c8ea9a6e930333d3c1fd9.tar.gz |
Snap for 9990577 from 1bf3e03bcb66e3a87a0bc3c69affd8ee56a45a00 to udc-release android-vts-14.0_r4 android-vts-14.0_r3 android-vts-14.0_r2 android-vts-14.0_r1 android-security-14.0.0_r9 android-security-14.0.0_r8 android-security-14.0.0_r7 android-security-14.0.0_r6 android-security-14.0.0_r5 android-security-14.0.0_r4 android-security-14.0.0_r3 android-security-14.0.0_r2 android-security-14.0.0_r1 android-platform-14.0.0_r8 android-platform-14.0.0_r7 android-platform-14.0.0_r6 android-platform-14.0.0_r5 android-platform-14.0.0_r4 android-platform-14.0.0_r3 android-platform-14.0.0_r2 android-platform-14.0.0_r1 android-cts-14.0_r4 android-cts-14.0_r3 android-cts-14.0_r2 android-cts-14.0_r1 android-14.0.0_r28 android-14.0.0_r2 android-14.0.0_r15 android-14.0.0_r14 android-14.0.0_r13 android-14.0.0_r1 android14-tests-release android14-security-release android14-s2-release android14-s1-release android14-release android14-platform-release
Change-Id: I28d0860d6f3122d1425a55e2224d9e7cbc5dd0e7
-rw-r--r-- | base/include/berberis/base/dependent_false.h | 32 | ||||
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/decoder.h | 124 | ||||
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/semantics_player.h | 7 | ||||
-rw-r--r-- | guest_state/include/berberis/guest_state/guest_state_riscv64.h | 28 | ||||
-rw-r--r-- | interpreter/riscv64/fp_regs.h | 64 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter.cc | 37 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 328 | ||||
-rw-r--r-- | intrinsics/include/berberis/intrinsics/intrinsics_float.h | 14 | ||||
-rw-r--r-- | intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h | 101 | ||||
-rw-r--r-- | intrinsics/include/berberis/intrinsics/type_traits.h | 6 |
10 files changed, 633 insertions, 108 deletions
diff --git a/base/include/berberis/base/dependent_false.h b/base/include/berberis/base/dependent_false.h new file mode 100644 index 00000000..f01a48e0 --- /dev/null +++ b/base/include/berberis/base/dependent_false.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_BASE_DEPENDENT_FALSE_H_ +#define BERBERIS_BASE_DEPENDENT_FALSE_H_ + +#include <type_traits> + +namespace berberis { + +template <typename T> +inline constexpr bool kDependentTypeFalse = false; + +template <auto T> +inline constexpr bool kDependentValueFalse = false; + +} // namespace berberis + +#endif // BERBERIS_BASE_DEPENDENT_FALSE_H_ diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index dc8303d1..9f785732 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -194,6 +194,17 @@ class Decoder { kMaxAmoOpcode = 0b11111'111, }; + enum class OpFpOpcode { + // Bit #2 = 1 means rm is an opcode extension. + // Bit #3 = 1 means rs2 is an opcode extension + // Bits #4, #1, and #0 - actual opcode. 
+ kFAdd = 0b0'0'0'00, + kFSub = 0b0'0'0'01, + kFMul = 0b0'0'0'10, + kFDiv = 0b0'0'0'11, + kMaxOpFpOpcode = 0b1'1'1'11, + }; + enum class LoadOpcode { kLb = 0b000, kLh = 0b001, @@ -277,6 +288,14 @@ class Decoder { kMaxCsrRegister = 0b11'11'1111'1111, }; + enum class FloatSize { + kFloat = 0b00, + kDouble = 0b01, + kHalf = 0b10, + kQuad = 0b11, + kMaxFloatSize = 0b11, + }; + struct AmoArgs { AmoOpcode opcode; uint8_t dst; @@ -379,6 +398,15 @@ class Decoder { using StoreArgs = StoreArgsTemplate<StoreOpcode>; using StoreFpArgs = StoreArgsTemplate<StoreFpOpcode>; + struct OpFpArgs { + OpFpOpcode opcode; + FloatSize float_size; + uint8_t dst; + uint8_t src1; + uint8_t src2; + uint8_t rm; + }; + struct BranchArgs { BranchOpcode opcode; uint8_t src1; @@ -430,13 +458,19 @@ class Decoder { DecodeCAddi(); break; case CompressedOpcode::kFld: - DecodeCFld(); + DecodeCompressedLoadStore<LoadFpOpcode::kFld>(); break; case CompressedOpcode::kLw: - DecodeCLw(); + DecodeCompressedLoadStore<LoadOpcode::kLw>(); break; case CompressedOpcode::kLd: - DecodeCLd(); + DecodeCompressedLoadStore<LoadOpcode::kLd>(); + break; + case CompressedOpcode::kFsd: + DecodeCompressedLoadStore<StoreFpOpcode::kFsd>(); + break; + case CompressedOpcode::kSd: + DecodeCompressedLoadStore<StoreOpcode::kSd>(); break; default: insn_consumer_->Unimplemented(); @@ -444,50 +478,37 @@ class Decoder { return 2; } - void DecodeCLd() { + template <auto opcode> + void DecodeCompressedLoadStore() { uint8_t low_imm = GetBits<uint8_t, 5, 2>(); uint8_t high_imm = GetBits<uint8_t, 10, 3>(); - uint8_t imm = (low_imm << 6 | high_imm << 3); - uint8_t rd = GetBits<uint8_t, 2, 3>(); - uint8_t rs = GetBits<uint8_t, 7, 3>(); - const LoadArgs args = { - .opcode = LoadOpcode::kLd, - .dst = uint8_t(8 + rd), - .src = uint8_t(8 + rs), - .offset = imm, - }; - insn_consumer_->Load(args); - } - - void DecodeCLw() { - constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44}; - uint8_t low_imm = GetBits<uint8_t, 5, 2>(); - uint8_t high_imm = 
GetBits<uint8_t, 10, 3>(); - uint8_t imm = (kLwLow[low_imm] | high_imm << 3); - uint8_t rd = GetBits<uint8_t, 2, 3>(); - uint8_t rs = GetBits<uint8_t, 7, 3>(); - const LoadArgs args = { - .opcode = LoadOpcode::kLw, - .dst = uint8_t(8 + rd), - .src = uint8_t(8 + rs), - .offset = imm, - }; - insn_consumer_->Load(args); - } - - void DecodeCFld() { - uint8_t low_imm = GetBits<uint8_t, 5, 2>(); - uint8_t high_imm = GetBits<uint8_t, 10, 3>(); - uint8_t imm = (low_imm << 6 | high_imm << 3); + uint8_t imm; + if constexpr ((uint8_t(opcode) & 1) == 0) { + constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44}; + imm = (kLwLow[low_imm] | high_imm << 3); + } else { + imm = (low_imm << 6 | high_imm << 3); + } uint8_t rd = GetBits<uint8_t, 2, 3>(); uint8_t rs = GetBits<uint8_t, 7, 3>(); - const LoadFpArgs args = { - .opcode = LoadFpOpcode::kFld, - .dst = uint8_t(8 + rd), - .src = uint8_t(8 + rs), - .offset = imm, - }; - insn_consumer_->Load(args); + if constexpr (std::is_same_v<decltype(opcode), StoreOpcode> || + std::is_same_v<decltype(opcode), StoreFpOpcode>) { + const StoreArgsTemplate<decltype(opcode)> args = { + .opcode = opcode, + .src = uint8_t(8 + rs), + .offset = imm, + .data = uint8_t(8 + rd), + }; + insn_consumer_->Store(args); + } else { + const LoadArgsTemplate<decltype(opcode)> args = { + .opcode = opcode, + .dst = uint8_t(8 + rd), + .src = uint8_t(8 + rs), + .offset = imm, + }; + insn_consumer_->Load(args); + } } void DecodeCAddi() { @@ -578,6 +599,9 @@ class Decoder { case BaseOpcode::kOpImm32: DecodeOp<OpImm32Opcode, ShiftImm32Opcode, 5>(); break; + case BaseOpcode::kOpFp: + DecodeOpFp(); + break; case BaseOpcode::kStore: DecodeStore<StoreOpcode>(); break; @@ -822,6 +846,20 @@ class Decoder { insn_consumer_->JumpAndLink(args); } + void DecodeOpFp() { + uint8_t float_size = GetBits<uint8_t, 25, 2>(); + uint8_t opcode_bits = GetBits<uint8_t, 27, 5>(); + const OpFpArgs args = { + .opcode = OpFpOpcode(opcode_bits), + .float_size = FloatSize(float_size), + .dst = 
GetBits<uint8_t, 7, 5>(), + .src1 = GetBits<uint8_t, 15, 5>(), + .src2 = GetBits<uint8_t, 20, 5>(), + .rm = GetBits<uint8_t, 12, 3>(), + }; + insn_consumer_->OpFp(args); + } + void DecodeSystem() { uint8_t low_opcode = GetBits<uint8_t, 12, 2>(); if (low_opcode == 0b00) { diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h index 40b559d3..e320b7ce 100644 --- a/decoder/include/berberis/decoder/riscv64/semantics_player.h +++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h @@ -135,6 +135,13 @@ class SemanticsPlayer { SetRegOrIgnore(args.dst, result); }; + void OpFp(const typename Decoder::OpFpArgs& args) { + FpRegister arg1 = GetFpReg(args.src1); + FpRegister arg2 = GetFpReg(args.src2); + FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2); + SetFpReg(args.dst, result); + } + void Store(const typename Decoder::StoreArgs& args) { Register arg = GetRegOrZero(args.src); Register data = GetRegOrZero(args.data); diff --git a/guest_state/include/berberis/guest_state/guest_state_riscv64.h b/guest_state/include/berberis/guest_state/guest_state_riscv64.h index 8546311f..82aad665 100644 --- a/guest_state/include/berberis/guest_state/guest_state_riscv64.h +++ b/guest_state/include/berberis/guest_state/guest_state_riscv64.h @@ -19,6 +19,7 @@ #include <cstdint> +#include "berberis/base/dependent_false.h" #include "berberis/base/macros.h" #include "berberis/guest_state/guest_addr.h" @@ -74,6 +75,33 @@ inline void SetFReg(CPUState& state, uint64_t val) { state.f[kIndex] = val; } +enum class RegisterType { + kReg, + kFpReg, +}; + +template <RegisterType register_type, uint8_t kIndex> +inline auto GetReg(const CPUState& state) { + if constexpr (register_type == RegisterType::kReg) { + return GetXReg<kIndex>(state); + } else if constexpr (register_type == RegisterType::kFpReg) { + return GetFReg<kIndex>(state); + } else { + 
static_assert(kDependentValueFalse<register_type>, "Unsupported register type"); + } +} + +template <RegisterType register_type, uint8_t kIndex, typename Register> +inline auto SetReg(CPUState& state, Register val) { + if constexpr (register_type == RegisterType::kReg) { + return SetXReg<kIndex>(state, val); + } else if constexpr (register_type == RegisterType::kFpReg) { + return SetFReg<kIndex>(state, val); + } else { + static_assert(kDependentValueFalse<register_type>, "Unsupported register type"); + } +} + struct ThreadState { CPUState cpu; }; diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h new file mode 100644 index 00000000..dd49f19d --- /dev/null +++ b/interpreter/riscv64/fp_regs.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_FP_REGS_H_ +#define BERBERIS_FP_REGS_H_ + +#include <cstring> + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" + +namespace berberis { + +template <typename FloatType> +inline FloatType NanUnboxFPRegToFloat(uint64_t arg); + +template <> +inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) { + // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on + // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e., + // all upper FLEN−n bits are 1. 
If so, the n least-significant bits of the input are used as the + // input value, otherwise the input value is treated as an n-bit canonical NaN. + if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) { + return bit_cast<intrinsics::Float32>(0x7fc00000); + } + intrinsics::Float32 result; + memcpy(&result, &arg, sizeof(intrinsics::Float32)); + return result; +} + +template <> +inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) { + return bit_cast<intrinsics::Float64>(arg); +} + +template <typename FloatType> +inline uint64_t NanBoxFloatToFPReg(FloatType arg); + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) { + return bit_cast<uint32_t>(arg) | 0xffff'ffff'0000'0000; +} + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) { + return bit_cast<uint64_t>(arg); +} + +} // namespace berberis + +#endif // BERBERIS_FP_REGS_H_ diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index f4e82fea..a8fcbf61 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -28,10 +28,11 @@ #include "berberis/decoder/riscv64/semantics_player.h" #include "berberis/guest_state/guest_addr.h" #include "berberis/guest_state/guest_state_riscv64.h" -#include "berberis/intrinsics/riscv64/guest_fpstate.h" +#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h" #include "berberis/kernel_api/run_guest_syscall.h" #include "atomics.h" +#include "fp_regs.h" namespace berberis { @@ -42,6 +43,8 @@ class Interpreter { using Decoder = Decoder<SemanticsPlayer<Interpreter>>; using Register = uint64_t; using FpRegister = uint64_t; + using Float32 = intrinsics::Float32; + using Float64 = intrinsics::Float64; explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {} @@ -352,6 +355,38 @@ class Interpreter { return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5); } + FpRegister OpFp(Decoder::OpFpOpcode opcode, + 
Decoder::FloatSize float_size, + uint8_t rm, + FpRegister arg1, + FpRegister arg2) { + switch (float_size) { + case Decoder::FloatSize::kFloat: + return NanBoxFloatToFPReg(OpFp<Float32>( + opcode, rm, NanUnboxFPRegToFloat<Float32>(arg1), NanUnboxFPRegToFloat<Float32>(arg2))); + case Decoder::FloatSize::kDouble: + return NanBoxFloatToFPReg(OpFp<Float64>( + opcode, rm, NanUnboxFPRegToFloat<Float64>(arg1), NanUnboxFPRegToFloat<Float64>(arg2))); + default: + Unimplemented(); + return {}; + } + } + + // TODO(b/278812060): switch to intrinsics when they would become available and stop using + // ExecuteFloatOperation directly. + template <typename FloatType> + FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) { + switch (opcode) { + case Decoder::OpFpOpcode::kFAdd: + return intrinsics::ExecuteFloatOperation<FloatType>( + rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2); + default: + Unimplemented(); + return {}; + } + } + Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) { switch (opcode) { case Decoder::ShiftImmOpcode::kSlli: diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index e35b58f3..b6edfc4f 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -35,28 +35,24 @@ namespace { class Riscv64InterpreterTest : public ::testing::Test { public: - void InterpretCLd(uint16_t insn_bytes, uint64_t offset) { + template <RegisterType register_type, uint64_t expected_result> + void InterpretCompressedStore(uint16_t insn_bytes, uint64_t offset) { auto code_start = ToGuestAddr(&insn_bytes); state_.cpu.insn_addr = code_start; - SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset)); - InterpretInsn(&state_); - EXPECT_EQ(GetXReg<8>(state_.cpu), kDataToLoad); - } - - void InterpretCLw(uint16_t insn_bytes, uint64_t offset) { - auto code_start = ToGuestAddr(&insn_bytes); - 
state_.cpu.insn_addr = code_start; - SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset)); + store_area_ = 0; + SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset)); + SetReg<register_type, 9>(state_.cpu, kDataToLoad); InterpretInsn(&state_); - EXPECT_EQ(GetXReg<8>(state_.cpu), uint64_t(int32_t(kDataToLoad))); + EXPECT_EQ(store_area_, expected_result); } - void InterpretCFld(uint16_t insn_bytes, uint64_t offset) { + template <RegisterType register_type, uint64_t expected_result> + void InterpretCompressedLoad(uint16_t insn_bytes, uint64_t offset) { auto code_start = ToGuestAddr(&insn_bytes); state_.cpu.insn_addr = code_start; SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset)); InterpretInsn(&state_); - EXPECT_EQ(GetFReg<8>(state_.cpu), kDataToLoad); + EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result); } void InterpretCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) { @@ -102,6 +98,17 @@ class Riscv64InterpreterTest : public ::testing::Test { } } + void InterpretOpFp(uint32_t insn_bytes, + std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) { + for (auto [arg1, arg2, expected_result] : args) { + state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); + SetFReg<2>(state_.cpu, arg1); + SetFReg<3>(state_.cpu, arg2); + InterpretInsn(&state_); + EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); + } + } + void InterpretFence(uint32_t insn_bytes) { state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); InterpretInsn(&state_); @@ -224,38 +231,6 @@ class Riscv64InterpreterTest : public ::testing::Test { ThreadState state_; }; -TEST_F(Riscv64InterpreterTest, CLd) { - union { - uint16_t offset; - struct { - uint8_t : 3; - uint8_t i3_i5 : 3; - uint8_t i6_i7 : 2; - } i_bits; - }; - for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) { - union { - int16_t parcel; - struct { - uint8_t low_opcode : 2; - uint8_t rd : 3; - uint8_t i6_i7 : 2; - uint8_t rs : 3; - 
uint8_t i3_i5 : 3; - uint8_t high_opcode : 3; - } __attribute__((__packed__)); - } o_bits = { - .low_opcode = 0b00, - .rd = 0, - .i6_i7 = i_bits.i6_i7, - .rs = 0, - .i3_i5 = i_bits.i3_i5, - .high_opcode = 0b011, - }; - InterpretCLd(o_bits.parcel, offset); - } -} - TEST_F(Riscv64InterpreterTest, CLw) { union { uint16_t offset; @@ -280,18 +255,21 @@ TEST_F(Riscv64InterpreterTest, CLw) { } __attribute__((__packed__)); } o_bits = { .low_opcode = 0b00, - .rd = 0, + .rd = 1, .i6 = i_bits.i6, .i2 = i_bits.i2, .rs = 0, .i3_i5 = i_bits.i3_i5, .high_opcode = 0b010, }; - InterpretCLw(o_bits.parcel, offset); + InterpretCompressedLoad<RegisterType::kReg, + static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad))>(o_bits.parcel, + offset); } } -TEST_F(Riscv64InterpreterTest, CFld) { +template <uint16_t opcode, auto execute_instruction_func> +void TestCompressedLoadOrStore(Riscv64InterpreterTest* that) { union { uint16_t offset; struct { @@ -303,26 +281,45 @@ TEST_F(Riscv64InterpreterTest, CFld) { for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) { union { int16_t parcel; - struct { + struct [[gnu::packed]] { uint8_t low_opcode : 2; uint8_t rd : 3; uint8_t i6_i7 : 2; uint8_t rs : 3; uint8_t i3_i5 : 3; uint8_t high_opcode : 3; - } __attribute__((__packed__)); + }; } o_bits = { .low_opcode = 0b00, - .rd = 0, + .rd = 1, .i6_i7 = i_bits.i6_i7, .rs = 0, .i3_i5 = i_bits.i3_i5, - .high_opcode = 0b001, + .high_opcode = 0b000, }; - InterpretCFld(o_bits.parcel, offset); + (that->*execute_instruction_func)(o_bits.parcel | opcode, offset); } } +TEST_F(Riscv64InterpreterTest, CompressedLoadAndStores) { + // c.Fld + TestCompressedLoadOrStore< + 0b001'000'000'00'000'00, + &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kFpReg, kDataToLoad>>(this); + // c.Ld + TestCompressedLoadOrStore< + 0b011'000'000'00'000'00, + &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kReg, kDataToLoad>>(this); + // c.Fsd + TestCompressedLoadOrStore< + 0b101'000'000'00'000'00, 
+ &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kFpReg, kDataToLoad>>(this); + // c.Sd + TestCompressedLoadOrStore< + 0b111'000'000'00'000'00, + &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kReg, kDataToLoad>>(this); +} + TEST_F(Riscv64InterpreterTest, CAddi) { union { int8_t offset; @@ -633,6 +630,231 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) { InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}}); } +TEST_F(Riscv64InterpreterTest, OpFpInstructions) { + // FAdd.S + InterpretOpFp(0x003100d3, + {{bit_cast<uint32_t>(1.0f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(2.0f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(3.0f) | 0xffff'ffff'0000'0000}}); + // FAdd.D + InterpretOpFp(0x023100d3, + {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}}); +} + +TEST_F(Riscv64InterpreterTest, RoundingModeTest) { + // FAdd.S + InterpretOpFp(0x003100d3, + // Test RNE + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + 
bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}}); + // FAdd.S + InterpretOpFp(0x003110d3, + // Test RTZ + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}}); + // FAdd.S + InterpretOpFp(0x003120d3, + // Test RDN + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + 
bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}}); + // FAdd.S + InterpretOpFp(0x003130d3, + // Test RUP + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}}); + // FAdd.S + InterpretOpFp(0x003140d3, + // Test RMM + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + 
bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}}); + + // FAdd.D + InterpretOpFp(0x023100d3, + // Test RNE + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000009)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000009)}}); + // FAdd.D + InterpretOpFp(0x023110d3, + // Test RTZ + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + 
bit_cast<uint64_t>(1.0000000000000002)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000002)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}}); + // FAdd.D + InterpretOpFp(0x023120d3, + // Test RDN + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000002)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000009)}}); + // FAdd.D + InterpretOpFp(0x023130d3, + // Test RUP + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000004), + 
bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000009)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000002)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}}); + // FAdd.D + InterpretOpFp(0x023140d3, + // Test RMM + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000009)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000009)}}); +} + TEST_F(Riscv64InterpreterTest, LoadInstructions) { // Offset is always 8. 
// Lbu diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h index 30f83e55..694afba9 100644 --- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h +++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h @@ -64,23 +64,15 @@ class WrappedFloatType { explicit constexpr operator uint32_t() const { return value_; } explicit constexpr operator int64_t() const { return value_; } explicit constexpr operator uint64_t() const { return value_; } - - auto BitCastToIntOfSameSize() { - if constexpr (std::is_same_v<BaseType, float>) { - return bit_cast<int32_t>(value_); - } else { - static_assert(std::is_same_v<BaseType, double>, "Only float and double BaseType supported."); - return bit_cast<int64_t>(value_); - } - } - - // Only valid for BaseType==double. Returns the bit representation of the fp value. explicit constexpr operator WrappedFloatType<float>() const { return WrappedFloatType<float>(value_); } explicit constexpr operator WrappedFloatType<double>() const { return WrappedFloatType<double>(value_); } +#if defined(__i386__) || defined(__x86_64__) + explicit constexpr operator long double() const { return value_; } +#endif // Note: we don't provide unary operator-. That's done on purpose: with floats -x and 0.-x // produce different results which could be surprising. Use fneg instead of unary operator-. 
friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2); diff --git a/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h new file mode 100644 index 00000000..db8ff249 --- /dev/null +++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ +#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ + +#include <limits> + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" +#include "berberis/intrinsics/riscv64/guest_fpstate.h" // ScopedRoundingMode +#include "berberis/intrinsics/type_traits.h" + +namespace berberis::intrinsics { + +// x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it +// has support for 80bit floats: if calculations are done with one bit (or more) of extra precision +// in the FE_TOWARDZERO mode then we can easily adjust the fraction part and would only need to remember +// that this addition may overflow. +template <typename FloatType, typename OperationType, typename... Args> +inline auto ExecuteFloatOperationRmm(OperationType operation, Args... 
args) + -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> { + using Wide = typename TypeTraits<FloatType>::Wide; + Wide wide_result = operation(Wide(args)...); + if constexpr (std::is_same_v<FloatType, Float32>) { + // In the 32bit->64bit case everything happens almost automatically, we just need to clear low + // bits to ensure that we are getting ±∞ and not NaN. + auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result); + if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) { + return FloatType(wide_result); + } + int_result += 0x0000'0000'1000'0000; + int_result &= 0xffff'ffff'e000'0000; + wide_result = bit_cast<Wide>(int_result); + } else if constexpr (std::is_same_v<FloatType, Float64>) { + // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not + // pseudo-infinity (supported on 8087/80287, but not on modern CPUs). + struct { + uint64_t significand; + uint16_t exponent; + uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)]; + } fp80_parts; + static_assert(sizeof fp80_parts == sizeof(Wide)); + memcpy(&fp80_parts, &wide_result, sizeof(wide_result)); + // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V). + if ((fp80_parts.exponent & 0x7fff) == 0x7fff || + (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) { + return FloatType(wide_result); + } + fp80_parts.significand += 0x0000'0000'0000'0400; + fp80_parts.significand &= 0xffff'ffff'ffff'f800; + if (fp80_parts.significand == 0) { + fp80_parts.exponent++; + fp80_parts.significand = 0x8000'0000'0000'0000; + } + memcpy(&wide_result, &fp80_parts, sizeof(wide_result)); + } + return FloatType(wide_result); +} + +// Note: first round of rm/frm verification must happen before that function because RISC-V +// postulates that invalid rm or frm should trigger illegal instruction exception. +// Here we can assume both rm and frm fields are valid. 
+template <typename FloatType, typename OperationType, typename... Args> +inline auto ExecuteFloatOperation(uint8_t requested_rm, + uint8_t current_rm, + OperationType operation, + Args... args) + -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> { + int host_requested_rm = ToHostRoundingMode(requested_rm); + int host_current_rm = ToHostRoundingMode(current_rm); + if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) { + uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm; + if (rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm<FloatType>(operation, args...); + } + return operation(args...); + } + ScopedRoundingMode scoped_rounding_mode{host_requested_rm}; + if (requested_rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm<FloatType>(operation, args...); + } + return operation(args...); +} + +} // namespace berberis::intrinsics + +#endif // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h index f1f6f75d..2785b92d 100644 --- a/intrinsics/include/berberis/intrinsics/type_traits.h +++ b/intrinsics/include/berberis/intrinsics/type_traits.h @@ -93,11 +93,17 @@ struct TypeTraits<int64_t> { template <> struct TypeTraits<intrinsics::Float32> { using Int = int32_t; + using Wide = intrinsics::Float64; }; template <> struct TypeTraits<intrinsics::Float64> { using Int = int64_t; + using Narrow = intrinsics::Float32; +#if defined(__i386__) || defined(__x86_64__) + static_assert(sizeof(long double) > sizeof(intrinsics::Float64)); + using Wide = long double; +#endif }; #if defined(__x86_64__) |