From c1a367e7cf18df4f18a91246e25b439ec1457a44 Mon Sep 17 00:00:00 2001 From: Aman Date: Tue, 18 Apr 2023 19:35:45 +0530 Subject: interp: added FAdd instruction. Bug: 265372622 Test: berberis_host_tests/berberis_host_tests Change-Id: I546086534d44fc09892d752dd1ed52008822d418 --- decoder/include/berberis/decoder/riscv64/decoder.h | 45 +++++++++ .../berberis/decoder/riscv64/semantics_player.h | 7 ++ interpreter/riscv64/fp_regs.h | 64 +++++++++++++ interpreter/riscv64/interpreter.cc | 37 +++++++- interpreter/riscv64/interpreter_test.cc | 22 +++++ .../include/berberis/intrinsics/intrinsics_float.h | 14 +-- .../riscv64_to_x86_64/intrinsics_float.h | 101 +++++++++++++++++++++ .../include/berberis/intrinsics/type_traits.h | 6 ++ 8 files changed, 284 insertions(+), 12 deletions(-) create mode 100644 interpreter/riscv64/fp_regs.h create mode 100644 intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index e91ee18f..1fda1316 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -194,6 +194,17 @@ class Decoder { kMaxAmoOpcode = 0b11111'111, }; + enum class OpFpOpcode { + // Bit #2 = 1 means rm is an opcode extension. + // Bit #3 = 1 means rs2 is an opcode extension + // Bits #4, #1, and #0 - actual opcode. 
+ kFAdd = 0b0'0'0'00, + kFSub = 0b0'0'0'01, + kFMul = 0b0'0'0'10, + kFDiv = 0b0'0'0'11, + kMaxOpFpOpcode = 0b1'1'1'11, + }; + enum class LoadOpcode { kLb = 0b000, kLh = 0b001, @@ -277,6 +288,14 @@ class Decoder { kMaxCsrRegister = 0b11'11'1111'1111, }; + enum class FloatSize { + kFloat = 0b00, + kDouble = 0b01, + kHalf = 0b10, + kQuad = 0b11, + kMaxFloatSize = 0b11, + }; + struct AmoArgs { AmoOpcode opcode; uint8_t dst; @@ -379,6 +398,15 @@ class Decoder { using StoreArgs = StoreArgsTemplate; using StoreFpArgs = StoreArgsTemplate; + struct OpFpArgs { + OpFpOpcode opcode; + FloatSize float_size; + uint8_t dst; + uint8_t src1; + uint8_t src2; + uint8_t rm; + }; + struct BranchArgs { BranchOpcode opcode; uint8_t src1; @@ -564,6 +592,9 @@ class Decoder { case BaseOpcode::kOpImm32: DecodeOp(); break; + case BaseOpcode::kOpFp: + DecodeOpFp(); + break; case BaseOpcode::kStore: DecodeStore(); break; @@ -808,6 +839,20 @@ class Decoder { insn_consumer_->JumpAndLink(args); } + void DecodeOpFp() { + uint8_t float_size = GetBits(); + uint8_t opcode_bits = GetBits(); + const OpFpArgs args = { + .opcode = OpFpOpcode(opcode_bits), + .float_size = FloatSize(float_size), + .dst = GetBits(), + .src1 = GetBits(), + .src2 = GetBits(), + .rm = GetBits(), + }; + insn_consumer_->OpFp(args); + } + void DecodeSystem() { uint8_t low_opcode = GetBits(); if (low_opcode == 0b00) { diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h index 40b559d3..e320b7ce 100644 --- a/decoder/include/berberis/decoder/riscv64/semantics_player.h +++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h @@ -135,6 +135,13 @@ class SemanticsPlayer { SetRegOrIgnore(args.dst, result); }; + void OpFp(const typename Decoder::OpFpArgs& args) { + FpRegister arg1 = GetFpReg(args.src1); + FpRegister arg2 = GetFpReg(args.src2); + FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2); + 
SetFpReg(args.dst, result); + } + void Store(const typename Decoder::StoreArgs& args) { Register arg = GetRegOrZero(args.src); Register data = GetRegOrZero(args.data); diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h new file mode 100644 index 00000000..dd49f19d --- /dev/null +++ b/interpreter/riscv64/fp_regs.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_FP_REGS_H_ +#define BERBERIS_FP_REGS_H_ + +#include + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" + +namespace berberis { + +template +inline FloatType NanUnboxFPRegToFloat(uint64_t arg); + +template <> +inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) { + // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on + // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e., + // all upper FLEN−n bits are 1. If so, the n least-significant bits of the input are used as the + // input value, otherwise the input value is treated as an n-bit canonical NaN. 
+ if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) { + return bit_cast(0x7fc00000); + } + intrinsics::Float32 result; + memcpy(&result, &arg, sizeof(intrinsics::Float32)); + return result; +} + +template <> +inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) { + return bit_cast(arg); +} + +template +inline uint64_t NanBoxFloatToFPReg(FloatType arg); + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) { + return bit_cast(arg) | 0xffff'ffff'0000'0000; +} + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) { + return bit_cast(arg); +} + +} // namespace berberis + +#endif // BERBERIS_FP_REGS_H_ diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index f4e82fea..a8fcbf61 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -28,10 +28,11 @@ #include "berberis/decoder/riscv64/semantics_player.h" #include "berberis/guest_state/guest_addr.h" #include "berberis/guest_state/guest_state_riscv64.h" -#include "berberis/intrinsics/riscv64/guest_fpstate.h" +#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h" #include "berberis/kernel_api/run_guest_syscall.h" #include "atomics.h" +#include "fp_regs.h" namespace berberis { @@ -42,6 +43,8 @@ class Interpreter { using Decoder = Decoder>; using Register = uint64_t; using FpRegister = uint64_t; + using Float32 = intrinsics::Float32; + using Float64 = intrinsics::Float64; explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {} @@ -352,6 +355,38 @@ class Interpreter { return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5); } + FpRegister OpFp(Decoder::OpFpOpcode opcode, + Decoder::FloatSize float_size, + uint8_t rm, + FpRegister arg1, + FpRegister arg2) { + switch (float_size) { + case Decoder::FloatSize::kFloat: + return NanBoxFloatToFPReg(OpFp( + opcode, rm, NanUnboxFPRegToFloat(arg1), NanUnboxFPRegToFloat(arg2))); + case 
Decoder::FloatSize::kDouble: + return NanBoxFloatToFPReg(OpFp( + opcode, rm, NanUnboxFPRegToFloat(arg1), NanUnboxFPRegToFloat(arg2))); + default: + Unimplemented(); + return {}; + } + } + + // TODO(b/278812060): switch to intrinsics once they become available and stop using + // ExecuteFloatOperation directly. + template + FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) { + switch (opcode) { + case Decoder::OpFpOpcode::kFAdd: + return intrinsics::ExecuteFloatOperation( + rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2); + default: + Unimplemented(); + return {}; + } + } + Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) { switch (opcode) { case Decoder::ShiftImmOpcode::kSlli: diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index c3d2b5f1..0d5c344a 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -102,6 +102,17 @@ class Riscv64InterpreterTest : public ::testing::Test { } } + void InterpretOpFp(uint32_t insn_bytes, + std::initializer_list> args) { + for (auto [arg1, arg2, expected_result] : args) { + state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); + SetFReg<2>(state_.cpu, arg1); + SetFReg<3>(state_.cpu, arg2); + InterpretInsn(&state_); + EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); + } + } + void InterpretFence(uint32_t insn_bytes) { state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); InterpretInsn(&state_); @@ -609,6 +620,17 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) { InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}}); } +TEST_F(Riscv64InterpreterTest, OpFpInstructions) { + // FAdd.S + InterpretOpFp(0x003100d3, + {{bit_cast(1.0f) | 0xffff'ffff'0000'0000, + bit_cast(2.0f) | 0xffff'ffff'0000'0000, + bit_cast(3.0f) | 0xffff'ffff'0000'0000}}); + // FAdd.D + InterpretOpFp(0x023100d3, + {{bit_cast(1.0), bit_cast(2.0),
bit_cast(3.0)}}); +} + TEST_F(Riscv64InterpreterTest, LoadInstructions) { // Offset is always 8. // Lbu diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h index 30f83e55..694afba9 100644 --- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h +++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h @@ -64,23 +64,15 @@ class WrappedFloatType { explicit constexpr operator uint32_t() const { return value_; } explicit constexpr operator int64_t() const { return value_; } explicit constexpr operator uint64_t() const { return value_; } - - auto BitCastToIntOfSameSize() { - if constexpr (std::is_same_v) { - return bit_cast(value_); - } else { - static_assert(std::is_same_v, "Only float and double BaseType supported."); - return bit_cast(value_); - } - } - - // Only valid for BaseType==double. Returns the bit representation of the fp value. explicit constexpr operator WrappedFloatType() const { return WrappedFloatType(value_); } explicit constexpr operator WrappedFloatType() const { return WrappedFloatType(value_); } +#if defined(__i386__) || defined(__x86_64__) + explicit constexpr operator long double() const { return value_; } +#endif // Note: we don't provide unary operator-. That's done on purpose: with floats -x and 0.-x // produce different results which could be surprising. Use fneg instead of unary operator-. 
friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2); diff --git a/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h new file mode 100644 index 00000000..db8ff249 --- /dev/null +++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ +#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ + +#include + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" +#include "berberis/intrinsics/riscv64/guest_fpstate.h" // ScopedRoundingMode +#include "berberis/intrinsics/type_traits.h" + +namespace berberis::intrinsics { + +// x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it +// has support for 80bit floats: if calculations are done with one bit (or more) of extra precision +// in the FE_TOWARDZERO mode then we can easily adjust fraction part and would only need to remember +// this addition may overflow. +template +inline auto ExecuteFloatOperationRmm(OperationType operation, Args...
args) + -> std::enable_if_t<(std::is_same_v && ...), FloatType> { + using Wide = typename TypeTraits::Wide; + Wide wide_result = operation(Wide(args)...); + if constexpr (std::is_same_v) { + // In the 32bit->64bit case everything happens almost automatically, we just need to clear low + // bits to ensure that we are getting ±∞ and not NaN. + auto int_result = bit_cast::Int>>(wide_result); + if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) { + return FloatType(wide_result); + } + int_result += 0x0000'0000'1000'0000; + int_result &= 0xffff'ffff'e000'0000; + wide_result = bit_cast(int_result); + } else if constexpr (std::is_same_v) { + // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not + // pseudo-infinity (supported on 8087/80287, but not on modern CPUs). + struct { + uint64_t significand; + uint16_t exponent; + uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)]; + } fp80_parts; + static_assert(sizeof fp80_parts == sizeof(Wide)); + memcpy(&fp80_parts, &wide_result, sizeof(wide_result)); + // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V). + if ((fp80_parts.exponent & 0x7fff) == 0x7fff || + (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) { + return FloatType(wide_result); + } + fp80_parts.significand += 0x0000'0000'0000'0400; + fp80_parts.significand &= 0xffff'ffff'ffff'f800; + if (fp80_parts.significand == 0) { + fp80_parts.exponent++; + fp80_parts.significand = 0x8000'0000'0000'0000; + } + memcpy(&wide_result, &fp80_parts, sizeof(wide_result)); + } + return FloatType(wide_result); +} + +// Note: first round of rm/frm verification must happen before that function because RISC-V +// postulates that invalid rm or frm should trigger illegal instruction exception. +// Here we can assume both rm and frm fields are valid. +template +inline auto ExecuteFloatOperation(uint8_t requested_rm, + uint8_t current_rm, + OperationType operation, + Args... 
args) + -> std::enable_if_t<(std::is_same_v && ...), FloatType> { + int host_requested_rm = ToHostRoundingMode(requested_rm); + int host_current_rm = ToHostRoundingMode(current_rm); + if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) { + uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm; + if (rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm(operation, args...); + } + return operation(args...); + } + ScopedRoundingMode scoped_rounding_mode{host_requested_rm}; + if (requested_rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm(operation, args...); + } + return operation(args...); +} + +} // namespace berberis::intrinsics + +#endif // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h index f1f6f75d..2785b92d 100644 --- a/intrinsics/include/berberis/intrinsics/type_traits.h +++ b/intrinsics/include/berberis/intrinsics/type_traits.h @@ -93,11 +93,17 @@ struct TypeTraits { template <> struct TypeTraits { using Int = int32_t; + using Wide = intrinsics::Float64; }; template <> struct TypeTraits { using Int = int64_t; + using Narrow = intrinsics::Float32; +#if defined(__i386__) || defined(__x86_64__) + static_assert(sizeof(long double) > sizeof(intrinsics::Float64)); + using Wide = long double; +#endif }; #if defined(__x86_64__) -- cgit v1.2.3