diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-15 00:37:31 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-15 00:37:31 +0000 |
commit | f940c1f5262a9c153421c7e34384a6dd3f686d95 (patch) | |
tree | a91a3b3bcfd79f16aaad77c86a21d799c005d1d0 | |
parent | 2e3405345ffa06e045abd38aede28b36ad572647 (diff) | |
parent | 87ada30fbde026edeb4a9300daad285f188e6b69 (diff) | |
download | binary_translation-f940c1f5262a9c153421c7e34384a6dd3f686d95.tar.gz |
Snap for 11220357 from 87ada30fbde026edeb4a9300daad285f188e6b69 to 24Q1-release
Refs: android-14.0.0_r37, android-14.0.0_r36, android-14.0.0_r35, android-14.0.0_r34, android-14.0.0_r33, android-14.0.0_r32, android-14.0.0_r31, android-14.0.0_r30, android-14.0.0_r29, android14-qpr2-s5-release, android14-qpr2-s4-release, android14-qpr2-s3-release, android14-qpr2-s2-release, android14-qpr2-s1-release, android14-qpr2-release
Change-Id: Ida0c9e19cfea25dcca6d4e4b1db91ba8740b0926
-rw-r--r-- | base/include/berberis/base/bit_util.h | 5 | ||||
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/decoder.h | 343 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter.cc | 62 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 208 | ||||
-rw-r--r-- | intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h | 16 |
5 files changed, 456 insertions, 178 deletions
diff --git a/base/include/berberis/base/bit_util.h b/base/include/berberis/base/bit_util.h index e3610dde..4fb08470 100644 --- a/base/include/berberis/base/bit_util.h +++ b/base/include/berberis/base/bit_util.h @@ -70,8 +70,9 @@ constexpr bool IsAligned(T* p, size_t align) { template <typename T> constexpr T BitUtilLog2(T x) { static_assert(std::is_integral_v<T>, "Log2: T must be integral"); - DCHECK(IsPowerOf2(x)); - return x == 1 ? 0 : BitUtilLog2(x >> 1) + 1; + CHECK(IsPowerOf2(x)); + // TODO(b/260725458): Use std::countr_zero after C++20 becomes available + return __builtin_ctz(x); } // Verify that argument value fits into a target. diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index 7ad556c1..159d591c 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -341,7 +341,10 @@ class Decoder { }; enum class VOpMVvOpcode : uint8_t { + kVmaddvv = 0b101001, + kVnmsubvv = 0b101011, kVmaccvv = 0b101101, + kVnmsacvv = 0b101111, kMaxValue = 0b111111 }; @@ -390,7 +393,10 @@ class Decoder { }; enum class VOpMVxOpcode : uint8_t { + kVmaddvx = 0b101001, + kVnmsubvx = 0b101011, kVmaccvx = 0b101101, + kVnmsacvx = 0b101111, kMaxValue = 0b111111 }; @@ -760,7 +766,7 @@ class Decoder { } uint8_t DecodeCompressedInstruction() { - CompressedOpcode opcode_bits{(GetBits<uint8_t, 13, 3>() << 2) | GetBits<uint8_t, 0, 2>()}; + CompressedOpcode opcode_bits{(GetBits<13, 3>() << 2) | GetBits<0, 2>()}; switch (opcode_bits) { case CompressedOpcode::kAddi4spn: @@ -837,9 +843,9 @@ class Decoder { } void DecodeCompressedLi() { - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 12, 1>(); - uint8_t rd = GetBits<uint8_t, 7, 5>(); + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<12, 1>(); + uint8_t rd = GetBits<7, 5>(); int8_t imm = SignExtend<6>((high_imm << 5) + low_imm); const OpImmArgs args = { .opcode = 
OpImmOpcode::kAddi, @@ -851,11 +857,11 @@ class Decoder { } void DecodeCompressedMiscAlu() { - uint8_t r = GetBits<uint8_t, 7, 3>() + 8; - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 12, 1>(); + uint8_t r = GetBits<7, 3>() + 8; + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<12, 1>(); uint8_t imm = (high_imm << 5) + low_imm; - switch (GetBits<uint8_t, 10, 2>()) { + switch (GetBits<10, 2>()) { case 0b00: { const ShiftImmArgs args = { .opcode = ShiftImmOpcode::kSrli, @@ -884,10 +890,10 @@ class Decoder { return insn_consumer_->OpImm(args); } } - uint8_t rs2 = GetBits<uint8_t, 2, 3>() + 8; - if (GetBits<uint8_t, 12, 1>() == 0) { + uint8_t rs2 = GetBits<2, 3>() + 8; + if (GetBits<12, 1>() == 0) { OpOpcode opcode; - switch (GetBits<uint8_t, 5, 2>()) { + switch (GetBits<5, 2>()) { case 0b00: opcode = OpOpcode::kSub; break; @@ -910,7 +916,7 @@ class Decoder { return insn_consumer_->Op(args); } else { Op32Opcode opcode; - switch (GetBits<uint8_t, 5, 2>()) { + switch (GetBits<5, 2>()) { case 0b00: opcode = Op32Opcode::kSubw; break; @@ -932,8 +938,8 @@ class Decoder { template <auto kOperandType> void DecodeCompressedStoresp() { - uint8_t raw_imm = GetBits<uint8_t, 7, 6>(); - uint8_t rs2 = GetBits<uint8_t, 2, 5>(); + uint8_t raw_imm = GetBits<7, 6>(); + uint8_t rs2 = GetBits<2, 5>(); constexpr uint8_t k32bit[64] = { 0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31, 0x02, 0x12, 0x22, 0x32, 0x03, 0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34, 0x05, 0x15, 0x25, 0x35, 0x06, 0x16, @@ -957,9 +963,9 @@ class Decoder { } void DecodeCompressedLuiAddi16sp() { - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 12, 1>(); - uint8_t rd = GetBits<uint8_t, 7, 5>(); + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<12, 1>(); + uint8_t rd = GetBits<7, 5>(); if (rd != 2) { int32_t imm = SignExtend<18>((high_imm << 17) + (low_imm << 12)); const UpperImmArgs args = { @@ -986,8 +992,8 @@ class 
Decoder { template <enum LoadStore kLoadStore, auto kOperandType> void DecodeCompressedLoadStore() { - uint8_t low_imm = GetBits<uint8_t, 5, 2>(); - uint8_t high_imm = GetBits<uint8_t, 10, 3>(); + uint8_t low_imm = GetBits<5, 2>(); + uint8_t high_imm = GetBits<10, 3>(); uint8_t imm; if constexpr ((uint8_t(kOperandType) & 1) == 0) { constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44}; @@ -995,8 +1001,8 @@ class Decoder { } else { imm = (low_imm << 6 | high_imm << 3); } - uint8_t rd = GetBits<uint8_t, 2, 3>(); - uint8_t rs = GetBits<uint8_t, 7, 3>(); + uint8_t rd = GetBits<2, 3>(); + uint8_t rs = GetBits<7, 3>(); if constexpr (kLoadStore == LoadStore::kStore) { const StoreArgsTemplate<decltype(kOperandType)> args = { .operand_type = kOperandType, @@ -1018,9 +1024,9 @@ class Decoder { template <auto kOperandType> void DecodeCompressedLoadsp() { - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 12, 1>(); - uint8_t rd = GetBits<uint8_t, 7, 5>(); + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<12, 1>(); + uint8_t rd = GetBits<7, 5>(); constexpr uint8_t k32bitLow[32] = {0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31, 0x02, 0x12, 0x22, 0x32, 0x03, 0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34, 0x05, 0x15, 0x25, 0x35, @@ -1041,10 +1047,10 @@ class Decoder { } void DecodeCompressedAddi() { - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 12, 1>(); + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<12, 1>(); int8_t imm = SignExtend<6>(high_imm << 5 | low_imm); - uint8_t r = GetBits<uint8_t, 7, 5>(); + uint8_t r = GetBits<7, 5>(); if (r == 0 || imm == 0) { insn_consumer_->Nop(); } @@ -1058,10 +1064,10 @@ class Decoder { } void DecodeCompressedAddiw() { - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 12, 1>(); + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<12, 1>(); int8_t imm = SignExtend<6>(high_imm << 5 | low_imm); - 
uint8_t r = GetBits<uint8_t, 7, 5>(); + uint8_t r = GetBits<7, 5>(); const OpImm32Args args = { .opcode = OpImm32Opcode::kAddiw, .dst = r, @@ -1076,12 +1082,12 @@ class Decoder { constexpr uint8_t kBLow[32] = {0x00, 0x20, 0x02, 0x22, 0x04, 0x24, 0x06, 0x26, 0x40, 0x60, 0x42, 0x62, 0x44, 0x64, 0x46, 0x66, 0x80, 0xa0, 0x82, 0xa2, 0x84, 0xa4, 0x86, 0xa6, 0xc0, 0xe0, 0xc2, 0xe2, 0xc4, 0xe4, 0xc6, 0xe6}; - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 10, 3>(); - uint8_t rs = GetBits<uint8_t, 7, 3>(); + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<10, 3>(); + uint8_t rs = GetBits<7, 3>(); const BranchArgs args = { - .opcode = BranchOpcode(GetBits<uint8_t, 13, 1>()), + .opcode = BranchOpcode(GetBits<13, 1>()), .src1 = uint8_t(8 + rs), .src2 = 0, .offset = static_cast<int16_t>(SignExtend<9>(kBHigh[high_imm] + kBLow[low_imm])), @@ -1104,8 +1110,7 @@ class Decoder { }; const JumpAndLinkArgs args = { .dst = 0, - .offset = - bit_cast<int16_t>(kJHigh[GetBits<uint16_t, 8, 5>()]) | kJLow[GetBits<uint16_t, 2, 6>()], + .offset = bit_cast<int16_t>(kJHigh[GetBits<8, 5>()]) | kJLow[GetBits<2, 6>()], .insn_len = 2, }; insn_consumer_->JumpAndLink(args); @@ -1116,8 +1121,7 @@ class Decoder { 0x0, 0x40, 0x80, 0xc0, 0x4, 0x44, 0x84, 0xc4, 0x8, 0x48, 0x88, 0xc8, 0xc, 0x4c, 0x8c, 0xcc}; constexpr uint8_t kAddi4spnLow[16] = { 0x0, 0x2, 0x1, 0x3, 0x10, 0x12, 0x11, 0x13, 0x20, 0x22, 0x21, 0x23, 0x30, 0x32, 0x31, 0x33}; - int16_t imm = (kAddi4spnHigh[GetBits<uint8_t, 9, 4>()] | kAddi4spnLow[GetBits<uint8_t, 5, 4>()]) - << 2; + int16_t imm = (kAddi4spnHigh[GetBits<9, 4>()] | kAddi4spnLow[GetBits<5, 4>()]) << 2; // If immediate is zero then this instruction is treated as unimplemented. // This includes RISC-V dedicated 16bit “unimplemented instruction” 0x0000. 
if (imm == 0) { @@ -1125,7 +1129,7 @@ class Decoder { } const OpImmArgs args = { .opcode = OpImmOpcode::kAddi, - .dst = uint8_t(8 + GetBits<uint8_t, 2, 3>()), + .dst = uint8_t(8 + GetBits<2, 3>()), .src = 2, .imm = imm, }; @@ -1133,9 +1137,9 @@ class Decoder { } void DecodeCompressedJr_Jalr_Mv_Add() { - uint8_t r = GetBits<uint8_t, 7, 5>(); - uint8_t rs2 = GetBits<uint8_t, 2, 5>(); - if (GetBits<uint8_t, 12, 1>()) { + uint8_t r = GetBits<7, 5>(); + uint8_t rs2 = GetBits<2, 5>(); + if (GetBits<12, 1>()) { if (r == 0 && rs2 == 0) { const SystemArgs args = { .opcode = SystemOpcode::kEbreak, @@ -1180,9 +1184,9 @@ class Decoder { } void DecodeCompressedSlli() { - uint8_t r = GetBits<uint8_t, 7, 5>(); - uint8_t low_imm = GetBits<uint8_t, 2, 5>(); - uint8_t high_imm = GetBits<uint8_t, 12, 1>(); + uint8_t r = GetBits<7, 5>(); + uint8_t low_imm = GetBits<2, 5>(); + uint8_t high_imm = GetBits<12, 1>(); uint8_t imm = (high_imm << 5) + low_imm; const ShiftImmArgs args = { .opcode = ShiftImmOpcode::kSlli, @@ -1194,7 +1198,7 @@ class Decoder { } uint8_t DecodeBaseInstruction() { - BaseOpcode opcode_bits{GetBits<uint8_t, 2, 5>()}; + BaseOpcode opcode_bits{GetBits<2, 5>()}; switch (opcode_bits) { case BaseOpcode::kLoad: @@ -1264,11 +1268,13 @@ class Decoder { } private: - template <typename ResultType, uint32_t start, uint32_t size> - ResultType GetBits() { - static_assert(std::is_unsigned_v<ResultType>, "Only unsigned types are supported"); - static_assert(sizeof(ResultType) * CHAR_BIT >= size, "Too small ResultType for size"); + template <uint32_t start, uint32_t size> + auto GetBits() { static_assert((start + size) <= 32 && size > 0, "Invalid start or size value"); + using ResultType = std::conditional_t< + size == 1, + bool, + std::conditional_t<size <= 8, uint8_t, std::conditional_t<size <= 16, uint16_t, uint32_t>>>; uint32_t shifted_val = code_ << (32 - start - size); return static_cast<ResultType>(shifted_val >> (32 - size)); } @@ -1294,32 +1300,32 @@ class Decoder { } void 
DecodeMiscMem() { - uint8_t low_opcode = GetBits<uint8_t, 12, 3>(); + uint8_t low_opcode = GetBits<12, 3>(); switch (low_opcode) { case 0b000: { - uint8_t high_opcode = GetBits<uint8_t, 28, 4>(); + uint8_t high_opcode = GetBits<28, 4>(); FenceOpcode opcode = FenceOpcode{high_opcode}; const FenceArgs args = { .opcode = opcode, - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), - .sw = bool(GetBits<uint8_t, 20, 1>()), - .sr = bool(GetBits<uint8_t, 21, 1>()), - .so = bool(GetBits<uint8_t, 22, 1>()), - .si = bool(GetBits<uint8_t, 23, 1>()), - .pw = bool(GetBits<uint8_t, 24, 1>()), - .pr = bool(GetBits<uint8_t, 25, 1>()), - .po = bool(GetBits<uint8_t, 26, 1>()), - .pi = bool(GetBits<uint8_t, 27, 1>()), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), + .sw = GetBits<20, 1>(), + .sr = GetBits<21, 1>(), + .so = GetBits<22, 1>(), + .si = GetBits<23, 1>(), + .pw = GetBits<24, 1>(), + .pr = GetBits<25, 1>(), + .po = GetBits<26, 1>(), + .pi = GetBits<27, 1>(), }; insn_consumer_->Fence(args); break; } case 0b001: { - uint16_t imm = GetBits<uint16_t, 20, 12>(); + uint16_t imm = GetBits<20, 12>(); const FenceIArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), .imm = SignExtend<12>(imm), }; insn_consumer_->FenceI(args); @@ -1332,8 +1338,8 @@ class Decoder { template <typename OpcodeType> void DecodeOp() { - uint8_t low_opcode = GetBits<uint8_t, 12, 3>(); - uint8_t high_opcode = GetBits<uint8_t, 25, 7>(); + uint8_t low_opcode = GetBits<12, 3>(); + uint8_t high_opcode = GetBits<25, 7>(); uint16_t opcode_bits = static_cast<int16_t>(low_opcode | (high_opcode << 3)); OpcodeType opcode{opcode_bits}; OpSingleInputOpcode single_input_opcode{opcode_bits}; @@ -1348,29 +1354,29 @@ class Decoder { } const OpArgsTemplate<OpcodeType> args = { .opcode = opcode, - .dst = GetBits<uint8_t, 7, 5>(), - .src1 = GetBits<uint8_t, 15, 5>(), - .src2 = GetBits<uint8_t, 20, 5>(), + .dst = 
GetBits<7, 5>(), + .src1 = GetBits<15, 5>(), + .src2 = GetBits<20, 5>(), }; insn_consumer_->Op(args); } void DecodeSingleInputOp(OpSingleInputOpcode opcode) { - uint8_t src1 = GetBits<uint8_t, 15, 5>(); - uint8_t src2 = GetBits<uint8_t, 20, 5>(); + uint8_t src1 = GetBits<15, 5>(); + uint8_t src2 = GetBits<20, 5>(); if (src2 != 0) { return Undefined(); } - const OpSingleInputArgs args = {.opcode = opcode, .dst = GetBits<uint8_t, 7, 5>(), .src = src1}; + const OpSingleInputArgs args = {.opcode = opcode, .dst = GetBits<7, 5>(), .src = src1}; insn_consumer_->OpSingleInput(args); } void DecodeAmo() { - uint8_t low_opcode = GetBits<uint8_t, 12, 3>(); - uint8_t high_opcode = GetBits<uint8_t, 27, 5>(); + uint8_t low_opcode = GetBits<12, 3>(); + uint8_t high_opcode = GetBits<27, 5>(); // lr instruction must have rs2 == 0 - if (high_opcode == 0b00010 && GetBits<uint8_t, 20, 5>() != 0) { + if (high_opcode == 0b00010 && GetBits<20, 5>() != 0) { return Undefined(); } AmoOpcode opcode = AmoOpcode{high_opcode}; @@ -1378,43 +1384,43 @@ class Decoder { const AmoArgs args = { .opcode = opcode, .operand_type = operand_type, - .dst = GetBits<uint8_t, 7, 5>(), - .src1 = GetBits<uint8_t, 15, 5>(), - .src2 = GetBits<uint8_t, 20, 5>(), - .rl = bool(GetBits<uint8_t, 25, 1>()), - .aq = bool(GetBits<uint8_t, 26, 1>()), + .dst = GetBits<7, 5>(), + .src1 = GetBits<15, 5>(), + .src2 = GetBits<20, 5>(), + .rl = GetBits<25, 1>(), + .aq = GetBits<26, 1>(), }; insn_consumer_->Amo(args); } void DecodeFma() { - uint8_t operand_type = GetBits<uint8_t, 25, 2>(); - uint8_t opcode_bits = GetBits<uint8_t, 2, 2>(); + uint8_t operand_type = GetBits<25, 2>(); + uint8_t opcode_bits = GetBits<2, 2>(); const FmaArgs args = { .opcode = FmaOpcode(opcode_bits), .operand_type = FloatOperandType(operand_type), - .dst = GetBits<uint8_t, 7, 5>(), - .src1 = GetBits<uint8_t, 15, 5>(), - .src2 = GetBits<uint8_t, 20, 5>(), - .src3 = GetBits<uint8_t, 27, 5>(), - .rm = GetBits<uint8_t, 12, 3>(), + .dst = GetBits<7, 5>(), + 
.src1 = GetBits<15, 5>(), + .src2 = GetBits<20, 5>(), + .src3 = GetBits<27, 5>(), + .rm = GetBits<12, 3>(), }; insn_consumer_->Fma(args); } void DecodeLui() { - int32_t imm = GetBits<uint32_t, 12, 20>(); + int32_t imm = GetBits<12, 20>(); const UpperImmArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), + .dst = GetBits<7, 5>(), .imm = imm << 12, }; insn_consumer_->Lui(args); } void DecodeAuipc() { - int32_t imm = GetBits<uint32_t, 12, 20>(); + int32_t imm = GetBits<12, 20>(); const UpperImmArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), + .dst = GetBits<7, 5>(), .imm = imm << 12, }; insn_consumer_->Auipc(args); @@ -1424,19 +1430,19 @@ class Decoder { void DecodeLoad() { OperandTypeEnum operand_type; if constexpr (std::is_same_v<OperandTypeEnum, FloatOperandType>) { - auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<uint8_t, 12, 3>()]; + auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<12, 3>()]; if (!decoded_operand_type.has_value()) { return Undefined(); } operand_type = *decoded_operand_type; } else { - operand_type = OperandTypeEnum{GetBits<uint8_t, 12, 3>()}; + operand_type = OperandTypeEnum{GetBits<12, 3>()}; } const LoadArgsTemplate<OperandTypeEnum> args = { .operand_type = operand_type, - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), - .offset = SignExtend<12>(GetBits<uint16_t, 20, 12>()), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), + .offset = SignExtend<12>(GetBits<20, 12>()), }; insn_consumer_->Load(args); } @@ -1445,23 +1451,23 @@ class Decoder { void DecodeStore() { OperandTypeEnum operand_type; if constexpr (std::is_same_v<OperandTypeEnum, FloatOperandType>) { - auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<uint8_t, 12, 3>()]; + auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<12, 3>()]; if (!decoded_operand_type.has_value()) { return Undefined(); } operand_type = *decoded_operand_type; } else { - operand_type = 
OperandTypeEnum{GetBits<uint8_t, 12, 3>()}; + operand_type = OperandTypeEnum{GetBits<12, 3>()}; } - uint16_t low_imm = GetBits<uint16_t, 7, 5>(); - uint16_t high_imm = GetBits<uint16_t, 25, 7>(); + uint16_t low_imm = GetBits<7, 5>(); + uint16_t high_imm = GetBits<25, 7>(); const StoreArgsTemplate<OperandTypeEnum> args = { .operand_type = operand_type, - .src = GetBits<uint8_t, 15, 5>(), + .src = GetBits<15, 5>(), .offset = SignExtend<12>(static_cast<int16_t>(low_imm | (high_imm << 5))), - .data = GetBits<uint8_t, 20, 5>(), + .data = GetBits<20, 5>(), }; insn_consumer_->Store(args); } @@ -1471,37 +1477,36 @@ class Decoder { typename BitmanipOpcodeType, uint32_t kShiftFieldSize> void DecodeOp() { - uint8_t low_opcode = GetBits<uint8_t, 12, 3>(); + uint8_t low_opcode = GetBits<12, 3>(); if (low_opcode != 0b001 && low_opcode != 0b101) { OpOpcodeType opcode{low_opcode}; - uint16_t imm = GetBits<uint16_t, 20, 12>(); + uint16_t imm = GetBits<20, 12>(); const OpImmArgsTemplate<OpOpcodeType> args = { .opcode = opcode, - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), .imm = SignExtend<12>(imm), }; insn_consumer_->OpImm(args); - } else if ((GetBits<uint16_t, 31, 1>() + - GetBits<uint16_t, 20 + kShiftFieldSize, 10 - kShiftFieldSize>()) == + } else if ((GetBits<31, 1>() + GetBits<20 + kShiftFieldSize, 10 - kShiftFieldSize>()) == 0) { // For Canonical Shift Instructions from RV64G the opcode contains all // zeros except for the 30th (second highest) bit. 
- uint16_t high_opcode = GetBits<uint16_t, 20 + kShiftFieldSize, 12 - kShiftFieldSize>(); + uint16_t high_opcode = GetBits<20 + kShiftFieldSize, 12 - kShiftFieldSize>(); ShiftOpcodeType opcode{ static_cast<std::underlying_type_t<ShiftOpcodeType>>(low_opcode | (high_opcode << 3))}; const ShiftImmArgsTemplate<ShiftOpcodeType> args = { .opcode = opcode, - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), - .imm = GetBits<uint8_t, 20, kShiftFieldSize>(), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), + .imm = GetBits<20, kShiftFieldSize>(), }; insn_consumer_->OpImm(args); } else { - uint8_t shamt = GetBits<uint8_t, 20, kShiftFieldSize>(); - uint16_t high_opcode = GetBits<uint16_t, 20 + kShiftFieldSize, 12 - kShiftFieldSize>(); + uint8_t shamt = GetBits<20, kShiftFieldSize>(); + uint16_t high_opcode = GetBits<20 + kShiftFieldSize, 12 - kShiftFieldSize>(); BitmanipOpcodeType opcode{static_cast<uint16_t>(low_opcode | (high_opcode << 3))}; bool has_shamt = false; @@ -1527,14 +1532,14 @@ class Decoder { } // TODO(b/291851792): Refactor instructions with shamt into ShiftImmArgs if (!has_shamt) { - high_opcode = GetBits<uint16_t, 20, 12>(); + high_opcode = GetBits<20, 12>(); opcode = BitmanipOpcodeType{static_cast<uint16_t>(low_opcode | (high_opcode << 3))}; shamt = 0; } const BitmanipImmArgsTemplate<BitmanipOpcodeType> args = { .opcode = opcode, - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), .shamt = shamt, }; insn_consumer_->OpImm(args); @@ -1542,20 +1547,20 @@ class Decoder { } void DecodeBranch() { - BranchOpcode opcode{GetBits<uint8_t, 12, 3>()}; + BranchOpcode opcode{GetBits<12, 3>()}; // Decode the offset. 
- auto low_imm = GetBits<uint16_t, 8, 4>(); - auto mid_imm = GetBits<uint16_t, 25, 6>(); - auto bit11_imm = GetBits<uint16_t, 7, 1>(); - auto bit12_imm = GetBits<uint16_t, 31, 1>(); + auto low_imm = GetBits<8, 4>(); + auto mid_imm = GetBits<25, 6>(); + auto bit11_imm = GetBits<7, 1>(); + auto bit12_imm = GetBits<31, 1>(); auto offset = static_cast<int16_t>(low_imm | (mid_imm << 4) | (bit11_imm << 10) | (bit12_imm << 11)); const BranchArgs args = { .opcode = opcode, - .src1 = GetBits<uint8_t, 15, 5>(), - .src2 = GetBits<uint8_t, 20, 5>(), + .src1 = GetBits<15, 5>(), + .src2 = GetBits<20, 5>(), // The offset is encoded as 2-byte units, we need to multiply by 2. .offset = SignExtend<13>(static_cast<int16_t>(offset * 2)), }; @@ -1564,15 +1569,15 @@ class Decoder { void DecodeJumpAndLink() { // Decode the offset. - auto low_imm = GetBits<uint32_t, 21, 10>(); - auto mid_imm = GetBits<uint32_t, 12, 8>(); - auto bit11_imm = GetBits<uint32_t, 20, 1>(); - auto bit20_imm = GetBits<uint32_t, 31, 1>(); + auto low_imm = GetBits<21, 10>(); + auto mid_imm = GetBits<12, 8>(); + auto bit11_imm = GetBits<20, 1>(); + auto bit20_imm = GetBits<31, 1>(); auto offset = static_cast<int32_t>(low_imm | (bit11_imm << 10) | (mid_imm << 11) | (bit20_imm << 19)); const JumpAndLinkArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), + .dst = GetBits<7, 5>(), // The offset is encoded as 2-byte units, we need to multiply by 2. .offset = SignExtend<21>(offset * 2), .insn_len = 4, @@ -1584,13 +1589,13 @@ class Decoder { // Bit #29 = 1: means rm is an opcode extension and not operand. // Bit #30 = 1: means rs2 is an opcode extension and not operand. // Bit #31 = 1: selects general purpose register instead of floating point register as target. 
- uint8_t operand_type = GetBits<uint8_t, 25, 2>(); - uint8_t opcode_bits = GetBits<uint8_t, 27, 2>(); - uint8_t rd = GetBits<uint8_t, 7, 5>(); - uint8_t rs1 = GetBits<uint8_t, 15, 5>(); - uint8_t rs2 = GetBits<uint8_t, 20, 5>(); - uint8_t rm = GetBits<uint8_t, 12, 3>(); - switch (GetBits<uint8_t, 29, 3>()) { + uint8_t operand_type = GetBits<25, 2>(); + uint8_t opcode_bits = GetBits<27, 2>(); + uint8_t rd = GetBits<7, 5>(); + uint8_t rs1 = GetBits<15, 5>(); + uint8_t rs2 = GetBits<20, 5>(); + uint8_t rm = GetBits<12, 3>(); + switch (GetBits<29, 3>()) { case 0b000: { const OpFpArgs args = { .opcode = OpFpOpcode(opcode_bits), @@ -1728,14 +1733,14 @@ class Decoder { } void DecodeOpV() { - uint8_t low_opcode = GetBits<uint8_t, 12, 3>(); - bool vm = GetBits<uint8_t, 25, 1>(); - uint8_t opcode = GetBits<uint8_t, 26, 6>(); - uint8_t dst = GetBits<uint8_t, 7, 5>(); - // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 2rd operand. + uint8_t low_opcode = GetBits<12, 3>(); + bool vm = GetBits<25, 1>(); + uint8_t opcode = GetBits<26, 6>(); + uint8_t dst = GetBits<7, 5>(); + // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 3rd operand. // FMA instructions are exception, but there are not that many of these. 
- uint8_t src1 = GetBits<uint8_t, 20, 5>(); - uint8_t src2 = GetBits<uint8_t, 15, 5>(); + uint8_t src1 = GetBits<20, 5>(); + uint8_t src2 = GetBits<15, 5>(); switch (low_opcode) { case 0b000: { const VOpIVvArgs args = { @@ -1788,25 +1793,25 @@ class Decoder { return insn_consumer_->OpVector(args); } case 0b111: - if (GetBits<uint8_t, 31, 1>() == 0) { + if (GetBits<31, 1>() == 0) { const VsetvliArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), - .vtype = GetBits<uint16_t, 20, 11>(), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), + .vtype = GetBits<20, 11>(), }; return insn_consumer_->Vsetvli(args); - } else if (GetBits<uint8_t, 30, 1>() == 1) { + } else if (GetBits<30, 1>() == 1) { const VsetivliArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), - .avl = GetBits<uint8_t, 15, 5>(), - .vtype = GetBits<uint16_t, 20, 10>(), + .dst = GetBits<7, 5>(), + .avl = GetBits<15, 5>(), + .vtype = GetBits<20, 10>(), }; return insn_consumer_->Vsetivli(args); - } else if (GetBits<uint8_t, 25, 6>() == 0) { + } else if (GetBits<25, 6>() == 0) { const VsetvlArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), - .src1 = GetBits<uint8_t, 15, 5>(), - .src2 = GetBits<uint8_t, 20, 5>(), + .dst = GetBits<7, 5>(), + .src1 = GetBits<15, 5>(), + .src2 = GetBits<20, 5>(), }; return insn_consumer_->Vsetvl(args); } @@ -1814,46 +1819,46 @@ class Decoder { } void DecodeSystem() { - uint8_t low_opcode = GetBits<uint8_t, 12, 2>(); + uint8_t low_opcode = GetBits<12, 2>(); if (low_opcode == 0b00) { - int32_t opcode = GetBits<uint32_t, 7, 25>(); + int32_t opcode = GetBits<7, 25>(); const SystemArgs args = { .opcode = SystemOpcode(opcode), }; return insn_consumer_->System(args); } - if (GetBits<uint8_t, 14, 1>()) { + if (GetBits<14, 1>()) { CsrImmOpcode opcode = CsrImmOpcode(low_opcode); const CsrImmArgs args = { .opcode = opcode, - .dst = GetBits<uint8_t, 7, 5>(), - .imm = GetBits<uint8_t, 15, 5>(), - .csr = GetBits<uint16_t, 20, 12>(), + .dst = GetBits<7, 5>(), + .imm 
= GetBits<15, 5>(), + .csr = GetBits<20, 12>(), }; return insn_consumer_->Csr(args); } CsrOpcode opcode = CsrOpcode(low_opcode); const CsrArgs args = { .opcode = opcode, - .dst = GetBits<uint8_t, 7, 5>(), - .src = GetBits<uint8_t, 15, 5>(), - .csr = GetBits<uint16_t, 20, 12>(), + .dst = GetBits<7, 5>(), + .src = GetBits<15, 5>(), + .csr = GetBits<20, 12>(), }; return insn_consumer_->Csr(args); } void DecodeJumpAndLinkRegister() { - if (GetBits<uint8_t, 12, 3>() != 0b000) { + if (GetBits<12, 3>() != 0b000) { Undefined(); return; } // Decode sign-extend offset. - int16_t offset = GetBits<uint16_t, 20, 12>(); + int16_t offset = GetBits<20, 12>(); offset = static_cast<int16_t>(offset << 4) >> 4; const JumpAndLinkRegisterArgs args = { - .dst = GetBits<uint8_t, 7, 5>(), - .base = GetBits<uint8_t, 15, 5>(), + .dst = GetBits<7, 5>(), + .base = GetBits<15, 5>(), .offset = offset, .insn_len = 4, }; diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index b48d9349..18b94dc3 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -654,9 +654,18 @@ class Interpreter { template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta> void OpVector(const Decoder::VOpMVvArgs& args) { switch (args.opcode) { + case Decoder::VOpMVvOpcode::kVmaddvv: + return OpVectorvv<intrinsics::Vmaddvv<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsubvv: + return OpVectorvv<intrinsics::Vnmsubvv<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, args.src2); case Decoder::VOpMVvOpcode::kVmaccvv: return OpVectorvv<intrinsics::Vmaccvv<ElementType, vta>, ElementType, vlmul, vta>( args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsacvv: + return OpVectorvv<intrinsics::Vnmsacvv<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, args.src2); default: Unimplemented(); } @@ -730,9 +739,18 @@ class 
Interpreter { template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta> void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) { switch (args.opcode) { + case Decoder::VOpMVxOpcode::kVmaddvx: + return OpVectorvx<intrinsics::Vmaddvx<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsubvx: + return OpVectorvx<intrinsics::Vnmsubvx<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, arg2); case Decoder::VOpMVxOpcode::kVmaccvx: return OpVectorvx<intrinsics::Vmaccvx<ElementType, vta>, ElementType, vlmul, vta>( args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsacvx: + return OpVectorvx<intrinsics::Vnmsacvx<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, arg2); default: Unimplemented(); } @@ -926,12 +944,30 @@ class Interpreter { InactiveProcessing vma> void OpVector(const Decoder::VOpMVvArgs& args) { switch (args.opcode) { + case Decoder::VOpMVvOpcode::kVmaddvv: + return OpVectorvv<intrinsics::Vmaddvvm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsubvv: + return OpVectorvv<intrinsics::Vnmsubvvm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); case Decoder::VOpMVvOpcode::kVmaccvv: return OpVectorvv<intrinsics::Vmaccvvm<ElementType, vta, vma>, - ElementType, - vlmul, - vta, - vma>(args.dst, args.src1, args.src2); + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsacvv: + return OpVectorvv<intrinsics::Vnmsacvvm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); default: Unimplemented(); } @@ -1023,12 +1059,30 @@ class Interpreter { InactiveProcessing vma> void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) { switch (args.opcode) { + case Decoder::VOpMVxOpcode::kVmaddvx: + return 
OpVectorvx<intrinsics::Vmaddvxm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsubvx: + return OpVectorvx<intrinsics::Vnmsubvxm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, arg2); case Decoder::VOpMVxOpcode::kVmaccvx: return OpVectorvx<intrinsics::Vmaccvxm<ElementType, vta, vma>, ElementType, vlmul, vta, vma>(args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsacvx: + return OpVectorvx<intrinsics::Vnmsacvxm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, arg2); default: Unimplemented(); } diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 724c4892..f4fe0a56 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -1853,6 +1853,212 @@ TEST_F(Riscv64InterpreterTest, TestVmacc) { {0xbb11'11bd'1313'bf15, 0x6061'0c62'630e'6465}, {0x05b1'0707'b309'09b5, 0xab01'01ad'0303'af05}}); } -} // namespace +TEST_F(Riscv64InterpreterTest, TestVnmsac) { + TestVectorInstruction(0xbd882457, // vnmsac.vv v8, v16, v24, v0.t + {{85, 83, 77, 67, 49, 35, 13, 243, 205, 179, 141, 99, 53, 3, 205, 147}, + {85, 19, 205, 131, 33, 227, 141, 51, 189, 115, 13, 163, 53, 195, 77, 211}, + {85, 211, 77, 195, 17, 163, 13, 115, 173, 51, 141, 227, 53, 131, 205, 19}, + {85, 147, 205, 3, 1, 99, 141, 179, 157, 243, 13, 35, 53, 67, 77, 83}, + {85, 83, 77, 67, 241, 35, 13, 243, 141, 179, 141, 99, 53, 3, 205, 147}, + {85, 19, 205, 131, 225, 227, 141, 51, 125, 115, 13, 163, 53, 195, 77, 211}, + {85, 211, 77, 195, 209, 163, 13, 115, 109, 51, 141, 227, 53, 131, 205, 19}, + {85, 147, 205, 3, 193, 99, 141, 179, 93, 243, 13, 35, 53, 67, 77, 83}}, + {{0x5555, 0x3d4d, 0x0031, 0xad0d, 0x2bcd, 0x9c8d, 0xe435, 0x0bcd}, + {0x1355, 0xfacd, 0xad21, 0x698d, 0xd7bd, 0x580d, 0x9f35, 0xc64d}, + {0xcd55, 0xb44d, 0x5611, 0x220d, 0x7fad, 0x0f8d, 0x5635, 0x7ccd}, + {0x8355, 0x69cd, 
0xfb01, 0xd68d, 0x239d, 0xc30d, 0x0935, 0x2f4d}, + {0x3555, 0x1b4d, 0x9bf1, 0x870d, 0xc38d, 0x728d, 0xb835, 0xddcd}, + {0xe355, 0xc8cd, 0x38e1, 0x338d, 0x5f7d, 0x1e0d, 0x6335, 0x884d}, + {0x8d55, 0x724d, 0xd1d1, 0xdc0d, 0xf76d, 0xc58d, 0x0a35, 0x2ecd}, + {0x3355, 0x17cd, 0x66c1, 0x808d, 0x8b5d, 0x690d, 0xad35, 0xd14d}}, + {{0x4d53'5555, 0x65bd'0031, 0x8068'2bcd, 0xa960'e435}, + {0xc68f'1355, 0xcbe6'ad21, 0xe38f'd7bd, 0x1996'9f35}, + {0x33c2'cd55, 0x2608'5611, 0x3aaf'7fad, 0x7dc4'5635}, + {0x94ee'8355, 0x7421'fb01, 0x85c7'239d, 0xd5ea'0935}, + {0xea12'3555, 0xb633'9bf1, 0xc4d6'c38d, 0x2207'b835}, + {0x332d'e355, 0xec3d'38e1, 0xf7de'5f7d, 0x621d'6335}, + {0x7041'8d55, 0x163e'd1d1, 0x1edd'f76d, 0x962b'0a35}, + {0xa14d'3355, 0x3438'66c1, 0x39d5'8b5d, 0xbe30'ad35}}, + {{0xe20d'2c41'4d53'5555, 0x4fdc'3c72'8068'2bcd}, + {0xbead'4fa7'c68f'1355, 0x1e70'55d0'e38f'd7bd}, + {0x7f35'5efe'33c2'cd55, 0xd0ec'5b1f'3aaf'7fad}, + {0x23a5'5a44'94ee'8355, 0x6750'4c5d'85c7'239d}, + {0xabfd'417a'ea12'3555, 0xe19c'298b'c4d6'c38d}, + {0x183d'14a1'332d'e355, 0x3fcf'f2a9'f7de'5f7d}, + {0x6864'd3b7'7041'8d55, 0x81eb'a7b8'1edd'f76d}, + {0x9c74'7ebd'a14d'3355, 0xa7ef'48b6'39d5'8b5d}}); + TestVectorInstruction(0xbd00e457, // vnmsac.vx v8, x1, v16, v0.t + {{85, 171, 1, 87, 173, 3, 89, 175, 5, 91, 177, 7, 93, 179, 9, 95}, + {181, 11, 97, 183, 13, 99, 185, 15, 101, 187, 17, 103, 189, 19, 105, 191}, + {21, 107, 193, 23, 109, 195, 25, 111, 197, 27, 113, 199, 29, 115, 201, 31}, + {117, 203, 33, 119, 205, 35, 121, 207, 37, 123, 209, 39, 125, 211, 41, 127}, + {213, 43, 129, 215, 45, 131, 217, 47, 133, 219, 49, 135, 221, 51, 137, 223}, + {53, 139, 225, 55, 141, 227, 57, 143, 229, 59, 145, 231, 61, 147, 233, 63}, + {149, 235, 65, 151, 237, 67, 153, 239, 69, 155, 241, 71, 157, 243, 73, 159}, + {245, 75, 161, 247, 77, 163, 249, 79, 165, 251, 81, 167, 253, 83, 169, 255}}, + {{0xab55, 0x0201, 0x58ad, 0xaf59, 0x0605, 0x5cb1, 0xb35d, 0x0a09}, + {0x60b5, 0xb761, 0x0e0d, 0x64b9, 0xbb65, 0x1211, 0x68bd, 0xbf69}, + 
{0x1615, 0x6cc1, 0xc36d, 0x1a19, 0x70c5, 0xc771, 0x1e1d, 0x74c9}, + {0xcb75, 0x2221, 0x78cd, 0xcf79, 0x2625, 0x7cd1, 0xd37d, 0x2a29}, + {0x80d5, 0xd781, 0x2e2d, 0x84d9, 0xdb85, 0x3231, 0x88dd, 0xdf89}, + {0x3635, 0x8ce1, 0xe38d, 0x3a39, 0x90e5, 0xe791, 0x3e3d, 0x94e9}, + {0xeb95, 0x4241, 0x98ed, 0xef99, 0x4645, 0x9cf1, 0xf39d, 0x4a49}, + {0xa0f5, 0xf7a1, 0x4e4d, 0xa4f9, 0xfba5, 0x5251, 0xa8fd, 0xffa9}}, + {{0x5756'ab55, 0xaf59'58ad, 0x075c'0605, 0x5f5e'b35d}, + {0xb761'60b5, 0x0f64'0e0d, 0x6766'bb65, 0xbf69'68bd}, + {0x176c'1615, 0x6f6e'c36d, 0xc771'70c5, 0x1f74'1e1d}, + {0x7776'cb75, 0xcf79'78cd, 0x277c'2625, 0x7f7e'd37d}, + {0xd781'80d5, 0x2f84'2e2d, 0x8786'db85, 0xdf89'88dd}, + {0x378c'3635, 0x8f8e'e38d, 0xe791'90e5, 0x3f94'3e3d}, + {0x9796'eb95, 0xef99'98ed, 0x479c'4645, 0x9f9e'f39d}, + {0xf7a1'a0f5, 0x4fa4'4e4d, 0xa7a6'fba5, 0xffa9'a8fd}}, + {{0xaf59'58ad'5756'ab55, 0x0a09'5e08'075c'0605}, + {0x64b9'6362'b761'60b5, 0xbf69'68bd'6766'bb65}, + {0x1a19'6e18'176c'1615, 0x74c9'7372'c771'70c5}, + {0xcf79'78cd'7776'cb75, 0x2a29'7e28'277c'2625}, + {0x84d9'8382'd781'80d5, 0xdf89'88dd'8786'db85}, + {0x3a39'8e38'378c'3635, 0x94e9'9392'e791'90e5}, + {0xef99'98ed'9796'eb95, 0x4a49'9e48'479c'4645}, + {0xa4f9'a3a2'f7a1'a0f5, 0xffa9'a8fd'a7a6'fba5}}); +} + +TEST_F(Riscv64InterpreterTest, TestVmadd) { + TestVectorInstruction(0xa5882457, // vmadd.vv v8, v16, v24, v0.t + {{0, 87, 174, 5, 93, 179, 10, 97, 185, 15, 102, 189, 20, 107, 194, 25}, + {112, 199, 30, 117, 205, 35, 122, 209, 41, 127, 214, 45, 132, 219, 50, 137}, + {224, 55, 142, 229, 61, 147, 234, 65, 153, 239, 70, 157, 244, 75, 162, 249}, + {80, 167, 254, 85, 173, 3, 90, 177, 9, 95, 182, 13, 100, 187, 18, 105}, + {192, 23, 110, 197, 29, 115, 202, 33, 121, 207, 38, 125, 212, 43, 130, 217}, + {48, 135, 222, 53, 141, 227, 58, 145, 233, 63, 150, 237, 68, 155, 242, 73}, + {160, 247, 78, 165, 253, 83, 170, 1, 89, 175, 6, 93, 180, 11, 98, 185}, + {16, 103, 190, 21, 109, 195, 26, 113, 201, 31, 118, 205, 36, 123, 210, 41}}, + 
{{0x5700, 0xafae, 0x085d, 0x610a, 0xb9b9, 0x1266, 0x6b14, 0xc3c2}, + {0x1c70, 0x751e, 0xcdcd, 0x267a, 0x7f29, 0xd7d6, 0x3084, 0x8932}, + {0xe1e0, 0x3a8e, 0x933d, 0xebea, 0x4499, 0x9d46, 0xf5f4, 0x4ea2}, + {0xa750, 0xfffe, 0x58ad, 0xb15a, 0x0a09, 0x62b6, 0xbb64, 0x1412}, + {0x6cc0, 0xc56e, 0x1e1d, 0x76ca, 0xcf79, 0x2826, 0x80d4, 0xd982}, + {0x3230, 0x8ade, 0xe38d, 0x3c3a, 0x94e9, 0xed96, 0x4644, 0x9ef2}, + {0xf7a0, 0x504e, 0xa8fd, 0x01aa, 0x5a59, 0xb306, 0x0bb4, 0x6462}, + {0xbd10, 0x15be, 0x6e6d, 0xc71a, 0x1fc9, 0x7876, 0xd124, 0x29d2}}, + {{0x0503'5700, 0x610a'085d, 0xbd10'b9b9, 0x1917'6b14}, + {0x751e'1c70, 0xd124'cdcd, 0x2d2b'7f29, 0x8932'3084}, + {0xe538'e1e0, 0x413f'933d, 0x9d46'4499, 0xf94c'f5f4}, + {0x5553'a750, 0xb15a'58ad, 0x0d61'0a09, 0x6967'bb64}, + {0xc56e'6cc0, 0x2175'1e1d, 0x7d7b'cf79, 0xd982'80d4}, + {0x3589'3230, 0x918f'e38d, 0xed96'94e9, 0x499d'4644}, + {0xa5a3'f7a0, 0x01aa'a8fd, 0x5db1'5a59, 0xb9b8'0bb4}, + {0x15be'bd10, 0x71c5'6e6d, 0xcdcc'1fc9, 0x29d2'd124}}, + {{0x610a'085d'0503'5700, 0xc3c2'15be'bd10'b9b9}, + {0x267a'2322'751e'1c70, 0x8932'3084'2d2b'7f29}, + {0xebea'3de7'e538'e1e0, 0x4ea2'4b49'9d46'4499}, + {0xb15a'58ad'5553'a750, 0x1412'660f'0d61'0a09}, + {0x76ca'7372'c56e'6cc0, 0xd982'80d4'7d7b'cf79}, + {0x3c3a'8e38'3589'3230, 0x9ef2'9b99'ed96'94e9}, + {0x01aa'a8fd'a5a3'f7a0, 0x6462'b65f'5db1'5a59}, + {0xc71a'c3c3'15be'bd10, 0x29d2'd124'cdcc'1fc9}}); + TestVectorInstruction(0xa500e457, // vmadd.vx v8, x1, v16, v0.t + {{114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145}, + {146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161}, + {162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177}, + {178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193}, + {194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209}, + {210, 211, 212, 213, 214, 
215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225}, + {226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241}}, + {{0x1d72, 0x1f74, 0x2176, 0x2378, 0x257a, 0x277c, 0x297e, 0x2b80}, + {0x2d82, 0x2f84, 0x3186, 0x3388, 0x358a, 0x378c, 0x398e, 0x3b90}, + {0x3d92, 0x3f94, 0x4196, 0x4398, 0x459a, 0x479c, 0x499e, 0x4ba0}, + {0x4da2, 0x4fa4, 0x51a6, 0x53a8, 0x55aa, 0x57ac, 0x59ae, 0x5bb0}, + {0x5db2, 0x5fb4, 0x61b6, 0x63b8, 0x65ba, 0x67bc, 0x69be, 0x6bc0}, + {0x6dc2, 0x6fc4, 0x71c6, 0x73c8, 0x75ca, 0x77cc, 0x79ce, 0x7bd0}, + {0x7dd2, 0x7fd4, 0x81d6, 0x83d8, 0x85da, 0x87dc, 0x89de, 0x8be0}, + {0x8de2, 0x8fe4, 0x91e6, 0x93e8, 0x95ea, 0x97ec, 0x99ee, 0x9bf0}}, + {{0x74c9'1d72, 0x78cd'2176, 0x7cd1'257a, 0x80d5'297e}, + {0x84d9'2d82, 0x88dd'3186, 0x8ce1'358a, 0x90e5'398e}, + {0x94e9'3d92, 0x98ed'4196, 0x9cf1'459a, 0xa0f5'499e}, + {0xa4f9'4da2, 0xa8fd'51a6, 0xad01'55aa, 0xb105'59ae}, + {0xb509'5db2, 0xb90d'61b6, 0xbd11'65ba, 0xc115'69be}, + {0xc519'6dc2, 0xc91d'71c6, 0xcd21'75ca, 0xd125'79ce}, + {0xd529'7dd2, 0xd92d'81d6, 0xdd31'85da, 0xe135'89de}, + {0xe539'8de2, 0xe93d'91e6, 0xed41'95ea, 0xf145'99ee}}, + {{0x2377'cc20'74c9'1d72, 0x2b7f'd428'7cd1'257a}, + {0x3387'dc30'84d9'2d82, 0x3b8f'e438'8ce1'358a}, + {0x4397'ec40'94e9'3d92, 0x4b9f'f448'9cf1'459a}, + {0x53a7'fc50'a4f9'4da2, 0x5bb0'0458'ad01'55aa}, + {0x63b8'0c60'b509'5db2, 0x6bc0'1468'bd11'65ba}, + {0x73c8'1c70'c519'6dc2, 0x7bd0'2478'cd21'75ca}, + {0x83d8'2c80'd529'7dd2, 0x8be0'3488'dd31'85da}, + {0x93e8'3c90'e539'8de2, 0x9bf0'4498'ed41'95ea}}); +} + +TEST_F(Riscv64InterpreterTest, TestVnmsub) { + TestVectorInstruction(0xad882457, // vnmsub.vv v8, v16, v24, v0.t + {{0, 173, 90, 7, 181, 97, 14, 187, 105, 21, 194, 111, 28, 201, 118, 35}, + {208, 125, 42, 215, 133, 49, 222, 139, 57, 229, 146, 63, 236, 153, 70, 243}, + {160, 77, 250, 167, 85, 1, 174, 91, 9, 181, 98, 15, 188, 105, 22, 195}, + {112, 29, 202, 119, 37, 209, 126, 43, 217, 133, 50, 223, 140, 57, 230, 147}, + {64, 237, 154, 71, 245, 161, 
78, 251, 169, 85, 2, 175, 92, 9, 182, 99}, + {16, 189, 106, 23, 197, 113, 30, 203, 121, 37, 210, 127, 44, 217, 134, 51}, + {224, 141, 58, 231, 149, 65, 238, 155, 73, 245, 162, 79, 252, 169, 86, 3}, + {176, 93, 10, 183, 101, 17, 190, 107, 25, 197, 114, 31, 204, 121, 38, 211}}, + {{0xad00, 0x5c5a, 0x0bb5, 0xbb0e, 0x6a69, 0x19c2, 0xc91c, 0x7876}, + {0x27d0, 0xd72a, 0x8685, 0x35de, 0xe539, 0x9492, 0x43ec, 0xf346}, + {0xa2a0, 0x51fa, 0x0155, 0xb0ae, 0x6009, 0x0f62, 0xbebc, 0x6e16}, + {0x1d70, 0xccca, 0x7c25, 0x2b7e, 0xdad9, 0x8a32, 0x398c, 0xe8e6}, + {0x9840, 0x479a, 0xf6f5, 0xa64e, 0x55a9, 0x0502, 0xb45c, 0x63b6}, + {0x1310, 0xc26a, 0x71c5, 0x211e, 0xd079, 0x7fd2, 0x2f2c, 0xde86}, + {0x8de0, 0x3d3a, 0xec95, 0x9bee, 0x4b49, 0xfaa2, 0xa9fc, 0x5956}, + {0x08b0, 0xb80a, 0x6765, 0x16be, 0xc619, 0x7572, 0x24cc, 0xd426}}, + {{0x0704'ad00, 0xbb0e'0bb5, 0x6f17'6a69, 0x2320'c91c}, + {0xd72a'27d0, 0x8b33'8685, 0x3f3c'e539, 0xf346'43ec}, + {0xa74f'a2a0, 0x5b59'0155, 0x0f62'6009, 0xc36b'bebc}, + {0x7775'1d70, 0x2b7e'7c25, 0xdf87'dad9, 0x9391'398c}, + {0x479a'9840, 0xfba3'f6f5, 0xafad'55a9, 0x63b6'b45c}, + {0x17c0'1310, 0xcbc9'71c5, 0x7fd2'd079, 0x33dc'2f2c}, + {0xe7e5'8de0, 0x9bee'ec95, 0x4ff8'4b49, 0x0401'a9fc}, + {0xb80b'08b0, 0x6c14'6765, 0x201d'c619, 0xd427'24cc}}, + {{0xbb0e'0bb5'0704'ad00, 0x7876'1e71'6f17'6a69}, + {0x35de'312f'd72a'27d0, 0xf346'43ec'3f3c'e539}, + {0xb0ae'56aa'a74f'a2a0, 0x6e16'6967'0f62'6009}, + {0x2b7e'7c25'7775'1d70, 0xe8e6'8ee1'df87'dad9}, + {0xa64e'a1a0'479a'9840, 0x63b6'b45c'afad'55a9}, + {0x211e'c71b'17c0'1310, 0xde86'd9d7'7fd2'd079}, + {0x9bee'ec95'e7e5'8de0, 0x5956'ff52'4ff8'4b49}, + {0x16bf'1210'b80b'08b0, 0xd427'24cd'201d'c619}}); + TestVectorInstruction(0xad00e457, // vnmsub.vx v8, x1, v16, v0.t + {{142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157}, + {158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173}, + {174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189}, + 
{190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205}, + {206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221}, + {222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237}, + {238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253}, + {254, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}}, + {{0xe48e, 0xe690, 0xe892, 0xea94, 0xec96, 0xee98, 0xf09a, 0xf29c}, + {0xf49e, 0xf6a0, 0xf8a2, 0xfaa4, 0xfca6, 0xfea8, 0x00aa, 0x02ac}, + {0x04ae, 0x06b0, 0x08b2, 0x0ab4, 0x0cb6, 0x0eb8, 0x10ba, 0x12bc}, + {0x14be, 0x16c0, 0x18c2, 0x1ac4, 0x1cc6, 0x1ec8, 0x20ca, 0x22cc}, + {0x24ce, 0x26d0, 0x28d2, 0x2ad4, 0x2cd6, 0x2ed8, 0x30da, 0x32dc}, + {0x34de, 0x36e0, 0x38e2, 0x3ae4, 0x3ce6, 0x3ee8, 0x40ea, 0x42ec}, + {0x44ee, 0x46f0, 0x48f2, 0x4af4, 0x4cf6, 0x4ef8, 0x50fa, 0x52fc}, + {0x54fe, 0x5700, 0x5902, 0x5b04, 0x5d06, 0x5f08, 0x610a, 0x630c}}, + {{0x913a'e48e, 0x953e'e892, 0x9942'ec96, 0x9d46'f09a}, + {0xa14a'f49e, 0xa54e'f8a2, 0xa952'fca6, 0xad57'00aa}, + {0xb15b'04ae, 0xb55f'08b2, 0xb963'0cb6, 0xbd67'10ba}, + {0xc16b'14be, 0xc56f'18c2, 0xc973'1cc6, 0xcd77'20ca}, + {0xd17b'24ce, 0xd57f'28d2, 0xd983'2cd6, 0xdd87'30da}, + {0xe18b'34de, 0xe58f'38e2, 0xe993'3ce6, 0xed97'40ea}, + {0xf19b'44ee, 0xf59f'48f2, 0xf9a3'4cf6, 0xfda7'50fa}, + {0x01ab'54fe, 0x05af'5902, 0x09b3'5d06, 0x0db7'610a}}, + {{0xea94'3de7'913a'e48e, 0xf29c'45ef'9942'ec96}, + {0xfaa4'4df7'a14a'f49e, 0x02ac'55ff'a952'fca6}, + {0x0ab4'5e07'b15b'04ae, 0x12bc'660f'b963'0cb6}, + {0x1ac4'6e17'c16b'14be, 0x22cc'761f'c973'1cc6}, + {0x2ad4'7e27'd17b'24ce, 0x32dc'862f'd983'2cd6}, + {0x3ae4'8e37'e18b'34de, 0x42ec'963f'e993'3ce6}, + {0x4af4'9e47'f19b'44ee, 0x52fc'a64f'f9a3'4cf6}, + {0x5b04'ae58'01ab'54fe, 0x630c'b660'09b3'5d06}}); +} +} // namespace } // namespace berberis diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h 
b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h index bb3b19d9..d034e33b 100644 --- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h +++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h @@ -250,9 +250,21 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(sll, auto [arg1, arg2] = std::tuple{args...}; DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(sll, auto [arg1, arg2] = std::tuple{args...}; (arg1 << mask_bits(arg2))) DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(macc, auto [arg1, arg2] = std::tuple{args...}; - ((arg1 * arg2) + vd)); + ((arg2 * arg1) + vd)) DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(macc, auto [arg1, arg2] = std::tuple{args...}; - ((arg1 * arg2) + vd)); + ((arg2 * arg1) + vd)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsac, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * arg1) + vd)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsac, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * arg1) + vd)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(madd, auto [arg1, arg2] = std::tuple{args...}; + ((arg2 * vd) + arg1)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(madd, auto [arg1, arg2] = std::tuple{args...}; + ((arg2 * vd) + arg1)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsub, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * vd) + arg1)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsub, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * vd) + arg1)) #undef DEFINE_ARITHMETIC_INTRINSIC #undef DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS |