diff options
author | Prashanth Swaminathan <prashanthsw@google.com> | 2023-12-14 01:36:23 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2023-12-14 01:36:23 +0000 |
commit | d316749481670b67e653c8c4eb503e460f219d4e (patch) | |
tree | 8c5bf79ccfce177733592ac687a611a137e8382b | |
parent | a731010b146e7eb40cfbc1d7876aac47cdd275e6 (diff) | |
parent | f573d4bc112c43b4cefc4df12e6b5403cfa0c303 (diff) | |
download | binary_translation-d316749481670b67e653c8c4eb503e460f219d4e.tar.gz |
Enable [vnmsac|vmadd|vnmsub].[vx|vv] am: 7a974a4e53 am: f573d4bc11
Original change: https://android-review.googlesource.com/c/platform/frameworks/libs/binary_translation/+/2871711
Change-Id: I0da157da76daeee1e9ee39e851c8a1eee0eb08b3
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/decoder.h | 8 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter.cc | 62 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 208 | ||||
-rw-r--r-- | intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h | 16 |
4 files changed, 286 insertions, 8 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index f6a10695..159d591c 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -341,7 +341,10 @@ class Decoder { }; enum class VOpMVvOpcode : uint8_t { + kVmaddvv = 0b101001, + kVnmsubvv = 0b101011, kVmaccvv = 0b101101, + kVnmsacvv = 0b101111, kMaxValue = 0b111111 }; @@ -390,7 +393,10 @@ class Decoder { }; enum class VOpMVxOpcode : uint8_t { + kVmaddvx = 0b101001, + kVnmsubvx = 0b101011, kVmaccvx = 0b101101, + kVnmsacvx = 0b101111, kMaxValue = 0b111111 }; @@ -1731,7 +1737,7 @@ class Decoder { bool vm = GetBits<25, 1>(); uint8_t opcode = GetBits<26, 6>(); uint8_t dst = GetBits<7, 5>(); - // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 2rd operand. + // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 3rd operand. // FMA instructions are exception, but there are not that many of these. 
uint8_t src1 = GetBits<20, 5>(); uint8_t src2 = GetBits<15, 5>(); diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index b48d9349..18b94dc3 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -654,9 +654,18 @@ class Interpreter { template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta> void OpVector(const Decoder::VOpMVvArgs& args) { switch (args.opcode) { + case Decoder::VOpMVvOpcode::kVmaddvv: + return OpVectorvv<intrinsics::Vmaddvv<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsubvv: + return OpVectorvv<intrinsics::Vnmsubvv<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, args.src2); case Decoder::VOpMVvOpcode::kVmaccvv: return OpVectorvv<intrinsics::Vmaccvv<ElementType, vta>, ElementType, vlmul, vta>( args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsacvv: + return OpVectorvv<intrinsics::Vnmsacvv<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, args.src2); default: Unimplemented(); } @@ -730,9 +739,18 @@ class Interpreter { template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta> void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) { switch (args.opcode) { + case Decoder::VOpMVxOpcode::kVmaddvx: + return OpVectorvx<intrinsics::Vmaddvx<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsubvx: + return OpVectorvx<intrinsics::Vnmsubvx<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, arg2); case Decoder::VOpMVxOpcode::kVmaccvx: return OpVectorvx<intrinsics::Vmaccvx<ElementType, vta>, ElementType, vlmul, vta>( args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsacvx: + return OpVectorvx<intrinsics::Vnmsacvx<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, arg2); default: Unimplemented(); } @@ 
-926,12 +944,30 @@ class Interpreter { InactiveProcessing vma> void OpVector(const Decoder::VOpMVvArgs& args) { switch (args.opcode) { + case Decoder::VOpMVvOpcode::kVmaddvv: + return OpVectorvv<intrinsics::Vmaddvvm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsubvv: + return OpVectorvv<intrinsics::Vnmsubvvm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); case Decoder::VOpMVvOpcode::kVmaccvv: return OpVectorvv<intrinsics::Vmaccvvm<ElementType, vta, vma>, - ElementType, - vlmul, - vta, - vma>(args.dst, args.src1, args.src2); + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVnmsacvv: + return OpVectorvv<intrinsics::Vnmsacvvm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); default: Unimplemented(); } @@ -1023,12 +1059,30 @@ class Interpreter { InactiveProcessing vma> void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) { switch (args.opcode) { + case Decoder::VOpMVxOpcode::kVmaddvx: + return OpVectorvx<intrinsics::Vmaddvxm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsubvx: + return OpVectorvx<intrinsics::Vnmsubvxm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, arg2); case Decoder::VOpMVxOpcode::kVmaccvx: return OpVectorvx<intrinsics::Vmaccvxm<ElementType, vta, vma>, ElementType, vlmul, vta, vma>(args.dst, args.src1, arg2); + case Decoder::VOpMVxOpcode::kVnmsacvx: + return OpVectorvx<intrinsics::Vnmsacvxm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, arg2); default: Unimplemented(); } diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 724c4892..f4fe0a56 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ 
b/interpreter/riscv64/interpreter_test.cc @@ -1853,6 +1853,212 @@ TEST_F(Riscv64InterpreterTest, TestVmacc) { {0xbb11'11bd'1313'bf15, 0x6061'0c62'630e'6465}, {0x05b1'0707'b309'09b5, 0xab01'01ad'0303'af05}}); } -} // namespace +TEST_F(Riscv64InterpreterTest, TestVnmsac) { + TestVectorInstruction(0xbd882457, // vnmsac.vv v8, v16, v24, v0.t + {{85, 83, 77, 67, 49, 35, 13, 243, 205, 179, 141, 99, 53, 3, 205, 147}, + {85, 19, 205, 131, 33, 227, 141, 51, 189, 115, 13, 163, 53, 195, 77, 211}, + {85, 211, 77, 195, 17, 163, 13, 115, 173, 51, 141, 227, 53, 131, 205, 19}, + {85, 147, 205, 3, 1, 99, 141, 179, 157, 243, 13, 35, 53, 67, 77, 83}, + {85, 83, 77, 67, 241, 35, 13, 243, 141, 179, 141, 99, 53, 3, 205, 147}, + {85, 19, 205, 131, 225, 227, 141, 51, 125, 115, 13, 163, 53, 195, 77, 211}, + {85, 211, 77, 195, 209, 163, 13, 115, 109, 51, 141, 227, 53, 131, 205, 19}, + {85, 147, 205, 3, 193, 99, 141, 179, 93, 243, 13, 35, 53, 67, 77, 83}}, + {{0x5555, 0x3d4d, 0x0031, 0xad0d, 0x2bcd, 0x9c8d, 0xe435, 0x0bcd}, + {0x1355, 0xfacd, 0xad21, 0x698d, 0xd7bd, 0x580d, 0x9f35, 0xc64d}, + {0xcd55, 0xb44d, 0x5611, 0x220d, 0x7fad, 0x0f8d, 0x5635, 0x7ccd}, + {0x8355, 0x69cd, 0xfb01, 0xd68d, 0x239d, 0xc30d, 0x0935, 0x2f4d}, + {0x3555, 0x1b4d, 0x9bf1, 0x870d, 0xc38d, 0x728d, 0xb835, 0xddcd}, + {0xe355, 0xc8cd, 0x38e1, 0x338d, 0x5f7d, 0x1e0d, 0x6335, 0x884d}, + {0x8d55, 0x724d, 0xd1d1, 0xdc0d, 0xf76d, 0xc58d, 0x0a35, 0x2ecd}, + {0x3355, 0x17cd, 0x66c1, 0x808d, 0x8b5d, 0x690d, 0xad35, 0xd14d}}, + {{0x4d53'5555, 0x65bd'0031, 0x8068'2bcd, 0xa960'e435}, + {0xc68f'1355, 0xcbe6'ad21, 0xe38f'd7bd, 0x1996'9f35}, + {0x33c2'cd55, 0x2608'5611, 0x3aaf'7fad, 0x7dc4'5635}, + {0x94ee'8355, 0x7421'fb01, 0x85c7'239d, 0xd5ea'0935}, + {0xea12'3555, 0xb633'9bf1, 0xc4d6'c38d, 0x2207'b835}, + {0x332d'e355, 0xec3d'38e1, 0xf7de'5f7d, 0x621d'6335}, + {0x7041'8d55, 0x163e'd1d1, 0x1edd'f76d, 0x962b'0a35}, + {0xa14d'3355, 0x3438'66c1, 0x39d5'8b5d, 0xbe30'ad35}}, + {{0xe20d'2c41'4d53'5555, 0x4fdc'3c72'8068'2bcd}, + 
{0xbead'4fa7'c68f'1355, 0x1e70'55d0'e38f'd7bd}, + {0x7f35'5efe'33c2'cd55, 0xd0ec'5b1f'3aaf'7fad}, + {0x23a5'5a44'94ee'8355, 0x6750'4c5d'85c7'239d}, + {0xabfd'417a'ea12'3555, 0xe19c'298b'c4d6'c38d}, + {0x183d'14a1'332d'e355, 0x3fcf'f2a9'f7de'5f7d}, + {0x6864'd3b7'7041'8d55, 0x81eb'a7b8'1edd'f76d}, + {0x9c74'7ebd'a14d'3355, 0xa7ef'48b6'39d5'8b5d}}); + TestVectorInstruction(0xbd00e457, // vnmsac.vx v8, x1, v16, v0.t + {{85, 171, 1, 87, 173, 3, 89, 175, 5, 91, 177, 7, 93, 179, 9, 95}, + {181, 11, 97, 183, 13, 99, 185, 15, 101, 187, 17, 103, 189, 19, 105, 191}, + {21, 107, 193, 23, 109, 195, 25, 111, 197, 27, 113, 199, 29, 115, 201, 31}, + {117, 203, 33, 119, 205, 35, 121, 207, 37, 123, 209, 39, 125, 211, 41, 127}, + {213, 43, 129, 215, 45, 131, 217, 47, 133, 219, 49, 135, 221, 51, 137, 223}, + {53, 139, 225, 55, 141, 227, 57, 143, 229, 59, 145, 231, 61, 147, 233, 63}, + {149, 235, 65, 151, 237, 67, 153, 239, 69, 155, 241, 71, 157, 243, 73, 159}, + {245, 75, 161, 247, 77, 163, 249, 79, 165, 251, 81, 167, 253, 83, 169, 255}}, + {{0xab55, 0x0201, 0x58ad, 0xaf59, 0x0605, 0x5cb1, 0xb35d, 0x0a09}, + {0x60b5, 0xb761, 0x0e0d, 0x64b9, 0xbb65, 0x1211, 0x68bd, 0xbf69}, + {0x1615, 0x6cc1, 0xc36d, 0x1a19, 0x70c5, 0xc771, 0x1e1d, 0x74c9}, + {0xcb75, 0x2221, 0x78cd, 0xcf79, 0x2625, 0x7cd1, 0xd37d, 0x2a29}, + {0x80d5, 0xd781, 0x2e2d, 0x84d9, 0xdb85, 0x3231, 0x88dd, 0xdf89}, + {0x3635, 0x8ce1, 0xe38d, 0x3a39, 0x90e5, 0xe791, 0x3e3d, 0x94e9}, + {0xeb95, 0x4241, 0x98ed, 0xef99, 0x4645, 0x9cf1, 0xf39d, 0x4a49}, + {0xa0f5, 0xf7a1, 0x4e4d, 0xa4f9, 0xfba5, 0x5251, 0xa8fd, 0xffa9}}, + {{0x5756'ab55, 0xaf59'58ad, 0x075c'0605, 0x5f5e'b35d}, + {0xb761'60b5, 0x0f64'0e0d, 0x6766'bb65, 0xbf69'68bd}, + {0x176c'1615, 0x6f6e'c36d, 0xc771'70c5, 0x1f74'1e1d}, + {0x7776'cb75, 0xcf79'78cd, 0x277c'2625, 0x7f7e'd37d}, + {0xd781'80d5, 0x2f84'2e2d, 0x8786'db85, 0xdf89'88dd}, + {0x378c'3635, 0x8f8e'e38d, 0xe791'90e5, 0x3f94'3e3d}, + {0x9796'eb95, 0xef99'98ed, 0x479c'4645, 0x9f9e'f39d}, + {0xf7a1'a0f5, 
0x4fa4'4e4d, 0xa7a6'fba5, 0xffa9'a8fd}}, + {{0xaf59'58ad'5756'ab55, 0x0a09'5e08'075c'0605}, + {0x64b9'6362'b761'60b5, 0xbf69'68bd'6766'bb65}, + {0x1a19'6e18'176c'1615, 0x74c9'7372'c771'70c5}, + {0xcf79'78cd'7776'cb75, 0x2a29'7e28'277c'2625}, + {0x84d9'8382'd781'80d5, 0xdf89'88dd'8786'db85}, + {0x3a39'8e38'378c'3635, 0x94e9'9392'e791'90e5}, + {0xef99'98ed'9796'eb95, 0x4a49'9e48'479c'4645}, + {0xa4f9'a3a2'f7a1'a0f5, 0xffa9'a8fd'a7a6'fba5}}); +} + +TEST_F(Riscv64InterpreterTest, TestVmadd) { + TestVectorInstruction(0xa5882457, // vmadd.vv v8, v16, v24, v0.t + {{0, 87, 174, 5, 93, 179, 10, 97, 185, 15, 102, 189, 20, 107, 194, 25}, + {112, 199, 30, 117, 205, 35, 122, 209, 41, 127, 214, 45, 132, 219, 50, 137}, + {224, 55, 142, 229, 61, 147, 234, 65, 153, 239, 70, 157, 244, 75, 162, 249}, + {80, 167, 254, 85, 173, 3, 90, 177, 9, 95, 182, 13, 100, 187, 18, 105}, + {192, 23, 110, 197, 29, 115, 202, 33, 121, 207, 38, 125, 212, 43, 130, 217}, + {48, 135, 222, 53, 141, 227, 58, 145, 233, 63, 150, 237, 68, 155, 242, 73}, + {160, 247, 78, 165, 253, 83, 170, 1, 89, 175, 6, 93, 180, 11, 98, 185}, + {16, 103, 190, 21, 109, 195, 26, 113, 201, 31, 118, 205, 36, 123, 210, 41}}, + {{0x5700, 0xafae, 0x085d, 0x610a, 0xb9b9, 0x1266, 0x6b14, 0xc3c2}, + {0x1c70, 0x751e, 0xcdcd, 0x267a, 0x7f29, 0xd7d6, 0x3084, 0x8932}, + {0xe1e0, 0x3a8e, 0x933d, 0xebea, 0x4499, 0x9d46, 0xf5f4, 0x4ea2}, + {0xa750, 0xfffe, 0x58ad, 0xb15a, 0x0a09, 0x62b6, 0xbb64, 0x1412}, + {0x6cc0, 0xc56e, 0x1e1d, 0x76ca, 0xcf79, 0x2826, 0x80d4, 0xd982}, + {0x3230, 0x8ade, 0xe38d, 0x3c3a, 0x94e9, 0xed96, 0x4644, 0x9ef2}, + {0xf7a0, 0x504e, 0xa8fd, 0x01aa, 0x5a59, 0xb306, 0x0bb4, 0x6462}, + {0xbd10, 0x15be, 0x6e6d, 0xc71a, 0x1fc9, 0x7876, 0xd124, 0x29d2}}, + {{0x0503'5700, 0x610a'085d, 0xbd10'b9b9, 0x1917'6b14}, + {0x751e'1c70, 0xd124'cdcd, 0x2d2b'7f29, 0x8932'3084}, + {0xe538'e1e0, 0x413f'933d, 0x9d46'4499, 0xf94c'f5f4}, + {0x5553'a750, 0xb15a'58ad, 0x0d61'0a09, 0x6967'bb64}, + {0xc56e'6cc0, 0x2175'1e1d, 0x7d7b'cf79, 
0xd982'80d4}, + {0x3589'3230, 0x918f'e38d, 0xed96'94e9, 0x499d'4644}, + {0xa5a3'f7a0, 0x01aa'a8fd, 0x5db1'5a59, 0xb9b8'0bb4}, + {0x15be'bd10, 0x71c5'6e6d, 0xcdcc'1fc9, 0x29d2'd124}}, + {{0x610a'085d'0503'5700, 0xc3c2'15be'bd10'b9b9}, + {0x267a'2322'751e'1c70, 0x8932'3084'2d2b'7f29}, + {0xebea'3de7'e538'e1e0, 0x4ea2'4b49'9d46'4499}, + {0xb15a'58ad'5553'a750, 0x1412'660f'0d61'0a09}, + {0x76ca'7372'c56e'6cc0, 0xd982'80d4'7d7b'cf79}, + {0x3c3a'8e38'3589'3230, 0x9ef2'9b99'ed96'94e9}, + {0x01aa'a8fd'a5a3'f7a0, 0x6462'b65f'5db1'5a59}, + {0xc71a'c3c3'15be'bd10, 0x29d2'd124'cdcc'1fc9}}); + TestVectorInstruction(0xa500e457, // vmadd.vx v8, x1, v16, v0.t + {{114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145}, + {146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161}, + {162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177}, + {178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193}, + {194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209}, + {210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225}, + {226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241}}, + {{0x1d72, 0x1f74, 0x2176, 0x2378, 0x257a, 0x277c, 0x297e, 0x2b80}, + {0x2d82, 0x2f84, 0x3186, 0x3388, 0x358a, 0x378c, 0x398e, 0x3b90}, + {0x3d92, 0x3f94, 0x4196, 0x4398, 0x459a, 0x479c, 0x499e, 0x4ba0}, + {0x4da2, 0x4fa4, 0x51a6, 0x53a8, 0x55aa, 0x57ac, 0x59ae, 0x5bb0}, + {0x5db2, 0x5fb4, 0x61b6, 0x63b8, 0x65ba, 0x67bc, 0x69be, 0x6bc0}, + {0x6dc2, 0x6fc4, 0x71c6, 0x73c8, 0x75ca, 0x77cc, 0x79ce, 0x7bd0}, + {0x7dd2, 0x7fd4, 0x81d6, 0x83d8, 0x85da, 0x87dc, 0x89de, 0x8be0}, + {0x8de2, 0x8fe4, 0x91e6, 0x93e8, 0x95ea, 0x97ec, 0x99ee, 0x9bf0}}, + {{0x74c9'1d72, 0x78cd'2176, 0x7cd1'257a, 0x80d5'297e}, + {0x84d9'2d82, 0x88dd'3186, 0x8ce1'358a, 0x90e5'398e}, + 
{0x94e9'3d92, 0x98ed'4196, 0x9cf1'459a, 0xa0f5'499e}, + {0xa4f9'4da2, 0xa8fd'51a6, 0xad01'55aa, 0xb105'59ae}, + {0xb509'5db2, 0xb90d'61b6, 0xbd11'65ba, 0xc115'69be}, + {0xc519'6dc2, 0xc91d'71c6, 0xcd21'75ca, 0xd125'79ce}, + {0xd529'7dd2, 0xd92d'81d6, 0xdd31'85da, 0xe135'89de}, + {0xe539'8de2, 0xe93d'91e6, 0xed41'95ea, 0xf145'99ee}}, + {{0x2377'cc20'74c9'1d72, 0x2b7f'd428'7cd1'257a}, + {0x3387'dc30'84d9'2d82, 0x3b8f'e438'8ce1'358a}, + {0x4397'ec40'94e9'3d92, 0x4b9f'f448'9cf1'459a}, + {0x53a7'fc50'a4f9'4da2, 0x5bb0'0458'ad01'55aa}, + {0x63b8'0c60'b509'5db2, 0x6bc0'1468'bd11'65ba}, + {0x73c8'1c70'c519'6dc2, 0x7bd0'2478'cd21'75ca}, + {0x83d8'2c80'd529'7dd2, 0x8be0'3488'dd31'85da}, + {0x93e8'3c90'e539'8de2, 0x9bf0'4498'ed41'95ea}}); +} + +TEST_F(Riscv64InterpreterTest, TestVnmsub) { + TestVectorInstruction(0xad882457, // vnmsub.vv v8, v16, v24, v0.t + {{0, 173, 90, 7, 181, 97, 14, 187, 105, 21, 194, 111, 28, 201, 118, 35}, + {208, 125, 42, 215, 133, 49, 222, 139, 57, 229, 146, 63, 236, 153, 70, 243}, + {160, 77, 250, 167, 85, 1, 174, 91, 9, 181, 98, 15, 188, 105, 22, 195}, + {112, 29, 202, 119, 37, 209, 126, 43, 217, 133, 50, 223, 140, 57, 230, 147}, + {64, 237, 154, 71, 245, 161, 78, 251, 169, 85, 2, 175, 92, 9, 182, 99}, + {16, 189, 106, 23, 197, 113, 30, 203, 121, 37, 210, 127, 44, 217, 134, 51}, + {224, 141, 58, 231, 149, 65, 238, 155, 73, 245, 162, 79, 252, 169, 86, 3}, + {176, 93, 10, 183, 101, 17, 190, 107, 25, 197, 114, 31, 204, 121, 38, 211}}, + {{0xad00, 0x5c5a, 0x0bb5, 0xbb0e, 0x6a69, 0x19c2, 0xc91c, 0x7876}, + {0x27d0, 0xd72a, 0x8685, 0x35de, 0xe539, 0x9492, 0x43ec, 0xf346}, + {0xa2a0, 0x51fa, 0x0155, 0xb0ae, 0x6009, 0x0f62, 0xbebc, 0x6e16}, + {0x1d70, 0xccca, 0x7c25, 0x2b7e, 0xdad9, 0x8a32, 0x398c, 0xe8e6}, + {0x9840, 0x479a, 0xf6f5, 0xa64e, 0x55a9, 0x0502, 0xb45c, 0x63b6}, + {0x1310, 0xc26a, 0x71c5, 0x211e, 0xd079, 0x7fd2, 0x2f2c, 0xde86}, + {0x8de0, 0x3d3a, 0xec95, 0x9bee, 0x4b49, 0xfaa2, 0xa9fc, 0x5956}, + {0x08b0, 0xb80a, 0x6765, 0x16be, 0xc619, 0x7572, 
0x24cc, 0xd426}}, + {{0x0704'ad00, 0xbb0e'0bb5, 0x6f17'6a69, 0x2320'c91c}, + {0xd72a'27d0, 0x8b33'8685, 0x3f3c'e539, 0xf346'43ec}, + {0xa74f'a2a0, 0x5b59'0155, 0x0f62'6009, 0xc36b'bebc}, + {0x7775'1d70, 0x2b7e'7c25, 0xdf87'dad9, 0x9391'398c}, + {0x479a'9840, 0xfba3'f6f5, 0xafad'55a9, 0x63b6'b45c}, + {0x17c0'1310, 0xcbc9'71c5, 0x7fd2'd079, 0x33dc'2f2c}, + {0xe7e5'8de0, 0x9bee'ec95, 0x4ff8'4b49, 0x0401'a9fc}, + {0xb80b'08b0, 0x6c14'6765, 0x201d'c619, 0xd427'24cc}}, + {{0xbb0e'0bb5'0704'ad00, 0x7876'1e71'6f17'6a69}, + {0x35de'312f'd72a'27d0, 0xf346'43ec'3f3c'e539}, + {0xb0ae'56aa'a74f'a2a0, 0x6e16'6967'0f62'6009}, + {0x2b7e'7c25'7775'1d70, 0xe8e6'8ee1'df87'dad9}, + {0xa64e'a1a0'479a'9840, 0x63b6'b45c'afad'55a9}, + {0x211e'c71b'17c0'1310, 0xde86'd9d7'7fd2'd079}, + {0x9bee'ec95'e7e5'8de0, 0x5956'ff52'4ff8'4b49}, + {0x16bf'1210'b80b'08b0, 0xd427'24cd'201d'c619}}); + TestVectorInstruction(0xad00e457, // vnmsub.vx v8, x1, v16, v0.t + {{142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157}, + {158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173}, + {174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189}, + {190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205}, + {206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221}, + {222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237}, + {238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253}, + {254, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}}, + {{0xe48e, 0xe690, 0xe892, 0xea94, 0xec96, 0xee98, 0xf09a, 0xf29c}, + {0xf49e, 0xf6a0, 0xf8a2, 0xfaa4, 0xfca6, 0xfea8, 0x00aa, 0x02ac}, + {0x04ae, 0x06b0, 0x08b2, 0x0ab4, 0x0cb6, 0x0eb8, 0x10ba, 0x12bc}, + {0x14be, 0x16c0, 0x18c2, 0x1ac4, 0x1cc6, 0x1ec8, 0x20ca, 0x22cc}, + {0x24ce, 0x26d0, 0x28d2, 0x2ad4, 0x2cd6, 0x2ed8, 0x30da, 0x32dc}, + {0x34de, 0x36e0, 0x38e2, 0x3ae4, 0x3ce6, 0x3ee8, 0x40ea, 0x42ec}, + 
{0x44ee, 0x46f0, 0x48f2, 0x4af4, 0x4cf6, 0x4ef8, 0x50fa, 0x52fc}, + {0x54fe, 0x5700, 0x5902, 0x5b04, 0x5d06, 0x5f08, 0x610a, 0x630c}}, + {{0x913a'e48e, 0x953e'e892, 0x9942'ec96, 0x9d46'f09a}, + {0xa14a'f49e, 0xa54e'f8a2, 0xa952'fca6, 0xad57'00aa}, + {0xb15b'04ae, 0xb55f'08b2, 0xb963'0cb6, 0xbd67'10ba}, + {0xc16b'14be, 0xc56f'18c2, 0xc973'1cc6, 0xcd77'20ca}, + {0xd17b'24ce, 0xd57f'28d2, 0xd983'2cd6, 0xdd87'30da}, + {0xe18b'34de, 0xe58f'38e2, 0xe993'3ce6, 0xed97'40ea}, + {0xf19b'44ee, 0xf59f'48f2, 0xf9a3'4cf6, 0xfda7'50fa}, + {0x01ab'54fe, 0x05af'5902, 0x09b3'5d06, 0x0db7'610a}}, + {{0xea94'3de7'913a'e48e, 0xf29c'45ef'9942'ec96}, + {0xfaa4'4df7'a14a'f49e, 0x02ac'55ff'a952'fca6}, + {0x0ab4'5e07'b15b'04ae, 0x12bc'660f'b963'0cb6}, + {0x1ac4'6e17'c16b'14be, 0x22cc'761f'c973'1cc6}, + {0x2ad4'7e27'd17b'24ce, 0x32dc'862f'd983'2cd6}, + {0x3ae4'8e37'e18b'34de, 0x42ec'963f'e993'3ce6}, + {0x4af4'9e47'f19b'44ee, 0x52fc'a64f'f9a3'4cf6}, + {0x5b04'ae58'01ab'54fe, 0x630c'b660'09b3'5d06}}); +} +} // namespace } // namespace berberis diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h index bb3b19d9..d034e33b 100644 --- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h +++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h @@ -250,9 +250,21 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(sll, auto [arg1, arg2] = std::tuple{args...}; DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(sll, auto [arg1, arg2] = std::tuple{args...}; (arg1 << mask_bits(arg2))) DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(macc, auto [arg1, arg2] = std::tuple{args...}; - ((arg1 * arg2) + vd)); + ((arg2 * arg1) + vd)) DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(macc, auto [arg1, arg2] = std::tuple{args...}; - ((arg1 * arg2) + vd)); + ((arg2 * arg1) + vd)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsac, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * arg1) + vd)) 
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsac, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * arg1) + vd)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(madd, auto [arg1, arg2] = std::tuple{args...}; + ((arg2 * vd) + arg1)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(madd, auto [arg1, arg2] = std::tuple{args...}; + ((arg2 * vd) + arg1)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsub, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * vd) + arg1)) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsub, auto [arg1, arg2] = std::tuple{args...}; + (-(arg2 * vd) + arg1)) #undef DEFINE_ARITHMETIC_INTRINSIC #undef DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS |