aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--backend/include/berberis/backend/x86_64/machine_ir_builder.h31
-rw-r--r--backend/x86_64/context_liveness_analyzer_test.cc22
-rw-r--r--backend/x86_64/lir_instructions.json12
-rw-r--r--backend/x86_64/local_guest_context_optimizer_test.cc12
-rw-r--r--backend/x86_64/loop_guest_context_optimizer_test.cc58
-rw-r--r--backend/x86_64/machine_ir_opt_test.cc30
-rw-r--r--guest_state/include/berberis/guest_state/guest_state_opaque.h8
-rw-r--r--guest_state/riscv64/guest_state_arch.cc22
-rw-r--r--guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h3
-rw-r--r--heavy_optimizer/riscv64/frontend.cc287
-rw-r--r--heavy_optimizer/riscv64/frontend.h335
-rw-r--r--test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h28
-rw-r--r--tests/run_host_tests.mk5
13 files changed, 764 insertions, 89 deletions
diff --git a/backend/include/berberis/backend/x86_64/machine_ir_builder.h b/backend/include/berberis/backend/x86_64/machine_ir_builder.h
index 536e6790..f95d7606 100644
--- a/backend/include/berberis/backend/x86_64/machine_ir_builder.h
+++ b/backend/include/berberis/backend/x86_64/machine_ir_builder.h
@@ -23,6 +23,7 @@
#include "berberis/backend/common/machine_ir_builder.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/base/logging.h"
+#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state_opaque.h"
namespace berberis::x86_64 {
@@ -53,6 +54,14 @@ class MachineIRBuilder : public MachineIRBuilderBase<MachineIR> {
x86_64::kMachineRegRBP, GetThreadStateRegOffset(guest_reg), src_reg);
}
+ void GenGetOffset(MachineReg dst_reg, int32_t offset) {
+ Gen<x86_64::MovqRegMemBaseDisp>(dst_reg, x86_64::kMachineRegRBP, offset);
+ }
+
+ void GenPutOffset(int32_t offset, MachineReg src_reg) {
+ Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, offset, src_reg);
+ }
+
void GenGetSimd(MachineReg dst_reg, int guest_reg) {
int32_t offset = GetThreadStateSimdRegOffset(guest_reg);
Gen<x86_64::MovdqaXRegMemBaseDisp>(dst_reg, x86_64::kMachineRegRBP, offset);
@@ -63,6 +72,28 @@ class MachineIRBuilder : public MachineIRBuilderBase<MachineIR> {
Gen<x86_64::MovdqaMemBaseDispXReg>(x86_64::kMachineRegRBP, offset, src_reg);
}
+ template <size_t kSize>
+ void GenGetSimd(MachineReg dst_reg, int32_t offset) {
+ if constexpr (kSize == 8) {
+ Gen<x86_64::MovsdXRegMemBaseDisp>(dst_reg, x86_64::kMachineRegRBP, offset);
+ } else if constexpr (kSize == 16) {
+ Gen<x86_64::MovdqaXRegMemBaseDisp>(dst_reg, x86_64::kMachineRegRBP, offset);
+ } else {
+ static_assert(kDependentValueFalse<kSize>);
+ }
+ }
+
+ template <size_t kSize>
+ void GenSetSimd(int32_t offset, MachineReg src_reg) {
+ if constexpr (kSize == 8) {
+ Gen<x86_64::MovsdMemBaseDispXReg>(x86_64::kMachineRegRBP, offset, src_reg);
+ } else if constexpr (kSize == 16) {
+ Gen<x86_64::MovdqaMemBaseDispXReg>(x86_64::kMachineRegRBP, offset, src_reg);
+ } else {
+ static_assert(kDependentValueFalse<kSize>);
+ }
+ }
+
// Please use GenCallImm instead
template <typename CallImmType,
typename IntegralType,
diff --git a/backend/x86_64/context_liveness_analyzer_test.cc b/backend/x86_64/context_liveness_analyzer_test.cc
index cba84aaa..4746ded7 100644
--- a/backend/x86_64/context_liveness_analyzer_test.cc
+++ b/backend/x86_64/context_liveness_analyzer_test.cc
@@ -58,7 +58,7 @@ TEST(MachineIRContextLivenessAnalyzerTest, PutKillsLiveIn) {
auto vreg = machine_ir.AllocVReg();
builder.StartBasicBlock(bb);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
@@ -79,11 +79,11 @@ TEST(MachineIRContextLivenessAnalyzerTest, GetRevivesLiveInKilledByPut) {
auto vreg = machine_ir.AllocVReg();
builder.StartBasicBlock(bb1);
- builder.GenGet(vreg, 0);
+ builder.GenGetOffset(vreg, GetThreadStateRegOffset(0));
builder.Gen<PseudoBranch>(bb2);
builder.StartBasicBlock(bb2);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
@@ -106,13 +106,13 @@ TEST(MachineIRContextLivenessAnalyzerTest,
auto vreg = machine_ir.AllocVReg();
builder.StartBasicBlock(bb1);
- builder.GenGet(vreg, 1);
+ builder.GenGetOffset(vreg, GetThreadStateRegOffset(1));
builder.Gen<PseudoBranch>(bb2);
builder.StartBasicBlock(bb2);
- builder.GenPut(0, vreg);
- builder.GenPut(1, vreg);
- builder.GenPut(2, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(1), vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(2), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
@@ -139,13 +139,13 @@ TEST(MachineIRContextLivenessAnalyzerTest, ContextWritesOnlyKillLiveInIfHappenIn
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
builder.StartBasicBlock(bb2);
- builder.GenPut(0, vreg);
- builder.GenPut(1, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(1), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
builder.StartBasicBlock(bb3);
- builder.GenPut(0, vreg);
- builder.GenPut(2, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(2), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
diff --git a/backend/x86_64/lir_instructions.json b/backend/x86_64/lir_instructions.json
index 2c0b6e7c..e242543e 100644
--- a/backend/x86_64/lir_instructions.json
+++ b/backend/x86_64/lir_instructions.json
@@ -23,6 +23,8 @@
"AddqRegImm",
"AddqRegReg",
"AddqRegMemInsns",
+ "AndbRegImm",
+ "AndbMemImmInsns",
"AndlRegImm",
"AndlRegReg",
"AndnqRegRegReg",
@@ -66,10 +68,13 @@
"SarlRegReg",
"SarqRegImm",
"SarqRegReg",
+ "ShlbRegImm",
+ "ShldlRegRegImm",
"ShllRegImm",
"ShllRegReg",
"ShlqRegImm",
"ShlqRegReg",
+ "ShrbRegImm",
"ShrlRegImm",
"ShrlRegReg",
"ShrqRegImm",
@@ -83,8 +88,10 @@
"TestqRegReg",
"TestwRegImm",
"TestwRegReg",
+ "LockCmpXchgqRegMemRegInsns",
"LockCmpXchg16bRegRegRegRegMemInsns",
"Mfence",
+ "MovbMemImmInsns",
"MovbMemRegInsns",
"MovdMemXRegInsns",
"MovdRegXReg",
@@ -111,6 +118,8 @@
"MovqRegXReg",
"MovqXRegMemInsns",
"MovqXRegReg",
+ "MovsdMemXRegInsns",
+ "MovsdXRegMemInsns",
"MovsdXRegXReg",
"MovssXRegXReg",
"MovsxbqRegMemInsns",
@@ -136,6 +145,9 @@
"MulsdXRegXReg",
"MulssXRegXReg",
"NotqReg",
+ "OrbMemImmInsns",
+ "OrbMemRegInsns",
+ "OrbRegReg",
"OrlRegImm",
"OrlRegReg",
"OrqRegImm",
diff --git a/backend/x86_64/local_guest_context_optimizer_test.cc b/backend/x86_64/local_guest_context_optimizer_test.cc
index 811fcccb..65c1fc18 100644
--- a/backend/x86_64/local_guest_context_optimizer_test.cc
+++ b/backend/x86_64/local_guest_context_optimizer_test.cc
@@ -39,8 +39,8 @@ TEST(MachineIRLocalGuestContextOptimizer, RemoveReadAfterWrite) {
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
auto reg2 = machine_ir.AllocVReg();
- builder.GenPut(0, reg1);
- builder.GenGet(reg2, 0);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), reg1);
+ builder.GenGetOffset(reg2, GetThreadStateRegOffset(0));
builder.Gen<PseudoJump>(kNullGuestAddr);
x86_64::RemoveLocalGuestContextAccesses(&machine_ir);
@@ -71,8 +71,8 @@ TEST(MachineIRLocalGuestContextOptimizer, RemoveReadAfterRead) {
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
auto reg2 = machine_ir.AllocVReg();
- builder.GenGet(reg1, 0);
- builder.GenGet(reg2, 0);
+ builder.GenGetOffset(reg1, GetThreadStateRegOffset(0));
+ builder.GenGetOffset(reg2, GetThreadStateRegOffset(0));
builder.Gen<PseudoJump>(kNullGuestAddr);
x86_64::RemoveLocalGuestContextAccesses(&machine_ir);
@@ -101,8 +101,8 @@ TEST(MachineIRLocalGuestContextOptimizer, RemoveWriteAfterWrite) {
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
auto reg2 = machine_ir.AllocVReg();
- builder.GenPut(0, reg1);
- builder.GenPut(0, reg2);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), reg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), reg2);
builder.Gen<PseudoJump>(kNullGuestAddr);
x86_64::RemoveLocalGuestContextAccesses(&machine_ir);
diff --git a/backend/x86_64/loop_guest_context_optimizer_test.cc b/backend/x86_64/loop_guest_context_optimizer_test.cc
index 37bc0667..291f0ae4 100644
--- a/backend/x86_64/loop_guest_context_optimizer_test.cc
+++ b/backend/x86_64/loop_guest_context_optimizer_test.cc
@@ -41,7 +41,7 @@ TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetAndUpdateMap) {
auto bb = machine_ir.NewBasicBlock();
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
- builder.GenGet(reg1, 0);
+ builder.GenGetOffset(reg1, GetThreadStateRegOffset(0));
builder.Gen<PseudoJump>(kNullGuestAddr);
auto insn_it = bb->insn_list().begin();
@@ -66,7 +66,7 @@ TEST(MachineIRLoopGuestContextOptimizer, ReplacePutAndUpdateMap) {
auto bb = machine_ir.NewBasicBlock();
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
- builder.GenPut(1, reg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(1), reg1);
builder.Gen<PseudoJump>(kNullGuestAddr);
auto insn_it = bb->insn_list().begin();
@@ -92,8 +92,8 @@ TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetPutAndUpdateMap) {
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
auto reg2 = machine_ir.AllocVReg();
- builder.GenGet(reg1, 1);
- builder.GenPut(1, reg2);
+ builder.GenGetOffset(reg1, GetThreadStateRegOffset(1));
+ builder.GenPutOffset(GetThreadStateRegOffset(1), reg2);
builder.Gen<PseudoJump>(kNullGuestAddr);
auto insn_it = bb->insn_list().begin();
@@ -122,7 +122,7 @@ TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetSimdAndUpdateMap) {
auto bb = machine_ir.NewBasicBlock();
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
- builder.GenGetSimd(reg1, 0);
+ builder.GenGetSimd<16>(reg1, GetThreadStateSimdRegOffset(0));
builder.Gen<PseudoJump>(kNullGuestAddr);
auto insn_it = bb->insn_list().begin();
@@ -147,7 +147,7 @@ TEST(MachineIRLoopGuestContextOptimizer, ReplacePutSimdAndUpdateMap) {
auto bb = machine_ir.NewBasicBlock();
builder.StartBasicBlock(bb);
auto reg1 = machine_ir.AllocVReg();
- builder.GenSetSimd(0, reg1);
+ builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), reg1);
builder.Gen<PseudoJump>(kNullGuestAddr);
auto insn_it = bb->insn_list().begin();
@@ -493,7 +493,7 @@ TEST(MachineIRLoopGuestContextOptimizer, RemovePutInSelfLoop) {
builder.Gen<PseudoBranch>(body);
builder.StartBasicBlock(body);
- builder.GenPut(0, vreg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);
builder.StartBasicBlock(afterloop);
@@ -537,7 +537,7 @@ TEST(MachineIRLoopGuestContextOptimizer, RemoveGetInSelfLoop) {
builder.Gen<PseudoBranch>(body);
builder.StartBasicBlock(body);
- builder.GenGet(vreg1, 0);
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(0));
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);
builder.StartBasicBlock(afterloop);
@@ -580,8 +580,8 @@ TEST(MachineIRLoopGuestContextOptimizer, RemoveGetPutInSelfLoop) {
builder.Gen<PseudoBranch>(body);
builder.StartBasicBlock(body);
- builder.GenGet(vreg1, 0);
- builder.GenPut(0, vreg2);
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(0));
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg2);
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);
builder.StartBasicBlock(afterloop);
@@ -634,7 +634,7 @@ TEST(MachineIRLoopGuestContextOptimizer, RemovePutInLoopWithMultipleExits) {
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body2, afterloop1, kMachineRegFLAGS);
builder.StartBasicBlock(body2);
- builder.GenPut(0, vreg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body1, afterloop2, kMachineRegFLAGS);
builder.StartBasicBlock(afterloop1);
@@ -687,14 +687,14 @@ TEST(MachineIRLoopGuestContextOptimizer, CountGuestRegAccesses) {
builder.Gen<PseudoBranch>(body1);
builder.StartBasicBlock(body1);
- builder.GenPut(0, vreg1);
- builder.GenGetSimd(vreg2, 0);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
+ builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(0));
builder.Gen<PseudoBranch>(body2);
builder.StartBasicBlock(body2);
- builder.GenGet(vreg1, 1);
- builder.GenPut(1, vreg1);
- builder.GenSetSimd(0, vreg2);
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(1));
+ builder.GenPutOffset(GetThreadStateRegOffset(1), vreg1);
+ builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), vreg2);
builder.Gen<PseudoBranch>(body1);
Loop loop({body1, body2}, machine_ir.arena());
@@ -723,15 +723,15 @@ TEST(MachineIRLoopGuestContextOptimizer, GetOffsetCounters) {
builder.Gen<PseudoBranch>(body1);
builder.StartBasicBlock(body1);
- builder.GenPut(0, vreg1);
- builder.GenGet(vreg1, 0);
- builder.GenGet(vreg1, 1);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(0));
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(1));
builder.Gen<PseudoBranch>(body2);
builder.StartBasicBlock(body2);
- builder.GenGet(vreg1, 2);
- builder.GenPut(2, vreg1);
- builder.GenPut(0, vreg1);
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(2));
+ builder.GenPutOffset(GetThreadStateRegOffset(2), vreg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
builder.Gen<PseudoBranch>(body1);
Loop loop({body1, body2}, machine_ir.arena());
@@ -769,16 +769,16 @@ TEST(MachineIRLoopGuestContextOptimizer, OptimizeLoopWithPriority) {
// Regular reg 0 has 3 uses.
// Regular reg 1 has 1 use.
builder.StartBasicBlock(body);
- builder.GenGet(vreg1, 0);
- builder.GenPut(0, vreg1);
- builder.GenGet(vreg1, 0);
- builder.GenGet(vreg1, 1);
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(0));
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(0));
+ builder.GenGetOffset(vreg1, GetThreadStateRegOffset(1));
// Simd reg 0 has 2 uses.
// Simd reg 1 has 1 use.
- builder.GenGetSimd(vreg2, 0);
- builder.GenSetSimd(0, vreg2);
- builder.GenGetSimd(vreg2, 1);
+ builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(0));
+ builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), vreg2);
+ builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(1));
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);
builder.StartBasicBlock(afterloop);
diff --git a/backend/x86_64/machine_ir_opt_test.cc b/backend/x86_64/machine_ir_opt_test.cc
index b3ea4623..b9814469 100644
--- a/backend/x86_64/machine_ir_opt_test.cc
+++ b/backend/x86_64/machine_ir_opt_test.cc
@@ -402,15 +402,15 @@ TEST(MachineIR, PutsInSuccessorsKillPut) {
auto vreg = machine_ir.AllocVReg();
builder.StartBasicBlock(bb1);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
builder.StartBasicBlock(bb2);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
builder.StartBasicBlock(bb3);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
@@ -435,11 +435,11 @@ TEST(MachineIR, PutInOneOfTwoSuccessorsDoesNotKillPut) {
auto vreg = machine_ir.AllocVReg();
builder.StartBasicBlock(bb1);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
builder.StartBasicBlock(bb2);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
builder.StartBasicBlock(bb3);
@@ -468,18 +468,18 @@ TEST(MachineIR, MultiplePutsCanBeKilled) {
auto vreg1 = machine_ir.AllocVReg();
auto vreg2 = machine_ir.AllocVReg();
builder.StartBasicBlock(bb1);
- builder.GenPut(0, vreg1);
- builder.GenPut(1, vreg2);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(1), vreg2);
builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
builder.StartBasicBlock(bb2);
- builder.GenPut(0, vreg1);
- builder.GenPut(1, vreg2);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(1), vreg2);
builder.Gen<PseudoJump>(kNullGuestAddr);
builder.StartBasicBlock(bb3);
- builder.GenPut(0, vreg1);
- builder.GenPut(1, vreg2);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg1);
+ builder.GenPutOffset(GetThreadStateRegOffset(1), vreg2);
builder.Gen<PseudoJump>(kNullGuestAddr);
EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
@@ -504,16 +504,16 @@ TEST(MachineIR, GetInOneOfTheSuccessorsMakesPutLive) {
auto vreg = machine_ir.AllocVReg();
builder.StartBasicBlock(bb1);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoBranch>(bb2);
builder.StartBasicBlock(bb2);
- builder.GenGet(vreg, 0);
- builder.GenPut(0, vreg);
+ builder.GenGetOffset(vreg, GetThreadStateRegOffset(0));
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
builder.StartBasicBlock(bb3);
- builder.GenPut(0, vreg);
+ builder.GenPutOffset(GetThreadStateRegOffset(0), vreg);
builder.Gen<PseudoJump>(kNullGuestAddr);
EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
diff --git a/guest_state/include/berberis/guest_state/guest_state_opaque.h b/guest_state/include/berberis/guest_state/guest_state_opaque.h
index 714de181..a2adf57a 100644
--- a/guest_state/include/berberis/guest_state/guest_state_opaque.h
+++ b/guest_state/include/berberis/guest_state/guest_state_opaque.h
@@ -92,6 +92,14 @@ void SetShadowCallStackPointer(CPUState& cpu, GuestAddr scs_sp);
void InitFloatingPointState();
std::size_t GetThreadStateRegOffset(int reg);
+
+bool DoesCpuStateHaveDedicatedFpRegs();
+std::size_t GetThreadStateFRegOffset(int freg);
+
+bool DoesCpuStateHaveDedicatedVecRegs();
+std::size_t GetThreadStateVRegOffset(int vreg);
+
+bool DoesCpuStateHaveDedicatedSimdRegs();
std::size_t GetThreadStateSimdRegOffset(int simd_reg);
bool IsSimdOffset(size_t offset);
diff --git a/guest_state/riscv64/guest_state_arch.cc b/guest_state/riscv64/guest_state_arch.cc
index 1d0fbaae..86d5ac87 100644
--- a/guest_state/riscv64/guest_state_arch.cc
+++ b/guest_state/riscv64/guest_state_arch.cc
@@ -72,7 +72,16 @@ std::size_t GetThreadStateRegOffset(int reg) {
return offsetof(ThreadState, cpu.x[reg]);
}
+std::size_t GetThreadStateFRegOffset(int freg) {
+ return offsetof(ThreadState, cpu.f[freg]);
+}
+
+std::size_t GetThreadStateVRegOffset(int vreg) {
+ return offsetof(ThreadState, cpu.v[vreg]);
+}
+
std::size_t GetThreadStateSimdRegOffset(int simd_reg) {
+ // TODO(b/291126259) Switch to CHECK(false) after we switch frontend to F regs.
return offsetof(ThreadState, cpu.v[simd_reg]);
}
@@ -93,6 +102,19 @@ bool DoesCpuStateHaveFlags() {
return false;
}
+bool DoesCpuStateHaveDedicatedFpRegs() {
+ return true;
+}
+
+bool DoesCpuStateHaveDedicatedVecRegs() {
+ return true;
+}
+
+bool DoesCpuStateHaveDedicatedSimdRegs() {
+ // TODO(b/291126259) Return false after we switch frontend to F regs.
+ return true;
+}
+
std::size_t GetThreadStateFlagOffset() {
// RISCV64 Does not have flags in its CPUState
CHECK(false);
diff --git a/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h b/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h
index 59195c27..9c7f9044 100644
--- a/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h
+++ b/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h
@@ -20,6 +20,7 @@
#include <array>
#include <atomic>
#include <cstdint>
+#include <type_traits>
#include "berberis/base/config.h"
#include "berberis/base/dependent_false.h"
@@ -110,6 +111,8 @@ struct CPUState {
uint8_t frm;
};
+static_assert(std::is_standard_layout_v<CPUState>);
+
constexpr uint32_t kNumGuestRegs = std::size(CPUState{}.x);
constexpr uint32_t kNumGuestFpRegs = std::size(CPUState{}.f);
diff --git a/heavy_optimizer/riscv64/frontend.cc b/heavy_optimizer/riscv64/frontend.cc
index cd1c0f1b..b04144cb 100644
--- a/heavy_optimizer/riscv64/frontend.cc
+++ b/heavy_optimizer/riscv64/frontend.cc
@@ -20,10 +20,12 @@
#include "berberis/assembler/x86_64.h"
#include "berberis/backend/common/machine_ir.h"
+#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/guest_state/guest_state_arch.h"
#include "berberis/guest_state/guest_state_opaque.h"
+#include "berberis/runtime_primitives/memory_region_reservation.h"
#include "berberis/runtime_primitives/platform.h"
namespace berberis {
@@ -260,18 +262,18 @@ void HeavyOptimizerFrontend::UpdateBranchTargetsAfterSplit(GuestAddr addr,
Register HeavyOptimizerFrontend::GetReg(uint8_t reg) {
CHECK_LT(reg, kNumGuestRegs);
Register dst = AllocTempReg();
- builder_.GenGet(dst, reg);
+ builder_.GenGetOffset(dst, GetThreadStateRegOffset(reg));
return dst;
}
void HeavyOptimizerFrontend::SetReg(uint8_t reg, Register value) {
CHECK_LT(reg, kNumGuestRegs);
- builder_.GenPut(reg, value);
+ builder_.GenPutOffset(GetThreadStateRegOffset(reg), value);
}
FpRegister HeavyOptimizerFrontend::GetFpReg(uint8_t reg) {
FpRegister result = AllocTempSimdReg();
- builder_.GenGetSimd(result.machine_reg(), reg);
+ builder_.GenGetSimd<8>(result.machine_reg(), GetThreadStateFRegOffset(reg));
return result;
}
@@ -733,4 +735,283 @@ void HeavyOptimizerFrontend::Finalize(GuestAddr stop_pc) {
ResolveJumps();
}
+Register HeavyOptimizerFrontend::LoadWithoutRecovery(Decoder::LoadOperandType operand_type,
+ Register base,
+ int32_t disp) {
+ auto res = AllocTempReg();
+ switch (operand_type) {
+ case Decoder::LoadOperandType::k8bitUnsigned:
+ Gen<x86_64::MovzxblRegMemBaseDisp>(res, base, disp);
+ break;
+ case Decoder::LoadOperandType::k16bitUnsigned:
+ Gen<x86_64::MovzxwlRegMemBaseDisp>(res, base, disp);
+ break;
+ case Decoder::LoadOperandType::k32bitUnsigned:
+ Gen<x86_64::MovlRegMemBaseDisp>(res, base, disp);
+ break;
+ case Decoder::LoadOperandType::k64bit:
+ Gen<x86_64::MovqRegMemBaseDisp>(res, base, disp);
+ break;
+ case Decoder::LoadOperandType::k8bitSigned:
+ Gen<x86_64::MovsxbqRegMemBaseDisp>(res, base, disp);
+ break;
+ case Decoder::LoadOperandType::k16bitSigned:
+ Gen<x86_64::MovsxwqRegMemBaseDisp>(res, base, disp);
+ break;
+ case Decoder::LoadOperandType::k32bitSigned:
+ Gen<x86_64::MovsxlqRegMemBaseDisp>(res, base, disp);
+ break;
+ default:
+ Unimplemented();
+ return {};
+ }
+
+ return res;
+}
+
+Register HeavyOptimizerFrontend::LoadWithoutRecovery(Decoder::LoadOperandType operand_type,
+ Register base,
+ Register index,
+ int32_t disp) {
+ auto res = AllocTempReg();
+ switch (operand_type) {
+ case Decoder::LoadOperandType::k8bitUnsigned:
+ Gen<x86_64::MovzxblRegMemBaseIndexDisp>(
+ res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
+ break;
+ case Decoder::LoadOperandType::k16bitUnsigned:
+ Gen<x86_64::MovzxwlRegMemBaseIndexDisp>(
+ res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
+ break;
+ case Decoder::LoadOperandType::k32bitUnsigned:
+ Gen<x86_64::MovlRegMemBaseIndexDisp>(
+ res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
+ break;
+ case Decoder::LoadOperandType::k64bit:
+ Gen<x86_64::MovqRegMemBaseIndexDisp>(
+ res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
+ break;
+ case Decoder::LoadOperandType::k8bitSigned:
+ Gen<x86_64::MovsxbqRegMemBaseIndexDisp>(
+ res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
+ break;
+ case Decoder::LoadOperandType::k16bitSigned:
+ Gen<x86_64::MovsxwqRegMemBaseIndexDisp>(
+ res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
+ break;
+ case Decoder::LoadOperandType::k32bitSigned:
+ Gen<x86_64::MovsxlqRegMemBaseIndexDisp>(
+ res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
+ break;
+ default:
+ Unimplemented();
+ return {};
+ }
+ return res;
+}
+
+Register HeavyOptimizerFrontend::UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr) {
+ Register res = AllocTempReg();
+ switch (opcode) {
+ case Decoder::CsrOpcode::kCsrrs:
+ Gen<PseudoCopy>(res, arg, 8);
+ Gen<x86_64::OrqRegReg>(res, csr, GetFlagsRegister());
+ break;
+ case Decoder::CsrOpcode::kCsrrc:
+ if (host_platform::kHasBMI) {
+ Gen<x86_64::AndnqRegRegReg>(res, arg, csr, GetFlagsRegister());
+ } else {
+ Gen<PseudoCopy>(res, arg, 8);
+ Gen<x86_64::NotqReg>(res);
+ Gen<x86_64::AndqRegReg>(res, csr, GetFlagsRegister());
+ }
+ break;
+ default:
+ Unimplemented();
+ return {};
+ }
+ return arg;
+}
+
+Register HeavyOptimizerFrontend::UpdateCsr(Decoder::CsrImmOpcode opcode,
+ uint8_t imm,
+ Register csr) {
+ Register res = AllocTempReg();
+ switch (opcode) {
+ case Decoder::CsrImmOpcode::kCsrrwi:
+ Gen<x86_64::MovlRegImm>(res, imm);
+ break;
+ case Decoder::CsrImmOpcode::kCsrrsi:
+ Gen<x86_64::MovlRegImm>(res, imm);
+ Gen<x86_64::OrqRegReg>(res, csr, GetFlagsRegister());
+ break;
+ case Decoder::CsrImmOpcode::kCsrrci:
+ Gen<x86_64::MovqRegImm>(res, static_cast<int8_t>(~imm));
+ Gen<x86_64::AndqRegReg>(res, csr, GetFlagsRegister());
+ break;
+ default:
+ Unimplemented();
+ return {};
+ }
+ return res;
+}
+
+void HeavyOptimizerFrontend::StoreWithoutRecovery(Decoder::StoreOperandType operand_type,
+ Register base,
+ int32_t disp,
+ Register data) {
+ switch (operand_type) {
+ case Decoder::StoreOperandType::k8bit:
+ Gen<x86_64::MovbMemBaseDispReg>(base, disp, data);
+ break;
+ case Decoder::StoreOperandType::k16bit:
+ Gen<x86_64::MovwMemBaseDispReg>(base, disp, data);
+ break;
+ case Decoder::StoreOperandType::k32bit:
+ Gen<x86_64::MovlMemBaseDispReg>(base, disp, data);
+ break;
+ case Decoder::StoreOperandType::k64bit:
+ Gen<x86_64::MovqMemBaseDispReg>(base, disp, data);
+ break;
+ default:
+ return Unimplemented();
+ }
+}
+
+void HeavyOptimizerFrontend::StoreWithoutRecovery(Decoder::StoreOperandType operand_type,
+ Register base,
+ Register index,
+ int32_t disp,
+ Register data) {
+ switch (operand_type) {
+ case Decoder::StoreOperandType::k8bit:
+ Gen<x86_64::MovbMemBaseIndexDispReg>(
+ base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
+ break;
+ case Decoder::StoreOperandType::k16bit:
+ Gen<x86_64::MovwMemBaseIndexDispReg>(
+ base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
+ break;
+ case Decoder::StoreOperandType::k32bit:
+ Gen<x86_64::MovlMemBaseIndexDispReg>(
+ base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
+ break;
+ case Decoder::StoreOperandType::k64bit:
+ Gen<x86_64::MovqMemBaseIndexDispReg>(
+ base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
+ break;
+ default:
+ return Unimplemented();
+ }
+}
+
+void HeavyOptimizerFrontend::MemoryRegionReservationLoad(Register aligned_addr) {
+ // Store aligned_addr in CPUState.
+ int32_t address_offset = GetThreadStateReservationAddressOffset();
+ Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, address_offset, aligned_addr);
+
+ // MemoryRegionReservation::SetOwner(aligned_addr, &(state->cpu)).
+ builder_.GenCallImm(bit_cast<uint64_t>(&MemoryRegionReservation::SetOwner),
+ GetFlagsRegister(),
+ std::array<x86_64::CallImm::Arg, 2>{{
+ {aligned_addr, x86_64::CallImm::kIntRegType},
+ {x86_64::kMachineRegRBP, x86_64::CallImm::kIntRegType},
+ }});
+
+ // Load monitor value and store it in CPUState.
+ auto monitor = AllocTempSimdReg();
+ MachineReg reservation_reg = monitor.machine_reg();
+ Gen<x86_64::MovqRegMemBaseDisp>(reservation_reg, aligned_addr, 0);
+ int32_t value_offset = GetThreadStateReservationValueOffset();
+ Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, value_offset, reservation_reg);
+}
+
+Register HeavyOptimizerFrontend::MemoryRegionReservationExchange(Register aligned_addr,
+ Register curr_reservation_value) {
+ auto* ir = builder_.ir();
+ auto* cur_bb = builder_.bb();
+ auto* addr_match_bb = ir->NewBasicBlock();
+ auto* failure_bb = ir->NewBasicBlock();
+ auto* continue_bb = ir->NewBasicBlock();
+ ir->AddEdge(cur_bb, addr_match_bb);
+ ir->AddEdge(cur_bb, failure_bb);
+ ir->AddEdge(failure_bb, continue_bb);
+ Register result = AllocTempReg();
+
+ // MemoryRegionReservation::Clear.
+ Register stored_aligned_addr = AllocTempReg();
+ int32_t address_offset = GetThreadStateReservationAddressOffset();
+ Gen<x86_64::MovqRegMemBaseDisp>(stored_aligned_addr, x86_64::kMachineRegRBP, address_offset);
+ Gen<x86_64::MovqMemBaseDispImm>(x86_64::kMachineRegRBP, address_offset, kNullGuestAddr);
+ // Compare aligned_addr to the one in CPUState.
+ Gen<x86_64::CmpqRegReg>(stored_aligned_addr, aligned_addr, GetFlagsRegister());
+ Gen<PseudoCondBranch>(
+ x86_64::Assembler::Condition::kNotEqual, failure_bb, addr_match_bb, GetFlagsRegister());
+
+ builder_.StartBasicBlock(addr_match_bb);
+ // Load new reservation value into integer register where CmpXchgq expects it.
+ Register new_reservation_value = AllocTempReg();
+ int32_t value_offset = GetThreadStateReservationValueOffset();
+ Gen<x86_64::MovqRegMemBaseDisp>(new_reservation_value, x86_64::kMachineRegRBP, value_offset);
+
+ MemoryRegionReservationSwapWithLockedOwner(
+ aligned_addr, curr_reservation_value, new_reservation_value, failure_bb);
+
+ ir->AddEdge(builder_.bb(), continue_bb);
+ // Pseudo-def for the use-def operand of XOR, to make sure the data-flow stays consistent.
+ Gen<PseudoDefReg>(result);
+ Gen<x86_64::XorqRegReg>(result, result, GetFlagsRegister());
+ Gen<PseudoBranch>(continue_bb);
+
+ builder_.StartBasicBlock(failure_bb);
+ Gen<x86_64::MovqRegImm>(result, 1);
+ Gen<PseudoBranch>(continue_bb);
+
+ builder_.StartBasicBlock(continue_bb);
+
+ return result;
+}
+
+void HeavyOptimizerFrontend::MemoryRegionReservationSwapWithLockedOwner(
+ Register aligned_addr,
+ Register curr_reservation_value,
+ Register new_reservation_value,
+ MachineBasicBlock* failure_bb) {
+ auto* ir = builder_.ir();
+ auto* lock_success_bb = ir->NewBasicBlock();
+ auto* swap_success_bb = ir->NewBasicBlock();
+ ir->AddEdge(builder_.bb(), lock_success_bb);
+ ir->AddEdge(builder_.bb(), failure_bb);
+ ir->AddEdge(lock_success_bb, swap_success_bb);
+ ir->AddEdge(lock_success_bb, failure_bb);
+
+ // lock_entry = MemoryRegionReservation::TryLock(aligned_addr, &(state->cpu)).
+ auto* call = builder_.GenCallImm(bit_cast<uint64_t>(&MemoryRegionReservation::TryLock),
+ GetFlagsRegister(),
+ std::array<x86_64::CallImm::Arg, 2>{{
+ {aligned_addr, x86_64::CallImm::kIntRegType},
+ {x86_64::kMachineRegRBP, x86_64::CallImm::kIntRegType},
+ }});
+ Register lock_entry = AllocTempReg();
+ // Limit life-time of a narrow reg-class call result.
+ Gen<PseudoCopy>(lock_entry, call->IntResultAt(0), 8);
+ Gen<x86_64::TestqRegReg>(lock_entry, lock_entry, GetFlagsRegister());
+ Gen<PseudoCondBranch>(
+ x86_64::Assembler::Condition::kZero, failure_bb, lock_success_bb, GetFlagsRegister());
+
+ builder_.StartBasicBlock(lock_success_bb);
+ auto rax = AllocTempReg();
+ Gen<PseudoCopy>(rax, curr_reservation_value, 8);
+ Gen<x86_64::LockCmpXchgqRegMemBaseDispReg>(
+ rax, aligned_addr, 0, new_reservation_value, GetFlagsRegister());
+
+ // MemoryRegionReservation::Unlock(lock_entry)
+ Gen<x86_64::MovqMemBaseDispImm>(lock_entry, 0, 0);
+ // Zero-flag is set if CmpXchg is successful.
+ Gen<PseudoCondBranch>(
+ x86_64::Assembler::Condition::kNotZero, failure_bb, swap_success_bb, GetFlagsRegister());
+
+ builder_.StartBasicBlock(swap_success_bb);
+}
+
} // namespace berberis \ No newline at end of file
diff --git a/heavy_optimizer/riscv64/frontend.h b/heavy_optimizer/riscv64/frontend.h
index d88920fe..67b64fbc 100644
--- a/heavy_optimizer/riscv64/frontend.h
+++ b/heavy_optimizer/riscv64/frontend.h
@@ -20,13 +20,16 @@
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
#include "berberis/base/arena_map.h"
+#include "berberis/base/checks.h"
#include "berberis/base/dependent_false.h"
#include "berberis/decoder/riscv64/decoder.h"
#include "berberis/decoder/riscv64/semantics_player.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state_arch.h"
+#include "berberis/guest_state/guest_state_opaque.h"
#include "berberis/intrinsics/intrinsics.h"
#include "berberis/intrinsics/macro_assembler.h"
+#include "berberis/runtime_primitives/memory_region_reservation.h"
#include "berberis/runtime_primitives/platform.h"
#include "call_intrinsic.h"
@@ -108,20 +111,103 @@ class HeavyOptimizerFrontend {
void Store(Decoder::StoreOperandType operand_type, Register arg, int16_t offset, Register data);
Register Load(Decoder::LoadOperandType operand_type, Register arg, int16_t offset);
+ template <typename IntType>
+ constexpr Decoder::LoadOperandType ToLoadOperandType() {
+ if constexpr (std::is_same_v<IntType, int8_t>) {
+ return Decoder::LoadOperandType::k8bitSigned;
+ } else if constexpr (std::is_same_v<IntType, int16_t>) {
+ return Decoder::LoadOperandType::k16bitSigned;
+ } else if constexpr (std::is_same_v<IntType, int32_t>) {
+ return Decoder::LoadOperandType::k32bitSigned;
+ } else if constexpr (std::is_same_v<IntType, int64_t> || std::is_same_v<IntType, uint64_t>) {
+ return Decoder::LoadOperandType::k64bit;
+ } else if constexpr (std::is_same_v<IntType, uint8_t>) {
+ return Decoder::LoadOperandType::k8bitUnsigned;
+ } else if constexpr (std::is_same_v<IntType, uint16_t>) {
+ return Decoder::LoadOperandType::k16bitUnsigned;
+ } else if constexpr (std::is_same_v<IntType, uint32_t>) {
+ return Decoder::LoadOperandType::k32bitUnsigned;
+ } else {
+ static_assert(kDependentTypeFalse<IntType>);
+ }
+ }
+
+ template <typename IntType>
+ constexpr Decoder::StoreOperandType ToStoreOperandType() {
+ if constexpr (std::is_same_v<IntType, int8_t> || std::is_same_v<IntType, uint8_t>) {
+ return Decoder::StoreOperandType::k8bit;
+ } else if constexpr (std::is_same_v<IntType, int16_t> || std::is_same_v<IntType, uint16_t>) {
+ return Decoder::StoreOperandType::k16bit;
+ } else if constexpr (std::is_same_v<IntType, int32_t> || std::is_same_v<IntType, uint32_t>) {
+ return Decoder::StoreOperandType::k32bit;
+ } else if constexpr (std::is_same_v<IntType, int64_t> || std::is_same_v<IntType, uint64_t>) {
+ return Decoder::StoreOperandType::k64bit;
+ } else {
+ static_assert(kDependentTypeFalse<IntType>);
+ }
+ }
+
+ // Versions without recovery can be used to access non-guest memory (e.g. CPUState).
+ Register LoadWithoutRecovery(Decoder::LoadOperandType operand_type, Register base, int32_t disp);
+ Register LoadWithoutRecovery(Decoder::LoadOperandType operand_type,
+ Register base,
+ Register index,
+ int32_t disp);
+ void StoreWithoutRecovery(Decoder::StoreOperandType operand_type,
+ Register base,
+ int32_t disp,
+ Register val);
+ void StoreWithoutRecovery(Decoder::StoreOperandType operand_type,
+ Register base,
+ Register index,
+ int32_t disp,
+ Register val);
+
//
// Atomic extensions.
//
template <typename IntType, bool aq, bool rl>
- Register Lr(Register /* addr */) {
- Unimplemented();
- return {};
+ Register Lr(Register addr) {
+ Register aligned_addr = AllocTempReg();
+ Gen<PseudoCopy>(aligned_addr, addr, 8);
+ // The immediate is sign extended to 64-bit.
+ Gen<x86_64::AndqRegImm>(aligned_addr, ~int32_t{0xf}, GetFlagsRegister());
+
+ MemoryRegionReservationLoad(aligned_addr);
+
+ Register addr_offset = AllocTempReg();
+ Gen<PseudoCopy>(addr_offset, addr, 8);
+ Gen<x86_64::SubqRegReg>(addr_offset, aligned_addr, GetFlagsRegister());
+
+ // Load the requested part from CPUState.
+ return LoadWithoutRecovery(ToLoadOperandType<IntType>(),
+ x86_64::kMachineRegRBP,
+ addr_offset,
+ GetThreadStateReservationValueOffset());
}
template <typename IntType, bool aq, bool rl>
- Register Sc(Register /* addr */, Register /* data */) {
- Unimplemented();
- return {};
+ Register Sc(Register addr, Register data) {
+ // Compute aligned_addr.
+ auto aligned_addr = AllocTempReg();
+ Gen<PseudoCopy>(aligned_addr, addr, 8);
+ // The immediate is sign extended to 64-bit.
+ Gen<x86_64::AndqRegImm>(aligned_addr, ~int32_t{0xf}, GetFlagsRegister());
+
+ // Load current monitor value before we clobber it.
+ auto reservation_value = AllocTempReg();
+ int32_t value_offset = GetThreadStateReservationValueOffset();
+ Gen<x86_64::MovqRegMemBaseDisp>(reservation_value, x86_64::kMachineRegRBP, value_offset);
+ Register addr_offset = AllocTempReg();
+ Gen<PseudoCopy>(addr_offset, addr, 8);
+ Gen<x86_64::SubqRegReg>(addr_offset, aligned_addr, GetFlagsRegister());
+ // It's okay to clobber reservation_value since we clear out reservation_address in
+ // MemoryRegionReservationExchange anyway.
+ StoreWithoutRecovery(
+ ToStoreOperandType<IntType>(), x86_64::kMachineRegRBP, addr_offset, value_offset, data);
+
+ return MemoryRegionReservationExchange(aligned_addr, reservation_value);
}
void Fence(Decoder::FenceOpcode /*opcode*/,
@@ -149,7 +235,7 @@ class HeavyOptimizerFrontend {
[[nodiscard]] FpRegister GetFRegAndUnboxNan(uint8_t reg) {
CHECK_LE(reg, kNumGuestFpRegs);
FpRegister result = AllocTempSimdReg();
- builder_.GenGetSimd(result.machine_reg(), reg);
+ builder_.GenGetSimd<8>(result.machine_reg(), GetThreadStateFRegOffset(reg));
FpRegister unboxed_result = AllocTempSimdReg();
if (host_platform::kHasAVX) {
builder_.Gen<x86_64::MacroUnboxNanFloat32AVX>(unboxed_result.machine_reg(),
@@ -169,8 +255,7 @@ class HeavyOptimizerFrontend {
} else {
builder_.Gen<x86_64::MacroNanBoxFloat32>(value.machine_reg());
}
-
- builder_.GenSetSimd(reg, value.machine_reg());
+ builder_.GenSetSimd<8>(GetThreadStateFRegOffset(reg), value.machine_reg());
}
template <typename DataType>
@@ -204,15 +289,8 @@ class HeavyOptimizerFrontend {
// Csr
//
- Register UpdateCsr(Decoder::CsrOpcode /* opcode */, Register /* arg */, Register /* csr */) {
- Unimplemented();
- return {};
- }
-
- Register UpdateCsr(Decoder::CsrImmOpcode /* opcode */, uint8_t /* imm */, Register /* csr */) {
- Unimplemented();
- return {};
- }
+ Register UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr);
+ Register UpdateCsr(Decoder::CsrImmOpcode opcode, uint8_t imm, Register csr);
[[nodiscard]] bool success() const { return success_; }
@@ -240,18 +318,48 @@ class HeavyOptimizerFrontend {
template <CsrName kName>
[[nodiscard]] Register GetCsr() {
- Unimplemented();
- return {};
+ auto csr_reg = AllocTempReg();
+ if constexpr (std::is_same_v<CsrFieldType<kName>, uint8_t>) {
+ Gen<x86_64::MovzxblRegMemBaseDisp>(csr_reg, x86_64::kMachineRegRBP, kCsrFieldOffset<kName>);
+ } else if constexpr (std::is_same_v<CsrFieldType<kName>, uint64_t>) {
+ Gen<x86_64::MovqRegMemBaseDisp>(csr_reg, x86_64::kMachineRegRBP, kCsrFieldOffset<kName>);
+ } else {
+ static_assert(kDependentTypeFalse<CsrFieldType<kName>>);
+ }
+ return csr_reg;
}
template <CsrName kName>
- void SetCsr(uint8_t /* imm */) {
- Unimplemented();
+ void SetCsr(uint8_t imm) {
+    // Note: csr immediate only has 5 bits in RISC-V encoding, which guarantees that
+    // “imm & kCsrMask<kName>” can be used as an 8-bit immediate.
+ if constexpr (std::is_same_v<CsrFieldType<kName>, uint8_t>) {
+ Gen<x86_64::MovbMemBaseDispImm>(x86_64::kMachineRegRBP,
+ kCsrFieldOffset<kName>,
+ static_cast<int8_t>(imm & kCsrMask<kName>));
+ } else if constexpr (std::is_same_v<CsrFieldType<kName>, uint64_t>) {
+ Gen<x86_64::MovbMemBaseDispImm>(x86_64::kMachineRegRBP,
+ kCsrFieldOffset<kName>,
+ static_cast<int8_t>(imm & kCsrMask<kName>));
+ } else {
+ static_assert(kDependentTypeFalse<CsrFieldType<kName>>);
+ }
}
template <CsrName kName>
- void SetCsr(Register /* arg */) {
- Unimplemented();
+ void SetCsr(Register arg) {
+ auto tmp = AllocTempReg();
+ Gen<PseudoCopy>(tmp, arg, sizeof(CsrFieldType<kName>));
+ if constexpr (sizeof(CsrFieldType<kName>) == 1) {
+ Gen<x86_64::AndbRegImm>(tmp, kCsrMask<kName>, GetFlagsRegister());
+ Gen<x86_64::MovbMemBaseDispReg>(x86_64::kMachineRegRBP, kCsrFieldOffset<kName>, tmp);
+ } else if constexpr (sizeof(CsrFieldType<kName>) == 8) {
+ Gen<x86_64::AndqRegImm>(
+ tmp, constants_pool::kConst<uint64_t{kCsrMask<kName>}>, GetFlagsRegister());
+ Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, kCsrFieldOffset<kName>, tmp);
+ } else {
+ static_assert(kDependentTypeFalse<CsrFieldType<kName>>);
+ }
}
private:
@@ -305,6 +413,13 @@ class HeavyOptimizerFrontend {
return result;
}
+ void MemoryRegionReservationLoad(Register aligned_addr);
+ Register MemoryRegionReservationExchange(Register aligned_addr, Register curr_reservation_value);
+ void MemoryRegionReservationSwapWithLockedOwner(Register aligned_addr,
+ Register curr_reservation_value,
+ Register new_reservation_value,
+ MachineBasicBlock* failure_bb);
+
// Syntax sugar.
template <typename InsnType, typename... Args>
/*may_discard*/ InsnType* Gen(Args... args) {
@@ -350,6 +465,178 @@ class HeavyOptimizerFrontend {
ArenaMap<GuestAddr, MachineInsnPosition> branch_targets_;
};
+template <>
+[[nodiscard]] inline HeavyOptimizerFrontend::Register
+HeavyOptimizerFrontend::GetCsr<CsrName::kFCsr>() {
+ auto csr_reg = AllocTempReg();
+ auto tmp = AllocTempReg();
+ bool inline_successful = TryInlineIntrinsicForHeavyOptimizer<&intrinsics::FeGetExceptions>(
+ &builder_, GetFlagsRegister(), tmp);
+ CHECK(inline_successful);
+ Gen<x86_64::MovzxbqRegMemBaseDisp>(
+ csr_reg, x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>);
+ Gen<x86_64::ShlbRegImm>(csr_reg, 5, GetFlagsRegister());
+ Gen<x86_64::OrbRegReg>(csr_reg, tmp, GetFlagsRegister());
+ return csr_reg;
+}
+
+template <>
+[[nodiscard]] inline HeavyOptimizerFrontend::Register
+HeavyOptimizerFrontend::GetCsr<CsrName::kFFlags>() {
+ return FeGetExceptions();
+}
+
+template <>
+[[nodiscard]] inline HeavyOptimizerFrontend::Register
+HeavyOptimizerFrontend::GetCsr<CsrName::kVlenb>() {
+ return GetImm(16);
+}
+
+template <>
+[[nodiscard]] inline HeavyOptimizerFrontend::Register
+HeavyOptimizerFrontend::GetCsr<CsrName::kVxrm>() {
+ auto reg = AllocTempReg();
+ Gen<x86_64::MovzxbqRegMemBaseDisp>(reg, x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>);
+ Gen<x86_64::AndbRegImm>(reg, 0b11, GetFlagsRegister());
+ return reg;
+}
+
+template <>
+[[nodiscard]] inline HeavyOptimizerFrontend::Register
+HeavyOptimizerFrontend::GetCsr<CsrName::kVxsat>() {
+ auto reg = AllocTempReg();
+ Gen<x86_64::MovzxbqRegMemBaseDisp>(reg, x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>);
+ Gen<x86_64::ShrbRegImm>(reg, 2, GetFlagsRegister());
+ return reg;
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFCsr>(uint8_t /* imm */) {
+ Unimplemented();
+ // TODO(b/291126436) Figure out how to pass Mem arg to FeSetExceptionsAndRoundImmTranslate.
+ // // Note: instructions Csrrci or Csrrsi couldn't affect Frm because immediate only has five
+ // bits.
+ // // But these instruction don't pass their immediate-specified argument into `SetCsr`, they
+ // combine
+ // // it with register first. Fixing that can only be done by changing code in the semantics
+ // player.
+ // //
+ // // But Csrrwi may clear it. And we actually may only arrive here from Csrrwi.
+ // // Thus, technically, we know that imm >> 5 is always zero, but it doesn't look like a good
+ // idea
+ // // to rely on that: it's very subtle and it only affects code generation speed.
+ // Gen<x86_64::MovbMemBaseDispImm>(x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>,
+ // static_cast<int8_t>(imm >> 5)); bool successful =
+ // TryInlineIntrinsicForHeavyOptimizer<&intrinsics::FeSetExceptionsAndRoundImmTranslate>(
+ // &builder_,
+ // GetFlagsRegister(),
+ // x86_64::kMachineRegRBP,
+ // static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area)),
+ // imm);
+ // CHECK(successful);
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFCsr>(Register /* arg */) {
+ Unimplemented();
+ // TODO(b/291126436) Figure out how to pass Mem arg to FeSetExceptionsAndRoundTranslate.
+ // auto tmp1 = AllocTempReg();
+ // auto tmp2 = AllocTempReg();
+ // Gen<PseudoCopy>(tmp1, arg, 1);
+ // Gen<x86_64::AndlRegImm>(tmp1, 0b1'1111, GetFlagsRegister());
+ // Gen<x86_64::ShldlRegRegImm>(tmp2, arg, int8_t{32 - 5}, GetFlagsRegister());
+ // Gen<x86_64::AndbRegImm>(tmp2, kCsrMask<CsrName::kFrm>, GetFlagsRegister());
+ // Gen<x86_64::MovbMemBaseDispReg>(x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>,
+ // tmp2);
+ // bool successful =
+ // TryInlineIntrinsicForHeavyOptimizer<&intrinsics::FeSetExceptionsAndRoundTranslate>(
+ // &builder_,
+ // GetFlagsRegister(),
+ // tmp1,
+ // x86_64::kMachineRegRBP,
+ // static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area)),
+ // tmp1);
+ // CHECK(successful);
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFFlags>(uint8_t imm) {
+ FeSetExceptionsImm(static_cast<int8_t>(imm & 0b1'1111));
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFFlags>(Register arg) {
+ auto tmp = AllocTempReg();
+ Gen<PseudoCopy>(tmp, arg, 1);
+ Gen<x86_64::AndlRegImm>(tmp, 0b1'1111, GetFlagsRegister());
+ FeSetExceptions(tmp);
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFrm>(uint8_t imm) {
+ Gen<x86_64::MovbMemBaseDispImm>(x86_64::kMachineRegRBP,
+ kCsrFieldOffset<CsrName::kFrm>,
+ static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
+ FeSetRoundImm(static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFrm>(Register arg) {
+  // Copy arg into a temporary so masking doesn't clobber the caller's register.
+ auto tmp = AllocTempReg();
+ Gen<PseudoCopy>(tmp, arg, 1);
+ Gen<x86_64::AndbRegImm>(tmp, kCsrMask<CsrName::kFrm>, GetFlagsRegister());
+ Gen<x86_64::MovbMemBaseDispReg>(x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>, tmp);
+ FeSetRound(tmp);
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kVxrm>(uint8_t imm) {
+ imm &= 0b11;
+ if (imm != 0b11) {
+ Gen<x86_64::AndbMemBaseDispImm>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, 0b100, GetFlagsRegister());
+ }
+ if (imm != 0b00) {
+ Gen<x86_64::OrbMemBaseDispImm>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, imm, GetFlagsRegister());
+ }
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kVxrm>(Register arg) {
+ Gen<x86_64::AndbMemBaseDispImm>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, 0b100, GetFlagsRegister());
+ Gen<x86_64::AndbRegImm>(arg, 0b11, GetFlagsRegister());
+ Gen<x86_64::OrbMemBaseDispReg>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, arg, GetFlagsRegister());
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kVxsat>(uint8_t imm) {
+ if (imm & 0b1) {
+ Gen<x86_64::OrbMemBaseDispImm>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, 0b100, GetFlagsRegister());
+ } else {
+ Gen<x86_64::AndbMemBaseDispImm>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, 0b11, GetFlagsRegister());
+ }
+}
+
+template <>
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kVxsat>(Register arg) {
+ using Condition = x86_64::Assembler::Condition;
+ Gen<x86_64::AndbMemBaseDispImm>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, 0b11, GetFlagsRegister());
+ Gen<x86_64::TestbRegImm>(arg, 1, GetFlagsRegister());
+ auto tmp = AllocTempReg();
+ Gen<x86_64::SetccReg>(Condition::kNotZero, tmp, GetFlagsRegister());
+ Gen<x86_64::MovzxbqRegReg>(tmp, tmp);
+ Gen<x86_64::ShlbRegImm>(tmp, int8_t{2}, GetFlagsRegister());
+ Gen<x86_64::OrbMemBaseDispReg>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kVcsr>, tmp, GetFlagsRegister());
+}
+
} // namespace berberis
#endif /* BERBERIS_HEAVY_OPTIMIZER_RISCV64_FRONTEND_H_ */
diff --git a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
index 1e952a95..43b5e9c4 100644
--- a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
+++ b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
@@ -222,6 +222,10 @@ class TESTSUITE : public ::testing::Test {
EXPECT_EQ(GetXReg<2>(state_.cpu), expected_fflags);
}
+#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
+#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) || \
+ defined(TESTING_HEAVY_OPTIMIZER)
+
void TestFrm(uint32_t insn_bytes, uint8_t frm_to_set, uint8_t expected_rm) {
auto code_start = ToGuestAddr(&insn_bytes);
state_.cpu.insn_addr = code_start;
@@ -362,6 +366,10 @@ class TESTSUITE : public ::testing::Test {
}
}
+#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
+#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) || \
+ defined(TESTING_HEAVY_OPTIMIZER)
+
void TestAmo(uint32_t insn_bytes,
uint64_t arg1,
uint64_t arg2,
@@ -390,6 +398,10 @@ class TESTSUITE : public ::testing::Test {
expected_memory);
}
+#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) ||
+ // defined(TESTING_HEAVY_OPTIMIZER)
+#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
+
template <typename... Types>
void TestFmvFloatToInteger(uint32_t insn_bytes,
std::initializer_list<std::tuple<Types...>> args) {
@@ -1135,6 +1147,10 @@ TEST_F(TESTSUITE, CJalr) {
// Tests for Non-Compressed Instructions.
+#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
+#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) || \
+ defined(TESTING_HEAVY_OPTIMIZER)
+
TEST_F(TESTSUITE, CsrInstructions) {
ScopedRoundingMode scoped_rounding_mode;
// Csrrw x2, frm, 2
@@ -1145,6 +1161,10 @@ TEST_F(TESTSUITE, CsrInstructions) {
TestFrm(0x0020f173, 0, 0);
}
+#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) ||
+ // defined(TESTING_HEAVY_OPTIMIZER)
+#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
+
TEST_F(TESTSUITE, FCsrRegister) {
fenv_t saved_environment;
EXPECT_EQ(fegetenv(&saved_environment), 0);
@@ -1759,6 +1779,10 @@ TEST_F(TESTSUITE, FmaInstructions) {
TestFma(0x223170cf, {std::tuple{1.0, 2.0, 3.0, -5.0}});
}
+#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
+#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) || \
+ defined(TESTING_HEAVY_OPTIMIZER)
+
TEST_F(TESTSUITE, AmoInstructions) {
// Verifying that all aq and rl combinations work for Amoswap, but only test relaxed one for most
// other instructions for brevity.
@@ -1800,6 +1824,10 @@ TEST_F(TESTSUITE, AmoInstructions) {
TestAmo(0xe03120af, 0xe03130af, 0xffff'eeee'dddd'ccccULL);
}
+#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) ||
+ // defined(TESTING_HEAVY_OPTIMIZER)
+#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
+
TEST_F(TESTSUITE, OpFpSingleInputInstructions) {
// FSqrt.S
TestOpFpSingleInput(0x580170d3, {std::tuple{4.0f, 2.0f}});
diff --git a/tests/run_host_tests.mk b/tests/run_host_tests.mk
index e8e6a2b9..812ebc99 100644
--- a/tests/run_host_tests.mk
+++ b/tests/run_host_tests.mk
@@ -29,7 +29,10 @@
.PHONY: berberis_run_host_tests
-berberis_all: berberis_host_tests_result
+# TODO(b/295236834): Add berberis_host_tests_result to berberis_all once the tests pass in
+# post-submit. They are currently failing due to unimplemented bit manipulation instructions in
+# stock builds.
+# berberis_all: berberis_host_tests_result
test_dir := $(call intermediates-dir-for,PACKAGING,berberis_tests)