From 6d67c5925dfb5eae9a15380403fa2a81e18a91ea Mon Sep 17 00:00:00 2001 From: Ben Cheng Date: Tue, 5 Feb 2013 12:28:19 -0800 Subject: Add support for integer divide (sdiv and udiv) instructions. With this patch Valgrind is working again on master with GCC 4.7 plus -cpu=cortex-a15. Change-Id: I4557b8a522c228e378fa8027358e57ed5ab3784f --- main/VEX/priv/guest_arm_toIR.c | 83 ++++++++++++++++++++++++++++++++++++++++++ main/VEX/priv/host_arm_defs.c | 49 ++++++++++++++++++++++++- main/VEX/priv/host_arm_defs.h | 23 +++++++++--- main/VEX/priv/host_arm_isel.c | 15 +++++++- 4 files changed, 162 insertions(+), 8 deletions(-) diff --git a/main/VEX/priv/guest_arm_toIR.c b/main/VEX/priv/guest_arm_toIR.c index f54ffb5fa..5397d7604 100644 --- a/main/VEX/priv/guest_arm_toIR.c +++ b/main/VEX/priv/guest_arm_toIR.c @@ -13671,6 +13671,51 @@ DisResult disInstr_ARM_WRK ( /* fall through */ } + /* --------------------- Integer Divides --------------------- */ + // SDIV + if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20) + && INSN(15,12) == BITS4(1,1,1,1) + && INSN(7,4) == BITS4(0,0,0,1)) { + UInt rD = INSN(19,16); + UInt rM = INSN(11,8); + UInt rN = INSN(3,0); + if (rD == 15 || rM == 15 || rN == 15) { + /* Unpredictable; don't decode; fall through */ + } else { + IRTemp res = newTemp(Ity_I32); + IRTemp argL = newTemp(Ity_I32); + IRTemp argR = newTemp(Ity_I32); + assign(argL, getIRegA(rN)); + assign(argR, getIRegA(rM)); + assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR))); + putIRegA(rD, mkexpr(res), condT, Ijk_Boring); + DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM); + goto decode_success; + } + } + + // UDIV + if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20) + && INSN(15,12) == BITS4(1,1,1,1) + && INSN(7,4) == BITS4(0,0,0,1)) { + UInt rD = INSN(19,16); + UInt rM = INSN(11,8); + UInt rN = INSN(3,0); + if (rD == 15 || rM == 15 || rN == 15) { + /* Unpredictable; don't decode; fall through */ + } else { + IRTemp res = newTemp(Ity_I32); + IRTemp argL = newTemp(Ity_I32); + IRTemp argR = newTemp(Ity_I32); + assign(argL, getIRegA(rN)); + assign(argR, getIRegA(rM)); + assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR))); + putIRegA(rD, mkexpr(res), condT, Ijk_Boring); + DIP("udiv r%u, r%u, r%u\n", rD, rN, rM); + goto decode_success; + } + } + // MLA, MLS if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0)) && INSN(7,4) == BITS4(1,0,0,1)) { @@ -18119,6 +18164,44 @@ DisResult disInstr_THUMB_WRK ( } } + /* -------------- SDIV.W Rd, Rn, Rm -------------- */ + if (INSN0(15,4) == 0xFB9 + && (INSN1(15,0) & 0xF0F0) == 0xF0F0) { + UInt rN = INSN0(3,0); + UInt rD = INSN1(11,8); + UInt rM = INSN1(3,0); + if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) { + IRTemp res = newTemp(Ity_I32); + IRTemp argL = newTemp(Ity_I32); + IRTemp argR = newTemp(Ity_I32); + assign(argL, getIRegT(rN)); + assign(argR, getIRegT(rM)); + assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR))); + putIRegT(rD, mkexpr(res), condT); + DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM); + goto decode_success; + } + } + + /* -------------- UDIV.W Rd, Rn, Rm -------------- */ + if (INSN0(15,4) == 0xFBB + && (INSN1(15,0) & 0xF0F0) == 0xF0F0) { + UInt rN = INSN0(3,0); + UInt rD = INSN1(11,8); + UInt rM = INSN1(3,0); + if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) { + IRTemp res = newTemp(Ity_I32); + IRTemp argL = newTemp(Ity_I32); + IRTemp argR = newTemp(Ity_I32); + assign(argL, getIRegT(rN)); + assign(argR, getIRegT(rM)); + assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR))); + putIRegT(rD, mkexpr(res), condT); + DIP("udiv.w r%u, r%u, 
r%u\n", rD, rN, rM); + goto decode_success; + } + } + /* ------------------ {U,S}MULL ------------------ */ if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA) && INSN1(7,4) == BITS4(0,0,0,0)) { diff --git a/main/VEX/priv/host_arm_defs.c b/main/VEX/priv/host_arm_defs.c index e428da0f6..6428b35c2 100644 --- a/main/VEX/priv/host_arm_defs.c +++ b/main/VEX/priv/host_arm_defs.c @@ -708,7 +708,7 @@ HChar* showARMUnaryOp ( ARMUnaryOp op ) { } } -HChar* showARMMulOp ( ARMMulOp op ) { +HChar* showARMMulOp ( ARMMulDivOp op ) { switch (op) { case ARMmul_PLAIN: return "mul"; case ARMmul_ZX: return "umull"; @@ -717,6 +717,14 @@ HChar* showARMMulOp ( ARMMulOp op ) { } } +HChar* showARMDivOp ( ARMMulDivOp op ) { + switch (op) { + case ARMdiv_S: return "sdiv"; + case ARMdiv_U: return "udiv"; + default: vpanic("showARMDivOp"); + } +} + HChar* showARMVfpOp ( ARMVfpOp op ) { switch (op) { case ARMvfp_ADD: return "add"; @@ -1216,12 +1224,21 @@ ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) { i->ARMin.Call.nArgRegs = nArgRegs; return i; } -ARMInstr* ARMInstr_Mul ( ARMMulOp op ) { +ARMInstr* ARMInstr_Mul ( ARMMulDivOp op ) { ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); i->tag = ARMin_Mul; i->ARMin.Mul.op = op; return i; } +ARMInstr* ARMInstr_Div ( ARMMulDivOp op, HReg dst, HReg argL, HReg argR ) { + ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); + i->tag = ARMin_Div; + i->ARMin.Div.op = op; + i->ARMin.Div.dst = dst; + i->ARMin.Div.argL = argL; + i->ARMin.Div.argR = argR; + return i; +} ARMInstr* ARMInstr_LdrEX ( Int szB ) { ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); i->tag = ARMin_LdrEX; @@ -1662,6 +1679,14 @@ void ppARMInstr ( ARMInstr* i ) { vex_printf("r1:r0, r2, r3"); } return; + case ARMin_Div: + vex_printf("%-5s ", showARMDivOp(i->ARMin.Div.op)); + ppHRegARM(i->ARMin.Div.dst); + vex_printf(", "); + ppHRegARM(i->ARMin.Div.argL); + vex_printf(", "); + ppHRegARM(i->ARMin.Div.argR); + return; case ARMin_LdrEX: { HChar* sz = ""; switch (i->ARMin.LdrEX.szB) { @@ -2083,6 +2108,11 @@ void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 ) if (i->ARMin.Mul.op != ARMmul_PLAIN) addHRegUse(u, HRmWrite, hregARM_R1()); return; + case ARMin_Div: + addHRegUse(u, HRmWrite, i->ARMin.Div.dst); + addHRegUse(u, HRmRead, i->ARMin.Div.argL); + addHRegUse(u, HRmRead, i->ARMin.Div.argR); + return; case ARMin_LdrEX: addHRegUse(u, HRmRead, hregARM_R4()); addHRegUse(u, HRmWrite, hregARM_R2()); @@ -2318,6 +2348,11 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 ) return; case ARMin_Mul: return; + case ARMin_Div: + i->ARMin.Div.dst = lookupHRegRemap(m, i->ARMin.Div.dst); + i->ARMin.Div.argL = lookupHRegRemap(m, i->ARMin.Div.argL); + i->ARMin.Div.argR = lookupHRegRemap(m, i->ARMin.Div.argR); + return; case ARMin_LdrEX: return; case ARMin_StrEX: @@ -3303,6 +3338,16 @@ Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc, } goto bad; } + case ARMin_Div: { + UInt subopc = i->ARMin.Div.op == ARMdiv_U ? 
+ X0011 : X0001; + UInt rD = iregNo(i->ARMin.Div.dst); + UInt rN = iregNo(i->ARMin.Div.argL); + UInt rM = iregNo(i->ARMin.Div.argR); + UInt instr = XXXXXXXX(X1110, X0111, subopc, rD, 0xF, rM, X0001, rN); + *p++ = instr; + goto done; + } case ARMin_LdrEX: { /* E1D42F9F ldrexb r2, [r4] E1F42F9F ldrexh r2, [r4] diff --git a/main/VEX/priv/host_arm_defs.h b/main/VEX/priv/host_arm_defs.h index 91a6757c5..7c80beace 100644 --- a/main/VEX/priv/host_arm_defs.h +++ b/main/VEX/priv/host_arm_defs.h @@ -383,12 +383,15 @@ typedef enum { ARMmul_PLAIN=60, ARMmul_ZX, - ARMmul_SX + ARMmul_SX, + ARMdiv_S, + ARMdiv_U } - ARMMulOp; + ARMMulDivOp; -extern HChar* showARMMulOp ( ARMMulOp op ); +extern HChar* showARMMulOp ( ARMMulDivOp op ); +extern HChar* showARMDivOp ( ARMMulDivOp op ); typedef enum { @@ -570,6 +573,7 @@ typedef ARMin_CMov, ARMin_Call, ARMin_Mul, + ARMin_Div, ARMin_LdrEX, ARMin_StrEX, /* vfp */ @@ -727,8 +731,15 @@ typedef complexity). Hence hardwire it. At least using caller-saves registers, which are less likely to be in use. */ struct { - ARMMulOp op; + ARMMulDivOp op; } Mul; + /* ARMdiv_S/ARMdiv_U: signed/unsigned integer divides, respectively. */ + struct { + ARMMulDivOp op; + HReg dst; + HReg argL; + HReg argR; + } Div; /* LDREX{,H,B} r2, [r4] and LDREXD r2, r3, [r4] (on LE hosts, transferred value is r3:r2) Again, hardwired registers since this is not performance @@ -958,7 +969,9 @@ extern ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T, ARMCondCode cond, IRJumpKind jk ); extern ARMInstr* ARMInstr_CMov ( ARMCondCode, HReg dst, ARMRI84* src ); extern ARMInstr* ARMInstr_Call ( ARMCondCode, HWord, Int nArgRegs ); -extern ARMInstr* ARMInstr_Mul ( ARMMulOp op ); +extern ARMInstr* ARMInstr_Mul ( ARMMulDivOp op ); +extern ARMInstr* ARMInstr_Div ( ARMMulDivOp op, HReg dst, HReg argL, + HReg argR ); extern ARMInstr* ARMInstr_LdrEX ( Int szB ); extern ARMInstr* ARMInstr_StrEX ( Int szB ); extern ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg, ARMAModeV* ); diff --git a/main/VEX/priv/host_arm_isel.c b/main/VEX/priv/host_arm_isel.c index 62739fdd2..13c1f2d8c 100644 --- a/main/VEX/priv/host_arm_isel.c +++ b/main/VEX/priv/host_arm_isel.c @@ -1193,6 +1193,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) default: break; } + /* SDIV/UDIV */ + if (e->Iex.Binop.op == Iop_DivU32 || e->Iex.Binop.op == Iop_DivS32) { + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + + addInstr(env, + ARMInstr_Div(e->Iex.Binop.op == Iop_DivU32 ? + ARMdiv_U : ARMdiv_S, + dst, argL, argR)); + return dst; + } + /* SHL/SHR/SAR */ switch (e->Iex.Binop.op) { case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop; @@ -1889,7 +1902,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg tHi = newVRegI(env); HReg tLo = newVRegI(env); - ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32 + ARMMulDivOp mop = e->Iex.Binop.op == Iop_MullS32 ? ARMmul_SX : ARMmul_ZX; addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL)); addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR)); -- cgit v1.2.3
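
Two illustrative notes follow the patch proper; neither is part of the commit.

First, a standalone sanity check of the instruction words the new ARMin_Div case in emit_ARMInstr() produces. The pack_arm_div() helper below is hypothetical: it only mirrors the nibble order handed to the XXXXXXXX() macro (X1110, X0111, subopc, rD, 0xF, rM, X0001, rN) so the result can be compared against the SDIV/UDIV "A1" encodings in the ARM ARM (cond 0111 000{1,3} Rd 1111 Rm 0001 Rn).

    /* Illustrative only: pack_arm_div() is not part of the patch; it just
       repeats the nibble layout used by the ARMin_Div emitter above so the
       produced words can be checked against the architecture manual. */
    #include <assert.h>
    #include <stdio.h>

    typedef unsigned int UInt;

    /* Pack eight 4-bit fields, most significant nibble first, exactly as
       XXXXXXXX(X1110, X0111, subopc, rD, 0xF, rM, X0001, rN) does. */
    static UInt pack_arm_div ( UInt subopc, UInt rD, UInt rN, UInt rM )
    {
       UInt nib[8];
       UInt w = 0;
       int  i;
       nib[0] = 0xE;    /* cond = AL  */
       nib[1] = 0x7;
       nib[2] = subopc; /* 0x1 = sdiv, 0x3 = udiv */
       nib[3] = rD;
       nib[4] = 0xF;
       nib[5] = rM;
       nib[6] = 0x1;
       nib[7] = rN;
       for (i = 0; i < 8; i++)
          w = (w << 4) | (nib[i] & 0xF);
       return w;
    }

    int main ( void )
    {
       assert(pack_arm_div(0x1, 0, 1, 2) == 0xE710F211);  /* sdiv r0, r1, r2 */
       assert(pack_arm_div(0x3, 0, 1, 2) == 0xE730F211);  /* udiv r0, r1, r2 */
       printf("div encodings look right\n");
       return 0;
    }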
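Second, a minimal end-to-end test. Assuming GCC 4.7 with -mcpu=cortex-a15 (plus -mthumb for the sdiv.w/udiv.w forms) compiles the divisions below to hardware divide instructions rather than libgcc calls, running the binary under the patched Valgrind exercises both the new front-end decode and the ARMin_Div emitter. The file name and flags are illustrative, not taken from the commit.

    /* div_test.c -- e.g. gcc-4.7 -O2 -mcpu=cortex-a15 div_test.c
       (add -mthumb to get the Thumb-2 sdiv.w/udiv.w encodings).
       volatile keeps the compiler from folding the divisions away. */
    #include <stdio.h>

    int main ( void )
    {
       volatile int          sn = -1000, sd = 7;
       volatile unsigned int un = 1000u, ud = 7u;

       int          sq = sn / sd;   /* expect sdiv / sdiv.w */
       unsigned int uq = un / ud;   /* expect udiv / udiv.w */

       printf("signed   %d / %d = %d\n", sn, sd, sq);
       printf("unsigned %u / %u = %u\n", un, ud, uq);
       return (sq == -142 && uq == 142) ? 0 : 1;
    }

If the compiler instead falls back to the __aeabi_idiv/__aeabi_uidiv helpers, the program still runs correctly but does not touch the new decode paths.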