author     Dmitriy Ivanov <dimitry@google.com>    2014-06-09 10:51:43 -0700
committer  Dmitriy Ivanov <dimitry@google.com>    2014-06-09 19:01:17 -0700
commit     0a8046eb0993fb7e155b03cc5ef3323fca3467c0 (patch)
tree       f5b652fc5e648cfb210dfe8ef2e6ff37f4761a25
parent     3361c08e1a742df84c7c059bd5039a70d4d1b232 (diff)
download   valgrind-0a8046eb0993fb7e155b03cc5ef3323fca3467c0.tar.gz
Handle umull (vector) and vmov
* Vmov is needed for the umull translation logic
  in memcheck.
Bug: 15145107
Change-Id: I703d850d31e849af9f77a411cff5f3a439127265
-rw-r--r--   main/VEX/priv/guest_arm64_toIR.c      | 23
-rw-r--r--   main/VEX/priv/host_arm64_defs.c       | 85
-rw-r--r--   main/VEX/priv/host_arm64_defs.h       |  9
-rw-r--r--   main/VEX/priv/host_arm64_isel.c       | 76
-rw-r--r--   main/none/tests/arm64/fp_and_simd.c   |  6
5 files changed, 157 insertions(+), 42 deletions(-)
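
As context for the decode below: the three Iop_Mull*Ux* IR ops express a widening unsigned multiply, where each lane of the two 64-bit sources produces a result lane of twice the width. A minimal plain-C model of one shape, UMULL Vd.4s, Vn.4h, Vm.4h (the helper name and test values are invented for illustration; they are not part of the commit):

   #include <stdint.h>
   #include <stdio.h>

   /* Reference model of UMULL Vd.4s, Vn.4h, Vm.4h (Iop_Mull16Ux4):
      each unsigned 16-bit lane is multiplied into a full 32-bit lane,
      so no product is truncated. */
   static void umull_4s_4h_4h(uint32_t d[4],
                              const uint16_t n[4], const uint16_t m[4])
   {
      for (int i = 0; i < 4; i++)
         d[i] = (uint32_t)n[i] * (uint32_t)m[i];
   }

   int main(void)
   {
      const uint16_t n[4] = { 0xFFFF, 2, 3, 4 };
      const uint16_t m[4] = { 0xFFFF, 5, 6, 7 };
      uint32_t d[4];
      umull_4s_4h_4h(d, n, m);
      /* 0xFFFF * 0xFFFF = 0xFFFE0001 fits only because the result widens */
      printf("%08x %08x %08x %08x\n", d[0], d[1], d[2], d[3]);
      return 0;
   }

The `sz` field of the instruction picks which of the three lane shapes applies, which is exactly the indexing the new decode performs.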
diff --git a/main/VEX/priv/guest_arm64_toIR.c b/main/VEX/priv/guest_arm64_toIR.c
index 24e89114f..0c775d920 100644
--- a/main/VEX/priv/guest_arm64_toIR.c
+++ b/main/VEX/priv/guest_arm64_toIR.c
@@ -6588,6 +6588,29 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       }
       /* else fall through */
    }
 
+   /* ------------ UMULL (vector) ------------ */
+   /* 31  28      23 21 20 15     9 4
+      001 01110   sz 1  m  110000 n d   UMULL Vd.Ta, Vn.Tb, Vm.Tb
+
+   */
+   if (INSN(31,24) == BITS8(0,0,1,0,1,1,1,0) && INSN(23,22) != BITS2(1,1)
+       && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,1,0,0,0,0)) {
+      UInt mm = INSN(20,16);
+      UInt nn = INSN(9,5);
+      UInt dd = INSN(4,0);
+      UInt sz = INSN(23,22);
+
+      const HChar* nameTa[3] = { "8h", "4s", "2d" };
+      const HChar* nameTb[3] = { "8b", "4h", "2s" };
+      const IROp   ops[3]    = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2 };
+
+      putQReg128(dd, binop(ops[sz], getQRegLO(nn, Ity_I64), getQRegLO(mm, Ity_I64)));
+
+      DIP("umull %s.%s, %s.%s, %s.%s\n", nameQReg128(dd), nameTa[sz],
+          nameQReg128(nn), nameTb[sz], nameQReg128(mm), nameTb[sz]);
+      return True;
+   }
+
    /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
    /* 31 28 23 20 15 9 4
diff --git a/main/VEX/priv/host_arm64_defs.c b/main/VEX/priv/host_arm64_defs.c
index cd68d6844..bad2acca5 100644
--- a/main/VEX/priv/host_arm64_defs.c
+++ b/main/VEX/priv/host_arm64_defs.c
@@ -875,6 +875,9 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm,
       case ARM64vecb_UMIN32x4:     *nm = "umin";  *ar = "4s";  return;
       case ARM64vecb_UMIN16x8:     *nm = "umin";  *ar = "8h";  return;
       case ARM64vecb_UMIN8x16:     *nm = "umin";  *ar = "16b"; return;
+      case ARM64vecb_UMULL32x2:    *nm = "umull"; *ar = "2d";  return;
+      case ARM64vecb_UMULL16x4:    *nm = "umull"; *ar = "4s";  return;
+      case ARM64vecb_UMULL8x8:     *nm = "umull"; *ar = "8b";  return;
       case ARM64vecb_SMAX32x4:     *nm = "smax";  *ar = "4s";  return;
       case ARM64vecb_SMAX16x8:     *nm = "smax";  *ar = "8h";  return;
       case ARM64vecb_SMAX8x16:     *nm = "smax";  *ar = "16b"; return;
@@ -915,6 +918,12 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
       case ARM64vecu_FNEG32x4:  *nm = "fneg ";     *ar = "4s";  return;
       case ARM64vecu_FABS64x2:  *nm = "fabs ";     *ar = "2d";  return;
       case ARM64vecu_FABS32x4:  *nm = "fabs ";     *ar = "4s";  return;
+      case ARM64vecu_VMOVL8U:   *nm = "vmovl.u8";  *ar = "all"; return;
+      case ARM64vecu_VMOVL16U:  *nm = "vmovl.u16"; *ar = "all"; return;
+      case ARM64vecu_VMOVL32U:  *nm = "vmovl.u32"; *ar = "all"; return;
+      case ARM64vecu_VMOVL8S:   *nm = "vmovl.s8";  *ar = "all"; return;
+      case ARM64vecu_VMOVL16S:  *nm = "vmovl.s16"; *ar = "all"; return;
+      case ARM64vecu_VMOVL32S:  *nm = "vmovl.s32"; *ar = "all"; return;
       case ARM64vecu_NOT:       *nm = "not ";      *ar = "all"; return;
       case ARM64vecu_CNT:       *nm = "cnt ";      *ar = "16b"; return;
       case ARM64vecu_UADDLV8x16: *nm = "uaddlv ";  *ar = "16b"; return;
@@ -3312,7 +3321,7 @@ static inline UChar iregNo ( HReg r )
 static inline UChar dregNo ( HReg r )
 {
    UInt n;
-   vassert(hregClass(r) == HRcFlt64);
+   vassert(hregClass(r) == HRcFlt64 || hregClass(r) == HRcInt64);
    vassert(!hregIsVirtual(r));
    n = hregNumber(r);
    vassert(n <= 31);
@@ -3382,12 +3391,14 @@ static inline UChar qregNo ( HReg r )
 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
+#define X100000 BITS8(0,0, 1,0,0,0,0,0)
 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
+#define X101000 BITS8(0,0, 1,0,1,0,0,0)
 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
@@ -3430,6 +3441,12 @@ static inline UChar qregNo ( HReg r )
 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
 
+#define BITS9(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
+   ((BITS8(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1) << 1) | zzb0)
+
+#define X111100111 BITS9(1,1,1,1,0,0,1,1,1)
+#define X111100101 BITS9(1,1,1,1,0,0,1,0,1)
+
 /* --- 4 fields --- */
@@ -3588,6 +3605,29 @@ static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
    return w;
 }
 
+static inline UInt X_9_1_6_4_6_1_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4,
+                                       UInt f5, UInt f6, UInt f7, UInt f8 ) {
+   vassert(9+1+6+4+6+1+1+4 == 32);
+   vassert(f1 < (1<<9));
+   vassert(f2 < (1<<1));
+   vassert(f3 < (1<<6));
+   vassert(f4 < (1<<4));
+   vassert(f5 < (1<<6));
+   vassert(f6 < (1<<1));
+   vassert(f7 < (1<<1));
+   vassert(f8 < (1<<4));
+   UInt w = 0;
+   w = (w << 9) | f1;
+   w = (w << 1) | f2;
+   w = (w << 6) | f3;
+   w = (w << 4) | f4;
+   w = (w << 6) | f5;
+   w = (w << 1) | f6;
+   w = (w << 1) | f7;
+   w = (w << 4) | f8;
+   return w;
+}
+
 //ZZ #define X0000 BITS4(0,0,0,0)
 //ZZ #define X0001 BITS4(0,0,0,1)
@@ -5022,8 +5062,14 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
       */
       UInt vD = qregNo(i->ARM64in.VBinV.dst);
-      UInt vN = qregNo(i->ARM64in.VBinV.argL);
-      UInt vM = qregNo(i->ARM64in.VBinV.argR);
+      ARM64VecBinOp op = i->ARM64in.VBinV.op;
+      Bool isV128 = (op != ARM64vecb_UMULL8x8
+                     && op != ARM64vecb_UMULL16x4
+                     && op != ARM64vecb_UMULL32x2);
+      UInt vN = isV128 ? qregNo(i->ARM64in.VBinV.argL)
+                       : dregNo(i->ARM64in.VBinV.argL);
+      UInt vM = isV128 ? qregNo(i->ARM64in.VBinV.argR)
+                       : dregNo(i->ARM64in.VBinV.argR);
       switch (i->ARM64in.VBinV.op) {
          case ARM64vecb_ADD64x2:
            *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
           break;
@@ -5103,6 +5149,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
            *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
            break;
 
+         case ARM64vecb_UMULL32x2:
+            *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
+            break;
+         case ARM64vecb_UMULL16x4:
+            *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
+            break;
+         case ARM64vecb_UMULL8x8:
+            *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
+            break;
+
          case ARM64vecb_SMAX32x4:
            *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
            break;
@@ -5219,8 +5275,11 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          010 01110 01 1 10000 001110 n d   SADDLV Sd, Vn.8h
          010 01110 10 1 10000 001110 n d   SADDLV Dd, Vn.4s
       */
+      ARM64VecUnaryOp op = i->ARM64in.VUnaryV.op;
       UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
-      UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
+      Bool isV128 = !(op >= ARM64vecu_VMOVL8U && op <= ARM64vecu_VMOVL32S);
+      UInt vN = isV128 ? qregNo(i->ARM64in.VUnaryV.arg)
+                       : dregNo(i->ARM64in.VUnaryV.arg);
       switch (i->ARM64in.VUnaryV.op) {
          case ARM64vecu_FABS64x2:
            *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
            break;
@@ -5234,6 +5293,24 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
         case ARM64vecu_FNEG32x4:
            *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
            break;
+         case ARM64vecu_VMOVL8U:
+            *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+            break;
+         case ARM64vecu_VMOVL16U:
+            *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+            break;
+         case ARM64vecu_VMOVL32U:
+            *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+            break;
+         case ARM64vecu_VMOVL8S:
+            *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+            break;
+         case ARM64vecu_VMOVL16S:
+            *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+            break;
+         case ARM64vecu_VMOVL32S:
+            *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+            break;
         case ARM64vecu_NOT:
            *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
            break;
diff --git a/main/VEX/priv/host_arm64_defs.h b/main/VEX/priv/host_arm64_defs.h
index 63f6baf89..b22730b2f 100644
--- a/main/VEX/priv/host_arm64_defs.h
+++ b/main/VEX/priv/host_arm64_defs.h
@@ -332,6 +332,9 @@ typedef
       ARM64vecb_UMIN32x4,
       ARM64vecb_UMIN16x8,
       ARM64vecb_UMIN8x16,
+      ARM64vecb_UMULL32x2,
+      ARM64vecb_UMULL16x4,
+      ARM64vecb_UMULL8x8,
       ARM64vecb_SMAX32x4,
       ARM64vecb_SMAX16x8,
       ARM64vecb_SMAX8x16,
@@ -370,6 +373,12 @@ typedef
       ARM64vecu_FNEG32x4,
       ARM64vecu_FABS64x2,
       ARM64vecu_FABS32x4,
+      ARM64vecu_VMOVL8U,
+      ARM64vecu_VMOVL16U,
+      ARM64vecu_VMOVL32U,
+      ARM64vecu_VMOVL8S,
+      ARM64vecu_VMOVL16S,
+      ARM64vecu_VMOVL32S,
       ARM64vecu_NOT,
       ARM64vecu_CNT,
       ARM64vecu_UADDLV8x16,
diff --git a/main/VEX/priv/host_arm64_isel.c b/main/VEX/priv/host_arm64_isel.c
index 52babc0e3..d02be24f2 100644
--- a/main/VEX/priv/host_arm64_isel.c
+++ b/main/VEX/priv/host_arm64_isel.c
@@ -4453,6 +4453,27 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          return res;
       }
 
+      case Iop_Widen8Uto16x8:
+      case Iop_Widen16Uto32x4:
+      case Iop_Widen32Uto64x2:
+      case Iop_Widen8Sto16x8:
+      case Iop_Widen16Sto32x4:
+      case Iop_Widen32Sto64x2: {
+         HReg res = newVRegV(env);
+         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+         ARM64VecUnaryOp wop = ARM64vecu_INVALID;
+         switch (e->Iex.Unop.op) {
+            case Iop_Widen8Uto16x8:  wop = ARM64vecu_VMOVL8U;  break;
+            case Iop_Widen16Uto32x4: wop = ARM64vecu_VMOVL16U; break;
+            case Iop_Widen32Uto64x2: wop = ARM64vecu_VMOVL32U; break;
+            case Iop_Widen8Sto16x8:  wop = ARM64vecu_VMOVL8S;  break;
+            case Iop_Widen16Sto32x4: wop = ARM64vecu_VMOVL16S; break;
+            case Iop_Widen32Sto64x2: wop = ARM64vecu_VMOVL32S; break;
+            default: vassert(0);
+         }
+         addInstr(env, ARM64Instr_VUnaryV(wop, res, arg));
+         return res;
+      }
 //ZZ       case Iop_NotV128: {
 //ZZ          DECLARE_PATTERN(p_veqz_8x16);
 //ZZ          DECLARE_PATTERN(p_veqz_16x8);
@@ -4708,22 +4729,6 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
 //ZZ             res, tmp, x, 0, True));
 //ZZ          return res;
 //ZZ       }
-//ZZ       case Iop_Widen8Uto16x8:
-//ZZ       case Iop_Widen16Uto32x4:
-//ZZ       case Iop_Widen32Uto64x2: {
-//ZZ          HReg res = newVRegV(env);
-//ZZ          HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
-//ZZ          UInt size;
-//ZZ          switch (e->Iex.Unop.op) {
-//ZZ             case Iop_Widen8Uto16x8:  size = 0; break;
-//ZZ             case Iop_Widen16Uto32x4: size = 1; break;
-//ZZ             case Iop_Widen32Uto64x2: size = 2; break;
-//ZZ             default: vassert(0);
-//ZZ          }
-//ZZ          addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
-//ZZ                        res, arg, size, True));
-//ZZ          return res;
-//ZZ       }
 //ZZ       case Iop_Widen8Sto16x8:
 //ZZ       case Iop_Widen16Sto32x4:
 //ZZ       case Iop_Widen32Sto64x2: {
@@ -5648,23 +5653,24 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
 //ZZ             res, argL, argR, size, True));
 //ZZ          return res;
 //ZZ       }
-//ZZ       case Iop_Mull8Ux8:
-//ZZ       case Iop_Mull16Ux4:
-//ZZ       case Iop_Mull32Ux2: {
-//ZZ          HReg res = newVRegV(env);
-//ZZ          HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
-//ZZ          HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
-//ZZ          UInt size = 0;
-//ZZ          switch(e->Iex.Binop.op) {
-//ZZ             case Iop_Mull8Ux8: size = 0; break;
-//ZZ             case Iop_Mull16Ux4: size = 1; break;
-//ZZ             case Iop_Mull32Ux2: size = 2; break;
-//ZZ             default: vassert(0);
-//ZZ          }
-//ZZ          addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
-//ZZ                        res, argL, argR, size, True));
-//ZZ          return res;
-//ZZ       }
+      case Iop_Mull8Ux8:
+      case Iop_Mull16Ux4:
+      case Iop_Mull32Ux2: {
+         HReg res  = newVRegV(env);
+         HReg argL = iselDblExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselDblExpr(env, e->Iex.Binop.arg2);
+         UInt size = 0;
+         ARM64VecBinOp op = ARM64vecb_INVALID;
+
+         switch(e->Iex.Binop.op) {
+            case Iop_Mull8Ux8:  op = ARM64vecb_UMULL8x8;  break;
+            case Iop_Mull16Ux4: op = ARM64vecb_UMULL16x4; break;
+            case Iop_Mull32Ux2: op = ARM64vecb_UMULL32x2; break;
+            default: vassert(0);
+         }
+         addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
+         return res;
+      }
 //ZZ
 //ZZ       case Iop_Mull8Sx8:
 //ZZ       case Iop_Mull16Sx4:
@@ -5975,7 +5981,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
 {
    IRType ty = typeOfIRExpr(env->type_env,e);
    vassert(e);
-   vassert(ty == Ity_F64);
+   vassert(ty == Ity_F64 || ty == Ity_I64);
 
    if (e->tag == Iex_RdTmp) {
       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
@@ -5993,7 +5999,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
    }
 
    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
-      vassert(e->Iex.Load.ty == Ity_F64);
+      vassert(e->Iex.Load.ty == Ity_F64 || e->Iex.Load.ty == Ity_I64);
       HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
       HReg res = newVRegD(env);
       addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
diff --git a/main/none/tests/arm64/fp_and_simd.c b/main/none/tests/arm64/fp_and_simd.c
index 6cfc32ff5..14b4da404 100644
--- a/main/none/tests/arm64/fp_and_simd.c
+++ b/main/none/tests/arm64/fp_and_simd.c
@@ -4174,11 +4174,11 @@ int main ( void )
    if (0) test_smull2_4s_8h_8h(TyH);
    if (0) test_smull_8h_8b_8b(TyB);
    if (0) test_smull2_8h_16b_16b(TyB);
-   if (0) test_umull_2d_2s_2s(TyS);
+   if (1) test_umull_2d_2s_2s(TyS);
    if (0) test_umull2_2d_4s_4s(TyS);
-   if (0) test_umull_4s_4h_4h(TyH);
+   if (1) test_umull_4s_4h_4h(TyH);
    if (0) test_umull2_4s_8h_8h(TyH);
-   if (0) test_umull_8h_8b_8b(TyB);
+   if (1) test_umull_8h_8b_8b(TyB);
    if (0) test_umull2_8h_16b_16b(TyB);
 
    // smov w_b[], w_h[], x_b[], x_h[], x_s[]
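
The vmovl half of the commit covers the companion building block: Iop_Widen8Uto16x8 and friends extend every lane of a 64-bit source to twice its width, zero-extending in the U variants and sign-extending in the S variants. A minimal sketch of the unsigned byte case (the helper name is invented for illustration; it is not part of the commit):

   #include <stdint.h>
   #include <stdio.h>

   /* Reference model of Iop_Widen8Uto16x8 (vmovl.u8): the eight unsigned
      bytes of a 64-bit source become eight zero-extended 16-bit lanes.
      The signed variants (vmovl.s8 etc.) sign-extend instead. */
   static void widen8u_to_16x8(uint16_t d[8], const uint8_t s[8])
   {
      for (int i = 0; i < 8; i++)
         d[i] = (uint16_t)s[i];
   }

   int main(void)
   {
      const uint8_t s[8] = { 0x80, 0xFF, 1, 2, 3, 4, 5, 6 };
      uint16_t d[8];
      widen8u_to_16x8(d, s);
      printf("%04x %04x\n", d[0], d[1]);   /* 0080 00ff, not ff80 ffff */
      return 0;
   }

Presumably this is the dependency the commit message refers to: when memcheck instruments a widening multiply, its shadow-value computation introduces Widen ops, so the arm64 backend must be able to select them even though the guest decoder itself never produces vmovl.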