author    Dmitriy Ivanov <dimitry@google.com>  2014-06-09 10:51:43 -0700
committer Dmitriy Ivanov <dimitry@google.com>  2014-06-09 19:01:17 -0700
commit    0a8046eb0993fb7e155b03cc5ef3323fca3467c0 (patch)
tree      f5b652fc5e648cfb210dfe8ef2e6ff37f4761a25
parent    3361c08e1a742df84c7c059bd5039a70d4d1b232 (diff)
Handle umull (vector) and vmov
* Vmov is needed for umull translation logic in memcheck.

Bug: 15145107
Change-Id: I703d850d31e849af9f77a411cff5f3a439127265
-rw-r--r--  main/VEX/priv/guest_arm64_toIR.c     | 23
-rw-r--r--  main/VEX/priv/host_arm64_defs.c      | 85
-rw-r--r--  main/VEX/priv/host_arm64_defs.h      |  9
-rw-r--r--  main/VEX/priv/host_arm64_isel.c      | 76
-rw-r--r--  main/none/tests/arm64/fp_and_simd.c  |  6
5 files changed, 157 insertions(+), 42 deletions(-)
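
For orientation before the diff: the guest decoder added below maps UMULL
(vector) onto VEX's Iop_Mull8Ux8 / Iop_Mull16Ux4 / Iop_Mull32Ux2, each of
which multiplies the unsigned lanes of two 64-bit inputs into double-width
lanes of a 128-bit result. A minimal C reference model of the 32x2 case
(the helper name is illustrative, not part of VEX):

    #include <stdint.h>

    /* Reference semantics for Iop_Mull32Ux2: multiply the two unsigned
       32-bit lanes of each 64-bit input, producing two 64-bit lanes.
       A 32x32->64 multiply cannot overflow, so nothing is truncated. */
    static void mull32ux2(uint64_t dst[2], uint64_t srcN, uint64_t srcM)
    {
        uint32_t n0 = (uint32_t)srcN,         m0 = (uint32_t)srcM;
        uint32_t n1 = (uint32_t)(srcN >> 32), m1 = (uint32_t)(srcM >> 32);
        dst[0] = (uint64_t)n0 * m0;   /* lane 0 */
        dst[1] = (uint64_t)n1 * m1;   /* lane 1 */
    }
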
diff --git a/main/VEX/priv/guest_arm64_toIR.c b/main/VEX/priv/guest_arm64_toIR.c
index 24e89114f..0c775d920 100644
--- a/main/VEX/priv/guest_arm64_toIR.c
+++ b/main/VEX/priv/guest_arm64_toIR.c
@@ -6588,6 +6588,29 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
}
/* else fall through */
}
+ /* ------------ UMULL (vector) ------------ */
+ /* 31 28 23 21 20 15 9 4
+ 001 01110 sz 1 m 110000 n d UMULL Vd.Ta, Vn.Tb, Vm.Tb
+
+ */
+ if (INSN(31,24) == BITS8(0,0,1,0,1,1,1,0) && INSN(23,22) != BITS2(1,1)
+ && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,1,0,0,0,0)) {
+ UInt mm = INSN(20,16);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ UInt sz = INSN(23,22);
+
+ const HChar* nameTa[3] = { "8h", "4s", "2d" };
+ const HChar* nameTb[3] = { "8b", "4h", "2s" };
+ const IROp ops[3] = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2 };
+
+ putQReg128(dd, binop(ops[sz], getQRegLO(nn, Ity_I64), getQRegLO(mm, Ity_I64)));
+
+ DIP("umull %s.%s, %s.%s, %s.%s\n", nameQReg128(dd), nameTa[sz],
+ nameQReg128(nn), nameTb[sz], nameQReg128(mm), nameTb[sz]);
+ return True;
+ }
+
/* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
/* 31 28 23 20 15 9 4
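
As a quick sanity check of the decode condition just added: the word
0x2EA1C002 encodes umull v2.2d, v0.2s, v1.2s and passes every field test
(INSN(31,24) == 0x2E, sz == 10, bit 21 == 1, INSN(15,10) == 110000). A
standalone sketch, where SLICE is a stand-in for VEX's INSN macro:

    #include <stdint.h>
    #include <stdio.h>

    #define SLICE(w,hi,lo) (((w) >> (lo)) & ((1u << ((hi)-(lo)+1)) - 1u))

    int main(void)
    {
        uint32_t insn = 0x2EA1C002;   /* umull v2.2d, v0.2s, v1.2s */
        if (SLICE(insn,31,24) == 0x2E && SLICE(insn,23,22) != 3
            && SLICE(insn,21,21) == 1 && SLICE(insn,15,10) == 0x30) {
            /* prints: umull sz=2 d=2 n=0 m=1 */
            printf("umull sz=%u d=%u n=%u m=%u\n",
                   SLICE(insn,23,22), SLICE(insn,4,0),
                   SLICE(insn,9,5),  SLICE(insn,20,16));
        }
        return 0;
    }
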
diff --git a/main/VEX/priv/host_arm64_defs.c b/main/VEX/priv/host_arm64_defs.c
index cd68d6844..bad2acca5 100644
--- a/main/VEX/priv/host_arm64_defs.c
+++ b/main/VEX/priv/host_arm64_defs.c
@@ -875,6 +875,9 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm,
case ARM64vecb_UMIN32x4: *nm = "umin"; *ar = "4s"; return;
case ARM64vecb_UMIN16x8: *nm = "umin"; *ar = "8h"; return;
case ARM64vecb_UMIN8x16: *nm = "umin"; *ar = "16b"; return;
+ case ARM64vecb_UMULL32x2: *nm = "umull"; *ar = "2d"; return;
+ case ARM64vecb_UMULL16x4: *nm = "umull"; *ar = "4s"; return;
+ case ARM64vecb_UMULL8x8: *nm = "umull"; *ar = "8h"; return;
case ARM64vecb_SMAX32x4: *nm = "smax"; *ar = "4s"; return;
case ARM64vecb_SMAX16x8: *nm = "smax"; *ar = "8h"; return;
case ARM64vecb_SMAX8x16: *nm = "smax"; *ar = "16b"; return;
@@ -915,6 +918,12 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
+ case ARM64vecu_VMOVL8U: *nm = "vmovl.u8"; *ar = "all"; return;
+ case ARM64vecu_VMOVL16U: *nm = "vmovl.u16"; *ar = "all"; return;
+ case ARM64vecu_VMOVL32U: *nm = "vmovl.u32"; *ar = "all"; return;
+ case ARM64vecu_VMOVL8S: *nm = "vmovl.s8"; *ar = "all"; return;
+ case ARM64vecu_VMOVL16S: *nm = "vmovl.s16"; *ar = "all"; return;
+ case ARM64vecu_VMOVL32S: *nm = "vmovl.s32"; *ar = "all"; return;
case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
case ARM64vecu_CNT: *nm = "cnt "; *ar = "16b"; return;
case ARM64vecu_UADDLV8x16: *nm = "uaddlv "; *ar = "16b"; return;
@@ -3312,7 +3321,7 @@ static inline UChar iregNo ( HReg r )
static inline UChar dregNo ( HReg r )
{
UInt n;
- vassert(hregClass(r) == HRcFlt64);
+ vassert(hregClass(r) == HRcFlt64 || hregClass(r) == HRcInt64);
vassert(!hregIsVirtual(r));
n = hregNumber(r);
vassert(n <= 31);
@@ -3382,12 +3391,14 @@ static inline UChar qregNo ( HReg r )
#define X011011 BITS8(0,0, 0,1,1,0,1,1)
#define X011110 BITS8(0,0, 0,1,1,1,1,0)
#define X011111 BITS8(0,0, 0,1,1,1,1,1)
+#define X100000 BITS8(0,0, 1,0,0,0,0,0)
#define X100001 BITS8(0,0, 1,0,0,0,0,1)
#define X100011 BITS8(0,0, 1,0,0,0,1,1)
#define X100100 BITS8(0,0, 1,0,0,1,0,0)
#define X100101 BITS8(0,0, 1,0,0,1,0,1)
#define X100110 BITS8(0,0, 1,0,0,1,1,0)
#define X100111 BITS8(0,0, 1,0,0,1,1,1)
+#define X101000 BITS8(0,0, 1,0,1,0,0,0)
#define X110000 BITS8(0,0, 1,1,0,0,0,0)
#define X110001 BITS8(0,0, 1,1,0,0,0,1)
#define X110101 BITS8(0,0, 1,1,0,1,0,1)
@@ -3430,6 +3441,12 @@ static inline UChar qregNo ( HReg r )
#define X11110001 BITS8(1,1,1,1,0,0,0,1)
#define X11110011 BITS8(1,1,1,1,0,0,1,1)
+#define BITS9(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
+ ((BITS8(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1) << 1) | zzb0)
+
+#define X111100111 BITS9(1,1,1,1,0,0,1,1,1)
+#define X111100101 BITS9(1,1,1,1,0,0,1,0,1)
+
/* --- 4 fields --- */
@@ -3588,6 +3605,29 @@ static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
return w;
}
+static inline UInt X_9_1_6_4_6_1_1_4( UInt f1, UInt f2, UInt f3, UInt f4,
+ UInt f5, UInt f6, UInt f7, UInt f8) {
+ vassert(9+1+6+4+6+1+1+4 == 32);
+ vassert(f1 < (1<<9));
+ vassert(f2 < (1<<1));
+ vassert(f3 < (1<<6));
+ vassert(f4 < (1<<4));
+ vassert(f5 < (1<<6));
+ vassert(f6 < (1<<1));
+ vassert(f7 < (1<<1));
+ vassert(f8 < (1<<4));
+ UInt w = 0;
+ w = (w << 9) | f1;
+ w = (w << 1) | f2;
+ w = (w << 6) | f3;
+ w = (w << 4) | f4;
+ w = (w << 6) | f5;
+ w = (w << 1) | f6;
+ w = (w << 1) | f7;
+ w = (w << 4) | f8;
+ return w;
+}
+
//ZZ #define X0000 BITS4(0,0,0,0)
//ZZ #define X0001 BITS4(0,0,0,1)
@@ -5022,8 +5062,14 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
*/
UInt vD = qregNo(i->ARM64in.VBinV.dst);
- UInt vN = qregNo(i->ARM64in.VBinV.argL);
- UInt vM = qregNo(i->ARM64in.VBinV.argR);
+ ARM64VecBinOp op = i->ARM64in.VBinV.op;
+ Bool isV128 = (op != ARM64vecb_UMULL8x8
+ && op != ARM64vecb_UMULL16x4
+ && op != ARM64vecb_UMULL32x2);
+ UInt vN = isV128 ? qregNo(i->ARM64in.VBinV.argL)
+ : dregNo(i->ARM64in.VBinV.argL);
+ UInt vM = isV128 ? qregNo(i->ARM64in.VBinV.argR)
+ : dregNo(i->ARM64in.VBinV.argR);
switch (i->ARM64in.VBinV.op) {
case ARM64vecb_ADD64x2:
*p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
@@ -5103,6 +5149,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
*p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
break;
+ case ARM64vecb_UMULL32x2:
+ *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
+ break;
+ case ARM64vecb_UMULL16x4:
+ *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
+ break;
+ case ARM64vecb_UMULL8x8:
+ *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
+ break;
+
case ARM64vecb_SMAX32x4:
*p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
break;
@@ -5219,8 +5275,11 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
010 01110 01 1 10000 001110 n d SADDLV Sd, Vn.8h
010 01110 10 1 10000 001110 n d SADDLV Dd, Vn.4s
*/
+ ARM64VecUnaryOp op = i->ARM64in.VUnaryV.op;
UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
- UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
+ Bool isV128 = !(op >= ARM64vecu_VMOVL8U && op <= ARM64vecu_VMOVL32S);
+ UInt vN = isV128 ? qregNo(i->ARM64in.VUnaryV.arg)
+ : dregNo(i->ARM64in.VUnaryV.arg);
switch (i->ARM64in.VUnaryV.op) {
case ARM64vecu_FABS64x2:
*p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
@@ -5234,6 +5293,24 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
case ARM64vecu_FNEG32x4:
*p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
break;
+ case ARM64vecu_VMOVL8U:
+ *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+ break;
+ case ARM64vecu_VMOVL16U:
+ *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+ break;
+ case ARM64vecu_VMOVL32U:
+ *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+ break;
+ case ARM64vecu_VMOVL8S:
+ *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+ break;
+ case ARM64vecu_VMOVL16S:
+ *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+ break;
+ case ARM64vecu_VMOVL32S:
+ *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
+ break;
case ARM64vecu_NOT:
*p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
break;
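
A note on the VMOVL cases above: the words built by X_9_1_6_4_6_1_1_4
follow the 32-bit NEON VMOVL layout, whose D:Vd and M:Vm register fields
are split, which is why vD and vN are each broken into a high bit and a
4-bit remainder. A spot check of the packer, re-implemented standalone
for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Same shift-and-or packing as X_9_1_6_4_6_1_1_4 above. */
    static uint32_t pack_9_1_6_4_6_1_1_4(uint32_t f1, uint32_t f2,
                                         uint32_t f3, uint32_t f4,
                                         uint32_t f5, uint32_t f6,
                                         uint32_t f7, uint32_t f8)
    {
        uint32_t w = 0;
        w = (w << 9) | f1;  w = (w << 1) | f2;
        w = (w << 6) | f3;  w = (w << 4) | f4;
        w = (w << 6) | f5;  w = (w << 1) | f6;
        w = (w << 1) | f7;  w = (w << 4) | f8;
        return w;
    }

    int main(void)
    {
        /* The VMOVL8U case with vD = 0, vN = 1. */
        uint32_t w = pack_9_1_6_4_6_1_1_4(0x1E7, 0, 0x08, 0, 0x28, 0, 1, 1);
        printf("0x%08X\n", w);   /* 0xF3880A11, i.e. vmovl.u8 q0, d1 */
        return 0;
    }
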
diff --git a/main/VEX/priv/host_arm64_defs.h b/main/VEX/priv/host_arm64_defs.h
index 63f6baf89..b22730b2f 100644
--- a/main/VEX/priv/host_arm64_defs.h
+++ b/main/VEX/priv/host_arm64_defs.h
@@ -332,6 +332,9 @@ typedef
ARM64vecb_UMIN32x4,
ARM64vecb_UMIN16x8,
ARM64vecb_UMIN8x16,
+ ARM64vecb_UMULL32x2,
+ ARM64vecb_UMULL16x4,
+ ARM64vecb_UMULL8x8,
ARM64vecb_SMAX32x4,
ARM64vecb_SMAX16x8,
ARM64vecb_SMAX8x16,
@@ -370,6 +373,12 @@ typedef
ARM64vecu_FNEG32x4,
ARM64vecu_FABS64x2,
ARM64vecu_FABS32x4,
+ ARM64vecu_VMOVL8U,
+ ARM64vecu_VMOVL16U,
+ ARM64vecu_VMOVL32U,
+ ARM64vecu_VMOVL8S,
+ ARM64vecu_VMOVL16S,
+ ARM64vecu_VMOVL32S,
ARM64vecu_NOT,
ARM64vecu_CNT,
ARM64vecu_UADDLV8x16,
diff --git a/main/VEX/priv/host_arm64_isel.c b/main/VEX/priv/host_arm64_isel.c
index 52babc0e3..d02be24f2 100644
--- a/main/VEX/priv/host_arm64_isel.c
+++ b/main/VEX/priv/host_arm64_isel.c
@@ -4453,6 +4453,27 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
return res;
}
+ case Iop_Widen8Uto16x8:
+ case Iop_Widen16Uto32x4:
+ case Iop_Widen32Uto64x2:
+ case Iop_Widen8Sto16x8:
+ case Iop_Widen16Sto32x4:
+ case Iop_Widen32Sto64x2: {
+ HReg res = newVRegV(env);
+ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARM64VecUnaryOp wop = ARM64vecu_INVALID;
+ switch (e->Iex.Unop.op) {
+ case Iop_Widen8Uto16x8: wop = ARM64vecu_VMOVL8U; break;
+ case Iop_Widen16Uto32x4: wop = ARM64vecu_VMOVL16U; break;
+ case Iop_Widen32Uto64x2: wop = ARM64vecu_VMOVL32U; break;
+ case Iop_Widen8Sto16x8: wop = ARM64vecu_VMOVL8S; break;
+ case Iop_Widen16Sto32x4: wop = ARM64vecu_VMOVL16S; break;
+ case Iop_Widen32Sto64x2: wop = ARM64vecu_VMOVL32S; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARM64Instr_VUnaryV(wop, res, arg));
+ return res;
+ }
//ZZ case Iop_NotV128: {
//ZZ DECLARE_PATTERN(p_veqz_8x16);
//ZZ DECLARE_PATTERN(p_veqz_16x8);
@@ -4708,22 +4729,6 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
//ZZ res, tmp, x, 0, True));
//ZZ return res;
//ZZ }
-//ZZ case Iop_Widen8Uto16x8:
-//ZZ case Iop_Widen16Uto32x4:
-//ZZ case Iop_Widen32Uto64x2: {
-//ZZ HReg res = newVRegV(env);
-//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
-//ZZ UInt size;
-//ZZ switch (e->Iex.Unop.op) {
-//ZZ case Iop_Widen8Uto16x8: size = 0; break;
-//ZZ case Iop_Widen16Uto32x4: size = 1; break;
-//ZZ case Iop_Widen32Uto64x2: size = 2; break;
-//ZZ default: vassert(0);
-//ZZ }
-//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
-//ZZ res, arg, size, True));
-//ZZ return res;
-//ZZ }
//ZZ case Iop_Widen8Sto16x8:
//ZZ case Iop_Widen16Sto32x4:
//ZZ case Iop_Widen32Sto64x2: {
@@ -5648,23 +5653,24 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
//ZZ res, argL, argR, size, True));
//ZZ return res;
//ZZ }
-//ZZ case Iop_Mull8Ux8:
-//ZZ case Iop_Mull16Ux4:
-//ZZ case Iop_Mull32Ux2: {
-//ZZ HReg res = newVRegV(env);
-//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
-//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
-//ZZ UInt size = 0;
-//ZZ switch(e->Iex.Binop.op) {
-//ZZ case Iop_Mull8Ux8: size = 0; break;
-//ZZ case Iop_Mull16Ux4: size = 1; break;
-//ZZ case Iop_Mull32Ux2: size = 2; break;
-//ZZ default: vassert(0);
-//ZZ }
-//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
-//ZZ res, argL, argR, size, True));
-//ZZ return res;
-//ZZ }
+ case Iop_Mull8Ux8:
+ case Iop_Mull16Ux4:
+ case Iop_Mull32Ux2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselDblExpr(env, e->Iex.Binop.arg2);
+ ARM64VecBinOp op = ARM64vecb_INVALID;
+
+ switch(e->Iex.Binop.op) {
+ case Iop_Mull8Ux8: op = ARM64vecb_UMULL8x8; break;
+ case Iop_Mull16Ux4: op = ARM64vecb_UMULL16x4; break;
+ case Iop_Mull32Ux2: op = ARM64vecb_UMULL32x2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
+ return res;
+ }
//ZZ
//ZZ case Iop_Mull8Sx8:
//ZZ case Iop_Mull16Sx4:
@@ -5975,7 +5981,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
IRType ty = typeOfIRExpr(env->type_env,e);
vassert(e);
- vassert(ty == Ity_F64);
+ vassert(ty == Ity_F64 || ty == Ity_I64);
if (e->tag == Iex_RdTmp) {
return lookupIRTemp(env, e->Iex.RdTmp.tmp);
@@ -5993,7 +5999,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
}
if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
- vassert(e->Iex.Load.ty == Ity_F64);
+ vassert(e->Iex.Load.ty == Ity_F64 || e->Iex.Load.ty == Ity_I64);
HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
HReg res = newVRegD(env);
addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
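
Two conventions in the widening path above are worth spelling out: the
emitter's isV128 range test requires the six ARM64vecu_VMOVL* enumerators
to stay contiguous in the header, and the relaxed asserts in dregNo and
iselDblExpr exist so that Ity_I64 operands can be register-numbered as D
registers for these instructions. The IR semantics being implemented are
simple; a reference model for Iop_Widen8Uto16x8 (helper name illustrative,
lane order little-endian):

    #include <stdint.h>

    /* Zero-extend each byte of a 64-bit value into the corresponding
       16-bit lane of a 128-bit result (dst[0] holds lanes 0..3). */
    static void widen8u_to_16x8(uint64_t dst[2], uint64_t src)
    {
        dst[0] = dst[1] = 0;
        for (int i = 0; i < 8; i++) {
            uint16_t lane = (uint8_t)(src >> (8 * i));
            dst[i / 4] |= (uint64_t)lane << (16 * (i % 4));
        }
    }
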
diff --git a/main/none/tests/arm64/fp_and_simd.c b/main/none/tests/arm64/fp_and_simd.c
index 6cfc32ff5..14b4da404 100644
--- a/main/none/tests/arm64/fp_and_simd.c
+++ b/main/none/tests/arm64/fp_and_simd.c
@@ -4174,11 +4174,11 @@ int main ( void )
if (0) test_smull2_4s_8h_8h(TyH);
if (0) test_smull_8h_8b_8b(TyB);
if (0) test_smull2_8h_16b_16b(TyB);
- if (0) test_umull_2d_2s_2s(TyS);
+ if (1) test_umull_2d_2s_2s(TyS);
if (0) test_umull2_2d_4s_4s(TyS);
- if (0) test_umull_4s_4h_4h(TyH);
+ if (1) test_umull_4s_4h_4h(TyH);
if (0) test_umull2_4s_8h_8h(TyH);
- if (0) test_umull_8h_8b_8b(TyB);
+ if (1) test_umull_8h_8b_8b(TyB);
if (0) test_umull2_8h_16b_16b(TyB);
// smov w_b[], w_h[], x_b[], x_h[], x_s[]
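
For completeness, the three re-enabled umull tests can also be
cross-checked on real hardware. A hypothetical standalone program (not
part of this harness; assumes an AArch64 toolchain) for the 2d/2s form:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t a[2] = { 0xFFFFFFFFu, 7 };
        uint32_t b[2] = { 0xFFFFFFFFu, 6 };
        uint64_t d[2];
        __asm__ __volatile__(
            "ld1   {v0.2s}, [%0]          \n\t"
            "ld1   {v1.2s}, [%1]          \n\t"
            "umull v2.2d, v0.2s, v1.2s    \n\t"
            "st1   {v2.2d}, [%2]          \n\t"
            : /* no outputs */
            : "r"(a), "r"(b), "r"(d)
            : "v0", "v1", "v2", "memory");
        /* expect fffffffe00000001 000000000000002a */
        printf("%016llx %016llx\n",
               (unsigned long long)d[0], (unsigned long long)d[1]);
        return 0;
    }
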