diff options
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 35 | ||||
-rw-r--r-- | test/CodeGen/ARM/vld3.ll | 13 | ||||
-rw-r--r-- | test/CodeGen/ARM/vld4.ll | 13 | ||||
-rw-r--r-- | test/CodeGen/ARM/vst3.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/ARM/vst4.ll | 12 |
7 files changed, 75 insertions, 16 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3688db943d5..4151175a019 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -4135,6 +4135,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: case ARM::VLD1d64TPseudoWB_fixed: + case ARM::VLD1d64TPseudoWB_register: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -4152,6 +4153,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: case ARM::VLD1d64QPseudoWB_fixed: + case ARM::VLD1d64QPseudoWB_register: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index be7afae5ea4..a488eee92f9 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -156,8 +156,10 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, +{ ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, +{ ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, @@ -1496,6 +1498,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: case ARM::VLD1d64TPseudoWB_fixed: + case ARM::VLD1d64TPseudoWB_register: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -1513,6 +1516,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: case ARM::VLD1d64QPseudoWB_fixed: + case ARM::VLD1d64QPseudoWB_register: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index efeef6c9943..387e8dc582c 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1932,15 +1932,17 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 - // case entirely when the rest are updated to that form, too. bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); - if ((NumVecs <= 2) && !IsImmUpdate) - Opc = getVLDSTRegisterUpdateOpcode(Opc); - // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so - // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) - Ops.push_back(IsImmUpdate ? Reg0 : Inc); + if (!IsImmUpdate) { + // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so + // check for the opcode rather than the number of vector elements. + if (isVLDfixed(Opc)) + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + // VLD1/VLD2 fixed increment does not need Reg0 so only include it in + // the operands if not such an opcode. + } else if (!isVLDfixed(Opc)) + Ops.push_back(Reg0); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2086,16 +2088,17 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 - // case entirely when the rest are updated to that form, too. bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); - if (NumVecs <= 2 && !IsImmUpdate) - Opc = getVLDSTRegisterUpdateOpcode(Opc); - // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so - // check for that explicitly too. Horribly hacky, but temporary. - if (!IsImmUpdate) + if (!IsImmUpdate) { + // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so + // check for the opcode rather than the number of vector elements. + if (isVSTfixed(Opc)) + Opc = getVLDSTRegisterUpdateOpcode(Opc); Ops.push_back(Inc); - else if (NumVecs > 2 && !isVSTfixed(Opc)) + } + // VST1/VST2 fixed increment does not need Reg0 so only include it in + // the operands if not such an opcode. + else if (!isVSTfixed(Opc)) Ops.push_back(Reg0); } Ops.push_back(SrcReg); diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index 0eaad0f9003..46e17c97e6e 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -96,6 +96,19 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind { ret <1 x i64> %tmp4 } +define <1 x i64> @vld3i64_reg_update(i64** %ptr, i64* %A) nounwind { +;CHECK-LABEL: vld3i64_reg_update: +;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64], {{r[0-9]+|lr}} + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16) + %tmp5 = getelementptr i64, i64* %A, i32 1 + store i64* %tmp5, i64** %ptr + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK-LABEL: vld3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index 5663e6d41f0..679c9ae4450 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -96,6 +96,19 @@ define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind { ret <1 x i64> %tmp4 } +define <1 x i64> @vld4i64_reg_update(i64** %ptr, i64* %A) nounwind { +;CHECK-LABEL: vld4i64_reg_update: +;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256], {{r[0-9]+|lr}} + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64) + %tmp5 = getelementptr i64, i64* %A, i32 1 + store i64* %tmp5, i64** %ptr + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK-LABEL: vld4Qi8: ;Check the alignment value. Max for this instruction is 256 bits: diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index d70d5957900..1723304e82b 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -73,6 +73,18 @@ define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind { ret void } +define void @vst3i64_reg_update(i64** %ptr, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vst3i64_reg_update +;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}], r{{.*}} + %A = load i64*, i64** %ptr + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>, <1 x i64>* %B + call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) + %tmp2 = getelementptr i64, i64* %A, i32 1 + store i64* %tmp2, i64** %ptr + ret void +} + define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vst3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index afa4321c91a..ca9e5e7a59d 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -72,6 +72,18 @@ define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind { ret void } +define void @vst4i64_reg_update(i64** %ptr, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vst4i64_reg_update: +;CHECK: vst1.64 {d16, d17, d18, d19}, [r{{[0-9]+}}], r{{[0-9]+}} + %A = load i64*, i64** %ptr + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>, <1 x i64>* %B + call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) + %tmp2 = getelementptr i64, i64* %A, i32 1 + store i64* %tmp2, i64** %ptr + ret void +} + define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vst4Qi8: ;Check the alignment value. Max for this instruction is 256 bits: |