-rw-r--r--  include/llvm/CodeGen/MachineMemOperand.h        |  5
-rw-r--r--  lib/CodeGen/MachineInstr.cpp                     | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp        | 33
-rw-r--r--  test/CodeGen/PowerPC/memcpy_dereferenceable.ll   | 74
4 files changed, 122 insertions(+), 8 deletions(-)
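
This patch makes the generic memcpy/memmove lowering in SelectionDAG mark
the loads it emits as dereferenceable whenever the source pointer is
provably valid for the bytes being copied. In outline, the pattern the
hunks below apply is (all names are taken from the diff itself):

    // For each chunk of VTSize bytes loaded from SrcPtrInfo + SrcOff:
    MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
    if (SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL))
      SrcMMOFlags |= MachineMemOperand::MODereferenceable;
    // SrcMMOFlags then replaces the plain MMOFlags on the DAG.getExtLoad /
    // DAG.getLoad calls that materialize the copy.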
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 4d83f27eac3..78adce507b8 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -59,6 +59,11 @@ struct MachinePointerInfo {
return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O);
}
+  /// Return true if the memory region [V, V+Offset+Size) is known to be
+  /// dereferenceable.
+ bool isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const;
+
/// Return the LLVM IR address space number that this pointer points into.
unsigned getAddrSpace() const;
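
For illustration only, a hypothetical caller holding a MachinePointerInfo
could query the new predicate like this (PtrInfo, Flags, Ctx and DL are
assumed to be in scope; none of them are introduced by this patch):

    // Hypothetical sketch: ask whether a 16-byte access starting at the
    // pointer's base value plus its offset is known not to fault.
    if (PtrInfo.isDereferenceable(/*Size=*/16, Ctx, DL))
      Flags |= MachineMemOperand::MODereferenceable;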
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 2a6cb07dbd2..81c6dace92e 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const {
return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
}
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const {
+ if (!V.is<const Value*>())
+ return false;
+
+ const Value *BasePtr = V.get<const Value*>();
+ if (BasePtr == nullptr)
+ return false;
+
+  return isDereferenceableAndAlignedPointer(BasePtr, 1,
+                                            APInt(DL.getPointerSizeInBits(),
+                                                  Offset + Size),
+                                            DL);
+}
+
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
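
The helper deliberately passes an alignment of 1 to
isDereferenceableAndAlignedPointer() (from llvm/Analysis/Loads.h), so it
asks only whether Offset + Size bytes are accessible, not whether the
access is aligned; pseudo-source values conservatively return false.
Roughly, for an IR value the call reduces to this (a paraphrase of the
code above, not new functionality):

    // "Is [BasePtr, BasePtr + Offset + Size) known dereferenceable?"
    APInt AccessSize(DL.getPointerSizeInBits(), Offset + Size);
    bool Known = isDereferenceableAndAlignedPointer(BasePtr, /*Align=*/1,
                                                    AccessSize, DL);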
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7abdc76cb00..98553152117 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// TODO: In the AlwaysInline case, if the size is big then generate a loop
// rather than maybe a humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
- DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
+ DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
@@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME does the case above also need this?
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ EVT NVT = TLI.getTypeToTransformTo(C, VT);
assert(NVT.bitsGE(VT));
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
- MinAlign(SrcAlign, SrcOff), MMOFlags);
+ MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
OutChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
@@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value =
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
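
The PowerPC test below depends on this flag: the vector load emitted for
the memcpy/memmove and the explicit load of the same 16 bytes should
collapse into a single lxvd2x, and (per the comment in the test) an
assertion fires if the flag is missing. Downstream code can inspect the
flag on the resulting memory operand like so (a sketch; MMO stands for any
MachineMemOperand attached to one of the emitted loads):

    // MODereferenceable means the covered bytes can be read without
    // faulting, so e.g. the load may be speculated past control flow.
    if (MMO->getFlags() & MachineMemOperand::MODereferenceable) {
      // safe to hoist/speculate this load
    }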
diff --git a/test/CodeGen/PowerPC/memcpy_dereferenceable.ll b/test/CodeGen/PowerPC/memcpy_dereferenceable.ll
new file mode 100644
index 00000000000..ed821849f09
--- /dev/null
+++ b/test/CodeGen/PowerPC/memcpy_dereferenceable.ll
@@ -0,0 +1,74 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; This code causes an assertion failure if the dereferenceable flag is not properly set on the loads generated for memcpy
+
+; CHECK-LABEL: @func
+; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOT: lxvd2x
+; CHECK: stxvd2x [[VREG]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: stxvd2x [[VREG]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+
+define void @func(i1 %flag) {
+entry:
+ %pairs = alloca [4 x <2 x i64>], align 8
+ %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
+ %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
+ %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
+ %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
+ %dst = bitcast [4 x <2 x i64>]* %pairs to i8*
+ %src = bitcast <2 x i64>* %pair2 to i8*
+ br i1 %flag, label %end, label %dummy
+
+end:
+ ; copy third element into first element by memcpy
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
+ ; copy third element into second element by LD/ST
+ %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
+ store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
+ ret void
+
+dummy:
+ ; to make use of %src in another BB
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
+ br label %end
+}
+
+
+; CHECK-LABEL: @func2
+; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOT: lxvd2x
+; CHECK: stxvd2x [[VREG]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: stxvd2x [[VREG]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+
+define void @func2(i1 %flag) {
+entry:
+ %pairs = alloca [4 x <2 x i64>], align 8
+ %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
+ %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
+ %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
+ %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
+ %dst = bitcast [4 x <2 x i64>]* %pairs to i8*
+ %src = bitcast <2 x i64>* %pair2 to i8*
+ br i1 %flag, label %end, label %dummy
+
+end:
+  ; copy third element into first element by memmove
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
+ ; copy third element into second element by LD/ST
+ %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
+ store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
+ ret void
+
+dummy:
+ ; to make use of %src in another BB
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
+ br label %end
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
+
+attributes #1 = { argmemonly nounwind }