diff options
-rw-r--r-- | include/llvm/CodeGen/MachineMemOperand.h | 5 | ||||
-rw-r--r-- | lib/CodeGen/MachineInstr.cpp | 18 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 33 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/memcpy_dereferenceable.ll | 74 |
4 files changed, 122 insertions, 8 deletions
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 4d83f27eac3..78adce507b8 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -59,6 +59,11 @@ struct MachinePointerInfo {
     return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O);
   }
 
+  /// Return true if memory region [V, V+Offset+Size) is known to be
+  /// dereferenceable.
+  bool isDereferenceable(unsigned Size, LLVMContext &C,
+                         const DataLayout &DL) const;
+
   /// Return the LLVM IR address space number that this pointer points into.
   unsigned getAddrSpace() const;
 
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 2a6cb07dbd2..81c6dace92e 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const {
   return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
 }
 
+/// isDereferenceable - Return true if the first Offset + Size bytes at V are
+/// known to be dereferenceable; false if V is not an IR Value or is null.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+                                           const DataLayout &DL) const {
+  if (!V.is<const Value*>())
+    return false;
+
+  const Value *BasePtr = V.get<const Value*>();
+  if (BasePtr == nullptr)
+    return false;
+
+  // APInt's first argument is a width in bits; DL.getPointerSize() is in
+  // bytes and would yield an APInt too narrow to hold Offset + Size.
+  return isDereferenceableAndAlignedPointer(
+      BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+}
+
 /// getConstantPool - Return a MachinePointerInfo record that refers to the
 /// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7abdc76cb00..98553152117 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // TODO: In the AlwaysInline case, if the size is big then generate a loop // rather than maybe a humongous number of loads and stores. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && - DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) + DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign /= 2; if (NewAlign > Align) { @@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? 
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT NVT = TLI.getTypeToTransformTo(C, VT); assert(NVT.bitsGE(VT)); + + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), MMOFlags); + MinAlign(SrcAlign, SrcOff), SrcMMOFlags); OutChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), @@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. 
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) @@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value; + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags); + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; diff --git a/test/CodeGen/PowerPC/memcpy_dereferenceable.ll b/test/CodeGen/PowerPC/memcpy_dereferenceable.ll new file mode 100644 index 00000000000..ed821849f09 --- /dev/null +++ b/test/CodeGen/PowerPC/memcpy_dereferenceable.ll @@ -0,0 +1,74 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; This code causes an assertion failure if dereferenceable flag is not properly set in the load generated for memcpy + +; CHECK-LABEL: @func +; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-NOT: lxvd2x +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: blr + +define void @func(i1 %flag) { +entry: + %pairs = alloca [4 x <2 x i64>], align 8 + %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1 + %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2 + %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>* + %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>* + %dst = bitcast [4 x <2 x i64>]* %pairs to i8* + %src = bitcast <2 x i64>* %pair2 to i8* + br i1 %flag, label %end, label %dummy + +end: + ; copy third element into first element by memcpy + call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false) + ; copy third element into second element by LD/ST + %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8 + store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8 + ret void + +dummy: + ; to make use of %src in another BB + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false) + br label %end +} + + +; CHECK-LABEL: @func2 +; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-NOT: lxvd2x +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: blr + +define void @func2(i1 %flag) { +entry: + %pairs = alloca [4 x <2 x i64>], align 8 + %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1 + %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2 + %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>* + %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>* + %dst = bitcast [4 x <2 x i64>]* %pairs to i8* + %src = bitcast <2 x i64>* %pair2 to i8* + br i1 %flag, label %end, label %dummy + +end: + ; copy third element into first element by memmove + call void @llvm.memmove.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false) + ; copy third element into second element by LD/ST + %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8 + store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8 + ret void + +dummy: + ; to make use of %src in another BB + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false) + br label %end +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 + +attributes #1 = { argmemonly nounwind } |