diff options
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 3 | ||||
-rw-r--r-- | test/Transforms/InstCombine/vec_shuffle.ll | 15 |
2 files changed, 17 insertions, 1 deletions
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 8cbf57a97ef..723bb4c0d68 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1375,7 +1375,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) { if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) && match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) && LHS->hasOneUse() && RHS->hasOneUse() && - cast<ShuffleVectorInst>(LHS)->isConcat()) { + cast<ShuffleVectorInst>(LHS)->isConcat() && + cast<ShuffleVectorInst>(RHS)->isConcat()) { // This transform does not have the speculative execution constraint as // below because the shuffle is a concatenation. The new binops are // operating on exactly the same elements as the existing binop. diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index aae337d0de3..354256aab54 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -1125,3 +1125,18 @@ define <2 x float> @frem_splat_constant1(<2 x float> %x) { ret <2 x float> %r } +; Equivalent shuffle masks, but only one is a narrowing op. + +define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) { +; CHECK-LABEL: @PR40734( +; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <1 x i1> zeroinitializer, <1 x i1> [[X:%.*]], <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[NARROW:%.*]] = shufflevector <4 x i1> [[Y:%.*]], <4 x i1> undef, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[WIDEN]], [[NARROW]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %widen = shufflevector <1 x i1> zeroinitializer, <1 x i1> %x, <2 x i32> <i32 0, i32 1> + %narrow = shufflevector <4 x i1> %y, <4 x i1> undef, <2 x i32> <i32 0, i32 1> + %r = and <2 x i1> %widen, %narrow + ret <2 x i1> %r +} + |