; RUN: llc -mtriple armv7 %s -o - | FileCheck %s

; @f loads a <4 x i16> vector, converts it to <4 x float> with uitofp and
; returns the sum of lanes 0, 1 and 2 (lane 3 is unused).
; The expected code widens the lanes with vmovl.u16 and must not emit a
; vand mask between that widening and the vcvt.f32.u32 conversion.
; CHECK-LABEL: f:
define float @f(<4 x i16>* nocapture %in) {
  ; CHECK: vld1
  ; CHECK: vmovl.u16
  ; CHECK-NOT: vand
  %1 = load <4 x i16>, <4 x i16>* %in
  ; CHECK: vcvt.f32.u32
  %2 = uitofp <4 x i16> %1 to <4 x float>
  %3 = extractelement <4 x float> %2, i32 0
  %4 = extractelement <4 x float> %2, i32 1
  %5 = extractelement <4 x float> %2, i32 2

  ; CHECK: vadd.f32
  %6 = fadd float %3, %4
  %7 = fadd float %6, %5

  ret float %7
}

; @g loads a <4 x i16> vector, extracts lane 0 as a scalar i16 and converts
; that single element to float with uitofp.
; CHECK-LABEL: g:
define float @g(<4 x i16>* nocapture %in) {
  ; CHECK: vldr
  %1 = load <4 x i16>, <4 x i16>* %in

  ; For now we're generating a vmov.16 and a uxth instruction.
  ; The uxth is redundant, and we should be able to extend without
  ; having to generate cross-domain copies. Once we can do this
  ; we should modify the checks below.

  ; CHECK: uxth
  %2 = extractelement <4 x i16> %1, i32 0
  ; CHECK: vcvt.f32.u32
  %3 = uitofp i16 %2 to float
  ret float %3
}

; @h loads a <4 x i8> vector and rebuilds it as a <4 x i32> by extracting,
; zero-extending and re-inserting each of the four lanes individually.
;
; The backend generates for the following code an
; (and 0xff (i32 extract_vector_elt (zext load <4 x i8> to 4 x i16)))
;
; The and is not redundant and cannot be removed. Since
; extract_vector_elt is doing an implicit any_ext, the and
; is required to guarantee that the top bits are set to zero.

; Ideally should be a zext from <4 x i8> to <4 x i32>.

; CHECK-LABEL: h:
; CHECK: vld1.32
; CHECK: uxtb
define <4 x i32> @h(<4 x i8> *%in) {
  %1 = load <4 x i8>, <4 x i8>* %in, align 4
  %2 = extractelement <4 x i8> %1, i32 0
  %3 = zext i8 %2 to i32
  %4 = insertelement <4 x i32> undef, i32 %3, i32 0
  %5 = extractelement <4 x i8> %1, i32 1
  %6 = zext i8 %5 to i32
  %7 = insertelement <4 x i32> %4, i32 %6, i32 1
  %8 = extractelement <4 x i8> %1, i32 2
  %9 = zext i8 %8 to i32
  %10 = insertelement <4 x i32> %7, i32 %9, i32 2
  %11 = extractelement <4 x i8> %1, i32 3
  %12 = zext i8 %11 to i32
  %13 = insertelement <4 x i32> %10, i32 %12, i32 3
  ret <4 x i32> %13
}