diff options
Diffstat (limited to 'driver/runtime')
-rw-r--r-- | driver/runtime/arch/asimd.ll | 78 | ||||
-rw-r--r-- | driver/runtime/arch/neon.ll | 72 | ||||
-rw-r--r-- | driver/runtime/ll32/allocation.ll | 140 | ||||
-rw-r--r-- | driver/runtime/ll64/allocation.ll | 164 |
4 files changed, 227 insertions, 227 deletions
diff --git a/driver/runtime/arch/asimd.ll b/driver/runtime/arch/asimd.ll index efc53c89..682f045e 100644 --- a/driver/runtime/arch/asimd.ll +++ b/driver/runtime/arch/asimd.ll @@ -844,8 +844,8 @@ define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind read %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone - %mu = load <4 x i32>* @yuv_U, align 8 - %mv = load <4 x i32>* @yuv_V, align 8 + %mu = load <4 x i32>, <4 x i32>* @yuv_U, align 8 + %mv = load <4 x i32>, <4 x i32>* @yuv_V, align 8 %_u2 = mul <4 x i32> %_u, %mu %_v2 = mul <4 x i32> %_v, %mv %_y2 = add <4 x i32> %_y, %_u2 @@ -855,8 +855,8 @@ define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind read ; %r2 = trunc <4 x i16> %r1 to <4 x i8> ; ret <4 x i8> %r2 - %c0 = load <4 x i32>* @yuv_0, align 8 - %c255 = load <4 x i32>* @yuv_255, align 8 + %c0 = load <4 x i32>, <4 x i32>* @yuv_0, align 8 + %c255 = load <4 x i32>, <4 x i32>* @yuv_255, align 8 %r1 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone %r2 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8> @@ -959,19 +959,19 @@ define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3 %z0 = extractelement <3 x float> %in, i32 2 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 + %xm = load <4 x float>, <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 + %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 %py2 = bitcast float* %py to <4 x float>* ; %ym = call <4 x float> @llvm.aarch64.neon.ld4.v4f32(i8* %py2, i32 4) nounwind - %ym = load <4 x float>* %py2, align 4 + %ym = load <4 x float>, <4 x float>* %py2, align 4 - %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5 + %pz = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5 %pz2 = bitcast float* %pz to <4 x float>* ; %zm2 = call <4 x float> @llvm.aarch64.neon.ld4.v4f32(i8* %pz2, i32 4) nounwind - %zm2 = load <4 x float>* %pz2, align 4 + %zm2 = load <4 x float>, <4 x float>* %pz2, align 4 %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4> %a1 = fmul <4 x float> %x, %xm @@ -989,12 +989,12 @@ define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3 %y0 = extractelement <2 x float> %in, i32 1 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 + %ym = load <4 x float>, <4 x float>* %py2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fmul <4 x float> %y, %ym @@ -1013,18 +1013,18 @@ define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4 %w0 = extractelement <4 x float> %in, i32 3 %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 - %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 + %ym = load <4 x float>, <4 x float>* %py2, align 4 + %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 %pz2 = bitcast float* %pz to <4 x float>* - %zm = load <4 x float>* %pz2, align 4 - %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 + %zm = load <4 x float>, <4 x float>* %pz2, align 4 + %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 %pw2 = bitcast float* %pw to <4 x float>* - %wm = load <4 x float>* %pw2, align 4 + %wm = load <4 x float>, <4 x float>* %pw2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fmul <4 x float> %y, %ym @@ -1044,18 +1044,18 @@ define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4 %z0 = extractelement <3 x float> %in, i32 2 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 - %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 + %ym = load <4 x float>, <4 x float>* %py2, align 4 + %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 %pz2 = bitcast float* %pz to <4 x float>* - %zm = load <4 x float>* %pz2, align 4 - %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 + %zm = load <4 x float>, <4 x float>* %pz2, align 4 + %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 %pw2 = bitcast float* %pw to <4 x float>* - %wm = load <4 x float>* %pw2, align 4 + %wm = load <4 x float>, <4 x float>* %pw2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fadd <4 x float> %wm, %a1 @@ -1072,15 +1072,15 @@ define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4 %y0 = extractelement <2 x float> %in, i32 1 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 - %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 + %ym = load <4 x float>, <4 x float>* %py2, align 4 + %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 %pw2 = bitcast float* %pw to <4 x float>* - %wm = load <4 x float>* %pw2, align 4 + %wm = load <4 x float>, <4 x float>* %pw2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fadd <4 x float> %wm, %a1 @@ -1105,9 +1105,9 @@ declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color) define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone { - %f255 = load <4 x float>* @fc_255.0, align 16 - %f05 = load <4 x float>* @fc_0.5, align 16 - %f0 = load <4 x float>* @fc_0, align 16 + %f255 = load <4 x float>, <4 x float>* @fc_255.0, align 16 + %f05 = load <4 x float>, <4 x float>* @fc_0.5, align 16 + %f0 = load <4 x float>, <4 x float>* @fc_0, align 16 %v1 = fmul <4 x float> %f255, %color %v2 = fadd <4 x float> %f05, %v1 %v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255) nounwind readnone diff --git a/driver/runtime/arch/neon.ll b/driver/runtime/arch/neon.ll index 67e5dbf6..6bb65b67 100644 --- a/driver/runtime/arch/neon.ll +++ b/driver/runtime/arch/neon.ll @@ -848,8 +848,8 @@ define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind read %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone - %mu = load <4 x i32>* @yuv_U, align 8 - %mv = load <4 x i32>* @yuv_V, align 8 + %mu = load <4 x i32>, <4 x i32>* @yuv_U, align 8 + %mv = load <4 x i32>, <4 x i32>* @yuv_V, align 8 %_u2 = mul <4 x i32> %_u, %mu %_v2 = mul <4 x i32> %_v, %mv %_y2 = add <4 x i32> %_y, %_u2 @@ -859,8 +859,8 @@ define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind read ; %r2 = trunc <4 x i16> %r1 to <4 x i8> ; ret <4 x i8> %r2 - %c0 = load <4 x i32>* @yuv_0, align 8 - %c255 = load <4 x i32>* @yuv_255, align 8 + %c0 = load <4 x i32>, <4 x i32>* @yuv_0, align 8 + %c255 = load <4 x i32>, <4 x i32>* @yuv_255, align 8 %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8> @@ -953,15 +953,15 @@ define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3 %z0 = extractelement <3 x float> %in, i32 2 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to i8* %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind - %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 + %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 %py2 = bitcast float* %py to i8* %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind - %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5 + %pz = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5 %pz2 = bitcast float* %pz to i8* %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4> @@ -981,12 +981,12 @@ define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3 %y0 = extractelement <2 x float> %in, i32 1 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 + %ym = load <4 x float>, <4 x float>* %py2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fmul <4 x float> %y, %ym @@ -1005,18 +1005,18 @@ define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4 %w0 = extractelement <4 x float> %in, i32 3 %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 - %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 + %ym = load <4 x float>, <4 x float>* %py2, align 4 + %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 %pz2 = bitcast float* %pz to <4 x float>* - %zm = load <4 x float>* %pz2, align 4 - %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 + %zm = load <4 x float>, <4 x float>* %pz2, align 4 + %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 %pw2 = bitcast float* %pw to <4 x float>* - %wm = load <4 x float>* %pw2, align 4 + %wm = load <4 x float>, <4 x float>* %pw2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fmul <4 x float> %y, %ym @@ -1036,18 +1036,18 @@ define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4 %z0 = extractelement <3 x float> %in, i32 2 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 - %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 + %ym = load <4 x float>, <4 x float>* %py2, align 4 + %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 %pz2 = bitcast float* %pz to <4 x float>* - %zm = load <4 x float>* %pz2, align 4 - %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 + %zm = load <4 x float>, <4 x float>* %pz2, align 4 + %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 %pw2 = bitcast float* %pw to <4 x float>* - %wm = load <4 x float>* %pw2, align 4 + %wm = load <4 x float>, <4 x float>* %pw2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fadd <4 x float> %wm, %a1 @@ -1064,15 +1064,15 @@ define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4 %y0 = extractelement <2 x float> %in, i32 1 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone - %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 + %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 %px2 = bitcast float* %px to <4 x float>* - %xm = load <4 x float>* %px2, align 4 - %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 + %xm = load <4 x float>, <4 x float>* %px2, align 4 + %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 %py2 = bitcast float* %py to <4 x float>* - %ym = load <4 x float>* %py2, align 4 - %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 + %ym = load <4 x float>, <4 x float>* %py2, align 4 + %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 %pw2 = bitcast float* %pw to <4 x float>* - %wm = load <4 x float>* %pw2, align 4 + %wm = load <4 x float>, <4 x float>* %pw2, align 4 %a1 = fmul <4 x float> %x, %xm %a2 = fadd <4 x float> %wm, %a1 @@ -1097,9 +1097,9 @@ declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color) define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone { - %f255 = load <4 x float>* @fc_255.0, align 16 - %f05 = load <4 x float>* @fc_0.5, align 16 - %f0 = load <4 x float>* @fc_0, align 16 + %f255 = load <4 x float>, <4 x float>* @fc_255.0, align 16 + %f05 = load <4 x float>, <4 x float>* @fc_0.5, align 16 + %f0 = load <4 x float>, <4 x float>* @fc_0, align 16 %v1 = fmul <4 x float> %f255, %color %v2 = fadd <4 x float> %f05, %v1 %v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255) nounwind readnone diff --git a/driver/runtime/ll32/allocation.ll b/driver/runtime/ll32/allocation.ll index 1af9c89d..1ba8222b 100644 --- a/driver/runtime/ll32/allocation.ll +++ b/driver/runtime/ll32/allocation.ll @@ -41,7 +41,7 @@ define void @rsSetElementAtImpl_char([1 x i32] %a.coerce, i8 signext %val, i32 % define signext i8 @rsGetElementAtImpl_char([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #2 - %2 = load i8* %1, align 1, !tbaa !21 + %2 = load i8, i8* %1, align 1, !tbaa !21 ret i8 %2 } @@ -56,7 +56,7 @@ define void @rsSetElementAtImpl_char2([1 x i32] %a.coerce, <2 x i8> %val, i32 %x define <2 x i8> @rsGetElementAtImpl_char2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 2, !tbaa !22 + %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !22 ret <2 x i8> %3 } @@ -72,7 +72,7 @@ define void @rsSetElementAtImpl_char3([1 x i32] %a.coerce, <3 x i8> %val, i32 %x define <3 x i8> @rsGetElementAtImpl_char3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !23 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !23 %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i8> %4 } @@ -88,7 +88,7 @@ define void @rsSetElementAtImpl_char4([1 x i32] %a.coerce, <4 x i8> %val, i32 %x define <4 x i8> @rsGetElementAtImpl_char4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !24 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !24 ret <4 x i8> %3 } @@ -101,7 +101,7 @@ define void @rsSetElementAtImpl_uchar([1 x i32] %a.coerce, i8 zeroext %val, i32 define zeroext i8 @rsGetElementAtImpl_uchar([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #2 - %2 = load i8* %1, align 1, !tbaa !25 + %2 = load i8, i8* %1, align 1, !tbaa !25 ret i8 %2 } @@ -116,7 +116,7 @@ define void @rsSetElementAtImpl_uchar2([1 x i32] %a.coerce, <2 x i8> %val, i32 % define <2 x i8> @rsGetElementAtImpl_uchar2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 2, !tbaa !26 + %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !26 ret <2 x i8> %3 } @@ -132,7 +132,7 @@ define void @rsSetElementAtImpl_uchar3([1 x i32] %a.coerce, <3 x i8> %val, i32 % define <3 x i8> @rsGetElementAtImpl_uchar3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !27 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !27 %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i8> %4 } @@ -148,7 +148,7 @@ define void @rsSetElementAtImpl_uchar4([1 x i32] %a.coerce, <4 x i8> %val, i32 % define <4 x i8> @rsGetElementAtImpl_uchar4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !28 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !28 ret <4 x i8> %3 } @@ -163,7 +163,7 @@ define void @rsSetElementAtImpl_short([1 x i32] %a.coerce, i16 signext %val, i32 define signext i16 @rsGetElementAtImpl_short([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i16* - %3 = load i16* %2, align 2, !tbaa !29 + %3 = load i16, i16* %2, align 2, !tbaa !29 ret i16 %3 } @@ -178,7 +178,7 @@ define void @rsSetElementAtImpl_short2([1 x i32] %a.coerce, <2 x i16> %val, i32 define <2 x i16> @rsGetElementAtImpl_short2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 4, !tbaa !30 + %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !30 ret <2 x i16> %3 } @@ -194,7 +194,7 @@ define void @rsSetElementAtImpl_short3([1 x i32] %a.coerce, <3 x i16> %val, i32 define <3 x i16> @rsGetElementAtImpl_short3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !31 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !31 %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i16> %4 } @@ -210,7 +210,7 @@ define void @rsSetElementAtImpl_short4([1 x i32] %a.coerce, <4 x i16> %val, i32 define <4 x i16> @rsGetElementAtImpl_short4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !32 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !32 ret <4 x i16> %3 } @@ -225,7 +225,7 @@ define void @rsSetElementAtImpl_ushort([1 x i32] %a.coerce, i16 zeroext %val, i3 define zeroext i16 @rsGetElementAtImpl_ushort([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i16* - %3 = load i16* %2, align 2, !tbaa !33 + %3 = load i16, i16* %2, align 2, !tbaa !33 ret i16 %3 } @@ -240,7 +240,7 @@ define void @rsSetElementAtImpl_ushort2([1 x i32] %a.coerce, <2 x i16> %val, i32 define <2 x i16> @rsGetElementAtImpl_ushort2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 4, !tbaa !34 + %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !34 ret <2 x i16> %3 } @@ -256,7 +256,7 @@ define void @rsSetElementAtImpl_ushort3([1 x i32] %a.coerce, <3 x i16> %val, i32 define <3 x i16> @rsGetElementAtImpl_ushort3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !35 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !35 %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i16> %4 } @@ -272,7 +272,7 @@ define void @rsSetElementAtImpl_ushort4([1 x i32] %a.coerce, <4 x i16> %val, i32 define <4 x i16> @rsGetElementAtImpl_ushort4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !36 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !36 ret <4 x i16> %3 } @@ -287,7 +287,7 @@ define void @rsSetElementAtImpl_int([1 x i32] %a.coerce, i32 %val, i32 %x, i32 % define i32 @rsGetElementAtImpl_int([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i32* - %3 = load i32* %2, align 4, !tbaa !37 + %3 = load i32, i32* %2, align 4, !tbaa !37 ret i32 %3 } @@ -302,7 +302,7 @@ define void @rsSetElementAtImpl_int2([1 x i32] %a.coerce, <2 x i32> %val, i32 %x define <2 x i32> @rsGetElementAtImpl_int2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 8, !tbaa !38 + %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !38 ret <2 x i32> %3 } @@ -318,7 +318,7 @@ define void @rsSetElementAtImpl_int3([1 x i32] %a.coerce, <3 x i32> %val, i32 %x define <3 x i32> @rsGetElementAtImpl_int3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 8, !tbaa !39 + %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !39 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i32> %4 } @@ -334,7 +334,7 @@ define void @rsSetElementAtImpl_int4([1 x i32] %a.coerce, <4 x i32> %val, i32 %x define <4 x i32> @rsGetElementAtImpl_int4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 16, !tbaa !40 + %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !40 ret <4 x i32> %3 } @@ -349,7 +349,7 @@ define void @rsSetElementAtImpl_uint([1 x i32] %a.coerce, i32 %val, i32 %x, i32 define i32 @rsGetElementAtImpl_uint([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i32* - %3 = load i32* %2, align 4, !tbaa !41 + %3 = load i32, i32* %2, align 4, !tbaa !41 ret i32 %3 } @@ -364,7 +364,7 @@ define void @rsSetElementAtImpl_uint2([1 x i32] %a.coerce, <2 x i32> %val, i32 % define <2 x i32> @rsGetElementAtImpl_uint2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 8, !tbaa !42 + %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !42 ret <2 x i32> %3 } @@ -380,7 +380,7 @@ define void @rsSetElementAtImpl_uint3([1 x i32] %a.coerce, <3 x i32> %val, i32 % define <3 x i32> @rsGetElementAtImpl_uint3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 8, !tbaa !43 + %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !43 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i32> %4 } @@ -396,7 +396,7 @@ define void @rsSetElementAtImpl_uint4([1 x i32] %a.coerce, <4 x i32> %val, i32 % define <4 x i32> @rsGetElementAtImpl_uint4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 16, !tbaa !44 + %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !44 ret <4 x i32> %3 } @@ -411,7 +411,7 @@ define void @rsSetElementAtImpl_long([1 x i32] %a.coerce, i64 %val, i32 %x, i32 define i64 @rsGetElementAtImpl_long([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i64* - %3 = load i64* %2, align 8, !tbaa !45 + %3 = load i64, i64* %2, align 8, !tbaa !45 ret i64 %3 } @@ -426,7 +426,7 @@ define void @rsSetElementAtImpl_long2([1 x i32] %a.coerce, <2 x i64> %val, i32 % define <2 x i64> @rsGetElementAtImpl_long2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 16, !tbaa !46 + %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !46 ret <2 x i64> %3 } @@ -442,7 +442,7 @@ define void @rsSetElementAtImpl_long3([1 x i32] %a.coerce, <3 x i64> %val, i32 % define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32 + %3 = load <4 x i64>, <4 x i64>* %2, align 32 %4 = bitcast <3 x i64>* %agg.result to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47 ret void @@ -459,7 +459,7 @@ define void @rsSetElementAtImpl_long4([1 x i32] %a.coerce, <4 x i64> %val, i32 % define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32, !tbaa !15 + %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15 store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48 ret void } @@ -475,7 +475,7 @@ define void @rsSetElementAtImpl_ulong([1 x i32] %a.coerce, i64 %val, i32 %x, i32 define i64 @rsGetElementAtImpl_ulong([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i64* - %3 = load i64* %2, align 8, !tbaa !49 + %3 = load i64, i64* %2, align 8, !tbaa !49 ret i64 %3 } @@ -490,7 +490,7 @@ define void @rsSetElementAtImpl_ulong2([1 x i32] %a.coerce, <2 x i64> %val, i32 define <2 x i64> @rsGetElementAtImpl_ulong2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 16, !tbaa !50 + %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !50 ret <2 x i64> %3 } @@ -506,7 +506,7 @@ define void @rsSetElementAtImpl_ulong3([1 x i32] %a.coerce, <3 x i64> %val, i32 define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32 + %3 = load <4 x i64>, <4 x i64>* %2, align 32 %4 = bitcast <3 x i64>* %agg.result to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51 ret void @@ -523,7 +523,7 @@ define void @rsSetElementAtImpl_ulong4([1 x i32] %a.coerce, <4 x i64> %val, i32 define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32, !tbaa !15 + %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15 store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52 ret void } @@ -539,7 +539,7 @@ define void @rsSetElementAtImpl_float([1 x i32] %a.coerce, float %val, i32 %x, i define float @rsGetElementAtImpl_float([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to float* - %3 = load float* %2, align 4, !tbaa !53 + %3 = load float, float* %2, align 4, !tbaa !53 ret float %3 } @@ -554,7 +554,7 @@ define void @rsSetElementAtImpl_float2([1 x i32] %a.coerce, <2 x float> %val, i3 define <2 x float> @rsGetElementAtImpl_float2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x float>* - %3 = load <2 x float>* %2, align 8, !tbaa !54 + %3 = load <2 x float>, <2 x float>* %2, align 8, !tbaa !54 ret <2 x float> %3 } @@ -570,7 +570,7 @@ define void @rsSetElementAtImpl_float3([1 x i32] %a.coerce, <3 x float> %val, i3 define <3 x float> @rsGetElementAtImpl_float3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x float>* - %3 = load <4 x float>* %2, align 8, !tbaa !55 + %3 = load <4 x float>, <4 x float>* %2, align 8, !tbaa !55 %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x float> %4 } @@ -586,7 +586,7 @@ define void @rsSetElementAtImpl_float4([1 x i32] %a.coerce, <4 x float> %val, i3 define <4 x float> @rsGetElementAtImpl_float4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x float>* - %3 = load <4 x float>* %2, align 16, !tbaa !56 + %3 = load <4 x float>, <4 x float>* %2, align 16, !tbaa !56 ret <4 x float> %3 } @@ -601,7 +601,7 @@ define void @rsSetElementAtImpl_double([1 x i32] %a.coerce, double %val, i32 %x, define double @rsGetElementAtImpl_double([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to double* - %3 = load double* %2, align 8, !tbaa !57 + %3 = load double, double* %2, align 8, !tbaa !57 ret double %3 } @@ -616,7 +616,7 @@ define void @rsSetElementAtImpl_double2([1 x i32] %a.coerce, <2 x double> %val, define <2 x double> @rsGetElementAtImpl_double2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x double>* - %3 = load <2 x double>* %2, align 16, !tbaa !58 + %3 = load <2 x double>, <2 x double>* %2, align 16, !tbaa !58 ret <2 x double> %3 } @@ -633,7 +633,7 @@ define void @rsSetElementAtImpl_double3([1 x i32] %a.coerce, <3 x double> %val, define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x double>* - %3 = load <4 x double>* %2, align 32 + %3 = load <4 x double>, <4 x double>* %2, align 32 %4 = bitcast <3 x double>* %agg.result to <4 x double>* store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !59 ret void @@ -649,7 +649,7 @@ define void @rsSetElementAtImpl_double4([1 x i32] %a.coerce, <4 x double> %val, define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x double>* - %3 = load <4 x double>* %2, align 32, !tbaa !15 + %3 = load <4 x double>, <4 x double>* %2, align 32, !tbaa !15 store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60 ret void } @@ -658,14 +658,14 @@ define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %ag define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 8 + %3 = load <4 x i64>, <4 x i64>* %2, align 8 store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52 ret void } define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 8 + %3 = load <4 x i64>, <4 x i64>* %2, align 8 %4 = bitcast <3 x i64>* %agg.result to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47 ret void @@ -673,21 +673,21 @@ define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %a define <2 x i64> @__rsAllocationVLoadXImpl_long2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 8 + %3 = load <2 x i64>, <2 x i64>* %2, align 8 ret <2 x i64> %3 } define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 8 + %3 = load <4 x i64>, <4 x i64>* %2, align 8 store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48 ret void } define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 8 + %3 = load <4 x i64>, <4 x i64>* %2, align 8 %4 = bitcast <3 x i64>* %agg.result to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51 ret void @@ -695,154 +695,154 @@ define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret % define <2 x i64> @__rsAllocationVLoadXImpl_ulong2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 8 + %3 = load <2 x i64>, <2 x i64>* %2, align 8 ret <2 x i64> %3 } define <4 x i32> @__rsAllocationVLoadXImpl_int4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 4 + %3 = load <4 x i32>, <4 x i32>* %2, align 4 ret <4 x i32> %3 } define <3 x i32> @__rsAllocationVLoadXImpl_int3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i32>* - %3 = load <3 x i32>* %2, align 4 + %3 = load <3 x i32>, <3 x i32>* %2, align 4 ret <3 x i32> %3 } define <2 x i32> @__rsAllocationVLoadXImpl_int2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 4 + %3 = load <2 x i32>, <2 x i32>* %2, align 4 ret <2 x i32> %3 } define <4 x i32> @__rsAllocationVLoadXImpl_uint4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 4 + %3 = load <4 x i32>, <4 x i32>* %2, align 4 ret <4 x i32> %3 } define <3 x i32> @__rsAllocationVLoadXImpl_uint3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i32>* - %3 = load <3 x i32>* %2, align 4 + %3 = load <3 x i32>, <3 x i32>* %2, align 4 ret <3 x i32> %3 } define <2 x i32> @__rsAllocationVLoadXImpl_uint2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 4 + %3 = load <2 x i32>, <2 x i32>* %2, align 4 ret <2 x i32> %3 } define <4 x i16> @__rsAllocationVLoadXImpl_short4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 2 + %3 = load <4 x i16>, <4 x i16>* %2, align 2 ret <4 x i16> %3 } define <3 x i16> @__rsAllocationVLoadXImpl_short3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i16>* - %3 = load <3 x i16>* %2, align 2 + %3 = load <3 x i16>, <3 x i16>* %2, align 2 ret <3 x i16> %3 } define <2 x i16> @__rsAllocationVLoadXImpl_short2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 2 + %3 = load <2 x i16>, <2 x i16>* %2, align 2 ret <2 x i16> %3 } define <4 x i16> @__rsAllocationVLoadXImpl_ushort4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 2 + %3 = load <4 x i16>, <4 x i16>* %2, align 2 ret <4 x i16> %3 } define <3 x i16> @__rsAllocationVLoadXImpl_ushort3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i16>* - %3 = load <3 x i16>* %2, align 2 + %3 = load <3 x i16>, <3 x i16>* %2, align 2 ret <3 x i16> %3 } define <2 x i16> @__rsAllocationVLoadXImpl_ushort2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 2 + %3 = load <2 x i16>, <2 x i16>* %2, align 2 ret <2 x i16> %3 } define <4 x i8> @__rsAllocationVLoadXImpl_char4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 1 + %3 = load <4 x i8>, <4 x i8>* %2, align 1 ret <4 x i8> %3 } define <3 x i8> @__rsAllocationVLoadXImpl_char3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i8>* - %3 = load <3 x i8>* %2, align 1 + %3 = load <3 x i8>, <3 x i8>* %2, align 1 ret <3 x i8> %3 } define <2 x i8> @__rsAllocationVLoadXImpl_char2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 1 + %3 = load <2 x i8>, <2 x i8>* %2, align 1 ret <2 x i8> %3 } define <4 x i8> @__rsAllocationVLoadXImpl_uchar4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 1 + %3 = load <4 x i8>, <4 x i8>* %2, align 1 ret <4 x i8> %3 } define <3 x i8> @__rsAllocationVLoadXImpl_uchar3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i8>* - %3 = load <3 x i8>* %2, align 1 + %3 = load <3 x i8>, <3 x i8>* %2, align 1 ret <3 x i8> %3 } define <2 x i8> @__rsAllocationVLoadXImpl_uchar2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 1 + %3 = load <2 x i8>, <2 x i8>* %2, align 1 ret <2 x i8> %3 } define <4 x float> @__rsAllocationVLoadXImpl_float4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x float>* - %3 = load <4 x float>* %2, align 4 + %3 = load <4 x float>, <4 x float>* %2, align 4 ret <4 x float> %3 } define <3 x float> @__rsAllocationVLoadXImpl_float3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x float>* - %3 = load <3 x float>* %2, align 4 + %3 = load <3 x float>, <3 x float>* %2, align 4 ret <3 x float> %3 } define <2 x float> @__rsAllocationVLoadXImpl_float2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x float>* - %3 = load <2 x float>* %2, align 4 + %3 = load <2 x float>, <2 x float>* %2, align 4 ret <2 x float> %3 } define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x double>* - %3 = load <4 x double>* %2, align 8 + %3 = load <4 x double>, <4 x double>* %2, align 8 store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60 ret void } define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x double>* - %3 = load <4 x double>* %2, align 8 + %3 = load <4 x double>, <4 x double>* %2, align 8 %4 = bitcast <3 x double>* %agg.result to <4 x double>* store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !59 ret void @@ -850,7 +850,7 @@ define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sr define <2 x double> @__rsAllocationVLoadXImpl_double2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x double>* - %3 = load <2 x double>* %2, align 8 + %3 = load <2 x double>, <2 x double>* %2, align 8 ret <2 x double> %3 } diff --git a/driver/runtime/ll64/allocation.ll b/driver/runtime/ll64/allocation.ll index a956bb23..adb385c4 100644 --- a/driver/runtime/ll64/allocation.ll +++ b/driver/runtime/ll64/allocation.ll @@ -43,7 +43,7 @@ define void @rsSetElementAtImpl_char(%struct.rs_allocation* nocapture readonly % define signext i8 @rsGetElementAtImpl_char(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2 - %2 = load i8* %1, align 1, !tbaa !21 + %2 = load i8, i8* %1, align 1, !tbaa !21 ret i8 %2 } @@ -59,7 +59,7 @@ define void @rsSetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly define <2 x i8> @rsGetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 2, !tbaa !22 + %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !22 ret <2 x i8> %3 } @@ -76,7 +76,7 @@ define void @rsSetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly define <3 x i8> @rsGetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !23 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !23 %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i8> %4 } @@ -92,7 +92,7 @@ define void @rsSetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly define <4 x i8> @rsGetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !24 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !24 ret <4 x i8> %3 } @@ -105,7 +105,7 @@ define void @rsSetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly define zeroext i8 @rsGetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2 - %2 = load i8* %1, align 1, !tbaa !25 + %2 = load i8, i8* %1, align 1, !tbaa !25 ret i8 %2 } @@ -121,7 +121,7 @@ define void @rsSetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly define <2 x i8> @rsGetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 2, !tbaa !26 + %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !26 ret <2 x i8> %3 } @@ -138,7 +138,7 @@ define void @rsSetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly define <3 x i8> @rsGetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !27 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !27 %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i8> %4 } @@ -154,7 +154,7 @@ define void @rsSetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly define <4 x i8> @rsGetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 4, !tbaa !28 + %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !28 ret <4 x i8> %3 } @@ -169,7 +169,7 @@ define void @rsSetElementAtImpl_short(%struct.rs_allocation* nocapture readonly define signext i16 @rsGetElementAtImpl_short(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i16* - %3 = load i16* %2, align 2, !tbaa !29 + %3 = load i16, i16* %2, align 2, !tbaa !29 ret i16 %3 } @@ -184,7 +184,7 @@ define void @rsSetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly define <2 x i16> @rsGetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 4, !tbaa !30 + %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !30 ret <2 x i16> %3 } @@ -201,7 +201,7 @@ define void @rsSetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly define <3 x i16> @rsGetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !31 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !31 %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i16> %4 } @@ -217,7 +217,7 @@ define void @rsSetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly define <4 x i16> @rsGetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !32 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !32 ret <4 x i16> %3 } @@ -232,7 +232,7 @@ define void @rsSetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly define zeroext i16 @rsGetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i16* - %3 = load i16* %2, align 2, !tbaa !33 + %3 = load i16, i16* %2, align 2, !tbaa !33 ret i16 %3 } @@ -247,7 +247,7 @@ define void @rsSetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonl define <2 x i16> @rsGetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 4, !tbaa !34 + %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !34 ret <2 x i16> %3 } @@ -264,7 +264,7 @@ define void @rsSetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonl define <3 x i16> @rsGetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !35 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !35 %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i16> %4 } @@ -280,7 +280,7 @@ define void @rsSetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonl define <4 x i16> @rsGetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 8, !tbaa !36 + %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !36 ret <4 x i16> %3 } @@ -295,7 +295,7 @@ define void @rsSetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a define i32 @rsGetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i32* - %3 = load i32* %2, align 4, !tbaa !37 + %3 = load i32, i32* %2, align 4, !tbaa !37 ret i32 %3 } @@ -310,7 +310,7 @@ define void @rsSetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly % define <2 x i32> @rsGetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 8, !tbaa !38 + %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !38 ret <2 x i32> %3 } @@ -326,7 +326,7 @@ define void @rsSetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly % define <3 x i32> @rsGetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 8, !tbaa !39 + %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !39 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i32> %4 } @@ -342,7 +342,7 @@ define void @rsSetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly % define <4 x i32> @rsGetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 16, !tbaa !40 + %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !40 ret <4 x i32> %3 } @@ -357,7 +357,7 @@ define void @rsSetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly % define i32 @rsGetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i32* - %3 = load i32* %2, align 4, !tbaa !41 + %3 = load i32, i32* %2, align 4, !tbaa !41 ret i32 %3 } @@ -372,7 +372,7 @@ define void @rsSetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly define <2 x i32> @rsGetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 8, !tbaa !42 + %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !42 ret <2 x i32> %3 } @@ -388,7 +388,7 @@ define void @rsSetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly define <3 x i32> @rsGetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 8, !tbaa !43 + %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !43 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i32> %4 } @@ -404,7 +404,7 @@ define void @rsSetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly define <4 x i32> @rsGetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 16, !tbaa !44 + %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !44 ret <4 x i32> %3 } @@ -419,7 +419,7 @@ define void @rsSetElementAtImpl_long(%struct.rs_allocation* nocapture readonly % define i64 @rsGetElementAtImpl_long(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i64* - %3 = load i64* %2, align 8, !tbaa !45 + %3 = load i64, i64* %2, align 8, !tbaa !45 ret i64 %3 } @@ -434,14 +434,14 @@ define void @rsSetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly define <2 x i64> @rsGetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 16, !tbaa !46 + %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !46 ret <2 x i64> %3 } !47 = !{!"long3", !15} define void @rsSetElementAtImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 - %2 = load <3 x i64>* %val + %2 = load <3 x i64>, <3 x i64>* %val %3 = shufflevector <3 x i64> %2, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> %4 = bitcast i8* %1 to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47 @@ -451,7 +451,7 @@ define void @rsSetElementAtImpl_long3(%struct.rs_allocation* nocapture readonly define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32 + %3 = load <4 x i64>, <4 x i64>* %2, align 32 %4 = bitcast <3 x i64>* %agg.result to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47 ret void @@ -460,7 +460,7 @@ define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.res !48 = !{!"long4", !15} define void @rsSetElementAtImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 - %2 = load <4 x i64>* %val + %2 = load <4 x i64>, <4 x i64>* %val %3 = bitcast i8* %1 to <4 x i64>* store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !48 ret void @@ -469,7 +469,7 @@ define void @rsSetElementAtImpl_long4(%struct.rs_allocation* nocapture readonly define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32, !tbaa !15 + %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15 store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48 ret void } @@ -485,7 +485,7 @@ define void @rsSetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly define i64 @rsGetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to i64* - %3 = load i64* %2, align 8, !tbaa !49 + %3 = load i64, i64* %2, align 8, !tbaa !49 ret i64 %3 } @@ -500,14 +500,14 @@ define void @rsSetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly define <2 x i64> @rsGetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 16, !tbaa !50 + %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !50 ret <2 x i64> %3 } !51 = !{!"ulong3", !15} define void @rsSetElementAtImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 - %2 = load <3 x i64>* %val + %2 = load <3 x i64>, <3 x i64>* %val %3 = shufflevector <3 x i64> %2, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> %4 = bitcast i8* %1 to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47 @@ -517,7 +517,7 @@ define void @rsSetElementAtImpl_ulong3(%struct.rs_allocation* nocapture readonly define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32 + %3 = load <4 x i64>, <4 x i64>* %2, align 32 %4 = bitcast <3 x i64>* %agg.result to <4 x i64>* store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51 ret void @@ -526,7 +526,7 @@ define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.re !52 = !{!"ulong4", !15} define void @rsSetElementAtImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 - %2 = load <4 x i64>* %val + %2 = load <4 x i64>, <4 x i64>* %val %3 = bitcast i8* %1 to <4 x i64>* store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !52 ret void @@ -535,7 +535,7 @@ define void @rsSetElementAtImpl_ulong4(%struct.rs_allocation* nocapture readonly define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 32, !tbaa !15 + %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15 store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52 ret void } @@ -551,7 +551,7 @@ define void @rsSetElementAtImpl_float(%struct.rs_allocation* nocapture readonly define float @rsGetElementAtImpl_float(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to float* - %3 = load float* %2, align 4, !tbaa !53 + %3 = load float, float* %2, align 4, !tbaa !53 ret float %3 } @@ -566,7 +566,7 @@ define void @rsSetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly define <2 x float> @rsGetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x float>* - %3 = load <2 x float>* %2, align 8, !tbaa !54 + %3 = load <2 x float>, <2 x float>* %2, align 8, !tbaa !54 ret <2 x float> %3 } @@ -583,7 +583,7 @@ define void @rsSetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly define <3 x float> @rsGetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x float>* - %3 = load <4 x float>* %2, align 8, !tbaa !55 + %3 = load <4 x float>, <4 x float>* %2, align 8, !tbaa !55 %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x float> %4 } @@ -599,7 +599,7 @@ define void @rsSetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly define <4 x float> @rsGetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x float>* - %3 = load <4 x float>* %2, align 16, !tbaa !56 + %3 = load <4 x float>, <4 x float>* %2, align 16, !tbaa !56 ret <4 x float> %3 } @@ -614,7 +614,7 @@ define void @rsSetElementAtImpl_double(%struct.rs_allocation* nocapture readonly define double @rsGetElementAtImpl_double(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to double* - %3 = load double* %2, align 8, !tbaa !57 + %3 = load double, double* %2, align 8, !tbaa !57 ret double %3 } @@ -629,14 +629,14 @@ define void @rsSetElementAtImpl_double2(%struct.rs_allocation* nocapture readonl define <2 x double> @rsGetElementAtImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x double>* - %3 = load <2 x double>* %2, align 16, !tbaa !58 + %3 = load <2 x double>, <2 x double>* %2, align 16, !tbaa !58 ret <2 x double> %3 } !59 = !{!"double3", !15} define void @rsSetElementAtImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 - %2 = load <3 x double>* %val + %2 = load <3 x double>, <3 x double>* %val %3 = shufflevector <3 x double> %2, <3 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> %4 = bitcast i8* %1 to <4 x double>* store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !47 @@ -647,7 +647,7 @@ define void @rsSetElementAtImpl_double3(%struct.rs_allocation* nocapture readonl define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x double>* - %3 = load <4 x double>* %2, align 32 + %3 = load <4 x double>, <4 x double>* %2, align 32 %4 = bitcast <3 x double>* %agg.result to <4 x double>* store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !59 ret void @@ -656,7 +656,7 @@ define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %ag !60 = !{!"double4", !15} define void @rsSetElementAtImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 - %2 = load <4 x double>* %val + %2 = load <4 x double>, <4 x double>* %val %3 = bitcast i8* %1 to <4 x double>* store <4 x double> %2, <4 x double>* %3, align 32, !tbaa !60 ret void @@ -664,7 +664,7 @@ define void @rsSetElementAtImpl_double4(%struct.rs_allocation* nocapture readonl define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x double>* - %3 = load <4 x double>* %2, align 32, !tbaa !15 + %3 = load <4 x double>, <4 x double>* %2, align 32, !tbaa !15 store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60 ret void } @@ -673,210 +673,210 @@ define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %ag define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 8 + %3 = load <4 x i64>, <4 x i64>* %2, align 8 store <4 x i64> %3, <4 x i64>* %agg.result ret void } define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i64>* - %3 = load <3 x i64>* %2, align 8 + %3 = load <3 x i64>, <3 x i64>* %2, align 8 store <3 x i64> %3, <3 x i64>* %agg.result ret void } define <2 x i64> @__rsAllocationVLoadXImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 8 + %3 = load <2 x i64>, <2 x i64>* %2, align 8 ret <2 x i64> %3 } define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i64>* - %3 = load <4 x i64>* %2, align 8 + %3 = load <4 x i64>, <4 x i64>* %2, align 8 store <4 x i64> %3, <4 x i64>* %agg.result ret void } define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i64>* - %3 = load <3 x i64>* %2, align 8 + %3 = load <3 x i64>, <3 x i64>* %2, align 8 store <3 x i64> %3, <3 x i64>* %agg.result ret void } define <2 x i64> @__rsAllocationVLoadXImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i64>* - %3 = load <2 x i64>* %2, align 8 + %3 = load <2 x i64>, <2 x i64>* %2, align 8 ret <2 x i64> %3 } define <4 x i32> @__rsAllocationVLoadXImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 4 + %3 = load <4 x i32>, <4 x i32>* %2, align 4 ret <4 x i32> %3 } define <3 x i32> @__rsAllocationVLoadXImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i32>* - %3 = load <3 x i32>* %2, align 4 + %3 = load <3 x i32>, <3 x i32>* %2, align 4 ret <3 x i32> %3 } define <2 x i32> @__rsAllocationVLoadXImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 4 + %3 = load <2 x i32>, <2 x i32>* %2, align 4 ret <2 x i32> %3 } define <4 x i32> @__rsAllocationVLoadXImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i32>* - %3 = load <4 x i32>* %2, align 4 + %3 = load <4 x i32>, <4 x i32>* %2, align 4 ret <4 x i32> %3 } define <3 x i32> @__rsAllocationVLoadXImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i32>* - %3 = load <3 x i32>* %2, align 4 + %3 = load <3 x i32>, <3 x i32>* %2, align 4 ret <3 x i32> %3 } define <2 x i32> @__rsAllocationVLoadXImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i32>* - %3 = load <2 x i32>* %2, align 4 + %3 = load <2 x i32>, <2 x i32>* %2, align 4 ret <2 x i32> %3 } define <4 x i16> @__rsAllocationVLoadXImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 2 + %3 = load <4 x i16>, <4 x i16>* %2, align 2 ret <4 x i16> %3 } define <3 x i16> @__rsAllocationVLoadXImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i16>* - %3 = load <3 x i16>* %2, align 2 + %3 = load <3 x i16>, <3 x i16>* %2, align 2 ret <3 x i16> %3 } define <2 x i16> @__rsAllocationVLoadXImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 2 + %3 = load <2 x i16>, <2 x i16>* %2, align 2 ret <2 x i16> %3 } define <4 x i16> @__rsAllocationVLoadXImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i16>* - %3 = load <4 x i16>* %2, align 2 + %3 = load <4 x i16>, <4 x i16>* %2, align 2 ret <4 x i16> %3 } define <3 x i16> @__rsAllocationVLoadXImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i16>* - %3 = load <3 x i16>* %2, align 2 + %3 = load <3 x i16>, <3 x i16>* %2, align 2 ret <3 x i16> %3 } define <2 x i16> @__rsAllocationVLoadXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i16>* - %3 = load <2 x i16>* %2, align 2 + %3 = load <2 x i16>, <2 x i16>* %2, align 2 ret <2 x i16> %3 } define <4 x i8> @__rsAllocationVLoadXImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 1 + %3 = load <4 x i8>, <4 x i8>* %2, align 1 ret <4 x i8> %3 } define <3 x i8> @__rsAllocationVLoadXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i8>* - %3 = load <3 x i8>* %2, align 1 + %3 = load <3 x i8>, <3 x i8>* %2, align 1 ret <3 x i8> %3 } define <2 x i8> @__rsAllocationVLoadXImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 1 + %3 = load <2 x i8>, <2 x i8>* %2, align 1 ret <2 x i8> %3 } define <4 x i8> @__rsAllocationVLoadXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x i8>* - %3 = load <4 x i8>* %2, align 1 + %3 = load <4 x i8>, <4 x i8>* %2, align 1 ret <4 x i8> %3 } define <3 x i8> @__rsAllocationVLoadXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x i8>* - %3 = load <3 x i8>* %2, align 1 + %3 = load <3 x i8>, <3 x i8>* %2, align 1 ret <3 x i8> %3 } define <2 x i8> @__rsAllocationVLoadXImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x i8>* - %3 = load <2 x i8>* %2, align 1 + %3 = load <2 x i8>, <2 x i8>* %2, align 1 ret <2 x i8> %3 } define <4 x float> @__rsAllocationVLoadXImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x float>* - %3 = load <4 x float>* %2, align 4 + %3 = load <4 x float>, <4 x float>* %2, align 4 ret <4 x float> %3 } define <3 x float> @__rsAllocationVLoadXImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x float>* - %3 = load <3 x float>* %2, align 4 + %3 = load <3 x float>, <3 x float>* %2, align 4 ret <3 x float> %3 } define <2 x float> @__rsAllocationVLoadXImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x float>* - %3 = load <2 x float>* %2, align 4 + %3 = load <2 x float>, <2 x float>* %2, align 4 ret <2 x float> %3 } define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <4 x double>* - %3 = load <4 x double>* %2, align 8 + %3 = load <4 x double>, <4 x double>* %2, align 8 store <4 x double> %3, <4 x double>* %agg.result ret void } define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <3 x double>* - %3 = load <3 x double>* %2, align 8 + %3 = load <3 x double>, <3 x double>* %2, align 8 store <3 x double> %3, <3 x double>* %agg.result ret void } define <2 x double> @__rsAllocationVLoadXImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 %2 = bitcast i8* %1 to <2 x double>* - %3 = load <2 x double>* %2, align 8 + %3 = load <2 x double>, <2 x double>* %2, align 8 ret <2 x double> %3 } define void @__rsAllocationVStoreXImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 - %2 = load <4 x i64>* %val + %2 = load <4 x i64>, <4 x i64>* %val %3 = bitcast i8* %1 to <4 x i64>* store <4 x i64> %2, <4 x i64>* %3, align 8 ret void } define void @__rsAllocationVStoreXImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 - %2 = load <3 x i64>* %val + %2 = load <3 x i64>, <3 x i64>* %val %3 = bitcast i8* %1 to <3 x i64>* store <3 x i64> %2, <3 x i64>* %3, align 8 ret void @@ -890,14 +890,14 @@ define void @__rsAllocationVStoreXImpl_long2(%struct.rs_allocation* nocapture re define void @__rsAllocationVStoreXImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 - %2 = load <4 x i64>* %val + %2 = load <4 x i64>, <4 x i64>* %val %3 = bitcast i8* %1 to <4 x i64>* store <4 x i64> %2, <4 x i64>* %3, align 8 ret void } define void @__rsAllocationVStoreXImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 - %2 = load <3 x i64>* %val + %2 = load <3 x i64>, <3 x i64>* %val %3 = bitcast i8* %1 to <3 x i64>* store <3 x i64> %2, <3 x i64>* %3, align 8 ret void @@ -1058,14 +1058,14 @@ define void @__rsAllocationVStoreXImpl_float2(%struct.rs_allocation* nocapture r define void @__rsAllocationVStoreXImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 - %2 = load <4 x double>* %val + %2 = load <4 x double>, <4 x double>* %val %3 = bitcast i8* %1 to <4 x double>* store <4 x double> %2, <4 x double>* %3, align 8 ret void } define void @__rsAllocationVStoreXImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 { %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 - %2 = load <3 x double>* %val + %2 = load <3 x double>, <3 x double>* %val %3 = bitcast i8* %1 to <3 x double>* store <3 x double> %2, <3 x double>* %3, align 8 ret void |