about summary refs log tree commit diff
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2017-10-14 04:18:08 +0000
committer Craig Topper <craig.topper@intel.com> 2017-10-14 04:18:08 +0000
commit 3207cbf119ffa421f24618234bea32825039b3b7 (patch)
tree f75c0413363b06c8679f55ce0f1d07fdf684532c
parent 3ea3a006fe54de8a8ad971b994d1bec4c34c25da (diff)
download llvm-3207cbf119ffa421f24618234bea32825039b3b7.tar.gz
[X86] Add AVX512 flavors of VCVTDQ2PD plus VCVTUDQ2PD to the load folding tables.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315796 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp 6
-rw-r--r-- test/CodeGen/X86/stack-folding-fp-avx512.ll 16
-rw-r--r-- test/CodeGen/X86/stack-folding-fp-avx512vl.ll 34
3 files changed, 56 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index fdac9345d02..b9f78d3ad43 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -929,6 +929,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrm, 0 },
+ { X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrm, 0 },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
{ X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
@@ -988,6 +990,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions (256-bit versions)
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0 },
+ { X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rm, 0 },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
@@ -1039,6 +1043,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions (128-bit versions)
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE },
+ { X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rm, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
diff --git a/test/CodeGen/X86/stack-folding-fp-avx512.ll b/test/CodeGen/X86/stack-folding-fp-avx512.ll
index 7a6d3b4713e..e22e1a28e95 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -184,6 +184,22 @@ define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
ret <4 x float> %5
}
+define <8 x double> @stack_fold_cvtdq2pd(<8 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2pd
+ ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sitofp <8 x i32> %a0 to <8 x double>
+ ret <8 x double> %2
+}
+
+define <8 x double> @stack_fold_cvtudq2pd(<8 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtudq2pd
+ ;CHECK: vcvtudq2pd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = uitofp <8 x i32> %a0 to <8 x double>
+ ret <8 x double> %2
+}
+
define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_insertps
;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
diff --git a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index 292829a01cb..331af4b6799 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -216,6 +216,40 @@ define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
ret <8 x float> %2
}
+define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2pd
+ ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sitofp <2 x i32> %2 to <2 x double>
+ ret <2 x double> %3
+}
+
+define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm
+ ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sitofp <4 x i32> %a0 to <4 x double>
+ ret <4 x double> %2
+}
+
+define <2 x double> @stack_fold_cvtudq2pd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtudq2pd
+ ;CHECK: vcvtudq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %3 = uitofp <2 x i32> %2 to <2 x double>
+ ret <2 x double> %3
+}
+
+define <4 x double> @stack_fold_cvtudq2pd_ymm(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtudq2pd_ymm
+ ;CHECK: vcvtudq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = uitofp <4 x i32> %a0 to <4 x double>
+ ret <4 x double> %2
+}
+
define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
;CHECK-LABEL: stack_fold_maxpd
;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload