From 6d08abd8ca4485a9a2fa070c6b5bfa7fec2e5a86 Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Thu, 18 Oct 2018 14:05:58 -0700 Subject: Revert "[X86] Fix register resizings for inline assembly register operands." This reverts commit 28e4a69190d84f8ac6f5386b42bdd1060b95d834. Change-Id: I4fec60729c93632530b89bc30cafeac960debfc9 --- lib/Target/X86/X86ISelLowering.cpp | 36 +-- lib/Target/X86/X86RegisterInfo.td | 10 - test/CodeGen/X86/atomic_mi.ll | 2 +- test/CodeGen/X86/avx512-regcall-Mask.ll | 160 ++++++------- test/CodeGen/X86/physreg-pairs-error.ll | 12 - test/CodeGen/X86/physreg-pairs.ll | 165 ------------- .../X86/unfold-masked-merge-vector-variablemask.ll | 256 +++++++++++---------- 7 files changed, 220 insertions(+), 421 deletions(-) delete mode 100644 test/CodeGen/X86/physreg-pairs-error.ll delete mode 100644 test/CodeGen/X86/physreg-pairs.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 38d3a30cb19..2addda405c6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -41484,36 +41484,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass) - : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr) - : nullptr; - if (Size == 64 && !is64Bit) { - // Model GCC's behavior here and select a fixed pair of 32-bit - // registers. - switch (Res.first) { - case X86::EAX: - return std::make_pair(X86::EAX, &X86::GR32_ADRegClass); - case X86::EDX: - return std::make_pair(X86::EDX, &X86::GR32_DCRegClass); - case X86::ECX: - return std::make_pair(X86::ECX, &X86::GR32_CBRegClass); - case X86::EBX: - return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass); - case X86::ESI: - return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass); - case X86::EDI: - return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass); - case X86::EBP: - return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass); - default: - return std::make_pair(0, nullptr); - } - } - if (RC && RC->contains(DestReg)) - return std::make_pair(DestReg, RC); - return Res; + : &X86::GR64RegClass; + if (RC->contains(DestReg)) + Res = std::make_pair(DestReg, RC); + } else { + // No register found/type mismatch. + Res.first = 0; + Res.second = nullptr; } - // No register found/type mismatch. - return std::make_pair(0, nullptr); } else if (isFRClass(*Class)) { // Handle references to XMM physical registers that got mapped into the // wrong class. This can happen with constraints like {xmm0} where the diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 0c1b05fd3ab..1e5c84badc4 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -500,16 +500,6 @@ def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32, def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>; def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>; -// Classes to support the 64-bit assembler constraint tied to a fixed -// register in 32-bit mode. The second register is always the next in -// the list. Wrap around causes an error. -def GR32_DC : RegisterClass<"X86", [i32], 32, (add EDX, ECX)>; -def GR32_CB : RegisterClass<"X86", [i32], 32, (add ECX, EBX)>; -def GR32_BSI : RegisterClass<"X86", [i32], 32, (add EBX, ESI)>; -def GR32_SIDI : RegisterClass<"X86", [i32], 32, (add ESI, EDI)>; -def GR32_DIBP : RegisterClass<"X86", [i32], 32, (add EDI, EBP)>; -def GR32_BPSP : RegisterClass<"X86", [i32], 32, (add EBP, ESP)>; - // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll index f0d2c1596f8..308e138acd8 100644 --- a/test/CodeGen/X86/atomic_mi.ll +++ b/test/CodeGen/X86/atomic_mi.ll @@ -2245,11 +2245,11 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) { ; X32-NEXT: .cfi_offset %edi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: movl 20(%ebp), %esi +; X32-NEXT: movl 8(%ebp), %edi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: movl 8(%ebp), %edi ; X32-NEXT: lock cmpxchg8b (%edi,%esi,8) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) diff --git a/test/CodeGen/X86/avx512-regcall-Mask.ll b/test/CodeGen/X86/avx512-regcall-Mask.ll index 41dfe16402a..68009744d41 100644 --- a/test/CodeGen/X86/avx512-regcall-Mask.ll +++ b/test/CodeGen/X86/avx512-regcall-Mask.ll @@ -129,9 +129,9 @@ define i64 @caller_argv64i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $48, %rsp ; WIN64-NEXT: .seh_stackalloc 48 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 32 -; WIN64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 16 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002 @@ -139,6 +139,7 @@ define i64 @caller_argv64i1() #0 { ; WIN64-NEXT: movq %rax, %rcx ; WIN64-NEXT: movq %rax, %rdx ; WIN64-NEXT: movq %rax, %rdi +; WIN64-NEXT: movq %rax, %rsi ; WIN64-NEXT: movq %rax, %r8 ; WIN64-NEXT: movq %rax, %r9 ; WIN64-NEXT: movq %rax, %r10 @@ -146,10 +147,9 @@ define i64 @caller_argv64i1() #0 { ; WIN64-NEXT: movq %rax, %r12 ; WIN64-NEXT: movq %rax, %r14 ; WIN64-NEXT: movq %rax, %r15 -; WIN64-NEXT: movq %rax, %rsi ; WIN64-NEXT: callq test_argv64i1 -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $48, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -181,13 +181,13 @@ define i64 @caller_argv64i1() #0 { ; LINUXOSX64-NEXT: movq %rax, %rcx ; LINUXOSX64-NEXT: movq %rax, %rdx ; LINUXOSX64-NEXT: movq %rax, %rdi +; LINUXOSX64-NEXT: movq %rax, %rsi ; LINUXOSX64-NEXT: movq %rax, %r8 ; LINUXOSX64-NEXT: movq %rax, %r9 ; LINUXOSX64-NEXT: movq %rax, %r12 ; LINUXOSX64-NEXT: movq %rax, %r13 ; LINUXOSX64-NEXT: movq %rax, %r14 ; LINUXOSX64-NEXT: movq %rax, %r15 -; LINUXOSX64-NEXT: movq %rax, %rsi ; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8 ; LINUXOSX64-NEXT: pushq %rax @@ -249,7 +249,7 @@ define <64 x i1> @caller_retv64i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -258,7 +258,7 @@ define <64 x i1> @caller_retv64i1() #0 { ; WIN64-NEXT: kmovq %rax, %k0 ; WIN64-NEXT: vpmovm2b %k0, %zmm0 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -289,9 +289,9 @@ define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esp ; X32-NEXT: subl $72, %esp -; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: kmovd %edx, %k0 ; X32-NEXT: kmovd %ecx, %k1 @@ -304,9 +304,9 @@ define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> ; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2 ; X32-NEXT: calll _test_argv32i1helper ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload ; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esp ; X32-NEXT: vzeroupper @@ -349,13 +349,13 @@ define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> ; LINUXOSX64-NEXT: pushq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: subq $128, %rsp -; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 @@ -378,13 +378,13 @@ define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> ; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2 ; LINUXOSX64-NEXT: callq test_argv32i1helper ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload ; LINUXOSX64-NEXT: addq $128, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: popq %rsp @@ -414,7 +414,7 @@ define i32 @caller_argv32i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -424,7 +424,7 @@ define i32 @caller_argv32i1() #0 { ; WIN64-NEXT: movl $1, %edx ; WIN64-NEXT: callq test_argv32i1 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -481,7 +481,7 @@ define i32 @caller_retv32i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -489,7 +489,7 @@ define i32 @caller_retv32i1() #0 { ; WIN64-NEXT: callq test_retv32i1 ; WIN64-NEXT: incl %eax ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -521,9 +521,9 @@ define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> ; X32: # %bb.0: ; X32-NEXT: pushl %esp ; X32-NEXT: subl $72, %esp -; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: kmovd %edx, %k0 ; X32-NEXT: kmovd %ecx, %k1 @@ -537,9 +537,9 @@ define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> ; X32-NEXT: vzeroupper ; X32-NEXT: calll _test_argv16i1helper ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload ; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esp ; X32-NEXT: retl @@ -581,13 +581,13 @@ define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> ; LINUXOSX64-NEXT: pushq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: subq $128, %rsp -; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 @@ -611,13 +611,13 @@ define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> ; LINUXOSX64-NEXT: vzeroupper ; LINUXOSX64-NEXT: callq test_argv16i1helper ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload ; LINUXOSX64-NEXT: addq $128, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: popq %rsp @@ -645,7 +645,7 @@ define i16 @caller_argv16i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -655,7 +655,7 @@ define i16 @caller_argv16i1() #0 { ; WIN64-NEXT: movl $1, %edx ; WIN64-NEXT: callq test_argv16i1 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -714,7 +714,7 @@ define i16 @caller_retv16i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -724,7 +724,7 @@ define i16 @caller_retv16i1() #0 { ; WIN64-NEXT: incl %eax ; WIN64-NEXT: # kill: def $ax killed $ax killed $eax ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -758,9 +758,9 @@ define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) ; X32: # %bb.0: ; X32-NEXT: pushl %esp ; X32-NEXT: subl $72, %esp -; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: kmovd %edx, %k0 ; X32-NEXT: kmovd %ecx, %k1 @@ -774,9 +774,9 @@ define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) ; X32-NEXT: vzeroupper ; X32-NEXT: calll _test_argv8i1helper ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload -; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload ; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esp ; X32-NEXT: retl @@ -818,13 +818,13 @@ define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) ; LINUXOSX64-NEXT: pushq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: subq $128, %rsp -; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 @@ -848,13 +848,13 @@ define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) ; LINUXOSX64-NEXT: vzeroupper ; LINUXOSX64-NEXT: callq test_argv8i1helper ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload ; LINUXOSX64-NEXT: addq $128, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: popq %rsp @@ -882,7 +882,7 @@ define i8 @caller_argv8i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -892,7 +892,7 @@ define i8 @caller_argv8i1() #0 { ; WIN64-NEXT: movl $1, %edx ; WIN64-NEXT: callq test_argv8i1 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -953,7 +953,7 @@ define <8 x i1> @caller_retv8i1() #0 { ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -964,7 +964,7 @@ define <8 x i1> @caller_retv8i1() #0 { ; WIN64-NEXT: vpmovm2w %k0, %zmm0 ; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi diff --git a/test/CodeGen/X86/physreg-pairs-error.ll b/test/CodeGen/X86/physreg-pairs-error.ll deleted file mode 100644 index 3ee52023b61..00000000000 --- a/test/CodeGen/X86/physreg-pairs-error.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: not llc -mtriple=i386-unknown-linux-gnu -o - %s 2>&1 | FileCheck %s - -; CHECK: error: couldn't allocate input reg for constraint '{esp}' -define dso_local i64 @test_esp(i64 %in) local_unnamed_addr nounwind { -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{esp},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - diff --git a/test/CodeGen/X86/physreg-pairs.ll b/test/CodeGen/X86/physreg-pairs.ll deleted file mode 100644 index 5e1d430311a..00000000000 --- a/test/CodeGen/X86/physreg-pairs.ll +++ /dev/null @@ -1,165 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i386-unknown-linux-gnu -o - %s | FileCheck %s - -; To match GCC's behavior in assigning 64-bit values to a 32-bit -; register, we bind the a subsequence of 2 registers starting with the -; explicitly given register from the following sequence: EAX, EDX, -; ECX, EBX, ESI, EDI, EBP, ESP, to the value. There is no wrapping -; from the sequence, so this will fail given ESP. - -define dso_local i64 @test_eax(i64 %in) local_unnamed_addr nounwind { -; CHECK-LABEL: test_eax: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $-1985229329, %eax # imm = 0x89ABCDEF -; CHECK-NEXT: movl $19088743, %edx # imm = 0x1234567 -; CHECK-NEXT: #APP -; CHECK-NEXT: movl %eax, %eax -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: sarl $31, %edx -; CHECK-NEXT: retl -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{eax},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - -define dso_local i64 @test_edx(i64 %in) local_unnamed_addr nounwind { -; CHECK-LABEL: test_edx: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $-1985229329, %edx # imm = 0x89ABCDEF -; CHECK-NEXT: movl $19088743, %ecx # imm = 0x1234567 -; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: sarl $31, %edx -; CHECK-NEXT: retl -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{edx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - -define dso_local i64 @test_ecx(i64 %in) local_unnamed_addr nounwind { -; CHECK-LABEL: test_ecx: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: movl $-1985229329, %ecx # imm = 0x89ABCDEF -; CHECK-NEXT: movl $19088743, %ebx # imm = 0x1234567 -; CHECK-NEXT: #APP -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: sarl $31, %edx -; CHECK-NEXT: popl %ebx -; CHECK-NEXT: retl -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ecx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - -define dso_local i64 @test_ebx(i64 %in) local_unnamed_addr nounwind { -; CHECK-LABEL: test_ebx: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl $-1985229329, %ebx # imm = 0x89ABCDEF -; CHECK-NEXT: movl $19088743, %esi # imm = 0x1234567 -; CHECK-NEXT: #APP -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: sarl $31, %edx -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %ebx -; CHECK-NEXT: retl -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ebx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - -define dso_local i64 @test_esi(i64 %in) local_unnamed_addr nounwind { -; CHECK-LABEL: test_esi: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl $-1985229329, %esi # imm = 0x89ABCDEF -; CHECK-NEXT: movl $19088743, %edi # imm = 0x1234567 -; CHECK-NEXT: #APP -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: sarl $31, %edx -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: retl -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{esi},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - -define dso_local i64 @test_edi(i64 %in) local_unnamed_addr nounwind { -; CHECK-LABEL: test_edi: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: movl $-1985229329, %edi # imm = 0x89ABCDEF -; CHECK-NEXT: movl $19088743, %ebp # imm = 0x1234567 -; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: sarl $31, %edx -; CHECK-NEXT: popl %edi -; CHECK-NEXT: popl %ebp -; CHECK-NEXT: retl -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{edi},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - -define dso_local i64 @test_ebp(i64 %in) local_unnamed_addr nounwind { -; CHECK-LABEL: test_ebp: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: movl $19088743, %esp # imm = 0x1234567 -; CHECK-NEXT: movl $-1985229329, %ebp # imm = 0x89ABCDEF -; CHECK-NEXT: #APP -; CHECK-NEXT: movl %ebp, %eax -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: sarl $31, %edx -; CHECK-NEXT: popl %ebp -; CHECK-NEXT: retl -entry: - %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ebp},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) - %conv = trunc i64 %0 to i32 - %add = add nsw i32 %conv, 3 - %conv1 = sext i32 %add to i64 - ret i64 %conv1 -} - diff --git a/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll b/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll index 4061f47aa1f..44759ba86c1 100644 --- a/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll +++ b/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll @@ -1354,6 +1354,8 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-BASELINE-NEXT: movq %rcx, %r15 ; CHECK-BASELINE-NEXT: movq %rsi, %r14 ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-BASELINE-NEXT: movb 15(%rcx), %al +; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 16(%rcx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 17(%rcx), %al @@ -1365,11 +1367,11 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-BASELINE-NEXT: movb 20(%rcx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 21(%rcx), %r12b -; CHECK-BASELINE-NEXT: movb 22(%rcx), %r9b -; CHECK-BASELINE-NEXT: movb 23(%rcx), %r10b -; CHECK-BASELINE-NEXT: movb 24(%rcx), %r11b -; CHECK-BASELINE-NEXT: movb 25(%rcx), %bpl -; CHECK-BASELINE-NEXT: movb 26(%rcx), %r13b +; CHECK-BASELINE-NEXT: movb 22(%rcx), %r10b +; CHECK-BASELINE-NEXT: movb 23(%rcx), %r11b +; CHECK-BASELINE-NEXT: movb 24(%rcx), %bpl +; CHECK-BASELINE-NEXT: movb 25(%rcx), %r13b +; CHECK-BASELINE-NEXT: movb 26(%rcx), %r9b ; CHECK-BASELINE-NEXT: movb 27(%rcx), %r8b ; CHECK-BASELINE-NEXT: movb 28(%rcx), %dil ; CHECK-BASELINE-NEXT: movb 29(%rcx), %sil @@ -1406,35 +1408,35 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-BASELINE-NEXT: orb %al, %r8b ; CHECK-BASELINE-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 26(%r14), %al +; CHECK-BASELINE-NEXT: andb %r9b, %al +; CHECK-BASELINE-NEXT: notb %r9b +; CHECK-BASELINE-NEXT: andb 26(%rdx), %r9b +; CHECK-BASELINE-NEXT: orb %al, %r9b +; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-BASELINE-NEXT: movb 25(%r14), %al ; CHECK-BASELINE-NEXT: andb %r13b, %al ; CHECK-BASELINE-NEXT: notb %r13b -; CHECK-BASELINE-NEXT: andb 26(%rdx), %r13b +; CHECK-BASELINE-NEXT: andb 25(%rdx), %r13b ; CHECK-BASELINE-NEXT: orb %al, %r13b ; CHECK-BASELINE-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 25(%r14), %al +; CHECK-BASELINE-NEXT: movb 24(%r14), %al ; CHECK-BASELINE-NEXT: andb %bpl, %al ; CHECK-BASELINE-NEXT: notb %bpl -; CHECK-BASELINE-NEXT: andb 25(%rdx), %bpl +; CHECK-BASELINE-NEXT: andb 24(%rdx), %bpl ; CHECK-BASELINE-NEXT: orb %al, %bpl ; CHECK-BASELINE-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 24(%r14), %al +; CHECK-BASELINE-NEXT: movb 23(%r14), %al ; CHECK-BASELINE-NEXT: andb %r11b, %al ; CHECK-BASELINE-NEXT: notb %r11b -; CHECK-BASELINE-NEXT: andb 24(%rdx), %r11b +; CHECK-BASELINE-NEXT: andb 23(%rdx), %r11b ; CHECK-BASELINE-NEXT: orb %al, %r11b ; CHECK-BASELINE-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 23(%r14), %al +; CHECK-BASELINE-NEXT: movb 22(%r14), %al ; CHECK-BASELINE-NEXT: andb %r10b, %al ; CHECK-BASELINE-NEXT: notb %r10b -; CHECK-BASELINE-NEXT: andb 23(%rdx), %r10b +; CHECK-BASELINE-NEXT: andb 22(%rdx), %r10b ; CHECK-BASELINE-NEXT: orb %al, %r10b ; CHECK-BASELINE-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 22(%r14), %al -; CHECK-BASELINE-NEXT: andb %r9b, %al -; CHECK-BASELINE-NEXT: notb %r9b -; CHECK-BASELINE-NEXT: andb 22(%rdx), %r9b -; CHECK-BASELINE-NEXT: orb %al, %r9b -; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 21(%r14), %al ; CHECK-BASELINE-NEXT: andb %r12b, %al ; CHECK-BASELINE-NEXT: notb %r12b @@ -1466,7 +1468,6 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: andb %cl, %al ; CHECK-BASELINE-NEXT: notb %cl -; CHECK-BASELINE-NEXT: movq %rdx, %rbx ; CHECK-BASELINE-NEXT: andb 17(%rdx), %cl ; CHECK-BASELINE-NEXT: orb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill @@ -1474,11 +1475,12 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: andb %cl, %al ; CHECK-BASELINE-NEXT: notb %cl +; CHECK-BASELINE-NEXT: movq %rdx, %rbx ; CHECK-BASELINE-NEXT: andb 16(%rdx), %cl ; CHECK-BASELINE-NEXT: orb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 15(%r15), %cl ; CHECK-BASELINE-NEXT: movb 15(%r14), %al +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: andb %cl, %al ; CHECK-BASELINE-NEXT: notb %cl ; CHECK-BASELINE-NEXT: andb 15(%rdx), %cl @@ -1650,6 +1652,8 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-SSE1-NEXT: movq %rcx, %r15 ; CHECK-SSE1-NEXT: movq %rsi, %r14 ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-SSE1-NEXT: movb 15(%rcx), %al +; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 16(%rcx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 17(%rcx), %al @@ -1661,11 +1665,11 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-SSE1-NEXT: movb 20(%rcx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 21(%rcx), %r12b -; CHECK-SSE1-NEXT: movb 22(%rcx), %r9b -; CHECK-SSE1-NEXT: movb 23(%rcx), %r10b -; CHECK-SSE1-NEXT: movb 24(%rcx), %r11b -; CHECK-SSE1-NEXT: movb 25(%rcx), %bpl -; CHECK-SSE1-NEXT: movb 26(%rcx), %r13b +; CHECK-SSE1-NEXT: movb 22(%rcx), %r10b +; CHECK-SSE1-NEXT: movb 23(%rcx), %r11b +; CHECK-SSE1-NEXT: movb 24(%rcx), %bpl +; CHECK-SSE1-NEXT: movb 25(%rcx), %r13b +; CHECK-SSE1-NEXT: movb 26(%rcx), %r9b ; CHECK-SSE1-NEXT: movb 27(%rcx), %r8b ; CHECK-SSE1-NEXT: movb 28(%rcx), %dil ; CHECK-SSE1-NEXT: movb 29(%rcx), %sil @@ -1702,35 +1706,35 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-SSE1-NEXT: orb %al, %r8b ; CHECK-SSE1-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 26(%r14), %al +; CHECK-SSE1-NEXT: andb %r9b, %al +; CHECK-SSE1-NEXT: notb %r9b +; CHECK-SSE1-NEXT: andb 26(%rdx), %r9b +; CHECK-SSE1-NEXT: orb %al, %r9b +; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-SSE1-NEXT: movb 25(%r14), %al ; CHECK-SSE1-NEXT: andb %r13b, %al ; CHECK-SSE1-NEXT: notb %r13b -; CHECK-SSE1-NEXT: andb 26(%rdx), %r13b +; CHECK-SSE1-NEXT: andb 25(%rdx), %r13b ; CHECK-SSE1-NEXT: orb %al, %r13b ; CHECK-SSE1-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 25(%r14), %al +; CHECK-SSE1-NEXT: movb 24(%r14), %al ; CHECK-SSE1-NEXT: andb %bpl, %al ; CHECK-SSE1-NEXT: notb %bpl -; CHECK-SSE1-NEXT: andb 25(%rdx), %bpl +; CHECK-SSE1-NEXT: andb 24(%rdx), %bpl ; CHECK-SSE1-NEXT: orb %al, %bpl ; CHECK-SSE1-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 24(%r14), %al +; CHECK-SSE1-NEXT: movb 23(%r14), %al ; CHECK-SSE1-NEXT: andb %r11b, %al ; CHECK-SSE1-NEXT: notb %r11b -; CHECK-SSE1-NEXT: andb 24(%rdx), %r11b +; CHECK-SSE1-NEXT: andb 23(%rdx), %r11b ; CHECK-SSE1-NEXT: orb %al, %r11b ; CHECK-SSE1-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 23(%r14), %al +; CHECK-SSE1-NEXT: movb 22(%r14), %al ; CHECK-SSE1-NEXT: andb %r10b, %al ; CHECK-SSE1-NEXT: notb %r10b -; CHECK-SSE1-NEXT: andb 23(%rdx), %r10b +; CHECK-SSE1-NEXT: andb 22(%rdx), %r10b ; CHECK-SSE1-NEXT: orb %al, %r10b ; CHECK-SSE1-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 22(%r14), %al -; CHECK-SSE1-NEXT: andb %r9b, %al -; CHECK-SSE1-NEXT: notb %r9b -; CHECK-SSE1-NEXT: andb 22(%rdx), %r9b -; CHECK-SSE1-NEXT: orb %al, %r9b -; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 21(%r14), %al ; CHECK-SSE1-NEXT: andb %r12b, %al ; CHECK-SSE1-NEXT: notb %r12b @@ -1762,7 +1766,6 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: andb %cl, %al ; CHECK-SSE1-NEXT: notb %cl -; CHECK-SSE1-NEXT: movq %rdx, %rbx ; CHECK-SSE1-NEXT: andb 17(%rdx), %cl ; CHECK-SSE1-NEXT: orb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill @@ -1770,11 +1773,12 @@ define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) n ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: andb %cl, %al ; CHECK-SSE1-NEXT: notb %cl +; CHECK-SSE1-NEXT: movq %rdx, %rbx ; CHECK-SSE1-NEXT: andb 16(%rdx), %cl ; CHECK-SSE1-NEXT: orb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 15(%r15), %cl ; CHECK-SSE1-NEXT: movb 15(%r14), %al +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: andb %cl, %al ; CHECK-SSE1-NEXT: notb %cl ; CHECK-SSE1-NEXT: andb 15(%rdx), %cl @@ -3535,7 +3539,9 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-BASELINE-NEXT: movq %rdx, %r13 ; CHECK-BASELINE-NEXT: movq %rsi, %rbx ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-BASELINE-NEXT: movb 15(%rdx), %r12b +; CHECK-BASELINE-NEXT: movb 16(%rdx), %r12b +; CHECK-BASELINE-NEXT: movb 15(%rdx), %al +; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 14(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 13(%rdx), %al @@ -3546,13 +3552,13 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 10(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 9(%rdx), %r9b -; CHECK-BASELINE-NEXT: movb 8(%rdx), %r10b -; CHECK-BASELINE-NEXT: movb 7(%rdx), %r11b +; CHECK-BASELINE-NEXT: movb 9(%rdx), %r10b +; CHECK-BASELINE-NEXT: movb 8(%rdx), %r11b +; CHECK-BASELINE-NEXT: movb 7(%rdx), %r9b ; CHECK-BASELINE-NEXT: movb 6(%rdx), %r8b ; CHECK-BASELINE-NEXT: movb 5(%rdx), %bpl -; CHECK-BASELINE-NEXT: movb 4(%rdx), %sil -; CHECK-BASELINE-NEXT: movb 3(%rdx), %dil +; CHECK-BASELINE-NEXT: movb 4(%rdx), %dil +; CHECK-BASELINE-NEXT: movb 3(%rdx), %sil ; CHECK-BASELINE-NEXT: movb 2(%rdx), %r14b ; CHECK-BASELINE-NEXT: movb (%rdx), %al ; CHECK-BASELINE-NEXT: movb 1(%rdx), %r15b @@ -3572,14 +3578,14 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 3(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %dil, %al +; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: andb 3(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %dil, %al +; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 4(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %sil, %al +; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: andb 4(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %sil, %al +; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 5(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al @@ -3592,19 +3598,19 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-BASELINE-NEXT: xorb %r8b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 7(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %r11b, %al +; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: andb 7(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %r11b, %al +; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 8(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %r10b, %al +; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: andb 8(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %r10b, %al +; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 9(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %r9b, %al +; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: andb 9(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %r9b, %al +; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 10(%rbx), %dl ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload @@ -3636,17 +3642,17 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 15(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %r12b, %al -; CHECK-BASELINE-NEXT: andb 15(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %r12b, %al -; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 16(%r13), %al -; CHECK-BASELINE-NEXT: movb 16(%rbx), %dl +; CHECK-BASELINE-NEXT: movb 15(%rbx), %dl +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: andb 16(%rcx), %dl +; CHECK-BASELINE-NEXT: andb 15(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-BASELINE-NEXT: movb 16(%rbx), %al +; CHECK-BASELINE-NEXT: xorb %r12b, %al +; CHECK-BASELINE-NEXT: andb 16(%rcx), %al +; CHECK-BASELINE-NEXT: xorb %r12b, %al +; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 17(%r13), %al ; CHECK-BASELINE-NEXT: movb 17(%rbx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl @@ -3663,18 +3669,18 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-BASELINE-NEXT: movb 19(%rbx), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b +; CHECK-BASELINE-NEXT: movq %rcx, %rdx ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: movb 20(%r13), %al -; CHECK-BASELINE-NEXT: movb 20(%rbx), %r15b -; CHECK-BASELINE-NEXT: xorb %al, %r15b -; CHECK-BASELINE-NEXT: andb 20(%rcx), %r15b -; CHECK-BASELINE-NEXT: movq %rcx, %rsi -; CHECK-BASELINE-NEXT: xorb %al, %r15b -; CHECK-BASELINE-NEXT: movb 21(%r13), %al -; CHECK-BASELINE-NEXT: movb 21(%rbx), %r14b +; CHECK-BASELINE-NEXT: movb 20(%rbx), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b -; CHECK-BASELINE-NEXT: andb 21(%rcx), %r14b +; CHECK-BASELINE-NEXT: andb 20(%rcx), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b +; CHECK-BASELINE-NEXT: movb 21(%r13), %al +; CHECK-BASELINE-NEXT: movb 21(%rbx), %r15b +; CHECK-BASELINE-NEXT: xorb %al, %r15b +; CHECK-BASELINE-NEXT: andb 21(%rcx), %r15b +; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: movb 22(%r13), %al ; CHECK-BASELINE-NEXT: movb 22(%rbx), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl @@ -3706,39 +3712,39 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: movb 28(%r13), %al -; CHECK-BASELINE-NEXT: movb 28(%rbx), %dl -; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: andb 28(%rcx), %dl -; CHECK-BASELINE-NEXT: xorb %al, %dl +; CHECK-BASELINE-NEXT: movb 28(%rbx), %sil +; CHECK-BASELINE-NEXT: xorb %al, %sil +; CHECK-BASELINE-NEXT: andb 28(%rcx), %sil +; CHECK-BASELINE-NEXT: xorb %al, %sil ; CHECK-BASELINE-NEXT: movb 29(%r13), %al ; CHECK-BASELINE-NEXT: movb 29(%rbx), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: andb 29(%rsi), %cl +; CHECK-BASELINE-NEXT: andb 29(%rdx), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb 30(%r13), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 30(%rbx), %al ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload -; CHECK-BASELINE-NEXT: andb 30(%rsi), %al +; CHECK-BASELINE-NEXT: andb 30(%rdx), %al ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb 31(%r13), %r13b ; CHECK-BASELINE-NEXT: movb 31(%rbx), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl -; CHECK-BASELINE-NEXT: andb 31(%rsi), %bl +; CHECK-BASELINE-NEXT: andb 31(%rdx), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; CHECK-BASELINE-NEXT: movb %bl, 31(%r13) ; CHECK-BASELINE-NEXT: movb %al, 30(%r13) ; CHECK-BASELINE-NEXT: movb %cl, 29(%r13) -; CHECK-BASELINE-NEXT: movb %dl, 28(%r13) +; CHECK-BASELINE-NEXT: movb %sil, 28(%r13) ; CHECK-BASELINE-NEXT: movb %dil, 27(%r13) ; CHECK-BASELINE-NEXT: movb %r8b, 26(%r13) ; CHECK-BASELINE-NEXT: movb %r9b, 25(%r13) ; CHECK-BASELINE-NEXT: movb %r10b, 24(%r13) ; CHECK-BASELINE-NEXT: movb %r11b, 23(%r13) ; CHECK-BASELINE-NEXT: movb %bpl, 22(%r13) -; CHECK-BASELINE-NEXT: movb %r14b, 21(%r13) -; CHECK-BASELINE-NEXT: movb %r15b, 20(%r13) +; CHECK-BASELINE-NEXT: movb %r15b, 21(%r13) +; CHECK-BASELINE-NEXT: movb %r14b, 20(%r13) ; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13) ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 18(%r13) @@ -3798,7 +3804,9 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-SSE1-NEXT: movq %rdx, %r13 ; CHECK-SSE1-NEXT: movq %rsi, %rbx ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-SSE1-NEXT: movb 15(%rdx), %r12b +; CHECK-SSE1-NEXT: movb 16(%rdx), %r12b +; CHECK-SSE1-NEXT: movb 15(%rdx), %al +; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 14(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 13(%rdx), %al @@ -3809,13 +3817,13 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 10(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 9(%rdx), %r9b -; CHECK-SSE1-NEXT: movb 8(%rdx), %r10b -; CHECK-SSE1-NEXT: movb 7(%rdx), %r11b +; CHECK-SSE1-NEXT: movb 9(%rdx), %r10b +; CHECK-SSE1-NEXT: movb 8(%rdx), %r11b +; CHECK-SSE1-NEXT: movb 7(%rdx), %r9b ; CHECK-SSE1-NEXT: movb 6(%rdx), %r8b ; CHECK-SSE1-NEXT: movb 5(%rdx), %bpl -; CHECK-SSE1-NEXT: movb 4(%rdx), %sil -; CHECK-SSE1-NEXT: movb 3(%rdx), %dil +; CHECK-SSE1-NEXT: movb 4(%rdx), %dil +; CHECK-SSE1-NEXT: movb 3(%rdx), %sil ; CHECK-SSE1-NEXT: movb 2(%rdx), %r14b ; CHECK-SSE1-NEXT: movb (%rdx), %al ; CHECK-SSE1-NEXT: movb 1(%rdx), %r15b @@ -3835,14 +3843,14 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 3(%rbx), %al -; CHECK-SSE1-NEXT: xorb %dil, %al +; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: andb 3(%rcx), %al -; CHECK-SSE1-NEXT: xorb %dil, %al +; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 4(%rbx), %al -; CHECK-SSE1-NEXT: xorb %sil, %al +; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: andb 4(%rcx), %al -; CHECK-SSE1-NEXT: xorb %sil, %al +; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 5(%rbx), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al @@ -3855,19 +3863,19 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-SSE1-NEXT: xorb %r8b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 7(%rbx), %al -; CHECK-SSE1-NEXT: xorb %r11b, %al +; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: andb 7(%rcx), %al -; CHECK-SSE1-NEXT: xorb %r11b, %al +; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 8(%rbx), %al -; CHECK-SSE1-NEXT: xorb %r10b, %al +; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: andb 8(%rcx), %al -; CHECK-SSE1-NEXT: xorb %r10b, %al +; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 9(%rbx), %al -; CHECK-SSE1-NEXT: xorb %r9b, %al +; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: andb 9(%rcx), %al -; CHECK-SSE1-NEXT: xorb %r9b, %al +; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 10(%rbx), %dl ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload @@ -3899,17 +3907,17 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-SSE1-NEXT: andb 14(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 15(%rbx), %al -; CHECK-SSE1-NEXT: xorb %r12b, %al -; CHECK-SSE1-NEXT: andb 15(%rcx), %al -; CHECK-SSE1-NEXT: xorb %r12b, %al -; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 16(%r13), %al -; CHECK-SSE1-NEXT: movb 16(%rbx), %dl +; CHECK-SSE1-NEXT: movb 15(%rbx), %dl +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: andb 16(%rcx), %dl +; CHECK-SSE1-NEXT: andb 15(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-SSE1-NEXT: movb 16(%rbx), %al +; CHECK-SSE1-NEXT: xorb %r12b, %al +; CHECK-SSE1-NEXT: andb 16(%rcx), %al +; CHECK-SSE1-NEXT: xorb %r12b, %al +; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 17(%r13), %al ; CHECK-SSE1-NEXT: movb 17(%rbx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl @@ -3926,18 +3934,18 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-SSE1-NEXT: movb 19(%rbx), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b +; CHECK-SSE1-NEXT: movq %rcx, %rdx ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: movb 20(%r13), %al -; CHECK-SSE1-NEXT: movb 20(%rbx), %r15b -; CHECK-SSE1-NEXT: xorb %al, %r15b -; CHECK-SSE1-NEXT: andb 20(%rcx), %r15b -; CHECK-SSE1-NEXT: movq %rcx, %rsi -; CHECK-SSE1-NEXT: xorb %al, %r15b -; CHECK-SSE1-NEXT: movb 21(%r13), %al -; CHECK-SSE1-NEXT: movb 21(%rbx), %r14b +; CHECK-SSE1-NEXT: movb 20(%rbx), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b -; CHECK-SSE1-NEXT: andb 21(%rcx), %r14b +; CHECK-SSE1-NEXT: andb 20(%rcx), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b +; CHECK-SSE1-NEXT: movb 21(%r13), %al +; CHECK-SSE1-NEXT: movb 21(%rbx), %r15b +; CHECK-SSE1-NEXT: xorb %al, %r15b +; CHECK-SSE1-NEXT: andb 21(%rcx), %r15b +; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: movb 22(%r13), %al ; CHECK-SSE1-NEXT: movb 22(%rbx), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl @@ -3969,39 +3977,39 @@ define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) no ; CHECK-SSE1-NEXT: andb 27(%rcx), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: movb 28(%r13), %al -; CHECK-SSE1-NEXT: movb 28(%rbx), %dl -; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: andb 28(%rcx), %dl -; CHECK-SSE1-NEXT: xorb %al, %dl +; CHECK-SSE1-NEXT: movb 28(%rbx), %sil +; CHECK-SSE1-NEXT: xorb %al, %sil +; CHECK-SSE1-NEXT: andb 28(%rcx), %sil +; CHECK-SSE1-NEXT: xorb %al, %sil ; CHECK-SSE1-NEXT: movb 29(%r13), %al ; CHECK-SSE1-NEXT: movb 29(%rbx), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: andb 29(%rsi), %cl +; CHECK-SSE1-NEXT: andb 29(%rdx), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb 30(%r13), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 30(%rbx), %al ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload -; CHECK-SSE1-NEXT: andb 30(%rsi), %al +; CHECK-SSE1-NEXT: andb 30(%rdx), %al ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb 31(%r13), %r13b ; CHECK-SSE1-NEXT: movb 31(%rbx), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl -; CHECK-SSE1-NEXT: andb 31(%rsi), %bl +; CHECK-SSE1-NEXT: andb 31(%rdx), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; CHECK-SSE1-NEXT: movb %bl, 31(%r13) ; CHECK-SSE1-NEXT: movb %al, 30(%r13) ; CHECK-SSE1-NEXT: movb %cl, 29(%r13) -; CHECK-SSE1-NEXT: movb %dl, 28(%r13) +; CHECK-SSE1-NEXT: movb %sil, 28(%r13) ; CHECK-SSE1-NEXT: movb %dil, 27(%r13) ; CHECK-SSE1-NEXT: movb %r8b, 26(%r13) ; CHECK-SSE1-NEXT: movb %r9b, 25(%r13) ; CHECK-SSE1-NEXT: movb %r10b, 24(%r13) ; CHECK-SSE1-NEXT: movb %r11b, 23(%r13) ; CHECK-SSE1-NEXT: movb %bpl, 22(%r13) -; CHECK-SSE1-NEXT: movb %r14b, 21(%r13) -; CHECK-SSE1-NEXT: movb %r15b, 20(%r13) +; CHECK-SSE1-NEXT: movb %r15b, 21(%r13) +; CHECK-SSE1-NEXT: movb %r14b, 20(%r13) ; CHECK-SSE1-NEXT: movb %r12b, 19(%r13) ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 18(%r13) -- cgit v1.2.3