diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2018-04-18 15:52:50 +0000 |
---|---|---|
committer | Pirama Arumuga Nainar <pirama@google.com> | 2018-04-27 14:03:19 -0700 |
commit | 1d739ffb0366421d383e04ff80ec2ee591315116 (patch) | |
tree | cc7ba2b038abdbbea54c2fa2a865bd13b1a0d6cb | |
parent | 395a261979d5c097f7ae7fb19ef452839acf3901 (diff) | |
download | llvm-1d739ffb0366421d383e04ff80ec2ee591315116.tar.gz |
[x86] Switch EFLAGS copy lowering to use reg-reg form of testing for
a zero register.
When I tried this previously, LLVM appeared unable to fold memory
operands into this form, such as for spill slot rematerialization.
However, as shown in this patch, it clearly works: when it is useful to
fold a memory operand, we turn these into `cmpb $0, <mem>` without issue.
That form has no disadvantage compared to `testb $-1, <mem>`. So overall,
this is likely no worse, and it may be slightly smaller in some cases due
to the `testb %reg, %reg` form.
Differential Revision: https://reviews.llvm.org/D45475
Change-Id: I162029e7a214e2dbeefa34c4f77558ee041042c8
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330269 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86FlagsCopyLowering.cpp | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/cmpxchg-clobber-flags.ll | 65 |
2 files changed, 66 insertions, 1 deletion
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp index 1fd1c704d79..9821eae2660 100644 --- a/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -636,7 +636,7 @@ void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB, // also allow us to select a shorter encoding of `testb %reg, %reg` when that // would be equivalent. auto TestI = - BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8ri)).addReg(Reg).addImm(-1); + BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg); (void)TestI; DEBUG(dbgs() << " test cond: "; TestI->dump()); ++NumTestsInserted; diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll index 8d289fa9fb0..04a13f3a56d 100644 --- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -158,6 +158,7 @@ cond.end: ; This one is an interesting case because CMOV doesn't have a chain ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here. 
+<<<<<<< HEAD define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { ; i386-LABEL: test_feed_cmov: ; i386: cmpxchgl @@ -204,6 +205,70 @@ define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { ; x8664-sahf-NEXT: sahf ; x8664-sahf-NEXT: popq %rax +======= +define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind { +; 32-GOOD-RA-LABEL: test_feed_cmov: +; 32-GOOD-RA: # %bb.0: # %entry +; 32-GOOD-RA-NEXT: pushl %ebx +; 32-GOOD-RA-NEXT: pushl %esi +; 32-GOOD-RA-NEXT: pushl %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx) +; 32-GOOD-RA-NEXT: sete %bl +; 32-GOOD-RA-NEXT: calll foo +; 32-GOOD-RA-NEXT: testb %bl, %bl +; 32-GOOD-RA-NEXT: jne .LBB2_2 +; 32-GOOD-RA-NEXT: # %bb.1: # %entry +; 32-GOOD-RA-NEXT: movl %eax, %esi +; 32-GOOD-RA-NEXT: .LBB2_2: # %entry +; 32-GOOD-RA-NEXT: movl %esi, %eax +; 32-GOOD-RA-NEXT: addl $4, %esp +; 32-GOOD-RA-NEXT: popl %esi +; 32-GOOD-RA-NEXT: popl %ebx +; 32-GOOD-RA-NEXT: retl +; +; 32-FAST-RA-LABEL: test_feed_cmov: +; 32-FAST-RA: # %bb.0: # %entry +; 32-FAST-RA-NEXT: pushl %ebx +; 32-FAST-RA-NEXT: pushl %esi +; 32-FAST-RA-NEXT: pushl %eax +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx) +; 32-FAST-RA-NEXT: sete %bl +; 32-FAST-RA-NEXT: calll foo +; 32-FAST-RA-NEXT: testb %bl, %bl +; 32-FAST-RA-NEXT: jne .LBB2_2 +; 32-FAST-RA-NEXT: # %bb.1: # %entry +; 32-FAST-RA-NEXT: movl %eax, %esi +; 32-FAST-RA-NEXT: .LBB2_2: # %entry +; 32-FAST-RA-NEXT: movl %esi, %eax +; 32-FAST-RA-NEXT: addl $4, %esp +; 32-FAST-RA-NEXT: popl %esi +; 32-FAST-RA-NEXT: popl %ebx +; 32-FAST-RA-NEXT: retl +; +; 64-ALL-LABEL: test_feed_cmov: +; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: pushq %rbp +; 64-ALL-NEXT: pushq %rbx +; 64-ALL-NEXT: pushq %rax +; 
64-ALL-NEXT: movl %edx, %ebx +; 64-ALL-NEXT: movl %esi, %eax +; 64-ALL-NEXT: lock cmpxchgl %edx, (%rdi) +; 64-ALL-NEXT: sete %bpl +; 64-ALL-NEXT: callq foo +; 64-ALL-NEXT: testb %bpl, %bpl +; 64-ALL-NEXT: cmovnel %ebx, %eax +; 64-ALL-NEXT: addq $8, %rsp +; 64-ALL-NEXT: popq %rbx +; 64-ALL-NEXT: popq %rbp +; 64-ALL-NEXT: retq +entry: +>>>>>>> bb1ae438ca5... [x86] Switch EFLAGS copy lowering to use reg-reg form of testing for %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %res, 1 |