xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===- X86FixupSetCC.cpp - fix zero-extension of setcc patterns -----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file defines a pass that fixes zero-extension of setcc patterns.
10  // X86 setcc instructions are modeled to have no input arguments, and a single
11  // GR8 output argument. This is consistent with other similar instructions
12  // (e.g. movb), but means it is impossible to directly generate a setcc into
13  // the lower GR8 of a specified GR32.
14  // This means that ISel must select (zext (setcc)) into something like
15  // seta %al; movzbl %al, %eax.
16  // Unfortunately, this can cause a stall due to the partial register write
17  // performed by the setcc. Instead, we can use:
18  // xor %eax, %eax; seta %al
19  // This both avoids the stall, and encodes shorter.
20  //
21  // Furthermore, we can use:
22  // setzua %al
23  // if feature zero-upper is available. It's faster than the xor+setcc sequence.
24  // When r16-r31 is used, it even encodes shorter.
25  //===----------------------------------------------------------------------===//
26  
27  #include "X86.h"
28  #include "X86InstrInfo.h"
29  #include "X86Subtarget.h"
30  #include "llvm/ADT/Statistic.h"
31  #include "llvm/CodeGen/MachineFunctionPass.h"
32  #include "llvm/CodeGen/MachineInstrBuilder.h"
33  #include "llvm/CodeGen/MachineRegisterInfo.h"
34  
35  using namespace llvm;
36  
37  #define DEBUG_TYPE "x86-fixup-setcc"
38  
39  STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted");
40  
41  namespace {
42  class X86FixupSetCCPass : public MachineFunctionPass {
43  public:
44    static char ID;
45  
X86FixupSetCCPass()46    X86FixupSetCCPass() : MachineFunctionPass(ID) {}
47  
getPassName() const48    StringRef getPassName() const override { return "X86 Fixup SetCC"; }
49  
50    bool runOnMachineFunction(MachineFunction &MF) override;
51  
52  private:
53    MachineRegisterInfo *MRI = nullptr;
54    const X86Subtarget *ST = nullptr;
55    const X86InstrInfo *TII = nullptr;
56  
57    enum { SearchBound = 16 };
58  };
59  } // end anonymous namespace
60  
61  char X86FixupSetCCPass::ID = 0;
62  
INITIALIZE_PASS(X86FixupSetCCPass,DEBUG_TYPE,DEBUG_TYPE,false,false)63  INITIALIZE_PASS(X86FixupSetCCPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
64  
65  FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
66  
runOnMachineFunction(MachineFunction & MF)67  bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
68    bool Changed = false;
69    MRI = &MF.getRegInfo();
70    ST = &MF.getSubtarget<X86Subtarget>();
71    TII = ST->getInstrInfo();
72  
73    SmallVector<MachineInstr*, 4> ToErase;
74  
75    for (auto &MBB : MF) {
76      MachineInstr *FlagsDefMI = nullptr;
77      for (auto &MI : MBB) {
78        // Remember the most recent preceding eflags defining instruction.
79        if (MI.definesRegister(X86::EFLAGS, /*TRI=*/nullptr))
80          FlagsDefMI = &MI;
81  
82        // Find a setcc that is used by a zext.
83        // This doesn't have to be the only use, the transformation is safe
84        // regardless.
85        if (MI.getOpcode() != X86::SETCCr)
86          continue;
87  
88        MachineInstr *ZExt = nullptr;
89        Register Reg0 = MI.getOperand(0).getReg();
90        for (auto &Use : MRI->use_instructions(Reg0))
91          if (Use.getOpcode() == X86::MOVZX32rr8)
92            ZExt = &Use;
93  
94        if (!ZExt)
95          continue;
96  
97        if (!FlagsDefMI)
98          continue;
99  
100        // We'd like to put something that clobbers eflags directly before
101        // FlagsDefMI. This can't hurt anything after FlagsDefMI, because
102        // it, itself, by definition, clobbers eflags. But it may happen that
103        // FlagsDefMI also *uses* eflags, in which case the transformation is
104        // invalid.
105        if (FlagsDefMI->readsRegister(X86::EFLAGS, /*TRI=*/nullptr))
106          continue;
107  
108        // On 32-bit, we need to be careful to force an ABCD register.
109        const TargetRegisterClass *RC =
110            ST->is64Bit() ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass;
111        if (!MRI->constrainRegClass(ZExt->getOperand(0).getReg(), RC)) {
112          // If we cannot constrain the register, we would need an additional copy
113          // and are better off keeping the MOVZX32rr8 we have now.
114          continue;
115        }
116  
117        ++NumSubstZexts;
118        Changed = true;
119  
120        // X86 setcc/setzucc only takes an output GR8, so fake a GR32 input by
121        // inserting the setcc/setzucc result into the low byte of the zeroed
122        // register.
123        Register ZeroReg = MRI->createVirtualRegister(RC);
124        if (ST->hasZU()) {
125          MI.setDesc(TII->get(X86::SETZUCCr));
126          BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
127                  TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
128        } else {
129          // Initialize a register with 0. This must go before the eflags def
130          BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
131                  ZeroReg);
132        }
133  
134        BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
135                TII->get(X86::INSERT_SUBREG), ZExt->getOperand(0).getReg())
136            .addReg(ZeroReg)
137            .addReg(Reg0)
138            .addImm(X86::sub_8bit);
139        ToErase.push_back(ZExt);
140      }
141    }
142  
143    for (auto &I : ToErase)
144      I->eraseFromParent();
145  
146    return Changed;
147  }
148