//===- X86FixupSetCC.cpp - fix zero-extension of setcc patterns -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that fixes zero-extension of setcc patterns.
// X86 setcc instructions are modeled to have no input arguments, and a single
// GR8 output argument. This is consistent with other similar instructions
// (e.g. movb), but means it is impossible to directly generate a setcc into
// the lower GR8 of a specified GR32.
// This means that ISel must select (zext (setcc)) into something like
//   seta %al; movzbl %al, %eax.
// Unfortunately, this can cause a stall due to the partial register write
// performed by the setcc. Instead, we can use:
//   xor %eax, %eax; seta %al
// This both avoids the stall, and encodes shorter.
//
// Furthermore, we can use:
//   setzua %al
// if feature zero-upper is available. It's faster than the xor+setcc sequence.
// When r16-r31 is used, it even encodes shorter.
25 //===----------------------------------------------------------------------===// 26 27 #include "X86.h" 28 #include "X86InstrInfo.h" 29 #include "X86Subtarget.h" 30 #include "llvm/ADT/Statistic.h" 31 #include "llvm/CodeGen/MachineFunctionPass.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "x86-fixup-setcc" 38 39 STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted"); 40 41 namespace { 42 class X86FixupSetCCPass : public MachineFunctionPass { 43 public: 44 static char ID; 45 46 X86FixupSetCCPass() : MachineFunctionPass(ID) {} 47 48 StringRef getPassName() const override { return "X86 Fixup SetCC"; } 49 50 bool runOnMachineFunction(MachineFunction &MF) override; 51 52 private: 53 MachineRegisterInfo *MRI = nullptr; 54 const X86Subtarget *ST = nullptr; 55 const X86InstrInfo *TII = nullptr; 56 57 enum { SearchBound = 16 }; 58 }; 59 } // end anonymous namespace 60 61 char X86FixupSetCCPass::ID = 0; 62 63 INITIALIZE_PASS(X86FixupSetCCPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 64 65 FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } 66 67 bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { 68 bool Changed = false; 69 MRI = &MF.getRegInfo(); 70 ST = &MF.getSubtarget<X86Subtarget>(); 71 TII = ST->getInstrInfo(); 72 73 SmallVector<MachineInstr*, 4> ToErase; 74 75 for (auto &MBB : MF) { 76 MachineInstr *FlagsDefMI = nullptr; 77 for (auto &MI : MBB) { 78 // Remember the most recent preceding eflags defining instruction. 79 if (MI.definesRegister(X86::EFLAGS, /*TRI=*/nullptr)) 80 FlagsDefMI = &MI; 81 82 // Find a setcc that is used by a zext. 83 // This doesn't have to be the only use, the transformation is safe 84 // regardless. 
85 if (MI.getOpcode() != X86::SETCCr) 86 continue; 87 88 MachineInstr *ZExt = nullptr; 89 Register Reg0 = MI.getOperand(0).getReg(); 90 for (auto &Use : MRI->use_instructions(Reg0)) 91 if (Use.getOpcode() == X86::MOVZX32rr8) 92 ZExt = &Use; 93 94 if (!ZExt) 95 continue; 96 97 if (!FlagsDefMI) 98 continue; 99 100 // We'd like to put something that clobbers eflags directly before 101 // FlagsDefMI. This can't hurt anything after FlagsDefMI, because 102 // it, itself, by definition, clobbers eflags. But it may happen that 103 // FlagsDefMI also *uses* eflags, in which case the transformation is 104 // invalid. 105 if (FlagsDefMI->readsRegister(X86::EFLAGS, /*TRI=*/nullptr)) 106 continue; 107 108 // On 32-bit, we need to be careful to force an ABCD register. 109 const TargetRegisterClass *RC = 110 ST->is64Bit() ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass; 111 if (!MRI->constrainRegClass(ZExt->getOperand(0).getReg(), RC)) { 112 // If we cannot constrain the register, we would need an additional copy 113 // and are better off keeping the MOVZX32rr8 we have now. 114 continue; 115 } 116 117 ++NumSubstZexts; 118 Changed = true; 119 120 // X86 setcc/setzucc only takes an output GR8, so fake a GR32 input by 121 // inserting the setcc/setzucc result into the low byte of the zeroed 122 // register. 123 Register ZeroReg = MRI->createVirtualRegister(RC); 124 if (ST->hasZU()) { 125 MI.setDesc(TII->get(X86::SETZUCCr)); 126 BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), 127 TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg); 128 } else { 129 // Initialize a register with 0. 
This must go before the eflags def 130 BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), 131 ZeroReg); 132 } 133 134 BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), 135 TII->get(X86::INSERT_SUBREG), ZExt->getOperand(0).getReg()) 136 .addReg(ZeroReg) 137 .addReg(Reg0) 138 .addImm(X86::sub_8bit); 139 ToErase.push_back(ZExt); 140 } 141 } 142 143 for (auto &I : ToErase) 144 I->eraseFromParent(); 145 146 return Changed; 147 } 148