//===- X86FixupSetCC.cpp - fix zero-extension of setcc patterns -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that fixes zero-extension of setcc patterns.
// X86 setcc instructions are modeled to have no input arguments, and a single
// GR8 output argument. This is consistent with other similar instructions
// (e.g. movb), but means it is impossible to directly generate a setcc into
// the lower GR8 of a specified GR32.
// This means that ISel must select (zext (setcc)) into something like
// seta %al; movzbl %al, %eax.
// Unfortunately, this can cause a stall due to the partial register write
// performed by the setcc. Instead, we can use:
// xor %eax, %eax; seta %al
// This both avoids the stall, and encodes shorter.
20*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 21*0b57cec5SDimitry Andric 22*0b57cec5SDimitry Andric #include "X86.h" 23*0b57cec5SDimitry Andric #include "X86InstrInfo.h" 24*0b57cec5SDimitry Andric #include "X86Subtarget.h" 25*0b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 26*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 27*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 28*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 29*0b57cec5SDimitry Andric 30*0b57cec5SDimitry Andric using namespace llvm; 31*0b57cec5SDimitry Andric 32*0b57cec5SDimitry Andric #define DEBUG_TYPE "x86-fixup-setcc" 33*0b57cec5SDimitry Andric 34*0b57cec5SDimitry Andric STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted"); 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric namespace { 37*0b57cec5SDimitry Andric class X86FixupSetCCPass : public MachineFunctionPass { 38*0b57cec5SDimitry Andric public: 39*0b57cec5SDimitry Andric X86FixupSetCCPass() : MachineFunctionPass(ID) {} 40*0b57cec5SDimitry Andric 41*0b57cec5SDimitry Andric StringRef getPassName() const override { return "X86 Fixup SetCC"; } 42*0b57cec5SDimitry Andric 43*0b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric private: 46*0b57cec5SDimitry Andric // Find the preceding instruction that imp-defs eflags. 47*0b57cec5SDimitry Andric MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB, 48*0b57cec5SDimitry Andric MachineBasicBlock::reverse_iterator MI); 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric // Return true if MI imp-uses eflags. 51*0b57cec5SDimitry Andric bool impUsesFlags(MachineInstr *MI); 52*0b57cec5SDimitry Andric 53*0b57cec5SDimitry Andric // Return true if this is the opcode of a SetCC instruction with a register 54*0b57cec5SDimitry Andric // output. 
55*0b57cec5SDimitry Andric bool isSetCCr(unsigned Opode); 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric MachineRegisterInfo *MRI; 58*0b57cec5SDimitry Andric const X86InstrInfo *TII; 59*0b57cec5SDimitry Andric 60*0b57cec5SDimitry Andric enum { SearchBound = 16 }; 61*0b57cec5SDimitry Andric 62*0b57cec5SDimitry Andric static char ID; 63*0b57cec5SDimitry Andric }; 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric char X86FixupSetCCPass::ID = 0; 66*0b57cec5SDimitry Andric } 67*0b57cec5SDimitry Andric 68*0b57cec5SDimitry Andric FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } 69*0b57cec5SDimitry Andric 70*0b57cec5SDimitry Andric // We expect the instruction *immediately* before the setcc to imp-def 71*0b57cec5SDimitry Andric // EFLAGS (because of scheduling glue). To make this less brittle w.r.t 72*0b57cec5SDimitry Andric // scheduling, look backwards until we hit the beginning of the 73*0b57cec5SDimitry Andric // basic-block, or a small bound (to avoid quadratic behavior). 74*0b57cec5SDimitry Andric MachineInstr * 75*0b57cec5SDimitry Andric X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB, 76*0b57cec5SDimitry Andric MachineBasicBlock::reverse_iterator MI) { 77*0b57cec5SDimitry Andric // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator? 
78*0b57cec5SDimitry Andric auto MBBStart = MBB->rend(); 79*0b57cec5SDimitry Andric for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI) 80*0b57cec5SDimitry Andric for (auto &Op : MI->implicit_operands()) 81*0b57cec5SDimitry Andric if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isDef()) 82*0b57cec5SDimitry Andric return &*MI; 83*0b57cec5SDimitry Andric 84*0b57cec5SDimitry Andric return nullptr; 85*0b57cec5SDimitry Andric } 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) { 88*0b57cec5SDimitry Andric for (auto &Op : MI->implicit_operands()) 89*0b57cec5SDimitry Andric if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isUse()) 90*0b57cec5SDimitry Andric return true; 91*0b57cec5SDimitry Andric 92*0b57cec5SDimitry Andric return false; 93*0b57cec5SDimitry Andric } 94*0b57cec5SDimitry Andric 95*0b57cec5SDimitry Andric bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { 96*0b57cec5SDimitry Andric bool Changed = false; 97*0b57cec5SDimitry Andric MRI = &MF.getRegInfo(); 98*0b57cec5SDimitry Andric TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric SmallVector<MachineInstr*, 4> ToErase; 101*0b57cec5SDimitry Andric 102*0b57cec5SDimitry Andric for (auto &MBB : MF) { 103*0b57cec5SDimitry Andric for (auto &MI : MBB) { 104*0b57cec5SDimitry Andric // Find a setcc that is used by a zext. 105*0b57cec5SDimitry Andric // This doesn't have to be the only use, the transformation is safe 106*0b57cec5SDimitry Andric // regardless. 
107*0b57cec5SDimitry Andric if (MI.getOpcode() != X86::SETCCr) 108*0b57cec5SDimitry Andric continue; 109*0b57cec5SDimitry Andric 110*0b57cec5SDimitry Andric MachineInstr *ZExt = nullptr; 111*0b57cec5SDimitry Andric for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg())) 112*0b57cec5SDimitry Andric if (Use.getOpcode() == X86::MOVZX32rr8) 113*0b57cec5SDimitry Andric ZExt = &Use; 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric if (!ZExt) 116*0b57cec5SDimitry Andric continue; 117*0b57cec5SDimitry Andric 118*0b57cec5SDimitry Andric // Find the preceding instruction that imp-defs eflags. 119*0b57cec5SDimitry Andric MachineInstr *FlagsDefMI = findFlagsImpDef( 120*0b57cec5SDimitry Andric MI.getParent(), MachineBasicBlock::reverse_iterator(&MI)); 121*0b57cec5SDimitry Andric if (!FlagsDefMI) 122*0b57cec5SDimitry Andric continue; 123*0b57cec5SDimitry Andric 124*0b57cec5SDimitry Andric // We'd like to put something that clobbers eflags directly before 125*0b57cec5SDimitry Andric // FlagsDefMI. This can't hurt anything after FlagsDefMI, because 126*0b57cec5SDimitry Andric // it, itself, by definition, clobbers eflags. But it may happen that 127*0b57cec5SDimitry Andric // FlagsDefMI also *uses* eflags, in which case the transformation is 128*0b57cec5SDimitry Andric // invalid. 129*0b57cec5SDimitry Andric if (impUsesFlags(FlagsDefMI)) 130*0b57cec5SDimitry Andric continue; 131*0b57cec5SDimitry Andric 132*0b57cec5SDimitry Andric ++NumSubstZexts; 133*0b57cec5SDimitry Andric Changed = true; 134*0b57cec5SDimitry Andric 135*0b57cec5SDimitry Andric // On 32-bit, we need to be careful to force an ABCD register. 136*0b57cec5SDimitry Andric const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit() 137*0b57cec5SDimitry Andric ? 
&X86::GR32RegClass 138*0b57cec5SDimitry Andric : &X86::GR32_ABCDRegClass; 139*0b57cec5SDimitry Andric unsigned ZeroReg = MRI->createVirtualRegister(RC); 140*0b57cec5SDimitry Andric unsigned InsertReg = MRI->createVirtualRegister(RC); 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric // Initialize a register with 0. This must go before the eflags def 143*0b57cec5SDimitry Andric BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), 144*0b57cec5SDimitry Andric ZeroReg); 145*0b57cec5SDimitry Andric 146*0b57cec5SDimitry Andric // X86 setcc only takes an output GR8, so fake a GR32 input by inserting 147*0b57cec5SDimitry Andric // the setcc result into the low byte of the zeroed register. 148*0b57cec5SDimitry Andric BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), 149*0b57cec5SDimitry Andric TII->get(X86::INSERT_SUBREG), InsertReg) 150*0b57cec5SDimitry Andric .addReg(ZeroReg) 151*0b57cec5SDimitry Andric .addReg(MI.getOperand(0).getReg()) 152*0b57cec5SDimitry Andric .addImm(X86::sub_8bit); 153*0b57cec5SDimitry Andric MRI->replaceRegWith(ZExt->getOperand(0).getReg(), InsertReg); 154*0b57cec5SDimitry Andric ToErase.push_back(ZExt); 155*0b57cec5SDimitry Andric } 156*0b57cec5SDimitry Andric } 157*0b57cec5SDimitry Andric 158*0b57cec5SDimitry Andric for (auto &I : ToErase) 159*0b57cec5SDimitry Andric I->eraseFromParent(); 160*0b57cec5SDimitry Andric 161*0b57cec5SDimitry Andric return Changed; 162*0b57cec5SDimitry Andric } 163