1*0b57cec5SDimitry Andric //===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This file defines the pass which inserts x86 AVX vzeroupper instructions 10*0b57cec5SDimitry Andric // before calls to SSE encoded functions. This avoids transition latency 11*0b57cec5SDimitry Andric // penalty when transferring control between AVX encoded instructions and old 12*0b57cec5SDimitry Andric // SSE encoding mode. 13*0b57cec5SDimitry Andric // 14*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 15*0b57cec5SDimitry Andric 16*0b57cec5SDimitry Andric #include "X86.h" 17*0b57cec5SDimitry Andric #include "X86InstrInfo.h" 18*0b57cec5SDimitry Andric #include "X86Subtarget.h" 19*0b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 20*0b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 21*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 22*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 23*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 24*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 25*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 26*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 27*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 28*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 29*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 30*0b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h" 31*0b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 32*0b57cec5SDimitry Andric #include "llvm/IR/Function.h" 33*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 34*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 35*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 36*0b57cec5SDimitry Andric #include <cassert> 37*0b57cec5SDimitry Andric 38*0b57cec5SDimitry Andric using namespace llvm; 39*0b57cec5SDimitry Andric 40*0b57cec5SDimitry Andric #define DEBUG_TYPE "x86-vzeroupper" 41*0b57cec5SDimitry Andric 42*0b57cec5SDimitry Andric STATISTIC(NumVZU, "Number of vzeroupper instructions inserted"); 43*0b57cec5SDimitry Andric 44*0b57cec5SDimitry Andric namespace { 45*0b57cec5SDimitry Andric 46*0b57cec5SDimitry Andric class VZeroUpperInserter : public MachineFunctionPass { 47*0b57cec5SDimitry Andric public: 48*0b57cec5SDimitry Andric VZeroUpperInserter() : MachineFunctionPass(ID) {} 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 53*0b57cec5SDimitry Andric return MachineFunctionProperties().set( 54*0b57cec5SDimitry Andric MachineFunctionProperties::Property::NoVRegs); 55*0b57cec5SDimitry Andric } 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric StringRef getPassName() const override { return "X86 vzeroupper inserter"; } 58*0b57cec5SDimitry Andric 59*0b57cec5SDimitry Andric private: 60*0b57cec5SDimitry Andric void processBasicBlock(MachineBasicBlock &MBB); 61*0b57cec5SDimitry Andric void insertVZeroUpper(MachineBasicBlock::iterator I, 62*0b57cec5SDimitry Andric MachineBasicBlock &MBB); 63*0b57cec5SDimitry Andric void addDirtySuccessor(MachineBasicBlock &MBB); 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY }; 66*0b57cec5SDimitry Andric 67*0b57cec5SDimitry Andric static const char* getBlockExitStateName(BlockExitState ST); 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric // Core algorithm state: 70*0b57cec5SDimitry Andric // BlockState - Each block is either: 71*0b57cec5SDimitry Andric // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor 72*0b57cec5SDimitry Andric // vzeroupper instructions in this block. 73*0b57cec5SDimitry Andric // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this 74*0b57cec5SDimitry Andric // block that will ensure that YMM/ZMM is clean on exit. 75*0b57cec5SDimitry Andric // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no 76*0b57cec5SDimitry Andric // subsequent vzeroupper in the block clears it. 77*0b57cec5SDimitry Andric // 78*0b57cec5SDimitry Andric // AddedToDirtySuccessors - This flag is raised when a block is added to the 79*0b57cec5SDimitry Andric // DirtySuccessors list to ensure that it's not 80*0b57cec5SDimitry Andric // added multiple times. 81*0b57cec5SDimitry Andric // 82*0b57cec5SDimitry Andric // FirstUnguardedCall - Records the location of the first unguarded call in 83*0b57cec5SDimitry Andric // each basic block that may need to be guarded by a 84*0b57cec5SDimitry Andric // vzeroupper. We won't know whether it actually needs 85*0b57cec5SDimitry Andric // to be guarded until we discover a predecessor that 86*0b57cec5SDimitry Andric // is DIRTY_OUT. 87*0b57cec5SDimitry Andric struct BlockState { 88*0b57cec5SDimitry Andric BlockExitState ExitState = PASS_THROUGH; 89*0b57cec5SDimitry Andric bool AddedToDirtySuccessors = false; 90*0b57cec5SDimitry Andric MachineBasicBlock::iterator FirstUnguardedCall; 91*0b57cec5SDimitry Andric 92*0b57cec5SDimitry Andric BlockState() = default; 93*0b57cec5SDimitry Andric }; 94*0b57cec5SDimitry Andric 95*0b57cec5SDimitry Andric using BlockStateMap = SmallVector<BlockState, 8>; 96*0b57cec5SDimitry Andric using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>; 97*0b57cec5SDimitry Andric 98*0b57cec5SDimitry Andric BlockStateMap BlockStates; 99*0b57cec5SDimitry Andric DirtySuccessorsWorkList DirtySuccessors; 100*0b57cec5SDimitry Andric bool EverMadeChange; 101*0b57cec5SDimitry Andric bool IsX86INTR; 102*0b57cec5SDimitry Andric const TargetInstrInfo *TII; 103*0b57cec5SDimitry Andric 104*0b57cec5SDimitry Andric static char ID; 105*0b57cec5SDimitry Andric }; 106*0b57cec5SDimitry Andric 107*0b57cec5SDimitry Andric } // end anonymous namespace 108*0b57cec5SDimitry Andric 109*0b57cec5SDimitry Andric char VZeroUpperInserter::ID = 0; 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric FunctionPass *llvm::createX86IssueVZeroUpperPass() { 112*0b57cec5SDimitry Andric return new VZeroUpperInserter(); 113*0b57cec5SDimitry Andric } 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric #ifndef NDEBUG 116*0b57cec5SDimitry Andric const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) { 117*0b57cec5SDimitry Andric switch (ST) { 118*0b57cec5SDimitry Andric case PASS_THROUGH: return "Pass-through"; 119*0b57cec5SDimitry Andric case EXITS_DIRTY: return "Exits-dirty"; 120*0b57cec5SDimitry Andric case EXITS_CLEAN: return "Exits-clean"; 121*0b57cec5SDimitry Andric } 122*0b57cec5SDimitry Andric llvm_unreachable("Invalid block exit state."); 123*0b57cec5SDimitry Andric } 124*0b57cec5SDimitry Andric #endif 125*0b57cec5SDimitry Andric 126*0b57cec5SDimitry Andric /// VZEROUPPER cleans state that is related to Y/ZMM0-15 only. 127*0b57cec5SDimitry Andric /// Thus, there is no need to check for Y/ZMM16 and above. 128*0b57cec5SDimitry Andric static bool isYmmOrZmmReg(unsigned Reg) { 129*0b57cec5SDimitry Andric return (Reg >= X86::YMM0 && Reg <= X86::YMM15) || 130*0b57cec5SDimitry Andric (Reg >= X86::ZMM0 && Reg <= X86::ZMM15); 131*0b57cec5SDimitry Andric } 132*0b57cec5SDimitry Andric 133*0b57cec5SDimitry Andric static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) { 134*0b57cec5SDimitry Andric for (std::pair<unsigned, unsigned> LI : MRI.liveins()) 135*0b57cec5SDimitry Andric if (isYmmOrZmmReg(LI.first)) 136*0b57cec5SDimitry Andric return true; 137*0b57cec5SDimitry Andric 138*0b57cec5SDimitry Andric return false; 139*0b57cec5SDimitry Andric } 140*0b57cec5SDimitry Andric 141*0b57cec5SDimitry Andric static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) { 142*0b57cec5SDimitry Andric for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) { 143*0b57cec5SDimitry Andric if (!MO.clobbersPhysReg(reg)) 144*0b57cec5SDimitry Andric return false; 145*0b57cec5SDimitry Andric } 146*0b57cec5SDimitry Andric for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) { 147*0b57cec5SDimitry Andric if (!MO.clobbersPhysReg(reg)) 148*0b57cec5SDimitry Andric return false; 149*0b57cec5SDimitry Andric } 150*0b57cec5SDimitry Andric return true; 151*0b57cec5SDimitry Andric } 152*0b57cec5SDimitry Andric 153*0b57cec5SDimitry Andric static bool hasYmmOrZmmReg(MachineInstr &MI) { 154*0b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 155*0b57cec5SDimitry Andric if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO)) 156*0b57cec5SDimitry Andric return true; 157*0b57cec5SDimitry Andric if (!MO.isReg()) 158*0b57cec5SDimitry Andric continue; 159*0b57cec5SDimitry Andric if (MO.isDebug()) 160*0b57cec5SDimitry Andric continue; 161*0b57cec5SDimitry Andric if (isYmmOrZmmReg(MO.getReg())) 162*0b57cec5SDimitry Andric return true; 163*0b57cec5SDimitry Andric } 164*0b57cec5SDimitry Andric return false; 165*0b57cec5SDimitry Andric } 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric /// Check if given call instruction has a RegMask operand. 168*0b57cec5SDimitry Andric static bool callHasRegMask(MachineInstr &MI) { 169*0b57cec5SDimitry Andric assert(MI.isCall() && "Can only be called on call instructions."); 170*0b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 171*0b57cec5SDimitry Andric if (MO.isRegMask()) 172*0b57cec5SDimitry Andric return true; 173*0b57cec5SDimitry Andric } 174*0b57cec5SDimitry Andric return false; 175*0b57cec5SDimitry Andric } 176*0b57cec5SDimitry Andric 177*0b57cec5SDimitry Andric /// Insert a vzeroupper instruction before I. 178*0b57cec5SDimitry Andric void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I, 179*0b57cec5SDimitry Andric MachineBasicBlock &MBB) { 180*0b57cec5SDimitry Andric DebugLoc dl = I->getDebugLoc(); 181*0b57cec5SDimitry Andric BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER)); 182*0b57cec5SDimitry Andric ++NumVZU; 183*0b57cec5SDimitry Andric EverMadeChange = true; 184*0b57cec5SDimitry Andric } 185*0b57cec5SDimitry Andric 186*0b57cec5SDimitry Andric /// Add MBB to the DirtySuccessors list if it hasn't already been added. 187*0b57cec5SDimitry Andric void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) { 188*0b57cec5SDimitry Andric if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) { 189*0b57cec5SDimitry Andric DirtySuccessors.push_back(&MBB); 190*0b57cec5SDimitry Andric BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true; 191*0b57cec5SDimitry Andric } 192*0b57cec5SDimitry Andric } 193*0b57cec5SDimitry Andric 194*0b57cec5SDimitry Andric /// Loop over all of the instructions in the basic block, inserting vzeroupper 195*0b57cec5SDimitry Andric /// instructions before function calls. 196*0b57cec5SDimitry Andric void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { 197*0b57cec5SDimitry Andric // Start by assuming that the block is PASS_THROUGH which implies no unguarded 198*0b57cec5SDimitry Andric // calls. 199*0b57cec5SDimitry Andric BlockExitState CurState = PASS_THROUGH; 200*0b57cec5SDimitry Andric BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end(); 201*0b57cec5SDimitry Andric 202*0b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 203*0b57cec5SDimitry Andric bool IsCall = MI.isCall(); 204*0b57cec5SDimitry Andric bool IsReturn = MI.isReturn(); 205*0b57cec5SDimitry Andric bool IsControlFlow = IsCall || IsReturn; 206*0b57cec5SDimitry Andric 207*0b57cec5SDimitry Andric // No need for vzeroupper before iret in interrupt handler function, 208*0b57cec5SDimitry Andric // epilogue will restore YMM/ZMM registers if needed. 209*0b57cec5SDimitry Andric if (IsX86INTR && IsReturn) 210*0b57cec5SDimitry Andric continue; 211*0b57cec5SDimitry Andric 212*0b57cec5SDimitry Andric // An existing VZERO* instruction resets the state. 213*0b57cec5SDimitry Andric if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) { 214*0b57cec5SDimitry Andric CurState = EXITS_CLEAN; 215*0b57cec5SDimitry Andric continue; 216*0b57cec5SDimitry Andric } 217*0b57cec5SDimitry Andric 218*0b57cec5SDimitry Andric // Shortcut: don't need to check regular instructions in dirty state. 219*0b57cec5SDimitry Andric if (!IsControlFlow && CurState == EXITS_DIRTY) 220*0b57cec5SDimitry Andric continue; 221*0b57cec5SDimitry Andric 222*0b57cec5SDimitry Andric if (hasYmmOrZmmReg(MI)) { 223*0b57cec5SDimitry Andric // We found a ymm/zmm-using instruction; this could be an AVX/AVX512 224*0b57cec5SDimitry Andric // instruction, or it could be control flow. 225*0b57cec5SDimitry Andric CurState = EXITS_DIRTY; 226*0b57cec5SDimitry Andric continue; 227*0b57cec5SDimitry Andric } 228*0b57cec5SDimitry Andric 229*0b57cec5SDimitry Andric // Check for control-flow out of the current function (which might 230*0b57cec5SDimitry Andric // indirectly execute SSE instructions). 231*0b57cec5SDimitry Andric if (!IsControlFlow) 232*0b57cec5SDimitry Andric continue; 233*0b57cec5SDimitry Andric 234*0b57cec5SDimitry Andric // If the call has no RegMask, skip it as well. It usually happens on 235*0b57cec5SDimitry Andric // helper function calls (such as '_chkstk', '_ftol2') where standard 236*0b57cec5SDimitry Andric // calling convention is not used (RegMask is not used to mark register 237*0b57cec5SDimitry Andric // clobbered and register usage (def/implicit-def/use) is well-defined and 238*0b57cec5SDimitry Andric // explicitly specified. 239*0b57cec5SDimitry Andric if (IsCall && !callHasRegMask(MI)) 240*0b57cec5SDimitry Andric continue; 241*0b57cec5SDimitry Andric 242*0b57cec5SDimitry Andric // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15 243*0b57cec5SDimitry Andric // registers. In addition, the processor changes back to Clean state, after 244*0b57cec5SDimitry Andric // which execution of SSE instructions or AVX instructions has no transition 245*0b57cec5SDimitry Andric // penalty. Add the VZEROUPPER instruction before any function call/return 246*0b57cec5SDimitry Andric // that might execute SSE code. 247*0b57cec5SDimitry Andric // FIXME: In some cases, we may want to move the VZEROUPPER into a 248*0b57cec5SDimitry Andric // predecessor block. 249*0b57cec5SDimitry Andric if (CurState == EXITS_DIRTY) { 250*0b57cec5SDimitry Andric // After the inserted VZEROUPPER the state becomes clean again, but 251*0b57cec5SDimitry Andric // other YMM/ZMM may appear before other subsequent calls or even before 252*0b57cec5SDimitry Andric // the end of the BB. 253*0b57cec5SDimitry Andric insertVZeroUpper(MI, MBB); 254*0b57cec5SDimitry Andric CurState = EXITS_CLEAN; 255*0b57cec5SDimitry Andric } else if (CurState == PASS_THROUGH) { 256*0b57cec5SDimitry Andric // If this block is currently in pass-through state and we encounter a 257*0b57cec5SDimitry Andric // call then whether we need a vzeroupper or not depends on whether this 258*0b57cec5SDimitry Andric // block has successors that exit dirty. Record the location of the call, 259*0b57cec5SDimitry Andric // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet. 260*0b57cec5SDimitry Andric // It will be inserted later if necessary. 261*0b57cec5SDimitry Andric BlockStates[MBB.getNumber()].FirstUnguardedCall = MI; 262*0b57cec5SDimitry Andric CurState = EXITS_CLEAN; 263*0b57cec5SDimitry Andric } 264*0b57cec5SDimitry Andric } 265*0b57cec5SDimitry Andric 266*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: " 267*0b57cec5SDimitry Andric << getBlockExitStateName(CurState) << '\n'); 268*0b57cec5SDimitry Andric 269*0b57cec5SDimitry Andric if (CurState == EXITS_DIRTY) 270*0b57cec5SDimitry Andric for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), 271*0b57cec5SDimitry Andric SE = MBB.succ_end(); 272*0b57cec5SDimitry Andric SI != SE; ++SI) 273*0b57cec5SDimitry Andric addDirtySuccessor(**SI); 274*0b57cec5SDimitry Andric 275*0b57cec5SDimitry Andric BlockStates[MBB.getNumber()].ExitState = CurState; 276*0b57cec5SDimitry Andric } 277*0b57cec5SDimitry Andric 278*0b57cec5SDimitry Andric /// Loop over all of the basic blocks, inserting vzeroupper instructions before 279*0b57cec5SDimitry Andric /// function calls. 280*0b57cec5SDimitry Andric bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { 281*0b57cec5SDimitry Andric const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); 282*0b57cec5SDimitry Andric if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite()) 283*0b57cec5SDimitry Andric return false; 284*0b57cec5SDimitry Andric TII = ST.getInstrInfo(); 285*0b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 286*0b57cec5SDimitry Andric EverMadeChange = false; 287*0b57cec5SDimitry Andric IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR; 288*0b57cec5SDimitry Andric 289*0b57cec5SDimitry Andric bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI); 290*0b57cec5SDimitry Andric 291*0b57cec5SDimitry Andric // Fast check: if the function doesn't use any ymm/zmm registers, we don't 292*0b57cec5SDimitry Andric // need to insert any VZEROUPPER instructions. This is constant-time, so it 293*0b57cec5SDimitry Andric // is cheap in the common case of no ymm/zmm use. 294*0b57cec5SDimitry Andric bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm; 295*0b57cec5SDimitry Andric const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass}; 296*0b57cec5SDimitry Andric for (auto *RC : RCs) { 297*0b57cec5SDimitry Andric if (!YmmOrZmmUsed) { 298*0b57cec5SDimitry Andric for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; 299*0b57cec5SDimitry Andric i++) { 300*0b57cec5SDimitry Andric if (!MRI.reg_nodbg_empty(*i)) { 301*0b57cec5SDimitry Andric YmmOrZmmUsed = true; 302*0b57cec5SDimitry Andric break; 303*0b57cec5SDimitry Andric } 304*0b57cec5SDimitry Andric } 305*0b57cec5SDimitry Andric } 306*0b57cec5SDimitry Andric } 307*0b57cec5SDimitry Andric if (!YmmOrZmmUsed) { 308*0b57cec5SDimitry Andric return false; 309*0b57cec5SDimitry Andric } 310*0b57cec5SDimitry Andric 311*0b57cec5SDimitry Andric assert(BlockStates.empty() && DirtySuccessors.empty() && 312*0b57cec5SDimitry Andric "X86VZeroUpper state should be clear"); 313*0b57cec5SDimitry Andric BlockStates.resize(MF.getNumBlockIDs()); 314*0b57cec5SDimitry Andric 315*0b57cec5SDimitry Andric // Process all blocks. This will compute block exit states, record the first 316*0b57cec5SDimitry Andric // unguarded call in each block, and add successors of dirty blocks to the 317*0b57cec5SDimitry Andric // DirtySuccessors list. 318*0b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) 319*0b57cec5SDimitry Andric processBasicBlock(MBB); 320*0b57cec5SDimitry Andric 321*0b57cec5SDimitry Andric // If any YMM/ZMM regs are live-in to this function, add the entry block to 322*0b57cec5SDimitry Andric // the DirtySuccessors list 323*0b57cec5SDimitry Andric if (FnHasLiveInYmmOrZmm) 324*0b57cec5SDimitry Andric addDirtySuccessor(MF.front()); 325*0b57cec5SDimitry Andric 326*0b57cec5SDimitry Andric // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add 327*0b57cec5SDimitry Andric // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY 328*0b57cec5SDimitry Andric // through PASS_THROUGH blocks. 329*0b57cec5SDimitry Andric while (!DirtySuccessors.empty()) { 330*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *DirtySuccessors.back(); 331*0b57cec5SDimitry Andric DirtySuccessors.pop_back(); 332*0b57cec5SDimitry Andric BlockState &BBState = BlockStates[MBB.getNumber()]; 333*0b57cec5SDimitry Andric 334*0b57cec5SDimitry Andric // MBB is a successor of a dirty block, so its first call needs to be 335*0b57cec5SDimitry Andric // guarded. 336*0b57cec5SDimitry Andric if (BBState.FirstUnguardedCall != MBB.end()) 337*0b57cec5SDimitry Andric insertVZeroUpper(BBState.FirstUnguardedCall, MBB); 338*0b57cec5SDimitry Andric 339*0b57cec5SDimitry Andric // If this successor was a pass-through block, then it is now dirty. Its 340*0b57cec5SDimitry Andric // successors need to be added to the worklist (if they haven't been 341*0b57cec5SDimitry Andric // already). 342*0b57cec5SDimitry Andric if (BBState.ExitState == PASS_THROUGH) { 343*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() 344*0b57cec5SDimitry Andric << " was Pass-through, is now Dirty-out.\n"); 345*0b57cec5SDimitry Andric for (MachineBasicBlock *Succ : MBB.successors()) 346*0b57cec5SDimitry Andric addDirtySuccessor(*Succ); 347*0b57cec5SDimitry Andric } 348*0b57cec5SDimitry Andric } 349*0b57cec5SDimitry Andric 350*0b57cec5SDimitry Andric BlockStates.clear(); 351*0b57cec5SDimitry Andric return EverMadeChange; 352*0b57cec5SDimitry Andric } 353