//===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric
160b57cec5SDimitry Andric #include "X86.h"
170b57cec5SDimitry Andric #include "X86InstrInfo.h"
180b57cec5SDimitry Andric #include "X86Subtarget.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
200b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
300b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h"
310b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
320b57cec5SDimitry Andric #include "llvm/IR/Function.h"
330b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
340b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
350b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
360b57cec5SDimitry Andric #include <cassert>
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric
400b57cec5SDimitry Andric #define DEBUG_TYPE "x86-vzeroupper"
410b57cec5SDimitry Andric
425ffd83dbSDimitry Andric static cl::opt<bool>
435ffd83dbSDimitry Andric UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
445ffd83dbSDimitry Andric cl::desc("Minimize AVX to SSE transition penalty"),
455ffd83dbSDimitry Andric cl::init(true));
465ffd83dbSDimitry Andric
470b57cec5SDimitry Andric STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric namespace {
500b57cec5SDimitry Andric
510b57cec5SDimitry Andric class VZeroUpperInserter : public MachineFunctionPass {
520b57cec5SDimitry Andric public:
VZeroUpperInserter()530b57cec5SDimitry Andric VZeroUpperInserter() : MachineFunctionPass(ID) {}
540b57cec5SDimitry Andric
550b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
560b57cec5SDimitry Andric
getRequiredProperties() const570b57cec5SDimitry Andric MachineFunctionProperties getRequiredProperties() const override {
580b57cec5SDimitry Andric return MachineFunctionProperties().set(
590b57cec5SDimitry Andric MachineFunctionProperties::Property::NoVRegs);
600b57cec5SDimitry Andric }
610b57cec5SDimitry Andric
getPassName() const620b57cec5SDimitry Andric StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric private:
650b57cec5SDimitry Andric void processBasicBlock(MachineBasicBlock &MBB);
660b57cec5SDimitry Andric void insertVZeroUpper(MachineBasicBlock::iterator I,
670b57cec5SDimitry Andric MachineBasicBlock &MBB);
680b57cec5SDimitry Andric void addDirtySuccessor(MachineBasicBlock &MBB);
690b57cec5SDimitry Andric
700b57cec5SDimitry Andric using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
710b57cec5SDimitry Andric
720b57cec5SDimitry Andric static const char* getBlockExitStateName(BlockExitState ST);
730b57cec5SDimitry Andric
740b57cec5SDimitry Andric // Core algorithm state:
750b57cec5SDimitry Andric // BlockState - Each block is either:
760b57cec5SDimitry Andric // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
770b57cec5SDimitry Andric // vzeroupper instructions in this block.
780b57cec5SDimitry Andric // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
790b57cec5SDimitry Andric // block that will ensure that YMM/ZMM is clean on exit.
800b57cec5SDimitry Andric // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
810b57cec5SDimitry Andric // subsequent vzeroupper in the block clears it.
820b57cec5SDimitry Andric //
830b57cec5SDimitry Andric // AddedToDirtySuccessors - This flag is raised when a block is added to the
840b57cec5SDimitry Andric // DirtySuccessors list to ensure that it's not
850b57cec5SDimitry Andric // added multiple times.
860b57cec5SDimitry Andric //
870b57cec5SDimitry Andric // FirstUnguardedCall - Records the location of the first unguarded call in
880b57cec5SDimitry Andric // each basic block that may need to be guarded by a
890b57cec5SDimitry Andric // vzeroupper. We won't know whether it actually needs
900b57cec5SDimitry Andric // to be guarded until we discover a predecessor that
910b57cec5SDimitry Andric // is DIRTY_OUT.
920b57cec5SDimitry Andric struct BlockState {
930b57cec5SDimitry Andric BlockExitState ExitState = PASS_THROUGH;
940b57cec5SDimitry Andric bool AddedToDirtySuccessors = false;
950b57cec5SDimitry Andric MachineBasicBlock::iterator FirstUnguardedCall;
960b57cec5SDimitry Andric
970b57cec5SDimitry Andric BlockState() = default;
980b57cec5SDimitry Andric };
990b57cec5SDimitry Andric
1000b57cec5SDimitry Andric using BlockStateMap = SmallVector<BlockState, 8>;
1010b57cec5SDimitry Andric using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric BlockStateMap BlockStates;
1040b57cec5SDimitry Andric DirtySuccessorsWorkList DirtySuccessors;
1050b57cec5SDimitry Andric bool EverMadeChange;
1060b57cec5SDimitry Andric bool IsX86INTR;
1070b57cec5SDimitry Andric const TargetInstrInfo *TII;
1080b57cec5SDimitry Andric
1090b57cec5SDimitry Andric static char ID;
1100b57cec5SDimitry Andric };
1110b57cec5SDimitry Andric
1120b57cec5SDimitry Andric } // end anonymous namespace
1130b57cec5SDimitry Andric
1140b57cec5SDimitry Andric char VZeroUpperInserter::ID = 0;
1150b57cec5SDimitry Andric
createX86IssueVZeroUpperPass()1160b57cec5SDimitry Andric FunctionPass *llvm::createX86IssueVZeroUpperPass() {
1170b57cec5SDimitry Andric return new VZeroUpperInserter();
1180b57cec5SDimitry Andric }
1190b57cec5SDimitry Andric
1200b57cec5SDimitry Andric #ifndef NDEBUG
getBlockExitStateName(BlockExitState ST)1210b57cec5SDimitry Andric const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
1220b57cec5SDimitry Andric switch (ST) {
1230b57cec5SDimitry Andric case PASS_THROUGH: return "Pass-through";
1240b57cec5SDimitry Andric case EXITS_DIRTY: return "Exits-dirty";
1250b57cec5SDimitry Andric case EXITS_CLEAN: return "Exits-clean";
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric llvm_unreachable("Invalid block exit state.");
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric #endif
1300b57cec5SDimitry Andric
1310b57cec5SDimitry Andric /// VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
1320b57cec5SDimitry Andric /// Thus, there is no need to check for Y/ZMM16 and above.
isYmmOrZmmReg(unsigned Reg)1330b57cec5SDimitry Andric static bool isYmmOrZmmReg(unsigned Reg) {
1340b57cec5SDimitry Andric return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
1350b57cec5SDimitry Andric (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
1360b57cec5SDimitry Andric }
1370b57cec5SDimitry Andric
checkFnHasLiveInYmmOrZmm(MachineRegisterInfo & MRI)1380b57cec5SDimitry Andric static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) {
1390b57cec5SDimitry Andric for (std::pair<unsigned, unsigned> LI : MRI.liveins())
1400b57cec5SDimitry Andric if (isYmmOrZmmReg(LI.first))
1410b57cec5SDimitry Andric return true;
1420b57cec5SDimitry Andric
1430b57cec5SDimitry Andric return false;
1440b57cec5SDimitry Andric }
1450b57cec5SDimitry Andric
clobbersAllYmmAndZmmRegs(const MachineOperand & MO)1460b57cec5SDimitry Andric static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) {
1470b57cec5SDimitry Andric for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
1480b57cec5SDimitry Andric if (!MO.clobbersPhysReg(reg))
1490b57cec5SDimitry Andric return false;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
1520b57cec5SDimitry Andric if (!MO.clobbersPhysReg(reg))
1530b57cec5SDimitry Andric return false;
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric return true;
1560b57cec5SDimitry Andric }
1570b57cec5SDimitry Andric
hasYmmOrZmmReg(MachineInstr & MI)1580b57cec5SDimitry Andric static bool hasYmmOrZmmReg(MachineInstr &MI) {
1590b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) {
1600b57cec5SDimitry Andric if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO))
1610b57cec5SDimitry Andric return true;
1620b57cec5SDimitry Andric if (!MO.isReg())
1630b57cec5SDimitry Andric continue;
1640b57cec5SDimitry Andric if (MO.isDebug())
1650b57cec5SDimitry Andric continue;
1660b57cec5SDimitry Andric if (isYmmOrZmmReg(MO.getReg()))
1670b57cec5SDimitry Andric return true;
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric return false;
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric
1720b57cec5SDimitry Andric /// Check if given call instruction has a RegMask operand.
callHasRegMask(MachineInstr & MI)1730b57cec5SDimitry Andric static bool callHasRegMask(MachineInstr &MI) {
1740b57cec5SDimitry Andric assert(MI.isCall() && "Can only be called on call instructions.");
1750b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) {
1760b57cec5SDimitry Andric if (MO.isRegMask())
1770b57cec5SDimitry Andric return true;
1780b57cec5SDimitry Andric }
1790b57cec5SDimitry Andric return false;
1800b57cec5SDimitry Andric }
1810b57cec5SDimitry Andric
1820b57cec5SDimitry Andric /// Insert a vzeroupper instruction before I.
insertVZeroUpper(MachineBasicBlock::iterator I,MachineBasicBlock & MBB)1830b57cec5SDimitry Andric void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
1840b57cec5SDimitry Andric MachineBasicBlock &MBB) {
185fe6060f1SDimitry Andric BuildMI(MBB, I, I->getDebugLoc(), TII->get(X86::VZEROUPPER));
1860b57cec5SDimitry Andric ++NumVZU;
1870b57cec5SDimitry Andric EverMadeChange = true;
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric
1900b57cec5SDimitry Andric /// Add MBB to the DirtySuccessors list if it hasn't already been added.
addDirtySuccessor(MachineBasicBlock & MBB)1910b57cec5SDimitry Andric void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
1920b57cec5SDimitry Andric if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
1930b57cec5SDimitry Andric DirtySuccessors.push_back(&MBB);
1940b57cec5SDimitry Andric BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
1950b57cec5SDimitry Andric }
1960b57cec5SDimitry Andric }
1970b57cec5SDimitry Andric
1980b57cec5SDimitry Andric /// Loop over all of the instructions in the basic block, inserting vzeroupper
1990b57cec5SDimitry Andric /// instructions before function calls.
processBasicBlock(MachineBasicBlock & MBB)2000b57cec5SDimitry Andric void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
2010b57cec5SDimitry Andric // Start by assuming that the block is PASS_THROUGH which implies no unguarded
2020b57cec5SDimitry Andric // calls.
2030b57cec5SDimitry Andric BlockExitState CurState = PASS_THROUGH;
2040b57cec5SDimitry Andric BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
2050b57cec5SDimitry Andric
2060b57cec5SDimitry Andric for (MachineInstr &MI : MBB) {
2070b57cec5SDimitry Andric bool IsCall = MI.isCall();
2080b57cec5SDimitry Andric bool IsReturn = MI.isReturn();
2090b57cec5SDimitry Andric bool IsControlFlow = IsCall || IsReturn;
2100b57cec5SDimitry Andric
2110b57cec5SDimitry Andric // No need for vzeroupper before iret in interrupt handler function,
2120b57cec5SDimitry Andric // epilogue will restore YMM/ZMM registers if needed.
2130b57cec5SDimitry Andric if (IsX86INTR && IsReturn)
2140b57cec5SDimitry Andric continue;
2150b57cec5SDimitry Andric
2160b57cec5SDimitry Andric // An existing VZERO* instruction resets the state.
2170b57cec5SDimitry Andric if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
2180b57cec5SDimitry Andric CurState = EXITS_CLEAN;
2190b57cec5SDimitry Andric continue;
2200b57cec5SDimitry Andric }
2210b57cec5SDimitry Andric
2220b57cec5SDimitry Andric // Shortcut: don't need to check regular instructions in dirty state.
2230b57cec5SDimitry Andric if (!IsControlFlow && CurState == EXITS_DIRTY)
2240b57cec5SDimitry Andric continue;
2250b57cec5SDimitry Andric
2260b57cec5SDimitry Andric if (hasYmmOrZmmReg(MI)) {
2270b57cec5SDimitry Andric // We found a ymm/zmm-using instruction; this could be an AVX/AVX512
2280b57cec5SDimitry Andric // instruction, or it could be control flow.
2290b57cec5SDimitry Andric CurState = EXITS_DIRTY;
2300b57cec5SDimitry Andric continue;
2310b57cec5SDimitry Andric }
2320b57cec5SDimitry Andric
2330b57cec5SDimitry Andric // Check for control-flow out of the current function (which might
2340b57cec5SDimitry Andric // indirectly execute SSE instructions).
2350b57cec5SDimitry Andric if (!IsControlFlow)
2360b57cec5SDimitry Andric continue;
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric // If the call has no RegMask, skip it as well. It usually happens on
2390b57cec5SDimitry Andric // helper function calls (such as '_chkstk', '_ftol2') where standard
2400b57cec5SDimitry Andric // calling convention is not used (RegMask is not used to mark register
2410b57cec5SDimitry Andric // clobbered and register usage (def/implicit-def/use) is well-defined and
2420b57cec5SDimitry Andric // explicitly specified.
2430b57cec5SDimitry Andric if (IsCall && !callHasRegMask(MI))
2440b57cec5SDimitry Andric continue;
2450b57cec5SDimitry Andric
2460b57cec5SDimitry Andric // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15
2470b57cec5SDimitry Andric // registers. In addition, the processor changes back to Clean state, after
2480b57cec5SDimitry Andric // which execution of SSE instructions or AVX instructions has no transition
2490b57cec5SDimitry Andric // penalty. Add the VZEROUPPER instruction before any function call/return
2500b57cec5SDimitry Andric // that might execute SSE code.
2510b57cec5SDimitry Andric // FIXME: In some cases, we may want to move the VZEROUPPER into a
2520b57cec5SDimitry Andric // predecessor block.
2530b57cec5SDimitry Andric if (CurState == EXITS_DIRTY) {
2540b57cec5SDimitry Andric // After the inserted VZEROUPPER the state becomes clean again, but
2550b57cec5SDimitry Andric // other YMM/ZMM may appear before other subsequent calls or even before
2560b57cec5SDimitry Andric // the end of the BB.
2570b57cec5SDimitry Andric insertVZeroUpper(MI, MBB);
2580b57cec5SDimitry Andric CurState = EXITS_CLEAN;
2590b57cec5SDimitry Andric } else if (CurState == PASS_THROUGH) {
2600b57cec5SDimitry Andric // If this block is currently in pass-through state and we encounter a
2610b57cec5SDimitry Andric // call then whether we need a vzeroupper or not depends on whether this
2620b57cec5SDimitry Andric // block has successors that exit dirty. Record the location of the call,
2630b57cec5SDimitry Andric // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
2640b57cec5SDimitry Andric // It will be inserted later if necessary.
2650b57cec5SDimitry Andric BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
2660b57cec5SDimitry Andric CurState = EXITS_CLEAN;
2670b57cec5SDimitry Andric }
2680b57cec5SDimitry Andric }
2690b57cec5SDimitry Andric
2700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: "
2710b57cec5SDimitry Andric << getBlockExitStateName(CurState) << '\n');
2720b57cec5SDimitry Andric
2730b57cec5SDimitry Andric if (CurState == EXITS_DIRTY)
274349cc55cSDimitry Andric for (MachineBasicBlock *Succ : MBB.successors())
275349cc55cSDimitry Andric addDirtySuccessor(*Succ);
2760b57cec5SDimitry Andric
2770b57cec5SDimitry Andric BlockStates[MBB.getNumber()].ExitState = CurState;
2780b57cec5SDimitry Andric }
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andric /// Loop over all of the basic blocks, inserting vzeroupper instructions before
2810b57cec5SDimitry Andric /// function calls.
runOnMachineFunction(MachineFunction & MF)2820b57cec5SDimitry Andric bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
2835ffd83dbSDimitry Andric if (!UseVZeroUpper)
2845ffd83dbSDimitry Andric return false;
2855ffd83dbSDimitry Andric
2860b57cec5SDimitry Andric const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
287480093f4SDimitry Andric if (!ST.hasAVX() || !ST.insertVZEROUPPER())
2880b57cec5SDimitry Andric return false;
2890b57cec5SDimitry Andric TII = ST.getInstrInfo();
2900b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
2910b57cec5SDimitry Andric EverMadeChange = false;
2920b57cec5SDimitry Andric IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
2930b57cec5SDimitry Andric
2940b57cec5SDimitry Andric bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
2950b57cec5SDimitry Andric
2960b57cec5SDimitry Andric // Fast check: if the function doesn't use any ymm/zmm registers, we don't
2970b57cec5SDimitry Andric // need to insert any VZEROUPPER instructions. This is constant-time, so it
2980b57cec5SDimitry Andric // is cheap in the common case of no ymm/zmm use.
2990b57cec5SDimitry Andric bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
300*bdd1243dSDimitry Andric for (const auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) {
3010b57cec5SDimitry Andric if (!YmmOrZmmUsed) {
302*bdd1243dSDimitry Andric for (MCPhysReg R : *RC) {
303*bdd1243dSDimitry Andric if (!MRI.reg_nodbg_empty(R)) {
3040b57cec5SDimitry Andric YmmOrZmmUsed = true;
3050b57cec5SDimitry Andric break;
3060b57cec5SDimitry Andric }
3070b57cec5SDimitry Andric }
3080b57cec5SDimitry Andric }
3090b57cec5SDimitry Andric }
3108bcb0991SDimitry Andric if (!YmmOrZmmUsed)
3110b57cec5SDimitry Andric return false;
3120b57cec5SDimitry Andric
3130b57cec5SDimitry Andric assert(BlockStates.empty() && DirtySuccessors.empty() &&
3140b57cec5SDimitry Andric "X86VZeroUpper state should be clear");
3150b57cec5SDimitry Andric BlockStates.resize(MF.getNumBlockIDs());
3160b57cec5SDimitry Andric
3170b57cec5SDimitry Andric // Process all blocks. This will compute block exit states, record the first
3180b57cec5SDimitry Andric // unguarded call in each block, and add successors of dirty blocks to the
3190b57cec5SDimitry Andric // DirtySuccessors list.
3200b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF)
3210b57cec5SDimitry Andric processBasicBlock(MBB);
3220b57cec5SDimitry Andric
3230b57cec5SDimitry Andric // If any YMM/ZMM regs are live-in to this function, add the entry block to
3240b57cec5SDimitry Andric // the DirtySuccessors list
3250b57cec5SDimitry Andric if (FnHasLiveInYmmOrZmm)
3260b57cec5SDimitry Andric addDirtySuccessor(MF.front());
3270b57cec5SDimitry Andric
3280b57cec5SDimitry Andric // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
3290b57cec5SDimitry Andric // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
3300b57cec5SDimitry Andric // through PASS_THROUGH blocks.
3310b57cec5SDimitry Andric while (!DirtySuccessors.empty()) {
3320b57cec5SDimitry Andric MachineBasicBlock &MBB = *DirtySuccessors.back();
3330b57cec5SDimitry Andric DirtySuccessors.pop_back();
3340b57cec5SDimitry Andric BlockState &BBState = BlockStates[MBB.getNumber()];
3350b57cec5SDimitry Andric
3360b57cec5SDimitry Andric // MBB is a successor of a dirty block, so its first call needs to be
3370b57cec5SDimitry Andric // guarded.
3380b57cec5SDimitry Andric if (BBState.FirstUnguardedCall != MBB.end())
3390b57cec5SDimitry Andric insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
3400b57cec5SDimitry Andric
3410b57cec5SDimitry Andric // If this successor was a pass-through block, then it is now dirty. Its
3420b57cec5SDimitry Andric // successors need to be added to the worklist (if they haven't been
3430b57cec5SDimitry Andric // already).
3440b57cec5SDimitry Andric if (BBState.ExitState == PASS_THROUGH) {
3450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber()
3460b57cec5SDimitry Andric << " was Pass-through, is now Dirty-out.\n");
3470b57cec5SDimitry Andric for (MachineBasicBlock *Succ : MBB.successors())
3480b57cec5SDimitry Andric addDirtySuccessor(*Succ);
3490b57cec5SDimitry Andric }
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric
3520b57cec5SDimitry Andric BlockStates.clear();
3530b57cec5SDimitry Andric return EverMadeChange;
3540b57cec5SDimitry Andric }
355