xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp (revision 8bcb0991864975618c09697b1aca10683346d9f0)
10b57cec5SDimitry Andric //===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines the pass which inserts x86 AVX vzeroupper instructions
100b57cec5SDimitry Andric // before calls to SSE encoded functions. This avoids transition latency
110b57cec5SDimitry Andric // penalty when transferring control between AVX encoded instructions and old
120b57cec5SDimitry Andric // SSE encoding mode.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "X86.h"
170b57cec5SDimitry Andric #include "X86InstrInfo.h"
180b57cec5SDimitry Andric #include "X86Subtarget.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
200b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
300b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h"
310b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
320b57cec5SDimitry Andric #include "llvm/IR/Function.h"
330b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
340b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
350b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
360b57cec5SDimitry Andric #include <cassert>
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric #define DEBUG_TYPE "x86-vzeroupper"
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric namespace {
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   class VZeroUpperInserter : public MachineFunctionPass {
470b57cec5SDimitry Andric   public:
480b57cec5SDimitry Andric     VZeroUpperInserter() : MachineFunctionPass(ID) {}
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric     bool runOnMachineFunction(MachineFunction &MF) override;
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric     MachineFunctionProperties getRequiredProperties() const override {
530b57cec5SDimitry Andric       return MachineFunctionProperties().set(
540b57cec5SDimitry Andric           MachineFunctionProperties::Property::NoVRegs);
550b57cec5SDimitry Andric     }
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric     StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   private:
600b57cec5SDimitry Andric     void processBasicBlock(MachineBasicBlock &MBB);
610b57cec5SDimitry Andric     void insertVZeroUpper(MachineBasicBlock::iterator I,
620b57cec5SDimitry Andric                           MachineBasicBlock &MBB);
630b57cec5SDimitry Andric     void addDirtySuccessor(MachineBasicBlock &MBB);
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric     using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric     static const char* getBlockExitStateName(BlockExitState ST);
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric     // Core algorithm state:
700b57cec5SDimitry Andric     // BlockState - Each block is either:
710b57cec5SDimitry Andric     //   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
720b57cec5SDimitry Andric     //                   vzeroupper instructions in this block.
730b57cec5SDimitry Andric     //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
740b57cec5SDimitry Andric     //                  block that will ensure that YMM/ZMM is clean on exit.
750b57cec5SDimitry Andric     //   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
760b57cec5SDimitry Andric     //                  subsequent vzeroupper in the block clears it.
770b57cec5SDimitry Andric     //
780b57cec5SDimitry Andric     // AddedToDirtySuccessors - This flag is raised when a block is added to the
790b57cec5SDimitry Andric     //                          DirtySuccessors list to ensure that it's not
800b57cec5SDimitry Andric     //                          added multiple times.
810b57cec5SDimitry Andric     //
820b57cec5SDimitry Andric     // FirstUnguardedCall - Records the location of the first unguarded call in
830b57cec5SDimitry Andric     //                      each basic block that may need to be guarded by a
840b57cec5SDimitry Andric     //                      vzeroupper. We won't know whether it actually needs
850b57cec5SDimitry Andric     //                      to be guarded until we discover a predecessor that
860b57cec5SDimitry Andric     //                      is DIRTY_OUT.
870b57cec5SDimitry Andric     struct BlockState {
880b57cec5SDimitry Andric       BlockExitState ExitState = PASS_THROUGH;
890b57cec5SDimitry Andric       bool AddedToDirtySuccessors = false;
900b57cec5SDimitry Andric       MachineBasicBlock::iterator FirstUnguardedCall;
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric       BlockState() = default;
930b57cec5SDimitry Andric     };
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric     using BlockStateMap = SmallVector<BlockState, 8>;
960b57cec5SDimitry Andric     using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric     BlockStateMap BlockStates;
990b57cec5SDimitry Andric     DirtySuccessorsWorkList DirtySuccessors;
1000b57cec5SDimitry Andric     bool EverMadeChange;
1010b57cec5SDimitry Andric     bool IsX86INTR;
1020b57cec5SDimitry Andric     const TargetInstrInfo *TII;
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric     static char ID;
1050b57cec5SDimitry Andric   };
1060b57cec5SDimitry Andric 
1070b57cec5SDimitry Andric } // end anonymous namespace
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric char VZeroUpperInserter::ID = 0;
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric FunctionPass *llvm::createX86IssueVZeroUpperPass() {
1120b57cec5SDimitry Andric   return new VZeroUpperInserter();
1130b57cec5SDimitry Andric }
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric #ifndef NDEBUG
1160b57cec5SDimitry Andric const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
1170b57cec5SDimitry Andric   switch (ST) {
1180b57cec5SDimitry Andric     case PASS_THROUGH: return "Pass-through";
1190b57cec5SDimitry Andric     case EXITS_DIRTY: return "Exits-dirty";
1200b57cec5SDimitry Andric     case EXITS_CLEAN: return "Exits-clean";
1210b57cec5SDimitry Andric   }
1220b57cec5SDimitry Andric   llvm_unreachable("Invalid block exit state.");
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric #endif
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric /// VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
1270b57cec5SDimitry Andric /// Thus, there is no need to check for Y/ZMM16 and above.
1280b57cec5SDimitry Andric static bool isYmmOrZmmReg(unsigned Reg) {
1290b57cec5SDimitry Andric   return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
1300b57cec5SDimitry Andric          (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) {
1340b57cec5SDimitry Andric   for (std::pair<unsigned, unsigned> LI : MRI.liveins())
1350b57cec5SDimitry Andric     if (isYmmOrZmmReg(LI.first))
1360b57cec5SDimitry Andric       return true;
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   return false;
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) {
1420b57cec5SDimitry Andric   for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
1430b57cec5SDimitry Andric     if (!MO.clobbersPhysReg(reg))
1440b57cec5SDimitry Andric       return false;
1450b57cec5SDimitry Andric   }
1460b57cec5SDimitry Andric   for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
1470b57cec5SDimitry Andric     if (!MO.clobbersPhysReg(reg))
1480b57cec5SDimitry Andric       return false;
1490b57cec5SDimitry Andric   }
1500b57cec5SDimitry Andric   return true;
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric static bool hasYmmOrZmmReg(MachineInstr &MI) {
1540b57cec5SDimitry Andric   for (const MachineOperand &MO : MI.operands()) {
1550b57cec5SDimitry Andric     if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO))
1560b57cec5SDimitry Andric       return true;
1570b57cec5SDimitry Andric     if (!MO.isReg())
1580b57cec5SDimitry Andric       continue;
1590b57cec5SDimitry Andric     if (MO.isDebug())
1600b57cec5SDimitry Andric       continue;
1610b57cec5SDimitry Andric     if (isYmmOrZmmReg(MO.getReg()))
1620b57cec5SDimitry Andric       return true;
1630b57cec5SDimitry Andric   }
1640b57cec5SDimitry Andric   return false;
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric /// Check if given call instruction has a RegMask operand.
1680b57cec5SDimitry Andric static bool callHasRegMask(MachineInstr &MI) {
1690b57cec5SDimitry Andric   assert(MI.isCall() && "Can only be called on call instructions.");
1700b57cec5SDimitry Andric   for (const MachineOperand &MO : MI.operands()) {
1710b57cec5SDimitry Andric     if (MO.isRegMask())
1720b57cec5SDimitry Andric       return true;
1730b57cec5SDimitry Andric   }
1740b57cec5SDimitry Andric   return false;
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric /// Insert a vzeroupper instruction before I.
1780b57cec5SDimitry Andric void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
1790b57cec5SDimitry Andric                                           MachineBasicBlock &MBB) {
1800b57cec5SDimitry Andric   DebugLoc dl = I->getDebugLoc();
1810b57cec5SDimitry Andric   BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER));
1820b57cec5SDimitry Andric   ++NumVZU;
1830b57cec5SDimitry Andric   EverMadeChange = true;
1840b57cec5SDimitry Andric }
1850b57cec5SDimitry Andric 
1860b57cec5SDimitry Andric /// Add MBB to the DirtySuccessors list if it hasn't already been added.
1870b57cec5SDimitry Andric void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
1880b57cec5SDimitry Andric   if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
1890b57cec5SDimitry Andric     DirtySuccessors.push_back(&MBB);
1900b57cec5SDimitry Andric     BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
1910b57cec5SDimitry Andric   }
1920b57cec5SDimitry Andric }
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric /// Loop over all of the instructions in the basic block, inserting vzeroupper
1950b57cec5SDimitry Andric /// instructions before function calls.
1960b57cec5SDimitry Andric void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
1970b57cec5SDimitry Andric   // Start by assuming that the block is PASS_THROUGH which implies no unguarded
1980b57cec5SDimitry Andric   // calls.
1990b57cec5SDimitry Andric   BlockExitState CurState = PASS_THROUGH;
2000b57cec5SDimitry Andric   BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric   for (MachineInstr &MI : MBB) {
2030b57cec5SDimitry Andric     bool IsCall = MI.isCall();
2040b57cec5SDimitry Andric     bool IsReturn = MI.isReturn();
2050b57cec5SDimitry Andric     bool IsControlFlow = IsCall || IsReturn;
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric     // No need for vzeroupper before iret in interrupt handler function,
2080b57cec5SDimitry Andric     // epilogue will restore YMM/ZMM registers if needed.
2090b57cec5SDimitry Andric     if (IsX86INTR && IsReturn)
2100b57cec5SDimitry Andric       continue;
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric     // An existing VZERO* instruction resets the state.
2130b57cec5SDimitry Andric     if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
2140b57cec5SDimitry Andric       CurState = EXITS_CLEAN;
2150b57cec5SDimitry Andric       continue;
2160b57cec5SDimitry Andric     }
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric     // Shortcut: don't need to check regular instructions in dirty state.
2190b57cec5SDimitry Andric     if (!IsControlFlow && CurState == EXITS_DIRTY)
2200b57cec5SDimitry Andric       continue;
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric     if (hasYmmOrZmmReg(MI)) {
2230b57cec5SDimitry Andric       // We found a ymm/zmm-using instruction; this could be an AVX/AVX512
2240b57cec5SDimitry Andric       // instruction, or it could be control flow.
2250b57cec5SDimitry Andric       CurState = EXITS_DIRTY;
2260b57cec5SDimitry Andric       continue;
2270b57cec5SDimitry Andric     }
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric     // Check for control-flow out of the current function (which might
2300b57cec5SDimitry Andric     // indirectly execute SSE instructions).
2310b57cec5SDimitry Andric     if (!IsControlFlow)
2320b57cec5SDimitry Andric       continue;
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric     // If the call has no RegMask, skip it as well. It usually happens on
2350b57cec5SDimitry Andric     // helper function calls (such as '_chkstk', '_ftol2') where standard
2360b57cec5SDimitry Andric     // calling convention is not used (RegMask is not used to mark register
2370b57cec5SDimitry Andric     // clobbered and register usage (def/implicit-def/use) is well-defined and
2380b57cec5SDimitry Andric     // explicitly specified.
2390b57cec5SDimitry Andric     if (IsCall && !callHasRegMask(MI))
2400b57cec5SDimitry Andric       continue;
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric     // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15
2430b57cec5SDimitry Andric     // registers. In addition, the processor changes back to Clean state, after
2440b57cec5SDimitry Andric     // which execution of SSE instructions or AVX instructions has no transition
2450b57cec5SDimitry Andric     // penalty. Add the VZEROUPPER instruction before any function call/return
2460b57cec5SDimitry Andric     // that might execute SSE code.
2470b57cec5SDimitry Andric     // FIXME: In some cases, we may want to move the VZEROUPPER into a
2480b57cec5SDimitry Andric     // predecessor block.
2490b57cec5SDimitry Andric     if (CurState == EXITS_DIRTY) {
2500b57cec5SDimitry Andric       // After the inserted VZEROUPPER the state becomes clean again, but
2510b57cec5SDimitry Andric       // other YMM/ZMM may appear before other subsequent calls or even before
2520b57cec5SDimitry Andric       // the end of the BB.
2530b57cec5SDimitry Andric       insertVZeroUpper(MI, MBB);
2540b57cec5SDimitry Andric       CurState = EXITS_CLEAN;
2550b57cec5SDimitry Andric     } else if (CurState == PASS_THROUGH) {
2560b57cec5SDimitry Andric       // If this block is currently in pass-through state and we encounter a
2570b57cec5SDimitry Andric       // call then whether we need a vzeroupper or not depends on whether this
2580b57cec5SDimitry Andric       // block has successors that exit dirty. Record the location of the call,
2590b57cec5SDimitry Andric       // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
2600b57cec5SDimitry Andric       // It will be inserted later if necessary.
2610b57cec5SDimitry Andric       BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
2620b57cec5SDimitry Andric       CurState = EXITS_CLEAN;
2630b57cec5SDimitry Andric     }
2640b57cec5SDimitry Andric   }
2650b57cec5SDimitry Andric 
2660b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: "
2670b57cec5SDimitry Andric                     << getBlockExitStateName(CurState) << '\n');
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   if (CurState == EXITS_DIRTY)
2700b57cec5SDimitry Andric     for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
2710b57cec5SDimitry Andric                                           SE = MBB.succ_end();
2720b57cec5SDimitry Andric          SI != SE; ++SI)
2730b57cec5SDimitry Andric       addDirtySuccessor(**SI);
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   BlockStates[MBB.getNumber()].ExitState = CurState;
2760b57cec5SDimitry Andric }
2770b57cec5SDimitry Andric 
2780b57cec5SDimitry Andric /// Loop over all of the basic blocks, inserting vzeroupper instructions before
2790b57cec5SDimitry Andric /// function calls.
2800b57cec5SDimitry Andric bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
2810b57cec5SDimitry Andric   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
2820b57cec5SDimitry Andric   if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite())
2830b57cec5SDimitry Andric     return false;
2840b57cec5SDimitry Andric   TII = ST.getInstrInfo();
2850b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
2860b57cec5SDimitry Andric   EverMadeChange = false;
2870b57cec5SDimitry Andric   IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric   bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
2900b57cec5SDimitry Andric 
2910b57cec5SDimitry Andric   // Fast check: if the function doesn't use any ymm/zmm registers, we don't
2920b57cec5SDimitry Andric   // need to insert any VZEROUPPER instructions.  This is constant-time, so it
2930b57cec5SDimitry Andric   // is cheap in the common case of no ymm/zmm use.
2940b57cec5SDimitry Andric   bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
295*8bcb0991SDimitry Andric   for (auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) {
2960b57cec5SDimitry Andric     if (!YmmOrZmmUsed) {
2970b57cec5SDimitry Andric       for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
2980b57cec5SDimitry Andric            i++) {
2990b57cec5SDimitry Andric         if (!MRI.reg_nodbg_empty(*i)) {
3000b57cec5SDimitry Andric           YmmOrZmmUsed = true;
3010b57cec5SDimitry Andric           break;
3020b57cec5SDimitry Andric         }
3030b57cec5SDimitry Andric       }
3040b57cec5SDimitry Andric     }
3050b57cec5SDimitry Andric   }
306*8bcb0991SDimitry Andric   if (!YmmOrZmmUsed)
3070b57cec5SDimitry Andric     return false;
3080b57cec5SDimitry Andric 
3090b57cec5SDimitry Andric   assert(BlockStates.empty() && DirtySuccessors.empty() &&
3100b57cec5SDimitry Andric          "X86VZeroUpper state should be clear");
3110b57cec5SDimitry Andric   BlockStates.resize(MF.getNumBlockIDs());
3120b57cec5SDimitry Andric 
3130b57cec5SDimitry Andric   // Process all blocks. This will compute block exit states, record the first
3140b57cec5SDimitry Andric   // unguarded call in each block, and add successors of dirty blocks to the
3150b57cec5SDimitry Andric   // DirtySuccessors list.
3160b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF)
3170b57cec5SDimitry Andric     processBasicBlock(MBB);
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric   // If any YMM/ZMM regs are live-in to this function, add the entry block to
3200b57cec5SDimitry Andric   // the DirtySuccessors list
3210b57cec5SDimitry Andric   if (FnHasLiveInYmmOrZmm)
3220b57cec5SDimitry Andric     addDirtySuccessor(MF.front());
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric   // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
3250b57cec5SDimitry Andric   // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
3260b57cec5SDimitry Andric   // through PASS_THROUGH blocks.
3270b57cec5SDimitry Andric   while (!DirtySuccessors.empty()) {
3280b57cec5SDimitry Andric     MachineBasicBlock &MBB = *DirtySuccessors.back();
3290b57cec5SDimitry Andric     DirtySuccessors.pop_back();
3300b57cec5SDimitry Andric     BlockState &BBState = BlockStates[MBB.getNumber()];
3310b57cec5SDimitry Andric 
3320b57cec5SDimitry Andric     // MBB is a successor of a dirty block, so its first call needs to be
3330b57cec5SDimitry Andric     // guarded.
3340b57cec5SDimitry Andric     if (BBState.FirstUnguardedCall != MBB.end())
3350b57cec5SDimitry Andric       insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric     // If this successor was a pass-through block, then it is now dirty. Its
3380b57cec5SDimitry Andric     // successors need to be added to the worklist (if they haven't been
3390b57cec5SDimitry Andric     // already).
3400b57cec5SDimitry Andric     if (BBState.ExitState == PASS_THROUGH) {
3410b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber()
3420b57cec5SDimitry Andric                         << " was Pass-through, is now Dirty-out.\n");
3430b57cec5SDimitry Andric       for (MachineBasicBlock *Succ : MBB.successors())
3440b57cec5SDimitry Andric         addDirtySuccessor(*Succ);
3450b57cec5SDimitry Andric     }
3460b57cec5SDimitry Andric   }
3470b57cec5SDimitry Andric 
3480b57cec5SDimitry Andric   BlockStates.clear();
3490b57cec5SDimitry Andric   return EverMadeChange;
3500b57cec5SDimitry Andric }
351