xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
10b57cec5SDimitry Andric //===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines the pass which inserts x86 AVX vzeroupper instructions
100b57cec5SDimitry Andric // before calls to SSE encoded functions. This avoids transition latency
110b57cec5SDimitry Andric // penalty when transferring control between AVX encoded instructions and old
120b57cec5SDimitry Andric // SSE encoding mode.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "X86.h"
170b57cec5SDimitry Andric #include "X86InstrInfo.h"
180b57cec5SDimitry Andric #include "X86Subtarget.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
200b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
300b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h"
310b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
320b57cec5SDimitry Andric #include "llvm/IR/Function.h"
330b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
340b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
350b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
360b57cec5SDimitry Andric #include <cassert>
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric #define DEBUG_TYPE "x86-vzeroupper"
410b57cec5SDimitry Andric 
425ffd83dbSDimitry Andric static cl::opt<bool>
435ffd83dbSDimitry Andric UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
445ffd83dbSDimitry Andric   cl::desc("Minimize AVX to SSE transition penalty"),
455ffd83dbSDimitry Andric   cl::init(true));
465ffd83dbSDimitry Andric 
470b57cec5SDimitry Andric STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric namespace {
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric   class VZeroUpperInserter : public MachineFunctionPass {
520b57cec5SDimitry Andric   public:
530b57cec5SDimitry Andric     VZeroUpperInserter() : MachineFunctionPass(ID) {}
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric     bool runOnMachineFunction(MachineFunction &MF) override;
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric     MachineFunctionProperties getRequiredProperties() const override {
580b57cec5SDimitry Andric       return MachineFunctionProperties().set(
590b57cec5SDimitry Andric           MachineFunctionProperties::Property::NoVRegs);
600b57cec5SDimitry Andric     }
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric     StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric   private:
650b57cec5SDimitry Andric     void processBasicBlock(MachineBasicBlock &MBB);
660b57cec5SDimitry Andric     void insertVZeroUpper(MachineBasicBlock::iterator I,
670b57cec5SDimitry Andric                           MachineBasicBlock &MBB);
680b57cec5SDimitry Andric     void addDirtySuccessor(MachineBasicBlock &MBB);
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric     using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric     static const char* getBlockExitStateName(BlockExitState ST);
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric     // Core algorithm state:
750b57cec5SDimitry Andric     // BlockState - Each block is either:
760b57cec5SDimitry Andric     //   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
770b57cec5SDimitry Andric     //                   vzeroupper instructions in this block.
780b57cec5SDimitry Andric     //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
790b57cec5SDimitry Andric     //                  block that will ensure that YMM/ZMM is clean on exit.
800b57cec5SDimitry Andric     //   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
810b57cec5SDimitry Andric     //                  subsequent vzeroupper in the block clears it.
820b57cec5SDimitry Andric     //
830b57cec5SDimitry Andric     // AddedToDirtySuccessors - This flag is raised when a block is added to the
840b57cec5SDimitry Andric     //                          DirtySuccessors list to ensure that it's not
850b57cec5SDimitry Andric     //                          added multiple times.
860b57cec5SDimitry Andric     //
870b57cec5SDimitry Andric     // FirstUnguardedCall - Records the location of the first unguarded call in
880b57cec5SDimitry Andric     //                      each basic block that may need to be guarded by a
890b57cec5SDimitry Andric     //                      vzeroupper. We won't know whether it actually needs
900b57cec5SDimitry Andric     //                      to be guarded until we discover a predecessor that
910b57cec5SDimitry Andric     //                      is DIRTY_OUT.
920b57cec5SDimitry Andric     struct BlockState {
930b57cec5SDimitry Andric       BlockExitState ExitState = PASS_THROUGH;
940b57cec5SDimitry Andric       bool AddedToDirtySuccessors = false;
950b57cec5SDimitry Andric       MachineBasicBlock::iterator FirstUnguardedCall;
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric       BlockState() = default;
980b57cec5SDimitry Andric     };
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric     using BlockStateMap = SmallVector<BlockState, 8>;
1010b57cec5SDimitry Andric     using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric     BlockStateMap BlockStates;
1040b57cec5SDimitry Andric     DirtySuccessorsWorkList DirtySuccessors;
1050b57cec5SDimitry Andric     bool EverMadeChange;
1060b57cec5SDimitry Andric     bool IsX86INTR;
1070b57cec5SDimitry Andric     const TargetInstrInfo *TII;
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric     static char ID;
1100b57cec5SDimitry Andric   };
1110b57cec5SDimitry Andric 
1120b57cec5SDimitry Andric } // end anonymous namespace
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric char VZeroUpperInserter::ID = 0;
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric FunctionPass *llvm::createX86IssueVZeroUpperPass() {
1170b57cec5SDimitry Andric   return new VZeroUpperInserter();
1180b57cec5SDimitry Andric }
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric #ifndef NDEBUG
1210b57cec5SDimitry Andric const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
1220b57cec5SDimitry Andric   switch (ST) {
1230b57cec5SDimitry Andric     case PASS_THROUGH: return "Pass-through";
1240b57cec5SDimitry Andric     case EXITS_DIRTY: return "Exits-dirty";
1250b57cec5SDimitry Andric     case EXITS_CLEAN: return "Exits-clean";
1260b57cec5SDimitry Andric   }
1270b57cec5SDimitry Andric   llvm_unreachable("Invalid block exit state.");
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric #endif
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric /// VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
1320b57cec5SDimitry Andric /// Thus, there is no need to check for Y/ZMM16 and above.
1330b57cec5SDimitry Andric static bool isYmmOrZmmReg(unsigned Reg) {
1340b57cec5SDimitry Andric   return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
1350b57cec5SDimitry Andric          (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
1360b57cec5SDimitry Andric }
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) {
1390b57cec5SDimitry Andric   for (std::pair<unsigned, unsigned> LI : MRI.liveins())
1400b57cec5SDimitry Andric     if (isYmmOrZmmReg(LI.first))
1410b57cec5SDimitry Andric       return true;
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric   return false;
1440b57cec5SDimitry Andric }
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) {
1470b57cec5SDimitry Andric   for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
1480b57cec5SDimitry Andric     if (!MO.clobbersPhysReg(reg))
1490b57cec5SDimitry Andric       return false;
1500b57cec5SDimitry Andric   }
1510b57cec5SDimitry Andric   for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
1520b57cec5SDimitry Andric     if (!MO.clobbersPhysReg(reg))
1530b57cec5SDimitry Andric       return false;
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric   return true;
1560b57cec5SDimitry Andric }
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric static bool hasYmmOrZmmReg(MachineInstr &MI) {
1590b57cec5SDimitry Andric   for (const MachineOperand &MO : MI.operands()) {
1600b57cec5SDimitry Andric     if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO))
1610b57cec5SDimitry Andric       return true;
1620b57cec5SDimitry Andric     if (!MO.isReg())
1630b57cec5SDimitry Andric       continue;
1640b57cec5SDimitry Andric     if (MO.isDebug())
1650b57cec5SDimitry Andric       continue;
1660b57cec5SDimitry Andric     if (isYmmOrZmmReg(MO.getReg()))
1670b57cec5SDimitry Andric       return true;
1680b57cec5SDimitry Andric   }
1690b57cec5SDimitry Andric   return false;
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric /// Check if given call instruction has a RegMask operand.
1730b57cec5SDimitry Andric static bool callHasRegMask(MachineInstr &MI) {
1740b57cec5SDimitry Andric   assert(MI.isCall() && "Can only be called on call instructions.");
1750b57cec5SDimitry Andric   for (const MachineOperand &MO : MI.operands()) {
1760b57cec5SDimitry Andric     if (MO.isRegMask())
1770b57cec5SDimitry Andric       return true;
1780b57cec5SDimitry Andric   }
1790b57cec5SDimitry Andric   return false;
1800b57cec5SDimitry Andric }
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric /// Insert a vzeroupper instruction before I.
1830b57cec5SDimitry Andric void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
1840b57cec5SDimitry Andric                                           MachineBasicBlock &MBB) {
185fe6060f1SDimitry Andric   BuildMI(MBB, I, I->getDebugLoc(), TII->get(X86::VZEROUPPER));
1860b57cec5SDimitry Andric   ++NumVZU;
1870b57cec5SDimitry Andric   EverMadeChange = true;
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric 
1900b57cec5SDimitry Andric /// Add MBB to the DirtySuccessors list if it hasn't already been added.
1910b57cec5SDimitry Andric void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
1920b57cec5SDimitry Andric   if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
1930b57cec5SDimitry Andric     DirtySuccessors.push_back(&MBB);
1940b57cec5SDimitry Andric     BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
1950b57cec5SDimitry Andric   }
1960b57cec5SDimitry Andric }
1970b57cec5SDimitry Andric 
1980b57cec5SDimitry Andric /// Loop over all of the instructions in the basic block, inserting vzeroupper
1990b57cec5SDimitry Andric /// instructions before function calls.
2000b57cec5SDimitry Andric void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
2010b57cec5SDimitry Andric   // Start by assuming that the block is PASS_THROUGH which implies no unguarded
2020b57cec5SDimitry Andric   // calls.
2030b57cec5SDimitry Andric   BlockExitState CurState = PASS_THROUGH;
2040b57cec5SDimitry Andric   BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric   for (MachineInstr &MI : MBB) {
2070b57cec5SDimitry Andric     bool IsCall = MI.isCall();
2080b57cec5SDimitry Andric     bool IsReturn = MI.isReturn();
2090b57cec5SDimitry Andric     bool IsControlFlow = IsCall || IsReturn;
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric     // No need for vzeroupper before iret in interrupt handler function,
2120b57cec5SDimitry Andric     // epilogue will restore YMM/ZMM registers if needed.
2130b57cec5SDimitry Andric     if (IsX86INTR && IsReturn)
2140b57cec5SDimitry Andric       continue;
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric     // An existing VZERO* instruction resets the state.
2170b57cec5SDimitry Andric     if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
2180b57cec5SDimitry Andric       CurState = EXITS_CLEAN;
2190b57cec5SDimitry Andric       continue;
2200b57cec5SDimitry Andric     }
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric     // Shortcut: don't need to check regular instructions in dirty state.
2230b57cec5SDimitry Andric     if (!IsControlFlow && CurState == EXITS_DIRTY)
2240b57cec5SDimitry Andric       continue;
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric     if (hasYmmOrZmmReg(MI)) {
2270b57cec5SDimitry Andric       // We found a ymm/zmm-using instruction; this could be an AVX/AVX512
2280b57cec5SDimitry Andric       // instruction, or it could be control flow.
2290b57cec5SDimitry Andric       CurState = EXITS_DIRTY;
2300b57cec5SDimitry Andric       continue;
2310b57cec5SDimitry Andric     }
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric     // Check for control-flow out of the current function (which might
2340b57cec5SDimitry Andric     // indirectly execute SSE instructions).
2350b57cec5SDimitry Andric     if (!IsControlFlow)
2360b57cec5SDimitry Andric       continue;
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric     // If the call has no RegMask, skip it as well. It usually happens on
2390b57cec5SDimitry Andric     // helper function calls (such as '_chkstk', '_ftol2') where standard
2400b57cec5SDimitry Andric     // calling convention is not used (RegMask is not used to mark register
2410b57cec5SDimitry Andric     // clobbered and register usage (def/implicit-def/use) is well-defined and
2420b57cec5SDimitry Andric     // explicitly specified.
2430b57cec5SDimitry Andric     if (IsCall && !callHasRegMask(MI))
2440b57cec5SDimitry Andric       continue;
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric     // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15
2470b57cec5SDimitry Andric     // registers. In addition, the processor changes back to Clean state, after
2480b57cec5SDimitry Andric     // which execution of SSE instructions or AVX instructions has no transition
2490b57cec5SDimitry Andric     // penalty. Add the VZEROUPPER instruction before any function call/return
2500b57cec5SDimitry Andric     // that might execute SSE code.
2510b57cec5SDimitry Andric     // FIXME: In some cases, we may want to move the VZEROUPPER into a
2520b57cec5SDimitry Andric     // predecessor block.
2530b57cec5SDimitry Andric     if (CurState == EXITS_DIRTY) {
2540b57cec5SDimitry Andric       // After the inserted VZEROUPPER the state becomes clean again, but
2550b57cec5SDimitry Andric       // other YMM/ZMM may appear before other subsequent calls or even before
2560b57cec5SDimitry Andric       // the end of the BB.
2570b57cec5SDimitry Andric       insertVZeroUpper(MI, MBB);
2580b57cec5SDimitry Andric       CurState = EXITS_CLEAN;
2590b57cec5SDimitry Andric     } else if (CurState == PASS_THROUGH) {
2600b57cec5SDimitry Andric       // If this block is currently in pass-through state and we encounter a
2610b57cec5SDimitry Andric       // call then whether we need a vzeroupper or not depends on whether this
2620b57cec5SDimitry Andric       // block has successors that exit dirty. Record the location of the call,
2630b57cec5SDimitry Andric       // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
2640b57cec5SDimitry Andric       // It will be inserted later if necessary.
2650b57cec5SDimitry Andric       BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
2660b57cec5SDimitry Andric       CurState = EXITS_CLEAN;
2670b57cec5SDimitry Andric     }
2680b57cec5SDimitry Andric   }
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: "
2710b57cec5SDimitry Andric                     << getBlockExitStateName(CurState) << '\n');
2720b57cec5SDimitry Andric 
2730b57cec5SDimitry Andric   if (CurState == EXITS_DIRTY)
274*349cc55cSDimitry Andric     for (MachineBasicBlock *Succ : MBB.successors())
275*349cc55cSDimitry Andric       addDirtySuccessor(*Succ);
2760b57cec5SDimitry Andric 
2770b57cec5SDimitry Andric   BlockStates[MBB.getNumber()].ExitState = CurState;
2780b57cec5SDimitry Andric }
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric /// Loop over all of the basic blocks, inserting vzeroupper instructions before
2810b57cec5SDimitry Andric /// function calls.
2820b57cec5SDimitry Andric bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
2835ffd83dbSDimitry Andric   if (!UseVZeroUpper)
2845ffd83dbSDimitry Andric     return false;
2855ffd83dbSDimitry Andric 
2860b57cec5SDimitry Andric   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
287480093f4SDimitry Andric   if (!ST.hasAVX() || !ST.insertVZEROUPPER())
2880b57cec5SDimitry Andric     return false;
2890b57cec5SDimitry Andric   TII = ST.getInstrInfo();
2900b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
2910b57cec5SDimitry Andric   EverMadeChange = false;
2920b57cec5SDimitry Andric   IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
2930b57cec5SDimitry Andric 
2940b57cec5SDimitry Andric   bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric   // Fast check: if the function doesn't use any ymm/zmm registers, we don't
2970b57cec5SDimitry Andric   // need to insert any VZEROUPPER instructions.  This is constant-time, so it
2980b57cec5SDimitry Andric   // is cheap in the common case of no ymm/zmm use.
2990b57cec5SDimitry Andric   bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
3008bcb0991SDimitry Andric   for (auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) {
3010b57cec5SDimitry Andric     if (!YmmOrZmmUsed) {
3020b57cec5SDimitry Andric       for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
3030b57cec5SDimitry Andric            i++) {
3040b57cec5SDimitry Andric         if (!MRI.reg_nodbg_empty(*i)) {
3050b57cec5SDimitry Andric           YmmOrZmmUsed = true;
3060b57cec5SDimitry Andric           break;
3070b57cec5SDimitry Andric         }
3080b57cec5SDimitry Andric       }
3090b57cec5SDimitry Andric     }
3100b57cec5SDimitry Andric   }
3118bcb0991SDimitry Andric   if (!YmmOrZmmUsed)
3120b57cec5SDimitry Andric     return false;
3130b57cec5SDimitry Andric 
3140b57cec5SDimitry Andric   assert(BlockStates.empty() && DirtySuccessors.empty() &&
3150b57cec5SDimitry Andric          "X86VZeroUpper state should be clear");
3160b57cec5SDimitry Andric   BlockStates.resize(MF.getNumBlockIDs());
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   // Process all blocks. This will compute block exit states, record the first
3190b57cec5SDimitry Andric   // unguarded call in each block, and add successors of dirty blocks to the
3200b57cec5SDimitry Andric   // DirtySuccessors list.
3210b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF)
3220b57cec5SDimitry Andric     processBasicBlock(MBB);
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric   // If any YMM/ZMM regs are live-in to this function, add the entry block to
3250b57cec5SDimitry Andric   // the DirtySuccessors list
3260b57cec5SDimitry Andric   if (FnHasLiveInYmmOrZmm)
3270b57cec5SDimitry Andric     addDirtySuccessor(MF.front());
3280b57cec5SDimitry Andric 
3290b57cec5SDimitry Andric   // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
3300b57cec5SDimitry Andric   // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
3310b57cec5SDimitry Andric   // through PASS_THROUGH blocks.
3320b57cec5SDimitry Andric   while (!DirtySuccessors.empty()) {
3330b57cec5SDimitry Andric     MachineBasicBlock &MBB = *DirtySuccessors.back();
3340b57cec5SDimitry Andric     DirtySuccessors.pop_back();
3350b57cec5SDimitry Andric     BlockState &BBState = BlockStates[MBB.getNumber()];
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric     // MBB is a successor of a dirty block, so its first call needs to be
3380b57cec5SDimitry Andric     // guarded.
3390b57cec5SDimitry Andric     if (BBState.FirstUnguardedCall != MBB.end())
3400b57cec5SDimitry Andric       insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric     // If this successor was a pass-through block, then it is now dirty. Its
3430b57cec5SDimitry Andric     // successors need to be added to the worklist (if they haven't been
3440b57cec5SDimitry Andric     // already).
3450b57cec5SDimitry Andric     if (BBState.ExitState == PASS_THROUGH) {
3460b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber()
3470b57cec5SDimitry Andric                         << " was Pass-through, is now Dirty-out.\n");
3480b57cec5SDimitry Andric       for (MachineBasicBlock *Succ : MBB.successors())
3490b57cec5SDimitry Andric         addDirtySuccessor(*Succ);
3500b57cec5SDimitry Andric     }
3510b57cec5SDimitry Andric   }
3520b57cec5SDimitry Andric 
3530b57cec5SDimitry Andric   BlockStates.clear();
3540b57cec5SDimitry Andric   return EverMadeChange;
3550b57cec5SDimitry Andric }
356