//===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids the transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "x86-vzeroupper"

STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");

namespace {

  class VZeroUpperInserter : public MachineFunctionPass {
  public:
    VZeroUpperInserter() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    StringRef getPassName() const override { return "X86 vzeroupper inserter"; }

  private:
    void processBasicBlock(MachineBasicBlock &MBB);
    void insertVZeroUpper(MachineBasicBlock::iterator I,
                          MachineBasicBlock &MBB);
    void addDirtySuccessor(MachineBasicBlock &MBB);

    using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };

    static const char* getBlockExitStateName(BlockExitState ST);

    // Core algorithm state:
    // BlockState - Each block is either:
    //   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
    //                   vzeroupper instructions in this block.
    //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
    //                  block that will ensure that YMM/ZMM is clean on exit.
    //   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
    //                  subsequent vzeroupper in the block clears it.
    //
    // AddedToDirtySuccessors - This flag is raised when a block is added to the
    //                          DirtySuccessors list to ensure that it's not
    //                          added multiple times.
    //
    // FirstUnguardedCall - Records the location of the first unguarded call in
    //                      each basic block that may need to be guarded by a
    //                      vzeroupper. We won't know whether it actually needs
    //                      to be guarded until we discover a predecessor that
    //                      is EXITS_DIRTY.
    struct BlockState {
      BlockExitState ExitState = PASS_THROUGH;
      bool AddedToDirtySuccessors = false;
      MachineBasicBlock::iterator FirstUnguardedCall;

      BlockState() = default;
    };

    using BlockStateMap = SmallVector<BlockState, 8>;
    using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;

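    // Per-block analysis state (indexed by MBB number) and the worklist of
    // blocks reached from an EXITS_DIRTY predecessor that still need fixing.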
    BlockStateMap BlockStates;
    DirtySuccessorsWorkList DirtySuccessors;
    bool EverMadeChange;
    bool IsX86INTR;
    const TargetInstrInfo *TII;

    static char ID;
  };

} // end anonymous namespace

char VZeroUpperInserter::ID = 0;

FunctionPass *llvm::createX86IssueVZeroUpperPass() {
  return new VZeroUpperInserter();
}

#ifndef NDEBUG
const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
  switch (ST) {
    case PASS_THROUGH: return "Pass-through";
    case EXITS_DIRTY: return "Exits-dirty";
    case EXITS_CLEAN: return "Exits-clean";
  }
  llvm_unreachable("Invalid block exit state.");
}
#endif

/// VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
/// Thus, there is no need to check for Y/ZMM16 and above.
static bool isYmmOrZmmReg(unsigned Reg) {
  return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
         (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
}

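/// Return true if any YMM or ZMM register is a live-in to the function, in
/// which case the upper state may already be dirty on entry.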
static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) {
  for (std::pair<unsigned, unsigned> LI : MRI.liveins())
    if (isYmmOrZmmReg(LI.first))
      return true;

  return false;
}

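/// Return true if the register-mask operand clobbers all of the YMM0-15 and
/// ZMM0-15 registers.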
static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) {
  for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
    if (!MO.clobbersPhysReg(reg))
      return false;
  }
  for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
    if (!MO.clobbersPhysReg(reg))
      return false;
  }
  return true;
}

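/// Return true if the instruction touches YMM/ZMM state: it uses or defines a
/// YMM0-15/ZMM0-15 register, or it is a call whose register mask preserves at
/// least one of those registers across the call.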
static bool hasYmmOrZmmReg(MachineInstr &MI) {
  for (const MachineOperand &MO : MI.operands()) {
    if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO))
      return true;
    if (!MO.isReg())
      continue;
    if (MO.isDebug())
      continue;
    if (isYmmOrZmmReg(MO.getReg()))
      return true;
  }
  return false;
}

/// Check if given call instruction has a RegMask operand.
static bool callHasRegMask(MachineInstr &MI) {
  assert(MI.isCall() && "Can only be called on call instructions.");
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isRegMask())
      return true;
  }
  return false;
}

/// Insert a vzeroupper instruction before I.
void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
                                          MachineBasicBlock &MBB) {
  DebugLoc dl = I->getDebugLoc();
  BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER));
  ++NumVZU;
  EverMadeChange = true;
}

/// Add MBB to the DirtySuccessors list if it hasn't already been added.
void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
  if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
    DirtySuccessors.push_back(&MBB);
    BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
  }
}

/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
  // Start by assuming that the block is PASS_THROUGH which implies no unguarded
  // calls.
  BlockExitState CurState = PASS_THROUGH;
  BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();

  for (MachineInstr &MI : MBB) {
    bool IsCall = MI.isCall();
    bool IsReturn = MI.isReturn();
    bool IsControlFlow = IsCall || IsReturn;

    // No need for vzeroupper before iret in an interrupt handler function;
    // the epilogue will restore YMM/ZMM registers if needed.
    if (IsX86INTR && IsReturn)
      continue;

    // An existing VZERO* instruction resets the state.
    if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
      CurState = EXITS_CLEAN;
      continue;
    }

    // Shortcut: don't need to check regular instructions in dirty state.
    if (!IsControlFlow && CurState == EXITS_DIRTY)
      continue;

    if (hasYmmOrZmmReg(MI)) {
      // We found a ymm/zmm-using instruction; this could be an AVX/AVX512
      // instruction, or it could be control flow.
      CurState = EXITS_DIRTY;
      continue;
    }

    // Check for control-flow out of the current function (which might
    // indirectly execute SSE instructions).
    if (!IsControlFlow)
      continue;

    // If the call has no RegMask, skip it as well. This usually happens for
    // helper function calls (such as '_chkstk' or '_ftol2') where the standard
    // calling convention is not used: no RegMask marks the clobbered registers,
    // and register usage (def/implicit-def/use) is well-defined and explicitly
    // specified instead.
    if (IsCall && !callHasRegMask(MI))
      continue;

    // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15
    // registers. In addition, the processor changes back to Clean state, after
    // which execution of SSE instructions or AVX instructions has no transition
    // penalty. Add the VZEROUPPER instruction before any function call/return
    // that might execute SSE code.
    // FIXME: In some cases, we may want to move the VZEROUPPER into a
    // predecessor block.
    if (CurState == EXITS_DIRTY) {
      // After the inserted VZEROUPPER the state becomes clean again, but
      // other YMM/ZMM uses may appear before subsequent calls or even before
      // the end of the BB.
      insertVZeroUpper(MI, MBB);
      CurState = EXITS_CLEAN;
    } else if (CurState == PASS_THROUGH) {
      // If this block is currently in pass-through state and we encounter a
      // call then whether we need a vzeroupper or not depends on whether this
      // block has successors that exit dirty. Record the location of the call,
      // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
      // It will be inserted later if necessary.
      BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
      CurState = EXITS_CLEAN;
    }
  }

  LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: "
                    << getBlockExitStateName(CurState) << '\n');

  if (CurState == EXITS_DIRTY)
    for (MachineBasicBlock *Succ : MBB.successors())
      addDirtySuccessor(*Succ);

  BlockStates[MBB.getNumber()].ExitState = CurState;
}

/// Loop over all of the basic blocks, inserting vzeroupper instructions before
/// function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
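  // The pass is only needed when AVX is available and the subtarget actually
  // pays a penalty for partial YMM/ZMM writes.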
  if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite())
    return false;
  TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  EverMadeChange = false;
  IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;

  bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);

  // Fast check: if the function doesn't use any ymm/zmm registers, we don't
  // need to insert any VZEROUPPER instructions.  This is constant-time, so it
  // is cheap in the common case of no ymm/zmm use.
  bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
  const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass};
  for (auto *RC : RCs) {
    if (!YmmOrZmmUsed) {
      for (MCPhysReg Reg : *RC) {
        if (!MRI.reg_nodbg_empty(Reg)) {
          YmmOrZmmUsed = true;
          break;
        }
      }
    }
  }
  if (!YmmOrZmmUsed) {
    return false;
  }

  assert(BlockStates.empty() && DirtySuccessors.empty() &&
         "X86VZeroUpper state should be clear");
  BlockStates.resize(MF.getNumBlockIDs());

  // Process all blocks. This will compute block exit states, record the first
  // unguarded call in each block, and add successors of dirty blocks to the
  // DirtySuccessors list.
  for (MachineBasicBlock &MBB : MF)
    processBasicBlock(MBB);

  // If any YMM/ZMM regs are live-in to this function, add the entry block to
  // the DirtySuccessors list
  if (FnHasLiveInYmmOrZmm)
    addDirtySuccessor(MF.front());

  // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
  // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
  // through PASS_THROUGH blocks.
  while (!DirtySuccessors.empty()) {
    MachineBasicBlock &MBB = *DirtySuccessors.back();
    DirtySuccessors.pop_back();
    BlockState &BBState = BlockStates[MBB.getNumber()];

    // MBB is a successor of a dirty block, so its first call needs to be
    // guarded.
    if (BBState.FirstUnguardedCall != MBB.end())
      insertVZeroUpper(BBState.FirstUnguardedCall, MBB);

    // If this successor was a pass-through block, then it is now dirty. Its
    // successors need to be added to the worklist (if they haven't been
    // already).
    if (BBState.ExitState == PASS_THROUGH) {
      LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber()
                        << " was Pass-through, is now Dirty-out.\n");
      for (MachineBasicBlock *Succ : MBB.successors())
        addDirtySuccessor(*Succ);
    }
  }

  BlockStates.clear();
  return EverMadeChange;
}