xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 //===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "HexagonFrameLowering.h"
11 #include "HexagonBlockRanges.h"
12 #include "HexagonInstrInfo.h"
13 #include "HexagonMachineFunctionInfo.h"
14 #include "HexagonRegisterInfo.h"
15 #include "HexagonSubtarget.h"
16 #include "HexagonTargetMachine.h"
17 #include "MCTargetDesc/HexagonBaseInfo.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/PostOrderIterator.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineMemOperand.h"
33 #include "llvm/CodeGen/MachineModuleInfo.h"
34 #include "llvm/CodeGen/MachineOperand.h"
35 #include "llvm/CodeGen/MachinePostDominators.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/PseudoSourceValue.h"
38 #include "llvm/CodeGen/RegisterScavenging.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/MC/MCDwarf.h"
44 #include "llvm/MC/MCRegisterInfo.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/CodeGen.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/Compiler.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/MathExtras.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Target/TargetMachine.h"
54 #include "llvm/Target/TargetOptions.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <iterator>
59 #include <limits>
60 #include <map>
61 #include <optional>
62 #include <utility>
63 #include <vector>
64 
65 #define DEBUG_TYPE "hexagon-pei"
66 
67 // Hexagon stack frame layout as defined by the ABI:
68 //
69 //                                                       Incoming arguments
70 //                                                       passed via stack
71 //                                                                      |
72 //                                                                      |
73 //        SP during function's                 FP during function's     |
74 //    +-- runtime (top of stack)               runtime (bottom) --+     |
75 //    |                                                           |     |
76 // --++---------------------+------------------+-----------------++-+-------
77 //   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
78 //   |   called functions   |  local objects   |  local objects  |FP|
79 // --+----------------------+------------------+-----------------+--+-------
80 //    <-    size known    -> <- size unknown -> <- size known  ->
81 //
82 // Low address                                                 High address
83 //
84 // <--- stack growth
85 //
86 //
87 // - In any circumstances, the outgoing function arguments are always accessi-
88 //   ble using the SP, and the incoming arguments are accessible using the FP.
89 // - If the local objects are not aligned, they can always be accessed using
90 //   the FP.
// - If there are no variable-sized objects, the local objects can always be
//   accessed using the SP, regardless of whether they are aligned or not.
//   (The alignment padding will be at the bottom of the stack (highest
//   address), and so the offset with respect to the SP will be known at
//   compile time.)
96 //
97 // The only complication occurs if there are both, local aligned objects, and
98 // dynamically allocated (variable-sized) objects. The alignment pad will be
99 // placed between the FP and the local objects, thus preventing the use of the
100 // FP to access the local objects. At the same time, the variable-sized objects
101 // will be between the SP and the local objects, thus introducing an unknown
102 // distance from the SP to the locals.
103 //
// To avoid this problem, a new register is created that holds the aligned
// address of the bottom of the stack, referred to in the sources as AP
// (aligned pointer). The AP will be equal to "FP-p", where "p" is the
// smallest pad that aligns AP to the required boundary (a maximum of the
// alignments of all stack objects, fixed- and variable-sized). All local
// objects[1] will then use AP as the base pointer.
110 // [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
111 // their name from being allocated at fixed locations on the stack, relative
112 // to the FP. In the presence of dynamic allocation and local alignment, such
113 // objects can only be accessed through the FP.
114 //
115 // Illustration of the AP:
116 //                                                                FP --+
117 //                                                                     |
118 // ---------------+---------------------+-----+-----------------------++-+--
119 //   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
120 //   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
121 // ---------------+---------------------+-----+-----------------+-----+--+--
122 //                                      |<-- Multiple of the -->|
123 //                                           stack alignment    +-- AP
124 //
125 // The AP is set up at the beginning of the function. Since it is not a dedi-
126 // cated (reserved) register, it needs to be kept live throughout the function
127 // to be available as the base register for local object accesses.
// Normally, the address of a stack object is obtained by a pseudo-instruction
129 // PS_fi. To access local objects with the AP register present, a different
130 // pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra
131 // argument compared to PS_fi: the first input register is the AP register.
132 // This keeps the register live between its definition and its uses.
133 
134 // The AP register is originally set up using pseudo-instruction PS_aligna:
135 //   AP = PS_aligna A
136 // where
137 //   A  - required stack alignment
138 // The alignment value must be the maximum of all alignments required by
139 // any stack object.
140 
141 // The dynamic allocation uses a pseudo-instruction PS_alloca:
142 //   Rd = PS_alloca Rs, A
143 // where
144 //   Rd - address of the allocated space
//   Rs - minimum size (the actually allocated size can be larger to
//        accommodate alignment)
147 //   A  - required alignment
148 
149 using namespace llvm;
150 
151 static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
152     cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
153 
154 static cl::opt<unsigned>
155     NumberScavengerSlots("number-scavenger-slots", cl::Hidden,
156                          cl::desc("Set the number of scavenger slots"),
157                          cl::init(2));
158 
159 static cl::opt<int>
160     SpillFuncThreshold("spill-func-threshold", cl::Hidden,
161                        cl::desc("Specify O2(not Os) spill func threshold"),
162                        cl::init(6));
163 
164 static cl::opt<int>
165     SpillFuncThresholdOs("spill-func-threshold-Os", cl::Hidden,
166                          cl::desc("Specify Os spill func threshold"),
167                          cl::init(1));
168 
169 static cl::opt<bool> EnableStackOVFSanitizer(
170     "enable-stackovf-sanitizer", cl::Hidden,
171     cl::desc("Enable runtime checks for stack overflow."), cl::init(false));
172 
173 static cl::opt<bool>
174     EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden,
175                          cl::desc("Enable stack frame shrink wrapping"));
176 
177 static cl::opt<unsigned>
178     ShrinkLimit("shrink-frame-limit",
179                 cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden,
180                 cl::desc("Max count of stack frame shrink-wraps"));
181 
182 static cl::opt<bool>
183     EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden,
184                           cl::desc("Enable long calls for save-restore stubs."),
185                           cl::init(false));
186 
187 static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
188     cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
189 
190 static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
191     cl::init(true), cl::desc("Optimize spill slots"));
192 
193 #ifndef NDEBUG
194 static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,
195     cl::init(std::numeric_limits<unsigned>::max()));
196 static unsigned SpillOptCount = 0;
197 #endif
198 
199 namespace llvm {
200 
201   void initializeHexagonCallFrameInformationPass(PassRegistry&);
202   FunctionPass *createHexagonCallFrameInformation();
203 
204 } // end namespace llvm
205 
206 namespace {
207 
208   class HexagonCallFrameInformation : public MachineFunctionPass {
209   public:
210     static char ID;
211 
212     HexagonCallFrameInformation() : MachineFunctionPass(ID) {
213       PassRegistry &PR = *PassRegistry::getPassRegistry();
214       initializeHexagonCallFrameInformationPass(PR);
215     }
216 
217     bool runOnMachineFunction(MachineFunction &MF) override;
218 
219     MachineFunctionProperties getRequiredProperties() const override {
220       return MachineFunctionProperties().set(
221           MachineFunctionProperties::Property::NoVRegs);
222     }
223   };
224 
225   char HexagonCallFrameInformation::ID = 0;
226 
227 } // end anonymous namespace
228 
229 bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
230   auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
231   bool NeedCFI = MF.needsFrameMoves();
232 
233   if (!NeedCFI)
234     return false;
235   HFI.insertCFIInstructions(MF);
236   return true;
237 }
238 
239 INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
240                 "Hexagon call frame information", false, false)
241 
242 FunctionPass *llvm::createHexagonCallFrameInformation() {
243   return new HexagonCallFrameInformation();
244 }
245 
246 /// Map a register pair Reg to the subregister that has the greater "number",
247 /// i.e. D3 (aka R7:6) will be mapped to R7, etc.
248 static Register getMax32BitSubRegister(Register Reg,
249                                        const TargetRegisterInfo &TRI,
250                                        bool hireg = true) {
251     if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
252       return Reg;
253 
254     Register RegNo = 0;
255     for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
256       if (hireg) {
257         if (*SubRegs > RegNo)
258           RegNo = *SubRegs;
259       } else {
260         if (!RegNo || *SubRegs < RegNo)
261           RegNo = *SubRegs;
262       }
263     }
264     return RegNo;
265 }
266 
267 /// Returns the callee saved register with the largest id in the vector.
268 static Register getMaxCalleeSavedReg(ArrayRef<CalleeSavedInfo> CSI,
269                                      const TargetRegisterInfo &TRI) {
270   static_assert(Hexagon::R1 > 0,
271                 "Assume physical registers are encoded as positive integers");
272   if (CSI.empty())
273     return 0;
274 
275   Register Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
276   for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
277     Register Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
278     if (Reg > Max)
279       Max = Reg;
280   }
281   return Max;
282 }
283 
284 /// Checks if the basic block contains any instruction that needs a stack
285 /// frame to be already in place.
286 static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
287                             const HexagonRegisterInfo &HRI) {
288     for (const MachineInstr &MI : MBB) {
289       if (MI.isCall())
290         return true;
291       unsigned Opc = MI.getOpcode();
292       switch (Opc) {
293         case Hexagon::PS_alloca:
294         case Hexagon::PS_aligna:
295           return true;
296         default:
297           break;
298       }
299       // Check individual operands.
300       for (const MachineOperand &MO : MI.operands()) {
301         // While the presence of a frame index does not prove that a stack
302         // frame will be required, all frame indexes should be within alloc-
303         // frame/deallocframe. Otherwise, the code that translates a frame
304         // index into an offset would have to be aware of the placement of
305         // the frame creation/destruction instructions.
306         if (MO.isFI())
307           return true;
308         if (MO.isReg()) {
309           Register R = MO.getReg();
310           // Virtual registers will need scavenging, which then may require
311           // a stack slot.
312           if (R.isVirtual())
313             return true;
314           for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
315             if (CSR[*S])
316               return true;
317           continue;
318         }
319         if (MO.isRegMask()) {
320           // A regmask would normally have all callee-saved registers marked
321           // as preserved, so this check would not be needed, but in case of
322           // ever having other regmasks (for other calling conventions),
323           // make sure they would be processed correctly.
324           const uint32_t *BM = MO.getRegMask();
325           for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
326             unsigned R = x;
327             // If this regmask does not preserve a CSR, a frame will be needed.
328             if (!(BM[R/32] & (1u << (R%32))))
329               return true;
330           }
331         }
332       }
333     }
334     return false;
335 }
336 
337   /// Returns true if MBB has a machine instructions that indicates a tail call
338   /// in the block.
339 static bool hasTailCall(const MachineBasicBlock &MBB) {
340     MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
341     if (I == MBB.end())
342       return false;
343     unsigned RetOpc = I->getOpcode();
344     return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;
345 }
346 
347 /// Returns true if MBB contains an instruction that returns.
348 static bool hasReturn(const MachineBasicBlock &MBB) {
349     for (const MachineInstr &MI : MBB.terminators())
350       if (MI.isReturn())
351         return true;
352     return false;
353 }
354 
355 /// Returns the "return" instruction from this block, or nullptr if there
356 /// isn't any.
357 static MachineInstr *getReturn(MachineBasicBlock &MBB) {
358     for (auto &I : MBB)
359       if (I.isReturn())
360         return &I;
361     return nullptr;
362 }
363 
364 static bool isRestoreCall(unsigned Opc) {
365     switch (Opc) {
366       case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
367       case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
368       case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:
369       case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:
370       case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:
371       case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:
372       case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
373       case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
374         return true;
375     }
376     return false;
377 }
378 
379 static inline bool isOptNone(const MachineFunction &MF) {
380     return MF.getFunction().hasOptNone() ||
381            MF.getTarget().getOptLevel() == CodeGenOpt::None;
382 }
383 
384 static inline bool isOptSize(const MachineFunction &MF) {
385     const Function &F = MF.getFunction();
386     return F.hasOptSize() && !F.hasMinSize();
387 }
388 
389 static inline bool isMinSize(const MachineFunction &MF) {
390     return MF.getFunction().hasMinSize();
391 }
392 
393 /// Implements shrink-wrapping of the stack frame. By default, stack frame
394 /// is created in the function entry block, and is cleaned up in every block
395 /// that returns. This function finds alternate blocks: one for the frame
396 /// setup (prolog) and one for the cleanup (epilog).
397 void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
398       MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
399   static unsigned ShrinkCounter = 0;
400 
401   if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() &&
402       MF.getFunction().isVarArg())
403     return;
404   if (ShrinkLimit.getPosition()) {
405     if (ShrinkCounter >= ShrinkLimit)
406       return;
407     ShrinkCounter++;
408   }
409 
410   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
411 
412   MachineDominatorTree MDT;
413   MDT.runOnMachineFunction(MF);
414   MachinePostDominatorTree MPT;
415   MPT.runOnMachineFunction(MF);
416 
417   using UnsignedMap = DenseMap<unsigned, unsigned>;
418   using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
419 
420   UnsignedMap RPO;
421   RPOTType RPOT(&MF);
422   unsigned RPON = 0;
423   for (auto &I : RPOT)
424     RPO[I->getNumber()] = RPON++;
425 
426   // Don't process functions that have loops, at least for now. Placement
427   // of prolog and epilog must take loop structure into account. For simpli-
428   // city don't do it right now.
429   for (auto &I : MF) {
430     unsigned BN = RPO[I.getNumber()];
431     for (MachineBasicBlock *Succ : I.successors())
432       // If found a back-edge, return.
433       if (RPO[Succ->getNumber()] <= BN)
434         return;
435   }
436 
437   // Collect the set of blocks that need a stack frame to execute. Scan
438   // each block for uses/defs of callee-saved registers, calls, etc.
439   SmallVector<MachineBasicBlock*,16> SFBlocks;
440   BitVector CSR(Hexagon::NUM_TARGET_REGS);
441   for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
442     for (MCSubRegIterator S(*P, &HRI, true); S.isValid(); ++S)
443       CSR[*S] = true;
444 
445   for (auto &I : MF)
446     if (needsStackFrame(I, CSR, HRI))
447       SFBlocks.push_back(&I);
448 
449   LLVM_DEBUG({
450     dbgs() << "Blocks needing SF: {";
451     for (auto &B : SFBlocks)
452       dbgs() << " " << printMBBReference(*B);
453     dbgs() << " }\n";
454   });
455   // No frame needed?
456   if (SFBlocks.empty())
457     return;
458 
459   // Pick a common dominator and a common post-dominator.
460   MachineBasicBlock *DomB = SFBlocks[0];
461   for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
462     DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
463     if (!DomB)
464       break;
465   }
466   MachineBasicBlock *PDomB = SFBlocks[0];
467   for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
468     PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
469     if (!PDomB)
470       break;
471   }
472   LLVM_DEBUG({
473     dbgs() << "Computed dom block: ";
474     if (DomB)
475       dbgs() << printMBBReference(*DomB);
476     else
477       dbgs() << "<null>";
478     dbgs() << ", computed pdom block: ";
479     if (PDomB)
480       dbgs() << printMBBReference(*PDomB);
481     else
482       dbgs() << "<null>";
483     dbgs() << "\n";
484   });
485   if (!DomB || !PDomB)
486     return;
487 
488   // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
489   if (!MDT.dominates(DomB, PDomB)) {
490     LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
491     return;
492   }
493   if (!MPT.dominates(PDomB, DomB)) {
494     LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
495     return;
496   }
497 
498   // Finally, everything seems right.
499   PrologB = DomB;
500   EpilogB = PDomB;
501 }
502 
503 /// Perform most of the PEI work here:
504 /// - saving/restoring of the callee-saved registers,
505 /// - stack frame creation and destruction.
506 /// Normally, this work is distributed among various functions, but doing it
507 /// in one place allows shrink-wrapping of the stack frame.
508 void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
509                                         MachineBasicBlock &MBB) const {
510   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
511 
512   MachineFrameInfo &MFI = MF.getFrameInfo();
513   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
514 
515   MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
516   if (EnableShrinkWrapping)
517     findShrunkPrologEpilog(MF, PrologB, EpilogB);
518 
519   bool PrologueStubs = false;
520   insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
521   insertPrologueInBlock(*PrologB, PrologueStubs);
522   updateEntryPaths(MF, *PrologB);
523 
524   if (EpilogB) {
525     insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
526     insertEpilogueInBlock(*EpilogB);
527   } else {
528     for (auto &B : MF)
529       if (B.isReturnBlock())
530         insertCSRRestoresInBlock(B, CSI, HRI);
531 
532     for (auto &B : MF)
533       if (B.isReturnBlock())
534         insertEpilogueInBlock(B);
535 
536     for (auto &B : MF) {
537       if (B.empty())
538         continue;
539       MachineInstr *RetI = getReturn(B);
540       if (!RetI || isRestoreCall(RetI->getOpcode()))
541         continue;
542       for (auto &R : CSI)
543         RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
544     }
545   }
546 
547   if (EpilogB) {
548     // If there is an epilog block, it may not have a return instruction.
549     // In such case, we need to add the callee-saved registers as live-ins
550     // in all blocks on all paths from the epilog to any return block.
551     unsigned MaxBN = MF.getNumBlockIDs();
552     BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
553     updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);
554   }
555 }
556 
557 /// Returns true if the target can safely skip saving callee-saved registers
558 /// for noreturn nounwind functions.
559 bool HexagonFrameLowering::enableCalleeSaveSkip(
560     const MachineFunction &MF) const {
561   const auto &F = MF.getFunction();
562   assert(F.hasFnAttribute(Attribute::NoReturn) &&
563          F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
564          !F.getFunction().hasFnAttribute(Attribute::UWTable));
565   (void)F;
566 
567   // No need to save callee saved registers if the function does not return.
568   return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
569 }
570 
571 // Helper function used to determine when to eliminate the stack frame for
572 // functions marked as noreturn and when the noreturn-stack-elim options are
573 // specified. When both these conditions are true, then a FP may not be needed
574 // if the function makes a call. It is very similar to enableCalleeSaveSkip,
575 // but it used to check if the allocframe can be eliminated as well.
576 static bool enableAllocFrameElim(const MachineFunction &MF) {
577   const auto &F = MF.getFunction();
578   const auto &MFI = MF.getFrameInfo();
579   const auto &HST = MF.getSubtarget<HexagonSubtarget>();
580   assert(!MFI.hasVarSizedObjects() &&
581          !HST.getRegisterInfo()->hasStackRealignment(MF));
582   return F.hasFnAttribute(Attribute::NoReturn) &&
583     F.hasFnAttribute(Attribute::NoUnwind) &&
584     !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&
585     MFI.getStackSize() == 0;
586 }
587 
588 void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
589       bool PrologueStubs) const {
590   MachineFunction &MF = *MBB.getParent();
591   MachineFrameInfo &MFI = MF.getFrameInfo();
592   auto &HST = MF.getSubtarget<HexagonSubtarget>();
593   auto &HII = *HST.getInstrInfo();
594   auto &HRI = *HST.getRegisterInfo();
595 
596   Align MaxAlign = std::max(MFI.getMaxAlign(), getStackAlign());
597 
598   // Calculate the total stack frame size.
599   // Get the number of bytes to allocate from the FrameInfo.
600   unsigned FrameSize = MFI.getStackSize();
601   // Round up the max call frame size to the max alignment on the stack.
602   unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);
603   MFI.setMaxCallFrameSize(MaxCFA);
604 
605   FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
606   MFI.setStackSize(FrameSize);
607 
608   bool AlignStack = (MaxAlign > getStackAlign());
609 
610   // Get the number of bytes to allocate from the FrameInfo.
611   unsigned NumBytes = MFI.getStackSize();
612   Register SP = HRI.getStackRegister();
613   unsigned MaxCF = MFI.getMaxCallFrameSize();
614   MachineBasicBlock::iterator InsertPt = MBB.begin();
615 
616   SmallVector<MachineInstr *, 4> AdjustRegs;
617   for (auto &MBB : MF)
618     for (auto &MI : MBB)
619       if (MI.getOpcode() == Hexagon::PS_alloca)
620         AdjustRegs.push_back(&MI);
621 
622   for (auto *MI : AdjustRegs) {
623     assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");
624     expandAlloca(MI, HII, SP, MaxCF);
625     MI->eraseFromParent();
626   }
627 
628   DebugLoc dl = MBB.findDebugLoc(InsertPt);
629 
630   if (MF.getFunction().isVarArg() &&
631       MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
632     // Calculate the size of register saved area.
633     int NumVarArgRegs = 6 - FirstVarArgSavedReg;
634     int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0)
635                                               ? NumVarArgRegs * 4
636                                               : NumVarArgRegs * 4 + 4;
637     if (RegisterSavedAreaSizePlusPadding > 0) {
638       // Decrement the stack pointer by size of register saved area plus
639       // padding if any.
640       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
641         .addReg(SP)
642         .addImm(-RegisterSavedAreaSizePlusPadding)
643         .setMIFlag(MachineInstr::FrameSetup);
644 
645       int NumBytes = 0;
646       // Copy all the named arguments below register saved area.
647       auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
648       for (int i = HMFI.getFirstNamedArgFrameIndex(),
649                e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) {
650         uint64_t ObjSize = MFI.getObjectSize(i);
651         Align ObjAlign = MFI.getObjectAlign(i);
652 
653         // Determine the kind of load/store that should be used.
654         unsigned LDOpc, STOpc;
655         uint64_t OpcodeChecker = ObjAlign.value();
656 
657         // Handle cases where alignment of an object is > its size.
658         if (ObjAlign > ObjSize) {
659           if (ObjSize <= 1)
660             OpcodeChecker = 1;
661           else if (ObjSize <= 2)
662             OpcodeChecker = 2;
663           else if (ObjSize <= 4)
664             OpcodeChecker = 4;
665           else if (ObjSize > 4)
666             OpcodeChecker = 8;
667         }
668 
669         switch (OpcodeChecker) {
670           case 1:
671             LDOpc = Hexagon::L2_loadrb_io;
672             STOpc = Hexagon::S2_storerb_io;
673             break;
674           case 2:
675             LDOpc = Hexagon::L2_loadrh_io;
676             STOpc = Hexagon::S2_storerh_io;
677             break;
678           case 4:
679             LDOpc = Hexagon::L2_loadri_io;
680             STOpc = Hexagon::S2_storeri_io;
681             break;
682           case 8:
683           default:
684             LDOpc = Hexagon::L2_loadrd_io;
685             STOpc = Hexagon::S2_storerd_io;
686             break;
687         }
688 
689         Register RegUsed = LDOpc == Hexagon::L2_loadrd_io ? Hexagon::D3
690                                                           : Hexagon::R6;
691         int LoadStoreCount = ObjSize / OpcodeChecker;
692 
693         if (ObjSize % OpcodeChecker)
694           ++LoadStoreCount;
695 
696         // Get the start location of the load. NumBytes is basically the
697         // offset from the stack pointer of previous function, which would be
698         // the caller in this case, as this function has variable argument
699         // list.
700         if (NumBytes != 0)
701           NumBytes = alignTo(NumBytes, ObjAlign);
702 
703         int Count = 0;
704         while (Count < LoadStoreCount) {
705           // Load the value of the named argument on stack.
706           BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed)
707               .addReg(SP)
708               .addImm(RegisterSavedAreaSizePlusPadding +
709                       ObjAlign.value() * Count + NumBytes)
710               .setMIFlag(MachineInstr::FrameSetup);
711 
712           // Store it below the register saved area plus padding.
713           BuildMI(MBB, InsertPt, dl, HII.get(STOpc))
714               .addReg(SP)
715               .addImm(ObjAlign.value() * Count + NumBytes)
716               .addReg(RegUsed)
717               .setMIFlag(MachineInstr::FrameSetup);
718 
719           Count++;
720         }
721         NumBytes += MFI.getObjectSize(i);
722       }
723 
724       // Make NumBytes 8 byte aligned
725       NumBytes = alignTo(NumBytes, 8);
726 
727       // If the number of registers having variable arguments is odd,
728       // leave 4 bytes of padding to get to the location where first
729       // variable argument which was passed through register was copied.
730       NumBytes = (NumVarArgRegs % 2 == 0) ? NumBytes : NumBytes + 4;
731 
732       for (int j = FirstVarArgSavedReg, i = 0; j < 6; ++j, ++i) {
733         BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_storeri_io))
734           .addReg(SP)
735           .addImm(NumBytes + 4 * i)
736           .addReg(Hexagon::R0 + j)
737           .setMIFlag(MachineInstr::FrameSetup);
738       }
739     }
740   }
741 
742   if (hasFP(MF)) {
743     insertAllocframe(MBB, InsertPt, NumBytes);
744     if (AlignStack) {
745       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
746           .addReg(SP)
747           .addImm(-int64_t(MaxAlign.value()));
748     }
749     // If the stack-checking is enabled, and we spilled the callee-saved
750     // registers inline (i.e. did not use a spill function), then call
751     // the stack checker directly.
752     if (EnableStackOVFSanitizer && !PrologueStubs)
753       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
754              .addExternalSymbol("__runtime_stack_check");
755   } else if (NumBytes > 0) {
756     assert(alignTo(NumBytes, 8) == NumBytes);
757     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
758       .addReg(SP)
759       .addImm(-int(NumBytes));
760   }
761 }
762 
763 void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
764   MachineFunction &MF = *MBB.getParent();
765   auto &HST = MF.getSubtarget<HexagonSubtarget>();
766   auto &HII = *HST.getInstrInfo();
767   auto &HRI = *HST.getRegisterInfo();
768   Register SP = HRI.getStackRegister();
769 
770   MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
771   DebugLoc dl = MBB.findDebugLoc(InsertPt);
772 
773   if (!hasFP(MF)) {
774     MachineFrameInfo &MFI = MF.getFrameInfo();
775     unsigned NumBytes = MFI.getStackSize();
776     if (MF.getFunction().isVarArg() &&
777         MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
778       // On Hexagon Linux, deallocate the stack for the register saved area.
779       int NumVarArgRegs = 6 - FirstVarArgSavedReg;
780       int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
781         (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
782       NumBytes += RegisterSavedAreaSizePlusPadding;
783     }
784     if (NumBytes) {
785       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
786         .addReg(SP)
787         .addImm(NumBytes);
788     }
789     return;
790   }
791 
792   MachineInstr *RetI = getReturn(MBB);
793   unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
794 
795   // Handle EH_RETURN.
796   if (RetOpc == Hexagon::EH_RETURN_JMPR) {
797     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
798         .addDef(Hexagon::D15)
799         .addReg(Hexagon::R30);
800     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
801         .addReg(SP)
802         .addReg(Hexagon::R28);
803     return;
804   }
805 
806   // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
807   // frame instruction if we encounter it.
808   if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
809       RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||
810       RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||
811       RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) {
812     MachineBasicBlock::iterator It = RetI;
813     ++It;
814     // Delete all instructions after the RESTORE (except labels).
815     while (It != MBB.end()) {
816       if (!It->isLabel())
817         It = MBB.erase(It);
818       else
819         ++It;
820     }
821     return;
822   }
823 
824   // It is possible that the restoring code is a call to a library function.
825   // All of the restore* functions include "deallocframe", so we need to make
826   // sure that we don't add an extra one.
827   bool NeedsDeallocframe = true;
828   if (!MBB.empty() && InsertPt != MBB.begin()) {
829     MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
830     unsigned COpc = PrevIt->getOpcode();
831     if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
832         COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||
833         COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
834         COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
835         COpc == Hexagon::PS_call_nr || COpc == Hexagon::PS_callr_nr)
836       NeedsDeallocframe = false;
837   }
838 
839   if (!MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() ||
840       !MF.getFunction().isVarArg()) {
841     if (!NeedsDeallocframe)
842       return;
843     // If the returning instruction is PS_jmpret, replace it with
844     // dealloc_return, otherwise just add deallocframe. The function
845     // could be returning via a tail call.
846     if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
847       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
848       .addDef(Hexagon::D15)
849       .addReg(Hexagon::R30);
850       return;
851     }
852     unsigned NewOpc = Hexagon::L4_return;
853     MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
854       .addDef(Hexagon::D15)
855       .addReg(Hexagon::R30);
856     // Transfer the function live-out registers.
857     NewI->copyImplicitOps(MF, *RetI);
858     MBB.erase(RetI);
859   } else {
860     // L2_deallocframe instruction after it.
861     // Calculate the size of register saved area.
862     int NumVarArgRegs = 6 - FirstVarArgSavedReg;
863     int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
864       (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
865 
866     MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
867     MachineBasicBlock::iterator I = (Term == MBB.begin()) ? MBB.end()
868                                                           : std::prev(Term);
869     if (I == MBB.end() ||
870        (I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT &&
871         I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC &&
872         I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 &&
873         I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC))
874       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
875         .addDef(Hexagon::D15)
876         .addReg(Hexagon::R30);
877     if (RegisterSavedAreaSizePlusPadding != 0)
878       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
879         .addReg(SP)
880         .addImm(RegisterSavedAreaSizePlusPadding);
881   }
882 }
883 
884 void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
885       MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
886   MachineFunction &MF = *MBB.getParent();
887   auto &HST = MF.getSubtarget<HexagonSubtarget>();
888   auto &HII = *HST.getInstrInfo();
889   auto &HRI = *HST.getRegisterInfo();
890 
891   // Check for overflow.
892   // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
893   const unsigned int ALLOCFRAME_MAX = 16384;
894 
895   // Create a dummy memory operand to avoid allocframe from being treated as
896   // a volatile memory reference.
897   auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
898                                       MachineMemOperand::MOStore, 4, Align(4));
899 
900   DebugLoc dl = MBB.findDebugLoc(InsertPt);
901   Register SP = HRI.getStackRegister();
902 
903   if (NumBytes >= ALLOCFRAME_MAX) {
904     // Emit allocframe(#0).
905     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
906       .addDef(SP)
907       .addReg(SP)
908       .addImm(0)
909       .addMemOperand(MMO);
910 
911     // Subtract the size from the stack pointer.
912     Register SP = HRI.getStackRegister();
913     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
914       .addReg(SP)
915       .addImm(-int(NumBytes));
916   } else {
917     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
918       .addDef(SP)
919       .addReg(SP)
920       .addImm(NumBytes)
921       .addMemOperand(MMO);
922   }
923 }
924 
925 void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
926       MachineBasicBlock &SaveB) const {
927   SetVector<unsigned> Worklist;
928 
929   MachineBasicBlock &EntryB = MF.front();
930   Worklist.insert(EntryB.getNumber());
931 
932   unsigned SaveN = SaveB.getNumber();
933   auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();
934 
935   for (unsigned i = 0; i < Worklist.size(); ++i) {
936     unsigned BN = Worklist[i];
937     MachineBasicBlock &MBB = *MF.getBlockNumbered(BN);
938     for (auto &R : CSI)
939       if (!MBB.isLiveIn(R.getReg()))
940         MBB.addLiveIn(R.getReg());
941     if (BN != SaveN)
942       for (auto &SB : MBB.successors())
943         Worklist.insert(SB->getNumber());
944   }
945 }
946 
947 bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
948       MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,
949       BitVector &Path) const {
950   assert(MBB.getNumber() >= 0);
951   unsigned BN = MBB.getNumber();
952   if (Path[BN] || DoneF[BN])
953     return false;
954   if (DoneT[BN])
955     return true;
956 
957   auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();
958 
959   Path[BN] = true;
960   bool ReachedExit = false;
961   for (auto &SB : MBB.successors())
962     ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);
963 
964   if (!MBB.empty() && MBB.back().isReturn()) {
965     // Add implicit uses of all callee-saved registers to the reached
966     // return instructions. This is to prevent the anti-dependency breaker
967     // from renaming these registers.
968     MachineInstr &RetI = MBB.back();
969     if (!isRestoreCall(RetI.getOpcode()))
970       for (auto &R : CSI)
971         RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
972     ReachedExit = true;
973   }
974 
975   // We don't want to add unnecessary live-ins to the restore block: since
976   // the callee-saved registers are being defined in it, the entry of the
977   // restore block cannot be on the path from the definitions to any exit.
978   if (ReachedExit && &MBB != &RestoreB) {
979     for (auto &R : CSI)
980       if (!MBB.isLiveIn(R.getReg()))
981         MBB.addLiveIn(R.getReg());
982     DoneT[BN] = true;
983   }
984   if (!ReachedExit)
985     DoneF[BN] = true;
986 
987   Path[BN] = false;
988   return ReachedExit;
989 }
990 
991 static std::optional<MachineBasicBlock::iterator>
992 findCFILocation(MachineBasicBlock &B) {
993     // The CFI instructions need to be inserted right after allocframe.
994     // An exception to this is a situation where allocframe is bundled
995     // with a call: then the CFI instructions need to be inserted before
996     // the packet with the allocframe+call (in case the call throws an
997     // exception).
998     auto End = B.instr_end();
999 
1000     for (MachineInstr &I : B) {
1001       MachineBasicBlock::iterator It = I.getIterator();
1002       if (!I.isBundle()) {
1003         if (I.getOpcode() == Hexagon::S2_allocframe)
1004           return std::next(It);
1005         continue;
1006       }
1007       // I is a bundle.
1008       bool HasCall = false, HasAllocFrame = false;
1009       auto T = It.getInstrIterator();
1010       while (++T != End && T->isBundled()) {
1011         if (T->getOpcode() == Hexagon::S2_allocframe)
1012           HasAllocFrame = true;
1013         else if (T->isCall())
1014           HasCall = true;
1015       }
1016       if (HasAllocFrame)
1017         return HasCall ? It : std::next(It);
1018     }
1019     return std::nullopt;
1020 }
1021 
1022 void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
1023     for (auto &B : MF)
1024       if (auto At = findCFILocation(B))
1025         insertCFIInstructionsAt(B, *At);
1026 }
1027 
1028 void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
1029       MachineBasicBlock::iterator At) const {
1030   MachineFunction &MF = *MBB.getParent();
1031   MachineFrameInfo &MFI = MF.getFrameInfo();
1032   MachineModuleInfo &MMI = MF.getMMI();
1033   auto &HST = MF.getSubtarget<HexagonSubtarget>();
1034   auto &HII = *HST.getInstrInfo();
1035   auto &HRI = *HST.getRegisterInfo();
1036 
1037   // If CFI instructions have debug information attached, something goes
1038   // wrong with the final assembly generation: the prolog_end is placed
1039   // in a wrong location.
1040   DebugLoc DL;
1041   const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
1042 
1043   MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
1044   bool HasFP = hasFP(MF);
1045 
1046   if (HasFP) {
1047     unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
1048     unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
1049 
1050     // Define CFA via an offset from the value of FP.
1051     //
1052     //  -8   -4    0 (SP)
1053     // --+----+----+---------------------
1054     //   | FP | LR |          increasing addresses -->
1055     // --+----+----+---------------------
1056     //   |         +-- Old SP (before allocframe)
1057     //   +-- New FP (after allocframe)
1058     //
1059     // MCCFIInstruction::cfiDefCfa adds the offset from the register.
1060     // MCCFIInstruction::createOffset takes the offset without sign change.
1061     auto DefCfa = MCCFIInstruction::cfiDefCfa(FrameLabel, DwFPReg, 8);
1062     BuildMI(MBB, At, DL, CFID)
1063         .addCFIIndex(MF.addFrameInst(DefCfa));
1064     // R31 (return addr) = CFA - 4
1065     auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
1066     BuildMI(MBB, At, DL, CFID)
1067         .addCFIIndex(MF.addFrameInst(OffR31));
1068     // R30 (frame ptr) = CFA - 8
1069     auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
1070     BuildMI(MBB, At, DL, CFID)
1071         .addCFIIndex(MF.addFrameInst(OffR30));
1072   }
1073 
1074   static Register RegsToMove[] = {
1075     Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
1076     Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
1077     Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
1078     Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
1079     Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
1080     Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
1081     Hexagon::NoRegister
1082   };
1083 
1084   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1085 
1086   for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
1087     Register Reg = RegsToMove[i];
1088     auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
1089       return C.getReg() == Reg;
1090     };
1091     auto F = find_if(CSI, IfR);
1092     if (F == CSI.end())
1093       continue;
1094 
1095     int64_t Offset;
1096     if (HasFP) {
1097       // If the function has a frame pointer (i.e. has an allocframe),
1098       // then the CFA has been defined in terms of FP. Any offsets in
1099       // the following CFI instructions have to be defined relative
1100       // to FP, which points to the bottom of the stack frame.
1101       // The function getFrameIndexReference can still choose to use SP
1102       // for the offset calculation, so we cannot simply call it here.
1103       // Instead, get the offset (relative to the FP) directly.
1104       Offset = MFI.getObjectOffset(F->getFrameIdx());
1105     } else {
1106       Register FrameReg;
1107       Offset =
1108           getFrameIndexReference(MF, F->getFrameIdx(), FrameReg).getFixed();
1109     }
1110     // Subtract 8 to make room for R30 and R31, which are added above.
1111     Offset -= 8;
1112 
1113     if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
1114       unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
1115       auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
1116                                                    Offset);
1117       BuildMI(MBB, At, DL, CFID)
1118           .addCFIIndex(MF.addFrameInst(OffReg));
1119     } else {
1120       // Split the double regs into subregs, and generate appropriate
1121       // cfi_offsets.
1122       // The only reason, we are split double regs is, llvm-mc does not
1123       // understand paired registers for cfi_offset.
1124       // Eg .cfi_offset r1:0, -64
1125 
1126       Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
1127       Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
1128       unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
1129       unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
1130       auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
1131                                                   Offset+4);
1132       BuildMI(MBB, At, DL, CFID)
1133           .addCFIIndex(MF.addFrameInst(OffHi));
1134       auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
1135                                                   Offset);
1136       BuildMI(MBB, At, DL, CFID)
1137           .addCFIIndex(MF.addFrameInst(OffLo));
1138     }
1139   }
1140 }
1141 
1142 bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
1143   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1144     return false;
1145 
1146   auto &MFI = MF.getFrameInfo();
1147   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1148   bool HasExtraAlign = HRI.hasStackRealignment(MF);
1149   bool HasAlloca = MFI.hasVarSizedObjects();
1150 
1151   // Insert ALLOCFRAME if we need to or at -O0 for the debugger.  Think
1152   // that this shouldn't be required, but doing so now because gcc does and
1153   // gdb can't break at the start of the function without it.  Will remove if
1154   // this turns out to be a gdb bug.
1155   //
1156   if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
1157     return true;
1158 
1159   // By default we want to use SP (since it's always there). FP requires
1160   // some setup (i.e. ALLOCFRAME).
1161   // Both, alloca and stack alignment modify the stack pointer by an
1162   // undetermined value, so we need to save it at the entry to the function
1163   // (i.e. use allocframe).
1164   if (HasAlloca || HasExtraAlign)
1165     return true;
1166 
1167   if (MFI.getStackSize() > 0) {
1168     // If FP-elimination is disabled, we have to use FP at this point.
1169     const TargetMachine &TM = MF.getTarget();
1170     if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
1171       return true;
1172     if (EnableStackOVFSanitizer)
1173       return true;
1174   }
1175 
1176   const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1177   if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
1178     return true;
1179 
1180   return false;
1181 }
1182 
/// Selects the family of out-of-line callee-saved register routines that
/// getSpillFunctionFor picks a name from.
enum SpillKind {
  /// The "__save_r16_through_rNN" routines (optionally "_stkchk").
  SK_ToMem,
  /// The "__restore_r16_through_rNN_and_deallocframe" routines.
  SK_FromMem,
  /// The "..._and_deallocframe_before_tailcall" routines.
  SK_FromMemTailcall
};
1188 
1189 static const char *getSpillFunctionFor(Register MaxReg, SpillKind SpillType,
1190       bool Stkchk = false) {
1191   const char * V4SpillToMemoryFunctions[] = {
1192     "__save_r16_through_r17",
1193     "__save_r16_through_r19",
1194     "__save_r16_through_r21",
1195     "__save_r16_through_r23",
1196     "__save_r16_through_r25",
1197     "__save_r16_through_r27" };
1198 
1199   const char * V4SpillToMemoryStkchkFunctions[] = {
1200     "__save_r16_through_r17_stkchk",
1201     "__save_r16_through_r19_stkchk",
1202     "__save_r16_through_r21_stkchk",
1203     "__save_r16_through_r23_stkchk",
1204     "__save_r16_through_r25_stkchk",
1205     "__save_r16_through_r27_stkchk" };
1206 
1207   const char * V4SpillFromMemoryFunctions[] = {
1208     "__restore_r16_through_r17_and_deallocframe",
1209     "__restore_r16_through_r19_and_deallocframe",
1210     "__restore_r16_through_r21_and_deallocframe",
1211     "__restore_r16_through_r23_and_deallocframe",
1212     "__restore_r16_through_r25_and_deallocframe",
1213     "__restore_r16_through_r27_and_deallocframe" };
1214 
1215   const char * V4SpillFromMemoryTailcallFunctions[] = {
1216     "__restore_r16_through_r17_and_deallocframe_before_tailcall",
1217     "__restore_r16_through_r19_and_deallocframe_before_tailcall",
1218     "__restore_r16_through_r21_and_deallocframe_before_tailcall",
1219     "__restore_r16_through_r23_and_deallocframe_before_tailcall",
1220     "__restore_r16_through_r25_and_deallocframe_before_tailcall",
1221     "__restore_r16_through_r27_and_deallocframe_before_tailcall"
1222   };
1223 
1224   const char **SpillFunc = nullptr;
1225 
1226   switch(SpillType) {
1227   case SK_ToMem:
1228     SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions
1229                        : V4SpillToMemoryFunctions;
1230     break;
1231   case SK_FromMem:
1232     SpillFunc = V4SpillFromMemoryFunctions;
1233     break;
1234   case SK_FromMemTailcall:
1235     SpillFunc = V4SpillFromMemoryTailcallFunctions;
1236     break;
1237   }
1238   assert(SpillFunc && "Unknown spill kind");
1239 
1240   // Spill all callee-saved registers up to the highest register used.
1241   switch (MaxReg) {
1242   case Hexagon::R17:
1243     return SpillFunc[0];
1244   case Hexagon::R19:
1245     return SpillFunc[1];
1246   case Hexagon::R21:
1247     return SpillFunc[2];
1248   case Hexagon::R23:
1249     return SpillFunc[3];
1250   case Hexagon::R25:
1251     return SpillFunc[4];
1252   case Hexagon::R27:
1253     return SpillFunc[5];
1254   default:
1255     llvm_unreachable("Unhandled maximum callee save register");
1256   }
1257   return nullptr;
1258 }
1259 
1260 StackOffset
1261 HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
1262                                              Register &FrameReg) const {
1263   auto &MFI = MF.getFrameInfo();
1264   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1265 
1266   int Offset = MFI.getObjectOffset(FI);
1267   bool HasAlloca = MFI.hasVarSizedObjects();
1268   bool HasExtraAlign = HRI.hasStackRealignment(MF);
1269   bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
1270 
1271   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1272   unsigned FrameSize = MFI.getStackSize();
1273   Register SP = HRI.getStackRegister();
1274   Register FP = HRI.getFrameRegister();
1275   Register AP = HMFI.getStackAlignBaseReg();
1276   // It may happen that AP will be absent even HasAlloca && HasExtraAlign
1277   // is true. HasExtraAlign may be set because of vector spills, without
1278   // aligned locals or aligned outgoing function arguments. Since vector
1279   // spills will ultimately be "unaligned", it is safe to use FP as the
1280   // base register.
1281   // In fact, in such a scenario the stack is actually not required to be
1282   // aligned, although it may end up being aligned anyway, since this
1283   // particular case is not easily detectable. The alignment will be
1284   // unnecessary, but not incorrect.
1285   // Unfortunately there is no quick way to verify that the above is
1286   // indeed the case (and that it's not a result of an error), so just
1287   // assume that missing AP will be replaced by FP.
1288   // (A better fix would be to rematerialize AP from FP and always align
1289   // vector spills.)
1290   bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
1291   // Use FP at -O0, except when there are objects with extra alignment.
1292   // That additional alignment requirement may cause a pad to be inserted,
1293   // which will make it impossible to use FP to access objects located
1294   // past the pad.
1295   if (NoOpt && !HasExtraAlign)
1296     UseFP = true;
1297   if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
1298     // Fixed and preallocated objects will be located before any padding
1299     // so FP must be used to access them.
1300     UseFP |= (HasAlloca || HasExtraAlign);
1301   } else {
1302     if (HasAlloca) {
1303       if (HasExtraAlign)
1304         UseAP = true;
1305       else
1306         UseFP = true;
1307     }
1308   }
1309 
1310   // If FP was picked, then there had better be FP.
1311   bool HasFP = hasFP(MF);
1312   assert((HasFP || !UseFP) && "This function must have frame pointer");
1313 
1314   // Having FP implies allocframe. Allocframe will store extra 8 bytes:
1315   // FP/LR. If the base register is used to access an object across these
1316   // 8 bytes, then the offset will need to be adjusted by 8.
1317   //
1318   // After allocframe:
1319   //                    HexagonISelLowering adds 8 to ---+
1320   //                    the offsets of all stack-based   |
1321   //                    arguments (*)                    |
1322   //                                                     |
1323   //   getObjectOffset < 0   0     8  getObjectOffset >= 8
1324   // ------------------------+-----+------------------------> increasing
1325   //     <local objects>     |FP/LR|    <input arguments>     addresses
1326   // -----------------+------+-----+------------------------>
1327   //                  |      |
1328   //    SP/AP point --+      +-- FP points here (**)
1329   //    somewhere on
1330   //    this side of FP/LR
1331   //
1332   // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
1333   // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
1334 
1335   // The lowering assumes that FP/LR is present, and so the offsets of
1336   // the formal arguments start at 8. If FP/LR is not there we need to
1337   // reduce the offset by 8.
1338   if (Offset > 0 && !HasFP)
1339     Offset -= 8;
1340 
1341   if (UseFP)
1342     FrameReg = FP;
1343   else if (UseAP)
1344     FrameReg = AP;
1345   else
1346     FrameReg = SP;
1347 
1348   // Calculate the actual offset in the instruction. If there is no FP
1349   // (in other words, no allocframe), then SP will not be adjusted (i.e.
1350   // there will be no SP -= FrameSize), so the frame size should not be
1351   // added to the calculated offset.
1352   int RealOffset = Offset;
1353   if (!UseFP && !UseAP)
1354     RealOffset = FrameSize+Offset;
1355   return StackOffset::getFixed(RealOffset);
1356 }
1357 
1358 bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
1359       const CSIVect &CSI, const HexagonRegisterInfo &HRI,
1360       bool &PrologueStubs) const {
1361   if (CSI.empty())
1362     return true;
1363 
1364   MachineBasicBlock::iterator MI = MBB.begin();
1365   PrologueStubs = false;
1366   MachineFunction &MF = *MBB.getParent();
1367   auto &HST = MF.getSubtarget<HexagonSubtarget>();
1368   auto &HII = *HST.getInstrInfo();
1369 
1370   if (useSpillFunction(MF, CSI)) {
1371     PrologueStubs = true;
1372     Register MaxReg = getMaxCalleeSavedReg(CSI, HRI);
1373     bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
1374     const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
1375                                                StkOvrFlowEnabled);
1376     auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1377     bool IsPIC = HTM.isPositionIndependent();
1378     bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1379 
1380     // Call spill function.
1381     DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1382     unsigned SpillOpc;
1383     if (StkOvrFlowEnabled) {
1384       if (LongCalls)
1385         SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC
1386                          : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
1387       else
1388         SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
1389                          : Hexagon::SAVE_REGISTERS_CALL_V4STK;
1390     } else {
1391       if (LongCalls)
1392         SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC
1393                          : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
1394       else
1395         SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
1396                          : Hexagon::SAVE_REGISTERS_CALL_V4;
1397     }
1398 
1399     MachineInstr *SaveRegsCall =
1400         BuildMI(MBB, MI, DL, HII.get(SpillOpc))
1401           .addExternalSymbol(SpillFun);
1402 
1403     // Add callee-saved registers as use.
1404     addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
1405     // Add live in registers.
1406     for (const CalleeSavedInfo &I : CSI)
1407       MBB.addLiveIn(I.getReg());
1408     return true;
1409   }
1410 
1411   for (const CalleeSavedInfo &I : CSI) {
1412     Register Reg = I.getReg();
1413     // Add live in registers. We treat eh_return callee saved register r0 - r3
1414     // specially. They are not really callee saved registers as they are not
1415     // supposed to be killed.
1416     bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
1417     int FI = I.getFrameIdx();
1418     const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1419     HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI, Register());
1420     if (IsKill)
1421       MBB.addLiveIn(Reg);
1422   }
1423   return true;
1424 }
1425 
1426 bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
1427       const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
1428   if (CSI.empty())
1429     return false;
1430 
1431   MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
1432   MachineFunction &MF = *MBB.getParent();
1433   auto &HST = MF.getSubtarget<HexagonSubtarget>();
1434   auto &HII = *HST.getInstrInfo();
1435 
1436   if (useRestoreFunction(MF, CSI)) {
1437     bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
1438     Register MaxR = getMaxCalleeSavedReg(CSI, HRI);
1439     SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
1440     const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
1441     auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1442     bool IsPIC = HTM.isPositionIndependent();
1443     bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1444 
1445     // Call spill function.
1446     DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
1447                                   : MBB.findDebugLoc(MBB.end());
1448     MachineInstr *DeallocCall = nullptr;
1449 
1450     if (HasTC) {
1451       unsigned RetOpc;
1452       if (LongCalls)
1453         RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC
1454                        : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;
1455       else
1456         RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
1457                        : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
1458       DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))
1459           .addExternalSymbol(RestoreFn);
1460     } else {
1461       // The block has a return.
1462       MachineBasicBlock::iterator It = MBB.getFirstTerminator();
1463       assert(It->isReturn() && std::next(It) == MBB.end());
1464       unsigned RetOpc;
1465       if (LongCalls)
1466         RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC
1467                        : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;
1468       else
1469         RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
1470                        : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
1471       DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))
1472           .addExternalSymbol(RestoreFn);
1473       // Transfer the function live-out registers.
1474       DeallocCall->copyImplicitOps(MF, *It);
1475     }
1476     addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);
1477     return true;
1478   }
1479 
1480   for (const CalleeSavedInfo &I : CSI) {
1481     Register Reg = I.getReg();
1482     const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1483     int FI = I.getFrameIdx();
1484     HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI, Register());
1485   }
1486 
1487   return true;
1488 }
1489 
1490 MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(
1491     MachineFunction &MF, MachineBasicBlock &MBB,
1492     MachineBasicBlock::iterator I) const {
1493   MachineInstr &MI = *I;
1494   unsigned Opc = MI.getOpcode();
1495   (void)Opc; // Silence compiler warning.
1496   assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
1497          "Cannot handle this call frame pseudo instruction");
1498   return MBB.erase(I);
1499 }
1500 
1501 void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
1502     MachineFunction &MF, RegScavenger *RS) const {
1503   // If this function has uses aligned stack and also has variable sized stack
1504   // objects, then we need to map all spill slots to fixed positions, so that
1505   // they can be accessed through FP. Otherwise they would have to be accessed
1506   // via AP, which may not be available at the particular place in the program.
1507   MachineFrameInfo &MFI = MF.getFrameInfo();
1508   bool HasAlloca = MFI.hasVarSizedObjects();
1509   bool NeedsAlign = (MFI.getMaxAlign() > getStackAlign());
1510 
1511   if (!HasAlloca || !NeedsAlign)
1512     return;
1513 
1514   // Set the physical aligned-stack base address register.
1515   Register AP = 0;
1516   if (const MachineInstr *AI = getAlignaInstr(MF))
1517     AP = AI->getOperand(0).getReg();
1518   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1519   assert(!AP.isValid() || AP.isPhysical());
1520   HMFI.setStackAlignBaseReg(AP);
1521 }
1522 
1523 /// Returns true if there are no caller-saved registers available in class RC.
1524 static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
1525       const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {
1526   MachineRegisterInfo &MRI = MF.getRegInfo();
1527 
1528   auto IsUsed = [&HRI,&MRI] (Register Reg) -> bool {
1529     for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); ++AI)
1530       if (MRI.isPhysRegUsed(*AI))
1531         return true;
1532     return false;
1533   };
1534 
1535   // Check for an unused caller-saved register. Callee-saved registers
1536   // have become pristine by now.
1537   for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; ++P)
1538     if (!IsUsed(*P))
1539       return false;
1540 
1541   // All caller-saved registers are used.
1542   return true;
1543 }
1544 
1545 #ifndef NDEBUG
1546 static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
1547   dbgs() << '{';
1548   for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
1549     Register R = x;
1550     dbgs() << ' ' << printReg(R, &TRI);
1551   }
1552   dbgs() << " }";
1553 }
1554 #endif
1555 
1556 bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
1557       const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
1558   LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');
1559   MachineFrameInfo &MFI = MF.getFrameInfo();
1560   BitVector SRegs(Hexagon::NUM_TARGET_REGS);
1561 
1562   // Generate a set of unique, callee-saved registers (SRegs), where each
1563   // register in the set is maximal in terms of sub-/super-register relation,
1564   // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
1565 
1566   // (1) For each callee-saved register, add that register and all of its
1567   // sub-registers to SRegs.
1568   LLVM_DEBUG(dbgs() << "Initial CS registers: {");
1569   for (const CalleeSavedInfo &I : CSI) {
1570     Register R = I.getReg();
1571     LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
1572     for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
1573       SRegs[*SR] = true;
1574   }
1575   LLVM_DEBUG(dbgs() << " }\n");
1576   LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI);
1577              dbgs() << "\n");
1578 
1579   // (2) For each reserved register, remove that register and all of its
1580   // sub- and super-registers from SRegs.
1581   BitVector Reserved = TRI->getReservedRegs(MF);
1582   // Unreserve the stack align register: it is reserved for this function
1583   // only, it still needs to be saved/restored.
1584   Register AP =
1585       MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseReg();
1586   if (AP.isValid()) {
1587     Reserved[AP] = false;
1588     // Unreserve super-regs if no other subregisters are reserved.
1589     for (MCSuperRegIterator SP(AP, TRI, false); SP.isValid(); ++SP) {
1590       bool HasResSub = false;
1591       for (MCSubRegIterator SB(*SP, TRI, false); SB.isValid(); ++SB) {
1592         if (!Reserved[*SB])
1593           continue;
1594         HasResSub = true;
1595         break;
1596       }
1597       if (!HasResSub)
1598         Reserved[*SP] = false;
1599     }
1600   }
1601 
1602   for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
1603     Register R = x;
1604     for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
1605       SRegs[*SR] = false;
1606   }
1607   LLVM_DEBUG(dbgs() << "Res:     "; dump_registers(Reserved, *TRI);
1608              dbgs() << "\n");
1609   LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI);
1610              dbgs() << "\n");
1611 
1612   // (3) Collect all registers that have at least one sub-register in SRegs,
1613   // and also have no sub-registers that are reserved. These will be the can-
1614   // didates for saving as a whole instead of their individual sub-registers.
1615   // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
1616   BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
1617   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1618     Register R = x;
1619     for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
1620       TmpSup[*SR] = true;
1621   }
1622   for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
1623     Register R = x;
1624     for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
1625       if (!Reserved[*SR])
1626         continue;
1627       TmpSup[R] = false;
1628       break;
1629     }
1630   }
1631   LLVM_DEBUG(dbgs() << "TmpSup:  "; dump_registers(TmpSup, *TRI);
1632              dbgs() << "\n");
1633 
1634   // (4) Include all super-registers found in (3) into SRegs.
1635   SRegs |= TmpSup;
1636   LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI);
1637              dbgs() << "\n");
1638 
1639   // (5) For each register R in SRegs, if any super-register of R is in SRegs,
1640   // remove R from SRegs.
1641   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1642     Register R = x;
1643     for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
1644       if (!SRegs[*SR])
1645         continue;
1646       SRegs[R] = false;
1647       break;
1648     }
1649   }
1650   LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI);
1651              dbgs() << "\n");
1652 
1653   // Now, for each register that has a fixed stack slot, create the stack
1654   // object for it.
1655   CSI.clear();
1656 
1657   using SpillSlot = TargetFrameLowering::SpillSlot;
1658 
1659   unsigned NumFixed;
1660   int MinOffset = 0;  // CS offsets are negative.
1661   const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
1662   for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
1663     if (!SRegs[S->Reg])
1664       continue;
1665     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
1666     int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);
1667     MinOffset = std::min(MinOffset, S->Offset);
1668     CSI.push_back(CalleeSavedInfo(S->Reg, FI));
1669     SRegs[S->Reg] = false;
1670   }
1671 
1672   // There can be some registers that don't have fixed slots. For example,
1673   // we need to store R0-R3 in functions with exception handling. For each
1674   // such register, create a non-fixed stack object.
1675   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1676     Register R = x;
1677     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
1678     unsigned Size = TRI->getSpillSize(*RC);
1679     int Off = MinOffset - Size;
1680     Align Alignment = std::min(TRI->getSpillAlign(*RC), getStackAlign());
1681     Off &= -Alignment.value();
1682     int FI = MFI.CreateFixedSpillStackObject(Size, Off);
1683     MinOffset = std::min(MinOffset, Off);
1684     CSI.push_back(CalleeSavedInfo(R, FI));
1685     SRegs[R] = false;
1686   }
1687 
1688   LLVM_DEBUG({
1689     dbgs() << "CS information: {";
1690     for (const CalleeSavedInfo &I : CSI) {
1691       int FI = I.getFrameIdx();
1692       int Off = MFI.getObjectOffset(FI);
1693       dbgs() << ' ' << printReg(I.getReg(), TRI) << ":fi#" << FI << ":sp";
1694       if (Off >= 0)
1695         dbgs() << '+';
1696       dbgs() << Off;
1697     }
1698     dbgs() << " }\n";
1699   });
1700 
1701 #ifndef NDEBUG
1702   // Verify that all registers were handled.
1703   bool MissedReg = false;
1704   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1705     Register R = x;
1706     dbgs() << printReg(R, TRI) << ' ';
1707     MissedReg = true;
1708   }
1709   if (MissedReg)
1710     llvm_unreachable("...there are unhandled callee-saved registers!");
1711 #endif
1712 
1713   return true;
1714 }
1715 
1716 bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
1717       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1718       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1719   MachineInstr *MI = &*It;
1720   DebugLoc DL = MI->getDebugLoc();
1721   Register DstR = MI->getOperand(0).getReg();
1722   Register SrcR = MI->getOperand(1).getReg();
1723   if (!Hexagon::ModRegsRegClass.contains(DstR) ||
1724       !Hexagon::ModRegsRegClass.contains(SrcR))
1725     return false;
1726 
1727   Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1728   BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
1729   BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
1730     .addReg(TmpR, RegState::Kill);
1731 
1732   NewRegs.push_back(TmpR);
1733   B.erase(It);
1734   return true;
1735 }
1736 
1737 bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
1738       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1739       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1740   MachineInstr *MI = &*It;
1741   if (!MI->getOperand(0).isFI())
1742     return false;
1743 
1744   DebugLoc DL = MI->getDebugLoc();
1745   unsigned Opc = MI->getOpcode();
1746   Register SrcR = MI->getOperand(2).getReg();
1747   bool IsKill = MI->getOperand(2).isKill();
1748   int FI = MI->getOperand(0).getIndex();
1749 
1750   // TmpR = C2_tfrpr SrcR   if SrcR is a predicate register
1751   // TmpR = A2_tfrcrr SrcR  if SrcR is a modifier register
1752   Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1753   unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
1754                                                  : Hexagon::A2_tfrcrr;
1755   BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
1756     .addReg(SrcR, getKillRegState(IsKill));
1757 
1758   // S2_storeri_io FI, 0, TmpR
1759   BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
1760       .addFrameIndex(FI)
1761       .addImm(0)
1762       .addReg(TmpR, RegState::Kill)
1763       .cloneMemRefs(*MI);
1764 
1765   NewRegs.push_back(TmpR);
1766   B.erase(It);
1767   return true;
1768 }
1769 
1770 bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
1771       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1772       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1773   MachineInstr *MI = &*It;
1774   if (!MI->getOperand(1).isFI())
1775     return false;
1776 
1777   DebugLoc DL = MI->getDebugLoc();
1778   unsigned Opc = MI->getOpcode();
1779   Register DstR = MI->getOperand(0).getReg();
1780   int FI = MI->getOperand(1).getIndex();
1781 
1782   // TmpR = L2_loadri_io FI, 0
1783   Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1784   BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
1785       .addFrameIndex(FI)
1786       .addImm(0)
1787       .cloneMemRefs(*MI);
1788 
1789   // DstR = C2_tfrrp TmpR   if DstR is a predicate register
1790   // DstR = A2_tfrrcr TmpR  if DstR is a modifier register
1791   unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp
1792                                                  : Hexagon::A2_tfrrcr;
1793   BuildMI(B, It, DL, HII.get(TfrOpc), DstR)
1794     .addReg(TmpR, RegState::Kill);
1795 
1796   NewRegs.push_back(TmpR);
1797   B.erase(It);
1798   return true;
1799 }
1800 
1801 bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
1802       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1803       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1804   MachineInstr *MI = &*It;
1805   if (!MI->getOperand(0).isFI())
1806     return false;
1807 
1808   DebugLoc DL = MI->getDebugLoc();
1809   Register SrcR = MI->getOperand(2).getReg();
1810   bool IsKill = MI->getOperand(2).isKill();
1811   int FI = MI->getOperand(0).getIndex();
1812   auto *RC = &Hexagon::HvxVRRegClass;
1813 
1814   // Insert transfer to general vector register.
1815   //   TmpR0 = A2_tfrsi 0x01010101
1816   //   TmpR1 = V6_vandqrt Qx, TmpR0
1817   //   store FI, 0, TmpR1
1818   Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1819   Register TmpR1 = MRI.createVirtualRegister(RC);
1820 
1821   BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1822     .addImm(0x01010101);
1823 
1824   BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1)
1825     .addReg(SrcR, getKillRegState(IsKill))
1826     .addReg(TmpR0, RegState::Kill);
1827 
1828   auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
1829   HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI, Register());
1830   expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);
1831 
1832   NewRegs.push_back(TmpR0);
1833   NewRegs.push_back(TmpR1);
1834   B.erase(It);
1835   return true;
1836 }
1837 
1838 bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
1839       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1840       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1841   MachineInstr *MI = &*It;
1842   if (!MI->getOperand(1).isFI())
1843     return false;
1844 
1845   DebugLoc DL = MI->getDebugLoc();
1846   Register DstR = MI->getOperand(0).getReg();
1847   int FI = MI->getOperand(1).getIndex();
1848   auto *RC = &Hexagon::HvxVRRegClass;
1849 
1850   // TmpR0 = A2_tfrsi 0x01010101
1851   // TmpR1 = load FI, 0
1852   // DstR = V6_vandvrt TmpR1, TmpR0
1853   Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1854   Register TmpR1 = MRI.createVirtualRegister(RC);
1855 
1856   BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1857     .addImm(0x01010101);
1858   MachineFunction &MF = *B.getParent();
1859   auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1860   HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI, Register());
1861   expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);
1862 
1863   BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
1864     .addReg(TmpR1, RegState::Kill)
1865     .addReg(TmpR0, RegState::Kill);
1866 
1867   NewRegs.push_back(TmpR0);
1868   NewRegs.push_back(TmpR1);
1869   B.erase(It);
1870   return true;
1871 }
1872 
1873 bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
1874       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1875       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1876   MachineFunction &MF = *B.getParent();
1877   auto &MFI = MF.getFrameInfo();
1878   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1879   MachineInstr *MI = &*It;
1880   if (!MI->getOperand(0).isFI())
1881     return false;
1882 
1883   // It is possible that the double vector being stored is only partially
1884   // defined. From the point of view of the liveness tracking, it is ok to
1885   // store it as a whole, but if we break it up we may end up storing a
1886   // register that is entirely undefined.
1887   LivePhysRegs LPR(HRI);
1888   LPR.addLiveIns(B);
1889   SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;
1890   for (auto R = B.begin(); R != It; ++R) {
1891     Clobbers.clear();
1892     LPR.stepForward(*R, Clobbers);
1893   }
1894 
1895   DebugLoc DL = MI->getDebugLoc();
1896   Register SrcR = MI->getOperand(2).getReg();
1897   Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
1898   Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
1899   bool IsKill = MI->getOperand(2).isKill();
1900   int FI = MI->getOperand(0).getIndex();
1901 
1902   unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1903   Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
1904   Align HasAlign = MFI.getObjectAlign(FI);
1905   unsigned StoreOpc;
1906 
1907   // Store low part.
1908   if (LPR.contains(SrcLo)) {
1909     StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1910                                      : Hexagon::V6_vS32Ub_ai;
1911     BuildMI(B, It, DL, HII.get(StoreOpc))
1912         .addFrameIndex(FI)
1913         .addImm(0)
1914         .addReg(SrcLo, getKillRegState(IsKill))
1915         .cloneMemRefs(*MI);
1916   }
1917 
1918   // Store high part.
1919   if (LPR.contains(SrcHi)) {
1920     StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1921                                      : Hexagon::V6_vS32Ub_ai;
1922     BuildMI(B, It, DL, HII.get(StoreOpc))
1923         .addFrameIndex(FI)
1924         .addImm(Size)
1925         .addReg(SrcHi, getKillRegState(IsKill))
1926         .cloneMemRefs(*MI);
1927   }
1928 
1929   B.erase(It);
1930   return true;
1931 }
1932 
1933 bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
1934       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1935       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1936   MachineFunction &MF = *B.getParent();
1937   auto &MFI = MF.getFrameInfo();
1938   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1939   MachineInstr *MI = &*It;
1940   if (!MI->getOperand(1).isFI())
1941     return false;
1942 
1943   DebugLoc DL = MI->getDebugLoc();
1944   Register DstR = MI->getOperand(0).getReg();
1945   Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
1946   Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
1947   int FI = MI->getOperand(1).getIndex();
1948 
1949   unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1950   Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
1951   Align HasAlign = MFI.getObjectAlign(FI);
1952   unsigned LoadOpc;
1953 
1954   // Load low part.
1955   LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1956                                   : Hexagon::V6_vL32Ub_ai;
1957   BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
1958       .addFrameIndex(FI)
1959       .addImm(0)
1960       .cloneMemRefs(*MI);
1961 
1962   // Load high part.
1963   LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1964                                   : Hexagon::V6_vL32Ub_ai;
1965   BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
1966       .addFrameIndex(FI)
1967       .addImm(Size)
1968       .cloneMemRefs(*MI);
1969 
1970   B.erase(It);
1971   return true;
1972 }
1973 
1974 bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
1975       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1976       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1977   MachineFunction &MF = *B.getParent();
1978   auto &MFI = MF.getFrameInfo();
1979   MachineInstr *MI = &*It;
1980   if (!MI->getOperand(0).isFI())
1981     return false;
1982 
1983   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1984   DebugLoc DL = MI->getDebugLoc();
1985   Register SrcR = MI->getOperand(2).getReg();
1986   bool IsKill = MI->getOperand(2).isKill();
1987   int FI = MI->getOperand(0).getIndex();
1988 
1989   Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
1990   Align HasAlign = MFI.getObjectAlign(FI);
1991   unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1992                                             : Hexagon::V6_vS32Ub_ai;
1993   BuildMI(B, It, DL, HII.get(StoreOpc))
1994       .addFrameIndex(FI)
1995       .addImm(0)
1996       .addReg(SrcR, getKillRegState(IsKill))
1997       .cloneMemRefs(*MI);
1998 
1999   B.erase(It);
2000   return true;
2001 }
2002 
2003 bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
2004       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2005       const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2006   MachineFunction &MF = *B.getParent();
2007   auto &MFI = MF.getFrameInfo();
2008   MachineInstr *MI = &*It;
2009   if (!MI->getOperand(1).isFI())
2010     return false;
2011 
2012   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2013   DebugLoc DL = MI->getDebugLoc();
2014   Register DstR = MI->getOperand(0).getReg();
2015   int FI = MI->getOperand(1).getIndex();
2016 
2017   Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
2018   Align HasAlign = MFI.getObjectAlign(FI);
2019   unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
2020                                            : Hexagon::V6_vL32Ub_ai;
2021   BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
2022       .addFrameIndex(FI)
2023       .addImm(0)
2024       .cloneMemRefs(*MI);
2025 
2026   B.erase(It);
2027   return true;
2028 }
2029 
2030 bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
2031       SmallVectorImpl<Register> &NewRegs) const {
2032   auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
2033   MachineRegisterInfo &MRI = MF.getRegInfo();
2034   bool Changed = false;
2035 
2036   for (auto &B : MF) {
2037     // Traverse the basic block.
2038     MachineBasicBlock::iterator NextI;
2039     for (auto I = B.begin(), E = B.end(); I != E; I = NextI) {
2040       MachineInstr *MI = &*I;
2041       NextI = std::next(I);
2042       unsigned Opc = MI->getOpcode();
2043 
2044       switch (Opc) {
2045         case TargetOpcode::COPY:
2046           Changed |= expandCopy(B, I, MRI, HII, NewRegs);
2047           break;
2048         case Hexagon::STriw_pred:
2049         case Hexagon::STriw_ctr:
2050           Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
2051           break;
2052         case Hexagon::LDriw_pred:
2053         case Hexagon::LDriw_ctr:
2054           Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
2055           break;
2056         case Hexagon::PS_vstorerq_ai:
2057           Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
2058           break;
2059         case Hexagon::PS_vloadrq_ai:
2060           Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
2061           break;
2062         case Hexagon::PS_vloadrw_ai:
2063           Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
2064           break;
2065         case Hexagon::PS_vstorerw_ai:
2066           Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
2067           break;
2068       }
2069     }
2070   }
2071 
2072   return Changed;
2073 }
2074 
2075 void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
2076                                                 BitVector &SavedRegs,
2077                                                 RegScavenger *RS) const {
2078   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2079 
2080   SavedRegs.resize(HRI.getNumRegs());
2081 
2082   // If we have a function containing __builtin_eh_return we want to spill and
2083   // restore all callee saved registers. Pretend that they are used.
2084   if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
2085     for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R)
2086       SavedRegs.set(*R);
2087 
2088   // Replace predicate register pseudo spill code.
2089   SmallVector<Register,8> NewRegs;
2090   expandSpillMacros(MF, NewRegs);
2091   if (OptimizeSpillSlots && !isOptNone(MF))
2092     optimizeSpillSlots(MF, NewRegs);
2093 
2094   // We need to reserve a spill slot if scavenging could potentially require
2095   // spilling a scavenged register.
2096   if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {
2097     MachineFrameInfo &MFI = MF.getFrameInfo();
2098     MachineRegisterInfo &MRI = MF.getRegInfo();
2099     SetVector<const TargetRegisterClass*> SpillRCs;
2100     // Reserve an int register in any case, because it could be used to hold
2101     // the stack offset in case it does not fit into a spill instruction.
2102     SpillRCs.insert(&Hexagon::IntRegsRegClass);
2103 
2104     for (Register VR : NewRegs)
2105       SpillRCs.insert(MRI.getRegClass(VR));
2106 
2107     for (const auto *RC : SpillRCs) {
2108       if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
2109         continue;
2110       unsigned Num = 1;
2111       switch (RC->getID()) {
2112         case Hexagon::IntRegsRegClassID:
2113           Num = NumberScavengerSlots;
2114           break;
2115         case Hexagon::HvxQRRegClassID:
2116           Num = 2; // Vector predicate spills also need a vector register.
2117           break;
2118       }
2119       unsigned S = HRI.getSpillSize(*RC);
2120       Align A = HRI.getSpillAlign(*RC);
2121       for (unsigned i = 0; i < Num; i++) {
2122         int NewFI = MFI.CreateSpillStackObject(S, A);
2123         RS->addScavengingFrameIndex(NewFI);
2124       }
2125     }
2126   }
2127 
2128   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2129 }
2130 
2131 Register HexagonFrameLowering::findPhysReg(MachineFunction &MF,
2132       HexagonBlockRanges::IndexRange &FIR,
2133       HexagonBlockRanges::InstrIndexMap &IndexMap,
2134       HexagonBlockRanges::RegToRangeMap &DeadMap,
2135       const TargetRegisterClass *RC) const {
2136   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2137   auto &MRI = MF.getRegInfo();
2138 
2139   auto isDead = [&FIR,&DeadMap] (Register Reg) -> bool {
2140     auto F = DeadMap.find({Reg,0});
2141     if (F == DeadMap.end())
2142       return false;
2143     for (auto &DR : F->second)
2144       if (DR.contains(FIR))
2145         return true;
2146     return false;
2147   };
2148 
2149   for (Register Reg : RC->getRawAllocationOrder(MF)) {
2150     bool Dead = true;
2151     for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
2152       if (isDead(R.Reg))
2153         continue;
2154       Dead = false;
2155       break;
2156     }
2157     if (Dead)
2158       return Reg;
2159   }
2160   return 0;
2161 }
2162 
2163 void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
2164       SmallVectorImpl<Register> &VRegs) const {
2165   auto &HST = MF.getSubtarget<HexagonSubtarget>();
2166   auto &HII = *HST.getInstrInfo();
2167   auto &HRI = *HST.getRegisterInfo();
2168   auto &MRI = MF.getRegInfo();
2169   HexagonBlockRanges HBR(MF);
2170 
2171   using BlockIndexMap =
2172       std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>;
2173   using BlockRangeMap =
2174       std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>;
2175   using IndexType = HexagonBlockRanges::IndexType;
2176 
2177   struct SlotInfo {
2178     BlockRangeMap Map;
2179     unsigned Size = 0;
2180     const TargetRegisterClass *RC = nullptr;
2181 
2182     SlotInfo() = default;
2183   };
2184 
2185   BlockIndexMap BlockIndexes;
2186   SmallSet<int,4> BadFIs;
2187   std::map<int,SlotInfo> FIRangeMap;
2188 
2189   // Accumulate register classes: get a common class for a pre-existing
2190   // class HaveRC and a new class NewRC. Return nullptr if a common class
2191   // cannot be found, otherwise return the resulting class. If HaveRC is
2192   // nullptr, assume that it is still unset.
2193   auto getCommonRC =
2194       [](const TargetRegisterClass *HaveRC,
2195          const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
2196     if (HaveRC == nullptr || HaveRC == NewRC)
2197       return NewRC;
2198     // Different classes, both non-null. Pick the more general one.
2199     if (HaveRC->hasSubClassEq(NewRC))
2200       return HaveRC;
2201     if (NewRC->hasSubClassEq(HaveRC))
2202       return NewRC;
2203     return nullptr;
2204   };
2205 
2206   // Scan all blocks in the function. Check all occurrences of frame indexes,
2207   // and collect relevant information.
2208   for (auto &B : MF) {
2209     std::map<int,IndexType> LastStore, LastLoad;
2210     // Emplace appears not to be supported in gcc 4.7.2-4.
2211     //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
2212     auto P = BlockIndexes.insert(
2213                 std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));
2214     auto &IndexMap = P.first->second;
2215     LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"
2216                       << IndexMap << '\n');
2217 
2218     for (auto &In : B) {
2219       int LFI, SFI;
2220       bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In);
2221       bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In);
2222       if (Load && Store) {
2223         // If it's both a load and a store, then we won't handle it.
2224         BadFIs.insert(LFI);
2225         BadFIs.insert(SFI);
2226         continue;
2227       }
2228       // Check for register classes of the register used as the source for
2229       // the store, and the register used as the destination for the load.
2230       // Also, only accept base+imm_offset addressing modes. Other addressing
2231       // modes can have side-effects (post-increments, etc.). For stack
2232       // slots they are very unlikely, so there is not much loss due to
2233       // this restriction.
2234       if (Load || Store) {
2235         int TFI = Load ? LFI : SFI;
2236         unsigned AM = HII.getAddrMode(In);
2237         SlotInfo &SI = FIRangeMap[TFI];
2238         bool Bad = (AM != HexagonII::BaseImmOffset);
2239         if (!Bad) {
2240           // If the addressing mode is ok, check the register class.
2241           unsigned OpNum = Load ? 0 : 2;
2242           auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);
2243           RC = getCommonRC(SI.RC, RC);
2244           if (RC == nullptr)
2245             Bad = true;
2246           else
2247             SI.RC = RC;
2248         }
2249         if (!Bad) {
2250           // Check sizes.
2251           unsigned S = HII.getMemAccessSize(In);
2252           if (SI.Size != 0 && SI.Size != S)
2253             Bad = true;
2254           else
2255             SI.Size = S;
2256         }
2257         if (!Bad) {
2258           for (auto *Mo : In.memoperands()) {
2259             if (!Mo->isVolatile() && !Mo->isAtomic())
2260               continue;
2261             Bad = true;
2262             break;
2263           }
2264         }
2265         if (Bad)
2266           BadFIs.insert(TFI);
2267       }
2268 
2269       // Locate uses of frame indices.
2270       for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
2271         const MachineOperand &Op = In.getOperand(i);
2272         if (!Op.isFI())
2273           continue;
2274         int FI = Op.getIndex();
2275         // Make sure that the following operand is an immediate and that
2276         // it is 0. This is the offset in the stack object.
2277         if (i+1 >= n || !In.getOperand(i+1).isImm() ||
2278             In.getOperand(i+1).getImm() != 0)
2279           BadFIs.insert(FI);
2280         if (BadFIs.count(FI))
2281           continue;
2282 
2283         IndexType Index = IndexMap.getIndex(&In);
2284         if (Load) {
2285           if (LastStore[FI] == IndexType::None)
2286             LastStore[FI] = IndexType::Entry;
2287           LastLoad[FI] = Index;
2288         } else if (Store) {
2289           HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2290           if (LastStore[FI] != IndexType::None)
2291             RL.add(LastStore[FI], LastLoad[FI], false, false);
2292           else if (LastLoad[FI] != IndexType::None)
2293             RL.add(IndexType::Entry, LastLoad[FI], false, false);
2294           LastLoad[FI] = IndexType::None;
2295           LastStore[FI] = Index;
2296         } else {
2297           BadFIs.insert(FI);
2298         }
2299       }
2300     }
2301 
2302     for (auto &I : LastLoad) {
2303       IndexType LL = I.second;
2304       if (LL == IndexType::None)
2305         continue;
2306       auto &RL = FIRangeMap[I.first].Map[&B];
2307       IndexType &LS = LastStore[I.first];
2308       if (LS != IndexType::None)
2309         RL.add(LS, LL, false, false);
2310       else
2311         RL.add(IndexType::Entry, LL, false, false);
2312       LS = IndexType::None;
2313     }
2314     for (auto &I : LastStore) {
2315       IndexType LS = I.second;
2316       if (LS == IndexType::None)
2317         continue;
2318       auto &RL = FIRangeMap[I.first].Map[&B];
2319       RL.add(LS, IndexType::None, false, false);
2320     }
2321   }
2322 
2323   LLVM_DEBUG({
2324     for (auto &P : FIRangeMap) {
2325       dbgs() << "fi#" << P.first;
2326       if (BadFIs.count(P.first))
2327         dbgs() << " (bad)";
2328       dbgs() << "  RC: ";
2329       if (P.second.RC != nullptr)
2330         dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
2331       else
2332         dbgs() << "<null>\n";
2333       for (auto &R : P.second.Map)
2334         dbgs() << "  " << printMBBReference(*R.first) << " { " << R.second
2335                << "}\n";
2336     }
2337   });
2338 
2339   // When a slot is loaded from in a block without being stored to in the
2340   // same block, it is live-on-entry to this block. To avoid CFG analysis,
2341   // consider this slot to be live-on-exit from all blocks.
2342   SmallSet<int,4> LoxFIs;
2343 
2344   std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
2345 
2346   for (auto &P : FIRangeMap) {
2347     // P = pair(FI, map: BB->RangeList)
2348     if (BadFIs.count(P.first))
2349       continue;
2350     for (auto &B : MF) {
2351       auto F = P.second.Map.find(&B);
2352       // F = pair(BB, RangeList)
2353       if (F == P.second.Map.end() || F->second.empty())
2354         continue;
2355       HexagonBlockRanges::IndexRange &IR = F->second.front();
2356       if (IR.start() == IndexType::Entry)
2357         LoxFIs.insert(P.first);
2358       BlockFIMap[&B].push_back(P.first);
2359     }
2360   }
2361 
2362   LLVM_DEBUG({
2363     dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
2364     for (auto &P : BlockFIMap) {
2365       auto &FIs = P.second;
2366       if (FIs.empty())
2367         continue;
2368       dbgs() << "  " << printMBBReference(*P.first) << ": {";
2369       for (auto I : FIs) {
2370         dbgs() << " fi#" << I;
2371         if (LoxFIs.count(I))
2372           dbgs() << '*';
2373       }
2374       dbgs() << " }\n";
2375     }
2376   });
2377 
2378 #ifndef NDEBUG
2379   bool HasOptLimit = SpillOptMax.getPosition();
2380 #endif
2381 
2382   // eliminate loads, when all loads eliminated, eliminate all stores.
2383   for (auto &B : MF) {
2384     auto F = BlockIndexes.find(&B);
2385     assert(F != BlockIndexes.end());
2386     HexagonBlockRanges::InstrIndexMap &IM = F->second;
2387     HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
2388     HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
2389     LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n"
2390                       << HexagonBlockRanges::PrintRangeMap(DM, HRI));
2391 
2392     for (auto FI : BlockFIMap[&B]) {
2393       if (BadFIs.count(FI))
2394         continue;
2395       LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n');
2396       HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2397       for (auto &Range : RL) {
2398         LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n');
2399         if (!IndexType::isInstr(Range.start()) ||
2400             !IndexType::isInstr(Range.end()))
2401           continue;
2402         MachineInstr &SI = *IM.getInstr(Range.start());
2403         MachineInstr &EI = *IM.getInstr(Range.end());
2404         assert(SI.mayStore() && "Unexpected start instruction");
2405         assert(EI.mayLoad() && "Unexpected end instruction");
2406         MachineOperand &SrcOp = SI.getOperand(2);
2407 
2408         HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
2409                                                   SrcOp.getSubReg() };
2410         auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
2411         // The this-> is needed to unconfuse MSVC.
2412         Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
2413         LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
2414                           << '\n');
2415         if (FoundR == 0)
2416           continue;
2417 #ifndef NDEBUG
2418         if (HasOptLimit) {
2419           if (SpillOptCount >= SpillOptMax)
2420             return;
2421           SpillOptCount++;
2422         }
2423 #endif
2424 
2425         // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
2426         MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;
2427         MachineInstr *CopyIn = nullptr;
2428         if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
2429           const DebugLoc &DL = SI.getDebugLoc();
2430           CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
2431                        .add(SrcOp);
2432         }
2433 
2434         ++StartIt;
2435         // Check if this is a last store and the FI is live-on-exit.
2436         if (LoxFIs.count(FI) && (&Range == &RL.back())) {
2437           // Update store's source register.
2438           if (unsigned SR = SrcOp.getSubReg())
2439             SrcOp.setReg(HRI.getSubReg(FoundR, SR));
2440           else
2441             SrcOp.setReg(FoundR);
2442           SrcOp.setSubReg(0);
2443           // We are keeping this register live.
2444           SrcOp.setIsKill(false);
2445         } else {
2446           B.erase(&SI);
2447           IM.replaceInstr(&SI, CopyIn);
2448         }
2449 
2450         auto EndIt = std::next(EI.getIterator());
2451         for (auto It = StartIt; It != EndIt; It = NextIt) {
2452           MachineInstr &MI = *It;
2453           NextIt = std::next(It);
2454           int TFI;
2455           if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
2456             continue;
2457           Register DstR = MI.getOperand(0).getReg();
2458           assert(MI.getOperand(0).getSubReg() == 0);
2459           MachineInstr *CopyOut = nullptr;
2460           if (DstR != FoundR) {
2461             DebugLoc DL = MI.getDebugLoc();
2462             unsigned MemSize = HII.getMemAccessSize(MI);
2463             assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
2464             unsigned CopyOpc = TargetOpcode::COPY;
2465             if (HII.isSignExtendingLoad(MI))
2466               CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
2467             else if (HII.isZeroExtendingLoad(MI))
2468               CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
2469             CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
2470                         .addReg(FoundR, getKillRegState(&MI == &EI));
2471           }
2472           IM.replaceInstr(&MI, CopyOut);
2473           B.erase(It);
2474         }
2475 
2476         // Update the dead map.
2477         HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
2478         for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
2479           DM[RR].subtract(Range);
2480       } // for Range in range list
2481     }
2482   }
2483 }
2484 
2485 void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
2486       const HexagonInstrInfo &HII, Register SP, unsigned CF) const {
2487   MachineBasicBlock &MB = *AI->getParent();
2488   DebugLoc DL = AI->getDebugLoc();
2489   unsigned A = AI->getOperand(2).getImm();
2490 
2491   // Have
2492   //    Rd  = alloca Rs, #A
2493   //
2494   // If Rs and Rd are different registers, use this sequence:
2495   //    Rd  = sub(r29, Rs)
2496   //    r29 = sub(r29, Rs)
2497   //    Rd  = and(Rd, #-A)    ; if necessary
2498   //    r29 = and(r29, #-A)   ; if necessary
2499   //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2500   // otherwise, do
2501   //    Rd  = sub(r29, Rs)
2502   //    Rd  = and(Rd, #-A)    ; if necessary
2503   //    r29 = Rd
2504   //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2505 
2506   MachineOperand &RdOp = AI->getOperand(0);
2507   MachineOperand &RsOp = AI->getOperand(1);
2508   Register Rd = RdOp.getReg(), Rs = RsOp.getReg();
2509 
2510   // Rd = sub(r29, Rs)
2511   BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
2512       .addReg(SP)
2513       .addReg(Rs);
2514   if (Rs != Rd) {
2515     // r29 = sub(r29, Rs)
2516     BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
2517         .addReg(SP)
2518         .addReg(Rs);
2519   }
2520   if (A > 8) {
2521     // Rd  = and(Rd, #-A)
2522     BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2523         .addReg(Rd)
2524         .addImm(-int64_t(A));
2525     if (Rs != Rd)
2526       BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
2527           .addReg(SP)
2528           .addImm(-int64_t(A));
2529   }
2530   if (Rs == Rd) {
2531     // r29 = Rd
2532     BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
2533         .addReg(Rd);
2534   }
2535   if (CF > 0) {
2536     // Rd = add(Rd, #CF)
2537     BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
2538         .addReg(Rd)
2539         .addImm(CF);
2540   }
2541 }
2542 
2543 bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
2544   const MachineFrameInfo &MFI = MF.getFrameInfo();
2545   if (!MFI.hasVarSizedObjects())
2546     return false;
2547   // Do not check for max stack object alignment here, because the stack
2548   // may not be complete yet. Assume that we will need PS_aligna if there
2549   // are variable-sized objects.
2550   return true;
2551 }
2552 
2553 const MachineInstr *HexagonFrameLowering::getAlignaInstr(
2554       const MachineFunction &MF) const {
2555   for (auto &B : MF)
2556     for (auto &I : B)
2557       if (I.getOpcode() == Hexagon::PS_aligna)
2558         return &I;
2559   return nullptr;
2560 }
2561 
2562 /// Adds all callee-saved registers as implicit uses or defs to the
2563 /// instruction.
2564 void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
2565       const CSIVect &CSI, bool IsDef, bool IsKill) const {
2566   // Add the callee-saved registers as implicit uses.
2567   for (auto &R : CSI)
2568     MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
2569 }
2570 
2571 /// Determine whether the callee-saved register saves and restores should
2572 /// be generated via inline code. If this function returns "true", inline
2573 /// code will be generated. If this function returns "false", additional
2574 /// checks are performed, which may still lead to the inline code.
2575 bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
2576       const CSIVect &CSI) const {
2577   if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl())
2578     return true;
2579   if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
2580     return true;
2581   if (!hasFP(MF))
2582     return true;
2583   if (!isOptSize(MF) && !isMinSize(MF))
2584     if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
2585       return true;
2586 
2587   // Check if CSI only has double registers, and if the registers form
2588   // a contiguous block starting from D8.
2589   BitVector Regs(Hexagon::NUM_TARGET_REGS);
2590   for (const CalleeSavedInfo &I : CSI) {
2591     Register R = I.getReg();
2592     if (!Hexagon::DoubleRegsRegClass.contains(R))
2593       return true;
2594     Regs[R] = true;
2595   }
2596   int F = Regs.find_first();
2597   if (F != Hexagon::D8)
2598     return true;
2599   while (F >= 0) {
2600     int N = Regs.find_next(F);
2601     if (N >= 0 && N != F+1)
2602       return true;
2603     F = N;
2604   }
2605 
2606   return false;
2607 }
2608 
2609 bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
2610       const CSIVect &CSI) const {
2611   if (shouldInlineCSR(MF, CSI))
2612     return false;
2613   unsigned NumCSI = CSI.size();
2614   if (NumCSI <= 1)
2615     return false;
2616 
2617   unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
2618                                      : SpillFuncThreshold;
2619   return Threshold < NumCSI;
2620 }
2621 
2622 bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
2623       const CSIVect &CSI) const {
2624   if (shouldInlineCSR(MF, CSI))
2625     return false;
2626   // The restore functions do a bit more than just restoring registers.
2627   // The non-returning versions will go back directly to the caller's
2628   // caller, others will clean up the stack frame in preparation for
2629   // a tail call. Using them can still save code size even if only one
2630   // register is getting restores. Make the decision based on -Oz:
2631   // using -Os will use inline restore for a single register.
2632   if (isMinSize(MF))
2633     return true;
2634   unsigned NumCSI = CSI.size();
2635   if (NumCSI <= 1)
2636     return false;
2637 
2638   unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
2639                                      : SpillFuncThreshold;
2640   return Threshold < NumCSI;
2641 }
2642 
2643 bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
2644   unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);
2645   auto &HST = MF.getSubtarget<HexagonSubtarget>();
2646   // A fairly simplistic guess as to whether a potential load/store to a
2647   // stack location could require an extra register.
2648   if (HST.useHVXOps() && StackSize > 256)
2649     return true;
2650 
2651   // Check if the function has store-immediate instructions that access
2652   // the stack. Since the offset field is not extendable, if the stack
2653   // size exceeds the offset limit (6 bits, shifted), the stores will
2654   // require a new base register.
2655   bool HasImmStack = false;
2656   unsigned MinLS = ~0u;   // Log_2 of the memory access size.
2657 
2658   for (const MachineBasicBlock &B : MF) {
2659     for (const MachineInstr &MI : B) {
2660       unsigned LS = 0;
2661       switch (MI.getOpcode()) {
2662         case Hexagon::S4_storeirit_io:
2663         case Hexagon::S4_storeirif_io:
2664         case Hexagon::S4_storeiri_io:
2665           ++LS;
2666           [[fallthrough]];
2667         case Hexagon::S4_storeirht_io:
2668         case Hexagon::S4_storeirhf_io:
2669         case Hexagon::S4_storeirh_io:
2670           ++LS;
2671           [[fallthrough]];
2672         case Hexagon::S4_storeirbt_io:
2673         case Hexagon::S4_storeirbf_io:
2674         case Hexagon::S4_storeirb_io:
2675           if (MI.getOperand(0).isFI())
2676             HasImmStack = true;
2677           MinLS = std::min(MinLS, LS);
2678           break;
2679       }
2680     }
2681   }
2682 
2683   if (HasImmStack)
2684     return !isUInt<6>(StackSize >> MinLS);
2685 
2686   return false;
2687 }
2688