xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp (revision 55141f2c8991b2a6adbf30bb0fe3e6cbc303f06d)
1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the ARM implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 //
13 // This file contains the ARM implementation of TargetFrameLowering class.
14 //
15 // On ARM, stack frames are structured as follows:
16 //
17 // The stack grows downward.
18 //
19 // All of the individual frame areas on the frame below are optional, i.e. it's
20 // possible to create a function so that the particular area isn't present
21 // in the frame.
22 //
23 // At function entry, the "frame" looks as follows:
24 //
25 // |                                   | Higher address
26 // |-----------------------------------|
27 // |                                   |
28 // | arguments passed on the stack     |
29 // |                                   |
30 // |-----------------------------------| <- sp
31 // |                                   | Lower address
32 //
33 //
34 // After the prologue has run, the frame has the following general structure.
// Technically the last frame area (VLAs) isn't created until the main
// function body runs, after the prologue has finished. However, it's depicted
// here for completeness.
38 //
39 // |                                   | Higher address
40 // |-----------------------------------|
41 // |                                   |
42 // | arguments passed on the stack     |
43 // |                                   |
44 // |-----------------------------------| <- (sp at function entry)
45 // |                                   |
46 // | varargs from registers            |
47 // |                                   |
48 // |-----------------------------------|
49 // |                                   |
50 // | prev_lr                           |
51 // | prev_fp                           |
52 // | (a.k.a. "frame record")           |
53 // |                                   |
54 // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55 // |                                   |
56 // | callee-saved gpr registers        |
57 // |                                   |
58 // |-----------------------------------|
59 // |                                   |
60 // | callee-saved fp/simd regs         |
61 // |                                   |
62 // |-----------------------------------|
63 // |.empty.space.to.make.part.below....|
64 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65 // |.the.standard.8-byte.alignment.....|  compile time; if present)
66 // |-----------------------------------|
67 // |                                   |
68 // | local variables of fixed size     |
69 // | including spill slots             |
70 // |-----------------------------------| <- base pointer (not defined by ABI,
71 // |.variable-sized.local.variables....|       LLVM chooses r6)
72 // |.(VLAs)............................| (size of this area is unknown at
73 // |...................................|  compile time)
74 // |-----------------------------------| <- sp
75 // |                                   | Lower address
76 //
77 //
// To access the data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size of the areas with
// a dotted background cannot be computed at compile time if they are present,
// so all three of fp, bp and sp must be set up to be able to access all
// contents in the frame areas, assuming all of the frame areas are non-empty.
84 //
85 // For most functions, some of the frame areas are empty. For those functions,
86 // it may not be necessary to set up fp or bp:
87 // * A base pointer is definitely needed when there are both VLAs and local
88 //   variables with more-than-default alignment requirements.
89 // * A frame pointer is definitely needed when there are local variables with
90 //   more-than-default alignment requirements.
91 //
92 // In some cases when a base pointer is not strictly needed, it is generated
93 // anyway when offsets from the frame pointer to access local variables become
94 // so large that the offset can't be encoded in the immediate fields of loads
95 // or stores.
96 //
97 // The frame pointer might be chosen to be r7 or r11, depending on the target
98 // architecture and operating system. See ARMSubtarget::getFramePointerReg for
99 // details.
100 //
101 // Outgoing function arguments must be at the bottom of the stack frame when
102 // calling another function. If we do not have variable-sized stack objects, we
103 // can allocate a "reserved call frame" area at the bottom of the local
104 // variable area, large enough for all outgoing calls. If we do have VLAs, then
105 // the stack pointer must be decremented and incremented around each call to
106 // make space for the arguments below the VLAs.
107 //
108 //===----------------------------------------------------------------------===//
109 
110 #include "ARMFrameLowering.h"
111 #include "ARMBaseInstrInfo.h"
112 #include "ARMBaseRegisterInfo.h"
113 #include "ARMConstantPoolValue.h"
114 #include "ARMMachineFunctionInfo.h"
115 #include "ARMSubtarget.h"
116 #include "MCTargetDesc/ARMAddressingModes.h"
117 #include "MCTargetDesc/ARMBaseInfo.h"
118 #include "Utils/ARMBaseInfo.h"
119 #include "llvm/ADT/BitVector.h"
120 #include "llvm/ADT/STLExtras.h"
121 #include "llvm/ADT/SmallPtrSet.h"
122 #include "llvm/ADT/SmallVector.h"
123 #include "llvm/CodeGen/MachineBasicBlock.h"
124 #include "llvm/CodeGen/MachineConstantPool.h"
125 #include "llvm/CodeGen/MachineFrameInfo.h"
126 #include "llvm/CodeGen/MachineFunction.h"
127 #include "llvm/CodeGen/MachineInstr.h"
128 #include "llvm/CodeGen/MachineInstrBuilder.h"
129 #include "llvm/CodeGen/MachineJumpTableInfo.h"
130 #include "llvm/CodeGen/MachineModuleInfo.h"
131 #include "llvm/CodeGen/MachineOperand.h"
132 #include "llvm/CodeGen/MachineRegisterInfo.h"
133 #include "llvm/CodeGen/RegisterScavenging.h"
134 #include "llvm/CodeGen/TargetInstrInfo.h"
135 #include "llvm/CodeGen/TargetOpcodes.h"
136 #include "llvm/CodeGen/TargetRegisterInfo.h"
137 #include "llvm/CodeGen/TargetSubtargetInfo.h"
138 #include "llvm/IR/Attributes.h"
139 #include "llvm/IR/CallingConv.h"
140 #include "llvm/IR/DebugLoc.h"
141 #include "llvm/IR/Function.h"
142 #include "llvm/MC/MCAsmInfo.h"
143 #include "llvm/MC/MCContext.h"
144 #include "llvm/MC/MCDwarf.h"
145 #include "llvm/MC/MCInstrDesc.h"
146 #include "llvm/MC/MCRegisterInfo.h"
147 #include "llvm/Support/CodeGen.h"
148 #include "llvm/Support/CommandLine.h"
149 #include "llvm/Support/Compiler.h"
150 #include "llvm/Support/Debug.h"
151 #include "llvm/Support/ErrorHandling.h"
152 #include "llvm/Support/MathExtras.h"
153 #include "llvm/Support/raw_ostream.h"
154 #include "llvm/Target/TargetMachine.h"
155 #include "llvm/Target/TargetOptions.h"
156 #include <algorithm>
157 #include <cassert>
158 #include <cstddef>
159 #include <cstdint>
160 #include <iterator>
161 #include <utility>
162 #include <vector>
163 
164 #define DEBUG_TYPE "arm-frame-lowering"
165 
166 using namespace llvm;
167 
168 static cl::opt<bool>
169 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
170                      cl::desc("Align ARM NEON spills in prolog and epilog"));
171 
172 static MachineBasicBlock::iterator
173 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
174                         unsigned NumAlignedDPRCS2Regs);
175 
176 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
177     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
178       STI(sti) {}
179 
180 bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
181   // iOS always has a FP for backtracking, force other targets to keep their FP
182   // when doing FastISel. The emitted code is currently superior, and in cases
183   // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
184   return MF.getSubtarget<ARMSubtarget>().useFastISel();
185 }
186 
187 /// Returns true if the target can safely skip saving callee-saved registers
188 /// for noreturn nounwind functions.
189 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
190   assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
191          MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
192          !MF.getFunction().hasFnAttribute(Attribute::UWTable));
193 
194   // Frame pointer and link register are not treated as normal CSR, thus we
195   // can always skip CSR saves for nonreturning functions.
196   return true;
197 }
198 
199 /// hasFP - Return true if the specified function should have a dedicated frame
200 /// pointer register.  This is true if the function has variable sized allocas
201 /// or if frame pointer elimination is disabled.
202 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
203   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
204   const MachineFrameInfo &MFI = MF.getFrameInfo();
205 
206   // ABI-required frame pointer.
207   if (MF.getTarget().Options.DisableFramePointerElim(MF))
208     return true;
209 
210   // Frame pointer required for use within this function.
211   return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
212           MFI.isFrameAddressTaken());
213 }
214 
215 /// isFPReserved - Return true if the frame pointer register should be
216 /// considered a reserved register on the scope of the specified function.
217 bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
218   return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
219 }
220 
221 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
222 /// not required, we reserve argument space for call sites in the function
223 /// immediately on entry to the current function.  This eliminates the need for
224 /// add/sub sp brackets around call sites.  Returns true if the call frame is
225 /// included as part of the stack frame.
226 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
227   const MachineFrameInfo &MFI = MF.getFrameInfo();
228   unsigned CFSize = MFI.getMaxCallFrameSize();
229   // It's not always a good idea to include the call frame as part of the
230   // stack frame. ARM (especially Thumb) has small immediate offset to
231   // address the stack frame. So a large call frame can cause poor codegen
232   // and may even makes it impossible to scavenge a register.
233   if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
234     return false;
235 
236   return !MFI.hasVarSizedObjects();
237 }
238 
239 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
240 /// call frame pseudos can be simplified.  Unlike most targets, having a FP
241 /// is not sufficient here since we still may reference some objects via SP
242 /// even when FP is available in Thumb2 mode.
243 bool
244 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
245   return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
246 }
247 
248 // Returns how much of the incoming argument stack area we should clean up in an
249 // epilogue. For the C calling convention this will be 0, for guaranteed tail
250 // call conventions it can be positive (a normal return or a tail call to a
251 // function that uses less stack space for arguments) or negative (for a tail
252 // call to a function that needs more stack space than us for arguments).
253 static int getArgumentStackToRestore(MachineFunction &MF,
254                                      MachineBasicBlock &MBB) {
255   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
256   bool IsTailCallReturn = false;
257   if (MBB.end() != MBBI) {
258     unsigned RetOpcode = MBBI->getOpcode();
259     IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
260                        RetOpcode == ARM::TCRETURNri;
261   }
262   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
263 
264   int ArgumentPopSize = 0;
265   if (IsTailCallReturn) {
266     MachineOperand &StackAdjust = MBBI->getOperand(1);
267 
268     // For a tail-call in a callee-pops-arguments environment, some or all of
269     // the stack may actually be in use for the call's arguments, this is
270     // calculated during LowerCall and consumed here...
271     ArgumentPopSize = StackAdjust.getImm();
272   } else {
273     // ... otherwise the amount to pop is *all* of the argument space,
274     // conveniently stored in the MachineFunctionInfo by
275     // LowerFormalArguments. This will, of course, be zero for the C calling
276     // convention.
277     ArgumentPopSize = AFI->getArgumentStackToRestore();
278   }
279 
280   return ArgumentPopSize;
281 }
282 
283 static bool needsWinCFI(const MachineFunction &MF) {
284   const Function &F = MF.getFunction();
285   return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
286          F.needsUnwindTableEntry();
287 }
288 
289 // Given a load or a store instruction, generate an appropriate unwinding SEH
290 // code on Windows.
291 static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
292                                              const TargetInstrInfo &TII,
293                                              unsigned Flags) {
294   unsigned Opc = MBBI->getOpcode();
295   MachineBasicBlock *MBB = MBBI->getParent();
296   MachineFunction &MF = *MBB->getParent();
297   DebugLoc DL = MBBI->getDebugLoc();
298   MachineInstrBuilder MIB;
299   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
300   const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
301 
302   Flags |= MachineInstr::NoMerge;
303 
304   switch (Opc) {
305   default:
306     report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
307     break;
308   case ARM::t2ADDri:   // add.w r11, sp, #xx
309   case ARM::t2ADDri12: // add.w r11, sp, #xx
310   case ARM::t2MOVTi16: // movt  r4, #xx
311   case ARM::tBL:       // bl __chkstk
312     // These are harmless if used for just setting up a frame pointer,
313     // but that frame pointer can't be relied upon for unwinding, unless
314     // set up with SEH_SaveSP.
315     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
316               .addImm(/*Wide=*/1)
317               .setMIFlags(Flags);
318     break;
319 
320   case ARM::t2MOVi16: { // mov(w) r4, #xx
321     bool Wide = MBBI->getOperand(1).getImm() >= 256;
322     if (!Wide) {
323       MachineInstrBuilder NewInstr =
324           BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
325       NewInstr.add(MBBI->getOperand(0));
326       NewInstr.add(t1CondCodeOp(/*isDead=*/true));
327       for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
328         NewInstr.add(MBBI->getOperand(i));
329       MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
330       MBB->erase(MBBI);
331       MBBI = NewMBBI;
332     }
333     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
334     break;
335   }
336 
337   case ARM::tBLXr: // blx r12 (__chkstk)
338     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
339               .addImm(/*Wide=*/0)
340               .setMIFlags(Flags);
341     break;
342 
343   case ARM::t2MOVi32imm: // movw+movt
344     // This pseudo instruction expands into two mov instructions. If the
345     // second operand is a symbol reference, this will stay as two wide
346     // instructions, movw+movt. If they're immediates, the first one can
347     // end up as a narrow mov though.
348     // As two SEH instructions are appended here, they won't get interleaved
349     // between the two final movw/movt instructions, but it doesn't make any
350     // practical difference.
351     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
352               .addImm(/*Wide=*/1)
353               .setMIFlags(Flags);
354     MBB->insertAfter(MBBI, MIB);
355     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
356               .addImm(/*Wide=*/1)
357               .setMIFlags(Flags);
358     break;
359 
360   case ARM::t2STR_PRE:
361     if (MBBI->getOperand(0).getReg() == ARM::SP &&
362         MBBI->getOperand(2).getReg() == ARM::SP &&
363         MBBI->getOperand(3).getImm() == -4) {
364       unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
365       MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
366                 .addImm(1ULL << Reg)
367                 .addImm(/*Wide=*/1)
368                 .setMIFlags(Flags);
369     } else {
370       report_fatal_error("No matching SEH Opcode for t2STR_PRE");
371     }
372     break;
373 
374   case ARM::t2LDR_POST:
375     if (MBBI->getOperand(1).getReg() == ARM::SP &&
376         MBBI->getOperand(2).getReg() == ARM::SP &&
377         MBBI->getOperand(3).getImm() == 4) {
378       unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
379       MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
380                 .addImm(1ULL << Reg)
381                 .addImm(/*Wide=*/1)
382                 .setMIFlags(Flags);
383     } else {
384       report_fatal_error("No matching SEH Opcode for t2LDR_POST");
385     }
386     break;
387 
388   case ARM::t2LDMIA_RET:
389   case ARM::t2LDMIA_UPD:
390   case ARM::t2STMDB_UPD: {
391     unsigned Mask = 0;
392     bool Wide = false;
393     for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
394       const MachineOperand &MO = MBBI->getOperand(i);
395       if (!MO.isReg() || MO.isImplicit())
396         continue;
397       unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
398       if (Reg == 15)
399         Reg = 14;
400       if (Reg >= 8 && Reg <= 13)
401         Wide = true;
402       else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
403         Wide = true;
404       Mask |= 1 << Reg;
405     }
406     if (!Wide) {
407       unsigned NewOpc;
408       switch (Opc) {
409       case ARM::t2LDMIA_RET:
410         NewOpc = ARM::tPOP_RET;
411         break;
412       case ARM::t2LDMIA_UPD:
413         NewOpc = ARM::tPOP;
414         break;
415       case ARM::t2STMDB_UPD:
416         NewOpc = ARM::tPUSH;
417         break;
418       default:
419         llvm_unreachable("");
420       }
421       MachineInstrBuilder NewInstr =
422           BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
423       for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
424         NewInstr.add(MBBI->getOperand(i));
425       MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
426       MBB->erase(MBBI);
427       MBBI = NewMBBI;
428     }
429     unsigned SEHOpc =
430         (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
431     MIB = BuildMI(MF, DL, TII.get(SEHOpc))
432               .addImm(Mask)
433               .addImm(Wide ? 1 : 0)
434               .setMIFlags(Flags);
435     break;
436   }
437   case ARM::VSTMDDB_UPD:
438   case ARM::VLDMDIA_UPD: {
439     int First = -1, Last = 0;
440     for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
441       const MachineOperand &MO = MBBI->getOperand(i);
442       unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
443       if (First == -1)
444         First = Reg;
445       Last = Reg;
446     }
447     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
448               .addImm(First)
449               .addImm(Last)
450               .setMIFlags(Flags);
451     break;
452   }
453   case ARM::tSUBspi:
454   case ARM::tADDspi:
455     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
456               .addImm(MBBI->getOperand(2).getImm() * 4)
457               .addImm(/*Wide=*/0)
458               .setMIFlags(Flags);
459     break;
460   case ARM::t2SUBspImm:
461   case ARM::t2SUBspImm12:
462   case ARM::t2ADDspImm:
463   case ARM::t2ADDspImm12:
464     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
465               .addImm(MBBI->getOperand(2).getImm())
466               .addImm(/*Wide=*/1)
467               .setMIFlags(Flags);
468     break;
469 
470   case ARM::tMOVr:
471     if (MBBI->getOperand(1).getReg() == ARM::SP &&
472         (Flags & MachineInstr::FrameSetup)) {
473       unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
474       MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
475                 .addImm(Reg)
476                 .setMIFlags(Flags);
477     } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
478                (Flags & MachineInstr::FrameDestroy)) {
479       unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
480       MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
481                 .addImm(Reg)
482                 .setMIFlags(Flags);
483     } else {
484       report_fatal_error("No SEH Opcode for MOV");
485     }
486     break;
487 
488   case ARM::tBX_RET:
489   case ARM::TCRETURNri:
490     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
491               .addImm(/*Wide=*/0)
492               .setMIFlags(Flags);
493     break;
494 
495   case ARM::TCRETURNdi:
496     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
497               .addImm(/*Wide=*/1)
498               .setMIFlags(Flags);
499     break;
500   }
501   return MBB->insertAfter(MBBI, MIB);
502 }
503 
504 static MachineBasicBlock::iterator
505 initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
506   if (MBBI == MBB.begin())
507     return MachineBasicBlock::iterator();
508   return std::prev(MBBI);
509 }
510 
511 static void insertSEHRange(MachineBasicBlock &MBB,
512                            MachineBasicBlock::iterator Start,
513                            const MachineBasicBlock::iterator &End,
514                            const ARMBaseInstrInfo &TII, unsigned MIFlags) {
515   if (Start.isValid())
516     Start = std::next(Start);
517   else
518     Start = MBB.begin();
519 
520   for (auto MI = Start; MI != End;) {
521     auto Next = std::next(MI);
522     // Check if this instruction already has got a SEH opcode added. In that
523     // case, don't do this generic mapping.
524     if (Next != End && isSEHInstruction(*Next)) {
525       MI = std::next(Next);
526       while (MI != End && isSEHInstruction(*MI))
527         ++MI;
528       continue;
529     }
530     insertSEH(MI, TII, MIFlags);
531     MI = Next;
532   }
533 }
534 
535 static void emitRegPlusImmediate(
536     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
537     const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
538     unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
539     ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
540   if (isARM)
541     emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
542                             Pred, PredReg, TII, MIFlags);
543   else
544     emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
545                            Pred, PredReg, TII, MIFlags);
546 }
547 
548 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
549                          MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
550                          const ARMBaseInstrInfo &TII, int NumBytes,
551                          unsigned MIFlags = MachineInstr::NoFlags,
552                          ARMCC::CondCodes Pred = ARMCC::AL,
553                          unsigned PredReg = 0) {
554   emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
555                        MIFlags, Pred, PredReg);
556 }
557 
558 static int sizeOfSPAdjustment(const MachineInstr &MI) {
559   int RegSize;
560   switch (MI.getOpcode()) {
561   case ARM::VSTMDDB_UPD:
562     RegSize = 8;
563     break;
564   case ARM::STMDB_UPD:
565   case ARM::t2STMDB_UPD:
566     RegSize = 4;
567     break;
568   case ARM::t2STR_PRE:
569   case ARM::STR_PRE_IMM:
570     return 4;
571   default:
572     llvm_unreachable("Unknown push or pop like instruction");
573   }
574 
575   int count = 0;
576   // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
577   // pred) so the list starts at 4.
578   for (int i = MI.getNumOperands() - 1; i >= 4; --i)
579     count += RegSize;
580   return count;
581 }
582 
583 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
584                                       size_t StackSizeInBytes) {
585   const MachineFrameInfo &MFI = MF.getFrameInfo();
586   const Function &F = MF.getFunction();
587   unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
588 
589   StackProbeSize =
590       F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
591   return (StackSizeInBytes >= StackProbeSize) &&
592          !F.hasFnAttribute("no-stack-arg-probe");
593 }
594 
595 namespace {
596 
597 struct StackAdjustingInsts {
598   struct InstInfo {
599     MachineBasicBlock::iterator I;
600     unsigned SPAdjust;
601     bool BeforeFPSet;
602   };
603 
604   SmallVector<InstInfo, 4> Insts;
605 
606   void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
607                bool BeforeFPSet = false) {
608     InstInfo Info = {I, SPAdjust, BeforeFPSet};
609     Insts.push_back(Info);
610   }
611 
612   void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
613     auto Info =
614         llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
615     assert(Info != Insts.end() && "invalid sp adjusting instruction");
616     Info->SPAdjust += ExtraBytes;
617   }
618 
619   void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
620                          const ARMBaseInstrInfo &TII, bool HasFP) {
621     MachineFunction &MF = *MBB.getParent();
622     unsigned CFAOffset = 0;
623     for (auto &Info : Insts) {
624       if (HasFP && !Info.BeforeFPSet)
625         return;
626 
627       CFAOffset += Info.SPAdjust;
628       unsigned CFIIndex = MF.addFrameInst(
629           MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
630       BuildMI(MBB, std::next(Info.I), dl,
631               TII.get(TargetOpcode::CFI_INSTRUCTION))
632               .addCFIIndex(CFIIndex)
633               .setMIFlags(MachineInstr::FrameSetup);
634     }
635   }
636 };
637 
638 } // end anonymous namespace
639 
640 /// Emit an instruction sequence that will align the address in
641 /// register Reg by zero-ing out the lower bits.  For versions of the
642 /// architecture that support Neon, this must be done in a single
643 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
644 /// single instruction. That function only gets called when optimizing
645 /// spilling of D registers on a core with the Neon instruction set
646 /// present.
647 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
648                                      const TargetInstrInfo &TII,
649                                      MachineBasicBlock &MBB,
650                                      MachineBasicBlock::iterator MBBI,
651                                      const DebugLoc &DL, const unsigned Reg,
652                                      const Align Alignment,
653                                      const bool MustBeSingleInstruction) {
654   const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
655   const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
656   const unsigned AlignMask = Alignment.value() - 1U;
657   const unsigned NrBitsToZero = Log2(Alignment);
658   assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
659   if (!AFI->isThumbFunction()) {
660     // if the BFC instruction is available, use that to zero the lower
661     // bits:
662     //   bfc Reg, #0, log2(Alignment)
663     // otherwise use BIC, if the mask to zero the required number of bits
664     // can be encoded in the bic immediate field
665     //   bic Reg, Reg, Alignment-1
666     // otherwise, emit
667     //   lsr Reg, Reg, log2(Alignment)
668     //   lsl Reg, Reg, log2(Alignment)
669     if (CanUseBFC) {
670       BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
671           .addReg(Reg, RegState::Kill)
672           .addImm(~AlignMask)
673           .add(predOps(ARMCC::AL));
674     } else if (AlignMask <= 255) {
675       BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
676           .addReg(Reg, RegState::Kill)
677           .addImm(AlignMask)
678           .add(predOps(ARMCC::AL))
679           .add(condCodeOp());
680     } else {
681       assert(!MustBeSingleInstruction &&
682              "Shouldn't call emitAligningInstructions demanding a single "
683              "instruction to be emitted for large stack alignment for a target "
684              "without BFC.");
685       BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
686           .addReg(Reg, RegState::Kill)
687           .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
688           .add(predOps(ARMCC::AL))
689           .add(condCodeOp());
690       BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
691           .addReg(Reg, RegState::Kill)
692           .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
693           .add(predOps(ARMCC::AL))
694           .add(condCodeOp());
695     }
696   } else {
697     // Since this is only reached for Thumb-2 targets, the BFC instruction
698     // should always be available.
699     assert(CanUseBFC);
700     BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
701         .addReg(Reg, RegState::Kill)
702         .addImm(~AlignMask)
703         .add(predOps(ARMCC::AL));
704   }
705 }
706 
707 /// We need the offset of the frame pointer relative to other MachineFrameInfo
708 /// offsets which are encoded relative to SP at function begin.
709 /// See also emitPrologue() for how the FP is set up.
710 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
711 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
712 /// this to produce a conservative estimate that we check in an assert() later.
713 static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
714                           const MachineFunction &MF) {
715   // For Thumb1, push.w isn't available, so the first push will always push
716   // r7 and lr onto the stack first.
717   if (AFI.isThumb1OnlyFunction())
718     return -AFI.getArgRegsSaveSize() - (2 * 4);
719   // This is a conservative estimation: Assume the frame pointer being r7 and
720   // pc("r15") up to r8 getting spilled before (= 8 registers).
721   int MaxRegBytes = 8 * 4;
722   if (STI.splitFramePointerPush(MF)) {
723     // Here, r11 can be stored below all of r4-r15 (3 registers more than
724     // above), plus d8-d15.
725     MaxRegBytes = 11 * 4 + 8 * 8;
726   }
727   int FPCXTSaveSize =
728       (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
729   return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
730 }
731 
/// Emit the prologue for an ARM or Thumb2 function at the start of \p MBB
/// (Thumb1 is rejected by the assert below; GHC-convention functions get no
/// prologue at all).
///
/// The routine sizes the callee-save spill areas from the CalleeSavedInfo
/// list, steps \c MBBI over instructions that are already in the block
/// (presumably the pushes inserted earlier by the callee-save spilling code --
/// TODO confirm), inserts the remaining SP adjustments, sets up the frame and
/// base pointers when required, emits DWARF CFI or Windows SEH unwind
/// annotations, and records the resulting sizes and offsets in
/// ARMFunctionInfo.
732 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
733                                     MachineBasicBlock &MBB) const {
734   MachineBasicBlock::iterator MBBI = MBB.begin();
735   MachineFrameInfo  &MFI = MF.getFrameInfo();
736   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
737   MachineModuleInfo &MMI = MF.getMMI();
738   MCContext &Context = MMI.getContext();
739   const TargetMachine &TM = MF.getTarget();
740   const MCRegisterInfo *MRI = Context.getRegisterInfo();
741   const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
742   const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
743   assert(!AFI->isThumb1OnlyFunction() &&
744          "This emitPrologue does not support Thumb1!");
745   bool isARM = !AFI->isThumbFunction();
746   Align Alignment = STI.getFrameLowering()->getStackAlign();
747   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
      // NumBytes starts as the whole frame size and is narrowed further down to
      // the bytes that still have to be allocated after the callee-save pushes.
748   unsigned NumBytes = MFI.getStackSize();
749   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
750   int FPCXTSaveSize = 0;
751   bool NeedsWinCFI = needsWinCFI(MF);
752 
753   // Debug location must be unknown since the first debug location is used
754   // to determine the end of the prologue.
755   DebugLoc dl;
756 
757   Register FramePtr = RegInfo->getFrameRegister(MF);
758 
759   // Determine the sizes of each callee-save spill areas and record which frame
760   // belongs to which callee-save spill areas.
761   unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
762   int FramePtrSpillFI = 0;
763   int D8SpillFI = 0;
764 
765   // All calls are tail calls in GHC calling conv, and functions have no
766   // prologue/epilogue.
767   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
768     return;
769 
770   StackAdjustingInsts DefCFAOffsetCandidates;
771   bool HasFP = hasFP(MF);
772 
      // No stack frame (and no Windows stack probe needed): a single SP update of
      // NumBytes, plus its unwind annotation, is the entire prologue.
773   if (!AFI->hasStackFrame() &&
774       (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
775     if (NumBytes != 0) {
776       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
777                    MachineInstr::FrameSetup);
778       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
779     }
780     if (!NeedsWinCFI)
781       DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
782     if (NeedsWinCFI && MBBI != MBB.begin()) {
783       insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
784       BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
785           .setMIFlag(MachineInstr::FrameSetup);
786       MF.setHasWinCFI(true);
787     }
788     return;
789   }
790 
791   // Determine spill area sizes.
      // With a split frame pointer push only r11 and lr are counted in area 2;
      // otherwise r8-r12 are counted in area 2 when splitFramePushPop() is set
      // and in area 1 when it is not.
792   if (STI.splitFramePointerPush(MF)) {
793     for (const CalleeSavedInfo &I : CSI) {
794       Register Reg = I.getReg();
795       int FI = I.getFrameIdx();
796       switch (Reg) {
797       case ARM::R11:
798       case ARM::LR:
799         if (Reg == FramePtr)
800           FramePtrSpillFI = FI;
801         GPRCS2Size += 4;
802         break;
803       case ARM::R0:
804       case ARM::R1:
805       case ARM::R2:
806       case ARM::R3:
807       case ARM::R4:
808       case ARM::R5:
809       case ARM::R6:
810       case ARM::R7:
811       case ARM::R8:
812       case ARM::R9:
813       case ARM::R10:
814       case ARM::R12:
815         GPRCS1Size += 4;
816         break;
817       case ARM::FPCXTNS:
818         FPCXTSaveSize = 4;
819         break;
820       default:
821         // This is a DPR. Exclude the aligned DPRCS2 spills.
822         if (Reg == ARM::D8)
823           D8SpillFI = FI;
824         if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
825           DPRCSSize += 8;
826       }
827     }
828   } else {
829     for (const CalleeSavedInfo &I : CSI) {
830       Register Reg = I.getReg();
831       int FI = I.getFrameIdx();
832       switch (Reg) {
833       case ARM::R8:
834       case ARM::R9:
835       case ARM::R10:
836       case ARM::R11:
837       case ARM::R12:
838         if (STI.splitFramePushPop(MF)) {
839           GPRCS2Size += 4;
840           break;
841         }
842         [[fallthrough]];
843       case ARM::R0:
844       case ARM::R1:
845       case ARM::R2:
846       case ARM::R3:
847       case ARM::R4:
848       case ARM::R5:
849       case ARM::R6:
850       case ARM::R7:
851       case ARM::LR:
852         if (Reg == FramePtr)
853           FramePtrSpillFI = FI;
854         GPRCS1Size += 4;
855         break;
856       case ARM::FPCXTNS:
857         FPCXTSaveSize = 4;
858         break;
859       default:
860         // This is a DPR. Exclude the aligned DPRCS2 spills.
861         if (Reg == ARM::D8)
862           D8SpillFI = FI;
863         if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
864           DPRCSSize += 8;
865       }
866     }
867   }
868 
      // LastPush remembers the most recent push-like instruction stepped over so
      // that later SP updates can try to fold into it. GPRCS1Push and GPRCS2Push
      // remember the area 1 / area 2 pushes; they anchor the FP setup and the
      // CFI placement further down.
869   MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
870 
871   // Move past the PAC computation.
872   if (AFI->shouldSignReturnAddress())
873     LastPush = MBBI++;
874 
875   // Move past FPCXT area.
876   if (FPCXTSaveSize > 0) {
877     LastPush = MBBI++;
878     DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
879   }
880 
881   // Allocate the vararg register save area.
882   if (ArgRegsSaveSize) {
883     emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
884                  MachineInstr::FrameSetup);
885     LastPush = std::prev(MBBI);
886     DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
887   }
888 
889   // Move past area 1.
890   if (GPRCS1Size > 0) {
891     GPRCS1Push = LastPush = MBBI++;
892     DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
893   }
894 
895   // Determine starting offsets of spill areas.
      // Each offset is the frame size minus the sizes of the areas laid out
      // above it.
896   unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
897   unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
898   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
899   Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
      // DPRGapSize is the padding inserted above the DPR area: the bytes pushed
      // above that area, modulo DPRAlign (asserted below to be 4 when non-zero).
900   unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
901   if (!STI.splitFramePointerPush(MF)) {
902     DPRGapSize += GPRCS2Size;
903   }
904   DPRGapSize %= DPRAlign.value();
905 
      // With a split frame pointer push, area 2 is laid out below the DPR area
      // rather than above it, so its offset is recomputed from DPRCSOffset.
906   unsigned DPRCSOffset;
907   if (STI.splitFramePointerPush(MF)) {
908     DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
909     GPRCS2Offset = DPRCSOffset - GPRCS2Size;
910   } else {
911     DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
912   }
913   int FramePtrOffsetInPush = 0;
914   if (HasFP) {
915     int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
         // Check the conservative bound computed by getMaxFPOffset() against the
         // real spill slot offset.
916     assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
917            "Max FP estimation is wrong");
918     FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
919     AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
920                                 NumBytes);
921   }
922   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
923   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
924   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
925 
926   // Move past area 2.
927   if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
928     GPRCS2Push = LastPush = MBBI++;
929     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
930   }
931 
932   // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
933   // .cfi_offset operations will reflect that.
934   if (DPRGapSize) {
935     assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
         // Prefer folding the gap into the preceding push; otherwise emit a
         // separate SP update for it.
936     if (LastPush != MBB.end() &&
937         tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
938       DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
939     else {
940       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
941                    MachineInstr::FrameSetup);
942       DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
943     }
944   }
945 
946   // Move past area 3.
947   if (DPRCSSize > 0) {
948     // Since vpush register list cannot have gaps, there may be multiple vpush
949     // instructions in the prologue.
950     while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
951       DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
952       LastPush = MBBI++;
953     }
954   }
955 
956   // Move past the aligned DPRCS2 area.
957   if (AFI->getNumAlignedDPRCS2Regs() > 0) {
958     MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
959     // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
960     // leaves the stack pointer pointing to the DPRCS2 area.
961     //
962     // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
963     NumBytes += MFI.getObjectOffset(D8SpillFI);
964   } else
965     NumBytes = DPRCSOffset;
966 
      // Split frame pointer push: area 2 comes after the DPR spills, so it is
      // stepped over here rather than right after area 1.
967   if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
968     GPRCS2Push = LastPush = MBBI++;
969     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
970   }
971 
972   bool NeedsWinCFIStackAlloc = NeedsWinCFI;
973   if (STI.splitFramePointerPush(MF) && HasFP)
974     NeedsWinCFIStackAlloc = false;
975 
      // Windows stack probe: load the allocation size, in 4-byte words, into r4,
      // call __chkstk, then subtract r4 from SP. (Presumably __chkstk leaves the
      // size in bytes in r4 on return -- TODO confirm against the Windows ABI.)
976   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
977     uint32_t NumWords = NumBytes >> 2;
978 
979     if (NumWords < 65536) {
980       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
981           .addImm(NumWords)
982           .setMIFlags(MachineInstr::FrameSetup)
983           .add(predOps(ARMCC::AL));
984     } else {
985       // Split into two instructions here, instead of using t2MOVi32imm,
986       // to allow inserting accurate SEH instructions (including accurate
987       // instruction size for each of them).
988       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
989           .addImm(NumWords & 0xffff)
990           .setMIFlags(MachineInstr::FrameSetup)
991           .add(predOps(ARMCC::AL));
992       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
993           .addReg(ARM::R4)
994           .addImm(NumWords >> 16)
995           .setMIFlags(MachineInstr::FrameSetup)
996           .add(predOps(ARMCC::AL));
997     }
998 
         // Small/medium/kernel code models call __chkstk directly; the large
         // code model materialises its address in r12 and calls through that.
999     switch (TM.getCodeModel()) {
1000     case CodeModel::Tiny:
1001       llvm_unreachable("Tiny code model not available on ARM.");
1002     case CodeModel::Small:
1003     case CodeModel::Medium:
1004     case CodeModel::Kernel:
1005       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1006           .add(predOps(ARMCC::AL))
1007           .addExternalSymbol("__chkstk")
1008           .addReg(ARM::R4, RegState::Implicit)
1009           .setMIFlags(MachineInstr::FrameSetup);
1010       break;
1011     case CodeModel::Large:
1012       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1013         .addExternalSymbol("__chkstk")
1014         .setMIFlags(MachineInstr::FrameSetup);
1015 
1016       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1017           .add(predOps(ARMCC::AL))
1018           .addReg(ARM::R12, RegState::Kill)
1019           .addReg(ARM::R4, RegState::Implicit)
1020           .setMIFlags(MachineInstr::FrameSetup);
1021       break;
1022     }
1023 
1024     MachineInstrBuilder Instr, SEH;
1025     Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1026                 .addReg(ARM::SP, RegState::Kill)
1027                 .addReg(ARM::R4, RegState::Kill)
1028                 .setMIFlags(MachineInstr::FrameSetup)
1029                 .add(predOps(ARMCC::AL))
1030                 .add(condCodeOp());
1031     if (NeedsWinCFIStackAlloc) {
1032       SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1033                 .addImm(NumBytes)
1034                 .addImm(/*Wide=*/1)
1035                 .setMIFlags(MachineInstr::FrameSetup);
1036       MBB.insertAfter(Instr, SEH);
1037     }
         // The probe sequence above performed the allocation, so nothing is left
         // for the generic SP update below.
1038     NumBytes = 0;
1039   }
1040 
1041   if (NumBytes) {
1042     // Adjust SP after all the callee-save spills.
1043     if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1044         tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1045       DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1046     else {
1047       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1048                    MachineInstr::FrameSetup);
1049       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1050     }
1051 
1052     if (HasFP && isARM)
1053       // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1054       // Note it's not safe to do this in Thumb2 mode because it would have
1055       // taken two instructions:
1056       // mov sp, r7
1057       // sub sp, #24
1058       // If an interrupt is taken between the two instructions, then sp is in
1059       // an inconsistent state (pointing to the middle of callee-saved area).
1060       // The interrupt handler can end up clobbering the registers.
1061       AFI->setShouldRestoreSPFromFP(true);
1062   }
1063 
1064   // Set FP to point to the stack slot that contains the previous FP.
1065   // For iOS, FP is R7, which has now been stored in spill area 1.
1066   // Otherwise, if this is not iOS, all the callee-saved registers go
1067   // into spill area 1, including the FP in R11.  In either case, it
1068   // is in area one and the adjustment needs to take place just after
1069   // that push.
1070   // FIXME: The above is not necessarily true when PACBTI is enabled.
1071   // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1072   // so FP ends up on area two.
1073   MachineBasicBlock::iterator AfterPush;
1074   if (HasFP) {
1075     AfterPush = std::next(GPRCS1Push);
1076     unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1077     int FPOffset = PushSize + FramePtrOffsetInPush;
          // With a split frame pointer push the FP is set to SP + 0 right after
          // the area 2 push; otherwise to SP + FPOffset right after area 1.
1078     if (STI.splitFramePointerPush(MF)) {
1079       AfterPush = std::next(GPRCS2Push);
1080       emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1081                            FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1082     } else {
1083       emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1084                            FramePtr, ARM::SP, FPOffset,
1085                            MachineInstr::FrameSetup);
1086     }
1087     if (!NeedsWinCFI) {
1088       if (FramePtrOffsetInPush + PushSize != 0) {
1089         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1090             nullptr, MRI->getDwarfRegNum(FramePtr, true),
1091             FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1092         BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1093             .addCFIIndex(CFIIndex)
1094             .setMIFlags(MachineInstr::FrameSetup);
1095       } else {
1096         unsigned CFIIndex =
1097             MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
1098                 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1099         BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1100             .addCFIIndex(CFIIndex)
1101             .setMIFlags(MachineInstr::FrameSetup);
1102       }
1103     }
1104   }
1105 
1106   // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1107   // instructions below don't need to be replayed to unwind the stack.
1108   if (NeedsWinCFI && MBBI != MBB.begin()) {
1109     MachineBasicBlock::iterator End = MBBI;
1110     if (HasFP && STI.splitFramePointerPush(MF))
1111       End = AfterPush;
1112     insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
1113     BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1114         .setMIFlag(MachineInstr::FrameSetup);
1115     MF.setHasWinCFI(true);
1116   }
1117 
1118   // Now that the prologue's actual instructions are finalised, we can insert
1119   // the necessary DWARF cf instructions to describe the situation. Start by
1120   // recording where each register ended up:
1121   if (GPRCS1Size > 0 && !NeedsWinCFI) {
1122     MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1123     int CFIIndex;
1124     for (const auto &Entry : CSI) {
1125       Register Reg = Entry.getReg();
1126       int FI = Entry.getFrameIdx();
1127       switch (Reg) {
1128       case ARM::R8:
1129       case ARM::R9:
1130       case ARM::R10:
1131       case ARM::R11:
1132       case ARM::R12:
1133         if (STI.splitFramePushPop(MF))
1134           break;
1135         [[fallthrough]];
1136       case ARM::R0:
1137       case ARM::R1:
1138       case ARM::R2:
1139       case ARM::R3:
1140       case ARM::R4:
1141       case ARM::R5:
1142       case ARM::R6:
1143       case ARM::R7:
1144       case ARM::LR:
1145         CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1146             nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1147         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1148             .addCFIIndex(CFIIndex)
1149             .setMIFlags(MachineInstr::FrameSetup);
1150         break;
1151       }
1152     }
1153   }
1154 
1155   if (GPRCS2Size > 0 && !NeedsWinCFI) {
1156     MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1157     for (const auto &Entry : CSI) {
1158       Register Reg = Entry.getReg();
1159       int FI = Entry.getFrameIdx();
1160       switch (Reg) {
1161       case ARM::R8:
1162       case ARM::R9:
1163       case ARM::R10:
1164       case ARM::R11:
1165       case ARM::R12:
1166         if (STI.splitFramePushPop(MF)) {
                // The r12 slot is described to the unwinder as RA_AUTH_CODE.
1167           unsigned DwarfReg = MRI->getDwarfRegNum(
1168               Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1169           unsigned Offset = MFI.getObjectOffset(FI);
1170           unsigned CFIIndex = MF.addFrameInst(
1171               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1172           BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1173               .addCFIIndex(CFIIndex)
1174               .setMIFlags(MachineInstr::FrameSetup);
1175         }
1176         break;
1177       }
1178     }
1179   }
1180 
1181   if (DPRCSSize > 0 && !NeedsWinCFI) {
1182     // Since vpush register list cannot have gaps, there may be multiple vpush
1183     // instructions in the prologue.
1184     MachineBasicBlock::iterator Pos = std::next(LastPush);
1185     for (const auto &Entry : CSI) {
1186       Register Reg = Entry.getReg();
1187       int FI = Entry.getFrameIdx();
1188       if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1189           (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1190         unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1191         unsigned Offset = MFI.getObjectOffset(FI);
1192         unsigned CFIIndex = MF.addFrameInst(
1193             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1194         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1195             .addCFIIndex(CFIIndex)
1196             .setMIFlags(MachineInstr::FrameSetup);
1197       }
1198     }
1199   }
1200 
1201   // Now we can emit descriptions of where the canonical frame address was
1202   // throughout the process. If we have a frame pointer, it takes over the job
1203   // half-way through, so only the first few .cfi_def_cfa_offset instructions
1204   // actually get emitted.
1205   if (!NeedsWinCFI)
1206     DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1207 
       // On ELF targets with a frame pointer, subtract the FP spill offset from
       // the frame's offset adjustment.
1208   if (STI.isTargetELF() && hasFP(MF))
1209     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
1210                             AFI->getFramePtrSpillOffset());
1211 
       // Record the area sizes; emitEpilogue() reads them back to undo the frame.
1212   AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1213   AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1214   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1215   AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1216   AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1217 
1218   // If we need dynamic stack realignment, do it here. Be paranoid and make
1219   // sure if we also have VLAs, we have a base pointer for frame access.
1220   // If aligned NEON registers were spilled, the stack has already been
1221   // realigned.
1222   if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1223     Align MaxAlign = MFI.getMaxAlign();
1224     assert(!AFI->isThumb1OnlyFunction());
1225     if (!AFI->isThumbFunction()) {
1226       emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1227                                false);
1228     } else {
1229       // We cannot use sp as source/dest register here, thus we're using r4 to
1230       // perform the calculations. We're emitting the following sequence:
1231       // mov r4, sp
1232       // -- use emitAligningInstructions to produce best sequence to zero
1233       // -- out lower bits in r4
1234       // mov sp, r4
1235       // FIXME: It will be better just to find spare register here.
1236       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1237           .addReg(ARM::SP, RegState::Kill)
1238           .add(predOps(ARMCC::AL));
1239       emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1240                                false);
1241       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1242           .addReg(ARM::R4, RegState::Kill)
1243           .add(predOps(ARMCC::AL));
1244     }
1245 
1246     AFI->setShouldRestoreSPFromFP(true);
1247   }
1248 
1249   // If we need a base pointer, set it up here. It's whatever the value
1250   // of the stack pointer is at this point. Any variable size objects
1251   // will be allocated after this, so we can still use the base pointer
1252   // to reference locals.
1253   // FIXME: Clarify FrameSetup flags here.
1254   if (RegInfo->hasBasePointer(MF)) {
1255     if (isARM)
1256       BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1257           .addReg(ARM::SP)
1258           .add(predOps(ARMCC::AL))
1259           .add(condCodeOp());
1260     else
1261       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1262           .addReg(ARM::SP)
1263           .add(predOps(ARMCC::AL));
1264   }
1265 
1266   // If the frame has variable sized objects then the epilogue must restore
1267   // the sp from fp. We can assume there's an FP here since hasFP already
1268   // checks for hasVarSizedObjects.
1269   if (MFI.hasVarSizedObjects())
1270     AFI->setShouldRestoreSPFromFP(true);
1271 }
1272 
/// Emit the epilogue for an ARM or Thumb2 function in front of the first
/// terminator of \p MBB (Thumb1 is rejected by the assert below;
/// GHC-convention functions get no epilogue).
///
/// Deallocates the local area -- restoring SP from the frame pointer when
/// ARMFunctionInfo asks for it -- then steps over the FrameDestroy-flagged
/// instructions already in the block, adding the SP updates for the DPR
/// alignment gap and the argument save area, the PAC validation, and the
/// Windows SEH epilogue markers where needed. The area sizes are read back
/// from ARMFunctionInfo.
1273 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
1274                                     MachineBasicBlock &MBB) const {
1275   MachineFrameInfo &MFI = MF.getFrameInfo();
1276   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1277   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1278   const ARMBaseInstrInfo &TII =
1279       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1280   assert(!AFI->isThumb1OnlyFunction() &&
1281          "This emitEpilogue does not support Thumb1!");
1282   bool isARM = !AFI->isThumbFunction();
1283 
1284   // Amount of stack space we reserved next to incoming args for either
1285   // varargs registers or stack arguments in tail calls made by this function.
1286   unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1287 
1288   // How much of the stack used by incoming arguments this function is expected
1289   // to restore in this particular epilogue.
1290   int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1291   int NumBytes = (int)MFI.getStackSize();
1292   Register FramePtr = RegInfo->getFrameRegister(MF);
1293 
1294   // All calls are tail calls in GHC calling conv, and functions have no
1295   // prologue/epilogue.
1296   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1297     return;
1298 
1299   // First put ourselves on the first (from top) terminator instructions.
1300   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1301   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1302 
       // RangeStart is only initialised (and only read) when the function has
       // Windows CFI.
1303   MachineBasicBlock::iterator RangeStart;
1304   if (!AFI->hasStackFrame()) {
         // No stack frame: one SP update covering the frame size plus the
         // incoming argument stack to restore is all that is needed.
1305     if (MF.hasWinCFI()) {
1306       BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1307           .setMIFlag(MachineInstr::FrameDestroy);
1308       RangeStart = initMBBRange(MBB, MBBI);
1309     }
1310 
1311     if (NumBytes + IncomingArgStackToRestore != 0)
1312       emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1313                    NumBytes + IncomingArgStackToRestore,
1314                    MachineInstr::FrameDestroy);
1315   } else {
1316     // Unwind MBBI to point to first LDR / VLDRD.
         // That is: back up over the trailing run of FrameDestroy-flagged
         // instructions, stopping on the first one of that run.
1317     if (MBBI != MBB.begin()) {
1318       do {
1319         --MBBI;
1320       } while (MBBI != MBB.begin() &&
1321                MBBI->getFlag(MachineInstr::FrameDestroy));
1322       if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1323         ++MBBI;
1324     }
1325 
1326     if (MF.hasWinCFI()) {
1327       BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1328           .setMIFlag(MachineInstr::FrameDestroy);
1329       RangeStart = initMBBRange(MBB, MBBI);
1330     }
1331 
1332     // Move SP to start of FP callee save spill area.
         // After this subtraction NumBytes is the frame size minus every save
         // area recorded in ARMFunctionInfo.
1333     NumBytes -= (ReservedArgStack +
1334                  AFI->getFPCXTSaveAreaSize() +
1335                  AFI->getGPRCalleeSavedArea1Size() +
1336                  AFI->getGPRCalleeSavedArea2Size() +
1337                  AFI->getDPRCalleeSavedGapSize() +
1338                  AFI->getDPRCalleeSavedAreaSize());
1339 
1340     // Reset SP based on frame pointer only if the stack frame extends beyond
1341     // frame pointer stack slot or target is ELF and the function has FP.
1342     if (AFI->shouldRestoreSPFromFP()) {
           // From here on NumBytes is the distance from the frame pointer down
           // to the SP value wanted before the pops.
1343       NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1344       if (NumBytes) {
1345         if (isARM)
1346           emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1347                                   ARMCC::AL, 0, TII,
1348                                   MachineInstr::FrameDestroy);
1349         else {
1350           // It's not possible to restore SP from FP in a single instruction.
1351           // For iOS, this looks like:
1352           // mov sp, r7
1353           // sub sp, #24
1354           // This is bad, if an interrupt is taken after the mov, sp is in an
1355           // inconsistent state.
1356           // Use the first callee-saved register as a scratch register.
1357           assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1358                  "No scratch register to restore SP from FP!");
1359           emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1360                                  ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
1361           BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1362               .addReg(ARM::R4)
1363               .add(predOps(ARMCC::AL))
1364               .setMIFlag(MachineInstr::FrameDestroy);
1365         }
1366       } else {
1367         // Thumb2 or ARM.
1368         if (isARM)
1369           BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1370               .addReg(FramePtr)
1371               .add(predOps(ARMCC::AL))
1372               .add(condCodeOp())
1373               .setMIFlag(MachineInstr::FrameDestroy);
1374         else
1375           BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1376               .addReg(FramePtr)
1377               .add(predOps(ARMCC::AL))
1378               .setMIFlag(MachineInstr::FrameDestroy);
1379       }
1380     } else if (NumBytes &&
1381                !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
           // Folding the deallocation into the following pop failed, so emit an
           // explicit SP update.
1382       emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1383                    MachineInstr::FrameDestroy);
1384 
1385     // Increment past our save areas.
         // The areas are stepped over in the reverse of the order used by
         // emitPrologue(): area 2 first when the frame pointer push is split,
         // then the DPR area, the DPR gap, area 2 otherwise, and finally area 1.
1386     if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
1387       MBBI++;
1388 
1389     if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1390       MBBI++;
1391       // Since vpop register list cannot have gaps, there may be multiple vpop
1392       // instructions in the epilogue.
1393       while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1394         MBBI++;
1395     }
1396     if (AFI->getDPRCalleeSavedGapSize()) {
1397       assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1398              "unexpected DPR alignment gap");
1399       emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1400                    MachineInstr::FrameDestroy);
1401     }
1402 
1403     if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
1404       MBBI++;
1405     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1406 
1407     if (ReservedArgStack || IncomingArgStackToRestore) {
1408       assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1409              "attempting to restore negative stack amount");
1410       emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1411                    ReservedArgStack + IncomingArgStackToRestore,
1412                    MachineInstr::FrameDestroy);
1413     }
1414 
1415     // Validate PAC. It should have been already popped into R12. For CMSE entry
1416     // function, the validation instruction is emitted during expansion of the
1417     // tBXNS_RET, since the validation must use the value of SP at function
1418     // entry, before saving, resp. after restoring, FPCXTNS.
1419     if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1420       BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1421   }
1422 
       // Annotate everything from RangeStart to the end of the block with SEH
       // opcodes and close the epilogue.
1423   if (MF.hasWinCFI()) {
1424     insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
1425     BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1426         .setMIFlag(MachineInstr::FrameDestroy);
1427   }
1428 }
1429 
1430 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1431 /// debug info.  It's the same as what we use for resolving the code-gen
1432 /// references for now.  FIXME: This can go wrong when references are
1433 /// SP-relative and simple call frames aren't used.
1434 StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1435                                                      int FI,
1436                                                      Register &FrameReg) const {
1437   return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1438 }
1439 
/// Choose the register used to address frame index \p FI and return the
/// offset of the object from that register.
///
/// \param MF       function owning the frame.
/// \param FI       frame index to resolve.
/// \param FrameReg [out] set to SP, the frame pointer or the base pointer.
/// \param SPAdj    extra adjustment added to SP-relative offsets; it is taken
///                 back out again when the base pointer is chosen.
/// \returns the offset of the object from \p FrameReg.
1440 int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
1441                                                  int FI, Register &FrameReg,
1442                                                  int SPAdj) const {
1443   const MachineFrameInfo &MFI = MF.getFrameInfo();
1444   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1445       MF.getSubtarget().getRegisterInfo());
1446   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
       // Offset is the object offset rebased by the stack size (the SP-relative
       // candidate); FPOffset is the same slot relative to the frame pointer
       // spill offset recorded in ARMFunctionInfo.
1447   int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1448   int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1449   bool isFixed = MFI.isFixedObjectIndex(FI);
1450 
       // Default to SP; the cases below override this where another register is
       // required or preferable.
1451   FrameReg = ARM::SP;
1452   Offset += SPAdj;
1453 
1454   // SP can move around if there are allocas.  We may also lose track of SP
1455   // when emergency spilling inside a non-reserved call frame setup.
1456   bool hasMovingSP = !hasReservedCallFrame(MF);
1457 
1458   // When dynamically realigning the stack, use the frame pointer for
1459   // parameters, and the stack/base pointer for locals.
1460   if (RegInfo->hasStackRealignment(MF)) {
1461     assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1462     if (isFixed) {
1463       FrameReg = RegInfo->getFrameRegister(MF);
1464       Offset = FPOffset;
1465     } else if (hasMovingSP) {
1466       assert(RegInfo->hasBasePointer(MF) &&
1467              "VLAs and dynamic stack alignment, but missing base pointer!");
1468       FrameReg = RegInfo->getBaseRegister();
1469       Offset -= SPAdj;
1470     }
1471     return Offset;
1472   }
1473 
1474   // If there is a frame pointer, use it when we can.
1475   if (hasFP(MF) && AFI->hasStackFrame()) {
1476     // Use frame pointer to reference fixed objects. Use it for locals if
1477     // there are VLAs (and thus the SP isn't reliable as a base).
1478     if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1479       FrameReg = RegInfo->getFrameRegister(MF);
1480       return FPOffset;
1481     } else if (hasMovingSP) {
1482       assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1483       if (AFI->isThumb2Function()) {
1484         // Try to use the frame pointer if we can, else use the base pointer
1485         // since it's available. This is handy for the emergency spill slot, in
1486         // particular.
1487         if (FPOffset >= -255 && FPOffset < 0) {
1488           FrameReg = RegInfo->getFrameRegister(MF);
1489           return FPOffset;
1490         }
1491       }
           // Otherwise fall out of this chain to the base pointer selection at
           // the bottom of the function.
1492     } else if (AFI->isThumbFunction()) {
1493       // Prefer SP to base pointer, if the offset is suitably aligned and in
1494       // range as the effective range of the immediate offset is bigger when
1495       // basing off SP.
1496       // Use  add <rd>, sp, #<imm8>
1497       //      ldr <rd>, [sp, #<imm8>]
1498       if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1499         return Offset;
1500       // In Thumb2 mode, the negative offset is very limited. Try to avoid
1501       // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1502       if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1503         FrameReg = RegInfo->getFrameRegister(MF);
1504         return FPOffset;
1505       }
1506     } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1507       // Otherwise, use SP or FP, whichever is closer to the stack slot.
1508       FrameReg = RegInfo->getFrameRegister(MF);
1509       return FPOffset;
1510     }
1511   }
1512   // Use the base pointer if we have one.
1513   // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1514   // That can happen if we forced a base pointer for a large call frame.
1515   if (RegInfo->hasBasePointer(MF)) {
1516     FrameReg = RegInfo->getBaseRegister();
1517     Offset -= SPAdj;
1518   }
       // FrameReg is still SP here unless the base pointer was selected above.
1519   return Offset;
1520 }
1521 
1522 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1523                                     MachineBasicBlock::iterator MI,
1524                                     ArrayRef<CalleeSavedInfo> CSI,
1525                                     unsigned StmOpc, unsigned StrOpc,
1526                                     bool NoGap, bool (*Func)(unsigned, bool),
1527                                     unsigned NumAlignedDPRCS2Regs,
1528                                     unsigned MIFlags) const {
1529   MachineFunction &MF = *MBB.getParent();
1530   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1531   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1532 
1533   DebugLoc DL;
1534 
1535   using RegAndKill = std::pair<unsigned, bool>;
1536 
1537   SmallVector<RegAndKill, 4> Regs;
1538   unsigned i = CSI.size();
1539   while (i != 0) {
1540     unsigned LastReg = 0;
1541     for (; i != 0; --i) {
1542       Register Reg = CSI[i-1].getReg();
1543       if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1544 
1545       // D-registers in the aligned area DPRCS2 are NOT spilled here.
1546       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1547         continue;
1548 
1549       const MachineRegisterInfo &MRI = MF.getRegInfo();
1550       bool isLiveIn = MRI.isLiveIn(Reg);
1551       if (!isLiveIn && !MRI.isReserved(Reg))
1552         MBB.addLiveIn(Reg);
1553       // If NoGap is true, push consecutive registers and then leave the rest
1554       // for other instructions. e.g.
1555       // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1556       if (NoGap && LastReg && LastReg != Reg-1)
1557         break;
1558       LastReg = Reg;
1559       // Do not set a kill flag on values that are also marked as live-in. This
1560       // happens with the @llvm-returnaddress intrinsic and with arguments
1561       // passed in callee saved registers.
1562       // Omitting the kill flags is conservatively correct even if the live-in
1563       // is not used after all.
1564       Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1565     }
1566 
1567     if (Regs.empty())
1568       continue;
1569 
1570     llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1571       return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1572     });
1573 
1574     if (Regs.size() > 1 || StrOpc== 0) {
1575       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1576                                     .addReg(ARM::SP)
1577                                     .setMIFlags(MIFlags)
1578                                     .add(predOps(ARMCC::AL));
1579       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1580         MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1581     } else if (Regs.size() == 1) {
1582       BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1583           .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1584           .addReg(ARM::SP)
1585           .setMIFlags(MIFlags)
1586           .addImm(-4)
1587           .add(predOps(ARMCC::AL));
1588     }
1589     Regs.clear();
1590 
1591     // Put any subsequent vpush instructions before this one: they will refer to
1592     // higher register numbers so need to be pushed first in order to preserve
1593     // monotonicity.
1594     if (MI != MBB.begin())
1595       --MI;
1596   }
1597 }
1598 
1599 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1600                                    MachineBasicBlock::iterator MI,
1601                                    MutableArrayRef<CalleeSavedInfo> CSI,
1602                                    unsigned LdmOpc, unsigned LdrOpc,
1603                                    bool isVarArg, bool NoGap,
1604                                    bool (*Func)(unsigned, bool),
1605                                    unsigned NumAlignedDPRCS2Regs) const {
1606   MachineFunction &MF = *MBB.getParent();
1607   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1608   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1609   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1610   bool hasPAC = AFI->shouldSignReturnAddress();
1611   DebugLoc DL;
1612   bool isTailCall = false;
1613   bool isInterrupt = false;
1614   bool isTrap = false;
1615   bool isCmseEntry = false;
1616   if (MBB.end() != MI) {
1617     DL = MI->getDebugLoc();
1618     unsigned RetOpcode = MI->getOpcode();
1619     isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1620     isInterrupt =
1621         RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1622     isTrap =
1623         RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1624         RetOpcode == ARM::tTRAP;
1625     isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1626   }
1627 
1628   SmallVector<unsigned, 4> Regs;
1629   unsigned i = CSI.size();
1630   while (i != 0) {
1631     unsigned LastReg = 0;
1632     bool DeleteRet = false;
1633     for (; i != 0; --i) {
1634       CalleeSavedInfo &Info = CSI[i-1];
1635       Register Reg = Info.getReg();
1636       if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1637 
1638       // The aligned reloads from area DPRCS2 are not inserted here.
1639       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1640         continue;
1641       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1642           !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1643           STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1644           !STI.splitFramePointerPush(MF)) {
1645         Reg = ARM::PC;
1646         // Fold the return instruction into the LDM.
1647         DeleteRet = true;
1648         LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1649         // We 'restore' LR into PC so it is not live out of the return block:
1650         // Clear Restored bit.
1651         Info.setRestored(false);
1652       }
1653 
1654       // If NoGap is true, pop consecutive registers and then leave the rest
1655       // for other instructions. e.g.
1656       // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1657       if (NoGap && LastReg && LastReg != Reg-1)
1658         break;
1659 
1660       LastReg = Reg;
1661       Regs.push_back(Reg);
1662     }
1663 
1664     if (Regs.empty())
1665       continue;
1666 
1667     llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1668       return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1669     });
1670 
1671     if (Regs.size() > 1 || LdrOpc == 0) {
1672       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1673                                     .addReg(ARM::SP)
1674                                     .add(predOps(ARMCC::AL))
1675                                     .setMIFlags(MachineInstr::FrameDestroy);
1676       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1677         MIB.addReg(Regs[i], getDefRegState(true));
1678       if (DeleteRet) {
1679         if (MI != MBB.end()) {
1680           MIB.copyImplicitOps(*MI);
1681           MI->eraseFromParent();
1682         }
1683       }
1684       MI = MIB;
1685     } else if (Regs.size() == 1) {
1686       // If we adjusted the reg to PC from LR above, switch it back here. We
1687       // only do that for LDM.
1688       if (Regs[0] == ARM::PC)
1689         Regs[0] = ARM::LR;
1690       MachineInstrBuilder MIB =
1691         BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1692           .addReg(ARM::SP, RegState::Define)
1693           .addReg(ARM::SP)
1694           .setMIFlags(MachineInstr::FrameDestroy);
1695       // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1696       // that refactoring is complete (eventually).
1697       if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1698         MIB.addReg(0);
1699         MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1700       } else
1701         MIB.addImm(4);
1702       MIB.add(predOps(ARMCC::AL));
1703     }
1704     Regs.clear();
1705 
1706     // Put any subsequent vpop instructions after this one: they will refer to
1707     // higher register numbers so need to be popped afterwards.
1708     if (MI != MBB.end())
1709       ++MI;
1710   }
1711 }
1712 
1713 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1714 /// starting from d8.  Also insert stack realignment code and leave the stack
1715 /// pointer pointing to the d8 spill slot.
1716 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1717                                     MachineBasicBlock::iterator MI,
1718                                     unsigned NumAlignedDPRCS2Regs,
1719                                     ArrayRef<CalleeSavedInfo> CSI,
1720                                     const TargetRegisterInfo *TRI) {
1721   MachineFunction &MF = *MBB.getParent();
1722   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1723   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1724   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1725   MachineFrameInfo &MFI = MF.getFrameInfo();
1726 
1727   // Mark the D-register spill slots as properly aligned.  Since MFI computes
1728   // stack slot layout backwards, this can actually mean that the d-reg stack
1729   // slot offsets can be wrong. The offset for d8 will always be correct.
1730   for (const CalleeSavedInfo &I : CSI) {
1731     unsigned DNum = I.getReg() - ARM::D8;
1732     if (DNum > NumAlignedDPRCS2Regs - 1)
1733       continue;
1734     int FI = I.getFrameIdx();
1735     // The even-numbered registers will be 16-byte aligned, the odd-numbered
1736     // registers will be 8-byte aligned.
1737     MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1738 
1739     // The stack slot for D8 needs to be maximally aligned because this is
1740     // actually the point where we align the stack pointer.  MachineFrameInfo
1741     // computes all offsets relative to the incoming stack pointer which is a
1742     // bit weird when realigning the stack.  Any extra padding for this
1743     // over-alignment is not realized because the code inserted below adjusts
1744     // the stack pointer by numregs * 8 before aligning the stack pointer.
1745     if (DNum == 0)
1746       MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1747   }
1748 
1749   // Move the stack pointer to the d8 spill slot, and align it at the same
1750   // time. Leave the stack slot address in the scratch register r4.
1751   //
1752   //   sub r4, sp, #numregs * 8
1753   //   bic r4, r4, #align - 1
1754   //   mov sp, r4
1755   //
1756   bool isThumb = AFI->isThumbFunction();
1757   assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1758   AFI->setShouldRestoreSPFromFP(true);
1759 
1760   // sub r4, sp, #numregs * 8
1761   // The immediate is <= 64, so it doesn't need any special encoding.
1762   unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1763   BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1764       .addReg(ARM::SP)
1765       .addImm(8 * NumAlignedDPRCS2Regs)
1766       .add(predOps(ARMCC::AL))
1767       .add(condCodeOp());
1768 
1769   Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1770   // We must set parameter MustBeSingleInstruction to true, since
1771   // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1772   // stack alignment.  Luckily, this can always be done since all ARM
1773   // architecture versions that support Neon also support the BFC
1774   // instruction.
1775   emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1776 
1777   // mov sp, r4
1778   // The stack pointer must be adjusted before spilling anything, otherwise
1779   // the stack slots could be clobbered by an interrupt handler.
1780   // Leave r4 live, it is used below.
1781   Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1782   MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1783                                 .addReg(ARM::R4)
1784                                 .add(predOps(ARMCC::AL));
1785   if (!isThumb)
1786     MIB.add(condCodeOp());
1787 
1788   // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1789   // r4 holds the stack slot address.
1790   unsigned NextReg = ARM::D8;
1791 
1792   // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1793   // The writeback is only needed when emitting two vst1.64 instructions.
1794   if (NumAlignedDPRCS2Regs >= 6) {
1795     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1796                                                &ARM::QQPRRegClass);
1797     MBB.addLiveIn(SupReg);
1798     BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1799         .addReg(ARM::R4, RegState::Kill)
1800         .addImm(16)
1801         .addReg(NextReg)
1802         .addReg(SupReg, RegState::ImplicitKill)
1803         .add(predOps(ARMCC::AL));
1804     NextReg += 4;
1805     NumAlignedDPRCS2Regs -= 4;
1806   }
1807 
1808   // We won't modify r4 beyond this point.  It currently points to the next
1809   // register to be spilled.
1810   unsigned R4BaseReg = NextReg;
1811 
1812   // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1813   if (NumAlignedDPRCS2Regs >= 4) {
1814     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1815                                                &ARM::QQPRRegClass);
1816     MBB.addLiveIn(SupReg);
1817     BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1818         .addReg(ARM::R4)
1819         .addImm(16)
1820         .addReg(NextReg)
1821         .addReg(SupReg, RegState::ImplicitKill)
1822         .add(predOps(ARMCC::AL));
1823     NextReg += 4;
1824     NumAlignedDPRCS2Regs -= 4;
1825   }
1826 
1827   // 16-byte aligned vst1.64 with 2 d-regs.
1828   if (NumAlignedDPRCS2Regs >= 2) {
1829     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1830                                                &ARM::QPRRegClass);
1831     MBB.addLiveIn(SupReg);
1832     BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1833         .addReg(ARM::R4)
1834         .addImm(16)
1835         .addReg(SupReg)
1836         .add(predOps(ARMCC::AL));
1837     NextReg += 2;
1838     NumAlignedDPRCS2Regs -= 2;
1839   }
1840 
1841   // Finally, use a vanilla vstr.64 for the odd last register.
1842   if (NumAlignedDPRCS2Regs) {
1843     MBB.addLiveIn(NextReg);
1844     // vstr.64 uses addrmode5 which has an offset scale of 4.
1845     BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1846         .addReg(NextReg)
1847         .addReg(ARM::R4)
1848         .addImm((NextReg - R4BaseReg) * 2)
1849         .add(predOps(ARMCC::AL));
1850   }
1851 
1852   // The last spill instruction inserted should kill the scratch register r4.
1853   std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1854 }
1855 
1856 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1857 /// iterator to the following instruction.
1858 static MachineBasicBlock::iterator
1859 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1860                         unsigned NumAlignedDPRCS2Regs) {
1861   //   sub r4, sp, #numregs * 8
1862   //   bic r4, r4, #align - 1
1863   //   mov sp, r4
1864   ++MI; ++MI; ++MI;
1865   assert(MI->mayStore() && "Expecting spill instruction");
1866 
1867   // These switches all fall through.
1868   switch(NumAlignedDPRCS2Regs) {
1869   case 7:
1870     ++MI;
1871     assert(MI->mayStore() && "Expecting spill instruction");
1872     [[fallthrough]];
1873   default:
1874     ++MI;
1875     assert(MI->mayStore() && "Expecting spill instruction");
1876     [[fallthrough]];
1877   case 1:
1878   case 2:
1879   case 4:
1880     assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1881     ++MI;
1882   }
1883   return MI;
1884 }
1885 
1886 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1887 /// starting from d8.  These instructions are assumed to execute while the
1888 /// stack is still aligned, unlike the code inserted by emitPopInst.
1889 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1890                                       MachineBasicBlock::iterator MI,
1891                                       unsigned NumAlignedDPRCS2Regs,
1892                                       ArrayRef<CalleeSavedInfo> CSI,
1893                                       const TargetRegisterInfo *TRI) {
1894   MachineFunction &MF = *MBB.getParent();
1895   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1896   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1897   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1898 
1899   // Find the frame index assigned to d8.
1900   int D8SpillFI = 0;
1901   for (const CalleeSavedInfo &I : CSI)
1902     if (I.getReg() == ARM::D8) {
1903       D8SpillFI = I.getFrameIdx();
1904       break;
1905     }
1906 
1907   // Materialize the address of the d8 spill slot into the scratch register r4.
1908   // This can be fairly complicated if the stack frame is large, so just use
1909   // the normal frame index elimination mechanism to do it.  This code runs as
1910   // the initial part of the epilog where the stack and base pointers haven't
1911   // been changed yet.
1912   bool isThumb = AFI->isThumbFunction();
1913   assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1914 
1915   unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1916   BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1917       .addFrameIndex(D8SpillFI)
1918       .addImm(0)
1919       .add(predOps(ARMCC::AL))
1920       .add(condCodeOp());
1921 
1922   // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1923   unsigned NextReg = ARM::D8;
1924 
1925   // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1926   if (NumAlignedDPRCS2Regs >= 6) {
1927     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1928                                                &ARM::QQPRRegClass);
1929     BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1930         .addReg(ARM::R4, RegState::Define)
1931         .addReg(ARM::R4, RegState::Kill)
1932         .addImm(16)
1933         .addReg(SupReg, RegState::ImplicitDefine)
1934         .add(predOps(ARMCC::AL));
1935     NextReg += 4;
1936     NumAlignedDPRCS2Regs -= 4;
1937   }
1938 
1939   // We won't modify r4 beyond this point.  It currently points to the next
1940   // register to be spilled.
1941   unsigned R4BaseReg = NextReg;
1942 
1943   // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1944   if (NumAlignedDPRCS2Regs >= 4) {
1945     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1946                                                &ARM::QQPRRegClass);
1947     BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1948         .addReg(ARM::R4)
1949         .addImm(16)
1950         .addReg(SupReg, RegState::ImplicitDefine)
1951         .add(predOps(ARMCC::AL));
1952     NextReg += 4;
1953     NumAlignedDPRCS2Regs -= 4;
1954   }
1955 
1956   // 16-byte aligned vld1.64 with 2 d-regs.
1957   if (NumAlignedDPRCS2Regs >= 2) {
1958     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1959                                                &ARM::QPRRegClass);
1960     BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1961         .addReg(ARM::R4)
1962         .addImm(16)
1963         .add(predOps(ARMCC::AL));
1964     NextReg += 2;
1965     NumAlignedDPRCS2Regs -= 2;
1966   }
1967 
1968   // Finally, use a vanilla vldr.64 for the remaining odd register.
1969   if (NumAlignedDPRCS2Regs)
1970     BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1971         .addReg(ARM::R4)
1972         .addImm(2 * (NextReg - R4BaseReg))
1973         .add(predOps(ARMCC::AL));
1974 
1975   // Last store kills r4.
1976   std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1977 }
1978 
1979 bool ARMFrameLowering::spillCalleeSavedRegisters(
1980     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1981     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
1982   if (CSI.empty())
1983     return false;
1984 
1985   MachineFunction &MF = *MBB.getParent();
1986   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1987 
1988   unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1989   unsigned PushOneOpc = AFI->isThumbFunction() ?
1990     ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1991   unsigned FltOpc = ARM::VSTMDDB_UPD;
1992   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1993   // Compute PAC in R12.
1994   if (AFI->shouldSignReturnAddress()) {
1995     BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1996         .setMIFlags(MachineInstr::FrameSetup);
1997   }
1998   // Save the non-secure floating point context.
1999   if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2000         return C.getReg() == ARM::FPCXTNS;
2001       })) {
2002     BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2003             ARM::SP)
2004         .addReg(ARM::SP)
2005         .addImm(-4)
2006         .add(predOps(ARMCC::AL));
2007   }
2008   if (STI.splitFramePointerPush(MF)) {
2009     emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2010                  &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
2011     emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2012                  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2013     emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2014                  &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
2015   } else {
2016     emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
2017                  0, MachineInstr::FrameSetup);
2018     emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
2019                  0, MachineInstr::FrameSetup);
2020     emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2021                  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2022   }
2023 
2024   // The code above does not insert spill code for the aligned DPRCS2 registers.
2025   // The stack realignment code will be inserted between the push instructions
2026   // and these spills.
2027   if (NumAlignedDPRCS2Regs)
2028     emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2029 
2030   return true;
2031 }
2032 
2033 bool ARMFrameLowering::restoreCalleeSavedRegisters(
2034     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2035     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2036   if (CSI.empty())
2037     return false;
2038 
2039   MachineFunction &MF = *MBB.getParent();
2040   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2041   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2042   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2043 
2044   // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2045   // registers. Do that here instead.
2046   if (NumAlignedDPRCS2Regs)
2047     emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2048 
2049   unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2050   unsigned LdrOpc =
2051       AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2052   unsigned FltOpc = ARM::VLDMDIA_UPD;
2053   if (STI.splitFramePointerPush(MF)) {
2054     emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2055                 &isSplitFPArea2Register, 0);
2056     emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2057                 NumAlignedDPRCS2Regs);
2058     emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2059                 &isSplitFPArea1Register, 0);
2060   } else {
2061     emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2062                 NumAlignedDPRCS2Regs);
2063     emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2064                 &isARMArea2Register, 0);
2065     emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2066                 &isARMArea1Register, 0);
2067   }
2068 
2069   return true;
2070 }
2071 
2072 // FIXME: Make generic?
2073 static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2074                                             const ARMBaseInstrInfo &TII) {
2075   unsigned FnSize = 0;
2076   for (auto &MBB : MF) {
2077     for (auto &MI : MBB)
2078       FnSize += TII.getInstSizeInBytes(MI);
2079   }
2080   if (MF.getJumpTableInfo())
2081     for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2082       FnSize += Table.MBBs.size() * 4;
2083   FnSize += MF.getConstantPool()->getConstants().size() * 4;
2084   return FnSize;
2085 }
2086 
2087 /// estimateRSStackSizeLimit - Look at each instruction that references stack
2088 /// frames and return the stack size limit beyond which some of these
2089 /// instructions will require a scratch register during their expansion later.
2090 // FIXME: Move to TII?
2091 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
2092                                          const TargetFrameLowering *TFI,
2093                                          bool &HasNonSPFrameIndex) {
2094   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2095   const ARMBaseInstrInfo &TII =
2096       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2097   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2098   unsigned Limit = (1 << 12) - 1;
2099   for (auto &MBB : MF) {
2100     for (auto &MI : MBB) {
2101       if (MI.isDebugInstr())
2102         continue;
2103       for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2104         if (!MI.getOperand(i).isFI())
2105           continue;
2106 
2107         // When using ADDri to get the address of a stack object, 255 is the
2108         // largest offset guaranteed to fit in the immediate offset.
2109         if (MI.getOpcode() == ARM::ADDri) {
2110           Limit = std::min(Limit, (1U << 8) - 1);
2111           break;
2112         }
2113         // t2ADDri will not require an extra register, it can reuse the
2114         // destination.
2115         if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2116           break;
2117 
2118         const MCInstrDesc &MCID = MI.getDesc();
2119         const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2120         if (RegClass && !RegClass->contains(ARM::SP))
2121           HasNonSPFrameIndex = true;
2122 
2123         // Otherwise check the addressing mode.
2124         switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2125         case ARMII::AddrMode_i12:
2126         case ARMII::AddrMode2:
2127           // Default 12 bit limit.
2128           break;
2129         case ARMII::AddrMode3:
2130         case ARMII::AddrModeT2_i8neg:
2131           Limit = std::min(Limit, (1U << 8) - 1);
2132           break;
2133         case ARMII::AddrMode5FP16:
2134           Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2135           break;
2136         case ARMII::AddrMode5:
2137         case ARMII::AddrModeT2_i8s4:
2138         case ARMII::AddrModeT2_ldrex:
2139           Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2140           break;
2141         case ARMII::AddrModeT2_i12:
2142           // i12 supports only positive offset so these will be converted to
2143           // i8 opcodes. See llvm::rewriteT2FrameIndex.
2144           if (TFI->hasFP(MF) && AFI->hasStackFrame())
2145             Limit = std::min(Limit, (1U << 8) - 1);
2146           break;
2147         case ARMII::AddrMode4:
2148         case ARMII::AddrMode6:
2149           // Addressing modes 4 & 6 (load/store) instructions can't encode an
2150           // immediate offset for stack references.
2151           return 0;
2152         case ARMII::AddrModeT2_i7:
2153           Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2154           break;
2155         case ARMII::AddrModeT2_i7s2:
2156           Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2157           break;
2158         case ARMII::AddrModeT2_i7s4:
2159           Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2160           break;
2161         default:
2162           llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2163         }
2164         break; // At most one FI per instruction
2165       }
2166     }
2167   }
2168 
2169   return Limit;
2170 }
2171 
2172 // In functions that realign the stack, it can be an advantage to spill the
2173 // callee-saved vector registers after realigning the stack. The vst1 and vld1
2174 // instructions take alignment hints that can improve performance.
2175 static void
2176 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
2177   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2178   if (!SpillAlignedNEONRegs)
2179     return;
2180 
2181   // Naked functions don't spill callee-saved registers.
2182   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2183     return;
2184 
2185   // We are planning to use NEON instructions vst1 / vld1.
2186   if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2187     return;
2188 
2189   // Don't bother if the default stack alignment is sufficiently high.
2190   if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
2191     return;
2192 
2193   // Aligned spills require stack realignment.
2194   if (!static_cast<const ARMBaseRegisterInfo *>(
2195            MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
2196     return;
2197 
2198   // We always spill contiguous d-registers starting from d8. Count how many
2199   // needs spilling.  The register allocator will almost always use the
2200   // callee-saved registers in order, but it can happen that there are holes in
2201   // the range.  Registers above the hole will be spilled to the standard DPRCS
2202   // area.
2203   unsigned NumSpills = 0;
2204   for (; NumSpills < 8; ++NumSpills)
2205     if (!SavedRegs.test(ARM::D8 + NumSpills))
2206       break;
2207 
2208   // Don't do this for just one d-register. It's not worth it.
2209   if (NumSpills < 2)
2210     return;
2211 
2212   // Spill the first NumSpills D-registers after realigning the stack.
2213   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2214 
2215   // A scratch register is required for the vst1 / vld1 instructions.
2216   SavedRegs.set(ARM::R4);
2217 }
2218 
2219 bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2220   // For CMSE entry functions, we want to save the FPCXT_NS immediately
2221   // upon function entry (resp. restore it immmediately before return)
2222   if (STI.hasV8_1MMainlineOps() &&
2223       MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2224     return false;
2225 
2226   // We are disabling shrinkwrapping for now when PAC is enabled, as
2227   // shrinkwrapping can cause clobbering of r12 when the PAC code is
2228   // generated. A follow-up patch will fix this in a more performant manner.
2229   if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2230           true /* SpillsLR */))
2231     return false;
2232 
2233   return true;
2234 }
2235 
2236 static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
2237   const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2238   return Subtarget.createAAPCSFrameChainLeaf() ||
2239          (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
2240 }
2241 
2242 // Thumb1 may require a spill when storing to a frame index through FP, for
2243 // cases where FP is a high register (R11). This scans the function for cases
2244 // where this may happen.
2245 static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2246                                        const TargetFrameLowering &TFI) {
2247   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2248   if (!AFI->isThumb1OnlyFunction())
2249     return false;
2250 
2251   for (const auto &MBB : MF)
2252     for (const auto &MI : MBB)
2253       if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
2254         for (const auto &Op : MI.operands())
2255           if (Op.isFI()) {
2256             Register Reg;
2257             TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2258             if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2259               return true;
2260           }
2261   return false;
2262 }
2263 
/// Decide which callee-saved registers \p MF must spill and record the
/// frame-related results on its ARMFunctionInfo.
///
/// Starts from the generic TargetFrameLowering::determineCalleeSaves result in
/// \p SavedRegs and then adds ARM-specific spills: R4 as a scratch register,
/// LR, the base and frame pointers, low registers needed by Thumb1 to save
/// high registers, and registers used purely as alignment padding. If stack
/// offsets may need a scavenged register and no otherwise-unused callee-saved
/// GPR ends up being spilled, an emergency spill slot is created and
/// registered with \p RS (when \p RS is non-null). On exit, setLRIsSpilled
/// reflects whether LR is set in \p SavedRegs.
2264 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
2265                                             BitVector &SavedRegs,
2266                                             RegScavenger *RS) const {
2267   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2268   // This tells PEI to spill the FP as if it is any other callee-save register
2269   // to take advantage of the eliminateFrameIndex machinery. This also ensures
2270   // it is spilled in the order specified by getCalleeSavedRegs() to make it
2271   // easier to combine multiple loads / stores.
2272   bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2273   bool CS1Spilled = false;
2274   bool LRSpilled = false;
2275   unsigned NumGPRSpills = 0;
       // Counted in 4-byte units (S-reg = 1, D-reg = 2, Q-reg = 4); see the
       // callee-saved register scan below.
2276   unsigned NumFPRSpills = 0;
2277   SmallVector<unsigned, 4> UnspilledCS1GPRs;
2278   SmallVector<unsigned, 4> UnspilledCS2GPRs;
2279   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2280       MF.getSubtarget().getRegisterInfo());
2281   const ARMBaseInstrInfo &TII =
2282       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2283   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2284   MachineFrameInfo &MFI = MF.getFrameInfo();
2285   MachineRegisterInfo &MRI = MF.getRegInfo();
2286   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2287   (void)TRI;  // Silence unused warning in non-assert builds.
2288   Register FramePtr = RegInfo->getFrameRegister(MF);
2289 
2290   // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2291   // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2292   // since it's not always possible to restore sp from fp in a single
2293   // instruction.
2294   // FIXME: It will be better just to find spare register here.
2295   if (AFI->isThumb2Function() &&
2296       (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2297     SavedRegs.set(ARM::R4);
2298 
2299   // If a stack probe will be emitted, spill R4 and LR, since they are
2300   // clobbered by the stack probe call.
2301   // This estimate should be a safe, conservative estimate. The actual
2302   // stack probe is enabled based on the size of the local objects;
2303   // this estimate also includes the varargs store size.
2304   if (STI.isTargetWindows() &&
2305       WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2306     SavedRegs.set(ARM::R4);
2307     SavedRegs.set(ARM::LR);
2308   }
2309 
2310   if (AFI->isThumb1OnlyFunction()) {
2311     // Spill LR if Thumb1 function uses variable length argument lists.
2312     if (AFI->getArgRegsSaveSize() > 0)
2313       SavedRegs.set(ARM::LR);
2314 
2315     // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2316     // requires stack alignment.  We don't know for sure what the stack size
2317     // will be, but for this, an estimate is good enough. If anything
2318     // changes it, it'll be a spill, which implies we've used all the registers
2319     // and so R4 is already used, so not marking it here will be OK.
2320     // FIXME: It will be better just to find spare register here.
2321     if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2322         MFI.estimateStackSize(MF) > 508)
2323       SavedRegs.set(ARM::R4);
2324   }
2325 
2326   // See if we can spill vector registers to aligned stack.
2327   checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2328 
2329   // Spill the BasePtr if it's used.
2330   if (RegInfo->hasBasePointer(MF))
2331     SavedRegs.set(RegInfo->getBaseRegister());
2332 
2333   // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2334   if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2335     CanEliminateFrame = false;
2336 
2337   // Don't spill FP if the frame can be eliminated. This is determined
2338   // by scanning the callee-save registers to see if any is modified.
       // The same scan also tallies the spill counts and sorts the unspilled
       // GPRs into the CS1 / CS2 candidate lists used further down.
2339   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2340   for (unsigned i = 0; CSRegs[i]; ++i) {
2341     unsigned Reg = CSRegs[i];
2342     bool Spilled = false;
2343     if (SavedRegs.test(Reg)) {
2344       Spilled = true;
2345       CanEliminateFrame = false;
2346     }
2347 
2348     if (!ARM::GPRRegClass.contains(Reg)) {
2349       if (Spilled) {
2350         if (ARM::SPRRegClass.contains(Reg))
2351           NumFPRSpills++;
2352         else if (ARM::DPRRegClass.contains(Reg))
2353           NumFPRSpills += 2;
2354         else if (ARM::QPRRegClass.contains(Reg))
2355           NumFPRSpills += 4;
2356       }
2357       continue;
2358     }
2359 
2360     if (Spilled) {
2361       NumGPRSpills++;
2362 
           // Without a split push/pop every spilled GPR counts as a CS1 spill.
2363       if (!STI.splitFramePushPop(MF)) {
2364         if (Reg == ARM::LR)
2365           LRSpilled = true;
2366         CS1Spilled = true;
2367         continue;
2368       }
2369 
2370       // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2371       switch (Reg) {
2372       case ARM::LR:
2373         LRSpilled = true;
2374         [[fallthrough]];
2375       case ARM::R0: case ARM::R1:
2376       case ARM::R2: case ARM::R3:
2377       case ARM::R4: case ARM::R5:
2378       case ARM::R6: case ARM::R7:
2379         CS1Spilled = true;
2380         break;
2381       default:
2382         break;
2383       }
2384     } else {
2385       if (!STI.splitFramePushPop(MF)) {
2386         UnspilledCS1GPRs.push_back(Reg);
2387         continue;
2388       }
2389 
2390       switch (Reg) {
2391       case ARM::R0: case ARM::R1:
2392       case ARM::R2: case ARM::R3:
2393       case ARM::R4: case ARM::R5:
2394       case ARM::R6: case ARM::R7:
2395       case ARM::LR:
2396         UnspilledCS1GPRs.push_back(Reg);
2397         break;
2398       default:
2399         UnspilledCS2GPRs.push_back(Reg);
2400         break;
2401       }
2402     }
2403   }
2404 
2405   bool ForceLRSpill = false;
2406   if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2407     unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2408     // Force LR to be spilled if the Thumb function size is >= 2048 bytes.
2409     // This enables use of BL to implement far jump.
2410     if (FnSize >= (1 << 11)) {
2411       CanEliminateFrame = false;
2412       ForceLRSpill = true;
2413     }
2414   }
2415 
2416   // If any of the stack slot references may be out of range of an immediate
2417   // offset, make sure a register (or a spill slot) is available for the
2418   // register scavenger. Note that if we're indexing off the frame pointer, the
2419   // effective stack size is 4 bytes larger since the FP points to the stack
2420   // slot of the previous FP. Also, if we have variable sized objects in the
2421   // function, stack slot references will often be negative, and some of
2422   // our instructions are positive-offset only, so conservatively consider
2423   // that case to want a spill slot (or register) as well. Similarly, if
2424   // the function adjusts the stack pointer during execution and the
2425   // adjustments aren't already part of our stack size estimate, our offset
2426   // calculations may be off, so be conservative.
2427   // FIXME: We could add logic to be more precise about negative offsets
2428   //        and which instructions will need a scratch register for them. Is it
2429   //        worth the effort and added fragility?
2430   unsigned EstimatedStackSize =
2431       MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2432 
2433   // Determine biggest (positive) SP offset in MachineFrameInfo.
2434   int MaxFixedOffset = 0;
2435   for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2436     int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2437     MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2438   }
2439 
2440   bool HasFP = hasFP(MF);
2441   if (HasFP) {
2442     if (AFI->hasStackFrame())
2443       EstimatedStackSize += 4;
2444   } else {
2445     // If FP is not used, SP will be used to access arguments, so count the
2446     // size of arguments into the estimation.
2447     EstimatedStackSize += MaxFixedOffset;
2448   }
2449   EstimatedStackSize += 16; // For possible paddings.
2450 
2451   unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2452   bool HasNonSPFrameIndex = false;
2453   if (AFI->isThumb1OnlyFunction()) {
2454     // For Thumb1, don't bother to iterate over the function. The only
2455     // instruction that requires an emergency spill slot is a store to a
2456     // frame index.
2457     //
2458     // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2459     // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2460     // a 5-bit unsigned immediate.
2461     //
2462     // We could try to check if the function actually contains a tSTRspi
2463     // that might need the spill slot, but it's not really important.
2464     // Functions with VLAs or extremely large call frames are rare, and
2465     // if a function is allocating more than 1KB of stack, an extra 4-byte
2466     // slot probably isn't relevant.
2467     //
2468     // A special case is the scenario where r11 is used as FP, where accesses
2469     // to a frame index will require its value to be moved into a low reg.
2470     // This is handled later on, once we are able to determine if we have any
2471     // fp-relative accesses.
2472     if (RegInfo->hasBasePointer(MF))
2473       EstimatedRSStackSizeLimit = (1U << 5) * 4;
2474     else
2475       EstimatedRSStackSizeLimit = (1U << 8) * 4;
2476     EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2477   } else {
2478     EstimatedRSStackSizeLimit =
2479         estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2480     EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2481   }
2482   // Final estimate of whether sp or bp-relative accesses might require
2483   // scavenging.
2484   bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2485 
2486   // If the stack pointer moves and we don't have a base pointer, the
2487   // estimate logic doesn't work. The actual offsets might be larger when
2488   // we're constructing a call frame, or we might need to use negative
2489   // offsets from fp.
2490   bool HasMovingSP = MFI.hasVarSizedObjects() ||
2491     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2492   bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2493 
2494   // If we have a frame pointer, we assume arguments will be accessed
2495   // relative to the frame pointer. Check whether fp-relative accesses to
2496   // arguments require scavenging.
2497   //
2498   // We could do slightly better on Thumb1; in some cases, an sp-relative
2499   // offset would be legal even though an fp-relative offset is not.
2500   int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2501   bool HasLargeArgumentList =
2502       HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2503 
2504   bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2505                          HasLargeArgumentList || HasNonSPFrameIndex;
2506   LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2507                     << "; EstimatedStack: " << EstimatedStackSize
2508                     << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2509                     << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2510   if (BigFrameOffsets ||
2511       !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2512     AFI->setHasStackFrame(true);
2513 
2514     if (HasFP) {
2515       SavedRegs.set(FramePtr);
2516       // If the frame pointer is required by the ABI, also spill LR so that we
2517       // emit a complete frame record.
2518       if ((requiresAAPCSFrameRecord(MF) ||
2519            MF.getTarget().Options.DisableFramePointerElim(MF)) &&
2520           !LRSpilled) {
2521         SavedRegs.set(ARM::LR);
2522         LRSpilled = true;
2523         NumGPRSpills++;
2524         auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2525         if (LRPos != UnspilledCS1GPRs.end())
2526           UnspilledCS1GPRs.erase(LRPos);
2527       }
2528       auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2529       if (FPPos != UnspilledCS1GPRs.end())
2530         UnspilledCS1GPRs.erase(FPPos);
2531       NumGPRSpills++;
2532       if (FramePtr == ARM::R7)
2533         CS1Spilled = true;
2534     }
2535 
2536     // This is true when we inserted a spill for a callee-save GPR which is
2537     // not otherwise used by the function. This guarantees it is possible
2538     // to scavenge a register to hold the address of a stack slot. On Thumb1,
2539     // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
2540     // subtargets, this is any GPR, i.e. r4-r11 or lr.
2541     //
2542     // If we don't insert a spill, we instead allocate an emergency spill
2543     // slot, which can be used by scavenging to spill an arbitrary register.
2544     //
2545     // We currently don't try to figure out whether any specific instruction
2546     // requires scavenging an additional register.
2547     bool ExtraCSSpill = false;
2548 
2549     if (AFI->isThumb1OnlyFunction()) {
2550       // For Thumb1-only targets, we need some low registers when we save and
2551       // restore the high registers (which aren't allocatable, but could be
2552       // used by inline assembly) because the push/pop instructions can not
2553       // access high registers. If necessary, we might need to push more low
2554       // registers to ensure that there is at least one free that can be used
2555       // for the saving & restoring, and preferably we should ensure that as
2556       // many as are needed are available so that fewer push/pop instructions
2557       // are required.
2558 
2559       // Low registers which are not currently pushed, but could be (r4-r7).
2560       SmallVector<unsigned, 4> AvailableRegs;
2561 
2562       // Unused argument registers (r0-r3) can be clobbered in the prologue for
2563       // free.
2564       int EntryRegDeficit = 0;
2565       for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2566         if (!MF.getRegInfo().isLiveIn(Reg)) {
2567           --EntryRegDeficit;
2568           LLVM_DEBUG(dbgs()
2569                      << printReg(Reg, TRI)
2570                      << " is unused argument register, EntryRegDeficit = "
2571                      << EntryRegDeficit << "\n");
2572         }
2573       }
2574 
2575       // Unused return registers can be clobbered in the epilogue for free.
2576       int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2577       LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
2578                         << " return regs used, ExitRegDeficit = "
2579                         << ExitRegDeficit << "\n");
2580 
2581       int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2582       LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2583 
2584       // r4-r6 can be used in the prologue if they are pushed by the first push
2585       // instruction.
2586       for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2587         if (SavedRegs.test(Reg)) {
2588           --RegDeficit;
2589           LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2590                             << " is saved low register, RegDeficit = "
2591                             << RegDeficit << "\n");
2592         } else {
2593           AvailableRegs.push_back(Reg);
2594           LLVM_DEBUG(
2595               dbgs()
2596               << printReg(Reg, TRI)
2597               << " is non-saved low register, adding to AvailableRegs\n");
2598         }
2599       }
2600 
2601       // r7 can be used if it is not being used as the frame pointer.
2602       if (!HasFP || FramePtr != ARM::R7) {
2603         if (SavedRegs.test(ARM::R7)) {
2604           --RegDeficit;
2605           LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2606                             << RegDeficit << "\n");
2607         } else {
2608           AvailableRegs.push_back(ARM::R7);
2609           LLVM_DEBUG(
2610               dbgs()
2611               << "%r7 is non-saved low register, adding to AvailableRegs\n");
2612         }
2613       }
2614 
2615       // Each of r8-r11 needs to be copied to a low register, then pushed.
2616       for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2617         if (SavedRegs.test(Reg)) {
2618           ++RegDeficit;
2619           LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2620                             << " is saved high register, RegDeficit = "
2621                             << RegDeficit << "\n");
2622         }
2623       }
2624 
2625       // LR can only be used by PUSH, not POP, and can't be used at all if the
2626       // llvm.returnaddress intrinsic is used. This is only worth doing if we
2627       // are more limited at function entry than exit.
2628       if ((EntryRegDeficit > ExitRegDeficit) &&
2629           !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2630             MF.getFrameInfo().isReturnAddressTaken())) {
2631         if (SavedRegs.test(ARM::LR)) {
2632           --RegDeficit;
2633           LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2634                             << RegDeficit << "\n");
2635         } else {
2636           AvailableRegs.push_back(ARM::LR);
2637           LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2638         }
2639       }
2640 
2641       // If there are more high registers that need pushing than low registers
2642       // available, push some more low registers so that we can use fewer push
2643       // instructions. This might not reduce RegDeficit all the way to zero,
2644       // because we can only guarantee that r4-r6 are available, but r8-r11 may
2645       // need saving.
2646       LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2647       for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2648         unsigned Reg = AvailableRegs.pop_back_val();
2649         LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2650                           << " to make up reg deficit\n");
2651         SavedRegs.set(Reg);
2652         NumGPRSpills++;
2653         CS1Spilled = true;
2654         assert(!MRI.isReserved(Reg) && "Should not be reserved");
2655         if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2656           ExtraCSSpill = true;
             // NOTE(review): erases unconditionally, so this relies on Reg
             // still being present in UnspilledCS1GPRs - confirm.
2657         UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2658         if (Reg == ARM::LR)
2659           LRSpilled = true;
2660       }
2661       LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2662                         << "\n");
2663     }
2664 
2665     // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2666     // restore LR in that case.
2667     bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2668 
2669     // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2670     // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2671     if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2672       SavedRegs.set(ARM::LR);
2673       NumGPRSpills++;
2674       SmallVectorImpl<unsigned>::iterator LRPos;
2675       LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2676       if (LRPos != UnspilledCS1GPRs.end())
2677         UnspilledCS1GPRs.erase(LRPos);
2678 
           // LR has just been added to SavedRegs, so forcing is now moot.
2679       ForceLRSpill = false;
2680       if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2681           !AFI->isThumb1OnlyFunction())
2682         ExtraCSSpill = true;
2683     }
2684 
2685     // If stack and double are 8-byte aligned and we are spilling an odd number
2686     // of GPRs, spill one extra callee save GPR so we won't have to pad between
2687     // the integer and double callee save areas.
2688     LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2689     const Align TargetAlign = getStackAlign();
2690     if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2691       if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2692         for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
2693           unsigned Reg = UnspilledCS1GPRs[i];
2694           // Don't spill high register if the function is thumb.  In the case of
2695           // Windows on ARM, accept R11 (frame pointer)
2696           if (!AFI->isThumbFunction() ||
2697               (STI.isTargetWindows() && Reg == ARM::R11) ||
2698               isARMLowRegister(Reg) ||
2699               (Reg == ARM::LR && !ExpensiveLRRestore)) {
2700             SavedRegs.set(Reg);
2701             LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2702                               << " to make up alignment\n");
2703             if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2704                 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2705               ExtraCSSpill = true;
2706             break;
2707           }
2708         }
2709       } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2710         unsigned Reg = UnspilledCS2GPRs.front();
2711         SavedRegs.set(Reg);
2712         LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2713                           << " to make up alignment\n");
2714         if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2715           ExtraCSSpill = true;
2716       }
2717     }
2718 
2719     // Estimate if we might need to scavenge a register at some point in order
2720     // to materialize a stack offset. If so, either spill one additional
2721     // callee-saved register or reserve a special spill slot to facilitate
2722     // register scavenging. Thumb1 needs a spill slot for stack pointer
2723     // adjustments and for frame index accesses when FP is high register,
2724     // even when the frame itself is small.
2725     if (!ExtraCSSpill &&
2726         (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
2727       // If any non-reserved CS register isn't spilled, just spill one or two
2728       // extra. That should take care of it!
2729       unsigned NumExtras = TargetAlign.value() / 4;
2730       SmallVector<unsigned, 2> Extras;
2731       while (NumExtras && !UnspilledCS1GPRs.empty()) {
2732         unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2733         if (!MRI.isReserved(Reg) &&
2734             (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2735           Extras.push_back(Reg);
2736           NumExtras--;
2737         }
2738       }
2739       // For non-Thumb1 functions, also check for hi-reg CS registers
2740       if (!AFI->isThumb1OnlyFunction()) {
2741         while (NumExtras && !UnspilledCS2GPRs.empty()) {
2742           unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2743           if (!MRI.isReserved(Reg)) {
2744             Extras.push_back(Reg);
2745             NumExtras--;
2746           }
2747         }
2748       }
           // Only commit the extra spills if the full quota was found.
2749       if (NumExtras == 0) {
2750         for (unsigned Reg : Extras) {
2751           SavedRegs.set(Reg);
2752           if (!MRI.isPhysRegUsed(Reg))
2753             ExtraCSSpill = true;
2754         }
2755       }
2756       if (!ExtraCSSpill && RS) {
2757         // Reserve a slot closest to SP or frame pointer.
2758         LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2759         const TargetRegisterClass &RC = ARM::GPRRegClass;
2760         unsigned Size = TRI->getSpillSize(RC);
2761         Align Alignment = TRI->getSpillAlign(RC);
2762         RS->addScavengingFrameIndex(
2763             MFI.CreateStackObject(Size, Alignment, false));
2764       }
2765     }
2766   }
2767 
       // Apply the far-jump LR spill requested earlier, unless LR was already
       // added above (which clears ForceLRSpill).
2768   if (ForceLRSpill)
2769     SavedRegs.set(ARM::LR);
2770   AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2771 }
2772 
2773 void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
2774                                       BitVector &SavedRegs) const {
2775   TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
2776 
2777   // If we have the "returned" parameter attribute which guarantees that we
2778   // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2779   // record that fact for IPRA.
2780   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2781   if (AFI->getPreservesR0())
2782     SavedRegs.set(ARM::R0);
2783 }
2784 
2785 bool ARMFrameLowering::assignCalleeSavedSpillSlots(
2786     MachineFunction &MF, const TargetRegisterInfo *TRI,
2787     std::vector<CalleeSavedInfo> &CSI) const {
2788   // For CMSE entry functions, handle floating-point context as if it was a
2789   // callee-saved register.
2790   if (STI.hasV8_1MMainlineOps() &&
2791       MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
2792     CSI.emplace_back(ARM::FPCXTNS);
2793     CSI.back().setRestored(false);
2794   }
2795 
2796   // For functions, which sign their return address, upon function entry, the
2797   // return address PAC is computed in R12. Treat R12 as a callee-saved register
2798   // in this case.
2799   const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2800   if (AFI.shouldSignReturnAddress()) {
2801     // The order of register must match the order we push them, because the
2802     // PEI assigns frame indices in that order. When compiling for return
2803     // address sign and authenication, we use split push, therefore the orders
2804     // we want are:
2805     // LR, R7, R6, R5, R4, <R12>, R11, R10,  R9,  R8, D15-D8
2806     CSI.insert(find_if(CSI,
2807                        [=](const auto &CS) {
2808                          Register Reg = CS.getReg();
2809                          return Reg == ARM::R10 || Reg == ARM::R11 ||
2810                                 Reg == ARM::R8 || Reg == ARM::R9 ||
2811                                 ARM::DPRRegClass.contains(Reg);
2812                        }),
2813                CalleeSavedInfo(ARM::R12));
2814   }
2815 
2816   return false;
2817 }
2818 
2819 const TargetFrameLowering::SpillSlot *
2820 ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
2821   static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2822   NumEntries = std::size(FixedSpillOffsets);
2823   return FixedSpillOffsets;
2824 }
2825 
2826 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2827     MachineFunction &MF, MachineBasicBlock &MBB,
2828     MachineBasicBlock::iterator I) const {
2829   const ARMBaseInstrInfo &TII =
2830       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2831   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2832   bool isARM = !AFI->isThumbFunction();
2833   DebugLoc dl = I->getDebugLoc();
2834   unsigned Opc = I->getOpcode();
2835   bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2836   unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2837 
2838   assert(!AFI->isThumb1OnlyFunction() &&
2839          "This eliminateCallFramePseudoInstr does not support Thumb1!");
2840 
2841   int PIdx = I->findFirstPredOperandIdx();
2842   ARMCC::CondCodes Pred = (PIdx == -1)
2843                               ? ARMCC::AL
2844                               : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2845   unsigned PredReg = TII.getFramePred(*I);
2846 
2847   if (!hasReservedCallFrame(MF)) {
2848     // Bail early if the callee is expected to do the adjustment.
2849     if (IsDestroy && CalleePopAmount != -1U)
2850       return MBB.erase(I);
2851 
2852     // If we have alloca, convert as follows:
2853     // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2854     // ADJCALLSTACKUP   -> add, sp, sp, amount
2855     unsigned Amount = TII.getFrameSize(*I);
2856     if (Amount != 0) {
2857       // We need to keep the stack aligned properly.  To do this, we round the
2858       // amount of space needed for the outgoing arguments up to the next
2859       // alignment boundary.
2860       Amount = alignSPAdjust(Amount);
2861 
2862       if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2863         emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2864                      Pred, PredReg);
2865       } else {
2866         assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2867         emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2868                      Pred, PredReg);
2869       }
2870     }
2871   } else if (CalleePopAmount != -1U) {
2872     // If the calling convention demands that the callee pops arguments from the
2873     // stack, we want to add it back if we have a reserved call frame.
2874     emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2875                  MachineInstr::NoFlags, Pred, PredReg);
2876   }
2877   return MBB.erase(I);
2878 }
2879 
/// Round \p Value up to a constant that ARM can encode as an immediate, i.e.
/// an 8-bit value rotated right by an even number of bits within a 32-bit
/// word. Only placements of the 8-bit window that do not wrap around the word
/// are considered.
///
/// Zero maps to zero. For inputs above 0xFF000000 the rounding carry does not
/// fit in 32 bits and the result wraps to 0.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalise: move the value left two bits at a time (rotations are even)
  // until one of the two topmost bits is set.
  unsigned LeftShift = 0;
  for (; (Value & 0xC0000000) == 0; Value <<= 2)
    LeftShift += 2;

  // The top byte is the candidate immediate; round it up by one if any of the
  // bits below it would otherwise be dropped.
  uint32_t Imm8 = Value >> 24;
  if ((Value & 0x00FFFFFF) != 0)
    ++Imm8;

  // Rounding 0xFF up yields 0x100, which occupies nine bits; keep it on an
  // even-rotation boundary.
  if (Imm8 & 0x0000100)
    Imm8 &= 0x000001FC;

  // Undo the normalisation to put the window back where it came from.
  return LeftShift > 24 ? Imm8 >> (LeftShift - 24) : Imm8 << (24 - LeftShift);
}
2908 
// Headroom, in bytes: the stack limit stored in the TCB sits this far above
// the actual stack limit. adjustForSegmentedStacks() compares the stack
// pointer directly against the limit for frames whose aligned size is below
// this value.
static constexpr uint64_t kSplitStackAvailable = 256;
2912 
// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
// must be well defined in order to allow for consistent implementations of the
// __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like (when calling __morestack)
//
//  * r4 holds the minimum stack size requested for this function call
//  * r5 holds the stack size of the arguments to the function
//  * the beginning of the function is 3 instructions after the call to
//    __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on to the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
//
// Five new blocks are inserted in front of PrologueMBB, in this layout order:
//
//  * PrevStackMBB: push {r4, r5} (plus the matching CFI).
//  * McrMBB:       r5 = sp, or r5 = sp - AlignedStackSize for frames of at
//                  least kSplitStackAvailable bytes; outside Thumb1 it also
//                  reads the TLS base into r4 with MRC.
//  * GetMBB:       loads the stack limit into r4, compares it with r5 and
//                  branches to PostStackMBB when the limit is lower.
//  * AllocMBB:     sets up r4/r5 as __morestack arguments, saves lr, calls
//                  __morestack, then restores lr, r4 and r5 and returns.
//  * PostStackMBB: pop {r4, r5} and continue into PrologueMBB.
//
// Reports a fatal error for vararg functions and for targets other than
// android/linux. Does nothing if the frame info says no split-stack prologue
// is needed.
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently supports neither varargs nor platforms other than
  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  // Nothing to do when no split-stack prologue is required for this function.
  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  // This is a worklist walk over the transitive predecessors of PrologueMBB.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      // Only queue blocks that have not been seen before.
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  // Temporarily treat the new blocks as part of the region so that they also
  // receive PrologueMBB's live-ins below.
  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(&PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // mov SR1, sp
  // Thumb always copies sp first (the subtraction below then works on SR1);
  // ARM mode only needs the copy when sp itself is compared with the limit.
  if (Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  // sub SR1, sp, #StackSize
  // NOTE: !CompareStackPointer means AlignedStackSize >= kSplitStackAvailable.
  // With kSplitStackAvailable == 256, the "AlignedStackSize < 256" branches
  // below are therefore never taken; they only matter if that constant changes.
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      // Materialize the size in SR0 first, then subtract register-register.
      if (Thumb2) {
        BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
          .addReg(ARM::SP)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      // Load the size from the constant pool into SR0 and subtract it from sp.
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
          .addReg(ARM::SP)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  }

  // Load the stack limit into SR0. Thumb1 goes through a constant-pool entry
  // for the "__STACK_LIMIT" symbol; everything else reads it via MRC and a
  // load at a fixed offset.
  if (Thumb && ST->isThumb1Only()) {
    unsigned PCLabelId = ARMFI->createPICLabelUId();
    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
        MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
    MachineConstantPool *MCP = MF.getConstantPool();
    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

    // ldr SR0, [pc, offset(STACK_LIMIT)]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
        .addConstantPoolIndex(CPI)
        .add(predOps(ARMCC::AL));

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
        .add(predOps(ARMCC::AL));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));

    // Use the last tls slot on android and a private field of the TCB on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)
      .add(predOps(ARMCC::AL));

  // This jump is taken if StackLimit < SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
       .addImm(ARMCC::LO)
       .addReg(ARM::CPSR);


  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  //   The amount size of stack required
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
          .add(condCodeOp())
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2) {
        BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  //   The amount size of stack consumed to save function arguments.
  // The Thumb path tests the raw argument size against 256 while the ARM path
  // tests the rounded size; alignToARMConstant() leaves values below 256
  // unchanged, so both tests select the same cases.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
          .add(condCodeOp())
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2) {
        BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1)
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            *AllocMBB, MBBI, DL, ScratchReg1, 0,
            alignToARMConstant(ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          *AllocMBB, MBBI, DL, ScratchReg1, 0,
          alignToARMConstant(ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .add(predOps(ARMCC::AL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  // Thumb1 pops into SR0 and then moves it to lr; Thumb2 and ARM load lr
  // directly.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .add(predOps(ARMCC::AL))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    } else {
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
          .add(predOps(ARMCC::AL));
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Return from this function.
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));

  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Organizing MBB lists
  // Resulting edges: PrevStackMBB -> McrMBB -> GetMBB -> {PostStackMBB,
  // AllocMBB}, PostStackMBB -> PrologueMBB.
  PostStackMBB->addSuccessor(&PrologueMBB);

  // NOTE(review): AllocMBB ends in a return instruction, yet PostStackMBB is
  // recorded as its successor - confirm this edge is intentional.
  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3389