1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the ARM implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 //
13 // This file contains the ARM implementation of TargetFrameLowering class.
14 //
15 // On ARM, stack frames are structured as follows:
16 //
17 // The stack grows downward.
18 //
19 // All of the individual frame areas on the frame below are optional, i.e. it's
20 // possible to create a function so that the particular area isn't present
21 // in the frame.
22 //
23 // At function entry, the "frame" looks as follows:
24 //
25 // | | Higher address
26 // |-----------------------------------|
27 // | |
28 // | arguments passed on the stack |
29 // | |
30 // |-----------------------------------| <- sp
31 // | | Lower address
32 //
33 //
34 // After the prologue has run, the frame has the following general structure.
35 // Technically the last frame area (VLAs) doesn't get created until in the
36 // main function body, after the prologue is run. However, it's depicted here
37 // for completeness.
38 //
39 // | | Higher address
40 // |-----------------------------------|
41 // | |
42 // | arguments passed on the stack |
43 // | |
44 // |-----------------------------------| <- (sp at function entry)
45 // | |
46 // | varargs from registers |
47 // | |
48 // |-----------------------------------|
49 // | |
50 // | prev_lr |
51 // | prev_fp |
52 // | (a.k.a. "frame record") |
53 // | |
54 // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55 // | |
56 // | callee-saved gpr registers |
57 // | |
58 // |-----------------------------------|
59 // | |
60 // | callee-saved fp/simd regs |
61 // | |
62 // |-----------------------------------|
63 // |.empty.space.to.make.part.below....|
64 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65 // |.the.standard.8-byte.alignment.....| compile time; if present)
66 // |-----------------------------------|
67 // | |
68 // | local variables of fixed size |
69 // | including spill slots |
70 // |-----------------------------------| <- base pointer (not defined by ABI,
71 // |.variable-sized.local.variables....| LLVM chooses r6)
72 // |.(VLAs)............................| (size of this area is unknown at
73 // |...................................| compile time)
74 // |-----------------------------------| <- sp
75 // | | Lower address
76 //
77 //
78 // To access the data in a frame, at-compile time, a constant offset must be
79 // computable from one of the pointers (fp, bp, sp) to access it. The size
80 // of the areas with a dotted background cannot be computed at compile-time
81 // if they are present, making it required to have all three of fp, bp and
82 // sp to be set up to be able to access all contents in the frame areas,
83 // assuming all of the frame areas are non-empty.
84 //
85 // For most functions, some of the frame areas are empty. For those functions,
86 // it may not be necessary to set up fp or bp:
87 // * A base pointer is definitely needed when there are both VLAs and local
88 // variables with more-than-default alignment requirements.
89 // * A frame pointer is definitely needed when there are local variables with
90 // more-than-default alignment requirements.
91 //
92 // In some cases when a base pointer is not strictly needed, it is generated
93 // anyway when offsets from the frame pointer to access local variables become
94 // so large that the offset can't be encoded in the immediate fields of loads
95 // or stores.
96 //
97 // The frame pointer might be chosen to be r7 or r11, depending on the target
98 // architecture and operating system. See ARMSubtarget::getFramePointerReg for
99 // details.
100 //
101 // Outgoing function arguments must be at the bottom of the stack frame when
102 // calling another function. If we do not have variable-sized stack objects, we
103 // can allocate a "reserved call frame" area at the bottom of the local
104 // variable area, large enough for all outgoing calls. If we do have VLAs, then
105 // the stack pointer must be decremented and incremented around each call to
106 // make space for the arguments below the VLAs.
107 //
108 //===----------------------------------------------------------------------===//
109
110 #include "ARMFrameLowering.h"
111 #include "ARMBaseInstrInfo.h"
112 #include "ARMBaseRegisterInfo.h"
113 #include "ARMConstantPoolValue.h"
114 #include "ARMMachineFunctionInfo.h"
115 #include "ARMSubtarget.h"
116 #include "MCTargetDesc/ARMAddressingModes.h"
117 #include "MCTargetDesc/ARMBaseInfo.h"
118 #include "Utils/ARMBaseInfo.h"
119 #include "llvm/ADT/BitVector.h"
120 #include "llvm/ADT/STLExtras.h"
121 #include "llvm/ADT/SmallPtrSet.h"
122 #include "llvm/ADT/SmallVector.h"
123 #include "llvm/CodeGen/MachineBasicBlock.h"
124 #include "llvm/CodeGen/MachineConstantPool.h"
125 #include "llvm/CodeGen/MachineFrameInfo.h"
126 #include "llvm/CodeGen/MachineFunction.h"
127 #include "llvm/CodeGen/MachineInstr.h"
128 #include "llvm/CodeGen/MachineInstrBuilder.h"
129 #include "llvm/CodeGen/MachineJumpTableInfo.h"
130 #include "llvm/CodeGen/MachineModuleInfo.h"
131 #include "llvm/CodeGen/MachineOperand.h"
132 #include "llvm/CodeGen/MachineRegisterInfo.h"
133 #include "llvm/CodeGen/RegisterScavenging.h"
134 #include "llvm/CodeGen/TargetInstrInfo.h"
135 #include "llvm/CodeGen/TargetOpcodes.h"
136 #include "llvm/CodeGen/TargetRegisterInfo.h"
137 #include "llvm/CodeGen/TargetSubtargetInfo.h"
138 #include "llvm/IR/Attributes.h"
139 #include "llvm/IR/CallingConv.h"
140 #include "llvm/IR/DebugLoc.h"
141 #include "llvm/IR/Function.h"
142 #include "llvm/MC/MCAsmInfo.h"
143 #include "llvm/MC/MCContext.h"
144 #include "llvm/MC/MCDwarf.h"
145 #include "llvm/MC/MCInstrDesc.h"
146 #include "llvm/MC/MCRegisterInfo.h"
147 #include "llvm/Support/CodeGen.h"
148 #include "llvm/Support/CommandLine.h"
149 #include "llvm/Support/Compiler.h"
150 #include "llvm/Support/Debug.h"
151 #include "llvm/Support/ErrorHandling.h"
152 #include "llvm/Support/MathExtras.h"
153 #include "llvm/Support/raw_ostream.h"
154 #include "llvm/Target/TargetMachine.h"
155 #include "llvm/Target/TargetOptions.h"
156 #include <algorithm>
157 #include <cassert>
158 #include <cstddef>
159 #include <cstdint>
160 #include <iterator>
161 #include <utility>
162 #include <vector>
163
164 #define DEBUG_TYPE "arm-frame-lowering"
165
166 using namespace llvm;
167
168 static cl::opt<bool>
169 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
170 cl::desc("Align ARM NEON spills in prolog and epilog"));
171
172 static MachineBasicBlock::iterator
173 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
174 unsigned NumAlignedDPRCS2Regs);
175
ARMFrameLowering(const ARMSubtarget & sti)176 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
177 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
178 STI(sti) {}
179
keepFramePointer(const MachineFunction & MF) const180 bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
181 // iOS always has a FP for backtracking, force other targets to keep their FP
182 // when doing FastISel. The emitted code is currently superior, and in cases
183 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
184 return MF.getSubtarget<ARMSubtarget>().useFastISel();
185 }
186
187 /// Returns true if the target can safely skip saving callee-saved registers
188 /// for noreturn nounwind functions.
enableCalleeSaveSkip(const MachineFunction & MF) const189 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
190 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
191 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
192 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
193
194 // Frame pointer and link register are not treated as normal CSR, thus we
195 // can always skip CSR saves for nonreturning functions.
196 return true;
197 }
198
199 /// hasFP - Return true if the specified function should have a dedicated frame
200 /// pointer register. This is true if the function has variable sized allocas
201 /// or if frame pointer elimination is disabled.
hasFP(const MachineFunction & MF) const202 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
203 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
204 const MachineFrameInfo &MFI = MF.getFrameInfo();
205
206 // ABI-required frame pointer.
207 if (MF.getTarget().Options.DisableFramePointerElim(MF))
208 return true;
209
210 // Frame pointer required for use within this function.
211 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
212 MFI.isFrameAddressTaken());
213 }
214
215 /// isFPReserved - Return true if the frame pointer register should be
216 /// considered a reserved register on the scope of the specified function.
isFPReserved(const MachineFunction & MF) const217 bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
218 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
219 }
220
221 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
222 /// not required, we reserve argument space for call sites in the function
223 /// immediately on entry to the current function. This eliminates the need for
224 /// add/sub sp brackets around call sites. Returns true if the call frame is
225 /// included as part of the stack frame.
hasReservedCallFrame(const MachineFunction & MF) const226 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
227 const MachineFrameInfo &MFI = MF.getFrameInfo();
228 unsigned CFSize = MFI.getMaxCallFrameSize();
229 // It's not always a good idea to include the call frame as part of the
230 // stack frame. ARM (especially Thumb) has small immediate offset to
231 // address the stack frame. So a large call frame can cause poor codegen
232 // and may even makes it impossible to scavenge a register.
233 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
234 return false;
235
236 return !MFI.hasVarSizedObjects();
237 }
238
239 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
240 /// call frame pseudos can be simplified. Unlike most targets, having a FP
241 /// is not sufficient here since we still may reference some objects via SP
242 /// even when FP is available in Thumb2 mode.
243 bool
canSimplifyCallFramePseudos(const MachineFunction & MF) const244 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
245 return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
246 }
247
248 // Returns how much of the incoming argument stack area we should clean up in an
249 // epilogue. For the C calling convention this will be 0, for guaranteed tail
250 // call conventions it can be positive (a normal return or a tail call to a
251 // function that uses less stack space for arguments) or negative (for a tail
252 // call to a function that needs more stack space than us for arguments).
getArgumentStackToRestore(MachineFunction & MF,MachineBasicBlock & MBB)253 static int getArgumentStackToRestore(MachineFunction &MF,
254 MachineBasicBlock &MBB) {
255 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
256 bool IsTailCallReturn = false;
257 if (MBB.end() != MBBI) {
258 unsigned RetOpcode = MBBI->getOpcode();
259 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
260 RetOpcode == ARM::TCRETURNri ||
261 RetOpcode == ARM::TCRETURNrinotr12;
262 }
263 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
264
265 int ArgumentPopSize = 0;
266 if (IsTailCallReturn) {
267 MachineOperand &StackAdjust = MBBI->getOperand(1);
268
269 // For a tail-call in a callee-pops-arguments environment, some or all of
270 // the stack may actually be in use for the call's arguments, this is
271 // calculated during LowerCall and consumed here...
272 ArgumentPopSize = StackAdjust.getImm();
273 } else {
274 // ... otherwise the amount to pop is *all* of the argument space,
275 // conveniently stored in the MachineFunctionInfo by
276 // LowerFormalArguments. This will, of course, be zero for the C calling
277 // convention.
278 ArgumentPopSize = AFI->getArgumentStackToRestore();
279 }
280
281 return ArgumentPopSize;
282 }
283
needsWinCFI(const MachineFunction & MF)284 static bool needsWinCFI(const MachineFunction &MF) {
285 const Function &F = MF.getFunction();
286 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
287 F.needsUnwindTableEntry();
288 }
289
290 // Given a load or a store instruction, generate an appropriate unwinding SEH
291 // code on Windows.
insertSEH(MachineBasicBlock::iterator MBBI,const TargetInstrInfo & TII,unsigned Flags)292 static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
293 const TargetInstrInfo &TII,
294 unsigned Flags) {
295 unsigned Opc = MBBI->getOpcode();
296 MachineBasicBlock *MBB = MBBI->getParent();
297 MachineFunction &MF = *MBB->getParent();
298 DebugLoc DL = MBBI->getDebugLoc();
299 MachineInstrBuilder MIB;
300 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
301 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
302
303 Flags |= MachineInstr::NoMerge;
304
305 switch (Opc) {
306 default:
307 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
308 break;
309 case ARM::t2ADDri: // add.w r11, sp, #xx
310 case ARM::t2ADDri12: // add.w r11, sp, #xx
311 case ARM::t2MOVTi16: // movt r4, #xx
312 case ARM::tBL: // bl __chkstk
313 // These are harmless if used for just setting up a frame pointer,
314 // but that frame pointer can't be relied upon for unwinding, unless
315 // set up with SEH_SaveSP.
316 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
317 .addImm(/*Wide=*/1)
318 .setMIFlags(Flags);
319 break;
320
321 case ARM::t2MOVi16: { // mov(w) r4, #xx
322 bool Wide = MBBI->getOperand(1).getImm() >= 256;
323 if (!Wide) {
324 MachineInstrBuilder NewInstr =
325 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
326 NewInstr.add(MBBI->getOperand(0));
327 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
328 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
329 NewInstr.add(MO);
330 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
331 MBB->erase(MBBI);
332 MBBI = NewMBBI;
333 }
334 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
335 break;
336 }
337
338 case ARM::tBLXr: // blx r12 (__chkstk)
339 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
340 .addImm(/*Wide=*/0)
341 .setMIFlags(Flags);
342 break;
343
344 case ARM::t2MOVi32imm: // movw+movt
345 // This pseudo instruction expands into two mov instructions. If the
346 // second operand is a symbol reference, this will stay as two wide
347 // instructions, movw+movt. If they're immediates, the first one can
348 // end up as a narrow mov though.
349 // As two SEH instructions are appended here, they won't get interleaved
350 // between the two final movw/movt instructions, but it doesn't make any
351 // practical difference.
352 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
353 .addImm(/*Wide=*/1)
354 .setMIFlags(Flags);
355 MBB->insertAfter(MBBI, MIB);
356 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
357 .addImm(/*Wide=*/1)
358 .setMIFlags(Flags);
359 break;
360
361 case ARM::t2STR_PRE:
362 if (MBBI->getOperand(0).getReg() == ARM::SP &&
363 MBBI->getOperand(2).getReg() == ARM::SP &&
364 MBBI->getOperand(3).getImm() == -4) {
365 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
366 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
367 .addImm(1ULL << Reg)
368 .addImm(/*Wide=*/1)
369 .setMIFlags(Flags);
370 } else {
371 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
372 }
373 break;
374
375 case ARM::t2LDR_POST:
376 if (MBBI->getOperand(1).getReg() == ARM::SP &&
377 MBBI->getOperand(2).getReg() == ARM::SP &&
378 MBBI->getOperand(3).getImm() == 4) {
379 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
380 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
381 .addImm(1ULL << Reg)
382 .addImm(/*Wide=*/1)
383 .setMIFlags(Flags);
384 } else {
385 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
386 }
387 break;
388
389 case ARM::t2LDMIA_RET:
390 case ARM::t2LDMIA_UPD:
391 case ARM::t2STMDB_UPD: {
392 unsigned Mask = 0;
393 bool Wide = false;
394 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
395 const MachineOperand &MO = MBBI->getOperand(i);
396 if (!MO.isReg() || MO.isImplicit())
397 continue;
398 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
399 if (Reg == 15)
400 Reg = 14;
401 if (Reg >= 8 && Reg <= 13)
402 Wide = true;
403 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
404 Wide = true;
405 Mask |= 1 << Reg;
406 }
407 if (!Wide) {
408 unsigned NewOpc;
409 switch (Opc) {
410 case ARM::t2LDMIA_RET:
411 NewOpc = ARM::tPOP_RET;
412 break;
413 case ARM::t2LDMIA_UPD:
414 NewOpc = ARM::tPOP;
415 break;
416 case ARM::t2STMDB_UPD:
417 NewOpc = ARM::tPUSH;
418 break;
419 default:
420 llvm_unreachable("");
421 }
422 MachineInstrBuilder NewInstr =
423 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
424 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
425 NewInstr.add(MBBI->getOperand(i));
426 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
427 MBB->erase(MBBI);
428 MBBI = NewMBBI;
429 }
430 unsigned SEHOpc =
431 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
432 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
433 .addImm(Mask)
434 .addImm(Wide ? 1 : 0)
435 .setMIFlags(Flags);
436 break;
437 }
438 case ARM::VSTMDDB_UPD:
439 case ARM::VLDMDIA_UPD: {
440 int First = -1, Last = 0;
441 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
442 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
443 if (First == -1)
444 First = Reg;
445 Last = Reg;
446 }
447 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
448 .addImm(First)
449 .addImm(Last)
450 .setMIFlags(Flags);
451 break;
452 }
453 case ARM::tSUBspi:
454 case ARM::tADDspi:
455 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
456 .addImm(MBBI->getOperand(2).getImm() * 4)
457 .addImm(/*Wide=*/0)
458 .setMIFlags(Flags);
459 break;
460 case ARM::t2SUBspImm:
461 case ARM::t2SUBspImm12:
462 case ARM::t2ADDspImm:
463 case ARM::t2ADDspImm12:
464 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
465 .addImm(MBBI->getOperand(2).getImm())
466 .addImm(/*Wide=*/1)
467 .setMIFlags(Flags);
468 break;
469
470 case ARM::tMOVr:
471 if (MBBI->getOperand(1).getReg() == ARM::SP &&
472 (Flags & MachineInstr::FrameSetup)) {
473 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
474 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
475 .addImm(Reg)
476 .setMIFlags(Flags);
477 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
478 (Flags & MachineInstr::FrameDestroy)) {
479 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
480 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
481 .addImm(Reg)
482 .setMIFlags(Flags);
483 } else {
484 report_fatal_error("No SEH Opcode for MOV");
485 }
486 break;
487
488 case ARM::tBX_RET:
489 case ARM::TCRETURNri:
490 case ARM::TCRETURNrinotr12:
491 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
492 .addImm(/*Wide=*/0)
493 .setMIFlags(Flags);
494 break;
495
496 case ARM::TCRETURNdi:
497 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
498 .addImm(/*Wide=*/1)
499 .setMIFlags(Flags);
500 break;
501 }
502 return MBB->insertAfter(MBBI, MIB);
503 }
504
505 static MachineBasicBlock::iterator
initMBBRange(MachineBasicBlock & MBB,const MachineBasicBlock::iterator & MBBI)506 initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
507 if (MBBI == MBB.begin())
508 return MachineBasicBlock::iterator();
509 return std::prev(MBBI);
510 }
511
insertSEHRange(MachineBasicBlock & MBB,MachineBasicBlock::iterator Start,const MachineBasicBlock::iterator & End,const ARMBaseInstrInfo & TII,unsigned MIFlags)512 static void insertSEHRange(MachineBasicBlock &MBB,
513 MachineBasicBlock::iterator Start,
514 const MachineBasicBlock::iterator &End,
515 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
516 if (Start.isValid())
517 Start = std::next(Start);
518 else
519 Start = MBB.begin();
520
521 for (auto MI = Start; MI != End;) {
522 auto Next = std::next(MI);
523 // Check if this instruction already has got a SEH opcode added. In that
524 // case, don't do this generic mapping.
525 if (Next != End && isSEHInstruction(*Next)) {
526 MI = std::next(Next);
527 while (MI != End && isSEHInstruction(*MI))
528 ++MI;
529 continue;
530 }
531 insertSEH(MI, TII, MIFlags);
532 MI = Next;
533 }
534 }
535
emitRegPlusImmediate(bool isARM,MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,const DebugLoc & dl,const ARMBaseInstrInfo & TII,unsigned DestReg,unsigned SrcReg,int NumBytes,unsigned MIFlags=MachineInstr::NoFlags,ARMCC::CondCodes Pred=ARMCC::AL,unsigned PredReg=0)536 static void emitRegPlusImmediate(
537 bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
538 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
539 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
540 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
541 if (isARM)
542 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
543 Pred, PredReg, TII, MIFlags);
544 else
545 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
546 Pred, PredReg, TII, MIFlags);
547 }
548
emitSPUpdate(bool isARM,MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,const DebugLoc & dl,const ARMBaseInstrInfo & TII,int NumBytes,unsigned MIFlags=MachineInstr::NoFlags,ARMCC::CondCodes Pred=ARMCC::AL,unsigned PredReg=0)549 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
550 MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
551 const ARMBaseInstrInfo &TII, int NumBytes,
552 unsigned MIFlags = MachineInstr::NoFlags,
553 ARMCC::CondCodes Pred = ARMCC::AL,
554 unsigned PredReg = 0) {
555 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
556 MIFlags, Pred, PredReg);
557 }
558
sizeOfSPAdjustment(const MachineInstr & MI)559 static int sizeOfSPAdjustment(const MachineInstr &MI) {
560 int RegSize;
561 switch (MI.getOpcode()) {
562 case ARM::VSTMDDB_UPD:
563 RegSize = 8;
564 break;
565 case ARM::STMDB_UPD:
566 case ARM::t2STMDB_UPD:
567 RegSize = 4;
568 break;
569 case ARM::t2STR_PRE:
570 case ARM::STR_PRE_IMM:
571 return 4;
572 default:
573 llvm_unreachable("Unknown push or pop like instruction");
574 }
575
576 int count = 0;
577 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
578 // pred) so the list starts at 4.
579 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
580 count += RegSize;
581 return count;
582 }
583
WindowsRequiresStackProbe(const MachineFunction & MF,size_t StackSizeInBytes)584 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
585 size_t StackSizeInBytes) {
586 const MachineFrameInfo &MFI = MF.getFrameInfo();
587 const Function &F = MF.getFunction();
588 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
589
590 StackProbeSize =
591 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
592 return (StackSizeInBytes >= StackProbeSize) &&
593 !F.hasFnAttribute("no-stack-arg-probe");
594 }
595
596 namespace {
597
598 struct StackAdjustingInsts {
599 struct InstInfo {
600 MachineBasicBlock::iterator I;
601 unsigned SPAdjust;
602 bool BeforeFPSet;
603 };
604
605 SmallVector<InstInfo, 4> Insts;
606
addInst__anon7ad736c70111::StackAdjustingInsts607 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
608 bool BeforeFPSet = false) {
609 InstInfo Info = {I, SPAdjust, BeforeFPSet};
610 Insts.push_back(Info);
611 }
612
addExtraBytes__anon7ad736c70111::StackAdjustingInsts613 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
614 auto Info =
615 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
616 assert(Info != Insts.end() && "invalid sp adjusting instruction");
617 Info->SPAdjust += ExtraBytes;
618 }
619
emitDefCFAOffsets__anon7ad736c70111::StackAdjustingInsts620 void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
621 const ARMBaseInstrInfo &TII, bool HasFP) {
622 MachineFunction &MF = *MBB.getParent();
623 unsigned CFAOffset = 0;
624 for (auto &Info : Insts) {
625 if (HasFP && !Info.BeforeFPSet)
626 return;
627
628 CFAOffset += Info.SPAdjust;
629 unsigned CFIIndex = MF.addFrameInst(
630 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
631 BuildMI(MBB, std::next(Info.I), dl,
632 TII.get(TargetOpcode::CFI_INSTRUCTION))
633 .addCFIIndex(CFIIndex)
634 .setMIFlags(MachineInstr::FrameSetup);
635 }
636 }
637 };
638
639 } // end anonymous namespace
640
641 /// Emit an instruction sequence that will align the address in
642 /// register Reg by zero-ing out the lower bits. For versions of the
643 /// architecture that support Neon, this must be done in a single
644 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
645 /// single instruction. That function only gets called when optimizing
646 /// spilling of D registers on a core with the Neon instruction set
647 /// present.
emitAligningInstructions(MachineFunction & MF,ARMFunctionInfo * AFI,const TargetInstrInfo & TII,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,const unsigned Reg,const Align Alignment,const bool MustBeSingleInstruction)648 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
649 const TargetInstrInfo &TII,
650 MachineBasicBlock &MBB,
651 MachineBasicBlock::iterator MBBI,
652 const DebugLoc &DL, const unsigned Reg,
653 const Align Alignment,
654 const bool MustBeSingleInstruction) {
655 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
656 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
657 const unsigned AlignMask = Alignment.value() - 1U;
658 const unsigned NrBitsToZero = Log2(Alignment);
659 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
660 if (!AFI->isThumbFunction()) {
661 // if the BFC instruction is available, use that to zero the lower
662 // bits:
663 // bfc Reg, #0, log2(Alignment)
664 // otherwise use BIC, if the mask to zero the required number of bits
665 // can be encoded in the bic immediate field
666 // bic Reg, Reg, Alignment-1
667 // otherwise, emit
668 // lsr Reg, Reg, log2(Alignment)
669 // lsl Reg, Reg, log2(Alignment)
670 if (CanUseBFC) {
671 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
672 .addReg(Reg, RegState::Kill)
673 .addImm(~AlignMask)
674 .add(predOps(ARMCC::AL));
675 } else if (AlignMask <= 255) {
676 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
677 .addReg(Reg, RegState::Kill)
678 .addImm(AlignMask)
679 .add(predOps(ARMCC::AL))
680 .add(condCodeOp());
681 } else {
682 assert(!MustBeSingleInstruction &&
683 "Shouldn't call emitAligningInstructions demanding a single "
684 "instruction to be emitted for large stack alignment for a target "
685 "without BFC.");
686 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
687 .addReg(Reg, RegState::Kill)
688 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
689 .add(predOps(ARMCC::AL))
690 .add(condCodeOp());
691 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
692 .addReg(Reg, RegState::Kill)
693 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
694 .add(predOps(ARMCC::AL))
695 .add(condCodeOp());
696 }
697 } else {
698 // Since this is only reached for Thumb-2 targets, the BFC instruction
699 // should always be available.
700 assert(CanUseBFC);
701 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
702 .addReg(Reg, RegState::Kill)
703 .addImm(~AlignMask)
704 .add(predOps(ARMCC::AL));
705 }
706 }
707
708 /// We need the offset of the frame pointer relative to other MachineFrameInfo
709 /// offsets which are encoded relative to SP at function begin.
710 /// See also emitPrologue() for how the FP is set up.
711 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
712 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
713 /// this to produce a conservative estimate that we check in an assert() later.
getMaxFPOffset(const ARMSubtarget & STI,const ARMFunctionInfo & AFI,const MachineFunction & MF)714 static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
715 const MachineFunction &MF) {
716 // For Thumb1, push.w isn't available, so the first push will always push
717 // r7 and lr onto the stack first.
718 if (AFI.isThumb1OnlyFunction())
719 return -AFI.getArgRegsSaveSize() - (2 * 4);
720 // This is a conservative estimation: Assume the frame pointer being r7 and
721 // pc("r15") up to r8 getting spilled before (= 8 registers).
722 int MaxRegBytes = 8 * 4;
723 if (STI.splitFramePointerPush(MF)) {
724 // Here, r11 can be stored below all of r4-r15 (3 registers more than
725 // above), plus d8-d15.
726 MaxRegBytes = 11 * 4 + 8 * 8;
727 }
728 int FPCXTSaveSize =
729 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
730 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
731 }
732
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const733 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
734 MachineBasicBlock &MBB) const {
735 MachineBasicBlock::iterator MBBI = MBB.begin();
736 MachineFrameInfo &MFI = MF.getFrameInfo();
737 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
738 MCContext &Context = MF.getContext();
739 const TargetMachine &TM = MF.getTarget();
740 const MCRegisterInfo *MRI = Context.getRegisterInfo();
741 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
742 const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
743 assert(!AFI->isThumb1OnlyFunction() &&
744 "This emitPrologue does not support Thumb1!");
745 bool isARM = !AFI->isThumbFunction();
746 Align Alignment = STI.getFrameLowering()->getStackAlign();
747 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
748 unsigned NumBytes = MFI.getStackSize();
749 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
750 int FPCXTSaveSize = 0;
751 bool NeedsWinCFI = needsWinCFI(MF);
752
753 // Debug location must be unknown since the first debug location is used
754 // to determine the end of the prologue.
755 DebugLoc dl;
756
757 Register FramePtr = RegInfo->getFrameRegister(MF);
758
759 // Determine the sizes of each callee-save spill areas and record which frame
760 // belongs to which callee-save spill areas.
761 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
762 int FramePtrSpillFI = 0;
763 int D8SpillFI = 0;
764
765 // All calls are tail calls in GHC calling conv, and functions have no
766 // prologue/epilogue.
767 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
768 return;
769
770 StackAdjustingInsts DefCFAOffsetCandidates;
771 bool HasFP = hasFP(MF);
772
773 if (!AFI->hasStackFrame() &&
774 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
775 if (NumBytes != 0) {
776 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
777 MachineInstr::FrameSetup);
778 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
779 }
780 if (!NeedsWinCFI)
781 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
782 if (NeedsWinCFI && MBBI != MBB.begin()) {
783 insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
784 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
785 .setMIFlag(MachineInstr::FrameSetup);
786 MF.setHasWinCFI(true);
787 }
788 return;
789 }
790
791 // Determine spill area sizes.
792 if (STI.splitFramePointerPush(MF)) {
793 for (const CalleeSavedInfo &I : CSI) {
794 Register Reg = I.getReg();
795 int FI = I.getFrameIdx();
796 switch (Reg) {
797 case ARM::R11:
798 case ARM::LR:
799 if (Reg == FramePtr)
800 FramePtrSpillFI = FI;
801 GPRCS2Size += 4;
802 break;
803 case ARM::R0:
804 case ARM::R1:
805 case ARM::R2:
806 case ARM::R3:
807 case ARM::R4:
808 case ARM::R5:
809 case ARM::R6:
810 case ARM::R7:
811 case ARM::R8:
812 case ARM::R9:
813 case ARM::R10:
814 case ARM::R12:
815 GPRCS1Size += 4;
816 break;
817 case ARM::FPCXTNS:
818 FPCXTSaveSize = 4;
819 break;
820 default:
821 // This is a DPR. Exclude the aligned DPRCS2 spills.
822 if (Reg == ARM::D8)
823 D8SpillFI = FI;
824 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
825 DPRCSSize += 8;
826 }
827 }
828 } else {
829 for (const CalleeSavedInfo &I : CSI) {
830 Register Reg = I.getReg();
831 int FI = I.getFrameIdx();
832 switch (Reg) {
833 case ARM::R8:
834 case ARM::R9:
835 case ARM::R10:
836 case ARM::R11:
837 case ARM::R12:
838 if (STI.splitFramePushPop(MF)) {
839 GPRCS2Size += 4;
840 break;
841 }
842 [[fallthrough]];
843 case ARM::R0:
844 case ARM::R1:
845 case ARM::R2:
846 case ARM::R3:
847 case ARM::R4:
848 case ARM::R5:
849 case ARM::R6:
850 case ARM::R7:
851 case ARM::LR:
852 if (Reg == FramePtr)
853 FramePtrSpillFI = FI;
854 GPRCS1Size += 4;
855 break;
856 case ARM::FPCXTNS:
857 FPCXTSaveSize = 4;
858 break;
859 default:
860 // This is a DPR. Exclude the aligned DPRCS2 spills.
861 if (Reg == ARM::D8)
862 D8SpillFI = FI;
863 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
864 DPRCSSize += 8;
865 }
866 }
867 }
868
869 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
870
871 // Move past the PAC computation.
872 if (AFI->shouldSignReturnAddress())
873 LastPush = MBBI++;
874
875 // Move past FPCXT area.
876 if (FPCXTSaveSize > 0) {
877 LastPush = MBBI++;
878 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
879 }
880
881 // Allocate the vararg register save area.
882 if (ArgRegsSaveSize) {
883 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
884 MachineInstr::FrameSetup);
885 LastPush = std::prev(MBBI);
886 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
887 }
888
889 // Move past area 1.
890 if (GPRCS1Size > 0) {
891 GPRCS1Push = LastPush = MBBI++;
892 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
893 }
894
895 // Determine starting offsets of spill areas.
896 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
897 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
898 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
899 Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
900 unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
901 if (!STI.splitFramePointerPush(MF)) {
902 DPRGapSize += GPRCS2Size;
903 }
904 DPRGapSize %= DPRAlign.value();
905
906 unsigned DPRCSOffset;
907 if (STI.splitFramePointerPush(MF)) {
908 DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
909 GPRCS2Offset = DPRCSOffset - GPRCS2Size;
910 } else {
911 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
912 }
913 int FramePtrOffsetInPush = 0;
914 if (HasFP) {
915 int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
916 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
917 "Max FP estimation is wrong");
918 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
919 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
920 NumBytes);
921 }
922 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
923 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
924 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
925
926 // Move past area 2.
927 if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
928 GPRCS2Push = LastPush = MBBI++;
929 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
930 }
931
932 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
933 // .cfi_offset operations will reflect that.
934 if (DPRGapSize) {
935 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
936 if (LastPush != MBB.end() &&
937 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
938 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
939 else {
940 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
941 MachineInstr::FrameSetup);
942 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
943 }
944 }
945
946 // Move past area 3.
947 if (DPRCSSize > 0) {
948 // Since vpush register list cannot have gaps, there may be multiple vpush
949 // instructions in the prologue.
950 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
951 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
952 LastPush = MBBI++;
953 }
954 }
955
956 // Move past the aligned DPRCS2 area.
957 if (AFI->getNumAlignedDPRCS2Regs() > 0) {
958 MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
959 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
960 // leaves the stack pointer pointing to the DPRCS2 area.
961 //
962 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
963 NumBytes += MFI.getObjectOffset(D8SpillFI);
964 } else
965 NumBytes = DPRCSOffset;
966
967 if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
968 GPRCS2Push = LastPush = MBBI++;
969 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
970 }
971
972 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
973 if (STI.splitFramePointerPush(MF) && HasFP)
974 NeedsWinCFIStackAlloc = false;
975
976 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
977 uint32_t NumWords = NumBytes >> 2;
978
979 if (NumWords < 65536) {
980 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
981 .addImm(NumWords)
982 .setMIFlags(MachineInstr::FrameSetup)
983 .add(predOps(ARMCC::AL));
984 } else {
985 // Split into two instructions here, instead of using t2MOVi32imm,
986 // to allow inserting accurate SEH instructions (including accurate
987 // instruction size for each of them).
988 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
989 .addImm(NumWords & 0xffff)
990 .setMIFlags(MachineInstr::FrameSetup)
991 .add(predOps(ARMCC::AL));
992 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
993 .addReg(ARM::R4)
994 .addImm(NumWords >> 16)
995 .setMIFlags(MachineInstr::FrameSetup)
996 .add(predOps(ARMCC::AL));
997 }
998
999 switch (TM.getCodeModel()) {
1000 case CodeModel::Tiny:
1001 llvm_unreachable("Tiny code model not available on ARM.");
1002 case CodeModel::Small:
1003 case CodeModel::Medium:
1004 case CodeModel::Kernel:
1005 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1006 .add(predOps(ARMCC::AL))
1007 .addExternalSymbol("__chkstk")
1008 .addReg(ARM::R4, RegState::Implicit)
1009 .setMIFlags(MachineInstr::FrameSetup);
1010 break;
1011 case CodeModel::Large:
1012 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1013 .addExternalSymbol("__chkstk")
1014 .setMIFlags(MachineInstr::FrameSetup);
1015
1016 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1017 .add(predOps(ARMCC::AL))
1018 .addReg(ARM::R12, RegState::Kill)
1019 .addReg(ARM::R4, RegState::Implicit)
1020 .setMIFlags(MachineInstr::FrameSetup);
1021 break;
1022 }
1023
1024 MachineInstrBuilder Instr, SEH;
1025 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1026 .addReg(ARM::SP, RegState::Kill)
1027 .addReg(ARM::R4, RegState::Kill)
1028 .setMIFlags(MachineInstr::FrameSetup)
1029 .add(predOps(ARMCC::AL))
1030 .add(condCodeOp());
1031 if (NeedsWinCFIStackAlloc) {
1032 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1033 .addImm(NumBytes)
1034 .addImm(/*Wide=*/1)
1035 .setMIFlags(MachineInstr::FrameSetup);
1036 MBB.insertAfter(Instr, SEH);
1037 }
1038 NumBytes = 0;
1039 }
1040
1041 if (NumBytes) {
1042 // Adjust SP after all the callee-save spills.
1043 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1044 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1045 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1046 else {
1047 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1048 MachineInstr::FrameSetup);
1049 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1050 }
1051
1052 if (HasFP && isARM)
1053 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1054 // Note it's not safe to do this in Thumb2 mode because it would have
1055 // taken two instructions:
1056 // mov sp, r7
1057 // sub sp, #24
1058 // If an interrupt is taken between the two instructions, then sp is in
1059 // an inconsistent state (pointing to the middle of callee-saved area).
1060 // The interrupt handler can end up clobbering the registers.
1061 AFI->setShouldRestoreSPFromFP(true);
1062 }
1063
1064 // Set FP to point to the stack slot that contains the previous FP.
1065 // For iOS, FP is R7, which has now been stored in spill area 1.
1066 // Otherwise, if this is not iOS, all the callee-saved registers go
1067 // into spill area 1, including the FP in R11. In either case, it
1068 // is in area one and the adjustment needs to take place just after
1069 // that push.
1070 // FIXME: The above is not necessary true when PACBTI is enabled.
1071 // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1072 // so FP ends up on area two.
1073 MachineBasicBlock::iterator AfterPush;
1074 if (HasFP) {
1075 AfterPush = std::next(GPRCS1Push);
1076 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1077 int FPOffset = PushSize + FramePtrOffsetInPush;
1078 if (STI.splitFramePointerPush(MF)) {
1079 AfterPush = std::next(GPRCS2Push);
1080 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1081 FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1082 } else {
1083 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1084 FramePtr, ARM::SP, FPOffset,
1085 MachineInstr::FrameSetup);
1086 }
1087 if (!NeedsWinCFI) {
1088 if (FramePtrOffsetInPush + PushSize != 0) {
1089 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1090 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1091 FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1092 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1093 .addCFIIndex(CFIIndex)
1094 .setMIFlags(MachineInstr::FrameSetup);
1095 } else {
1096 unsigned CFIIndex =
1097 MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
1098 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1099 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1100 .addCFIIndex(CFIIndex)
1101 .setMIFlags(MachineInstr::FrameSetup);
1102 }
1103 }
1104 }
1105
1106 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1107 // instructions below don't need to be replayed to unwind the stack.
1108 if (NeedsWinCFI && MBBI != MBB.begin()) {
1109 MachineBasicBlock::iterator End = MBBI;
1110 if (HasFP && STI.splitFramePointerPush(MF))
1111 End = AfterPush;
1112 insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
1113 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1114 .setMIFlag(MachineInstr::FrameSetup);
1115 MF.setHasWinCFI(true);
1116 }
1117
1118 // Now that the prologue's actual instructions are finalised, we can insert
1119 // the necessary DWARF cf instructions to describe the situation. Start by
1120 // recording where each register ended up:
1121 if (GPRCS1Size > 0 && !NeedsWinCFI) {
1122 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1123 int CFIIndex;
1124 for (const auto &Entry : CSI) {
1125 Register Reg = Entry.getReg();
1126 int FI = Entry.getFrameIdx();
1127 switch (Reg) {
1128 case ARM::R8:
1129 case ARM::R9:
1130 case ARM::R10:
1131 case ARM::R11:
1132 case ARM::R12:
1133 if (STI.splitFramePushPop(MF))
1134 break;
1135 [[fallthrough]];
1136 case ARM::R0:
1137 case ARM::R1:
1138 case ARM::R2:
1139 case ARM::R3:
1140 case ARM::R4:
1141 case ARM::R5:
1142 case ARM::R6:
1143 case ARM::R7:
1144 case ARM::LR:
1145 CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1146 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1147 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1148 .addCFIIndex(CFIIndex)
1149 .setMIFlags(MachineInstr::FrameSetup);
1150 break;
1151 }
1152 }
1153 }
1154
1155 if (GPRCS2Size > 0 && !NeedsWinCFI) {
1156 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1157 for (const auto &Entry : CSI) {
1158 Register Reg = Entry.getReg();
1159 int FI = Entry.getFrameIdx();
1160 switch (Reg) {
1161 case ARM::R8:
1162 case ARM::R9:
1163 case ARM::R10:
1164 case ARM::R11:
1165 case ARM::R12:
1166 if (STI.splitFramePushPop(MF)) {
1167 unsigned DwarfReg = MRI->getDwarfRegNum(
1168 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1169 int64_t Offset = MFI.getObjectOffset(FI);
1170 unsigned CFIIndex = MF.addFrameInst(
1171 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1172 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1173 .addCFIIndex(CFIIndex)
1174 .setMIFlags(MachineInstr::FrameSetup);
1175 }
1176 break;
1177 }
1178 }
1179 }
1180
1181 if (DPRCSSize > 0 && !NeedsWinCFI) {
1182 // Since vpush register list cannot have gaps, there may be multiple vpush
1183 // instructions in the prologue.
1184 MachineBasicBlock::iterator Pos = std::next(LastPush);
1185 for (const auto &Entry : CSI) {
1186 Register Reg = Entry.getReg();
1187 int FI = Entry.getFrameIdx();
1188 if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1189 (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1190 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1191 int64_t Offset = MFI.getObjectOffset(FI);
1192 unsigned CFIIndex = MF.addFrameInst(
1193 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1194 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1195 .addCFIIndex(CFIIndex)
1196 .setMIFlags(MachineInstr::FrameSetup);
1197 }
1198 }
1199 }
1200
1201 // Now we can emit descriptions of where the canonical frame address was
1202 // throughout the process. If we have a frame pointer, it takes over the job
1203 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1204 // actually get emitted.
1205 if (!NeedsWinCFI)
1206 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1207
1208 if (STI.isTargetELF() && hasFP(MF))
1209 MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
1210 AFI->getFramePtrSpillOffset());
1211
1212 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1213 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1214 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1215 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1216 AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1217
1218 // If we need dynamic stack realignment, do it here. Be paranoid and make
1219 // sure if we also have VLAs, we have a base pointer for frame access.
1220 // If aligned NEON registers were spilled, the stack has already been
1221 // realigned.
1222 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1223 Align MaxAlign = MFI.getMaxAlign();
1224 assert(!AFI->isThumb1OnlyFunction());
1225 if (!AFI->isThumbFunction()) {
1226 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1227 false);
1228 } else {
1229 // We cannot use sp as source/dest register here, thus we're using r4 to
1230 // perform the calculations. We're emitting the following sequence:
1231 // mov r4, sp
1232 // -- use emitAligningInstructions to produce best sequence to zero
1233 // -- out lower bits in r4
1234 // mov sp, r4
1235 // FIXME: It will be better just to find spare register here.
1236 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1237 .addReg(ARM::SP, RegState::Kill)
1238 .add(predOps(ARMCC::AL));
1239 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1240 false);
1241 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1242 .addReg(ARM::R4, RegState::Kill)
1243 .add(predOps(ARMCC::AL));
1244 }
1245
1246 AFI->setShouldRestoreSPFromFP(true);
1247 }
1248
1249 // If we need a base pointer, set it up here. It's whatever the value
1250 // of the stack pointer is at this point. Any variable size objects
1251 // will be allocated after this, so we can still use the base pointer
1252 // to reference locals.
1253 // FIXME: Clarify FrameSetup flags here.
1254 if (RegInfo->hasBasePointer(MF)) {
1255 if (isARM)
1256 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1257 .addReg(ARM::SP)
1258 .add(predOps(ARMCC::AL))
1259 .add(condCodeOp());
1260 else
1261 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1262 .addReg(ARM::SP)
1263 .add(predOps(ARMCC::AL));
1264 }
1265
1266 // If the frame has variable sized objects then the epilogue must restore
1267 // the sp from fp. We can assume there's an FP here since hasFP already
1268 // checks for hasVarSizedObjects.
1269 if (MFI.hasVarSizedObjects())
1270 AFI->setShouldRestoreSPFromFP(true);
1271 }
1272
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1273 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
1274 MachineBasicBlock &MBB) const {
1275 MachineFrameInfo &MFI = MF.getFrameInfo();
1276 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1277 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1278 const ARMBaseInstrInfo &TII =
1279 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1280 assert(!AFI->isThumb1OnlyFunction() &&
1281 "This emitEpilogue does not support Thumb1!");
1282 bool isARM = !AFI->isThumbFunction();
1283
1284 // Amount of stack space we reserved next to incoming args for either
1285 // varargs registers or stack arguments in tail calls made by this function.
1286 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1287
1288 // How much of the stack used by incoming arguments this function is expected
1289 // to restore in this particular epilogue.
1290 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1291 int NumBytes = (int)MFI.getStackSize();
1292 Register FramePtr = RegInfo->getFrameRegister(MF);
1293
1294 // All calls are tail calls in GHC calling conv, and functions have no
1295 // prologue/epilogue.
1296 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1297 return;
1298
1299 // First put ourselves on the first (from top) terminator instructions.
1300 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1301 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1302
1303 MachineBasicBlock::iterator RangeStart;
1304 if (!AFI->hasStackFrame()) {
1305 if (MF.hasWinCFI()) {
1306 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1307 .setMIFlag(MachineInstr::FrameDestroy);
1308 RangeStart = initMBBRange(MBB, MBBI);
1309 }
1310
1311 if (NumBytes + IncomingArgStackToRestore != 0)
1312 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1313 NumBytes + IncomingArgStackToRestore,
1314 MachineInstr::FrameDestroy);
1315 } else {
1316 // Unwind MBBI to point to first LDR / VLDRD.
1317 if (MBBI != MBB.begin()) {
1318 do {
1319 --MBBI;
1320 } while (MBBI != MBB.begin() &&
1321 MBBI->getFlag(MachineInstr::FrameDestroy));
1322 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1323 ++MBBI;
1324 }
1325
1326 if (MF.hasWinCFI()) {
1327 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1328 .setMIFlag(MachineInstr::FrameDestroy);
1329 RangeStart = initMBBRange(MBB, MBBI);
1330 }
1331
1332 // Move SP to start of FP callee save spill area.
1333 NumBytes -= (ReservedArgStack +
1334 AFI->getFPCXTSaveAreaSize() +
1335 AFI->getGPRCalleeSavedArea1Size() +
1336 AFI->getGPRCalleeSavedArea2Size() +
1337 AFI->getDPRCalleeSavedGapSize() +
1338 AFI->getDPRCalleeSavedAreaSize());
1339
1340 // Reset SP based on frame pointer only if the stack frame extends beyond
1341 // frame pointer stack slot or target is ELF and the function has FP.
1342 if (AFI->shouldRestoreSPFromFP()) {
1343 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1344 if (NumBytes) {
1345 if (isARM)
1346 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1347 ARMCC::AL, 0, TII,
1348 MachineInstr::FrameDestroy);
1349 else {
1350 // It's not possible to restore SP from FP in a single instruction.
1351 // For iOS, this looks like:
1352 // mov sp, r7
1353 // sub sp, #24
1354 // This is bad, if an interrupt is taken after the mov, sp is in an
1355 // inconsistent state.
1356 // Use the first callee-saved register as a scratch register.
1357 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1358 "No scratch register to restore SP from FP!");
1359 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1360 ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
1361 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1362 .addReg(ARM::R4)
1363 .add(predOps(ARMCC::AL))
1364 .setMIFlag(MachineInstr::FrameDestroy);
1365 }
1366 } else {
1367 // Thumb2 or ARM.
1368 if (isARM)
1369 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1370 .addReg(FramePtr)
1371 .add(predOps(ARMCC::AL))
1372 .add(condCodeOp())
1373 .setMIFlag(MachineInstr::FrameDestroy);
1374 else
1375 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1376 .addReg(FramePtr)
1377 .add(predOps(ARMCC::AL))
1378 .setMIFlag(MachineInstr::FrameDestroy);
1379 }
1380 } else if (NumBytes &&
1381 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1382 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1383 MachineInstr::FrameDestroy);
1384
1385 // Increment past our save areas.
1386 if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
1387 MBBI++;
1388
1389 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1390 MBBI++;
1391 // Since vpop register list cannot have gaps, there may be multiple vpop
1392 // instructions in the epilogue.
1393 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1394 MBBI++;
1395 }
1396 if (AFI->getDPRCalleeSavedGapSize()) {
1397 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1398 "unexpected DPR alignment gap");
1399 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1400 MachineInstr::FrameDestroy);
1401 }
1402
1403 if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
1404 MBBI++;
1405 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1406
1407 if (ReservedArgStack || IncomingArgStackToRestore) {
1408 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1409 "attempting to restore negative stack amount");
1410 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1411 ReservedArgStack + IncomingArgStackToRestore,
1412 MachineInstr::FrameDestroy);
1413 }
1414
1415 // Validate PAC, It should have been already popped into R12. For CMSE entry
1416 // function, the validation instruction is emitted during expansion of the
1417 // tBXNS_RET, since the validation must use the value of SP at function
1418 // entry, before saving, resp. after restoring, FPCXTNS.
1419 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1420 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1421 }
1422
1423 if (MF.hasWinCFI()) {
1424 insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
1425 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1426 .setMIFlag(MachineInstr::FrameDestroy);
1427 }
1428 }
1429
1430 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1431 /// debug info. It's the same as what we use for resolving the code-gen
1432 /// references for now. FIXME: This can go wrong when references are
1433 /// SP-relative and simple call frames aren't used.
getFrameIndexReference(const MachineFunction & MF,int FI,Register & FrameReg) const1434 StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1435 int FI,
1436 Register &FrameReg) const {
1437 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1438 }
1439
ResolveFrameIndexReference(const MachineFunction & MF,int FI,Register & FrameReg,int SPAdj) const1440 int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
1441 int FI, Register &FrameReg,
1442 int SPAdj) const {
1443 const MachineFrameInfo &MFI = MF.getFrameInfo();
1444 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1445 MF.getSubtarget().getRegisterInfo());
1446 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1447 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1448 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1449 bool isFixed = MFI.isFixedObjectIndex(FI);
1450
1451 FrameReg = ARM::SP;
1452 Offset += SPAdj;
1453
1454 // SP can move around if there are allocas. We may also lose track of SP
1455 // when emergency spilling inside a non-reserved call frame setup.
1456 bool hasMovingSP = !hasReservedCallFrame(MF);
1457
1458 // When dynamically realigning the stack, use the frame pointer for
1459 // parameters, and the stack/base pointer for locals.
1460 if (RegInfo->hasStackRealignment(MF)) {
1461 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1462 if (isFixed) {
1463 FrameReg = RegInfo->getFrameRegister(MF);
1464 Offset = FPOffset;
1465 } else if (hasMovingSP) {
1466 assert(RegInfo->hasBasePointer(MF) &&
1467 "VLAs and dynamic stack alignment, but missing base pointer!");
1468 FrameReg = RegInfo->getBaseRegister();
1469 Offset -= SPAdj;
1470 }
1471 return Offset;
1472 }
1473
1474 // If there is a frame pointer, use it when we can.
1475 if (hasFP(MF) && AFI->hasStackFrame()) {
1476 // Use frame pointer to reference fixed objects. Use it for locals if
1477 // there are VLAs (and thus the SP isn't reliable as a base).
1478 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1479 FrameReg = RegInfo->getFrameRegister(MF);
1480 return FPOffset;
1481 } else if (hasMovingSP) {
1482 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1483 if (AFI->isThumb2Function()) {
1484 // Try to use the frame pointer if we can, else use the base pointer
1485 // since it's available. This is handy for the emergency spill slot, in
1486 // particular.
1487 if (FPOffset >= -255 && FPOffset < 0) {
1488 FrameReg = RegInfo->getFrameRegister(MF);
1489 return FPOffset;
1490 }
1491 }
1492 } else if (AFI->isThumbFunction()) {
1493 // Prefer SP to base pointer, if the offset is suitably aligned and in
1494 // range as the effective range of the immediate offset is bigger when
1495 // basing off SP.
1496 // Use add <rd>, sp, #<imm8>
1497 // ldr <rd>, [sp, #<imm8>]
1498 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1499 return Offset;
1500 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1501 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1502 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1503 FrameReg = RegInfo->getFrameRegister(MF);
1504 return FPOffset;
1505 }
1506 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1507 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1508 FrameReg = RegInfo->getFrameRegister(MF);
1509 return FPOffset;
1510 }
1511 }
1512 // Use the base pointer if we have one.
1513 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1514 // That can happen if we forced a base pointer for a large call frame.
1515 if (RegInfo->hasBasePointer(MF)) {
1516 FrameReg = RegInfo->getBaseRegister();
1517 Offset -= SPAdj;
1518 }
1519 return Offset;
1520 }
1521
emitPushInst(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,ArrayRef<CalleeSavedInfo> CSI,unsigned StmOpc,unsigned StrOpc,bool NoGap,bool (* Func)(unsigned,bool),unsigned NumAlignedDPRCS2Regs,unsigned MIFlags) const1522 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1523 MachineBasicBlock::iterator MI,
1524 ArrayRef<CalleeSavedInfo> CSI,
1525 unsigned StmOpc, unsigned StrOpc,
1526 bool NoGap, bool (*Func)(unsigned, bool),
1527 unsigned NumAlignedDPRCS2Regs,
1528 unsigned MIFlags) const {
1529 MachineFunction &MF = *MBB.getParent();
1530 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1531 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1532
1533 DebugLoc DL;
1534
1535 using RegAndKill = std::pair<unsigned, bool>;
1536
1537 SmallVector<RegAndKill, 4> Regs;
1538 unsigned i = CSI.size();
1539 while (i != 0) {
1540 unsigned LastReg = 0;
1541 for (; i != 0; --i) {
1542 Register Reg = CSI[i-1].getReg();
1543 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1544
1545 // D-registers in the aligned area DPRCS2 are NOT spilled here.
1546 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1547 continue;
1548
1549 const MachineRegisterInfo &MRI = MF.getRegInfo();
1550 bool isLiveIn = MRI.isLiveIn(Reg);
1551 if (!isLiveIn && !MRI.isReserved(Reg))
1552 MBB.addLiveIn(Reg);
1553 // If NoGap is true, push consecutive registers and then leave the rest
1554 // for other instructions. e.g.
1555 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1556 if (NoGap && LastReg && LastReg != Reg-1)
1557 break;
1558 LastReg = Reg;
1559 // Do not set a kill flag on values that are also marked as live-in. This
1560 // happens with the @llvm-returnaddress intrinsic and with arguments
1561 // passed in callee saved registers.
1562 // Omitting the kill flags is conservatively correct even if the live-in
1563 // is not used after all.
1564 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1565 }
1566
1567 if (Regs.empty())
1568 continue;
1569
1570 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1571 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1572 });
1573
1574 if (Regs.size() > 1 || StrOpc== 0) {
1575 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1576 .addReg(ARM::SP)
1577 .setMIFlags(MIFlags)
1578 .add(predOps(ARMCC::AL));
1579 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1580 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1581 } else if (Regs.size() == 1) {
1582 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1583 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1584 .addReg(ARM::SP)
1585 .setMIFlags(MIFlags)
1586 .addImm(-4)
1587 .add(predOps(ARMCC::AL));
1588 }
1589 Regs.clear();
1590
1591 // Put any subsequent vpush instructions before this one: they will refer to
1592 // higher register numbers so need to be pushed first in order to preserve
1593 // monotonicity.
1594 if (MI != MBB.begin())
1595 --MI;
1596 }
1597 }
1598
emitPopInst(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,MutableArrayRef<CalleeSavedInfo> CSI,unsigned LdmOpc,unsigned LdrOpc,bool isVarArg,bool NoGap,bool (* Func)(unsigned,bool),unsigned NumAlignedDPRCS2Regs) const1599 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1600 MachineBasicBlock::iterator MI,
1601 MutableArrayRef<CalleeSavedInfo> CSI,
1602 unsigned LdmOpc, unsigned LdrOpc,
1603 bool isVarArg, bool NoGap,
1604 bool (*Func)(unsigned, bool),
1605 unsigned NumAlignedDPRCS2Regs) const {
1606 MachineFunction &MF = *MBB.getParent();
1607 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1608 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1609 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1610 bool hasPAC = AFI->shouldSignReturnAddress();
1611 DebugLoc DL;
1612 bool isTailCall = false;
1613 bool isInterrupt = false;
1614 bool isTrap = false;
1615 bool isCmseEntry = false;
1616 if (MBB.end() != MI) {
1617 DL = MI->getDebugLoc();
1618 unsigned RetOpcode = MI->getOpcode();
1619 isTailCall =
1620 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1621 RetOpcode == ARM::TCRETURNrinotr12);
1622 isInterrupt =
1623 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1624 isTrap =
1625 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1626 RetOpcode == ARM::tTRAP;
1627 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1628 }
1629
1630 SmallVector<unsigned, 4> Regs;
1631 unsigned i = CSI.size();
1632 while (i != 0) {
1633 unsigned LastReg = 0;
1634 bool DeleteRet = false;
1635 for (; i != 0; --i) {
1636 CalleeSavedInfo &Info = CSI[i-1];
1637 Register Reg = Info.getReg();
1638 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1639
1640 // The aligned reloads from area DPRCS2 are not inserted here.
1641 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1642 continue;
1643 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1644 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1645 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1646 !STI.splitFramePointerPush(MF)) {
1647 Reg = ARM::PC;
1648 // Fold the return instruction into the LDM.
1649 DeleteRet = true;
1650 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1651 }
1652
1653 // If NoGap is true, pop consecutive registers and then leave the rest
1654 // for other instructions. e.g.
1655 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1656 if (NoGap && LastReg && LastReg != Reg-1)
1657 break;
1658
1659 LastReg = Reg;
1660 Regs.push_back(Reg);
1661 }
1662
1663 if (Regs.empty())
1664 continue;
1665
1666 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1667 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1668 });
1669
1670 if (Regs.size() > 1 || LdrOpc == 0) {
1671 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1672 .addReg(ARM::SP)
1673 .add(predOps(ARMCC::AL))
1674 .setMIFlags(MachineInstr::FrameDestroy);
1675 for (unsigned Reg : Regs)
1676 MIB.addReg(Reg, getDefRegState(true));
1677 if (DeleteRet) {
1678 if (MI != MBB.end()) {
1679 MIB.copyImplicitOps(*MI);
1680 MI->eraseFromParent();
1681 }
1682 }
1683 MI = MIB;
1684 } else if (Regs.size() == 1) {
1685 // If we adjusted the reg to PC from LR above, switch it back here. We
1686 // only do that for LDM.
1687 if (Regs[0] == ARM::PC)
1688 Regs[0] = ARM::LR;
1689 MachineInstrBuilder MIB =
1690 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1691 .addReg(ARM::SP, RegState::Define)
1692 .addReg(ARM::SP)
1693 .setMIFlags(MachineInstr::FrameDestroy);
1694 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1695 // that refactoring is complete (eventually).
1696 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1697 MIB.addReg(0);
1698 MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1699 } else
1700 MIB.addImm(4);
1701 MIB.add(predOps(ARMCC::AL));
1702 }
1703 Regs.clear();
1704
1705 // Put any subsequent vpop instructions after this one: they will refer to
1706 // higher register numbers so need to be popped afterwards.
1707 if (MI != MBB.end())
1708 ++MI;
1709 }
1710 }
1711
1712 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1713 /// starting from d8. Also insert stack realignment code and leave the stack
1714 /// pointer pointing to the d8 spill slot.
emitAlignedDPRCS2Spills(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,unsigned NumAlignedDPRCS2Regs,ArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI)1715 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1716 MachineBasicBlock::iterator MI,
1717 unsigned NumAlignedDPRCS2Regs,
1718 ArrayRef<CalleeSavedInfo> CSI,
1719 const TargetRegisterInfo *TRI) {
1720 MachineFunction &MF = *MBB.getParent();
1721 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1722 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1723 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1724 MachineFrameInfo &MFI = MF.getFrameInfo();
1725
1726 // Mark the D-register spill slots as properly aligned. Since MFI computes
1727 // stack slot layout backwards, this can actually mean that the d-reg stack
1728 // slot offsets can be wrong. The offset for d8 will always be correct.
1729 for (const CalleeSavedInfo &I : CSI) {
1730 unsigned DNum = I.getReg() - ARM::D8;
1731 if (DNum > NumAlignedDPRCS2Regs - 1)
1732 continue;
1733 int FI = I.getFrameIdx();
1734 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1735 // registers will be 8-byte aligned.
1736 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1737
1738 // The stack slot for D8 needs to be maximally aligned because this is
1739 // actually the point where we align the stack pointer. MachineFrameInfo
1740 // computes all offsets relative to the incoming stack pointer which is a
1741 // bit weird when realigning the stack. Any extra padding for this
1742 // over-alignment is not realized because the code inserted below adjusts
1743 // the stack pointer by numregs * 8 before aligning the stack pointer.
1744 if (DNum == 0)
1745 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1746 }
1747
1748 // Move the stack pointer to the d8 spill slot, and align it at the same
1749 // time. Leave the stack slot address in the scratch register r4.
1750 //
1751 // sub r4, sp, #numregs * 8
1752 // bic r4, r4, #align - 1
1753 // mov sp, r4
1754 //
1755 bool isThumb = AFI->isThumbFunction();
1756 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1757 AFI->setShouldRestoreSPFromFP(true);
1758
1759 // sub r4, sp, #numregs * 8
1760 // The immediate is <= 64, so it doesn't need any special encoding.
1761 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1762 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1763 .addReg(ARM::SP)
1764 .addImm(8 * NumAlignedDPRCS2Regs)
1765 .add(predOps(ARMCC::AL))
1766 .add(condCodeOp());
1767
1768 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1769 // We must set parameter MustBeSingleInstruction to true, since
1770 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1771 // stack alignment. Luckily, this can always be done since all ARM
1772 // architecture versions that support Neon also support the BFC
1773 // instruction.
1774 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1775
1776 // mov sp, r4
1777 // The stack pointer must be adjusted before spilling anything, otherwise
1778 // the stack slots could be clobbered by an interrupt handler.
1779 // Leave r4 live, it is used below.
1780 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1781 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1782 .addReg(ARM::R4)
1783 .add(predOps(ARMCC::AL));
1784 if (!isThumb)
1785 MIB.add(condCodeOp());
1786
1787 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1788 // r4 holds the stack slot address.
1789 unsigned NextReg = ARM::D8;
1790
1791 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1792 // The writeback is only needed when emitting two vst1.64 instructions.
1793 if (NumAlignedDPRCS2Regs >= 6) {
1794 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1795 &ARM::QQPRRegClass);
1796 MBB.addLiveIn(SupReg);
1797 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1798 .addReg(ARM::R4, RegState::Kill)
1799 .addImm(16)
1800 .addReg(NextReg)
1801 .addReg(SupReg, RegState::ImplicitKill)
1802 .add(predOps(ARMCC::AL));
1803 NextReg += 4;
1804 NumAlignedDPRCS2Regs -= 4;
1805 }
1806
1807 // We won't modify r4 beyond this point. It currently points to the next
1808 // register to be spilled.
1809 unsigned R4BaseReg = NextReg;
1810
1811 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1812 if (NumAlignedDPRCS2Regs >= 4) {
1813 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1814 &ARM::QQPRRegClass);
1815 MBB.addLiveIn(SupReg);
1816 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1817 .addReg(ARM::R4)
1818 .addImm(16)
1819 .addReg(NextReg)
1820 .addReg(SupReg, RegState::ImplicitKill)
1821 .add(predOps(ARMCC::AL));
1822 NextReg += 4;
1823 NumAlignedDPRCS2Regs -= 4;
1824 }
1825
1826 // 16-byte aligned vst1.64 with 2 d-regs.
1827 if (NumAlignedDPRCS2Regs >= 2) {
1828 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1829 &ARM::QPRRegClass);
1830 MBB.addLiveIn(SupReg);
1831 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1832 .addReg(ARM::R4)
1833 .addImm(16)
1834 .addReg(SupReg)
1835 .add(predOps(ARMCC::AL));
1836 NextReg += 2;
1837 NumAlignedDPRCS2Regs -= 2;
1838 }
1839
1840 // Finally, use a vanilla vstr.64 for the odd last register.
1841 if (NumAlignedDPRCS2Regs) {
1842 MBB.addLiveIn(NextReg);
1843 // vstr.64 uses addrmode5 which has an offset scale of 4.
1844 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1845 .addReg(NextReg)
1846 .addReg(ARM::R4)
1847 .addImm((NextReg - R4BaseReg) * 2)
1848 .add(predOps(ARMCC::AL));
1849 }
1850
1851 // The last spill instruction inserted should kill the scratch register r4.
1852 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1853 }
1854
1855 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1856 /// iterator to the following instruction.
1857 static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,unsigned NumAlignedDPRCS2Regs)1858 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1859 unsigned NumAlignedDPRCS2Regs) {
1860 // sub r4, sp, #numregs * 8
1861 // bic r4, r4, #align - 1
1862 // mov sp, r4
1863 ++MI; ++MI; ++MI;
1864 assert(MI->mayStore() && "Expecting spill instruction");
1865
1866 // These switches all fall through.
1867 switch(NumAlignedDPRCS2Regs) {
1868 case 7:
1869 ++MI;
1870 assert(MI->mayStore() && "Expecting spill instruction");
1871 [[fallthrough]];
1872 default:
1873 ++MI;
1874 assert(MI->mayStore() && "Expecting spill instruction");
1875 [[fallthrough]];
1876 case 1:
1877 case 2:
1878 case 4:
1879 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
1880 ++MI;
1881 }
1882 return MI;
1883 }
1884
1885 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1886 /// starting from d8. These instructions are assumed to execute while the
1887 /// stack is still aligned, unlike the code inserted by emitPopInst.
emitAlignedDPRCS2Restores(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,unsigned NumAlignedDPRCS2Regs,ArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI)1888 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1889 MachineBasicBlock::iterator MI,
1890 unsigned NumAlignedDPRCS2Regs,
1891 ArrayRef<CalleeSavedInfo> CSI,
1892 const TargetRegisterInfo *TRI) {
1893 MachineFunction &MF = *MBB.getParent();
1894 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1895 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1896 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1897
1898 // Find the frame index assigned to d8.
1899 int D8SpillFI = 0;
1900 for (const CalleeSavedInfo &I : CSI)
1901 if (I.getReg() == ARM::D8) {
1902 D8SpillFI = I.getFrameIdx();
1903 break;
1904 }
1905
1906 // Materialize the address of the d8 spill slot into the scratch register r4.
1907 // This can be fairly complicated if the stack frame is large, so just use
1908 // the normal frame index elimination mechanism to do it. This code runs as
1909 // the initial part of the epilog where the stack and base pointers haven't
1910 // been changed yet.
1911 bool isThumb = AFI->isThumbFunction();
1912 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1913
1914 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1915 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1916 .addFrameIndex(D8SpillFI)
1917 .addImm(0)
1918 .add(predOps(ARMCC::AL))
1919 .add(condCodeOp());
1920
1921 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1922 unsigned NextReg = ARM::D8;
1923
1924 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1925 if (NumAlignedDPRCS2Regs >= 6) {
1926 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1927 &ARM::QQPRRegClass);
1928 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1929 .addReg(ARM::R4, RegState::Define)
1930 .addReg(ARM::R4, RegState::Kill)
1931 .addImm(16)
1932 .addReg(SupReg, RegState::ImplicitDefine)
1933 .add(predOps(ARMCC::AL));
1934 NextReg += 4;
1935 NumAlignedDPRCS2Regs -= 4;
1936 }
1937
1938 // We won't modify r4 beyond this point. It currently points to the next
1939 // register to be spilled.
1940 unsigned R4BaseReg = NextReg;
1941
1942 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1943 if (NumAlignedDPRCS2Regs >= 4) {
1944 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1945 &ARM::QQPRRegClass);
1946 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1947 .addReg(ARM::R4)
1948 .addImm(16)
1949 .addReg(SupReg, RegState::ImplicitDefine)
1950 .add(predOps(ARMCC::AL));
1951 NextReg += 4;
1952 NumAlignedDPRCS2Regs -= 4;
1953 }
1954
1955 // 16-byte aligned vld1.64 with 2 d-regs.
1956 if (NumAlignedDPRCS2Regs >= 2) {
1957 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1958 &ARM::QPRRegClass);
1959 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1960 .addReg(ARM::R4)
1961 .addImm(16)
1962 .add(predOps(ARMCC::AL));
1963 NextReg += 2;
1964 NumAlignedDPRCS2Regs -= 2;
1965 }
1966
1967 // Finally, use a vanilla vldr.64 for the remaining odd register.
1968 if (NumAlignedDPRCS2Regs)
1969 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1970 .addReg(ARM::R4)
1971 .addImm(2 * (NextReg - R4BaseReg))
1972 .add(predOps(ARMCC::AL));
1973
1974 // Last store kills r4.
1975 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1976 }
1977
spillCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,ArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI) const1978 bool ARMFrameLowering::spillCalleeSavedRegisters(
1979 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1980 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
1981 if (CSI.empty())
1982 return false;
1983
1984 MachineFunction &MF = *MBB.getParent();
1985 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1986
1987 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1988 unsigned PushOneOpc = AFI->isThumbFunction() ?
1989 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1990 unsigned FltOpc = ARM::VSTMDDB_UPD;
1991 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1992 // Compute PAC in R12.
1993 if (AFI->shouldSignReturnAddress()) {
1994 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1995 .setMIFlags(MachineInstr::FrameSetup);
1996 }
1997 // Save the non-secure floating point context.
1998 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
1999 return C.getReg() == ARM::FPCXTNS;
2000 })) {
2001 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2002 ARM::SP)
2003 .addReg(ARM::SP)
2004 .addImm(-4)
2005 .add(predOps(ARMCC::AL));
2006 }
2007 if (STI.splitFramePointerPush(MF)) {
2008 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2009 &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
2010 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2011 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2012 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2013 &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
2014 } else {
2015 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
2016 0, MachineInstr::FrameSetup);
2017 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
2018 0, MachineInstr::FrameSetup);
2019 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2020 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2021 }
2022
2023 // The code above does not insert spill code for the aligned DPRCS2 registers.
2024 // The stack realignment code will be inserted between the push instructions
2025 // and these spills.
2026 if (NumAlignedDPRCS2Regs)
2027 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2028
2029 return true;
2030 }
2031
restoreCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,MutableArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI) const2032 bool ARMFrameLowering::restoreCalleeSavedRegisters(
2033 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2034 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2035 if (CSI.empty())
2036 return false;
2037
2038 MachineFunction &MF = *MBB.getParent();
2039 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2040 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2041 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2042
2043 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2044 // registers. Do that here instead.
2045 if (NumAlignedDPRCS2Regs)
2046 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2047
2048 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2049 unsigned LdrOpc =
2050 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2051 unsigned FltOpc = ARM::VLDMDIA_UPD;
2052 if (STI.splitFramePointerPush(MF)) {
2053 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2054 &isSplitFPArea2Register, 0);
2055 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2056 NumAlignedDPRCS2Regs);
2057 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2058 &isSplitFPArea1Register, 0);
2059 } else {
2060 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2061 NumAlignedDPRCS2Regs);
2062 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2063 &isARMArea2Register, 0);
2064 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2065 &isARMArea1Register, 0);
2066 }
2067
2068 return true;
2069 }
2070
2071 // FIXME: Make generic?
EstimateFunctionSizeInBytes(const MachineFunction & MF,const ARMBaseInstrInfo & TII)2072 static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2073 const ARMBaseInstrInfo &TII) {
2074 unsigned FnSize = 0;
2075 for (auto &MBB : MF) {
2076 for (auto &MI : MBB)
2077 FnSize += TII.getInstSizeInBytes(MI);
2078 }
2079 if (MF.getJumpTableInfo())
2080 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2081 FnSize += Table.MBBs.size() * 4;
2082 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2083 return FnSize;
2084 }
2085
2086 /// estimateRSStackSizeLimit - Look at each instruction that references stack
2087 /// frames and return the stack size limit beyond which some of these
2088 /// instructions will require a scratch register during their expansion later.
2089 // FIXME: Move to TII?
estimateRSStackSizeLimit(MachineFunction & MF,const TargetFrameLowering * TFI,bool & HasNonSPFrameIndex)2090 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
2091 const TargetFrameLowering *TFI,
2092 bool &HasNonSPFrameIndex) {
2093 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2094 const ARMBaseInstrInfo &TII =
2095 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2096 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2097 unsigned Limit = (1 << 12) - 1;
2098 for (auto &MBB : MF) {
2099 for (auto &MI : MBB) {
2100 if (MI.isDebugInstr())
2101 continue;
2102 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2103 if (!MI.getOperand(i).isFI())
2104 continue;
2105
2106 // When using ADDri to get the address of a stack object, 255 is the
2107 // largest offset guaranteed to fit in the immediate offset.
2108 if (MI.getOpcode() == ARM::ADDri) {
2109 Limit = std::min(Limit, (1U << 8) - 1);
2110 break;
2111 }
2112 // t2ADDri will not require an extra register, it can reuse the
2113 // destination.
2114 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2115 break;
2116
2117 const MCInstrDesc &MCID = MI.getDesc();
2118 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2119 if (RegClass && !RegClass->contains(ARM::SP))
2120 HasNonSPFrameIndex = true;
2121
2122 // Otherwise check the addressing mode.
2123 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2124 case ARMII::AddrMode_i12:
2125 case ARMII::AddrMode2:
2126 // Default 12 bit limit.
2127 break;
2128 case ARMII::AddrMode3:
2129 case ARMII::AddrModeT2_i8neg:
2130 Limit = std::min(Limit, (1U << 8) - 1);
2131 break;
2132 case ARMII::AddrMode5FP16:
2133 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2134 break;
2135 case ARMII::AddrMode5:
2136 case ARMII::AddrModeT2_i8s4:
2137 case ARMII::AddrModeT2_ldrex:
2138 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2139 break;
2140 case ARMII::AddrModeT2_i12:
2141 // i12 supports only positive offset so these will be converted to
2142 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2143 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2144 Limit = std::min(Limit, (1U << 8) - 1);
2145 break;
2146 case ARMII::AddrMode4:
2147 case ARMII::AddrMode6:
2148 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2149 // immediate offset for stack references.
2150 return 0;
2151 case ARMII::AddrModeT2_i7:
2152 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2153 break;
2154 case ARMII::AddrModeT2_i7s2:
2155 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2156 break;
2157 case ARMII::AddrModeT2_i7s4:
2158 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2159 break;
2160 default:
2161 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2162 }
2163 break; // At most one FI per instruction
2164 }
2165 }
2166 }
2167
2168 return Limit;
2169 }
2170
2171 // In functions that realign the stack, it can be an advantage to spill the
2172 // callee-saved vector registers after realigning the stack. The vst1 and vld1
2173 // instructions take alignment hints that can improve performance.
2174 static void
checkNumAlignedDPRCS2Regs(MachineFunction & MF,BitVector & SavedRegs)2175 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
2176 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2177 if (!SpillAlignedNEONRegs)
2178 return;
2179
2180 // Naked functions don't spill callee-saved registers.
2181 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2182 return;
2183
2184 // We are planning to use NEON instructions vst1 / vld1.
2185 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2186 return;
2187
2188 // Don't bother if the default stack alignment is sufficiently high.
2189 if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
2190 return;
2191
2192 // Aligned spills require stack realignment.
2193 if (!static_cast<const ARMBaseRegisterInfo *>(
2194 MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
2195 return;
2196
2197 // We always spill contiguous d-registers starting from d8. Count how many
2198 // needs spilling. The register allocator will almost always use the
2199 // callee-saved registers in order, but it can happen that there are holes in
2200 // the range. Registers above the hole will be spilled to the standard DPRCS
2201 // area.
2202 unsigned NumSpills = 0;
2203 for (; NumSpills < 8; ++NumSpills)
2204 if (!SavedRegs.test(ARM::D8 + NumSpills))
2205 break;
2206
2207 // Don't do this for just one d-register. It's not worth it.
2208 if (NumSpills < 2)
2209 return;
2210
2211 // Spill the first NumSpills D-registers after realigning the stack.
2212 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2213
2214 // A scratch register is required for the vst1 / vld1 instructions.
2215 SavedRegs.set(ARM::R4);
2216 }
2217
enableShrinkWrapping(const MachineFunction & MF) const2218 bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2219 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2220 // upon function entry (resp. restore it immmediately before return)
2221 if (STI.hasV8_1MMainlineOps() &&
2222 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2223 return false;
2224
2225 // We are disabling shrinkwrapping for now when PAC is enabled, as
2226 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2227 // generated. A follow-up patch will fix this in a more performant manner.
2228 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2229 true /* SpillsLR */))
2230 return false;
2231
2232 return true;
2233 }
2234
requiresAAPCSFrameRecord(const MachineFunction & MF) const2235 bool ARMFrameLowering::requiresAAPCSFrameRecord(
2236 const MachineFunction &MF) const {
2237 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2238 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2239 }
2240
2241 // Thumb1 may require a spill when storing to a frame index through FP (or any
2242 // access with execute-only), for cases where FP is a high register (R11). This
2243 // scans the function for cases where this may happen.
canSpillOnFrameIndexAccess(const MachineFunction & MF,const TargetFrameLowering & TFI)2244 static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2245 const TargetFrameLowering &TFI) {
2246 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2247 if (!AFI->isThumb1OnlyFunction())
2248 return false;
2249
2250 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2251 for (const auto &MBB : MF)
2252 for (const auto &MI : MBB)
2253 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2254 STI.genExecuteOnly())
2255 for (const auto &Op : MI.operands())
2256 if (Op.isFI()) {
2257 Register Reg;
2258 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2259 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2260 return true;
2261 }
2262 return false;
2263 }
2264
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const2265 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
2266 BitVector &SavedRegs,
2267 RegScavenger *RS) const {
2268 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2269 // This tells PEI to spill the FP as if it is any other callee-save register
2270 // to take advantage the eliminateFrameIndex machinery. This also ensures it
2271 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2272 // to combine multiple loads / stores.
2273 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2274 bool CS1Spilled = false;
2275 bool LRSpilled = false;
2276 unsigned NumGPRSpills = 0;
2277 unsigned NumFPRSpills = 0;
2278 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2279 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2280 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2281 MF.getSubtarget().getRegisterInfo());
2282 const ARMBaseInstrInfo &TII =
2283 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2284 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2285 MachineFrameInfo &MFI = MF.getFrameInfo();
2286 MachineRegisterInfo &MRI = MF.getRegInfo();
2287 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2288 (void)TRI; // Silence unused warning in non-assert builds.
2289 Register FramePtr = RegInfo->getFrameRegister(MF);
2290
2291 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2292 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2293 // since it's not always possible to restore sp from fp in a single
2294 // instruction.
2295 // FIXME: It will be better just to find spare register here.
2296 if (AFI->isThumb2Function() &&
2297 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2298 SavedRegs.set(ARM::R4);
2299
2300 // If a stack probe will be emitted, spill R4 and LR, since they are
2301 // clobbered by the stack probe call.
2302 // This estimate should be a safe, conservative estimate. The actual
2303 // stack probe is enabled based on the size of the local objects;
2304 // this estimate also includes the varargs store size.
2305 if (STI.isTargetWindows() &&
2306 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2307 SavedRegs.set(ARM::R4);
2308 SavedRegs.set(ARM::LR);
2309 }
2310
2311 if (AFI->isThumb1OnlyFunction()) {
2312 // Spill LR if Thumb1 function uses variable length argument lists.
2313 if (AFI->getArgRegsSaveSize() > 0)
2314 SavedRegs.set(ARM::LR);
2315
2316 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2317 // requires stack alignment. We don't know for sure what the stack size
2318 // will be, but for this, an estimate is good enough. If there anything
2319 // changes it, it'll be a spill, which implies we've used all the registers
2320 // and so R4 is already used, so not marking it here will be OK.
2321 // FIXME: It will be better just to find spare register here.
2322 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2323 MFI.estimateStackSize(MF) > 508)
2324 SavedRegs.set(ARM::R4);
2325 }
2326
2327 // See if we can spill vector registers to aligned stack.
2328 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2329
2330 // Spill the BasePtr if it's used.
2331 if (RegInfo->hasBasePointer(MF))
2332 SavedRegs.set(RegInfo->getBaseRegister());
2333
2334 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2335 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2336 CanEliminateFrame = false;
2337
2338 // When return address signing is enabled R12 is treated as callee-saved.
2339 if (AFI->shouldSignReturnAddress())
2340 CanEliminateFrame = false;
2341
2342 // Don't spill FP if the frame can be eliminated. This is determined
2343 // by scanning the callee-save registers to see if any is modified.
2344 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2345 for (unsigned i = 0; CSRegs[i]; ++i) {
2346 unsigned Reg = CSRegs[i];
2347 bool Spilled = false;
2348 if (SavedRegs.test(Reg)) {
2349 Spilled = true;
2350 CanEliminateFrame = false;
2351 }
2352
2353 if (!ARM::GPRRegClass.contains(Reg)) {
2354 if (Spilled) {
2355 if (ARM::SPRRegClass.contains(Reg))
2356 NumFPRSpills++;
2357 else if (ARM::DPRRegClass.contains(Reg))
2358 NumFPRSpills += 2;
2359 else if (ARM::QPRRegClass.contains(Reg))
2360 NumFPRSpills += 4;
2361 }
2362 continue;
2363 }
2364
2365 if (Spilled) {
2366 NumGPRSpills++;
2367
2368 if (!STI.splitFramePushPop(MF)) {
2369 if (Reg == ARM::LR)
2370 LRSpilled = true;
2371 CS1Spilled = true;
2372 continue;
2373 }
2374
2375 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2376 switch (Reg) {
2377 case ARM::LR:
2378 LRSpilled = true;
2379 [[fallthrough]];
2380 case ARM::R0: case ARM::R1:
2381 case ARM::R2: case ARM::R3:
2382 case ARM::R4: case ARM::R5:
2383 case ARM::R6: case ARM::R7:
2384 CS1Spilled = true;
2385 break;
2386 default:
2387 break;
2388 }
2389 } else {
2390 if (!STI.splitFramePushPop(MF)) {
2391 UnspilledCS1GPRs.push_back(Reg);
2392 continue;
2393 }
2394
2395 switch (Reg) {
2396 case ARM::R0: case ARM::R1:
2397 case ARM::R2: case ARM::R3:
2398 case ARM::R4: case ARM::R5:
2399 case ARM::R6: case ARM::R7:
2400 case ARM::LR:
2401 UnspilledCS1GPRs.push_back(Reg);
2402 break;
2403 default:
2404 UnspilledCS2GPRs.push_back(Reg);
2405 break;
2406 }
2407 }
2408 }
2409
2410 bool ForceLRSpill = false;
2411 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2412 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2413 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2414 // use of BL to implement far jump.
2415 if (FnSize >= (1 << 11)) {
2416 CanEliminateFrame = false;
2417 ForceLRSpill = true;
2418 }
2419 }
2420
2421 // If any of the stack slot references may be out of range of an immediate
2422 // offset, make sure a register (or a spill slot) is available for the
2423 // register scavenger. Note that if we're indexing off the frame pointer, the
2424 // effective stack size is 4 bytes larger since the FP points to the stack
2425 // slot of the previous FP. Also, if we have variable sized objects in the
2426 // function, stack slot references will often be negative, and some of
2427 // our instructions are positive-offset only, so conservatively consider
2428 // that case to want a spill slot (or register) as well. Similarly, if
2429 // the function adjusts the stack pointer during execution and the
2430 // adjustments aren't already part of our stack size estimate, our offset
2431 // calculations may be off, so be conservative.
2432 // FIXME: We could add logic to be more precise about negative offsets
2433 // and which instructions will need a scratch register for them. Is it
2434 // worth the effort and added fragility?
2435 unsigned EstimatedStackSize =
2436 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2437
2438 // Determine biggest (positive) SP offset in MachineFrameInfo.
2439 int MaxFixedOffset = 0;
2440 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2441 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2442 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2443 }
2444
2445 bool HasFP = hasFP(MF);
2446 if (HasFP) {
2447 if (AFI->hasStackFrame())
2448 EstimatedStackSize += 4;
2449 } else {
2450 // If FP is not used, SP will be used to access arguments, so count the
2451 // size of arguments into the estimation.
2452 EstimatedStackSize += MaxFixedOffset;
2453 }
2454 EstimatedStackSize += 16; // For possible paddings.
2455
2456 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2457 bool HasNonSPFrameIndex = false;
2458 if (AFI->isThumb1OnlyFunction()) {
2459 // For Thumb1, don't bother to iterate over the function. The only
2460 // instruction that requires an emergency spill slot is a store to a
2461 // frame index.
2462 //
2463 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2464 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2465 // a 5-bit unsigned immediate.
2466 //
2467 // We could try to check if the function actually contains a tSTRspi
2468 // that might need the spill slot, but it's not really important.
2469 // Functions with VLAs or extremely large call frames are rare, and
2470 // if a function is allocating more than 1KB of stack, an extra 4-byte
2471 // slot probably isn't relevant.
2472 //
2473 // A special case is the scenario where r11 is used as FP, where accesses
2474 // to a frame index will require its value to be moved into a low reg.
2475 // This is handled later on, once we are able to determine if we have any
2476 // fp-relative accesses.
2477 if (RegInfo->hasBasePointer(MF))
2478 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2479 else
2480 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2481 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2482 } else {
2483 EstimatedRSStackSizeLimit =
2484 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2485 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2486 }
2487 // Final estimate of whether sp or bp-relative accesses might require
2488 // scavenging.
2489 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2490
2491 // If the stack pointer moves and we don't have a base pointer, the
2492 // estimate logic doesn't work. The actual offsets might be larger when
2493 // we're constructing a call frame, or we might need to use negative
2494 // offsets from fp.
2495 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2496 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2497 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2498
2499 // If we have a frame pointer, we assume arguments will be accessed
2500 // relative to the frame pointer. Check whether fp-relative accesses to
2501 // arguments require scavenging.
2502 //
2503 // We could do slightly better on Thumb1; in some cases, an sp-relative
2504 // offset would be legal even though an fp-relative offset is not.
2505 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2506 bool HasLargeArgumentList =
2507 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2508
2509 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2510 HasLargeArgumentList || HasNonSPFrameIndex;
2511 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2512 << "; EstimatedStack: " << EstimatedStackSize
2513 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2514 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2515 if (BigFrameOffsets ||
2516 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2517 AFI->setHasStackFrame(true);
2518
2519 if (HasFP) {
2520 SavedRegs.set(FramePtr);
2521 // If the frame pointer is required by the ABI, also spill LR so that we
2522 // emit a complete frame record.
2523 if ((requiresAAPCSFrameRecord(MF) ||
2524 MF.getTarget().Options.DisableFramePointerElim(MF)) &&
2525 !LRSpilled) {
2526 SavedRegs.set(ARM::LR);
2527 LRSpilled = true;
2528 NumGPRSpills++;
2529 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2530 if (LRPos != UnspilledCS1GPRs.end())
2531 UnspilledCS1GPRs.erase(LRPos);
2532 }
2533 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2534 if (FPPos != UnspilledCS1GPRs.end())
2535 UnspilledCS1GPRs.erase(FPPos);
2536 NumGPRSpills++;
2537 if (FramePtr == ARM::R7)
2538 CS1Spilled = true;
2539 }
2540
2541 // This is the number of extra spills inserted for callee-save GPRs which
2542 // would not otherwise be used by the function. When greater than zero it
2543 // guaranteees that it is possible to scavenge a register to hold the
2544 // address of a stack slot. On Thumb1, the register must be a valid operand
2545 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2546 // or lr.
2547 //
2548 // If we don't insert a spill, we instead allocate an emergency spill
2549 // slot, which can be used by scavenging to spill an arbitrary register.
2550 //
2551 // We currently don't try to figure out whether any specific instruction
2552 // requires scavening an additional register.
2553 unsigned NumExtraCSSpill = 0;
2554
2555 if (AFI->isThumb1OnlyFunction()) {
2556 // For Thumb1-only targets, we need some low registers when we save and
2557 // restore the high registers (which aren't allocatable, but could be
2558 // used by inline assembly) because the push/pop instructions can not
2559 // access high registers. If necessary, we might need to push more low
2560 // registers to ensure that there is at least one free that can be used
2561 // for the saving & restoring, and preferably we should ensure that as
2562 // many as are needed are available so that fewer push/pop instructions
2563 // are required.
2564
2565 // Low registers which are not currently pushed, but could be (r4-r7).
2566 SmallVector<unsigned, 4> AvailableRegs;
2567
2568 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2569 // free.
2570 int EntryRegDeficit = 0;
2571 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2572 if (!MF.getRegInfo().isLiveIn(Reg)) {
2573 --EntryRegDeficit;
2574 LLVM_DEBUG(dbgs()
2575 << printReg(Reg, TRI)
2576 << " is unused argument register, EntryRegDeficit = "
2577 << EntryRegDeficit << "\n");
2578 }
2579 }
2580
2581 // Unused return registers can be clobbered in the epilogue for free.
2582 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2583 LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
2584 << " return regs used, ExitRegDeficit = "
2585 << ExitRegDeficit << "\n");
2586
2587 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2588 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2589
2590 // r4-r6 can be used in the prologue if they are pushed by the first push
2591 // instruction.
2592 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2593 if (SavedRegs.test(Reg)) {
2594 --RegDeficit;
2595 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2596 << " is saved low register, RegDeficit = "
2597 << RegDeficit << "\n");
2598 } else {
2599 AvailableRegs.push_back(Reg);
2600 LLVM_DEBUG(
2601 dbgs()
2602 << printReg(Reg, TRI)
2603 << " is non-saved low register, adding to AvailableRegs\n");
2604 }
2605 }
2606
2607 // r7 can be used if it is not being used as the frame pointer.
2608 if (!HasFP || FramePtr != ARM::R7) {
2609 if (SavedRegs.test(ARM::R7)) {
2610 --RegDeficit;
2611 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2612 << RegDeficit << "\n");
2613 } else {
2614 AvailableRegs.push_back(ARM::R7);
2615 LLVM_DEBUG(
2616 dbgs()
2617 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2618 }
2619 }
2620
2621 // Each of r8-r11 needs to be copied to a low register, then pushed.
2622 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2623 if (SavedRegs.test(Reg)) {
2624 ++RegDeficit;
2625 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2626 << " is saved high register, RegDeficit = "
2627 << RegDeficit << "\n");
2628 }
2629 }
2630
2631 // LR can only be used by PUSH, not POP, and can't be used at all if the
2632 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2633 // are more limited at function entry than exit.
2634 if ((EntryRegDeficit > ExitRegDeficit) &&
2635 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2636 MF.getFrameInfo().isReturnAddressTaken())) {
2637 if (SavedRegs.test(ARM::LR)) {
2638 --RegDeficit;
2639 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2640 << RegDeficit << "\n");
2641 } else {
2642 AvailableRegs.push_back(ARM::LR);
2643 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2644 }
2645 }
2646
2647 // If there are more high registers that need pushing than low registers
2648 // available, push some more low registers so that we can use fewer push
2649 // instructions. This might not reduce RegDeficit all the way to zero,
2650 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2651 // need saving.
2652 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2653 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2654 unsigned Reg = AvailableRegs.pop_back_val();
2655 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2656 << " to make up reg deficit\n");
2657 SavedRegs.set(Reg);
2658 NumGPRSpills++;
2659 CS1Spilled = true;
2660 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2661 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2662 NumExtraCSSpill++;
2663 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2664 if (Reg == ARM::LR)
2665 LRSpilled = true;
2666 }
2667 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2668 << "\n");
2669 }
2670
2671 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2672 // restore LR in that case.
2673 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2674
2675 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2676 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2677 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2678 SavedRegs.set(ARM::LR);
2679 NumGPRSpills++;
2680 SmallVectorImpl<unsigned>::iterator LRPos;
2681 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2682 if (LRPos != UnspilledCS1GPRs.end())
2683 UnspilledCS1GPRs.erase(LRPos);
2684
2685 ForceLRSpill = false;
2686 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2687 !AFI->isThumb1OnlyFunction())
2688 NumExtraCSSpill++;
2689 }
2690
2691 // If stack and double are 8-byte aligned and we are spilling an odd number
2692 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2693 // the integer and double callee save areas.
2694 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2695 const Align TargetAlign = getStackAlign();
2696 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2697 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2698 for (unsigned Reg : UnspilledCS1GPRs) {
2699 // Don't spill high register if the function is thumb. In the case of
2700 // Windows on ARM, accept R11 (frame pointer)
2701 if (!AFI->isThumbFunction() ||
2702 (STI.isTargetWindows() && Reg == ARM::R11) ||
2703 isARMLowRegister(Reg) ||
2704 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2705 SavedRegs.set(Reg);
2706 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2707 << " to make up alignment\n");
2708 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2709 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2710 NumExtraCSSpill++;
2711 break;
2712 }
2713 }
2714 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2715 unsigned Reg = UnspilledCS2GPRs.front();
2716 SavedRegs.set(Reg);
2717 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2718 << " to make up alignment\n");
2719 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2720 NumExtraCSSpill++;
2721 }
2722 }
2723
2724 // Estimate if we might need to scavenge registers at some point in order
2725 // to materialize a stack offset. If so, either spill one additional
2726 // callee-saved register or reserve a special spill slot to facilitate
2727 // register scavenging. Thumb1 needs a spill slot for stack pointer
2728 // adjustments and for frame index accesses when FP is high register,
2729 // even when the frame itself is small.
2730 unsigned RegsNeeded = 0;
2731 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
2732 RegsNeeded++;
2733 // With thumb1 execute-only we may need an additional register for saving
2734 // and restoring the CPSR.
2735 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
2736 RegsNeeded++;
2737 }
2738
2739 if (RegsNeeded > NumExtraCSSpill) {
2740 // If any non-reserved CS register isn't spilled, just spill one or two
2741 // extra. That should take care of it!
2742 unsigned NumExtras = TargetAlign.value() / 4;
2743 SmallVector<unsigned, 2> Extras;
2744 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2745 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2746 if (!MRI.isReserved(Reg) &&
2747 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2748 Extras.push_back(Reg);
2749 NumExtras--;
2750 }
2751 }
2752 // For non-Thumb1 functions, also check for hi-reg CS registers
2753 if (!AFI->isThumb1OnlyFunction()) {
2754 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2755 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2756 if (!MRI.isReserved(Reg)) {
2757 Extras.push_back(Reg);
2758 NumExtras--;
2759 }
2760 }
2761 }
2762 if (NumExtras == 0) {
2763 for (unsigned Reg : Extras) {
2764 SavedRegs.set(Reg);
2765 if (!MRI.isPhysRegUsed(Reg))
2766 NumExtraCSSpill++;
2767 }
2768 }
2769 while ((RegsNeeded > NumExtraCSSpill) && RS) {
2770 // Reserve a slot closest to SP or frame pointer.
2771 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2772 const TargetRegisterClass &RC = ARM::GPRRegClass;
2773 unsigned Size = TRI->getSpillSize(RC);
2774 Align Alignment = TRI->getSpillAlign(RC);
2775 RS->addScavengingFrameIndex(
2776 MFI.CreateStackObject(Size, Alignment, false));
2777 --RegsNeeded;
2778 }
2779 }
2780 }
2781
2782 if (ForceLRSpill)
2783 SavedRegs.set(ARM::LR);
2784 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2785 }
2786
updateLRRestored(MachineFunction & MF)2787 void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
2788 MachineFrameInfo &MFI = MF.getFrameInfo();
2789 if (!MFI.isCalleeSavedInfoValid())
2790 return;
2791
2792 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
2793 // into PC so it is not live out of the return block: Clear the Restored bit
2794 // in that case.
2795 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
2796 if (Info.getReg() != ARM::LR)
2797 continue;
2798 if (all_of(MF, [](const MachineBasicBlock &MBB) {
2799 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
2800 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
2801 Term.getOpcode() == ARM::t2LDMIA_RET ||
2802 Term.getOpcode() == ARM::tPOP_RET;
2803 });
2804 })) {
2805 Info.setRestored(false);
2806 break;
2807 }
2808 }
2809 }
2810
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const2811 void ARMFrameLowering::processFunctionBeforeFrameFinalized(
2812 MachineFunction &MF, RegScavenger *RS) const {
2813 TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
2814 updateLRRestored(MF);
2815 }
2816
getCalleeSaves(const MachineFunction & MF,BitVector & SavedRegs) const2817 void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
2818 BitVector &SavedRegs) const {
2819 TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
2820
2821 // If we have the "returned" parameter attribute which guarantees that we
2822 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2823 // record that fact for IPRA.
2824 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2825 if (AFI->getPreservesR0())
2826 SavedRegs.set(ARM::R0);
2827 }
2828
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const2829 bool ARMFrameLowering::assignCalleeSavedSpillSlots(
2830 MachineFunction &MF, const TargetRegisterInfo *TRI,
2831 std::vector<CalleeSavedInfo> &CSI) const {
2832 // For CMSE entry functions, handle floating-point context as if it was a
2833 // callee-saved register.
2834 if (STI.hasV8_1MMainlineOps() &&
2835 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
2836 CSI.emplace_back(ARM::FPCXTNS);
2837 CSI.back().setRestored(false);
2838 }
2839
2840 // For functions, which sign their return address, upon function entry, the
2841 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2842 // in this case.
2843 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2844 if (AFI.shouldSignReturnAddress()) {
2845 // The order of register must match the order we push them, because the
2846 // PEI assigns frame indices in that order. When compiling for return
2847 // address sign and authenication, we use split push, therefore the orders
2848 // we want are:
2849 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2850 CSI.insert(find_if(CSI,
2851 [=](const auto &CS) {
2852 Register Reg = CS.getReg();
2853 return Reg == ARM::R10 || Reg == ARM::R11 ||
2854 Reg == ARM::R8 || Reg == ARM::R9 ||
2855 ARM::DPRRegClass.contains(Reg);
2856 }),
2857 CalleeSavedInfo(ARM::R12));
2858 }
2859
2860 return false;
2861 }
2862
2863 const TargetFrameLowering::SpillSlot *
getCalleeSavedSpillSlots(unsigned & NumEntries) const2864 ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
2865 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2866 NumEntries = std::size(FixedSpillOffsets);
2867 return FixedSpillOffsets;
2868 }
2869
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const2870 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2871 MachineFunction &MF, MachineBasicBlock &MBB,
2872 MachineBasicBlock::iterator I) const {
2873 const ARMBaseInstrInfo &TII =
2874 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2875 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2876 bool isARM = !AFI->isThumbFunction();
2877 DebugLoc dl = I->getDebugLoc();
2878 unsigned Opc = I->getOpcode();
2879 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2880 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2881
2882 assert(!AFI->isThumb1OnlyFunction() &&
2883 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2884
2885 int PIdx = I->findFirstPredOperandIdx();
2886 ARMCC::CondCodes Pred = (PIdx == -1)
2887 ? ARMCC::AL
2888 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2889 unsigned PredReg = TII.getFramePred(*I);
2890
2891 if (!hasReservedCallFrame(MF)) {
2892 // Bail early if the callee is expected to do the adjustment.
2893 if (IsDestroy && CalleePopAmount != -1U)
2894 return MBB.erase(I);
2895
2896 // If we have alloca, convert as follows:
2897 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2898 // ADJCALLSTACKUP -> add, sp, sp, amount
2899 unsigned Amount = TII.getFrameSize(*I);
2900 if (Amount != 0) {
2901 // We need to keep the stack aligned properly. To do this, we round the
2902 // amount of space needed for the outgoing arguments up to the next
2903 // alignment boundary.
2904 Amount = alignSPAdjust(Amount);
2905
2906 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2907 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2908 Pred, PredReg);
2909 } else {
2910 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2911 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2912 Pred, PredReg);
2913 }
2914 }
2915 } else if (CalleePopAmount != -1U) {
2916 // If the calling convention demands that the callee pops arguments from the
2917 // stack, we want to add it back if we have a reserved call frame.
2918 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2919 MachineInstr::NoFlags, Pred, PredReg);
2920 }
2921 return MBB.erase(I);
2922 }
2923
2924 /// Get the minimum constant for ARM that is greater than or equal to the
2925 /// argument. In ARM, constants can have any value that can be produced by
2926 /// rotating an 8-bit value to the right by an even number of bits within a
2927 /// 32-bit word.
alignToARMConstant(uint32_t Value)2928 static uint32_t alignToARMConstant(uint32_t Value) {
2929 unsigned Shifted = 0;
2930
2931 if (Value == 0)
2932 return 0;
2933
2934 while (!(Value & 0xC0000000)) {
2935 Value = Value << 2;
2936 Shifted += 2;
2937 }
2938
2939 bool Carry = (Value & 0x00FFFFFF);
2940 Value = ((Value & 0xFF000000) >> 24) + Carry;
2941
2942 if (Value & 0x0000100)
2943 Value = Value & 0x000001FC;
2944
2945 if (Shifted > 24)
2946 Value = Value >> (Shifted - 24);
2947 else
2948 Value = Value << (24 - Shifted);
2949
2950 return Value;
2951 }
2952
2953 // The stack limit in the TCB is set to this many bytes above the actual
2954 // stack limit.
2955 static const uint64_t kSplitStackAvailable = 256;
2956
2957 // Adjust the function prologue to enable split stacks. This currently only
2958 // supports android and linux.
2959 //
2960 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2961 // must be well defined in order to allow for consistent implementations of the
2962 // __morestack helper function. The ABI is also not a normal ABI in that it
2963 // doesn't follow the normal calling conventions because this allows the
2964 // prologue of each function to be optimized further.
2965 //
2966 // Currently, the ABI looks like (when calling __morestack)
2967 //
2968 // * r4 holds the minimum stack size requested for this function call
2969 // * r5 holds the stack size of the arguments to the function
2970 // * the beginning of the function is 3 instructions after the call to
2971 // __morestack
2972 //
2973 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2974 // place the arguments on to the new stack, and the 3-instruction knowledge to
2975 // jump directly to the body of the function when working on the new stack.
2976 //
2977 // An old (and possibly no longer compatible) implementation of __morestack for
2978 // ARM can be found at [1].
2979 //
2980 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
adjustForSegmentedStacks(MachineFunction & MF,MachineBasicBlock & PrologueMBB) const2981 void ARMFrameLowering::adjustForSegmentedStacks(
2982 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2983 unsigned Opcode;
2984 unsigned CFIIndex;
2985 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2986 bool Thumb = ST->isThumb();
2987 bool Thumb2 = ST->isThumb2();
2988
2989 // Sadly, this currently doesn't support varargs, platforms other than
2990 // android/linux. Note that thumb1/thumb2 are support for android/linux.
2991 if (MF.getFunction().isVarArg())
2992 report_fatal_error("Segmented stacks do not support vararg functions.");
2993 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2994 report_fatal_error("Segmented stacks not supported on this platform.");
2995
2996 MachineFrameInfo &MFI = MF.getFrameInfo();
2997 MCContext &Context = MF.getContext();
2998 const MCRegisterInfo *MRI = Context.getRegisterInfo();
2999 const ARMBaseInstrInfo &TII =
3000 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3001 ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
3002 DebugLoc DL;
3003
3004 if (!MFI.needsSplitStackProlog())
3005 return;
3006
3007 uint64_t StackSize = MFI.getStackSize();
3008
3009 // Use R4 and R5 as scratch registers.
3010 // We save R4 and R5 before use and restore them before leaving the function.
3011 unsigned ScratchReg0 = ARM::R4;
3012 unsigned ScratchReg1 = ARM::R5;
3013 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3014 uint64_t AlignedStackSize;
3015
3016 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3017 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3018 MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
3019 MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
3020 MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
3021
3022 // Grab everything that reaches PrologueMBB to update there liveness as well.
3023 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3024 SmallVector<MachineBasicBlock *, 2> WalkList;
3025 WalkList.push_back(&PrologueMBB);
3026
3027 do {
3028 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3029 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3030 if (BeforePrologueRegion.insert(PredBB).second)
3031 WalkList.push_back(PredBB);
3032 }
3033 } while (!WalkList.empty());
3034
3035 // The order in that list is important.
3036 // The blocks will all be inserted before PrologueMBB using that order.
3037 // Therefore the block that should appear first in the CFG should appear
3038 // first in the list.
3039 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3040 PostStackMBB};
3041
3042 for (MachineBasicBlock *B : AddedBlocks)
3043 BeforePrologueRegion.insert(B);
3044
3045 for (const auto &LI : PrologueMBB.liveins()) {
3046 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3047 PredBB->addLiveIn(LI);
3048 }
3049
3050 // Remove the newly added blocks from the list, since we know
3051 // we do not have to do the following updates for them.
3052 for (MachineBasicBlock *B : AddedBlocks) {
3053 BeforePrologueRegion.erase(B);
3054 MF.insert(PrologueMBB.getIterator(), B);
3055 }
3056
3057 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3058 // Make sure the LiveIns are still sorted and unique.
3059 MBB->sortUniqueLiveIns();
3060 // Replace the edges to PrologueMBB by edges to the sequences
3061 // we are about to add, but only update for immediate predecessors.
3062 if (MBB->isSuccessor(&PrologueMBB))
3063 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3064 }
3065
3066 // The required stack size that is aligned to ARM constant criterion.
3067 AlignedStackSize = alignToARMConstant(StackSize);
3068
3069 // When the frame size is less than 256 we just compare the stack
3070 // boundary directly to the value of the stack pointer, per gcc.
3071 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3072
3073 // We will use two of the callee save registers as scratch registers so we
3074 // need to save those registers onto the stack.
3075 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3076 // requested and arguments for __morestack().
3077 // SR0: Scratch Register #0
3078 // SR1: Scratch Register #1
3079 // push {SR0, SR1}
3080 if (Thumb) {
3081 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3082 .add(predOps(ARMCC::AL))
3083 .addReg(ScratchReg0)
3084 .addReg(ScratchReg1);
3085 } else {
3086 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3087 .addReg(ARM::SP, RegState::Define)
3088 .addReg(ARM::SP)
3089 .add(predOps(ARMCC::AL))
3090 .addReg(ScratchReg0)
3091 .addReg(ScratchReg1);
3092 }
3093
3094 // Emit the relevant DWARF information about the change in stack pointer as
3095 // well as where to find both r4 and r5 (the callee-save registers)
3096 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3097 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
3098 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3099 .addCFIIndex(CFIIndex);
3100 CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
3101 nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
3102 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3103 .addCFIIndex(CFIIndex);
3104 CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
3105 nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
3106 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3107 .addCFIIndex(CFIIndex);
3108 }
3109
3110 // mov SR1, sp
3111 if (Thumb) {
3112 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3113 .addReg(ARM::SP)
3114 .add(predOps(ARMCC::AL));
3115 } else if (CompareStackPointer) {
3116 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3117 .addReg(ARM::SP)
3118 .add(predOps(ARMCC::AL))
3119 .add(condCodeOp());
3120 }
3121
3122 // sub SR1, sp, #StackSize
3123 if (!CompareStackPointer && Thumb) {
3124 if (AlignedStackSize < 256) {
3125 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3126 .add(condCodeOp())
3127 .addReg(ScratchReg1)
3128 .addImm(AlignedStackSize)
3129 .add(predOps(ARMCC::AL));
3130 } else {
3131 if (Thumb2 || ST->genExecuteOnly()) {
3132 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3133 .addImm(AlignedStackSize);
3134 } else {
3135 auto MBBI = McrMBB->end();
3136 auto RegInfo = STI.getRegisterInfo();
3137 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3138 AlignedStackSize);
3139 }
3140 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3141 .add(condCodeOp())
3142 .addReg(ScratchReg1)
3143 .addReg(ScratchReg0)
3144 .add(predOps(ARMCC::AL));
3145 }
3146 } else if (!CompareStackPointer) {
3147 if (AlignedStackSize < 256) {
3148 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3149 .addReg(ARM::SP)
3150 .addImm(AlignedStackSize)
3151 .add(predOps(ARMCC::AL))
3152 .add(condCodeOp());
3153 } else {
3154 auto MBBI = McrMBB->end();
3155 auto RegInfo = STI.getRegisterInfo();
3156 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3157 AlignedStackSize);
3158 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3159 .addReg(ARM::SP)
3160 .addReg(ScratchReg0)
3161 .add(predOps(ARMCC::AL))
3162 .add(condCodeOp());
3163 }
3164 }
3165
3166 if (Thumb && ST->isThumb1Only()) {
3167 if (ST->genExecuteOnly()) {
3168 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3169 .addExternalSymbol("__STACK_LIMIT");
3170 } else {
3171 unsigned PCLabelId = ARMFI->createPICLabelUId();
3172 ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
3173 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3174 MachineConstantPool *MCP = MF.getConstantPool();
3175 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3176
3177 // ldr SR0, [pc, offset(STACK_LIMIT)]
3178 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3179 .addConstantPoolIndex(CPI)
3180 .add(predOps(ARMCC::AL));
3181 }
3182
3183 // ldr SR0, [SR0]
3184 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3185 .addReg(ScratchReg0)
3186 .addImm(0)
3187 .add(predOps(ARMCC::AL));
3188 } else {
3189 // Get TLS base address from the coprocessor
3190 // mrc p15, #0, SR0, c13, c0, #3
3191 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3192 ScratchReg0)
3193 .addImm(15)
3194 .addImm(0)
3195 .addImm(13)
3196 .addImm(0)
3197 .addImm(3)
3198 .add(predOps(ARMCC::AL));
3199
3200 // Use the last tls slot on android and a private field of the TCP on linux.
3201 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3202 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3203
3204 // Get the stack limit from the right offset
3205 // ldr SR0, [sr0, #4 * TlsOffset]
3206 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3207 ScratchReg0)
3208 .addReg(ScratchReg0)
3209 .addImm(4 * TlsOffset)
3210 .add(predOps(ARMCC::AL));
3211 }
3212
3213 // Compare stack limit with stack size requested.
3214 // cmp SR0, SR1
3215 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3216 BuildMI(GetMBB, DL, TII.get(Opcode))
3217 .addReg(ScratchReg0)
3218 .addReg(ScratchReg1)
3219 .add(predOps(ARMCC::AL));
3220
3221 // This jump is taken if StackLimit <= SP - stack required.
3222 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3223 BuildMI(GetMBB, DL, TII.get(Opcode))
3224 .addMBB(PostStackMBB)
3225 .addImm(ARMCC::LS)
3226 .addReg(ARM::CPSR);
3227
3228 // Calling __morestack(StackSize, Size of stack arguments).
3229 // __morestack knows that the stack size requested is in SR0(r4)
3230 // and amount size of stack arguments is in SR1(r5).
3231
3232 // Pass first argument for the __morestack by Scratch Register #0.
3233 // The amount size of stack required
3234 if (Thumb) {
3235 if (AlignedStackSize < 256) {
3236 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3237 .add(condCodeOp())
3238 .addImm(AlignedStackSize)
3239 .add(predOps(ARMCC::AL));
3240 } else {
3241 if (Thumb2 || ST->genExecuteOnly()) {
3242 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3243 .addImm(AlignedStackSize);
3244 } else {
3245 auto MBBI = AllocMBB->end();
3246 auto RegInfo = STI.getRegisterInfo();
3247 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3248 AlignedStackSize);
3249 }
3250 }
3251 } else {
3252 if (AlignedStackSize < 256) {
3253 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3254 .addImm(AlignedStackSize)
3255 .add(predOps(ARMCC::AL))
3256 .add(condCodeOp());
3257 } else {
3258 auto MBBI = AllocMBB->end();
3259 auto RegInfo = STI.getRegisterInfo();
3260 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3261 AlignedStackSize);
3262 }
3263 }
3264
3265 // Pass second argument for the __morestack by Scratch Register #1.
3266 // The amount size of stack consumed to save function arguments.
3267 if (Thumb) {
3268 if (ARMFI->getArgumentStackSize() < 256) {
3269 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3270 .add(condCodeOp())
3271 .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
3272 .add(predOps(ARMCC::AL));
3273 } else {
3274 if (Thumb2 || ST->genExecuteOnly()) {
3275 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3276 .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
3277 } else {
3278 auto MBBI = AllocMBB->end();
3279 auto RegInfo = STI.getRegisterInfo();
3280 RegInfo->emitLoadConstPool(
3281 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3282 alignToARMConstant(ARMFI->getArgumentStackSize()));
3283 }
3284 }
3285 } else {
3286 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3287 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3288 .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
3289 .add(predOps(ARMCC::AL))
3290 .add(condCodeOp());
3291 } else {
3292 auto MBBI = AllocMBB->end();
3293 auto RegInfo = STI.getRegisterInfo();
3294 RegInfo->emitLoadConstPool(
3295 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3296 alignToARMConstant(ARMFI->getArgumentStackSize()));
3297 }
3298 }
3299
3300 // push {lr} - Save return address of this function.
3301 if (Thumb) {
3302 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3303 .add(predOps(ARMCC::AL))
3304 .addReg(ARM::LR);
3305 } else {
3306 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3307 .addReg(ARM::SP, RegState::Define)
3308 .addReg(ARM::SP)
3309 .add(predOps(ARMCC::AL))
3310 .addReg(ARM::LR);
3311 }
3312
3313 // Emit the DWARF info about the change in stack as well as where to find the
3314 // previous link register
3315 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3316 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
3317 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3318 .addCFIIndex(CFIIndex);
3319 CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
3320 nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
3321 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3322 .addCFIIndex(CFIIndex);
3323 }
3324
3325 // Call __morestack().
3326 if (Thumb) {
3327 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3328 .add(predOps(ARMCC::AL))
3329 .addExternalSymbol("__morestack");
3330 } else {
3331 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3332 .addExternalSymbol("__morestack");
3333 }
3334
3335 // pop {lr} - Restore return address of this original function.
3336 if (Thumb) {
3337 if (ST->isThumb1Only()) {
3338 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3339 .add(predOps(ARMCC::AL))
3340 .addReg(ScratchReg0);
3341 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3342 .addReg(ScratchReg0)
3343 .add(predOps(ARMCC::AL));
3344 } else {
3345 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3346 .addReg(ARM::LR, RegState::Define)
3347 .addReg(ARM::SP, RegState::Define)
3348 .addReg(ARM::SP)
3349 .addImm(4)
3350 .add(predOps(ARMCC::AL));
3351 }
3352 } else {
3353 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3354 .addReg(ARM::SP, RegState::Define)
3355 .addReg(ARM::SP)
3356 .add(predOps(ARMCC::AL))
3357 .addReg(ARM::LR);
3358 }
3359
3360 // Restore SR0 and SR1 in case of __morestack() was called.
3361 // __morestack() will skip PostStackMBB block so we need to restore
3362 // scratch registers from here.
3363 // pop {SR0, SR1}
3364 if (Thumb) {
3365 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3366 .add(predOps(ARMCC::AL))
3367 .addReg(ScratchReg0)
3368 .addReg(ScratchReg1);
3369 } else {
3370 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3371 .addReg(ARM::SP, RegState::Define)
3372 .addReg(ARM::SP)
3373 .add(predOps(ARMCC::AL))
3374 .addReg(ScratchReg0)
3375 .addReg(ScratchReg1);
3376 }
3377
3378 // Update the CFA offset now that we've popped
3379 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3380 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3381 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3382 .addCFIIndex(CFIIndex);
3383 }
3384
3385 // Return from this function.
3386 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3387
3388 // Restore SR0 and SR1 in case of __morestack() was not called.
3389 // pop {SR0, SR1}
3390 if (Thumb) {
3391 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3392 .add(predOps(ARMCC::AL))
3393 .addReg(ScratchReg0)
3394 .addReg(ScratchReg1);
3395 } else {
3396 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3397 .addReg(ARM::SP, RegState::Define)
3398 .addReg(ARM::SP)
3399 .add(predOps(ARMCC::AL))
3400 .addReg(ScratchReg0)
3401 .addReg(ScratchReg1);
3402 }
3403
3404 // Update the CFA offset now that we've popped
3405 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3406 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3407 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3408 .addCFIIndex(CFIIndex);
3409
3410 // Tell debuggers that r4 and r5 are now the same as they were in the
3411 // previous function, that they're the "Same Value".
3412 CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
3413 nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
3414 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3415 .addCFIIndex(CFIIndex);
3416 CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
3417 nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
3418 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3419 .addCFIIndex(CFIIndex);
3420 }
3421
3422 // Organizing MBB lists
3423 PostStackMBB->addSuccessor(&PrologueMBB);
3424
3425 AllocMBB->addSuccessor(PostStackMBB);
3426
3427 GetMBB->addSuccessor(PostStackMBB);
3428 GetMBB->addSuccessor(AllocMBB);
3429
3430 McrMBB->addSuccessor(GetMBB);
3431
3432 PrevStackMBB->addSuccessor(McrMBB);
3433
3434 #ifdef EXPENSIVE_CHECKS
3435 MF.verify();
3436 #endif
3437 }
3438