//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas in the frame below are optional, i.e. it's
// possible to create a function such that a particular area isn't present in
// the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until the main
// function body runs, after the prologue. However, it's depicted here for
// completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | (Win64 only) varargs from reg     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved gpr registers        | <--.
// |                                   |    | On Darwin platforms these
// |- - - - - - - - - - - - - - - - - -|    | callee saves are swapped,
// | prev_lr                           |    | (frame record first)
// | prev_fp                           | <--'
// | async context if needed           |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |   <hazard padding>                |
// |-----------------------------------|
// |                                   |
// | callee-saved fp/simd/SVE regs     |
// |                                   |
// |-----------------------------------|
// |                                   |
// |        SVE stack objects          |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// | local variables of fixed size     |
// | including spill slots             |
// |   <FPR>                           |
// |   <hazard padding>                |
// |   <GPR>                           |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers (fp,
// bp, sp) must be computable at compile time. The sizes of the areas with a
// dotted background cannot be computed at compile time if those areas are
// present, so all three of fp, bp and sp must be set up to be able to access
// all contents of the frame areas, assuming all of the frame areas are
// non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
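// As a concrete illustration of the two rules above (hypothetical source, not
// taken from a test):
//
//    void f(int n) {
//      alignas(32) char buf[64];   // over-aligned local  => frame pointer
//      char vla[n];                // VLA; with buf above => base pointer too
//      /* ... */
//    }
//
// The over-aligned local forces a frame pointer, and the VLA combined with the
// over-aligned local forces a base pointer as well.
//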
// For Darwin platforms the frame-record (fp, lr) is stored at the top of the
// callee-saved area, since the unwind encoding does not allow for encoding
// this dynamically and existing tools depend on this layout. For other
// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
// area to allow SVE stack objects (allocated directly below the callee-saves,
// if available) to be accessed directly from the frame pointer.
// The SVE spill/fill instructions have VL-scaled addressing modes such
// as:
//    ldr z8, [fp, #-7 mul vl]
// For SVE, the vector length (VL) is not known at compile time, so
// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
// layout, we don't need to add an unscaled offset to the frame pointer before
// accessing the SVE object in the frame.
//
// Even when a base pointer is not strictly needed, it is generated anyway
// when offsets from the frame pointer to access local variables become so
// large that they can't be encoded in the immediate fields of loads or
// stores.
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls. If we do have VLAs, then
// the stack pointer must be decremented and incremented around each call to
// make space for the arguments below the VLAs.
//
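// For illustration only (the exact sequence depends on the call site): with
// VLAs present, a call needing 32 bytes of stack-passed arguments is
// bracketed roughly as
//
//    sub  sp, sp, #32     // make space for the outgoing arguments
//    bl   callee
//    add  sp, sp, #32     // release it again
//
// whereas with a reserved call frame no per-call adjustment is emitted.
//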
// FIXME: also explain the redzone concept.
//
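// (Red zone, in brief: when the -aarch64-redzone option is enabled, a leaf
// function whose local stack fits within the target's red-zone size may use
// the area directly below sp without adjusting sp at all; canUseRedZone()
// below lists the exact conditions, e.g. no calls, no frame pointer and no
// SVE stack.)
//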
// About stack hazards: Under some SME contexts, a coprocessor with its own
// separate cache can be used for FP operations. This can create hazards if
// the CPU and the SME unit try to access the same area of memory, including
// if the access is to an area of the stack. To try to alleviate this we
// attempt to introduce extra padding into the stack frame between FP and GPR
// accesses, controlled by the StackHazardSize option. Without changing the
// layout of the stack frame in the diagram above, a stack object of size
// StackHazardSize is added between GPR and FPR CSRs. Another is added to the
// stack objects section, and stack objects are sorted so that FPR > Hazard
// padding slot > GPRs (where possible). Unfortunately some things are not
// handled well (VLA area, arguments on the stack, objects with both GPR and
// FPR accesses), but if those are controlled by the user then the entire
// stack frame becomes GPR at the start/end with FPR in the middle, surrounded
// by Hazard padding.
//
// An example of the prologue:
//
//     .globl __foo
//     .align 2
//  __foo:
// Ltmp0:
//     .cfi_startproc
//     .cfi_personality 155, ___gxx_personality_v0
// Leh_func_begin:
//     .cfi_lsda 16, Lexception33
//
//     stp  xa, xb, [sp, #-offset]!
//     ...
//     stp  x28, x27, [sp, #offset-32]
//     stp  fp, lr, [sp, #offset-16]
//     add  fp, sp, #offset - 16
//     sub  sp, sp, #1360
//
// The Stack:
//       +-------------------------------------------+
// 10000 | ........ | ........ | ........ | ........ |
// 10004 | ........ | ........ | ........ | ........ |
//       +-------------------------------------------+
// 10008 | ........ | ........ | ........ | ........ |
// 1000c | ........ | ........ | ........ | ........ |
//       +===========================================+
// 10010 |                X28 Register               |
// 10014 |                X28 Register               |
//       +-------------------------------------------+
// 10018 |                X27 Register               |
// 1001c |                X27 Register               |
//       +===========================================+
// 10020 |                Frame Pointer              |
// 10024 |                Frame Pointer              |
//       +-------------------------------------------+
// 10028 |                Link Register              |
// 1002c |                Link Register              |
//       +===========================================+
// 10030 | ........ | ........ | ........ | ........ |
// 10034 | ........ | ........ | ........ | ........ |
//       +-------------------------------------------+
// 10038 | ........ | ........ | ........ | ........ |
// 1003c | ........ | ........ | ........ | ........ |
//       +-------------------------------------------+
//
//     [sp] = 10030        ::    >>initial value<<
//     sp = 10020          ::  stp fp, lr, [sp, #-16]!
//     fp = sp == 10020    ::  mov fp, sp
//     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
//     sp == 10010         ::    >>final value<<
//
// The frame pointer (w29) points to address 10020. If we use an offset of
// '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
// for w27, and -32 for w28:
//
//  Ltmp1:
//     .cfi_def_cfa w29, 16
//  Ltmp2:
//     .cfi_offset w30, -8
//  Ltmp3:
//     .cfi_offset w29, -16
//  Ltmp4:
//     .cfi_offset w27, -24
//  Ltmp5:
//     .cfi_offset w28, -32
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool> StackTaggingMergeSetTag(
    "stack-tagging-merge-settag",
    cl::desc("merge settag instruction in function epilog"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
                                       cl::desc("sort stack allocations"),
                                       cl::init(true), cl::Hidden);

cl::opt<bool> EnableHomogeneousPrologEpilog(
    "homogeneous-prolog-epilog", cl::Hidden,
    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                         cl::init(0), cl::Hidden);
// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
static cl::opt<unsigned>
    StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
                          cl::Hidden);
// Whether to insert padding into non-streaming functions (for testing).
static cl::opt<bool>
    StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
                              cl::init(false), cl::Hidden);
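// Usage sketch (these are ordinary backend cl::opt flags, so the exact
// spelling below is only an example): they can be passed to llc directly,
// e.g. `llc -aarch64-stack-hazard-size=1024 foo.ll`, or from the clang driver
// via `-mllvm -aarch64-stack-hazard-size=1024`.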

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
/// tail call to a function that uses less stack space for arguments) or
/// negative (for a tail call to a function that needs more stack space for
/// arguments than we do).
static int64_t getArgumentStackToRestore(MachineFunction &MF,
                                         MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool IsTailCallReturn = (MBB.end() != MBBI)
                              ? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
                              : false;

  int64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}

static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);

/// Returns true if homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
/// When an Exit block is given, this check is for the epilog.
bool AArch64FrameLowering::homogeneousPrologEpilog(
    MachineFunction &MF, MachineBasicBlock *Exit) const {
  if (!MF.getFunction().hasMinSize())
    return false;
  if (!EnableHomogeneousPrologEpilog)
    return false;
  if (EnableRedZone)
    return false;

  // TODO: Windows is not supported yet.
  if (needsWinCFI(MF))
    return false;
  // TODO: SVE is not supported yet.
  if (getSVEStackSize(MF))
    return false;

  // Bail on stack adjustment needed on return for simplicity.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
    return false;
  if (Exit && getArgumentStackToRestore(MF, *Exit))
    return false;

  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
    return false;

  // If there are an odd number of GPRs before LR and FP in the CSRs list,
  // they will not be paired into one RegPairInfo, which is incompatible with
  // the assumption made by the homogeneous prolog epilog pass.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  unsigned NumGPRs = 0;
  for (unsigned I = 0; CSRegs[I]; ++I) {
    Register Reg = CSRegs[I];
    if (Reg == AArch64::LR) {
      assert(CSRegs[I + 1] == AArch64::FP);
      if (NumGPRs % 2 != 0)
        return false;
      break;
    }
    if (AArch64::GPR64RegClass.contains(Reg))
      ++NumGPRs;
  }

  return true;
}

/// Returns true if CSRs should be paired.
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
  return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
}

/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exceptions here are vector stores/loads, which cannot encode
/// any displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
static const unsigned DefaultSafeSPDisplacement = 255;
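// For illustration (an encoding-range sketch, not an exhaustive rule):
// unscaled forms such as
//    ldur x0, [sp, #255]
// take a signed 9-bit immediate (-256..255), which is where the conservative
// 255-byte default above comes from; scaled forms such as
//    ldr x0, [sp, #32760]
// reach further, but only for suitably aligned offsets.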

/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
  // FIXME: For now, just conservatively guestimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr() || MI.isPseudo() ||
          MI.getOpcode() == AArch64::ADDXri ||
          MI.getOpcode() == AArch64::ADDSXri)
        continue;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isFI())
          continue;

        StackOffset Offset;
        if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
            AArch64FrameOffsetCannotUpdate)
          return 0;
      }
    }
  }
  return DefaultSafeSPDisplacement;
}

TargetStackID::Value
AArch64FrameLowering::getStackIDForScalableVectors() const {
  return TargetStackID::ScalableVector;
}

/// Returns the size of the fixed object area (allocated next to sp on entry).
/// On Win64 this may include a var args area and an UnwindHelp object for EH.
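///
/// For example (illustrative numbers only): a Win64 non-funclet function with
/// 24 bytes of register varargs spilled to the stack and EH funclets present
/// gets alignTo(24 + 8, 16) = 32 bytes here, plus any tail-call reserved
/// stack.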
static unsigned getFixedObjectSize(const MachineFunction &MF,
                                   const AArch64FunctionInfo *AFI, bool IsWin64,
                                   bool IsFunclet) {
  if (!IsWin64 || IsFunclet) {
    return AFI->getTailCallReservedStack();
  } else {
    if (AFI->getTailCallReservedStack() != 0 &&
        !MF.getFunction().getAttributes().hasAttrSomewhere(
            Attribute::SwiftAsync))
      report_fatal_error("cannot generate ABI-changing tail call for Win64");
    // Var args are stored here in the primary function.
    const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
    // To support EH funclets we allocate an UnwindHelp object
    const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
    return AFI->getTailCallReservedStack() +
           alignTo(VarArgsArea + UnwindHelpObject, 16);
  }
}

/// Returns the size of the entire SVE stack frame (callee-saves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;

  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
  if (!RedZoneSize)
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  uint64_t NumBytes = AFI->getLocalStackSize();

  // If neither NEON nor SVE is available, a COPY from one Q-reg to
  // another requires a spill -> reload sequence. We can do that
  // using a pre-decrementing store/post-decrementing load, but
  // if we do so, we can't use the Red Zone.
  bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
                                 !Subtarget.isNeonAvailable() &&
                                 !Subtarget.hasSVE();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
           getSVEStackSize(MF) || LowerQRegCopyThroughMem);
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  // Win64 EH requires a frame pointer if funclets are present, as the locals
  // are accessed off the frame pointer in both the parent function and the
  // funclets.
  if (MF.hasEHFunclets())
    return true;
  // Retain behavior of always omitting the FP for leaf functions when possible.
  if (MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;
  if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
      MFI.hasStackMap() || MFI.hasPatchPoint() ||
      RegInfo->hasStackRealignment(MF))
    return true;
  // With large callframes around we may need to use FP to access the scavenging
  // emergency spillslot.
  //
  // Unfortunately some calls to hasFP() like machine verifier ->
  // getReservedReg() -> hasFP in the middle of global isel are too early
  // to know the max call frame size. Hopefully conservatively returning "true"
  // in those cases is fine.
  // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
  if (!MFI.isMaxCallFrameSizeComputed() ||
      MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
    return true;

  return false;
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function.  This eliminates the need for
/// add/sub sp brackets around call sites.  Returns true if the call frame is
/// included as part of the stack frame.
bool AArch64FrameLowering::hasReservedCallFrame(
    const MachineFunction &MF) const {
  // The stack probing code for the dynamically allocated outgoing arguments
  // area assumes that the stack is probed at the top - either by the prologue
  // code, which issues a probe if `hasVarSizedObjects` returns true, or by the
  // most recent variable-sized object allocation. Changing the condition here
  // may need to be followed up by changes to the probe issuing logic.
  return !MF.getFrameInfo().hasVarSizedObjects();
}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  const AArch64TargetLowering *TLI =
      MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
  [[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo();
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, getStackAlign());
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      // LSL #0, and the other uses LSL #12.
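      //
      // As a rough illustration (not the literal expansion emitted here), a
      // 20-bit adjustment such as 0x12345 could be split as
      //   sub sp, sp, #0x12, lsl #12   // 0x12000
      //   sub sp, sp, #0x345           // remaining 0x345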
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");

      if (TLI->hasInlineStackProbe(MF) &&
          -Amount >= AArch64::StackProbeMaxUnprobedStack) {
        // When stack probing is enabled, the decrement of SP may need to be
        // probed. We only need to do this if the call site needs 1024 bytes of
        // space or more, because a region smaller than that is allowed to be
        // unprobed at an ABI boundary. We rely on the fact that SP has been
        // probed exactly at this point, either by the prologue or most recent
        // dynamic allocation.
        assert(MFI.hasVarSizedObjects() &&
               "non-reserved call frame without var sized objects?");
        Register ScratchReg =
            MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
        inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
      } else {
        emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
                        StackOffset::getFixed(Amount), TII);
      }
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
  }
  return MBB.erase(I);
}

void AArch64FrameLowering::emitCalleeSavedGPRLocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  SMEAttrs Attrs(MF.getFunction());
  bool LocallyStreaming =
      Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
    int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();

    // The location of VG will be emitted before each streaming-mode change in
    // the function. Only locally-streaming functions require emitting the
    // non-streaming VG location here.
    if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
        (!LocallyStreaming &&
         DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
      continue;

    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

void AArch64FrameLowering::emitCalleeSavedSVELocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();

  for (const auto &Info : CSI) {
    if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
      continue;

    // Not all unwinders may know about SVE registers, so assume the lowest
    // common denominator.
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    unsigned Reg = Info.getReg();
    if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
      continue;

    StackOffset Offset =
        StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
        StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));

    unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator InsertPt,
                               unsigned DwarfReg) {
  unsigned CFIIndex =
      MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg));
  BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex);
}

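// Reset the unwind state at the start of MBB to what it was on function entry:
// CFA = SP + 0, the return-address signing state flipped back, and
// .cfi_same_value emitted for X18 (if the shadow call stack is in use) and for
// each CFI-relevant callee-saved register. As a rough sketch of the resulting
// directives (names and ordering illustrative only):
//   .cfi_def_cfa sp, 0
//   .cfi_negate_ra_state   // only if the return address is signed
//   .cfi_same_value x18    // only with the shadow call stack
//   .cfi_same_value x19    // ...one per callee-saved register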
void AArch64FrameLowering::resetCFIToInitialState(
    MachineBasicBlock &MBB) const {

  MachineFunction &MF = *MBB.getParent();
  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  const auto &TRI =
      static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo());
  const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();

  const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
  DebugLoc DL;

  // Reset the CFA to `SP + 0`.
  MachineBasicBlock::iterator InsertPt = MBB.begin();
  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
      nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
  BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);

  // Flip the RA sign state.
  if (MFI.shouldSignReturnAddress(MF)) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
    BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
  }

  // Shadow call stack uses X18, reset it.
  if (MFI.needsShadowCallStackPrologueEpilogue(MF))
    insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
                       TRI.getDwarfRegNum(AArch64::X18, true));

  // Emit .cfi_same_value for callee-saved registers.
  const std::vector<CalleeSavedInfo> &CSI =
      MF.getFrameInfo().getCalleeSavedInfo();
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!TRI.regNeedsCFI(Reg, Reg))
      continue;
    insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
                       TRI.getDwarfRegNum(Reg, true));
  }
}

static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    bool SVE) {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  for (const auto &Info : CSI) {
    if (SVE !=
        (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
      continue;

    unsigned Reg = Info.getReg();
    if (SVE &&
        !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
      continue;

    if (!Info.isRestored())
      continue;

    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
        nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameDestroy);
  }
}

void AArch64FrameLowering::emitCalleeSavedGPRRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  emitCalleeSavedRestores(MBB, MBBI, false);
}

void AArch64FrameLowering::emitCalleeSavedSVERestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  emitCalleeSavedRestores(MBB, MBBI, true);
}

// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
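// For example (illustrative only): the architectural maximum SVE vector length
// is 2048 bits, i.e. 16x the 128-bit granule, so one "scalable byte" can stand
// for at most 16 real bytes, and
//    upperBound(StackOffset::get(/*Fixed=*/32, /*Scalable=*/16))
// returns 16 * 16 + 32 = 288.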
static int64_t upperBound(StackOffset Size) {
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}

void AArch64FrameLowering::allocateStackSpace(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) const {

  if (!AllocSize)
    return;

  DebugLoc DL;
  MachineFunction &MF = *MBB.getParent();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  const int64_t MaxAlign = MFI.getMaxAlign().value();
  const uint64_t AndMask = ~(MaxAlign - 1);

  if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
    Register TargetReg = RealignmentPadding
                             ? findScratchNonCalleeSaveRegister(&MBB)
                             : AArch64::SP;
    // SUB Xd/SP, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                    EmitCFI, InitialOffset);

    if (RealignmentPadding) {
      // AND SP, X9, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
          .addReg(TargetReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI.setStackRealigned(true);

      // No need for SEH instructions here; if we're realigning the stack,
      // we've set a frame pointer and already finished the SEH prologue.
      assert(!NeedsWinCFI);
    }
    return;
  }

  //
  // Stack probing allocation.
  //

  // Fixed length allocation. If we don't need to re-align the stack and don't
  // have SVE objects, we can use a more efficient sequence for stack probing.
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
    assert(ScratchReg != AArch64::NoRegister);
    BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
        .addDef(ScratchReg)
        .addImm(AllocSize.getFixed())
        .addImm(InitialOffset.getFixed())
        .addImm(InitialOffset.getScalable());
8315f757f3fSDimitry Andric     // The fixed allocation may leave unprobed bytes at the top of the
8325f757f3fSDimitry Andric     // stack. If we have subsequent allocations (e.g. if we have variable-sized
8335f757f3fSDimitry Andric     // objects), we need to issue an extra probe, so these allocations start in
8345f757f3fSDimitry Andric     // a known state.
8355f757f3fSDimitry Andric     if (FollowupAllocs) {
8365f757f3fSDimitry Andric       // STR XZR, [SP]
8375f757f3fSDimitry Andric       BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
8385f757f3fSDimitry Andric           .addReg(AArch64::XZR)
8395f757f3fSDimitry Andric           .addReg(AArch64::SP)
8405f757f3fSDimitry Andric           .addImm(0)
8415f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
8425f757f3fSDimitry Andric     }
8435f757f3fSDimitry Andric 
8445f757f3fSDimitry Andric     return;
8455f757f3fSDimitry Andric   }
8465f757f3fSDimitry Andric 
8475f757f3fSDimitry Andric   // Variable-length allocation.
8485f757f3fSDimitry Andric 
8495f757f3fSDimitry Andric   // If the (unknown) allocation size cannot exceed the probe size, decrement
8505f757f3fSDimitry Andric   // the stack pointer right away.
8515f757f3fSDimitry Andric   int64_t ProbeSize = AFI.getStackProbeSize();
8525f757f3fSDimitry Andric   if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
8535f757f3fSDimitry Andric     Register ScratchReg = RealignmentPadding
8545f757f3fSDimitry Andric                               ? findScratchNonCalleeSaveRegister(&MBB)
8555f757f3fSDimitry Andric                               : AArch64::SP;
8565f757f3fSDimitry Andric     assert(ScratchReg != AArch64::NoRegister);
8575f757f3fSDimitry Andric     // SUB Xd, SP, AllocSize
8585f757f3fSDimitry Andric     emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
8595f757f3fSDimitry Andric                     MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
8605f757f3fSDimitry Andric                     EmitCFI, InitialOffset);
8615f757f3fSDimitry Andric     if (RealignmentPadding) {
8625f757f3fSDimitry Andric       // AND SP, Xn, 0b11111...0000
8635f757f3fSDimitry Andric       BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
8645f757f3fSDimitry Andric           .addReg(ScratchReg, RegState::Kill)
8655f757f3fSDimitry Andric           .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
8665f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
8675f757f3fSDimitry Andric       AFI.setStackRealigned(true);
8685f757f3fSDimitry Andric     }
8695f757f3fSDimitry Andric     if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
8705f757f3fSDimitry Andric                               AArch64::StackProbeMaxUnprobedStack) {
8715f757f3fSDimitry Andric       // STR XZR, [SP]
8725f757f3fSDimitry Andric       BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
8735f757f3fSDimitry Andric           .addReg(AArch64::XZR)
8745f757f3fSDimitry Andric           .addReg(AArch64::SP)
8755f757f3fSDimitry Andric           .addImm(0)
8765f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
8775f757f3fSDimitry Andric     }
8785f757f3fSDimitry Andric     return;
8795f757f3fSDimitry Andric   }
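  // To illustrate the path above with made-up numbers (assuming a probe size
  // of 4096 and taking StackProbeMaxUnprobedStack to be 1024 purely for
  // illustration): an allocation whose upper bound is 512 bytes needs only the
  // single SUB, while one bounded by 2048 bytes also gets the trailing
  // STR XZR so that at most StackProbeMaxUnprobedStack bytes are ever left
  // unprobed.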
8805f757f3fSDimitry Andric 
8815f757f3fSDimitry Andric   // Emit a variable-length allocation probing loop.
8825f757f3fSDimitry Andric   // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
8835f757f3fSDimitry Andric   // each of them guaranteed to adjust the stack by less than the probe size.
8845f757f3fSDimitry Andric   Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
8855f757f3fSDimitry Andric   assert(TargetReg != AArch64::NoRegister);
8865f757f3fSDimitry Andric   // SUB Xd, SP, AllocSize
8875f757f3fSDimitry Andric   emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
8885f757f3fSDimitry Andric                   MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
8895f757f3fSDimitry Andric                   EmitCFI, InitialOffset);
8905f757f3fSDimitry Andric   if (RealignmentPadding) {
8915f757f3fSDimitry Andric     // AND Xn, Xn, 0b11111...0000
8925f757f3fSDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
8935f757f3fSDimitry Andric         .addReg(TargetReg, RegState::Kill)
8945f757f3fSDimitry Andric         .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
8955f757f3fSDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
8965f757f3fSDimitry Andric   }
8975f757f3fSDimitry Andric 
8985f757f3fSDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
8995f757f3fSDimitry Andric       .addReg(TargetReg);
9005f757f3fSDimitry Andric   if (EmitCFI) {
9015f757f3fSDimitry Andric     // Set the CFA register back to SP.
9025f757f3fSDimitry Andric     unsigned Reg =
9035f757f3fSDimitry Andric         Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
9045f757f3fSDimitry Andric     unsigned CFIIndex =
9055f757f3fSDimitry Andric         MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
9065f757f3fSDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
9075f757f3fSDimitry Andric         .addCFIIndex(CFIIndex)
9085f757f3fSDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
9095f757f3fSDimitry Andric   }
9105f757f3fSDimitry Andric   if (RealignmentPadding)
9115f757f3fSDimitry Andric     AFI.setStackRealigned(true);
9125f757f3fSDimitry Andric }
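// A purely illustrative sketch of how allocateStackSpace might be invoked
// from prologue emission; the arguments here are made up and do not reflect
// any particular call site in this file:
//
//   allocateStackSpace(MBB, MBBI, /*RealignmentPadding=*/0,
//                      StackOffset::getFixed(128), NeedsWinCFI, &HasWinCFI,
//                      /*EmitCFI=*/true, StackOffset::getFixed(16),
//                      /*FollowupAllocs=*/false);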
9135f757f3fSDimitry Andric 
91481ad6265SDimitry Andric static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
91581ad6265SDimitry Andric   switch (Reg.id()) {
91681ad6265SDimitry Andric   default:
91781ad6265SDimitry Andric     // The called routine is expected to preserve r19-r28;
91881ad6265SDimitry Andric     // r29 and r30 are the frame pointer and link register, respectively.
91981ad6265SDimitry Andric     return 0;
92081ad6265SDimitry Andric 
92181ad6265SDimitry Andric     // GPRs
92281ad6265SDimitry Andric #define CASE(n)                                                                \
92381ad6265SDimitry Andric   case AArch64::W##n:                                                          \
92481ad6265SDimitry Andric   case AArch64::X##n:                                                          \
92581ad6265SDimitry Andric     return AArch64::X##n
92681ad6265SDimitry Andric   CASE(0);
92781ad6265SDimitry Andric   CASE(1);
92881ad6265SDimitry Andric   CASE(2);
92981ad6265SDimitry Andric   CASE(3);
93081ad6265SDimitry Andric   CASE(4);
93181ad6265SDimitry Andric   CASE(5);
93281ad6265SDimitry Andric   CASE(6);
93381ad6265SDimitry Andric   CASE(7);
93481ad6265SDimitry Andric   CASE(8);
93581ad6265SDimitry Andric   CASE(9);
93681ad6265SDimitry Andric   CASE(10);
93781ad6265SDimitry Andric   CASE(11);
93881ad6265SDimitry Andric   CASE(12);
93981ad6265SDimitry Andric   CASE(13);
94081ad6265SDimitry Andric   CASE(14);
94181ad6265SDimitry Andric   CASE(15);
94281ad6265SDimitry Andric   CASE(16);
94381ad6265SDimitry Andric   CASE(17);
94481ad6265SDimitry Andric   CASE(18);
94581ad6265SDimitry Andric #undef CASE
94681ad6265SDimitry Andric 
94781ad6265SDimitry Andric     // FPRs
94881ad6265SDimitry Andric #define CASE(n)                                                                \
94981ad6265SDimitry Andric   case AArch64::B##n:                                                          \
95081ad6265SDimitry Andric   case AArch64::H##n:                                                          \
95181ad6265SDimitry Andric   case AArch64::S##n:                                                          \
95281ad6265SDimitry Andric   case AArch64::D##n:                                                          \
95381ad6265SDimitry Andric   case AArch64::Q##n:                                                          \
95481ad6265SDimitry Andric     return HasSVE ? AArch64::Z##n : AArch64::Q##n
95581ad6265SDimitry Andric   CASE(0);
95681ad6265SDimitry Andric   CASE(1);
95781ad6265SDimitry Andric   CASE(2);
95881ad6265SDimitry Andric   CASE(3);
95981ad6265SDimitry Andric   CASE(4);
96081ad6265SDimitry Andric   CASE(5);
96181ad6265SDimitry Andric   CASE(6);
96281ad6265SDimitry Andric   CASE(7);
96381ad6265SDimitry Andric   CASE(8);
96481ad6265SDimitry Andric   CASE(9);
96581ad6265SDimitry Andric   CASE(10);
96681ad6265SDimitry Andric   CASE(11);
96781ad6265SDimitry Andric   CASE(12);
96881ad6265SDimitry Andric   CASE(13);
96981ad6265SDimitry Andric   CASE(14);
97081ad6265SDimitry Andric   CASE(15);
97181ad6265SDimitry Andric   CASE(16);
97281ad6265SDimitry Andric   CASE(17);
97381ad6265SDimitry Andric   CASE(18);
97481ad6265SDimitry Andric   CASE(19);
97581ad6265SDimitry Andric   CASE(20);
97681ad6265SDimitry Andric   CASE(21);
97781ad6265SDimitry Andric   CASE(22);
97881ad6265SDimitry Andric   CASE(23);
97981ad6265SDimitry Andric   CASE(24);
98081ad6265SDimitry Andric   CASE(25);
98181ad6265SDimitry Andric   CASE(26);
98281ad6265SDimitry Andric   CASE(27);
98381ad6265SDimitry Andric   CASE(28);
98481ad6265SDimitry Andric   CASE(29);
98581ad6265SDimitry Andric   CASE(30);
98681ad6265SDimitry Andric   CASE(31);
98781ad6265SDimitry Andric #undef CASE
98881ad6265SDimitry Andric   }
98981ad6265SDimitry Andric }
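// A few concrete mappings that follow from the cases above: W5 maps to X5
// regardless of SVE, S3 maps to Q3 (or Z3 when SVE is available), and
// callee-saved GPRs such as X19 hit the default case and yield 0.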
99081ad6265SDimitry Andric 
99181ad6265SDimitry Andric void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
99281ad6265SDimitry Andric                                                 MachineBasicBlock &MBB) const {
99381ad6265SDimitry Andric   // Insertion point.
99481ad6265SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
99581ad6265SDimitry Andric 
99681ad6265SDimitry Andric   // Fake a debug loc.
99781ad6265SDimitry Andric   DebugLoc DL;
99881ad6265SDimitry Andric   if (MBBI != MBB.end())
99981ad6265SDimitry Andric     DL = MBBI->getDebugLoc();
100081ad6265SDimitry Andric 
100181ad6265SDimitry Andric   const MachineFunction &MF = *MBB.getParent();
100281ad6265SDimitry Andric   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
100381ad6265SDimitry Andric   const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
100481ad6265SDimitry Andric 
100581ad6265SDimitry Andric   BitVector GPRsToZero(TRI.getNumRegs());
100681ad6265SDimitry Andric   BitVector FPRsToZero(TRI.getNumRegs());
100781ad6265SDimitry Andric   bool HasSVE = STI.hasSVE();
100881ad6265SDimitry Andric   for (MCRegister Reg : RegsToZero.set_bits()) {
100981ad6265SDimitry Andric     if (TRI.isGeneralPurposeRegister(MF, Reg)) {
101081ad6265SDimitry Andric       // For GPRs, we only need to clear out the full 64-bit register.
101181ad6265SDimitry Andric       if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
101281ad6265SDimitry Andric         GPRsToZero.set(XReg);
10130fca6ea1SDimitry Andric     } else if (AArch64InstrInfo::isFpOrNEON(Reg)) {
101481ad6265SDimitry Andric       // For FPRs, record the widest register to clear (Q, or Z with SVE).
101581ad6265SDimitry Andric       if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
101681ad6265SDimitry Andric         FPRsToZero.set(XReg);
101781ad6265SDimitry Andric     }
101881ad6265SDimitry Andric   }
101981ad6265SDimitry Andric 
102081ad6265SDimitry Andric   const AArch64InstrInfo &TII = *STI.getInstrInfo();
102181ad6265SDimitry Andric 
102281ad6265SDimitry Andric   // Zero out GPRs.
102381ad6265SDimitry Andric   for (MCRegister Reg : GPRsToZero.set_bits())
10245f757f3fSDimitry Andric     TII.buildClearRegister(Reg, MBB, MBBI, DL);
102581ad6265SDimitry Andric 
102681ad6265SDimitry Andric   // Zero out FP/vector registers.
102781ad6265SDimitry Andric   for (MCRegister Reg : FPRsToZero.set_bits())
10285f757f3fSDimitry Andric     TII.buildClearRegister(Reg, MBB, MBBI, DL);
102981ad6265SDimitry Andric 
103081ad6265SDimitry Andric   if (HasSVE) {
103181ad6265SDimitry Andric     for (MCRegister PReg :
103281ad6265SDimitry Andric          {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
103381ad6265SDimitry Andric           AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
103481ad6265SDimitry Andric           AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
103581ad6265SDimitry Andric           AArch64::P15}) {
103681ad6265SDimitry Andric       if (RegsToZero[PReg])
103781ad6265SDimitry Andric         BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
103881ad6265SDimitry Andric     }
103981ad6265SDimitry Andric   }
104081ad6265SDimitry Andric }
104181ad6265SDimitry Andric 
10425f757f3fSDimitry Andric static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
10435f757f3fSDimitry Andric                                    const MachineBasicBlock &MBB) {
10445f757f3fSDimitry Andric   const MachineFunction *MF = MBB.getParent();
10455f757f3fSDimitry Andric   LiveRegs.addLiveIns(MBB);
10465f757f3fSDimitry Andric   // Mark callee-saved registers as used so we will not choose them.
10475f757f3fSDimitry Andric   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
10485f757f3fSDimitry Andric   for (unsigned i = 0; CSRegs[i]; ++i)
10495f757f3fSDimitry Andric     LiveRegs.addReg(CSRegs[i]);
10505f757f3fSDimitry Andric }
10515f757f3fSDimitry Andric 
10520b57cec5SDimitry Andric // Find a scratch register that we can use at the start of the prologue to
10530b57cec5SDimitry Andric // re-align the stack pointer.  We avoid using callee-save registers since they
10540b57cec5SDimitry Andric // may appear to be free when this is called from canUseAsPrologue (during
10550b57cec5SDimitry Andric // shrink wrapping), but then no longer be free when this is called from
10560b57cec5SDimitry Andric // emitPrologue.
10570b57cec5SDimitry Andric //
10580b57cec5SDimitry Andric // FIXME: This is a bit conservative, since in the above case we could use one
10590b57cec5SDimitry Andric // of the callee-save registers as a scratch temp to re-align the stack pointer,
10600b57cec5SDimitry Andric // but we would then have to make sure that we were in fact saving at least one
10610b57cec5SDimitry Andric // callee-save register in the prologue, which is additional complexity that
10620b57cec5SDimitry Andric // doesn't seem worth the benefit.
10630fca6ea1SDimitry Andric static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
10640b57cec5SDimitry Andric   MachineFunction *MF = MBB->getParent();
10650b57cec5SDimitry Andric 
10660b57cec5SDimitry Andric   // If MBB is an entry block, use X9 as the scratch register, unless the
10670fca6ea1SDimitry Andric   // function uses the preserve_none calling convention: such functions may be
10680fca6ea1SDimitry Andric   // using X9 to pass arguments, so prefer an available register below.
10690fca6ea1SDimitry Andric   if (&MF->front() == MBB &&
10700fca6ea1SDimitry Andric       MF->getFunction().getCallingConv() != CallingConv::PreserveNone)
10710b57cec5SDimitry Andric     return AArch64::X9;
10720b57cec5SDimitry Andric 
10730b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
10740b57cec5SDimitry Andric   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
10750b57cec5SDimitry Andric   LivePhysRegs LiveRegs(TRI);
10765f757f3fSDimitry Andric   getLiveRegsForEntryMBB(LiveRegs, *MBB);
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric   // Prefer X9 since it was historically used for the prologue scratch reg.
10790b57cec5SDimitry Andric   const MachineRegisterInfo &MRI = MF->getRegInfo();
10800b57cec5SDimitry Andric   if (LiveRegs.available(MRI, AArch64::X9))
10810b57cec5SDimitry Andric     return AArch64::X9;
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric   for (unsigned Reg : AArch64::GPR64RegClass) {
10840b57cec5SDimitry Andric     if (LiveRegs.available(MRI, Reg))
10850b57cec5SDimitry Andric       return Reg;
10860b57cec5SDimitry Andric   }
10870b57cec5SDimitry Andric   return AArch64::NoRegister;
10880b57cec5SDimitry Andric }
10890b57cec5SDimitry Andric 
10900b57cec5SDimitry Andric bool AArch64FrameLowering::canUseAsPrologue(
10910b57cec5SDimitry Andric     const MachineBasicBlock &MBB) const {
10920b57cec5SDimitry Andric   const MachineFunction *MF = MBB.getParent();
10930b57cec5SDimitry Andric   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
10940b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
10950b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
10965f757f3fSDimitry Andric   const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
10975f757f3fSDimitry Andric   const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
10980b57cec5SDimitry Andric 
10995f757f3fSDimitry Andric   if (AFI->hasSwiftAsyncContext()) {
11005f757f3fSDimitry Andric     const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
11015f757f3fSDimitry Andric     const MachineRegisterInfo &MRI = MF->getRegInfo();
11025f757f3fSDimitry Andric     LivePhysRegs LiveRegs(TRI);
11035f757f3fSDimitry Andric     getLiveRegsForEntryMBB(LiveRegs, MBB);
11045f757f3fSDimitry Andric     // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are
11055f757f3fSDimitry Andric     // available.
11065f757f3fSDimitry Andric     if (!LiveRegs.available(MRI, AArch64::X16) ||
11075f757f3fSDimitry Andric         !LiveRegs.available(MRI, AArch64::X17))
11085f757f3fSDimitry Andric       return false;
11095f757f3fSDimitry Andric   }
11105f757f3fSDimitry Andric 
11110fca6ea1SDimitry Andric   // Certain stack probing sequences might clobber flags, so we can't use
11120fca6ea1SDimitry Andric   // the block as a prologue if the flags register is a live-in.
11130fca6ea1SDimitry Andric   if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() &&
11140fca6ea1SDimitry Andric       MBB.isLiveIn(AArch64::NZCV))
11150fca6ea1SDimitry Andric     return false;
11160fca6ea1SDimitry Andric 
11175f757f3fSDimitry Andric   // Don't need a scratch register if we're not going to re-align the stack or
11185f757f3fSDimitry Andric   // emit stack probes.
11190fca6ea1SDimitry Andric   if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
11200b57cec5SDimitry Andric     return true;
11210b57cec5SDimitry Andric   // Otherwise, we can use any block as long as it has a scratch register
11220b57cec5SDimitry Andric   // available.
11230b57cec5SDimitry Andric   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
11240b57cec5SDimitry Andric }
11250b57cec5SDimitry Andric 
11260b57cec5SDimitry Andric static bool windowsRequiresStackProbe(MachineFunction &MF,
1127480093f4SDimitry Andric                                       uint64_t StackSizeInBytes) {
11280b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
11295f757f3fSDimitry Andric   const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
11300b57cec5SDimitry Andric   // TODO: When implementing stack protectors, take that into account
11310b57cec5SDimitry Andric   // for the probe threshold.
11325f757f3fSDimitry Andric   return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
11335f757f3fSDimitry Andric          StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
11340b57cec5SDimitry Andric }
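// For instance, assuming a probe size of 4096 bytes (the actual value comes
// from MFI.getStackProbeSize()), a Windows function with a 16 KiB frame would
// require probing, while a 1 KiB frame would not.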
11350b57cec5SDimitry Andric 
1136e8d8bef9SDimitry Andric static bool needsWinCFI(const MachineFunction &MF) {
1137e8d8bef9SDimitry Andric   const Function &F = MF.getFunction();
1138e8d8bef9SDimitry Andric   return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1139e8d8bef9SDimitry Andric          F.needsUnwindTableEntry();
1140e8d8bef9SDimitry Andric }
1141e8d8bef9SDimitry Andric 
11420b57cec5SDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
1143480093f4SDimitry Andric     MachineFunction &MF, uint64_t StackBumpBytes) const {
11440b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
11450b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
11460b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
11470b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1148fe6060f1SDimitry Andric   if (homogeneousPrologEpilog(MF))
1149fe6060f1SDimitry Andric     return false;
11500b57cec5SDimitry Andric 
11510b57cec5SDimitry Andric   if (AFI->getLocalStackSize() == 0)
11520b57cec5SDimitry Andric     return false;
11530b57cec5SDimitry Andric 
1154e8d8bef9SDimitry Andric   // For WinCFI, if optimizing for size, prefer to not combine the stack bump
1155e8d8bef9SDimitry Andric   // (to force a stp with predecrement) to match the packed unwind format,
1156e8d8bef9SDimitry Andric   // provided that there actually are any callee saved registers to merge the
1157e8d8bef9SDimitry Andric   // decrement with.
1158e8d8bef9SDimitry Andric   // This is potentially marginally slower, but allows using the packed
1159e8d8bef9SDimitry Andric   // unwind format for functions that both have a local area and callee saved
1160e8d8bef9SDimitry Andric   // registers. Using the packed unwind format notably reduces the size of
1161e8d8bef9SDimitry Andric   // the unwind info.
1162e8d8bef9SDimitry Andric   if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
1163e8d8bef9SDimitry Andric       MF.getFunction().hasOptSize())
1164e8d8bef9SDimitry Andric     return false;
1165e8d8bef9SDimitry Andric 
11660b57cec5SDimitry Andric   // 512 is the maximum immediate for stp/ldp that will be used for
11670b57cec5SDimitry Andric   // callee-save save/restores.
11680b57cec5SDimitry Andric   if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
11690b57cec5SDimitry Andric     return false;
11700b57cec5SDimitry Andric 
11710b57cec5SDimitry Andric   if (MFI.hasVarSizedObjects())
11720b57cec5SDimitry Andric     return false;
11730b57cec5SDimitry Andric 
1174fe6060f1SDimitry Andric   if (RegInfo->hasStackRealignment(MF))
11750b57cec5SDimitry Andric     return false;
11760b57cec5SDimitry Andric 
11770b57cec5SDimitry Andric   // This isn't strictly necessary, but it simplifies things a bit since the
11780b57cec5SDimitry Andric   // current RedZone handling code assumes the SP is adjusted by the
11790b57cec5SDimitry Andric   // callee-save save/restore code.
11800b57cec5SDimitry Andric   if (canUseRedZone(MF))
11810b57cec5SDimitry Andric     return false;
11820b57cec5SDimitry Andric 
11838bcb0991SDimitry Andric   // When there is an SVE area on the stack, always allocate the
11848bcb0991SDimitry Andric   // callee-saves and spills/locals separately.
11858bcb0991SDimitry Andric   if (getSVEStackSize(MF))
11868bcb0991SDimitry Andric     return false;
11878bcb0991SDimitry Andric 
11880b57cec5SDimitry Andric   return true;
11890b57cec5SDimitry Andric }
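// When the bump is combined, a single pre-decrement store both spills the
// callee-saves and allocates the local area; an illustrative sketch (not
// taken from generated code) with 16 bytes of callee-saves and 32 bytes of
// locals:
//
//   stp x29, x30, [sp, #-48]!      // combined bump
// versus
//   stp x29, x30, [sp, #-16]!      // separate callee-save bump
//   sub sp, sp, #32                // separate local-area bump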
11900b57cec5SDimitry Andric 
11915ffd83dbSDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
11925ffd83dbSDimitry Andric     MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
11935ffd83dbSDimitry Andric   if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
11945ffd83dbSDimitry Andric     return false;
11955ffd83dbSDimitry Andric 
11965ffd83dbSDimitry Andric   if (MBB.empty())
11975ffd83dbSDimitry Andric     return true;
11985ffd83dbSDimitry Andric 
11995ffd83dbSDimitry Andric   // Disable combined SP bump if the last instruction is an MTE tag store. It
12005ffd83dbSDimitry Andric   // is almost always better to merge SP adjustment into those instructions.
12015ffd83dbSDimitry Andric   MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
12025ffd83dbSDimitry Andric   MachineBasicBlock::iterator Begin = MBB.begin();
12035ffd83dbSDimitry Andric   while (LastI != Begin) {
12045ffd83dbSDimitry Andric     --LastI;
12055ffd83dbSDimitry Andric     if (LastI->isTransient())
12065ffd83dbSDimitry Andric       continue;
12075ffd83dbSDimitry Andric     if (!LastI->getFlag(MachineInstr::FrameDestroy))
12085ffd83dbSDimitry Andric       break;
12095ffd83dbSDimitry Andric   }
12105ffd83dbSDimitry Andric   switch (LastI->getOpcode()) {
12115ffd83dbSDimitry Andric   case AArch64::STGloop:
12125ffd83dbSDimitry Andric   case AArch64::STZGloop:
121306c3fb27SDimitry Andric   case AArch64::STGi:
121406c3fb27SDimitry Andric   case AArch64::STZGi:
121506c3fb27SDimitry Andric   case AArch64::ST2Gi:
121606c3fb27SDimitry Andric   case AArch64::STZ2Gi:
12175ffd83dbSDimitry Andric     return false;
12185ffd83dbSDimitry Andric   default:
12195ffd83dbSDimitry Andric     return true;
12205ffd83dbSDimitry Andric   }
12215ffd83dbSDimitry Andric   llvm_unreachable("unreachable");
12225ffd83dbSDimitry Andric }
12235ffd83dbSDimitry Andric 
12240b57cec5SDimitry Andric // Given a load or a store instruction, generate the appropriate SEH unwind
12250b57cec5SDimitry Andric // code on Windows.
12260b57cec5SDimitry Andric static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
12270b57cec5SDimitry Andric                                              const TargetInstrInfo &TII,
12280b57cec5SDimitry Andric                                              MachineInstr::MIFlag Flag) {
12290b57cec5SDimitry Andric   unsigned Opc = MBBI->getOpcode();
12300b57cec5SDimitry Andric   MachineBasicBlock *MBB = MBBI->getParent();
12310b57cec5SDimitry Andric   MachineFunction &MF = *MBB->getParent();
12320b57cec5SDimitry Andric   DebugLoc DL = MBBI->getDebugLoc();
12330b57cec5SDimitry Andric   unsigned ImmIdx = MBBI->getNumOperands() - 1;
12340b57cec5SDimitry Andric   int Imm = MBBI->getOperand(ImmIdx).getImm();
12350b57cec5SDimitry Andric   MachineInstrBuilder MIB;
12360b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
12370b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
12380b57cec5SDimitry Andric 
12390b57cec5SDimitry Andric   switch (Opc) {
12400b57cec5SDimitry Andric   default:
12410b57cec5SDimitry Andric     llvm_unreachable("No SEH Opcode for this instruction");
12420b57cec5SDimitry Andric   case AArch64::LDPDpost:
12430b57cec5SDimitry Andric     Imm = -Imm;
1244bdd1243dSDimitry Andric     [[fallthrough]];
12450b57cec5SDimitry Andric   case AArch64::STPDpre: {
12460b57cec5SDimitry Andric     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12470b57cec5SDimitry Andric     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
12480b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
12490b57cec5SDimitry Andric               .addImm(Reg0)
12500b57cec5SDimitry Andric               .addImm(Reg1)
12510b57cec5SDimitry Andric               .addImm(Imm * 8)
12520b57cec5SDimitry Andric               .setMIFlag(Flag);
12530b57cec5SDimitry Andric     break;
12540b57cec5SDimitry Andric   }
12550b57cec5SDimitry Andric   case AArch64::LDPXpost:
12560b57cec5SDimitry Andric     Imm = -Imm;
1257bdd1243dSDimitry Andric     [[fallthrough]];
12580b57cec5SDimitry Andric   case AArch64::STPXpre: {
12598bcb0991SDimitry Andric     Register Reg0 = MBBI->getOperand(1).getReg();
12608bcb0991SDimitry Andric     Register Reg1 = MBBI->getOperand(2).getReg();
12610b57cec5SDimitry Andric     if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
12620b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
12630b57cec5SDimitry Andric                 .addImm(Imm * 8)
12640b57cec5SDimitry Andric                 .setMIFlag(Flag);
12650b57cec5SDimitry Andric     else
12660b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
12670b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg0))
12680b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg1))
12690b57cec5SDimitry Andric                 .addImm(Imm * 8)
12700b57cec5SDimitry Andric                 .setMIFlag(Flag);
12710b57cec5SDimitry Andric     break;
12720b57cec5SDimitry Andric   }
12730b57cec5SDimitry Andric   case AArch64::LDRDpost:
12740b57cec5SDimitry Andric     Imm = -Imm;
1275bdd1243dSDimitry Andric     [[fallthrough]];
12760b57cec5SDimitry Andric   case AArch64::STRDpre: {
12770b57cec5SDimitry Andric     unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12780b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
12790b57cec5SDimitry Andric               .addImm(Reg)
12800b57cec5SDimitry Andric               .addImm(Imm)
12810b57cec5SDimitry Andric               .setMIFlag(Flag);
12820b57cec5SDimitry Andric     break;
12830b57cec5SDimitry Andric   }
12840b57cec5SDimitry Andric   case AArch64::LDRXpost:
12850b57cec5SDimitry Andric     Imm = -Imm;
1286bdd1243dSDimitry Andric     [[fallthrough]];
12870b57cec5SDimitry Andric   case AArch64::STRXpre: {
12880b57cec5SDimitry Andric     unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12890b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
12900b57cec5SDimitry Andric               .addImm(Reg)
12910b57cec5SDimitry Andric               .addImm(Imm)
12920b57cec5SDimitry Andric               .setMIFlag(Flag);
12930b57cec5SDimitry Andric     break;
12940b57cec5SDimitry Andric   }
12950b57cec5SDimitry Andric   case AArch64::STPDi:
12960b57cec5SDimitry Andric   case AArch64::LDPDi: {
12970b57cec5SDimitry Andric     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
12980b57cec5SDimitry Andric     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12990b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
13000b57cec5SDimitry Andric               .addImm(Reg0)
13010b57cec5SDimitry Andric               .addImm(Reg1)
13020b57cec5SDimitry Andric               .addImm(Imm * 8)
13030b57cec5SDimitry Andric               .setMIFlag(Flag);
13040b57cec5SDimitry Andric     break;
13050b57cec5SDimitry Andric   }
13060b57cec5SDimitry Andric   case AArch64::STPXi:
13070b57cec5SDimitry Andric   case AArch64::LDPXi: {
13088bcb0991SDimitry Andric     Register Reg0 = MBBI->getOperand(0).getReg();
13098bcb0991SDimitry Andric     Register Reg1 = MBBI->getOperand(1).getReg();
13100b57cec5SDimitry Andric     if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
13110b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
13120b57cec5SDimitry Andric                 .addImm(Imm * 8)
13130b57cec5SDimitry Andric                 .setMIFlag(Flag);
13140b57cec5SDimitry Andric     else
13150b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
13160b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg0))
13170b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg1))
13180b57cec5SDimitry Andric                 .addImm(Imm * 8)
13190b57cec5SDimitry Andric                 .setMIFlag(Flag);
13200b57cec5SDimitry Andric     break;
13210b57cec5SDimitry Andric   }
13220b57cec5SDimitry Andric   case AArch64::STRXui:
13230b57cec5SDimitry Andric   case AArch64::LDRXui: {
13240b57cec5SDimitry Andric     int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
13250b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
13260b57cec5SDimitry Andric               .addImm(Reg)
13270b57cec5SDimitry Andric               .addImm(Imm * 8)
13280b57cec5SDimitry Andric               .setMIFlag(Flag);
13290b57cec5SDimitry Andric     break;
13300b57cec5SDimitry Andric   }
13310b57cec5SDimitry Andric   case AArch64::STRDui:
13320b57cec5SDimitry Andric   case AArch64::LDRDui: {
13330b57cec5SDimitry Andric     unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
13340b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
13350b57cec5SDimitry Andric               .addImm(Reg)
13360b57cec5SDimitry Andric               .addImm(Imm * 8)
13370b57cec5SDimitry Andric               .setMIFlag(Flag);
13380b57cec5SDimitry Andric     break;
13390b57cec5SDimitry Andric   }
13407a6dacacSDimitry Andric   case AArch64::STPQi:
13417a6dacacSDimitry Andric   case AArch64::LDPQi: {
13427a6dacacSDimitry Andric     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
13437a6dacacSDimitry Andric     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
13447a6dacacSDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
13457a6dacacSDimitry Andric               .addImm(Reg0)
13467a6dacacSDimitry Andric               .addImm(Reg1)
13477a6dacacSDimitry Andric               .addImm(Imm * 16)
13487a6dacacSDimitry Andric               .setMIFlag(Flag);
13497a6dacacSDimitry Andric     break;
13507a6dacacSDimitry Andric   }
13517a6dacacSDimitry Andric   case AArch64::LDPQpost:
13527a6dacacSDimitry Andric     Imm = -Imm;
13530fca6ea1SDimitry Andric     [[fallthrough]];
13547a6dacacSDimitry Andric   case AArch64::STPQpre: {
13557a6dacacSDimitry Andric     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
13567a6dacacSDimitry Andric     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
13577a6dacacSDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
13587a6dacacSDimitry Andric               .addImm(Reg0)
13597a6dacacSDimitry Andric               .addImm(Reg1)
13607a6dacacSDimitry Andric               .addImm(Imm * 16)
13617a6dacacSDimitry Andric               .setMIFlag(Flag);
13627a6dacacSDimitry Andric     break;
13637a6dacacSDimitry Andric   }
13640b57cec5SDimitry Andric   }
13650b57cec5SDimitry Andric   auto I = MBB->insertAfter(MBBI, MIB);
13660b57cec5SDimitry Andric   return I;
13670b57cec5SDimitry Andric }
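// As an illustration: a prologue spill such as
//   stp x19, x20, [sp, #-32]!      (AArch64::STPXpre)
// is paired with an AArch64::SEH_SaveRegP_X pseudo recording both registers
// and the byte-scaled stack adjustment, which the asm printer later turns
// into the corresponding .seh_save_regp_x unwind directive (directive name
// given here for orientation only).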
13680b57cec5SDimitry Andric 
13690b57cec5SDimitry Andric // Fix up the SEH opcode associated with the save/restore instruction.
13700b57cec5SDimitry Andric static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
13710b57cec5SDimitry Andric                            unsigned LocalStackSize) {
13720b57cec5SDimitry Andric   MachineOperand *ImmOpnd = nullptr;
13730b57cec5SDimitry Andric   unsigned ImmIdx = MBBI->getNumOperands() - 1;
13740b57cec5SDimitry Andric   switch (MBBI->getOpcode()) {
13750b57cec5SDimitry Andric   default:
13760b57cec5SDimitry Andric     llvm_unreachable("Fix the offset in the SEH instruction");
13770b57cec5SDimitry Andric   case AArch64::SEH_SaveFPLR:
13780b57cec5SDimitry Andric   case AArch64::SEH_SaveRegP:
13790b57cec5SDimitry Andric   case AArch64::SEH_SaveReg:
13800b57cec5SDimitry Andric   case AArch64::SEH_SaveFRegP:
13810b57cec5SDimitry Andric   case AArch64::SEH_SaveFReg:
13827a6dacacSDimitry Andric   case AArch64::SEH_SaveAnyRegQP:
13837a6dacacSDimitry Andric   case AArch64::SEH_SaveAnyRegQPX:
13840b57cec5SDimitry Andric     ImmOpnd = &MBBI->getOperand(ImmIdx);
13850b57cec5SDimitry Andric     break;
13860b57cec5SDimitry Andric   }
13870b57cec5SDimitry Andric   if (ImmOpnd)
13880b57cec5SDimitry Andric     ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
13890b57cec5SDimitry Andric }
13900b57cec5SDimitry Andric 
13910fca6ea1SDimitry Andric bool requiresGetVGCall(MachineFunction &MF) {
13920fca6ea1SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
13930fca6ea1SDimitry Andric   return AFI->hasStreamingModeChanges() &&
13940fca6ea1SDimitry Andric          !MF.getSubtarget<AArch64Subtarget>().hasSVE();
13950fca6ea1SDimitry Andric }
13960fca6ea1SDimitry Andric 
13970fca6ea1SDimitry Andric bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
13980fca6ea1SDimitry Andric   unsigned Opc = MBBI->getOpcode();
13990fca6ea1SDimitry Andric   if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
14000fca6ea1SDimitry Andric       Opc == AArch64::UBFMXri)
14010fca6ea1SDimitry Andric     return true;
14020fca6ea1SDimitry Andric 
14030fca6ea1SDimitry Andric   if (requiresGetVGCall(*MBBI->getMF())) {
14040fca6ea1SDimitry Andric     if (Opc == AArch64::ORRXrr)
14050fca6ea1SDimitry Andric       return true;
14060fca6ea1SDimitry Andric 
14070fca6ea1SDimitry Andric     if (Opc == AArch64::BL) {
14080fca6ea1SDimitry Andric       auto Op1 = MBBI->getOperand(0);
14090fca6ea1SDimitry Andric       return Op1.isSymbol() &&
14100fca6ea1SDimitry Andric              (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
14110fca6ea1SDimitry Andric     }
14120fca6ea1SDimitry Andric   }
14130fca6ea1SDimitry Andric 
14140fca6ea1SDimitry Andric   return false;
14150fca6ea1SDimitry Andric }
14160fca6ea1SDimitry Andric 
14170b57cec5SDimitry Andric // Convert a callee-save register save/restore instruction into a stack
14180b57cec5SDimitry Andric // pointer decrement/increment that allocates/deallocates the callee-save
14190b57cec5SDimitry Andric // stack area, by converting the store/load to its pre/post-increment form.
14200b57cec5SDimitry Andric static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
14210b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
14220b57cec5SDimitry Andric     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
142381ad6265SDimitry Andric     bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
142481ad6265SDimitry Andric     MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
142581ad6265SDimitry Andric     int CFAOffset = 0) {
14260b57cec5SDimitry Andric   unsigned NewOpc;
14270fca6ea1SDimitry Andric 
14280fca6ea1SDimitry Andric   // If the function contains streaming mode changes, we expect instructions
14290fca6ea1SDimitry Andric   // to calculate the value of VG before spilling. For locally-streaming
14300fca6ea1SDimitry Andric   // functions, we need to do this for both the streaming and non-streaming
14310fca6ea1SDimitry Andric   // vector length. Move past these instructions if necessary.
14320fca6ea1SDimitry Andric   MachineFunction &MF = *MBB.getParent();
14330fca6ea1SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
14340fca6ea1SDimitry Andric   if (AFI->hasStreamingModeChanges())
14350fca6ea1SDimitry Andric     while (isVGInstruction(MBBI))
14360fca6ea1SDimitry Andric       ++MBBI;
14370fca6ea1SDimitry Andric 
14380b57cec5SDimitry Andric   switch (MBBI->getOpcode()) {
14390b57cec5SDimitry Andric   default:
14400b57cec5SDimitry Andric     llvm_unreachable("Unexpected callee-save save/restore opcode!");
14410b57cec5SDimitry Andric   case AArch64::STPXi:
14420b57cec5SDimitry Andric     NewOpc = AArch64::STPXpre;
14430b57cec5SDimitry Andric     break;
14440b57cec5SDimitry Andric   case AArch64::STPDi:
14450b57cec5SDimitry Andric     NewOpc = AArch64::STPDpre;
14460b57cec5SDimitry Andric     break;
14470b57cec5SDimitry Andric   case AArch64::STPQi:
14480b57cec5SDimitry Andric     NewOpc = AArch64::STPQpre;
14490b57cec5SDimitry Andric     break;
14500b57cec5SDimitry Andric   case AArch64::STRXui:
14510b57cec5SDimitry Andric     NewOpc = AArch64::STRXpre;
14520b57cec5SDimitry Andric     break;
14530b57cec5SDimitry Andric   case AArch64::STRDui:
14540b57cec5SDimitry Andric     NewOpc = AArch64::STRDpre;
14550b57cec5SDimitry Andric     break;
14560b57cec5SDimitry Andric   case AArch64::STRQui:
14570b57cec5SDimitry Andric     NewOpc = AArch64::STRQpre;
14580b57cec5SDimitry Andric     break;
14590b57cec5SDimitry Andric   case AArch64::LDPXi:
14600b57cec5SDimitry Andric     NewOpc = AArch64::LDPXpost;
14610b57cec5SDimitry Andric     break;
14620b57cec5SDimitry Andric   case AArch64::LDPDi:
14630b57cec5SDimitry Andric     NewOpc = AArch64::LDPDpost;
14640b57cec5SDimitry Andric     break;
14650b57cec5SDimitry Andric   case AArch64::LDPQi:
14660b57cec5SDimitry Andric     NewOpc = AArch64::LDPQpost;
14670b57cec5SDimitry Andric     break;
14680b57cec5SDimitry Andric   case AArch64::LDRXui:
14690b57cec5SDimitry Andric     NewOpc = AArch64::LDRXpost;
14700b57cec5SDimitry Andric     break;
14710b57cec5SDimitry Andric   case AArch64::LDRDui:
14720b57cec5SDimitry Andric     NewOpc = AArch64::LDRDpost;
14730b57cec5SDimitry Andric     break;
14740b57cec5SDimitry Andric   case AArch64::LDRQui:
14750b57cec5SDimitry Andric     NewOpc = AArch64::LDRQpost;
14760b57cec5SDimitry Andric     break;
14770b57cec5SDimitry Andric   }
14780b57cec5SDimitry Andric   // Get rid of the SEH code associated with the old instruction.
14790b57cec5SDimitry Andric   if (NeedsWinCFI) {
14800b57cec5SDimitry Andric     auto SEH = std::next(MBBI);
14810b57cec5SDimitry Andric     if (AArch64InstrInfo::isSEHInstruction(*SEH))
14820b57cec5SDimitry Andric       SEH->eraseFromParent();
14830b57cec5SDimitry Andric   }
14840b57cec5SDimitry Andric 
14855f757f3fSDimitry Andric   TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
1486fe6060f1SDimitry Andric   int64_t MinOffset, MaxOffset;
1487fe6060f1SDimitry Andric   bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
1488fe6060f1SDimitry Andric       NewOpc, Scale, Width, MinOffset, MaxOffset);
1489fe6060f1SDimitry Andric   (void)Success;
1490fe6060f1SDimitry Andric   assert(Success && "unknown load/store opcode");
1491fe6060f1SDimitry Andric 
1492fe6060f1SDimitry Andric   // If the first store isn't right where we want SP, then we can't fold the
1493fe6060f1SDimitry Andric   // update in, so create a normal arithmetic instruction instead.
1494fe6060f1SDimitry Andric   if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
1495fe6060f1SDimitry Andric       CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
14960fca6ea1SDimitry Andric     // If we are destroying the frame, make sure we add the increment after the
14970fca6ea1SDimitry Andric     // last frame operation.
14980fca6ea1SDimitry Andric     if (FrameFlag == MachineInstr::FrameDestroy)
14990fca6ea1SDimitry Andric       ++MBBI;
1500fe6060f1SDimitry Andric     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
150181ad6265SDimitry Andric                     StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
150281ad6265SDimitry Andric                     false, false, nullptr, EmitCFI,
150381ad6265SDimitry Andric                     StackOffset::getFixed(CFAOffset));
150481ad6265SDimitry Andric 
1505fe6060f1SDimitry Andric     return std::prev(MBBI);
1506fe6060f1SDimitry Andric   }
1507fe6060f1SDimitry Andric 
15080b57cec5SDimitry Andric   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
15090b57cec5SDimitry Andric   MIB.addReg(AArch64::SP, RegState::Define);
15100b57cec5SDimitry Andric 
15110b57cec5SDimitry Andric   // Copy all operands other than the immediate offset.
15120b57cec5SDimitry Andric   unsigned OpndIdx = 0;
15130b57cec5SDimitry Andric   for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
15140b57cec5SDimitry Andric        ++OpndIdx)
15150b57cec5SDimitry Andric     MIB.add(MBBI->getOperand(OpndIdx));
15160b57cec5SDimitry Andric 
15170b57cec5SDimitry Andric   assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
15180b57cec5SDimitry Andric          "Unexpected immediate offset in first/last callee-save save/restore "
15190b57cec5SDimitry Andric          "instruction!");
15200b57cec5SDimitry Andric   assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
15210b57cec5SDimitry Andric          "Unexpected base register in callee-save save/restore instruction!");
15220b57cec5SDimitry Andric   assert(CSStackSizeInc % Scale == 0);
1523fe6060f1SDimitry Andric   MIB.addImm(CSStackSizeInc / (int)Scale);
15240b57cec5SDimitry Andric 
15250b57cec5SDimitry Andric   MIB.setMIFlags(MBBI->getFlags());
15260b57cec5SDimitry Andric   MIB.setMemRefs(MBBI->memoperands());
15270b57cec5SDimitry Andric 
15280b57cec5SDimitry Andric   // Generate a new SEH code that corresponds to the new instruction.
15290b57cec5SDimitry Andric   if (NeedsWinCFI) {
15300b57cec5SDimitry Andric     *HasWinCFI = true;
153181ad6265SDimitry Andric     InsertSEH(*MIB, *TII, FrameFlag);
153281ad6265SDimitry Andric   }
153381ad6265SDimitry Andric 
153481ad6265SDimitry Andric   if (EmitCFI) {
153581ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(
153681ad6265SDimitry Andric         MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc));
153781ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
153881ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
153981ad6265SDimitry Andric         .setMIFlags(FrameFlag);
15400b57cec5SDimitry Andric   }
15410b57cec5SDimitry Andric 
15420b57cec5SDimitry Andric   return std::prev(MBB.erase(MBBI));
15430b57cec5SDimitry Andric }
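// As an illustration, a callee-save spill emitted as
//   stp x29, x30, [sp]             (AArch64::STPXi, offset 0)
// with CSStackSizeInc == -16 is rewritten into the pre-decrement form
//   stp x29, x30, [sp, #-16]!      (AArch64::STPXpre)
// folding the stack allocation into the store itself.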
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric // Fix up callee-save register save/restore instructions to take the combined
15460b57cec5SDimitry Andric // SP bump into account by adding the local stack size to the stack offsets.
15470b57cec5SDimitry Andric static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
1548480093f4SDimitry Andric                                               uint64_t LocalStackSize,
15490b57cec5SDimitry Andric                                               bool NeedsWinCFI,
15500b57cec5SDimitry Andric                                               bool *HasWinCFI) {
15510b57cec5SDimitry Andric   if (AArch64InstrInfo::isSEHInstruction(MI))
15520b57cec5SDimitry Andric     return;
15530b57cec5SDimitry Andric 
15540b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
15550b57cec5SDimitry Andric   unsigned Scale;
15560b57cec5SDimitry Andric   switch (Opc) {
15570b57cec5SDimitry Andric   case AArch64::STPXi:
15580b57cec5SDimitry Andric   case AArch64::STRXui:
15590b57cec5SDimitry Andric   case AArch64::STPDi:
15600b57cec5SDimitry Andric   case AArch64::STRDui:
15610b57cec5SDimitry Andric   case AArch64::LDPXi:
15620b57cec5SDimitry Andric   case AArch64::LDRXui:
15630b57cec5SDimitry Andric   case AArch64::LDPDi:
15640b57cec5SDimitry Andric   case AArch64::LDRDui:
15650b57cec5SDimitry Andric     Scale = 8;
15660b57cec5SDimitry Andric     break;
15670b57cec5SDimitry Andric   case AArch64::STPQi:
15680b57cec5SDimitry Andric   case AArch64::STRQui:
15690b57cec5SDimitry Andric   case AArch64::LDPQi:
15700b57cec5SDimitry Andric   case AArch64::LDRQui:
15710b57cec5SDimitry Andric     Scale = 16;
15720b57cec5SDimitry Andric     break;
15730b57cec5SDimitry Andric   default:
15740b57cec5SDimitry Andric     llvm_unreachable("Unexpected callee-save save/restore opcode!");
15750b57cec5SDimitry Andric   }
15760b57cec5SDimitry Andric 
15770b57cec5SDimitry Andric   unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
15780b57cec5SDimitry Andric   assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
15790b57cec5SDimitry Andric          "Unexpected base register in callee-save save/restore instruction!");
15800b57cec5SDimitry Andric   // Last operand is immediate offset that needs fixing.
15810b57cec5SDimitry Andric   MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
15820b57cec5SDimitry Andric   // All generated opcodes have scaled offsets.
15830b57cec5SDimitry Andric   assert(LocalStackSize % Scale == 0);
15840b57cec5SDimitry Andric   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
15850b57cec5SDimitry Andric 
15860b57cec5SDimitry Andric   if (NeedsWinCFI) {
15870b57cec5SDimitry Andric     *HasWinCFI = true;
15880b57cec5SDimitry Andric     auto MBBI = std::next(MachineBasicBlock::iterator(MI));
15890b57cec5SDimitry Andric     assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
15900b57cec5SDimitry Andric     assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
15910b57cec5SDimitry Andric            "Expecting a SEH instruction");
15920b57cec5SDimitry Andric     fixupSEHOpcode(MBBI, LocalStackSize);
15930b57cec5SDimitry Andric   }
15940b57cec5SDimitry Andric }
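// For example, with LocalStackSize == 32 a spill that was emitted as
//   stp x19, x20, [sp, #16]
// is rewritten to
//   stp x19, x20, [sp, #48]
// since the scaled immediate operand changes from 2 to 2 + 32 / 8 == 6.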
15950b57cec5SDimitry Andric 
1596480093f4SDimitry Andric static bool isTargetWindows(const MachineFunction &MF) {
1597480093f4SDimitry Andric   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
1598480093f4SDimitry Andric }
1599480093f4SDimitry Andric 
1600480093f4SDimitry Andric // Convenience function to determine whether I is an SVE callee save.
1601480093f4SDimitry Andric static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
1602480093f4SDimitry Andric   switch (I->getOpcode()) {
1603480093f4SDimitry Andric   default:
1604480093f4SDimitry Andric     return false;
16050fca6ea1SDimitry Andric   case AArch64::PTRUE_C_B:
16060fca6ea1SDimitry Andric   case AArch64::LD1B_2Z_IMM:
16070fca6ea1SDimitry Andric   case AArch64::ST1B_2Z_IMM:
1608480093f4SDimitry Andric   case AArch64::STR_ZXI:
1609480093f4SDimitry Andric   case AArch64::STR_PXI:
1610480093f4SDimitry Andric   case AArch64::LDR_ZXI:
1611480093f4SDimitry Andric   case AArch64::LDR_PXI:
1612480093f4SDimitry Andric     return I->getFlag(MachineInstr::FrameSetup) ||
1613480093f4SDimitry Andric            I->getFlag(MachineInstr::FrameDestroy);
1614480093f4SDimitry Andric   }
1615480093f4SDimitry Andric }
1616480093f4SDimitry Andric 
161781ad6265SDimitry Andric static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
161881ad6265SDimitry Andric                                         MachineFunction &MF,
161981ad6265SDimitry Andric                                         MachineBasicBlock &MBB,
162081ad6265SDimitry Andric                                         MachineBasicBlock::iterator MBBI,
162181ad6265SDimitry Andric                                         const DebugLoc &DL, bool NeedsWinCFI,
162281ad6265SDimitry Andric                                         bool NeedsUnwindInfo) {
162381ad6265SDimitry Andric   // Shadow call stack prolog: str x30, [x18], #8
162481ad6265SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
162581ad6265SDimitry Andric       .addReg(AArch64::X18, RegState::Define)
162681ad6265SDimitry Andric       .addReg(AArch64::LR)
162781ad6265SDimitry Andric       .addReg(AArch64::X18)
162881ad6265SDimitry Andric       .addImm(8)
162981ad6265SDimitry Andric       .setMIFlag(MachineInstr::FrameSetup);
163081ad6265SDimitry Andric 
163181ad6265SDimitry Andric   // This instruction also makes x18 live-in to the entry block.
163281ad6265SDimitry Andric   MBB.addLiveIn(AArch64::X18);
163381ad6265SDimitry Andric 
163481ad6265SDimitry Andric   if (NeedsWinCFI)
163581ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
163681ad6265SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
163781ad6265SDimitry Andric 
163881ad6265SDimitry Andric   if (NeedsUnwindInfo) {
163981ad6265SDimitry Andric     // Emit a CFI instruction that causes 8 to be subtracted from the value of
164081ad6265SDimitry Andric     // x18 when unwinding past this frame.
164181ad6265SDimitry Andric     static const char CFIInst[] = {
164281ad6265SDimitry Andric         dwarf::DW_CFA_val_expression,
164381ad6265SDimitry Andric         18, // register
164481ad6265SDimitry Andric         2,  // length
164581ad6265SDimitry Andric         static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
164681ad6265SDimitry Andric         static_cast<char>(-8) & 0x7f, // addend (sleb128)
164781ad6265SDimitry Andric     };
164881ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
164981ad6265SDimitry Andric         nullptr, StringRef(CFIInst, sizeof(CFIInst))));
165081ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION))
165181ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
165281ad6265SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
165381ad6265SDimitry Andric   }
165481ad6265SDimitry Andric }
165581ad6265SDimitry Andric 
165681ad6265SDimitry Andric static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
165781ad6265SDimitry Andric                                         MachineFunction &MF,
165881ad6265SDimitry Andric                                         MachineBasicBlock &MBB,
165981ad6265SDimitry Andric                                         MachineBasicBlock::iterator MBBI,
166081ad6265SDimitry Andric                                         const DebugLoc &DL) {
166181ad6265SDimitry Andric   // Shadow call stack epilog: ldr x30, [x18, #-8]!
166281ad6265SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
166381ad6265SDimitry Andric       .addReg(AArch64::X18, RegState::Define)
166481ad6265SDimitry Andric       .addReg(AArch64::LR, RegState::Define)
166581ad6265SDimitry Andric       .addReg(AArch64::X18)
166681ad6265SDimitry Andric       .addImm(-8)
166781ad6265SDimitry Andric       .setMIFlag(MachineInstr::FrameDestroy);
166881ad6265SDimitry Andric 
1669bdd1243dSDimitry Andric   if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF)) {
167081ad6265SDimitry Andric     unsigned CFIIndex =
167181ad6265SDimitry Andric         MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
167281ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
167381ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
167481ad6265SDimitry Andric         .setMIFlags(MachineInstr::FrameDestroy);
167581ad6265SDimitry Andric   }
167681ad6265SDimitry Andric }
167781ad6265SDimitry Andric 
167806c3fb27SDimitry Andric // Define the current CFA rule to use the provided FP.
167906c3fb27SDimitry Andric static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
168006c3fb27SDimitry Andric                                 MachineBasicBlock::iterator MBBI,
168106c3fb27SDimitry Andric                                 const DebugLoc &DL, unsigned FixedObject) {
168206c3fb27SDimitry Andric   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
168306c3fb27SDimitry Andric   const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
168406c3fb27SDimitry Andric   const TargetInstrInfo *TII = STI.getInstrInfo();
168506c3fb27SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
168606c3fb27SDimitry Andric 
168706c3fb27SDimitry Andric   const int OffsetToFirstCalleeSaveFromFP =
168806c3fb27SDimitry Andric       AFI->getCalleeSaveBaseToFrameRecordOffset() -
168906c3fb27SDimitry Andric       AFI->getCalleeSavedStackSize();
169006c3fb27SDimitry Andric   Register FramePtr = TRI->getFrameRegister(MF);
169106c3fb27SDimitry Andric   unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
169206c3fb27SDimitry Andric   unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
169306c3fb27SDimitry Andric       nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
169406c3fb27SDimitry Andric   BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
169506c3fb27SDimitry Andric       .addCFIIndex(CFIIndex)
169606c3fb27SDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
169706c3fb27SDimitry Andric }
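// Worked example with illustrative values: for FixedObject == 16, a 64-byte
// callee-save area and a CalleeSaveBaseToFrameRecordOffset of 0,
// OffsetToFirstCalleeSaveFromFP is -64, so the CFA is defined as FP + 80.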
169806c3fb27SDimitry Andric 
16995f757f3fSDimitry Andric #ifndef NDEBUG
17005f757f3fSDimitry Andric /// Collect live registers from the end of \p MI's parent up to (and
17015f757f3fSDimitry Andric /// including) \p MI in \p LiveRegs.
17025f757f3fSDimitry Andric static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
17035f757f3fSDimitry Andric                                 LivePhysRegs &LiveRegs) {
17045f757f3fSDimitry Andric 
17055f757f3fSDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
17065f757f3fSDimitry Andric   LiveRegs.addLiveOuts(MBB);
17075f757f3fSDimitry Andric   for (const MachineInstr &MI :
17085f757f3fSDimitry Andric        reverse(make_range(MI.getIterator(), MBB.instr_end())))
17095f757f3fSDimitry Andric     LiveRegs.stepBackward(MI);
17105f757f3fSDimitry Andric }
17115f757f3fSDimitry Andric #endif
17125f757f3fSDimitry Andric 
17130b57cec5SDimitry Andric void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
17140b57cec5SDimitry Andric                                         MachineBasicBlock &MBB) const {
17150b57cec5SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.begin();
17160b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
17170b57cec5SDimitry Andric   const Function &F = MF.getFunction();
17180b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
17190b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
17200b57cec5SDimitry Andric   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
17215f757f3fSDimitry Andric 
17220b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1723bdd1243dSDimitry Andric   bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
172406c3fb27SDimitry Andric   bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
17250b57cec5SDimitry Andric   bool HasFP = hasFP(MF);
17260b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
17270b57cec5SDimitry Andric   bool HasWinCFI = false;
17280b57cec5SDimitry Andric   auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
17290b57cec5SDimitry Andric 
17305f757f3fSDimitry Andric   MachineBasicBlock::iterator End = MBB.end();
17315f757f3fSDimitry Andric #ifndef NDEBUG
17325f757f3fSDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
17335f757f3fSDimitry Andric   // Collect live registers from the end of MBB up to the start of the existing
17345f757f3fSDimitry Andric   // frame setup instructions.
17355f757f3fSDimitry Andric   MachineBasicBlock::iterator NonFrameStart = MBB.begin();
17365f757f3fSDimitry Andric   while (NonFrameStart != End &&
17375f757f3fSDimitry Andric          NonFrameStart->getFlag(MachineInstr::FrameSetup))
17385f757f3fSDimitry Andric     ++NonFrameStart;
17395f757f3fSDimitry Andric 
17405f757f3fSDimitry Andric   LivePhysRegs LiveRegs(*TRI);
17415f757f3fSDimitry Andric   if (NonFrameStart != MBB.end()) {
17425f757f3fSDimitry Andric     getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
17435f757f3fSDimitry Andric     // Ignore registers used for stack management for now.
17445f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::SP);
17455f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::X19);
17465f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::FP);
17475f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::LR);
17480fca6ea1SDimitry Andric 
17490fca6ea1SDimitry Andric     // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
17500fca6ea1SDimitry Andric     // The call is needed to spill VG when it is required but SVE is
17510fca6ea1SDimitry Andric     // unavailable; X0 is preserved around this call.
17520fca6ea1SDimitry Andric     if (requiresGetVGCall(MF))
17530fca6ea1SDimitry Andric       LiveRegs.removeReg(AArch64::X0);
17545f757f3fSDimitry Andric   }
17555f757f3fSDimitry Andric 
17565f757f3fSDimitry Andric   auto VerifyClobberOnExit = make_scope_exit([&]() {
17575f757f3fSDimitry Andric     if (NonFrameStart == MBB.end())
17585f757f3fSDimitry Andric       return;
17595f757f3fSDimitry Andric     // Check if any of the newly inserted instructions clobber any of the live registers.
17605f757f3fSDimitry Andric     for (MachineInstr &MI :
17615f757f3fSDimitry Andric          make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
17625f757f3fSDimitry Andric       for (auto &Op : MI.operands())
17635f757f3fSDimitry Andric         if (Op.isReg() && Op.isDef())
17645f757f3fSDimitry Andric           assert(!LiveRegs.contains(Op.getReg()) &&
17655f757f3fSDimitry Andric                  "live register clobbered by inserted prologue instructions");
17665f757f3fSDimitry Andric     }
17675f757f3fSDimitry Andric   });
17685f757f3fSDimitry Andric #endif
17695f757f3fSDimitry Andric 
17700b57cec5SDimitry Andric   bool IsFunclet = MBB.isEHFuncletEntry();
17710b57cec5SDimitry Andric 
17720b57cec5SDimitry Andric   // At this point, we're going to decide whether or not the function uses a
17730b57cec5SDimitry Andric   // redzone. In most cases, the function doesn't have a redzone so let's
17740b57cec5SDimitry Andric   // assume that's false and set it to true in the case that there's a redzone.
17750b57cec5SDimitry Andric   AFI->setHasRedZone(false);
17760b57cec5SDimitry Andric 
17770b57cec5SDimitry Andric   // Debug location must be unknown since the first debug location is used
17780b57cec5SDimitry Andric   // to determine the end of the prologue.
17790b57cec5SDimitry Andric   DebugLoc DL;
17800b57cec5SDimitry Andric 
1781e8d8bef9SDimitry Andric   const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
17825f757f3fSDimitry Andric   if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
178381ad6265SDimitry Andric     emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
1784bdd1243dSDimitry Andric                                 MFnI.needsDwarfUnwindInfo(MF));
1785fe6060f1SDimitry Andric 
1786bdd1243dSDimitry Andric   if (MFnI.shouldSignReturnAddress(MF)) {
17875f757f3fSDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
17880b57cec5SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
17895f757f3fSDimitry Andric     if (NeedsWinCFI)
17905f757f3fSDimitry Andric       HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
17910b57cec5SDimitry Andric   }
17920b57cec5SDimitry Andric 
179381ad6265SDimitry Andric   if (EmitCFI && MFnI.isMTETagged()) {
179481ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
179581ad6265SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
179681ad6265SDimitry Andric   }
17970b57cec5SDimitry Andric 
1798fe6060f1SDimitry Andric   // We signal the presence of a Swift extended frame to external tools by
1799fe6060f1SDimitry Andric   // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
1800fe6060f1SDimitry Andric   // ORR is sufficient; a Swift kernel is assumed to initialize the TBI
1801fe6060f1SDimitry Andric   // bits, so that still holds.
1802fe6060f1SDimitry Andric   if (HasFP && AFI->hasSwiftAsyncContext()) {
1803349cc55cSDimitry Andric     switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1804349cc55cSDimitry Andric     case SwiftAsyncFramePointerMode::DeploymentBased:
1805349cc55cSDimitry Andric       if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
1806349cc55cSDimitry Andric         // The special symbol below is absolute and has a *value* that can be
1807349cc55cSDimitry Andric         // combined with the frame pointer to signal an extended frame.
1808349cc55cSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
1809349cc55cSDimitry Andric             .addExternalSymbol("swift_async_extendedFramePointerFlags",
1810349cc55cSDimitry Andric                                AArch64II::MO_GOT);
18115f757f3fSDimitry Andric         if (NeedsWinCFI) {
18125f757f3fSDimitry Andric           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
18135f757f3fSDimitry Andric               .setMIFlags(MachineInstr::FrameSetup);
18145f757f3fSDimitry Andric           HasWinCFI = true;
18155f757f3fSDimitry Andric         }
1816349cc55cSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
1817349cc55cSDimitry Andric             .addUse(AArch64::FP)
1818349cc55cSDimitry Andric             .addUse(AArch64::X16)
1819349cc55cSDimitry Andric             .addImm(Subtarget.isTargetILP32() ? 32 : 0);
18205f757f3fSDimitry Andric         if (NeedsWinCFI) {
18215f757f3fSDimitry Andric           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
18225f757f3fSDimitry Andric               .setMIFlags(MachineInstr::FrameSetup);
18235f757f3fSDimitry Andric           HasWinCFI = true;
18245f757f3fSDimitry Andric         }
1825349cc55cSDimitry Andric         break;
1826349cc55cSDimitry Andric       }
1827bdd1243dSDimitry Andric       [[fallthrough]];
1828349cc55cSDimitry Andric 
1829349cc55cSDimitry Andric     case SwiftAsyncFramePointerMode::Always:
1830fe6060f1SDimitry Andric       // ORR x29, x29, #0x1000_0000_0000_0000
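      // 0x1100 below is the 13-bit (N:immr:imms) logical-immediate encoding of
      // that constant (N=1, immr=4, imms=0: a single set bit rotated to 60).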
1831fe6060f1SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
1832fe6060f1SDimitry Andric           .addUse(AArch64::FP)
1833fe6060f1SDimitry Andric           .addImm(0x1100)
1834fe6060f1SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
18355f757f3fSDimitry Andric       if (NeedsWinCFI) {
18365f757f3fSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
18375f757f3fSDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
18385f757f3fSDimitry Andric         HasWinCFI = true;
18395f757f3fSDimitry Andric       }
1840349cc55cSDimitry Andric       break;
1841349cc55cSDimitry Andric 
1842349cc55cSDimitry Andric     case SwiftAsyncFramePointerMode::Never:
1843349cc55cSDimitry Andric       break;
1844349cc55cSDimitry Andric     }
1845fe6060f1SDimitry Andric   }
1846fe6060f1SDimitry Andric 
18470b57cec5SDimitry Andric   // All calls are tail calls in GHC calling conv, and functions have no
18480b57cec5SDimitry Andric   // prologue/epilogue.
18490b57cec5SDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
18500b57cec5SDimitry Andric     return;
18510b57cec5SDimitry Andric 
1852e8d8bef9SDimitry Andric   // Set tagged base pointer to the requested stack slot.
18530b57cec5SDimitry Andric   // Ideally it should match the SP value after the prologue.
1854bdd1243dSDimitry Andric   std::optional<int> TBPI = AFI->getTaggedBasePointerIndex();
1855e8d8bef9SDimitry Andric   if (TBPI)
1856e8d8bef9SDimitry Andric     AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
1857e8d8bef9SDimitry Andric   else
18580b57cec5SDimitry Andric     AFI->setTaggedBasePointerOffset(MFI.getStackSize());
18590b57cec5SDimitry Andric 
18608bcb0991SDimitry Andric   const StackOffset &SVEStackSize = getSVEStackSize(MF);
18618bcb0991SDimitry Andric 
18620b57cec5SDimitry Andric   // getStackSize() includes all the locals in its size calculation. We don't
18630b57cec5SDimitry Andric   // include these locals when computing the stack size of a funclet, as they
18640b57cec5SDimitry Andric   // are allocated in the parent's stack frame and accessed via the frame
18650b57cec5SDimitry Andric   // pointer from the funclet.  We only save the callee saved registers in the
18660b57cec5SDimitry Andric   // funclet, which are really the callee saved registers of the parent
18670b57cec5SDimitry Andric   // function, including the funclet.
18680fca6ea1SDimitry Andric   int64_t NumBytes =
18690fca6ea1SDimitry Andric       IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
18700b57cec5SDimitry Andric   if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
18710b57cec5SDimitry Andric     assert(!HasFP && "unexpected function without stack frame but with FP");
18728bcb0991SDimitry Andric     assert(!SVEStackSize &&
18738bcb0991SDimitry Andric            "unexpected function without stack frame but with SVE objects");
18740b57cec5SDimitry Andric     // All of the stack allocation is for locals.
18750b57cec5SDimitry Andric     AFI->setLocalStackSize(NumBytes);
18760b57cec5SDimitry Andric     if (!NumBytes)
18770b57cec5SDimitry Andric       return;
18780b57cec5SDimitry Andric     // REDZONE: If the stack size is less than 128 bytes, we don't need
18790b57cec5SDimitry Andric     // to actually allocate.
18800b57cec5SDimitry Andric     if (canUseRedZone(MF)) {
18810b57cec5SDimitry Andric       AFI->setHasRedZone(true);
18820b57cec5SDimitry Andric       ++NumRedZoneFunctions;
18830b57cec5SDimitry Andric     } else {
18848bcb0991SDimitry Andric       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
1885e8d8bef9SDimitry Andric                       StackOffset::getFixed(-NumBytes), TII,
1886e8d8bef9SDimitry Andric                       MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
188781ad6265SDimitry Andric       if (EmitCFI) {
18880b57cec5SDimitry Andric         // Label used to tie together the PROLOG_LABEL and the MachineMoves.
18890fca6ea1SDimitry Andric         MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
18900b57cec5SDimitry Andric         // Encode the stack size of the leaf function.
18910b57cec5SDimitry Andric         unsigned CFIIndex = MF.addFrameInst(
18925ffd83dbSDimitry Andric             MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
18930b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
18940b57cec5SDimitry Andric             .addCFIIndex(CFIIndex)
18950b57cec5SDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
18960b57cec5SDimitry Andric       }
18970b57cec5SDimitry Andric     }
18980b57cec5SDimitry Andric 
18990b57cec5SDimitry Andric     if (NeedsWinCFI) {
19000b57cec5SDimitry Andric       HasWinCFI = true;
19010b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
19020b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
19030b57cec5SDimitry Andric     }
19040b57cec5SDimitry Andric 
19050b57cec5SDimitry Andric     return;
19060b57cec5SDimitry Andric   }
19070b57cec5SDimitry Andric 
19080fca6ea1SDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
190962cfcf62SDimitry Andric   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
19100b57cec5SDimitry Andric 
19110b57cec5SDimitry Andric   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
19120b57cec5SDimitry Andric   // All of the remaining stack allocations are for locals.
19130b57cec5SDimitry Andric   AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
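  // Illustrative numbers (not taken from this code): with a 64-byte GPR
  // callee-save area, no Win64 fixed object and MFI.getStackSize() == 160,
  // PrologueSaveSize is 64 and the local stack size is set to 96.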
19140b57cec5SDimitry Andric   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1915fe6060f1SDimitry Andric   bool HomPrologEpilog = homogeneousPrologEpilog(MF);
19160b57cec5SDimitry Andric   if (CombineSPBump) {
19178bcb0991SDimitry Andric     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
19188bcb0991SDimitry Andric     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
1919e8d8bef9SDimitry Andric                     StackOffset::getFixed(-NumBytes), TII,
192081ad6265SDimitry Andric                     MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
192106c3fb27SDimitry Andric                     EmitAsyncCFI);
19220b57cec5SDimitry Andric     NumBytes = 0;
1923fe6060f1SDimitry Andric   } else if (HomPrologEpilog) {
1924fe6060f1SDimitry Andric     // Stack has been already adjusted.
1925fe6060f1SDimitry Andric     NumBytes -= PrologueSaveSize;
19260b57cec5SDimitry Andric   } else if (PrologueSaveSize != 0) {
19270b57cec5SDimitry Andric     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
192881ad6265SDimitry Andric         MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
192906c3fb27SDimitry Andric         EmitAsyncCFI);
19300b57cec5SDimitry Andric     NumBytes -= PrologueSaveSize;
19310b57cec5SDimitry Andric   }
19320b57cec5SDimitry Andric   assert(NumBytes >= 0 && "Negative stack allocation size!?");
19330b57cec5SDimitry Andric 
19340b57cec5SDimitry Andric   // Move past the saves of the callee-saved registers, fixing up the offsets
19350b57cec5SDimitry Andric   // and pre-inc if we decided to combine the callee-save and local stack
19360b57cec5SDimitry Andric   // pointer bump above.
1937480093f4SDimitry Andric   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
1938480093f4SDimitry Andric          !IsSVECalleeSave(MBBI)) {
19390fca6ea1SDimitry Andric     // Move past instructions generated to calculate VG
19400fca6ea1SDimitry Andric     if (AFI->hasStreamingModeChanges())
19410fca6ea1SDimitry Andric       while (isVGInstruction(MBBI))
19420fca6ea1SDimitry Andric         ++MBBI;
19430fca6ea1SDimitry Andric 
19440b57cec5SDimitry Andric     if (CombineSPBump)
19450b57cec5SDimitry Andric       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
19460b57cec5SDimitry Andric                                         NeedsWinCFI, &HasWinCFI);
19470b57cec5SDimitry Andric     ++MBBI;
19480b57cec5SDimitry Andric   }
19490b57cec5SDimitry Andric 
195062cfcf62SDimitry Andric   // For funclets the FP belongs to the containing function.
195162cfcf62SDimitry Andric   if (!IsFunclet && HasFP) {
19528bcb0991SDimitry Andric     // Only set up FP if we actually need to.
1953e8d8bef9SDimitry Andric     int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
19548bcb0991SDimitry Andric 
19550b57cec5SDimitry Andric     if (CombineSPBump)
19560b57cec5SDimitry Andric       FPOffset += AFI->getLocalStackSize();
19570b57cec5SDimitry Andric 
1958fe6060f1SDimitry Andric     if (AFI->hasSwiftAsyncContext()) {
1959fe6060f1SDimitry Andric       // Before we update the live FP we have to ensure there's a valid (or
1960fe6060f1SDimitry Andric       // null) asynchronous context in its slot just before FP in the frame
1961fe6060f1SDimitry Andric       // record, so store it now.
1962fe6060f1SDimitry Andric       const auto &Attrs = MF.getFunction().getAttributes();
1963fe6060f1SDimitry Andric       bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1964fe6060f1SDimitry Andric       if (HaveInitialContext)
1965fe6060f1SDimitry Andric         MBB.addLiveIn(AArch64::X22);
19665f757f3fSDimitry Andric       Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1967fe6060f1SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
19685f757f3fSDimitry Andric           .addUse(Reg)
1969fe6060f1SDimitry Andric           .addUse(AArch64::SP)
1970fe6060f1SDimitry Andric           .addImm(FPOffset - 8)
1971fe6060f1SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
19725f757f3fSDimitry Andric       if (NeedsWinCFI) {
19735f757f3fSDimitry Andric         // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
19745f757f3fSDimitry Andric         // to multiple instructions, should be mutually exclusive.
19755f757f3fSDimitry Andric         assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
19765f757f3fSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
19775f757f3fSDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
19785f757f3fSDimitry Andric         HasWinCFI = true;
19795f757f3fSDimitry Andric       }
1980fe6060f1SDimitry Andric     }
1981fe6060f1SDimitry Andric 
1982fe6060f1SDimitry Andric     if (HomPrologEpilog) {
1983fe6060f1SDimitry Andric       auto Prolog = MBBI;
1984fe6060f1SDimitry Andric       --Prolog;
1985fe6060f1SDimitry Andric       assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1986fe6060f1SDimitry Andric       Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1987fe6060f1SDimitry Andric     } else {
19880b57cec5SDimitry Andric       // Issue    fp = sp + FPOffset,  or
19890b57cec5SDimitry Andric       //          mov fp, sp           when FPOffset is zero.
19900b57cec5SDimitry Andric       // Note: All stores of callee-saved registers are marked as "FrameSetup".
19910b57cec5SDimitry Andric       // This code marks the instruction(s) that set the FP also.
19928bcb0991SDimitry Andric       emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1993e8d8bef9SDimitry Andric                       StackOffset::getFixed(FPOffset), TII,
1994e8d8bef9SDimitry Andric                       MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1995bdd1243dSDimitry Andric       if (NeedsWinCFI && HasWinCFI) {
1996bdd1243dSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1997bdd1243dSDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
1998bdd1243dSDimitry Andric         // After setting up the FP, the rest of the prolog doesn't need to be
1999bdd1243dSDimitry Andric         // included in the SEH unwind info.
2000bdd1243dSDimitry Andric         NeedsWinCFI = false;
2001bdd1243dSDimitry Andric       }
20020b57cec5SDimitry Andric     }
200306c3fb27SDimitry Andric     if (EmitAsyncCFI)
200406c3fb27SDimitry Andric       emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
200581ad6265SDimitry Andric   }
200681ad6265SDimitry Andric 
200781ad6265SDimitry Andric   // Now emit the moves for whatever callee saved regs we have (including FP,
200881ad6265SDimitry Andric   // LR if those are saved). Frame instructions for SVE registers are emitted
200981ad6265SDimitry Andric   // later, after the instructions which actually save the SVE regs.
201006c3fb27SDimitry Andric   if (EmitAsyncCFI)
201181ad6265SDimitry Andric     emitCalleeSavedGPRLocations(MBB, MBBI);
20120b57cec5SDimitry Andric 
2013bdd1243dSDimitry Andric   // Alignment is required for the parent frame, not the funclet
2014bdd1243dSDimitry Andric   const bool NeedsRealignment =
2015bdd1243dSDimitry Andric       NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
20165f757f3fSDimitry Andric   const int64_t RealignmentPadding =
2017bdd1243dSDimitry Andric       (NeedsRealignment && MFI.getMaxAlign() > Align(16))
2018bdd1243dSDimitry Andric           ? MFI.getMaxAlign().value() - 16
2019bdd1243dSDimitry Andric           : 0;
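  // When over-aligning, the extra MaxAlign - 16 bytes make sure that rounding
  // SP down to MaxAlign (after the allocation below) still leaves NumBytes of
  // usable space.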
2020bdd1243dSDimitry Andric 
2021bdd1243dSDimitry Andric   if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
2022bdd1243dSDimitry Andric     uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
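    // NumWords is the allocation size in 16-byte units: the Windows stack
    // probe helper expects that count in x15, and the SUBXrx64 further down
    // converts it back to bytes with a UXTX #4 extend (i.e. x15 * 16).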
20230b57cec5SDimitry Andric     if (NeedsWinCFI) {
20240b57cec5SDimitry Andric       HasWinCFI = true;
20250b57cec5SDimitry Andric       // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
20260b57cec5SDimitry Andric       // exceed this amount.  We need to move at most 2^24 - 1 into x15.
20270b57cec5SDimitry Andric       // This is at most two instructions, MOVZ followed by MOVK.
20280b57cec5SDimitry Andric       // TODO: Fix to use multiple stack alloc unwind codes for stacks
20290b57cec5SDimitry Andric       // exceeding 256MB in size.
20300b57cec5SDimitry Andric       if (NumBytes >= (1 << 28))
20310b57cec5SDimitry Andric         report_fatal_error("Stack size cannot exceed 256MB for stack "
20320b57cec5SDimitry Andric                            "unwinding purposes");
20330b57cec5SDimitry Andric 
20340b57cec5SDimitry Andric       uint32_t LowNumWords = NumWords & 0xFFFF;
20350b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
20360b57cec5SDimitry Andric           .addImm(LowNumWords)
20370b57cec5SDimitry Andric           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
20380b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
20390b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20400b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
20410b57cec5SDimitry Andric       if ((NumWords & 0xFFFF0000) != 0) {
20420b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
20430b57cec5SDimitry Andric             .addReg(AArch64::X15)
20440b57cec5SDimitry Andric             .addImm((NumWords & 0xFFFF0000) >> 16) // High half
20450b57cec5SDimitry Andric             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
20460b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20470b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20480b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20490b57cec5SDimitry Andric       }
20500b57cec5SDimitry Andric     } else {
20510b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
20520b57cec5SDimitry Andric           .addImm(NumWords)
20530b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
20540b57cec5SDimitry Andric     }
20550b57cec5SDimitry Andric 
2056bdd1243dSDimitry Andric     const char *ChkStk = Subtarget.getChkStkName();
20570b57cec5SDimitry Andric     switch (MF.getTarget().getCodeModel()) {
20580b57cec5SDimitry Andric     case CodeModel::Tiny:
20590b57cec5SDimitry Andric     case CodeModel::Small:
20600b57cec5SDimitry Andric     case CodeModel::Medium:
20610b57cec5SDimitry Andric     case CodeModel::Kernel:
20620b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
2063bdd1243dSDimitry Andric           .addExternalSymbol(ChkStk)
20640b57cec5SDimitry Andric           .addReg(AArch64::X15, RegState::Implicit)
20650b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
20660b57cec5SDimitry Andric           .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
20670b57cec5SDimitry Andric           .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
20680b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
20690b57cec5SDimitry Andric       if (NeedsWinCFI) {
20700b57cec5SDimitry Andric         HasWinCFI = true;
20710b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20720b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20730b57cec5SDimitry Andric       }
20740b57cec5SDimitry Andric       break;
20750b57cec5SDimitry Andric     case CodeModel::Large:
20760b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
20770b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Define)
2078bdd1243dSDimitry Andric           .addExternalSymbol(ChkStk)
2079bdd1243dSDimitry Andric           .addExternalSymbol(ChkStk)
20800b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
20810b57cec5SDimitry Andric       if (NeedsWinCFI) {
20820b57cec5SDimitry Andric         HasWinCFI = true;
20830b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20840b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20850b57cec5SDimitry Andric       }
20860b57cec5SDimitry Andric 
20875ffd83dbSDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
20880b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Kill)
20890b57cec5SDimitry Andric           .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
20900b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
20910b57cec5SDimitry Andric           .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
20920b57cec5SDimitry Andric           .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
20930b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
20940b57cec5SDimitry Andric       if (NeedsWinCFI) {
20950b57cec5SDimitry Andric         HasWinCFI = true;
20960b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20970b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20980b57cec5SDimitry Andric       }
20990b57cec5SDimitry Andric       break;
21000b57cec5SDimitry Andric     }
21010b57cec5SDimitry Andric 
21020b57cec5SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
21030b57cec5SDimitry Andric         .addReg(AArch64::SP, RegState::Kill)
21040b57cec5SDimitry Andric         .addReg(AArch64::X15, RegState::Kill)
21050b57cec5SDimitry Andric         .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
21060b57cec5SDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
21070b57cec5SDimitry Andric     if (NeedsWinCFI) {
21080b57cec5SDimitry Andric       HasWinCFI = true;
21090b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
21100b57cec5SDimitry Andric           .addImm(NumBytes)
21110b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
21120b57cec5SDimitry Andric     }
21130b57cec5SDimitry Andric     NumBytes = 0;
2114bdd1243dSDimitry Andric 
2115bdd1243dSDimitry Andric     if (RealignmentPadding > 0) {
211606c3fb27SDimitry Andric       if (RealignmentPadding >= 4096) {
211706c3fb27SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
211806c3fb27SDimitry Andric             .addReg(AArch64::X16, RegState::Define)
211906c3fb27SDimitry Andric             .addImm(RealignmentPadding)
212006c3fb27SDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
212106c3fb27SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
212206c3fb27SDimitry Andric             .addReg(AArch64::SP)
212306c3fb27SDimitry Andric             .addReg(AArch64::X16, RegState::Kill)
212406c3fb27SDimitry Andric             .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
212506c3fb27SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
212606c3fb27SDimitry Andric       } else {
2127bdd1243dSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
2128bdd1243dSDimitry Andric             .addReg(AArch64::SP)
2129bdd1243dSDimitry Andric             .addImm(RealignmentPadding)
213006c3fb27SDimitry Andric             .addImm(0)
213106c3fb27SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
213206c3fb27SDimitry Andric       }
2133bdd1243dSDimitry Andric 
2134bdd1243dSDimitry Andric       uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
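      // E.g. for a 32-byte MaxAlign, AndMask is ~31; the ANDXri below rounds
      // the padded SP down to the requested alignment.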
2135bdd1243dSDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
2136bdd1243dSDimitry Andric           .addReg(AArch64::X15, RegState::Kill)
2137bdd1243dSDimitry Andric           .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
2138bdd1243dSDimitry Andric       AFI->setStackRealigned(true);
2139bdd1243dSDimitry Andric 
2140bdd1243dSDimitry Andric       // No need for SEH instructions here; if we're realigning the stack,
2141bdd1243dSDimitry Andric       // we've set a frame pointer and already finished the SEH prologue.
2142bdd1243dSDimitry Andric       assert(!NeedsWinCFI);
2143bdd1243dSDimitry Andric     }
21440b57cec5SDimitry Andric   }
21450b57cec5SDimitry Andric 
21465f757f3fSDimitry Andric   StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
2147480093f4SDimitry Andric   MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
2148480093f4SDimitry Andric 
2149480093f4SDimitry Andric   // Process the SVE callee-saves to determine what space needs to be
2150480093f4SDimitry Andric   // allocated.
2151979e22ffSDimitry Andric   if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
21525f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
21535f757f3fSDimitry Andric                       << "\n");
2154480093f4SDimitry Andric     // Find callee save instructions in frame.
2155480093f4SDimitry Andric     CalleeSavesBegin = MBBI;
2156480093f4SDimitry Andric     assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
2157480093f4SDimitry Andric     while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
2158480093f4SDimitry Andric       ++MBBI;
2159480093f4SDimitry Andric     CalleeSavesEnd = MBBI;
2160480093f4SDimitry Andric 
21615f757f3fSDimitry Andric     SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
21625f757f3fSDimitry Andric     SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
2163480093f4SDimitry Andric   }
2164480093f4SDimitry Andric 
2165480093f4SDimitry Andric   // Allocate space for the callee saves (if any).
21665f757f3fSDimitry Andric   StackOffset CFAOffset =
21675f757f3fSDimitry Andric       StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
21685f757f3fSDimitry Andric   StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
21695f757f3fSDimitry Andric   allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
21705f757f3fSDimitry Andric                      nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
21715f757f3fSDimitry Andric                      MFI.hasVarSizedObjects() || LocalsSize);
21725f757f3fSDimitry Andric   CFAOffset += SVECalleeSavesSize;
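  // CFAOffset now also accounts for the scalable callee-save area, so the
  // CFA-offset CFI emitted by the following allocation (when unwind info is
  // async and there is no FP) keeps describing the CFA correctly.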
217381ad6265SDimitry Andric 
217406c3fb27SDimitry Andric   if (EmitAsyncCFI)
217581ad6265SDimitry Andric     emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
2176480093f4SDimitry Andric 
21775f757f3fSDimitry Andric   // Allocate space for the rest of the frame including SVE locals. Align the
21785f757f3fSDimitry Andric   // stack as necessary.
21795f757f3fSDimitry Andric   assert(!(canUseRedZone(MF) && NeedsRealignment) &&
21805f757f3fSDimitry Andric          "Cannot use redzone with stack realignment");
218181ad6265SDimitry Andric   if (!canUseRedZone(MF)) {
21820b57cec5SDimitry Andric     // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
21830b57cec5SDimitry Andric     // the correct value here, as NumBytes also includes padding bytes,
21840b57cec5SDimitry Andric     // which shouldn't be counted here.
21855f757f3fSDimitry Andric     allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
21865f757f3fSDimitry Andric                        SVELocalsSize + StackOffset::getFixed(NumBytes),
21875f757f3fSDimitry Andric                        NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
21885f757f3fSDimitry Andric                        CFAOffset, MFI.hasVarSizedObjects());
21890b57cec5SDimitry Andric   }
21900b57cec5SDimitry Andric 
21910b57cec5SDimitry Andric   // If we need a base pointer, set it up here. It's whatever the value of the
21920b57cec5SDimitry Andric   // stack pointer is at this point. Any variable size objects will be allocated
21930b57cec5SDimitry Andric   // after this, so we can still use the base pointer to reference locals.
21940b57cec5SDimitry Andric   //
21950b57cec5SDimitry Andric   // FIXME: Clarify FrameSetup flags here.
21960b57cec5SDimitry Andric   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
21970b57cec5SDimitry Andric   // needed.
219862cfcf62SDimitry Andric   // For funclets the BP belongs to the containing function.
219962cfcf62SDimitry Andric   if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
22000b57cec5SDimitry Andric     TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
22010b57cec5SDimitry Andric                      false);
22020b57cec5SDimitry Andric     if (NeedsWinCFI) {
22030b57cec5SDimitry Andric       HasWinCFI = true;
22040b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
22050b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
22060b57cec5SDimitry Andric     }
22070b57cec5SDimitry Andric   }
22080b57cec5SDimitry Andric 
22090b57cec5SDimitry Andric   // The very last FrameSetup instruction indicates the end of prologue. Emit a
22100b57cec5SDimitry Andric   // SEH opcode indicating the prologue end.
22110b57cec5SDimitry Andric   if (NeedsWinCFI && HasWinCFI) {
22120b57cec5SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
22130b57cec5SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
22140b57cec5SDimitry Andric   }
22150b57cec5SDimitry Andric 
221662cfcf62SDimitry Andric   // SEH funclets are passed the frame pointer in X1.  If the parent
221762cfcf62SDimitry Andric   // function uses the base register, then the base register is used
221862cfcf62SDimitry Andric   // directly, and is not retrieved from X1.
221962cfcf62SDimitry Andric   if (IsFunclet && F.hasPersonalityFn()) {
222062cfcf62SDimitry Andric     EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
222162cfcf62SDimitry Andric     if (isAsynchronousEHPersonality(Per)) {
222262cfcf62SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
222362cfcf62SDimitry Andric           .addReg(AArch64::X1)
222462cfcf62SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
222562cfcf62SDimitry Andric       MBB.addLiveIn(AArch64::X1);
222662cfcf62SDimitry Andric     }
222762cfcf62SDimitry Andric   }
222806c3fb27SDimitry Andric 
222906c3fb27SDimitry Andric   if (EmitCFI && !EmitAsyncCFI) {
223006c3fb27SDimitry Andric     if (HasFP) {
223106c3fb27SDimitry Andric       emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
223206c3fb27SDimitry Andric     } else {
223306c3fb27SDimitry Andric       StackOffset TotalSize =
223406c3fb27SDimitry Andric           SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
223506c3fb27SDimitry Andric       unsigned CFIIndex = MF.addFrameInst(createDefCFA(
223606c3fb27SDimitry Andric           *RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, TotalSize,
223706c3fb27SDimitry Andric           /*LastAdjustmentWasScalable=*/false));
223806c3fb27SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
223906c3fb27SDimitry Andric           .addCFIIndex(CFIIndex)
224006c3fb27SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
224106c3fb27SDimitry Andric     }
224206c3fb27SDimitry Andric     emitCalleeSavedGPRLocations(MBB, MBBI);
224306c3fb27SDimitry Andric     emitCalleeSavedSVELocations(MBB, MBBI);
224406c3fb27SDimitry Andric   }
22450b57cec5SDimitry Andric }
22460b57cec5SDimitry Andric 
22470b57cec5SDimitry Andric static bool isFuncletReturnInstr(const MachineInstr &MI) {
22480b57cec5SDimitry Andric   switch (MI.getOpcode()) {
22490b57cec5SDimitry Andric   default:
22500b57cec5SDimitry Andric     return false;
22510b57cec5SDimitry Andric   case AArch64::CATCHRET:
22520b57cec5SDimitry Andric   case AArch64::CLEANUPRET:
22530b57cec5SDimitry Andric     return true;
22540b57cec5SDimitry Andric   }
22550b57cec5SDimitry Andric }
22560b57cec5SDimitry Andric 
22570b57cec5SDimitry Andric void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
22580b57cec5SDimitry Andric                                         MachineBasicBlock &MBB) const {
22590b57cec5SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
22600b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
22615f757f3fSDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
22620b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
22630b57cec5SDimitry Andric   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
22640b57cec5SDimitry Andric   DebugLoc DL;
22650b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
22665f757f3fSDimitry Andric   bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
22670b57cec5SDimitry Andric   bool HasWinCFI = false;
22680b57cec5SDimitry Andric   bool IsFunclet = false;
22690b57cec5SDimitry Andric 
22700b57cec5SDimitry Andric   if (MBB.end() != MBBI) {
22710b57cec5SDimitry Andric     DL = MBBI->getDebugLoc();
22720b57cec5SDimitry Andric     IsFunclet = isFuncletReturnInstr(*MBBI);
22730b57cec5SDimitry Andric   }
22740b57cec5SDimitry Andric 
22755f757f3fSDimitry Andric   MachineBasicBlock::iterator EpilogStartI = MBB.end();
22765f757f3fSDimitry Andric 
227781ad6265SDimitry Andric   auto FinishingTouches = make_scope_exit([&]() {
22785f757f3fSDimitry Andric     if (AFI->shouldSignReturnAddress(MF)) {
22795f757f3fSDimitry Andric       BuildMI(MBB, MBB.getFirstTerminator(), DL,
22805f757f3fSDimitry Andric               TII->get(AArch64::PAUTH_EPILOGUE))
22815f757f3fSDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
22825f757f3fSDimitry Andric       if (NeedsWinCFI)
22835f757f3fSDimitry Andric         HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
22845f757f3fSDimitry Andric     }
22855f757f3fSDimitry Andric     if (AFI->needsShadowCallStackPrologueEpilogue(MF))
228681ad6265SDimitry Andric       emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
228781ad6265SDimitry Andric     if (EmitCFI)
228881ad6265SDimitry Andric       emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
22895f757f3fSDimitry Andric     if (HasWinCFI) {
2290bdd1243dSDimitry Andric       BuildMI(MBB, MBB.getFirstTerminator(), DL,
2291bdd1243dSDimitry Andric               TII->get(AArch64::SEH_EpilogEnd))
2292bdd1243dSDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
22935f757f3fSDimitry Andric       if (!MF.hasWinCFI())
22945f757f3fSDimitry Andric         MF.setHasWinCFI(true);
22955f757f3fSDimitry Andric     }
22965f757f3fSDimitry Andric     if (NeedsWinCFI) {
22975f757f3fSDimitry Andric       assert(EpilogStartI != MBB.end());
22985f757f3fSDimitry Andric       if (!HasWinCFI)
22995f757f3fSDimitry Andric         MBB.erase(EpilogStartI);
23005f757f3fSDimitry Andric     }
230181ad6265SDimitry Andric   });
230281ad6265SDimitry Andric 
2303480093f4SDimitry Andric   int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
23040b57cec5SDimitry Andric                                : MFI.getStackSize();
23050b57cec5SDimitry Andric 
23060b57cec5SDimitry Andric   // All calls are tail calls in GHC calling conv, and functions have no
23070b57cec5SDimitry Andric   // prologue/epilogue.
23080b57cec5SDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
23090b57cec5SDimitry Andric     return;
23100b57cec5SDimitry Andric 
2311fe6060f1SDimitry Andric   // How much of the stack used by incoming arguments this function is expected
2312fe6060f1SDimitry Andric   // to restore in this particular epilogue.
2313fe6060f1SDimitry Andric   int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
23140fca6ea1SDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
23150fca6ea1SDimitry Andric                                               MF.getFunction().isVarArg());
231662cfcf62SDimitry Andric   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
23170b57cec5SDimitry Andric 
2318fe6060f1SDimitry Andric   int64_t AfterCSRPopSize = ArgumentStackToRestore;
23190b57cec5SDimitry Andric   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
23200b57cec5SDimitry Andric   // We cannot rely on the local stack size set in emitPrologue if the function
23210b57cec5SDimitry Andric   // has funclets, as funclets have different local stack size requirements, and
23220b57cec5SDimitry Andric   // the current value set in emitPrologue may be that of the containing
23230b57cec5SDimitry Andric   // function.
23240b57cec5SDimitry Andric   if (MF.hasEHFunclets())
23250b57cec5SDimitry Andric     AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
2326fe6060f1SDimitry Andric   if (homogeneousPrologEpilog(MF, &MBB)) {
2327fe6060f1SDimitry Andric     assert(!NeedsWinCFI);
2328fe6060f1SDimitry Andric     auto LastPopI = MBB.getFirstTerminator();
2329fe6060f1SDimitry Andric     if (LastPopI != MBB.begin()) {
2330fe6060f1SDimitry Andric       auto HomogeneousEpilog = std::prev(LastPopI);
2331fe6060f1SDimitry Andric       if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
2332fe6060f1SDimitry Andric         LastPopI = HomogeneousEpilog;
2333fe6060f1SDimitry Andric     }
2334fe6060f1SDimitry Andric 
2335fe6060f1SDimitry Andric     // Adjust local stack
2336fe6060f1SDimitry Andric     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
2337fe6060f1SDimitry Andric                     StackOffset::getFixed(AFI->getLocalStackSize()), TII,
23385f757f3fSDimitry Andric                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
2339fe6060f1SDimitry Andric 
2340fe6060f1SDimitry Andric     // SP has already been adjusted while restoring the callee-saved regs.
2341fe6060f1SDimitry Andric     // The case that needs SP adjusted for arguments was bailed out earlier.
2342fe6060f1SDimitry Andric     assert(AfterCSRPopSize == 0);
2343fe6060f1SDimitry Andric     return;
2344fe6060f1SDimitry Andric   }
23455ffd83dbSDimitry Andric   bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
23460b57cec5SDimitry Andric   // Assume we can't combine the last pop with the sp restore.
23470b57cec5SDimitry Andric 
234881ad6265SDimitry Andric   bool CombineAfterCSRBump = false;
23490b57cec5SDimitry Andric   if (!CombineSPBump && PrologueSaveSize != 0) {
23500b57cec5SDimitry Andric     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
235181ad6265SDimitry Andric     while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
235281ad6265SDimitry Andric            AArch64InstrInfo::isSEHInstruction(*Pop))
23530b57cec5SDimitry Andric       Pop = std::prev(Pop);
23540b57cec5SDimitry Andric     // Converting the last ldp to a post-index ldp is valid only if the last
23550b57cec5SDimitry Andric     // ldp's offset is 0.
23560b57cec5SDimitry Andric     const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
2357fe6060f1SDimitry Andric     // If the offset is 0 and the AfterCSR pop is not actually trying to
2358fe6060f1SDimitry Andric     // allocate more stack for arguments (in space that an untimely interrupt
2359fe6060f1SDimitry Andric     // may clobber), convert it to a post-index ldp.
236081ad6265SDimitry Andric     if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
23610b57cec5SDimitry Andric       convertCalleeSaveRestoreToSPPrePostIncDec(
236281ad6265SDimitry Andric           MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
236381ad6265SDimitry Andric           MachineInstr::FrameDestroy, PrologueSaveSize);
236481ad6265SDimitry Andric     } else {
23650b57cec5SDimitry Andric       // If not, make sure to emit an add after the last ldp.
23660b57cec5SDimitry Andric       // We're doing this by transferring the size to be restored from the
23670b57cec5SDimitry Andric       // adjustment *before* the CSR pops to the adjustment *after* the CSR
23680b57cec5SDimitry Andric       // pops.
23690b57cec5SDimitry Andric       AfterCSRPopSize += PrologueSaveSize;
237081ad6265SDimitry Andric       CombineAfterCSRBump = true;
23710b57cec5SDimitry Andric     }
23720b57cec5SDimitry Andric   }
23730b57cec5SDimitry Andric 
23740b57cec5SDimitry Andric   // Move past the restores of the callee-saved registers.
23750b57cec5SDimitry Andric   // If we plan on combining the sp bump of the local stack size and the callee
23760b57cec5SDimitry Andric   // save stack size, we might need to adjust the CSR save and restore offsets.
23770b57cec5SDimitry Andric   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
23780b57cec5SDimitry Andric   MachineBasicBlock::iterator Begin = MBB.begin();
23790b57cec5SDimitry Andric   while (LastPopI != Begin) {
23800b57cec5SDimitry Andric     --LastPopI;
2381480093f4SDimitry Andric     if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
2382480093f4SDimitry Andric         IsSVECalleeSave(LastPopI)) {
23830b57cec5SDimitry Andric       ++LastPopI;
23840b57cec5SDimitry Andric       break;
23850b57cec5SDimitry Andric     } else if (CombineSPBump)
23860b57cec5SDimitry Andric       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
23870b57cec5SDimitry Andric                                         NeedsWinCFI, &HasWinCFI);
23880b57cec5SDimitry Andric   }
23890b57cec5SDimitry Andric 
23905f757f3fSDimitry Andric   if (NeedsWinCFI) {
23915f757f3fSDimitry Andric     // Note that there are cases where we insert SEH opcodes in the
23925f757f3fSDimitry Andric     // epilogue when we had no SEH opcodes in the prologue. For
23935f757f3fSDimitry Andric     // example, when there is no stack frame but there are stack
23945f757f3fSDimitry Andric     // arguments. Insert the SEH_EpilogStart and remove it later if we
23955f757f3fSDimitry Andric     // didn't emit any SEH opcodes, to avoid generating WinCFI for
23965f757f3fSDimitry Andric     // functions that don't need it.
23970b57cec5SDimitry Andric     BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
23980b57cec5SDimitry Andric         .setMIFlag(MachineInstr::FrameDestroy);
23995f757f3fSDimitry Andric     EpilogStartI = LastPopI;
24005f757f3fSDimitry Andric     --EpilogStartI;
24010b57cec5SDimitry Andric   }
24020b57cec5SDimitry Andric 
2403fe6060f1SDimitry Andric   if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
240481ad6265SDimitry Andric     switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
240581ad6265SDimitry Andric     case SwiftAsyncFramePointerMode::DeploymentBased:
240681ad6265SDimitry Andric       // Avoid the reload as it is GOT relative, and instead fall back to the
240781ad6265SDimitry Andric       // hardcoded value below.  This allows a mismatch between the OS and
240881ad6265SDimitry Andric       // application without immediately terminating on the difference.
2409bdd1243dSDimitry Andric       [[fallthrough]];
241081ad6265SDimitry Andric     case SwiftAsyncFramePointerMode::Always:
241181ad6265SDimitry Andric       // We need to reset FP to its untagged state on return. Bit 60 is
241281ad6265SDimitry Andric       // currently used to show the presence of an extended frame.
2413fe6060f1SDimitry Andric 
2414fe6060f1SDimitry Andric       // BIC x29, x29, #0x1000_0000_0000_0000
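      // 0x10fe is the logical-immediate encoding of ~(1 << 60), i.e. all ones
      // except bit 60, so the ANDXri below clears the extended-frame marker.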
2415fe6060f1SDimitry Andric       BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
2416fe6060f1SDimitry Andric               AArch64::FP)
2417fe6060f1SDimitry Andric           .addUse(AArch64::FP)
2418fe6060f1SDimitry Andric           .addImm(0x10fe)
2419fe6060f1SDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
24205f757f3fSDimitry Andric       if (NeedsWinCFI) {
24215f757f3fSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
24225f757f3fSDimitry Andric             .setMIFlags(MachineInstr::FrameDestroy);
24235f757f3fSDimitry Andric         HasWinCFI = true;
24245f757f3fSDimitry Andric       }
242581ad6265SDimitry Andric       break;
242681ad6265SDimitry Andric 
242781ad6265SDimitry Andric     case SwiftAsyncFramePointerMode::Never:
242881ad6265SDimitry Andric       break;
242981ad6265SDimitry Andric     }
2430fe6060f1SDimitry Andric   }
2431fe6060f1SDimitry Andric 
24328bcb0991SDimitry Andric   const StackOffset &SVEStackSize = getSVEStackSize(MF);
24338bcb0991SDimitry Andric 
24340b57cec5SDimitry Andric   // If there is a single SP update, insert it before the ret and we're done.
24350b57cec5SDimitry Andric   if (CombineSPBump) {
24368bcb0991SDimitry Andric     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
243781ad6265SDimitry Andric 
243881ad6265SDimitry Andric     // When we are about to restore the CSRs, the CFA register is SP again.
243981ad6265SDimitry Andric     if (EmitCFI && hasFP(MF)) {
244081ad6265SDimitry Andric       const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
244181ad6265SDimitry Andric       unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
244281ad6265SDimitry Andric       unsigned CFIIndex =
244381ad6265SDimitry Andric           MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
244481ad6265SDimitry Andric       BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
244581ad6265SDimitry Andric           .addCFIIndex(CFIIndex)
244681ad6265SDimitry Andric           .setMIFlags(MachineInstr::FrameDestroy);
244781ad6265SDimitry Andric     }
244881ad6265SDimitry Andric 
24490b57cec5SDimitry Andric     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
2450e8d8bef9SDimitry Andric                     StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
2451e8d8bef9SDimitry Andric                     TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
245281ad6265SDimitry Andric                     &HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes));
24530b57cec5SDimitry Andric     return;
24540b57cec5SDimitry Andric   }
24550b57cec5SDimitry Andric 
24560b57cec5SDimitry Andric   NumBytes -= PrologueSaveSize;
24570b57cec5SDimitry Andric   assert(NumBytes >= 0 && "Negative stack allocation size!?");
24580b57cec5SDimitry Andric 
2459480093f4SDimitry Andric   // Process the SVE callee-saves to determine what space needs to be
2460480093f4SDimitry Andric   // deallocated.
2461480093f4SDimitry Andric   StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
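  // DeallocateBefore ends up being the SVE locals area (popped before the SVE
  // callee-save reloads); DeallocateAfter is the SVE callee-save area itself
  // (popped after the reloads).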
2462480093f4SDimitry Andric   MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
2463979e22ffSDimitry Andric   if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
246416d6b3b3SDimitry Andric     RestoreBegin = std::prev(RestoreEnd);
246516d6b3b3SDimitry Andric     while (RestoreBegin != MBB.begin() &&
246616d6b3b3SDimitry Andric            IsSVECalleeSave(std::prev(RestoreBegin)))
2467480093f4SDimitry Andric       --RestoreBegin;
2468480093f4SDimitry Andric 
2469480093f4SDimitry Andric     assert(IsSVECalleeSave(RestoreBegin) &&
2470480093f4SDimitry Andric            IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
2471480093f4SDimitry Andric 
2472e8d8bef9SDimitry Andric     StackOffset CalleeSavedSizeAsOffset =
2473e8d8bef9SDimitry Andric         StackOffset::getScalable(CalleeSavedSize);
2474979e22ffSDimitry Andric     DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
2475979e22ffSDimitry Andric     DeallocateAfter = CalleeSavedSizeAsOffset;
2476480093f4SDimitry Andric   }
2477480093f4SDimitry Andric 
24788bcb0991SDimitry Andric   // Deallocate the SVE area.
2479480093f4SDimitry Andric   if (SVEStackSize) {
248081ad6265SDimitry Andric     // If we have stack realignment or variable sized objects on the stack,
248181ad6265SDimitry Andric     // restore the stack pointer from the frame pointer prior to SVE CSR
248281ad6265SDimitry Andric     // restoration.
248381ad6265SDimitry Andric     if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
248481ad6265SDimitry Andric       if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
2485979e22ffSDimitry Andric         // Set SP to the start of the SVE callee-save area from which they can
2486979e22ffSDimitry Andric         // be reloaded. The code below will deallocate the stack space
2487480093f4SDimitry Andric         // by moving FP -> SP.
2488480093f4SDimitry Andric         emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
2489e8d8bef9SDimitry Andric                         StackOffset::getScalable(-CalleeSavedSize), TII,
2490979e22ffSDimitry Andric                         MachineInstr::FrameDestroy);
249181ad6265SDimitry Andric       }
2492480093f4SDimitry Andric     } else {
2493480093f4SDimitry Andric       if (AFI->getSVECalleeSavedStackSize()) {
2494480093f4SDimitry Andric         // Deallocate the non-SVE locals first before we can deallocate (and
2495480093f4SDimitry Andric         // restore callee saves) from the SVE area.
249681ad6265SDimitry Andric         emitFrameOffset(
249781ad6265SDimitry Andric             MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
249881ad6265SDimitry Andric             StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
249981ad6265SDimitry Andric             false, false, nullptr, EmitCFI && !hasFP(MF),
250081ad6265SDimitry Andric             SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
2501480093f4SDimitry Andric         NumBytes = 0;
2502480093f4SDimitry Andric       }
2503480093f4SDimitry Andric 
2504480093f4SDimitry Andric       emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
250581ad6265SDimitry Andric                       DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
250681ad6265SDimitry Andric                       false, nullptr, EmitCFI && !hasFP(MF),
250781ad6265SDimitry Andric                       SVEStackSize +
250881ad6265SDimitry Andric                           StackOffset::getFixed(NumBytes + PrologueSaveSize));
2509480093f4SDimitry Andric 
2510480093f4SDimitry Andric       emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
251181ad6265SDimitry Andric                       DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
251281ad6265SDimitry Andric                       false, nullptr, EmitCFI && !hasFP(MF),
251381ad6265SDimitry Andric                       DeallocateAfter +
251481ad6265SDimitry Andric                           StackOffset::getFixed(NumBytes + PrologueSaveSize));
2515480093f4SDimitry Andric     }
251681ad6265SDimitry Andric     if (EmitCFI)
251781ad6265SDimitry Andric       emitCalleeSavedSVERestores(MBB, RestoreEnd);
2518480093f4SDimitry Andric   }
25198bcb0991SDimitry Andric 
25200b57cec5SDimitry Andric   if (!hasFP(MF)) {
25210b57cec5SDimitry Andric     bool RedZone = canUseRedZone(MF);
25220b57cec5SDimitry Andric     // If this was a redzone leaf function, we don't need to restore the
25230b57cec5SDimitry Andric     // stack pointer (but we may need to pop stack args for fastcc).
25240b57cec5SDimitry Andric     if (RedZone && AfterCSRPopSize == 0)
25250b57cec5SDimitry Andric       return;
25260b57cec5SDimitry Andric 
252781ad6265SDimitry Andric     // Pop the local variables off the stack. If there are no callee-saved
252881ad6265SDimitry Andric     // registers, it means we are actually positioned at the terminator and can
252981ad6265SDimitry Andric     // combine stack increment for the locals and the stack increment for
253081ad6265SDimitry Andric     // callee-popped arguments into (possibly) a single instruction and be done.
25310b57cec5SDimitry Andric     bool NoCalleeSaveRestore = PrologueSaveSize == 0;
2532480093f4SDimitry Andric     int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
25330b57cec5SDimitry Andric     if (NoCalleeSaveRestore)
25340b57cec5SDimitry Andric       StackRestoreBytes += AfterCSRPopSize;
25350b57cec5SDimitry Andric 
253681ad6265SDimitry Andric     emitFrameOffset(
253781ad6265SDimitry Andric         MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
253881ad6265SDimitry Andric         StackOffset::getFixed(StackRestoreBytes), TII,
253981ad6265SDimitry Andric         MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
254081ad6265SDimitry Andric         StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
254181ad6265SDimitry Andric 
25420b57cec5SDimitry Andric     // If we were able to combine the local stack pop with the argument pop,
25430b57cec5SDimitry Andric     // then we're done.
254481ad6265SDimitry Andric     if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
25450b57cec5SDimitry Andric       return;
25460b57cec5SDimitry Andric     }
25470b57cec5SDimitry Andric 
25480b57cec5SDimitry Andric     NumBytes = 0;
25490b57cec5SDimitry Andric   }
25500b57cec5SDimitry Andric 
25510b57cec5SDimitry Andric   // Restore the original stack pointer.
25520b57cec5SDimitry Andric   // FIXME: Rather than doing the math here, we should instead just use
25530b57cec5SDimitry Andric   // non-post-indexed loads for the restores if we aren't actually going to
25540b57cec5SDimitry Andric   // be able to save any instructions.
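  // With variable-sized objects or stack realignment the SP cannot be
  // recovered by adding a constant, so rebase it from the FP (which points at
  // the frame record inside the callee-save area); otherwise just add back the
  // remaining local-area bytes.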
25558bcb0991SDimitry Andric   if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
2556e8d8bef9SDimitry Andric     emitFrameOffset(
2557e8d8bef9SDimitry Andric         MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
2558e8d8bef9SDimitry Andric         StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
25595f757f3fSDimitry Andric         TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
25608bcb0991SDimitry Andric   } else if (NumBytes)
25618bcb0991SDimitry Andric     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
2562e8d8bef9SDimitry Andric                     StackOffset::getFixed(NumBytes), TII,
25635f757f3fSDimitry Andric                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
25640b57cec5SDimitry Andric 
256581ad6265SDimitry Andric   // When we are about to restore the CSRs, the CFA register is SP again.
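  // Concretely, a def_cfa(SP, PrologueSaveSize) CFI instruction is emitted
  // here so the unwinder keeps computing the CFA correctly while the
  // callee-saves are being reloaded.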
256681ad6265SDimitry Andric   if (EmitCFI && hasFP(MF)) {
256781ad6265SDimitry Andric     const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
256881ad6265SDimitry Andric     unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
256981ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(
257081ad6265SDimitry Andric         MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize));
257181ad6265SDimitry Andric     BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
257281ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
257381ad6265SDimitry Andric         .setMIFlags(MachineInstr::FrameDestroy);
257481ad6265SDimitry Andric   }
257581ad6265SDimitry Andric 
25760b57cec5SDimitry Andric   // This must be placed after the callee-save restore code because that code
25770b57cec5SDimitry Andric   // assumes the SP is at the same location as it was after the callee-save
25780b57cec5SDimitry Andric   // spill code in the prologue.
25790b57cec5SDimitry Andric   if (AfterCSRPopSize) {
2580fe6060f1SDimitry Andric     assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
2581fe6060f1SDimitry Andric                                   "interrupt may have clobbered");
25820b57cec5SDimitry Andric 
258381ad6265SDimitry Andric     emitFrameOffset(
258481ad6265SDimitry Andric         MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
258581ad6265SDimitry Andric         StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
258681ad6265SDimitry Andric         false, NeedsWinCFI, &HasWinCFI, EmitCFI,
258781ad6265SDimitry Andric         StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
25880b57cec5SDimitry Andric   }
25890b57cec5SDimitry Andric }
25900b57cec5SDimitry Andric 
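// The CFI fixup pass only needs to run for functions that require
// asynchronous (instruction-precise) DWARF unwind info.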
259106c3fb27SDimitry Andric bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const {
259206c3fb27SDimitry Andric   return TargetFrameLowering::enableCFIFixup(MF) &&
259306c3fb27SDimitry Andric          MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
259406c3fb27SDimitry Andric }
259506c3fb27SDimitry Andric 
25960b57cec5SDimitry Andric /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
25970b57cec5SDimitry Andric /// debug info.  It's the same as what we use for resolving the code-gen
25980b57cec5SDimitry Andric /// references for now.  FIXME: This can go wrong when references are
25990b57cec5SDimitry Andric /// SP-relative and simple call frames aren't used.
2600e8d8bef9SDimitry Andric StackOffset
2601e8d8bef9SDimitry Andric AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
26025ffd83dbSDimitry Andric                                              Register &FrameReg) const {
26030b57cec5SDimitry Andric   return resolveFrameIndexReference(
26040b57cec5SDimitry Andric       MF, FI, FrameReg,
26050b57cec5SDimitry Andric       /*PreferFP=*/
26060fca6ea1SDimitry Andric       MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) ||
26070fca6ea1SDimitry Andric           MF.getFunction().hasFnAttribute(Attribute::SanitizeMemTag),
2608e8d8bef9SDimitry Andric       /*ForSimm=*/false);
26090b57cec5SDimitry Andric }
26100b57cec5SDimitry Andric 
2611e8d8bef9SDimitry Andric StackOffset
261252418fc2SDimitry Andric AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
261352418fc2SDimitry Andric                                                    int FI) const {
261452418fc2SDimitry Andric   // This function serves to provide a comparable offset from a single reference
261552418fc2SDimitry Andric   // point (the value of SP at function entry) that can be used for analysis,
261652418fc2SDimitry Andric   // e.g. the stack-frame-layout analysis pass. It is not guaranteed to be
261752418fc2SDimitry Andric   // correct for all objects in the presence of VLA-area objects or dynamic
261852418fc2SDimitry Andric   // stack re-alignment.
261952418fc2SDimitry Andric 
262052418fc2SDimitry Andric   const auto &MFI = MF.getFrameInfo();
262152418fc2SDimitry Andric 
262252418fc2SDimitry Andric   int64_t ObjectOffset = MFI.getObjectOffset(FI);
2623*62987288SDimitry Andric   StackOffset SVEStackSize = getSVEStackSize(MF);
2624*62987288SDimitry Andric 
2625*62987288SDimitry Andric   // For VLA-area objects, just emit an offset at the end of the stack frame.
2626*62987288SDimitry Andric   // Whilst not quite correct, these objects do live at the end of the frame and
2627*62987288SDimitry Andric   // so it is more useful for analysis if the offset reflects this.
2628*62987288SDimitry Andric   if (MFI.isVariableSizedObjectIndex(FI)) {
2629*62987288SDimitry Andric     return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
2630*62987288SDimitry Andric   }
263152418fc2SDimitry Andric 
263252418fc2SDimitry Andric   // This is correct in the absence of any SVE stack objects.
263352418fc2SDimitry Andric   if (!SVEStackSize)
263452418fc2SDimitry Andric     return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
263552418fc2SDimitry Andric 
263652418fc2SDimitry Andric   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
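  // Scalable (SVE) objects live immediately below the fixed-size callee-save
  // area, so relative to the SP at entry their offset is the negated
  // callee-save size (fixed part) plus the object's own scalable offset.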
263752418fc2SDimitry Andric   if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
263852418fc2SDimitry Andric     return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
263952418fc2SDimitry Andric                             ObjectOffset);
264052418fc2SDimitry Andric   }
264152418fc2SDimitry Andric 
264252418fc2SDimitry Andric   bool IsFixed = MFI.isFixedObjectIndex(FI);
264352418fc2SDimitry Andric   bool IsCSR =
264452418fc2SDimitry Andric       !IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
264552418fc2SDimitry Andric 
264652418fc2SDimitry Andric   StackOffset ScalableOffset = {};
264752418fc2SDimitry Andric   if (!IsFixed && !IsCSR)
264852418fc2SDimitry Andric     ScalableOffset = -SVEStackSize;
264952418fc2SDimitry Andric 
265052418fc2SDimitry Andric   return StackOffset::getFixed(ObjectOffset) + ScalableOffset;
265152418fc2SDimitry Andric }
265252418fc2SDimitry Andric 
265352418fc2SDimitry Andric StackOffset
2654e8d8bef9SDimitry Andric AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
2655e8d8bef9SDimitry Andric                                                      int FI) const {
2656e8d8bef9SDimitry Andric   return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
26570b57cec5SDimitry Andric }
26580b57cec5SDimitry Andric 
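// Offset of an object from the frame pointer. Object offsets are relative to
// the incoming SP; the FP sits below that by the (Win64) fixed-object area
// plus the part of the callee-save area above the frame record, so add both
// back to convert.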
2659e8d8bef9SDimitry Andric static StackOffset getFPOffset(const MachineFunction &MF,
2660e8d8bef9SDimitry Andric                                int64_t ObjectOffset) {
26610b57cec5SDimitry Andric   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
26620b57cec5SDimitry Andric   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
26630fca6ea1SDimitry Andric   const Function &F = MF.getFunction();
26640fca6ea1SDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
266562cfcf62SDimitry Andric   unsigned FixedObject =
266662cfcf62SDimitry Andric       getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
2667e8d8bef9SDimitry Andric   int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
2668e8d8bef9SDimitry Andric   int64_t FPAdjust =
2669e8d8bef9SDimitry Andric       CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
2670e8d8bef9SDimitry Andric   return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
26710b57cec5SDimitry Andric }
26720b57cec5SDimitry Andric 
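// Offset of an object from the SP once the (non-scalable) frame has been
// allocated: object offsets are relative to the incoming SP, which sits
// getStackSize() bytes above the adjusted SP.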
2673e8d8bef9SDimitry Andric static StackOffset getStackOffset(const MachineFunction &MF,
2674e8d8bef9SDimitry Andric                                   int64_t ObjectOffset) {
26750b57cec5SDimitry Andric   const auto &MFI = MF.getFrameInfo();
2676e8d8bef9SDimitry Andric   return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
26770b57cec5SDimitry Andric }
26780b57cec5SDimitry Andric 
2679e8d8bef9SDimitry Andric // TODO: This function currently does not work for scalable vectors.
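// SEH offsets are emitted relative to whichever register the function uses as
// its local address base: the FP when one is established, otherwise the SP.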
26800b57cec5SDimitry Andric int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
26810b57cec5SDimitry Andric                                                  int FI) const {
26820b57cec5SDimitry Andric   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
26830b57cec5SDimitry Andric       MF.getSubtarget().getRegisterInfo());
26840b57cec5SDimitry Andric   int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
26850b57cec5SDimitry Andric   return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
2686e8d8bef9SDimitry Andric              ? getFPOffset(MF, ObjectOffset).getFixed()
2687e8d8bef9SDimitry Andric              : getStackOffset(MF, ObjectOffset).getFixed();
26880b57cec5SDimitry Andric }
26890b57cec5SDimitry Andric 
26908bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameIndexReference(
26915ffd83dbSDimitry Andric     const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
26920b57cec5SDimitry Andric     bool ForSimm) const {
26930b57cec5SDimitry Andric   const auto &MFI = MF.getFrameInfo();
2694480093f4SDimitry Andric   int64_t ObjectOffset = MFI.getObjectOffset(FI);
26950b57cec5SDimitry Andric   bool isFixed = MFI.isFixedObjectIndex(FI);
2696e8d8bef9SDimitry Andric   bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
26978bcb0991SDimitry Andric   return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
26980b57cec5SDimitry Andric                                      PreferFP, ForSimm);
26990b57cec5SDimitry Andric }
27000b57cec5SDimitry Andric 
27018bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
2702480093f4SDimitry Andric     const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
27035ffd83dbSDimitry Andric     Register &FrameReg, bool PreferFP, bool ForSimm) const {
27040b57cec5SDimitry Andric   const auto &MFI = MF.getFrameInfo();
27050b57cec5SDimitry Andric   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
27060b57cec5SDimitry Andric       MF.getSubtarget().getRegisterInfo());
27070b57cec5SDimitry Andric   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
27080b57cec5SDimitry Andric   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
27090b57cec5SDimitry Andric 
2710e8d8bef9SDimitry Andric   int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
2711e8d8bef9SDimitry Andric   int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
27120b57cec5SDimitry Andric   bool isCSR =
2713480093f4SDimitry Andric       !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
27140b57cec5SDimitry Andric 
27158bcb0991SDimitry Andric   const StackOffset &SVEStackSize = getSVEStackSize(MF);
27168bcb0991SDimitry Andric 
27170b57cec5SDimitry Andric   // Use frame pointer to reference fixed objects. Use it for locals if
27180b57cec5SDimitry Andric   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
27190b57cec5SDimitry Andric   // reliable as a base). Make sure useFPForScavengingIndex() does the
27200b57cec5SDimitry Andric   // right thing for the emergency spill slot.
27210b57cec5SDimitry Andric   bool UseFP = false;
27228bcb0991SDimitry Andric   if (AFI->hasStackFrame() && !isSVE) {
272381ad6265SDimitry Andric     // We shouldn't prefer using the FP to access fixed-sized stack objects when
272481ad6265SDimitry Andric     // there are scalable (SVE) objects in between the FP and the fixed-sized
272581ad6265SDimitry Andric     // objects.
27268bcb0991SDimitry Andric     PreferFP &= !SVEStackSize;
27278bcb0991SDimitry Andric 
27280b57cec5SDimitry Andric     // Note: Keeping the following as multiple 'if' statements rather than
27290b57cec5SDimitry Andric     // merging to a single expression for readability.
27300b57cec5SDimitry Andric     //
27310b57cec5SDimitry Andric     // Argument access should always use the FP.
27320b57cec5SDimitry Andric     if (isFixed) {
27330b57cec5SDimitry Andric       UseFP = hasFP(MF);
2734fe6060f1SDimitry Andric     } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
27350b57cec5SDimitry Andric       // References to the CSR area must use FP if we're re-aligning the stack
27360b57cec5SDimitry Andric       // since the dynamically-sized alignment padding is between the SP/BP and
27370b57cec5SDimitry Andric       // the CSR area.
27380b57cec5SDimitry Andric       assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
27390b57cec5SDimitry Andric       UseFP = true;
2740fe6060f1SDimitry Andric     } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
27410b57cec5SDimitry Andric       // If the FPOffset is negative and we're producing a signed immediate, we
27420b57cec5SDimitry Andric       // have to keep in mind that the available offset range for negative
27430b57cec5SDimitry Andric       // offsets is smaller than for positive ones. If an offset is available
27440b57cec5SDimitry Andric       // via the FP and the SP, use whichever is closest.
27450b57cec5SDimitry Andric       bool FPOffsetFits = !ForSimm || FPOffset >= -256;
274681ad6265SDimitry Andric       PreferFP |= Offset > -FPOffset && !SVEStackSize;
27470b57cec5SDimitry Andric 
27480b57cec5SDimitry Andric       if (MFI.hasVarSizedObjects()) {
27490b57cec5SDimitry Andric         // If we have variable sized objects, we can use either FP or BP, as the
27500b57cec5SDimitry Andric         // SP offset is unknown. We can use the base pointer if we have one and
27510b57cec5SDimitry Andric         // FP is not preferred. If not, we're stuck with using FP.
27520b57cec5SDimitry Andric         bool CanUseBP = RegInfo->hasBasePointer(MF);
27530b57cec5SDimitry Andric         if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
27540b57cec5SDimitry Andric           UseFP = PreferFP;
27555ffd83dbSDimitry Andric         else if (!CanUseBP) // Can't use BP. Forced to use FP.
27560b57cec5SDimitry Andric           UseFP = true;
27570b57cec5SDimitry Andric         // else we can use BP and FP, but the offset from FP won't fit.
27580b57cec5SDimitry Andric         // That will make us scavenge registers which we can probably avoid by
27590b57cec5SDimitry Andric         // using BP. If it won't fit for BP either, we'll scavenge anyway.
27600b57cec5SDimitry Andric       } else if (FPOffset >= 0) {
27610b57cec5SDimitry Andric         // Use SP or FP, whichever gives us the best chance of the offset
27620b57cec5SDimitry Andric         // being in range for direct access. If the FPOffset is positive,
27630b57cec5SDimitry Andric         // that'll always be best, as the SP will be even further away.
27640b57cec5SDimitry Andric         UseFP = true;
27650b57cec5SDimitry Andric       } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
27660b57cec5SDimitry Andric         // Funclets access the locals contained in the parent's stack frame
27670b57cec5SDimitry Andric         // via the frame pointer, so we have to use the FP in the parent
27680b57cec5SDimitry Andric         // function.
27690b57cec5SDimitry Andric         (void) Subtarget;
27700fca6ea1SDimitry Andric         assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
27710fca6ea1SDimitry Andric                                             MF.getFunction().isVarArg()) &&
27720b57cec5SDimitry Andric                "Funclets should only be present on Win64");
27730b57cec5SDimitry Andric         UseFP = true;
27740b57cec5SDimitry Andric       } else {
27750b57cec5SDimitry Andric         // We have the choice between FP and (SP or BP).
27760b57cec5SDimitry Andric         if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
27770b57cec5SDimitry Andric           UseFP = true;
27780b57cec5SDimitry Andric       }
27790b57cec5SDimitry Andric     }
27800b57cec5SDimitry Andric   }
27810b57cec5SDimitry Andric 
2782fe6060f1SDimitry Andric   assert(
2783fe6060f1SDimitry Andric       ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
27840b57cec5SDimitry Andric       "In the presence of dynamic stack pointer realignment, "
27850b57cec5SDimitry Andric       "non-argument/CSR objects cannot be accessed through the frame pointer");
27860b57cec5SDimitry Andric 
27878bcb0991SDimitry Andric   if (isSVE) {
2788e8d8bef9SDimitry Andric     StackOffset FPOffset =
2789e8d8bef9SDimitry Andric         StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
2790e8d8bef9SDimitry Andric     StackOffset SPOffset =
2791e8d8bef9SDimitry Andric         SVEStackSize +
2792e8d8bef9SDimitry Andric         StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
2793e8d8bef9SDimitry Andric                          ObjectOffset);
27948bcb0991SDimitry Andric     // Always use the FP for SVE spills if available and beneficial.
2795fe6060f1SDimitry Andric     if (hasFP(MF) && (SPOffset.getFixed() ||
2796e8d8bef9SDimitry Andric                       FPOffset.getScalable() < SPOffset.getScalable() ||
2797fe6060f1SDimitry Andric                       RegInfo->hasStackRealignment(MF))) {
27980b57cec5SDimitry Andric       FrameReg = RegInfo->getFrameRegister(MF);
27990b57cec5SDimitry Andric       return FPOffset;
28000b57cec5SDimitry Andric     }
28010b57cec5SDimitry Andric 
28028bcb0991SDimitry Andric     FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
28038bcb0991SDimitry Andric                                            : (unsigned)AArch64::SP;
28048bcb0991SDimitry Andric     return SPOffset;
28058bcb0991SDimitry Andric   }
28068bcb0991SDimitry Andric 
28078bcb0991SDimitry Andric   StackOffset ScalableOffset = {};
28088bcb0991SDimitry Andric   if (UseFP && !(isFixed || isCSR))
28098bcb0991SDimitry Andric     ScalableOffset = -SVEStackSize;
28108bcb0991SDimitry Andric   if (!UseFP && (isFixed || isCSR))
28118bcb0991SDimitry Andric     ScalableOffset = SVEStackSize;
28128bcb0991SDimitry Andric 
28138bcb0991SDimitry Andric   if (UseFP) {
28148bcb0991SDimitry Andric     FrameReg = RegInfo->getFrameRegister(MF);
2815e8d8bef9SDimitry Andric     return StackOffset::getFixed(FPOffset) + ScalableOffset;
28168bcb0991SDimitry Andric   }
28178bcb0991SDimitry Andric 
28180b57cec5SDimitry Andric   // Use the base pointer if we have one.
28190b57cec5SDimitry Andric   if (RegInfo->hasBasePointer(MF))
28200b57cec5SDimitry Andric     FrameReg = RegInfo->getBaseRegister();
28210b57cec5SDimitry Andric   else {
28220b57cec5SDimitry Andric     assert(!MFI.hasVarSizedObjects() &&
28230b57cec5SDimitry Andric            "Can't use SP when we have var sized objects.");
28240b57cec5SDimitry Andric     FrameReg = AArch64::SP;
28250b57cec5SDimitry Andric     // If we're using the red zone for this function, the SP won't actually
28260b57cec5SDimitry Andric     // be adjusted, so the offsets will be negative. They're also all
28270b57cec5SDimitry Andric     // within range of the signed 9-bit immediate instructions.
28280b57cec5SDimitry Andric     if (canUseRedZone(MF))
28290b57cec5SDimitry Andric       Offset -= AFI->getLocalStackSize();
28300b57cec5SDimitry Andric   }
28310b57cec5SDimitry Andric 
2832e8d8bef9SDimitry Andric   return StackOffset::getFixed(Offset) + ScalableOffset;
28330b57cec5SDimitry Andric }
28340b57cec5SDimitry Andric 
28350b57cec5SDimitry Andric static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
28360b57cec5SDimitry Andric   // Do not set a kill flag on values that are also marked as live-in. This
28370b57cec5SDimitry Andric   // happens with the @llvm.returnaddress intrinsic and with arguments passed in
28380b57cec5SDimitry Andric   // callee saved registers.
28390b57cec5SDimitry Andric   // Omitting the kill flags is conservatively correct even if the live-in
28400b57cec5SDimitry Andric   // is not used after all.
28410b57cec5SDimitry Andric   bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
28420b57cec5SDimitry Andric   return getKillRegState(!IsLiveIn);
28430b57cec5SDimitry Andric }
28440b57cec5SDimitry Andric 
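// Compact unwind is only emitted for MachO targets, and not for functions
// that take a swifterror argument or use the swifttailcc calling convention.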
28450b57cec5SDimitry Andric static bool produceCompactUnwindFrame(MachineFunction &MF) {
28460b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
28470b57cec5SDimitry Andric   AttributeList Attrs = MF.getFunction().getAttributes();
28480b57cec5SDimitry Andric   return Subtarget.isTargetMachO() &&
28490b57cec5SDimitry Andric          !(Subtarget.getTargetLowering()->supportSwiftError() &&
2850fe6060f1SDimitry Andric            Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
2851fe6060f1SDimitry Andric          MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
28520b57cec5SDimitry Andric }
28530b57cec5SDimitry Andric 
28540b57cec5SDimitry Andric static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
2855bdd1243dSDimitry Andric                                              bool NeedsWinCFI, bool IsFirst,
2856bdd1243dSDimitry Andric                                              const TargetRegisterInfo *TRI) {
28570b57cec5SDimitry Andric   // If we are generating register pairs for a Windows function that requires
28580b57cec5SDimitry Andric   // EH support, then pair consecutive registers only.  There are no unwind
28590b57cec5SDimitry Andric   // opcodes for saves/restores of non-consecutive register pairs.
2860e8d8bef9SDimitry Andric   // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x,
2861e8d8bef9SDimitry Andric   // save_lrpair.
28620b57cec5SDimitry Andric   // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
28630b57cec5SDimitry Andric 
2864480093f4SDimitry Andric   if (Reg2 == AArch64::FP)
2865480093f4SDimitry Andric     return true;
28660b57cec5SDimitry Andric   if (!NeedsWinCFI)
28670b57cec5SDimitry Andric     return false;
2868bdd1243dSDimitry Andric   if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
28690b57cec5SDimitry Andric     return false;
2870e8d8bef9SDimitry Andric   // If pairing a GPR with LR, the pair can be described by the save_lrpair
2871e8d8bef9SDimitry Andric   // opcode. If this is the first register pair, it would end up with a
2872e8d8bef9SDimitry Andric   // predecrement, but there's no save_lrpair_x opcode, so we can only do this
2873e8d8bef9SDimitry Andric   // if LR is paired with a register other than the first one in the CSI list.
2874e8d8bef9SDimitry Andric   // The save_lrpair opcode requires the first register to be an odd one.
2875e8d8bef9SDimitry Andric   if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
2876e8d8bef9SDimitry Andric       (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
2877e8d8bef9SDimitry Andric     return false;
28780b57cec5SDimitry Andric   return true;
28790b57cec5SDimitry Andric }
28800b57cec5SDimitry Andric 
28818bcb0991SDimitry Andric /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
28828bcb0991SDimitry Andric /// WindowsCFI requires that only consecutive registers can be paired.
28838bcb0991SDimitry Andric /// LR and FP need to be allocated together when the frame needs to save
28848bcb0991SDimitry Andric /// the frame-record. This means any other register pairing with LR is invalid.
28858bcb0991SDimitry Andric static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
2886e8d8bef9SDimitry Andric                                       bool UsesWinAAPCS, bool NeedsWinCFI,
2887bdd1243dSDimitry Andric                                       bool NeedsFrameRecord, bool IsFirst,
2888bdd1243dSDimitry Andric                                       const TargetRegisterInfo *TRI) {
2889480093f4SDimitry Andric   if (UsesWinAAPCS)
2890bdd1243dSDimitry Andric     return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
2891bdd1243dSDimitry Andric                                             TRI);
28928bcb0991SDimitry Andric 
28938bcb0991SDimitry Andric   // If we need to store the frame record, don't pair any register
28948bcb0991SDimitry Andric   // with LR other than FP.
28958bcb0991SDimitry Andric   if (NeedsFrameRecord)
28968bcb0991SDimitry Andric     return Reg2 == AArch64::LR;
28978bcb0991SDimitry Andric 
28988bcb0991SDimitry Andric   return false;
28998bcb0991SDimitry Andric }
29008bcb0991SDimitry Andric 
29010b57cec5SDimitry Andric namespace {
29020b57cec5SDimitry Andric 
29030b57cec5SDimitry Andric struct RegPairInfo {
29040b57cec5SDimitry Andric   unsigned Reg1 = AArch64::NoRegister;
29050b57cec5SDimitry Andric   unsigned Reg2 = AArch64::NoRegister;
29060b57cec5SDimitry Andric   int FrameIdx;
29070b57cec5SDimitry Andric   int Offset;
29080fca6ea1SDimitry Andric   enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
29090b57cec5SDimitry Andric 
29100b57cec5SDimitry Andric   RegPairInfo() = default;
29110b57cec5SDimitry Andric 
29120b57cec5SDimitry Andric   bool isPaired() const { return Reg2 != AArch64::NoRegister; }
2913480093f4SDimitry Andric 
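  // Number of bytes one register slot of this type occupies in the save area;
  // RPI.Offset is expressed in multiples of this so it can be used directly as
  // the scaled immediate of the corresponding store/load (pair) instruction.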
2914480093f4SDimitry Andric   unsigned getScale() const {
2915480093f4SDimitry Andric     switch (Type) {
2916480093f4SDimitry Andric     case PPR:
2917480093f4SDimitry Andric       return 2;
2918480093f4SDimitry Andric     case GPR:
2919480093f4SDimitry Andric     case FPR64:
29200fca6ea1SDimitry Andric     case VG:
2921480093f4SDimitry Andric       return 8;
2922480093f4SDimitry Andric     case ZPR:
2923480093f4SDimitry Andric     case FPR128:
2924480093f4SDimitry Andric       return 16;
2925480093f4SDimitry Andric     }
2926480093f4SDimitry Andric     llvm_unreachable("Unsupported type");
2927480093f4SDimitry Andric   }
2928480093f4SDimitry Andric 
2929480093f4SDimitry Andric   bool isScalable() const { return Type == PPR || Type == ZPR; }
29300b57cec5SDimitry Andric };
29310b57cec5SDimitry Andric 
29320b57cec5SDimitry Andric } // end anonymous namespace
29330b57cec5SDimitry Andric 
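// Returns the PN counterpart of the first register in p8..p15 that is already
// being saved, or NoRegister if none is. A register that is spilled anyway can
// be clobbered and used as the governing predicate for SME2/SVE2.1
// multi-vector spills and fills.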
29340fca6ea1SDimitry Andric unsigned findFreePredicateReg(BitVector &SavedRegs) {
29350fca6ea1SDimitry Andric   for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
29360fca6ea1SDimitry Andric     if (SavedRegs.test(PReg)) {
29370fca6ea1SDimitry Andric       unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
29380fca6ea1SDimitry Andric       return PNReg;
29390fca6ea1SDimitry Andric     }
29400fca6ea1SDimitry Andric   }
29410fca6ea1SDimitry Andric   return AArch64::NoRegister;
29420fca6ea1SDimitry Andric }
29430fca6ea1SDimitry Andric 
29440b57cec5SDimitry Andric static void computeCalleeSaveRegisterPairs(
29455ffd83dbSDimitry Andric     MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
29460b57cec5SDimitry Andric     const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
294781ad6265SDimitry Andric     bool NeedsFrameRecord) {
29480b57cec5SDimitry Andric 
29490b57cec5SDimitry Andric   if (CSI.empty())
29500b57cec5SDimitry Andric     return;
29510b57cec5SDimitry Andric 
2952480093f4SDimitry Andric   bool IsWindows = isTargetWindows(MF);
29530b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
29540b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
29550b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
29560b57cec5SDimitry Andric   CallingConv::ID CC = MF.getFunction().getCallingConv();
29570b57cec5SDimitry Andric   unsigned Count = CSI.size();
29580b57cec5SDimitry Andric   (void)CC;
29590b57cec5SDimitry Andric   // MachO's compact unwind format relies on all registers being stored in
29600b57cec5SDimitry Andric   // pairs.
2961bdd1243dSDimitry Andric   assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
296206c3fb27SDimitry Andric           CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
296306c3fb27SDimitry Andric           CC == CallingConv::Win64 || (Count & 1) == 0) &&
29640b57cec5SDimitry Andric          "Odd number of callee-saved regs to spill!");
2965480093f4SDimitry Andric   int ByteOffset = AFI->getCalleeSavedStackSize();
2966e8d8bef9SDimitry Andric   int StackFillDir = -1;
2967e8d8bef9SDimitry Andric   int RegInc = 1;
2968e8d8bef9SDimitry Andric   unsigned FirstReg = 0;
2969e8d8bef9SDimitry Andric   if (NeedsWinCFI) {
2970e8d8bef9SDimitry Andric     // For WinCFI, fill the stack from the bottom up.
2971e8d8bef9SDimitry Andric     ByteOffset = 0;
2972e8d8bef9SDimitry Andric     StackFillDir = 1;
2973e8d8bef9SDimitry Andric     // As the CSI array is reversed to match PrologEpilogInserter, iterate
2974e8d8bef9SDimitry Andric     // backwards, to pair up registers starting from lower numbered registers.
2975e8d8bef9SDimitry Andric     RegInc = -1;
2976e8d8bef9SDimitry Andric     FirstReg = Count - 1;
2977e8d8bef9SDimitry Andric   }
2978480093f4SDimitry Andric   int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
2979fe6060f1SDimitry Andric   bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
29800fca6ea1SDimitry Andric   Register LastReg = 0;
298175b4d546SDimitry Andric 
2982e8d8bef9SDimitry Andric   // When iterating backwards, the loop condition relies on unsigned wraparound.
2983e8d8bef9SDimitry Andric   for (unsigned i = FirstReg; i < Count; i += RegInc) {
29840b57cec5SDimitry Andric     RegPairInfo RPI;
29850b57cec5SDimitry Andric     RPI.Reg1 = CSI[i].getReg();
29860b57cec5SDimitry Andric 
29870b57cec5SDimitry Andric     if (AArch64::GPR64RegClass.contains(RPI.Reg1))
29880b57cec5SDimitry Andric       RPI.Type = RegPairInfo::GPR;
29890b57cec5SDimitry Andric     else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
29900b57cec5SDimitry Andric       RPI.Type = RegPairInfo::FPR64;
29910b57cec5SDimitry Andric     else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
29920b57cec5SDimitry Andric       RPI.Type = RegPairInfo::FPR128;
2993480093f4SDimitry Andric     else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
2994480093f4SDimitry Andric       RPI.Type = RegPairInfo::ZPR;
2995480093f4SDimitry Andric     else if (AArch64::PPRRegClass.contains(RPI.Reg1))
2996480093f4SDimitry Andric       RPI.Type = RegPairInfo::PPR;
29970fca6ea1SDimitry Andric     else if (RPI.Reg1 == AArch64::VG)
29980fca6ea1SDimitry Andric       RPI.Type = RegPairInfo::VG;
29990b57cec5SDimitry Andric     else
30000b57cec5SDimitry Andric       llvm_unreachable("Unsupported register class.");
30010b57cec5SDimitry Andric 
30020fca6ea1SDimitry Andric     // Add the stack hazard size as we transition from GPR->FPR CSRs.
30030fca6ea1SDimitry Andric     if (AFI->hasStackHazardSlotIndex() &&
30040fca6ea1SDimitry Andric         (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
30050fca6ea1SDimitry Andric         AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
30060fca6ea1SDimitry Andric       ByteOffset += StackFillDir * StackHazardSize;
30070fca6ea1SDimitry Andric     LastReg = RPI.Reg1;
30080fca6ea1SDimitry Andric 
30090b57cec5SDimitry Andric     // Add the next reg to the pair if it is in the same register class.
30100fca6ea1SDimitry Andric     if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
301104eeddc0SDimitry Andric       Register NextReg = CSI[i + RegInc].getReg();
3012e8d8bef9SDimitry Andric       bool IsFirst = i == FirstReg;
30130b57cec5SDimitry Andric       switch (RPI.Type) {
30140b57cec5SDimitry Andric       case RegPairInfo::GPR:
30150b57cec5SDimitry Andric         if (AArch64::GPR64RegClass.contains(NextReg) &&
3016e8d8bef9SDimitry Andric             !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
3017bdd1243dSDimitry Andric                                        NeedsWinCFI, NeedsFrameRecord, IsFirst,
3018bdd1243dSDimitry Andric                                        TRI))
30190b57cec5SDimitry Andric           RPI.Reg2 = NextReg;
30200b57cec5SDimitry Andric         break;
30210b57cec5SDimitry Andric       case RegPairInfo::FPR64:
30220b57cec5SDimitry Andric         if (AArch64::FPR64RegClass.contains(NextReg) &&
3023e8d8bef9SDimitry Andric             !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
3024bdd1243dSDimitry Andric                                               IsFirst, TRI))
30250b57cec5SDimitry Andric           RPI.Reg2 = NextReg;
30260b57cec5SDimitry Andric         break;
30270b57cec5SDimitry Andric       case RegPairInfo::FPR128:
30280b57cec5SDimitry Andric         if (AArch64::FPR128RegClass.contains(NextReg))
30290b57cec5SDimitry Andric           RPI.Reg2 = NextReg;
30300b57cec5SDimitry Andric         break;
3031480093f4SDimitry Andric       case RegPairInfo::PPR:
30320fca6ea1SDimitry Andric         break;
3033480093f4SDimitry Andric       case RegPairInfo::ZPR:
30340fca6ea1SDimitry Andric         if (AFI->getPredicateRegForFillSpill() != 0)
30350fca6ea1SDimitry Andric           if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
30360fca6ea1SDimitry Andric             RPI.Reg2 = NextReg;
30370fca6ea1SDimitry Andric         break;
30380fca6ea1SDimitry Andric       case RegPairInfo::VG:
3039480093f4SDimitry Andric         break;
30400b57cec5SDimitry Andric       }
30410b57cec5SDimitry Andric     }
30420b57cec5SDimitry Andric 
30430b57cec5SDimitry Andric     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
30440b57cec5SDimitry Andric     // list to come in sorted by frame index so that we can issue the store
30450b57cec5SDimitry Andric     // pair instructions directly. Assert if we see anything otherwise.
30460b57cec5SDimitry Andric     //
30470b57cec5SDimitry Andric     // The order of the registers in the list is controlled by
30480b57cec5SDimitry Andric     // getCalleeSavedRegs(), so they will always be in-order, as well.
30490b57cec5SDimitry Andric     assert((!RPI.isPaired() ||
3050e8d8bef9SDimitry Andric             (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
30510b57cec5SDimitry Andric            "Out of order callee saved regs!");
30520b57cec5SDimitry Andric 
30538bcb0991SDimitry Andric     assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
30548bcb0991SDimitry Andric             RPI.Reg1 == AArch64::LR) &&
30558bcb0991SDimitry Andric            "FrameRecord must be allocated together with LR");
30568bcb0991SDimitry Andric 
3057480093f4SDimitry Andric     // Windows AAPCS has FP and LR reversed.
3058480093f4SDimitry Andric     assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
3059480093f4SDimitry Andric             RPI.Reg2 == AArch64::LR) &&
3060480093f4SDimitry Andric            "FrameRecord must be allocated together with LR");
3061480093f4SDimitry Andric 
30620b57cec5SDimitry Andric     // MachO's compact unwind format relies on all registers being stored in
30630b57cec5SDimitry Andric     // adjacent register pairs.
3064bdd1243dSDimitry Andric     assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
306506c3fb27SDimitry Andric             CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
306606c3fb27SDimitry Andric             CC == CallingConv::Win64 ||
30670b57cec5SDimitry Andric             (RPI.isPaired() &&
30680b57cec5SDimitry Andric              ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
30690b57cec5SDimitry Andric               RPI.Reg1 + 1 == RPI.Reg2))) &&
30700b57cec5SDimitry Andric            "Callee-save registers not saved as adjacent register pair!");
30710b57cec5SDimitry Andric 
30720b57cec5SDimitry Andric     RPI.FrameIdx = CSI[i].getFrameIdx();
3073e8d8bef9SDimitry Andric     if (NeedsWinCFI &&
3074e8d8bef9SDimitry Andric         RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
3075e8d8bef9SDimitry Andric       RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
3076480093f4SDimitry Andric     int Scale = RPI.getScale();
3077e8d8bef9SDimitry Andric 
3078e8d8bef9SDimitry Andric     int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
3079e8d8bef9SDimitry Andric     assert(OffsetPre % Scale == 0);
3080e8d8bef9SDimitry Andric 
3081480093f4SDimitry Andric     if (RPI.isScalable())
30820fca6ea1SDimitry Andric       ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
3083480093f4SDimitry Andric     else
3084e8d8bef9SDimitry Andric       ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
3085480093f4SDimitry Andric 
3086fe6060f1SDimitry Andric     // Swift's async context is directly before FP, so allocate an extra
3087fe6060f1SDimitry Andric     // 8 bytes for it.
3088fe6060f1SDimitry Andric     if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
30895f757f3fSDimitry Andric         ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
30905f757f3fSDimitry Andric          (IsWindows && RPI.Reg2 == AArch64::LR)))
3091fe6060f1SDimitry Andric       ByteOffset += StackFillDir * 8;
3092fe6060f1SDimitry Andric 
30930b57cec5SDimitry Andric     // Round up size of non-pair to pair size if we need to pad the
30940b57cec5SDimitry Andric     // callee-save area to ensure 16-byte alignment.
30950fca6ea1SDimitry Andric     if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
30960fca6ea1SDimitry Andric         RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
30970fca6ea1SDimitry Andric         ByteOffset % 16 != 0) {
3098e8d8bef9SDimitry Andric       ByteOffset += 8 * StackFillDir;
30995ffd83dbSDimitry Andric       assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
3100e8d8bef9SDimitry Andric       // A stack frame with a gap looks like this, bottom up:
3101e8d8bef9SDimitry Andric       // d9, d8. x21, gap, x20, x19.
3102fe6060f1SDimitry Andric       // Set extra alignment on the x21 object to create the gap above it.
31035ffd83dbSDimitry Andric       MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
3104fe6060f1SDimitry Andric       NeedGapToAlignStack = false;
31050b57cec5SDimitry Andric     }
31060b57cec5SDimitry Andric 
3107e8d8bef9SDimitry Andric     int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
3108e8d8bef9SDimitry Andric     assert(OffsetPost % Scale == 0);
3109e8d8bef9SDimitry Andric     // If filling top down (default), we want the offset after incrementing it.
31105f757f3fSDimitry Andric     // If filling bottom up (WinCFI) we need the original offset.
3111e8d8bef9SDimitry Andric     int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
3112fe6060f1SDimitry Andric 
3113fe6060f1SDimitry Andric     // The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
3114fe6060f1SDimitry Andric     // Swift context can directly precede FP.
3115fe6060f1SDimitry Andric     if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
31165f757f3fSDimitry Andric         ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
31175f757f3fSDimitry Andric          (IsWindows && RPI.Reg2 == AArch64::LR)))
3118fe6060f1SDimitry Andric       Offset += 8;
31190b57cec5SDimitry Andric     RPI.Offset = Offset / Scale;
3120480093f4SDimitry Andric 
31210fca6ea1SDimitry Andric     assert((!RPI.isPaired() ||
31220fca6ea1SDimitry Andric             (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
3123480093f4SDimitry Andric             (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
31240b57cec5SDimitry Andric            "Offset out of bounds for LDP/STP immediate");
31250b57cec5SDimitry Andric 
3126e8d8bef9SDimitry Andric     // Save the offset to frame record so that the FP register can point to the
3127e8d8bef9SDimitry Andric     // innermost frame record (spilled FP and LR registers).
31280fca6ea1SDimitry Andric     if (NeedsFrameRecord &&
31290fca6ea1SDimitry Andric         ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
31300fca6ea1SDimitry Andric          (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
3131e8d8bef9SDimitry Andric       AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
3132e8d8bef9SDimitry Andric 
31330b57cec5SDimitry Andric     RegPairs.push_back(RPI);
31340b57cec5SDimitry Andric     if (RPI.isPaired())
3135e8d8bef9SDimitry Andric       i += RegInc;
3136e8d8bef9SDimitry Andric   }
3137e8d8bef9SDimitry Andric   if (NeedsWinCFI) {
3138e8d8bef9SDimitry Andric     // If we need an alignment gap in the stack, align the topmost stack
3139e8d8bef9SDimitry Andric     // object. A stack frame with a gap looks like this, bottom up:
3140e8d8bef9SDimitry Andric     // x19, d8. d9, gap.
3141e8d8bef9SDimitry Andric     // Set extra alignment on the topmost stack object (the first element in
3142e8d8bef9SDimitry Andric     // CSI, which goes top down), to create the gap above it.
3143e8d8bef9SDimitry Andric     if (AFI->hasCalleeSaveStackFreeSpace())
3144e8d8bef9SDimitry Andric       MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
3145e8d8bef9SDimitry Andric     // We iterated bottom up over the registers; flip RegPairs back to top
3146e8d8bef9SDimitry Andric     // down order.
3147e8d8bef9SDimitry Andric     std::reverse(RegPairs.begin(), RegPairs.end());
31480b57cec5SDimitry Andric   }
31490b57cec5SDimitry Andric }
31500b57cec5SDimitry Andric 
31510b57cec5SDimitry Andric bool AArch64FrameLowering::spillCalleeSavedRegisters(
31520b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
31535ffd83dbSDimitry Andric     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
31540b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
31550b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
31560fca6ea1SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
31570b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
31580b57cec5SDimitry Andric   DebugLoc DL;
31590b57cec5SDimitry Andric   SmallVector<RegPairInfo, 8> RegPairs;
31600b57cec5SDimitry Andric 
316181ad6265SDimitry Andric   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
316281ad6265SDimitry Andric 
31630fca6ea1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
31640fca6ea1SDimitry Andric   // Refresh the reserved regs in case they have changed since the last freeze.
31660fca6ea1SDimitry Andric   MRI.freezeReservedRegs();
31670fca6ea1SDimitry Andric 
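  // With homogeneous prologue/epilogue, the callee-save spills are represented
  // by a single HOM_Prolog pseudo that a later pass expands; just record the
  // saved registers on it and mark them live-in.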
3168fe6060f1SDimitry Andric   if (homogeneousPrologEpilog(MF)) {
3169fe6060f1SDimitry Andric     auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
3170fe6060f1SDimitry Andric                    .setMIFlag(MachineInstr::FrameSetup);
3171fe6060f1SDimitry Andric 
3172fe6060f1SDimitry Andric     for (auto &RPI : RegPairs) {
3173fe6060f1SDimitry Andric       MIB.addReg(RPI.Reg1);
3174fe6060f1SDimitry Andric       MIB.addReg(RPI.Reg2);
3175fe6060f1SDimitry Andric 
3176fe6060f1SDimitry Andric       // Update register live in.
3177fe6060f1SDimitry Andric       if (!MRI.isReserved(RPI.Reg1))
3178fe6060f1SDimitry Andric         MBB.addLiveIn(RPI.Reg1);
31795f757f3fSDimitry Andric       if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
3180fe6060f1SDimitry Andric         MBB.addLiveIn(RPI.Reg2);
3181fe6060f1SDimitry Andric     }
3182fe6060f1SDimitry Andric     return true;
3183fe6060f1SDimitry Andric   }
31840fca6ea1SDimitry Andric   bool PTrueCreated = false;
3185349cc55cSDimitry Andric   for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
31860b57cec5SDimitry Andric     unsigned Reg1 = RPI.Reg1;
31870b57cec5SDimitry Andric     unsigned Reg2 = RPI.Reg2;
31880b57cec5SDimitry Andric     unsigned StrOpc;
31890b57cec5SDimitry Andric 
31900b57cec5SDimitry Andric     // Issue sequence of spills for cs regs.  The first spill may be converted
31910b57cec5SDimitry Andric     // to a pre-decrement store later by emitPrologue if the callee-save stack
31920b57cec5SDimitry Andric     // area allocation can't be combined with the local stack area allocation.
31930b57cec5SDimitry Andric     // For example:
31940b57cec5SDimitry Andric     //    stp     x22, x21, [sp, #0]     // addImm(+0)
31950b57cec5SDimitry Andric     //    stp     x20, x19, [sp, #16]    // addImm(+2)
31960b57cec5SDimitry Andric     //    stp     fp, lr, [sp, #32]      // addImm(+4)
31970b57cec5SDimitry Andric     // Rationale: This sequence saves uop updates compared to a sequence of
31980b57cec5SDimitry Andric     // pre-increment spills like stp xi,xj,[sp,#-16]!
31990b57cec5SDimitry Andric     // Note: Similar rationale and sequence for restores in epilog.
32005ffd83dbSDimitry Andric     unsigned Size;
32015ffd83dbSDimitry Andric     Align Alignment;
32020b57cec5SDimitry Andric     switch (RPI.Type) {
32030b57cec5SDimitry Andric     case RegPairInfo::GPR:
32040b57cec5SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
32050b57cec5SDimitry Andric       Size = 8;
32065ffd83dbSDimitry Andric       Alignment = Align(8);
32070b57cec5SDimitry Andric       break;
32080b57cec5SDimitry Andric     case RegPairInfo::FPR64:
32090b57cec5SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
32100b57cec5SDimitry Andric       Size = 8;
32115ffd83dbSDimitry Andric       Alignment = Align(8);
32120b57cec5SDimitry Andric       break;
32130b57cec5SDimitry Andric     case RegPairInfo::FPR128:
32140b57cec5SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
32150b57cec5SDimitry Andric       Size = 16;
32165ffd83dbSDimitry Andric       Alignment = Align(16);
32170b57cec5SDimitry Andric       break;
3218480093f4SDimitry Andric     case RegPairInfo::ZPR:
32190fca6ea1SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
3220480093f4SDimitry Andric       Size = 16;
32215ffd83dbSDimitry Andric       Alignment = Align(16);
3222480093f4SDimitry Andric       break;
3223480093f4SDimitry Andric     case RegPairInfo::PPR:
3224480093f4SDimitry Andric       StrOpc = AArch64::STR_PXI;
3225480093f4SDimitry Andric       Size = 2;
32265ffd83dbSDimitry Andric       Alignment = Align(2);
3227480093f4SDimitry Andric       break;
32280fca6ea1SDimitry Andric     case RegPairInfo::VG:
32290fca6ea1SDimitry Andric       StrOpc = AArch64::STRXui;
32300fca6ea1SDimitry Andric       Size = 8;
32310fca6ea1SDimitry Andric       Alignment = Align(8);
32320fca6ea1SDimitry Andric       break;
32330b57cec5SDimitry Andric     }
32340fca6ea1SDimitry Andric 
32350fca6ea1SDimitry Andric     unsigned X0Scratch = AArch64::NoRegister;
32360fca6ea1SDimitry Andric     if (Reg1 == AArch64::VG) {
32370fca6ea1SDimitry Andric       // Find an available register to store value of VG to.
32380fca6ea1SDimitry Andric       Reg1 = findScratchNonCalleeSaveRegister(&MBB);
32390fca6ea1SDimitry Andric       assert(Reg1 != AArch64::NoRegister);
32400fca6ea1SDimitry Andric       SMEAttrs Attrs(MF.getFunction());
32410fca6ea1SDimitry Andric 
32420fca6ea1SDimitry Andric       if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
32430fca6ea1SDimitry Andric           AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) {
32440fca6ea1SDimitry Andric         // For locally-streaming functions, we need to store both the streaming
32450fca6ea1SDimitry Andric         // & non-streaming VG. Spill the streaming value first.
32460fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1)
32470fca6ea1SDimitry Andric             .addImm(1)
32480fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32490fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1)
32500fca6ea1SDimitry Andric             .addReg(Reg1)
32510fca6ea1SDimitry Andric             .addImm(3)
32520fca6ea1SDimitry Andric             .addImm(63)
32530fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32540fca6ea1SDimitry Andric 
32550fca6ea1SDimitry Andric         AFI->setStreamingVGIdx(RPI.FrameIdx);
32560fca6ea1SDimitry Andric       } else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
32570fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1)
32580fca6ea1SDimitry Andric             .addImm(31)
32590fca6ea1SDimitry Andric             .addImm(1)
32600fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32610fca6ea1SDimitry Andric         AFI->setVGIdx(RPI.FrameIdx);
32620fca6ea1SDimitry Andric       } else {
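        // Neither the locally-streaming nor the SVE case applies here, so read
        // VG by calling __arm_get_current_vg (which returns it in X0). If X0
        // is live into this block, preserve it in the scratch register around
        // the call.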
32630fca6ea1SDimitry Andric         const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
32640fca6ea1SDimitry Andric         if (llvm::any_of(
32650fca6ea1SDimitry Andric                 MBB.liveins(),
32660fca6ea1SDimitry Andric                 [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
32670fca6ea1SDimitry Andric                   return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
32680fca6ea1SDimitry Andric                       AArch64::X0, LiveIn.PhysReg);
32690fca6ea1SDimitry Andric                 }))
32700fca6ea1SDimitry Andric           X0Scratch = Reg1;
32710fca6ea1SDimitry Andric 
32720fca6ea1SDimitry Andric         if (X0Scratch != AArch64::NoRegister)
32730fca6ea1SDimitry Andric           BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1)
32740fca6ea1SDimitry Andric               .addReg(AArch64::XZR)
32750fca6ea1SDimitry Andric               .addReg(AArch64::X0, RegState::Undef)
32760fca6ea1SDimitry Andric               .addReg(AArch64::X0, RegState::Implicit)
32770fca6ea1SDimitry Andric               .setMIFlag(MachineInstr::FrameSetup);
32780fca6ea1SDimitry Andric 
32790fca6ea1SDimitry Andric         const uint32_t *RegMask = TRI->getCallPreservedMask(
32800fca6ea1SDimitry Andric             MF,
32810fca6ea1SDimitry Andric             CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
32820fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::BL))
32830fca6ea1SDimitry Andric             .addExternalSymbol("__arm_get_current_vg")
32840fca6ea1SDimitry Andric             .addRegMask(RegMask)
32850fca6ea1SDimitry Andric             .addReg(AArch64::X0, RegState::ImplicitDefine)
32860fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32870fca6ea1SDimitry Andric         Reg1 = AArch64::X0;
32880fca6ea1SDimitry Andric         AFI->setVGIdx(RPI.FrameIdx);
32890fca6ea1SDimitry Andric       }
32900fca6ea1SDimitry Andric     }
32910fca6ea1SDimitry Andric 
32920b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
32930b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
32940b57cec5SDimitry Andric                dbgs() << ") -> fi#(" << RPI.FrameIdx;
32950b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
32960b57cec5SDimitry Andric                dbgs() << ")\n");
32970b57cec5SDimitry Andric 
32980b57cec5SDimitry Andric     assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
32990b57cec5SDimitry Andric            "Windows unwinding requires a consecutive (FP,LR) pair");
33000b57cec5SDimitry Andric     // Windows unwind codes require consecutive registers if registers are
33010b57cec5SDimitry Andric     // paired.  Make the switch here, so that the code below will save (x,x+1)
33020b57cec5SDimitry Andric     // and not (x+1,x).
33030b57cec5SDimitry Andric     unsigned FrameIdxReg1 = RPI.FrameIdx;
33040b57cec5SDimitry Andric     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
33050b57cec5SDimitry Andric     if (NeedsWinCFI && RPI.isPaired()) {
33060b57cec5SDimitry Andric       std::swap(Reg1, Reg2);
33070b57cec5SDimitry Andric       std::swap(FrameIdxReg1, FrameIdxReg2);
33080b57cec5SDimitry Andric     }
33090fca6ea1SDimitry Andric 
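    // Paired ZPR spills use the SME2/SVE2.1 multi-vector store (ST1B of two
    // consecutive Z registers under a PN predicate); materialise the all-true
    // predicate once and reuse it for every such pair.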
33100fca6ea1SDimitry Andric     if (RPI.isPaired() && RPI.isScalable()) {
33110fca6ea1SDimitry Andric       [[maybe_unused]] const AArch64Subtarget &Subtarget =
33120fca6ea1SDimitry Andric                               MF.getSubtarget<AArch64Subtarget>();
33130fca6ea1SDimitry Andric       AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
33140fca6ea1SDimitry Andric       unsigned PnReg = AFI->getPredicateRegForFillSpill();
33150fca6ea1SDimitry Andric       assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
33160fca6ea1SDimitry Andric              "Expects SVE2.1 or SME2 target and a predicate register");
33170fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
33180fca6ea1SDimitry Andric       auto IsPPR = [](const RegPairInfo &c) {
33190fca6ea1SDimitry Andric         return c.Reg1 == RegPairInfo::PPR;
33200fca6ea1SDimitry Andric       };
33210fca6ea1SDimitry Andric       auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
33220fca6ea1SDimitry Andric       auto IsZPR = [](const RegPairInfo &c) {
33230fca6ea1SDimitry Andric         return c.Type == RegPairInfo::ZPR;
33240fca6ea1SDimitry Andric       };
33250fca6ea1SDimitry Andric       auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
33260fca6ea1SDimitry Andric       assert(!(PPRBegin < ZPRBegin) &&
33270fca6ea1SDimitry Andric              "Expected callee save predicate to be handled first");
33280fca6ea1SDimitry Andric #endif
33290fca6ea1SDimitry Andric       if (!PTrueCreated) {
33300fca6ea1SDimitry Andric         PTrueCreated = true;
33310fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
33320fca6ea1SDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
33330fca6ea1SDimitry Andric       }
33340fca6ea1SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
33350fca6ea1SDimitry Andric       if (!MRI.isReserved(Reg1))
33360fca6ea1SDimitry Andric         MBB.addLiveIn(Reg1);
33370fca6ea1SDimitry Andric       if (!MRI.isReserved(Reg2))
33380fca6ea1SDimitry Andric         MBB.addLiveIn(Reg2);
33390fca6ea1SDimitry Andric       MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
33400fca6ea1SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
33410fca6ea1SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
33420fca6ea1SDimitry Andric           MachineMemOperand::MOStore, Size, Alignment));
33430fca6ea1SDimitry Andric       MIB.addReg(PnReg);
33440fca6ea1SDimitry Andric       MIB.addReg(AArch64::SP)
33450fca6ea1SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale],
33460fca6ea1SDimitry Andric                               // where factor*scale is implicit
33470fca6ea1SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
33480fca6ea1SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
33490fca6ea1SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
33500fca6ea1SDimitry Andric           MachineMemOperand::MOStore, Size, Alignment));
33510fca6ea1SDimitry Andric       if (NeedsWinCFI)
33520fca6ea1SDimitry Andric     } else { // Not a paired ZPR spill: emit a regular (possibly paired) store.
33530fca6ea1SDimitry Andric     } else { // Not a paired ZPR spill; emit a regular (possibly paired) store.
33540b57cec5SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
33550b57cec5SDimitry Andric       if (!MRI.isReserved(Reg1))
33560b57cec5SDimitry Andric         MBB.addLiveIn(Reg1);
33570b57cec5SDimitry Andric       if (RPI.isPaired()) {
33580b57cec5SDimitry Andric         if (!MRI.isReserved(Reg2))
33590b57cec5SDimitry Andric           MBB.addLiveIn(Reg2);
33600b57cec5SDimitry Andric         MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
33610b57cec5SDimitry Andric         MIB.addMemOperand(MF.getMachineMemOperand(
33620b57cec5SDimitry Andric             MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
33635ffd83dbSDimitry Andric             MachineMemOperand::MOStore, Size, Alignment));
33640b57cec5SDimitry Andric       }
33650b57cec5SDimitry Andric       MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
33660b57cec5SDimitry Andric           .addReg(AArch64::SP)
33670b57cec5SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale],
33680b57cec5SDimitry Andric                               // where factor*scale is implicit
33690b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
33700b57cec5SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
33710b57cec5SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
33725ffd83dbSDimitry Andric           MachineMemOperand::MOStore, Size, Alignment));
33730b57cec5SDimitry Andric       if (NeedsWinCFI)
33740b57cec5SDimitry Andric         InsertSEH(MIB, TII, MachineInstr::FrameSetup);
33750fca6ea1SDimitry Andric     }
3376480093f4SDimitry Andric     // Update the StackIDs of the SVE stack slots.
3377480093f4SDimitry Andric     MachineFrameInfo &MFI = MF.getFrameInfo();
33780fca6ea1SDimitry Andric     if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
33790fca6ea1SDimitry Andric       MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector);
33800fca6ea1SDimitry Andric       if (RPI.isPaired())
33810fca6ea1SDimitry Andric         MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
33820fca6ea1SDimitry Andric     }
3383480093f4SDimitry Andric 
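    // If X0 was stashed in a scratch register earlier, restore it now with a
    // plain register move, encoded as ORR X0, XZR, <scratch>.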
33840fca6ea1SDimitry Andric     if (X0Scratch != AArch64::NoRegister)
33850fca6ea1SDimitry Andric       BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
33860fca6ea1SDimitry Andric           .addReg(AArch64::XZR)
33870fca6ea1SDimitry Andric           .addReg(X0Scratch, RegState::Undef)
33880fca6ea1SDimitry Andric           .addReg(X0Scratch, RegState::Implicit)
33890fca6ea1SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
33900b57cec5SDimitry Andric   }
33910b57cec5SDimitry Andric   return true;
33920b57cec5SDimitry Andric }
33930b57cec5SDimitry Andric 
33940b57cec5SDimitry Andric bool AArch64FrameLowering::restoreCalleeSavedRegisters(
339581ad6265SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
33965ffd83dbSDimitry Andric     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
33970b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
33980b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
33990b57cec5SDimitry Andric   DebugLoc DL;
34000b57cec5SDimitry Andric   SmallVector<RegPairInfo, 8> RegPairs;
34010b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
34020b57cec5SDimitry Andric 
340381ad6265SDimitry Andric   if (MBBI != MBB.end())
340481ad6265SDimitry Andric     DL = MBBI->getDebugLoc();
34050b57cec5SDimitry Andric 
340681ad6265SDimitry Andric   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
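  // With homogeneous prolog/epilog enabled, the whole restore sequence is
  // represented by a single HOM_Epilog pseudo that defines every callee-saved
  // register; it is expanded by a later pass.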
34070fca6ea1SDimitry Andric   if (homogeneousPrologEpilog(MF, &MBB)) {
34080fca6ea1SDimitry Andric     auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
34090fca6ea1SDimitry Andric                    .setMIFlag(MachineInstr::FrameDestroy);
34100fca6ea1SDimitry Andric     for (auto &RPI : RegPairs) {
34110fca6ea1SDimitry Andric       MIB.addReg(RPI.Reg1, RegState::Define);
34120fca6ea1SDimitry Andric       MIB.addReg(RPI.Reg2, RegState::Define);
34130fca6ea1SDimitry Andric     }
34140fca6ea1SDimitry Andric     return true;
34150fca6ea1SDimitry Andric   }
34160b57cec5SDimitry Andric 
34170fca6ea1SDimitry Andric   // For performance reasons, restore the SVE registers in increasing order.
34180fca6ea1SDimitry Andric   auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
34190fca6ea1SDimitry Andric   auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
34200fca6ea1SDimitry Andric   auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
34210fca6ea1SDimitry Andric   std::reverse(PPRBegin, PPREnd);
34220fca6ea1SDimitry Andric   auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
34230fca6ea1SDimitry Andric   auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
34240fca6ea1SDimitry Andric   auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
34250fca6ea1SDimitry Andric   std::reverse(ZPRBegin, ZPREnd);
34260fca6ea1SDimitry Andric 
34270fca6ea1SDimitry Andric   bool PTrueCreated = false;
34280fca6ea1SDimitry Andric   for (const RegPairInfo &RPI : RegPairs) {
34290b57cec5SDimitry Andric     unsigned Reg1 = RPI.Reg1;
34300b57cec5SDimitry Andric     unsigned Reg2 = RPI.Reg2;
34310b57cec5SDimitry Andric 
34320b57cec5SDimitry Andric     // Issue sequence of restores for cs regs. The last restore may be converted
34330b57cec5SDimitry Andric     // to a post-increment load later by emitEpilogue if the callee-save stack
34340b57cec5SDimitry Andric     // area allocation can't be combined with the local stack area allocation.
34350b57cec5SDimitry Andric     // For example:
34360b57cec5SDimitry Andric     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
34370b57cec5SDimitry Andric     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
34380b57cec5SDimitry Andric     //    ldp     x22, x21, [sp, #0]      // addImm(+0)
34390b57cec5SDimitry Andric     // Note: see comment in spillCalleeSavedRegisters()
34400b57cec5SDimitry Andric     unsigned LdrOpc;
34415ffd83dbSDimitry Andric     unsigned Size;
34425ffd83dbSDimitry Andric     Align Alignment;
34430b57cec5SDimitry Andric     switch (RPI.Type) {
34440b57cec5SDimitry Andric     case RegPairInfo::GPR:
34450b57cec5SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
34460b57cec5SDimitry Andric       Size = 8;
34475ffd83dbSDimitry Andric       Alignment = Align(8);
34480b57cec5SDimitry Andric       break;
34490b57cec5SDimitry Andric     case RegPairInfo::FPR64:
34500b57cec5SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
34510b57cec5SDimitry Andric       Size = 8;
34525ffd83dbSDimitry Andric       Alignment = Align(8);
34530b57cec5SDimitry Andric       break;
34540b57cec5SDimitry Andric     case RegPairInfo::FPR128:
34550b57cec5SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
34560b57cec5SDimitry Andric       Size = 16;
34575ffd83dbSDimitry Andric       Alignment = Align(16);
34580b57cec5SDimitry Andric       break;
3459480093f4SDimitry Andric     case RegPairInfo::ZPR:
34600fca6ea1SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
3461480093f4SDimitry Andric       Size = 16;
34625ffd83dbSDimitry Andric       Alignment = Align(16);
3463480093f4SDimitry Andric       break;
3464480093f4SDimitry Andric     case RegPairInfo::PPR:
3465480093f4SDimitry Andric       LdrOpc = AArch64::LDR_PXI;
3466480093f4SDimitry Andric       Size = 2;
34675ffd83dbSDimitry Andric       Alignment = Align(2);
3468480093f4SDimitry Andric       break;
34690fca6ea1SDimitry Andric     case RegPairInfo::VG:
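      // VG is only spilled, never restored (its CalleeSavedInfo is marked
      // setRestored(false)), so there is nothing to emit here.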
34700fca6ea1SDimitry Andric       continue;
34710b57cec5SDimitry Andric     }
34720b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
34730b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
34740b57cec5SDimitry Andric                dbgs() << ") -> fi#(" << RPI.FrameIdx;
34750b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
34760b57cec5SDimitry Andric                dbgs() << ")\n");
34770b57cec5SDimitry Andric 
34780b57cec5SDimitry Andric     // Windows unwind codes require consecutive registers if registers are
34790b57cec5SDimitry Andric     // paired.  Make the switch here, so that the code below will restore (x,x+1)
34800b57cec5SDimitry Andric     // and not (x+1,x).
34810b57cec5SDimitry Andric     unsigned FrameIdxReg1 = RPI.FrameIdx;
34820b57cec5SDimitry Andric     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
34830b57cec5SDimitry Andric     if (NeedsWinCFI && RPI.isPaired()) {
34840b57cec5SDimitry Andric       std::swap(Reg1, Reg2);
34850b57cec5SDimitry Andric       std::swap(FrameIdxReg1, FrameIdxReg2);
34860b57cec5SDimitry Andric     }
34870fca6ea1SDimitry Andric 
34880fca6ea1SDimitry Andric     AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
34890fca6ea1SDimitry Andric     if (RPI.isPaired() && RPI.isScalable()) {
34900fca6ea1SDimitry Andric       [[maybe_unused]] const AArch64Subtarget &Subtarget =
34910fca6ea1SDimitry Andric                               MF.getSubtarget<AArch64Subtarget>();
34920fca6ea1SDimitry Andric       unsigned PnReg = AFI->getPredicateRegForFillSpill();
34930fca6ea1SDimitry Andric       assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
34940fca6ea1SDimitry Andric              "Expects SVE2.1 or SME2 target and a predicate register");
34950fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
34960fca6ea1SDimitry Andric       assert(!(PPRBegin < ZPRBegin) &&
34970fca6ea1SDimitry Andric              "Expected callee save predicate to be handled first");
34980fca6ea1SDimitry Andric #endif
34990fca6ea1SDimitry Andric       if (!PTrueCreated) {
35000fca6ea1SDimitry Andric         PTrueCreated = true;
35010fca6ea1SDimitry Andric         BuildMI(MBB, MBBI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
35020fca6ea1SDimitry Andric             .setMIFlags(MachineInstr::FrameDestroy);
35030fca6ea1SDimitry Andric       }
350481ad6265SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
35050fca6ea1SDimitry Andric       MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
35060fca6ea1SDimitry Andric                  getDefRegState(true));
35070b57cec5SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
35080b57cec5SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
35095ffd83dbSDimitry Andric           MachineMemOperand::MOLoad, Size, Alignment));
35100fca6ea1SDimitry Andric       MIB.addReg(PnReg);
35110fca6ea1SDimitry Andric       MIB.addReg(AArch64::SP)
35120b57cec5SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale]
35130b57cec5SDimitry Andric                               // where factor*scale is implicit
35140b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
35150b57cec5SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
35160b57cec5SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
35175ffd83dbSDimitry Andric           MachineMemOperand::MOLoad, Size, Alignment));
35180b57cec5SDimitry Andric       if (NeedsWinCFI)
35190b57cec5SDimitry Andric         InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
352081ad6265SDimitry Andric     } else {
35210fca6ea1SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
35220fca6ea1SDimitry Andric       if (RPI.isPaired()) {
35230fca6ea1SDimitry Andric         MIB.addReg(Reg2, getDefRegState(true));
35240fca6ea1SDimitry Andric         MIB.addMemOperand(MF.getMachineMemOperand(
35250fca6ea1SDimitry Andric             MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
35260fca6ea1SDimitry Andric             MachineMemOperand::MOLoad, Size, Alignment));
35270fca6ea1SDimitry Andric       }
35280fca6ea1SDimitry Andric       MIB.addReg(Reg1, getDefRegState(true));
35290fca6ea1SDimitry Andric       MIB.addReg(AArch64::SP)
35300fca6ea1SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale]
35310fca6ea1SDimitry Andric                               // where factor*scale is implicit
35320fca6ea1SDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
35330fca6ea1SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
35340fca6ea1SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
35350fca6ea1SDimitry Andric           MachineMemOperand::MOLoad, Size, Alignment));
35360fca6ea1SDimitry Andric       if (NeedsWinCFI)
35370fca6ea1SDimitry Andric         InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
35380fca6ea1SDimitry Andric     }
35390fca6ea1SDimitry Andric   }
35400fca6ea1SDimitry Andric   return true;
35410fca6ea1SDimitry Andric }
35420fca6ea1SDimitry Andric 
3543*62987288SDimitry Andric // Return the FrameID for an MMO.
3544*62987288SDimitry Andric static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
35450fca6ea1SDimitry Andric                                         const MachineFrameInfo &MFI) {
35460fca6ea1SDimitry Andric   auto *PSV =
35470fca6ea1SDimitry Andric       dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
35480fca6ea1SDimitry Andric   if (PSV)
35490fca6ea1SDimitry Andric     return std::optional<int>(PSV->getFrameIndex());
35500fca6ea1SDimitry Andric 
35510fca6ea1SDimitry Andric   if (MMO->getValue()) {
35520fca6ea1SDimitry Andric     if (auto *Al = dyn_cast<AllocaInst>(getUnderlyingObject(MMO->getValue()))) {
35530fca6ea1SDimitry Andric       for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd();
35540fca6ea1SDimitry Andric            FI++)
35550fca6ea1SDimitry Andric         if (MFI.getObjectAllocation(FI) == Al)
35560fca6ea1SDimitry Andric           return FI;
355781ad6265SDimitry Andric     }
35580b57cec5SDimitry Andric   }
35590b57cec5SDimitry Andric 
35600fca6ea1SDimitry Andric   return std::nullopt;
35610fca6ea1SDimitry Andric }
35620fca6ea1SDimitry Andric 
3563*62987288SDimitry Andric // Return the FrameID for a Load/Store instruction by looking at the first MMO.
3564*62987288SDimitry Andric static std::optional<int> getLdStFrameID(const MachineInstr &MI,
3565*62987288SDimitry Andric                                          const MachineFrameInfo &MFI) {
3566*62987288SDimitry Andric   if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
3567*62987288SDimitry Andric     return std::nullopt;
3568*62987288SDimitry Andric 
3569*62987288SDimitry Andric   return getMMOFrameID(*MI.memoperands_begin(), MFI);
3570*62987288SDimitry Andric }
3571*62987288SDimitry Andric 
35720fca6ea1SDimitry Andric // Check if a Hazard slot is needed for the current function, and if so create
35730fca6ea1SDimitry Andric // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
35740fca6ea1SDimitry Andric // which can be used to determine if any hazard padding is needed.
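// The slot is later used to keep GPR and FPR/SVE accesses apart on the stack:
// padding is placed between the two classes of objects (see the CSR hazard
// slot handling in assignCalleeSavedSpillSlots).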
35750fca6ea1SDimitry Andric void AArch64FrameLowering::determineStackHazardSlot(
35760fca6ea1SDimitry Andric     MachineFunction &MF, BitVector &SavedRegs) const {
35770fca6ea1SDimitry Andric   if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
35780fca6ea1SDimitry Andric       MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
35790fca6ea1SDimitry Andric     return;
35800fca6ea1SDimitry Andric 
35810fca6ea1SDimitry Andric   // Stack hazards are only needed in streaming functions, unless StackHazardInNonStreaming is set.
35820fca6ea1SDimitry Andric   SMEAttrs Attrs(MF.getFunction());
35830fca6ea1SDimitry Andric   if (!StackHazardInNonStreaming && Attrs.hasNonStreamingInterfaceAndBody())
35840fca6ea1SDimitry Andric     return;
35850fca6ea1SDimitry Andric 
35860fca6ea1SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
35870fca6ea1SDimitry Andric 
35880fca6ea1SDimitry Andric   // Add a hazard slot if there are any CSR FPR registers, or if there are any
35890fca6ea1SDimitry Andric   // FP-only stack objects.
35900fca6ea1SDimitry Andric   bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
35910fca6ea1SDimitry Andric     return AArch64::FPR64RegClass.contains(Reg) ||
35920fca6ea1SDimitry Andric            AArch64::FPR128RegClass.contains(Reg) ||
35930fca6ea1SDimitry Andric            AArch64::ZPRRegClass.contains(Reg) ||
35940fca6ea1SDimitry Andric            AArch64::PPRRegClass.contains(Reg);
35950fca6ea1SDimitry Andric   });
35960fca6ea1SDimitry Andric   bool HasFPRStackObjects = false;
35970fca6ea1SDimitry Andric   if (!HasFPRCSRs) {
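    // For each frame object, record whether it is accessed by FP/SVE
    // instructions (bit 1) and/or by any other load/store (bit 0). An object
    // counts as FP-only, and so triggers a hazard slot, when only bit 1 ends
    // up set, i.e. (B & 3) == 2 in the any_of below.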
35980fca6ea1SDimitry Andric     std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
35990fca6ea1SDimitry Andric     for (auto &MBB : MF) {
36000fca6ea1SDimitry Andric       for (auto &MI : MBB) {
36010fca6ea1SDimitry Andric         std::optional<int> FI = getLdStFrameID(MI, MFI);
36020fca6ea1SDimitry Andric         if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
36030fca6ea1SDimitry Andric           if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
36040fca6ea1SDimitry Andric               AArch64InstrInfo::isFpOrNEON(MI))
36050fca6ea1SDimitry Andric             FrameObjects[*FI] |= 2;
36060fca6ea1SDimitry Andric           else
36070fca6ea1SDimitry Andric             FrameObjects[*FI] |= 1;
36080fca6ea1SDimitry Andric         }
36090fca6ea1SDimitry Andric       }
36100fca6ea1SDimitry Andric     }
36110fca6ea1SDimitry Andric     HasFPRStackObjects =
36120fca6ea1SDimitry Andric         any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
36130fca6ea1SDimitry Andric   }
36140fca6ea1SDimitry Andric 
36150fca6ea1SDimitry Andric   if (HasFPRCSRs || HasFPRStackObjects) {
36160fca6ea1SDimitry Andric     int ID = MFI.CreateStackObject(StackHazardSize, Align(16), false);
36170fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "
36180fca6ea1SDimitry Andric                       << StackHazardSize << "\n");
36190fca6ea1SDimitry Andric     MF.getInfo<AArch64FunctionInfo>()->setStackHazardSlotIndex(ID);
36200fca6ea1SDimitry Andric   }
36210b57cec5SDimitry Andric }
36220b57cec5SDimitry Andric 
36230b57cec5SDimitry Andric void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
36240b57cec5SDimitry Andric                                                 BitVector &SavedRegs,
36250b57cec5SDimitry Andric                                                 RegScavenger *RS) const {
36260b57cec5SDimitry Andric   // All calls are tail calls in GHC calling conv, and functions have no
36270b57cec5SDimitry Andric   // prologue/epilogue.
36280b57cec5SDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
36290b57cec5SDimitry Andric     return;
36300b57cec5SDimitry Andric 
36310b57cec5SDimitry Andric   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
36320b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
36330b57cec5SDimitry Andric       MF.getSubtarget().getRegisterInfo());
36345ffd83dbSDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
36350b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
36360b57cec5SDimitry Andric   unsigned UnspilledCSGPR = AArch64::NoRegister;
36370b57cec5SDimitry Andric   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
36380b57cec5SDimitry Andric 
36390b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
36400b57cec5SDimitry Andric   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
36410b57cec5SDimitry Andric 
36420b57cec5SDimitry Andric   unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
36430b57cec5SDimitry Andric                                 ? RegInfo->getBaseRegister()
36440b57cec5SDimitry Andric                                 : (unsigned)AArch64::NoRegister;
36450b57cec5SDimitry Andric 
36460b57cec5SDimitry Andric   unsigned ExtraCSSpill = 0;
36475f757f3fSDimitry Andric   bool HasUnpairedGPR64 = false;
36480fca6ea1SDimitry Andric   bool HasPairZReg = false;
36490b57cec5SDimitry Andric   // Figure out which callee-saved registers to save/restore.
36500b57cec5SDimitry Andric   for (unsigned i = 0; CSRegs[i]; ++i) {
36510b57cec5SDimitry Andric     const unsigned Reg = CSRegs[i];
36520b57cec5SDimitry Andric 
36530b57cec5SDimitry Andric     // Add the base pointer register to SavedRegs if it is callee-save.
36540b57cec5SDimitry Andric     if (Reg == BasePointerReg)
36550b57cec5SDimitry Andric       SavedRegs.set(Reg);
36560b57cec5SDimitry Andric 
36570b57cec5SDimitry Andric     bool RegUsed = SavedRegs.test(Reg);
3658480093f4SDimitry Andric     unsigned PairedReg = AArch64::NoRegister;
36595f757f3fSDimitry Andric     const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
36605f757f3fSDimitry Andric     if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
36615f757f3fSDimitry Andric         AArch64::FPR128RegClass.contains(Reg)) {
36625f757f3fSDimitry Andric       // Compensate for odd numbers of GP CSRs.
36635f757f3fSDimitry Andric       // For now, all known cases of an odd number of CSRs involve GPRs.
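      // CSRegs[i ^ 1] pairs adjacent even/odd entries ((0,1), (2,3), ...);
      // once an unpaired GPR has been seen, the pairing parity flips, hence
      // the i % 2 based adjustment below.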
36645f757f3fSDimitry Andric       if (HasUnpairedGPR64)
36655f757f3fSDimitry Andric         PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
36665f757f3fSDimitry Andric       else
3667480093f4SDimitry Andric         PairedReg = CSRegs[i ^ 1];
36685f757f3fSDimitry Andric     }
36695f757f3fSDimitry Andric 
36705f757f3fSDimitry Andric     // If the function requires all the GP registers to save (SavedRegs),
36715f757f3fSDimitry Andric     // and there are an odd number of GP CSRs at the same time (CSRegs),
36725f757f3fSDimitry Andric     // PairedReg could be in a different register class from Reg, which would
36735f757f3fSDimitry Andric     // lead to a FPR (usually D8) accidentally being marked saved.
36745f757f3fSDimitry Andric     if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
36755f757f3fSDimitry Andric       PairedReg = AArch64::NoRegister;
36765f757f3fSDimitry Andric       HasUnpairedGPR64 = true;
36775f757f3fSDimitry Andric     }
36785f757f3fSDimitry Andric     assert(PairedReg == AArch64::NoRegister ||
36795f757f3fSDimitry Andric            AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
36805f757f3fSDimitry Andric            AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
36815f757f3fSDimitry Andric            AArch64::FPR128RegClass.contains(Reg, PairedReg));
3682480093f4SDimitry Andric 
36830b57cec5SDimitry Andric     if (!RegUsed) {
36840b57cec5SDimitry Andric       if (AArch64::GPR64RegClass.contains(Reg) &&
36850b57cec5SDimitry Andric           !RegInfo->isReservedReg(MF, Reg)) {
36860b57cec5SDimitry Andric         UnspilledCSGPR = Reg;
36870b57cec5SDimitry Andric         UnspilledCSGPRPaired = PairedReg;
36880b57cec5SDimitry Andric       }
36890b57cec5SDimitry Andric       continue;
36900b57cec5SDimitry Andric     }
36910b57cec5SDimitry Andric 
36920b57cec5SDimitry Andric     // MachO's compact unwind format relies on all registers being stored in
36930b57cec5SDimitry Andric     // pairs.
36940b57cec5SDimitry Andric     // FIXME: the usual format is actually better if unwinding isn't needed.
3695fe6060f1SDimitry Andric     if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
36960b57cec5SDimitry Andric         !SavedRegs.test(PairedReg)) {
36970b57cec5SDimitry Andric       SavedRegs.set(PairedReg);
36980b57cec5SDimitry Andric       if (AArch64::GPR64RegClass.contains(PairedReg) &&
36990b57cec5SDimitry Andric           !RegInfo->isReservedReg(MF, PairedReg))
37000b57cec5SDimitry Andric         ExtraCSSpill = PairedReg;
37010b57cec5SDimitry Andric     }
37020fca6ea1SDimitry Andric     // Check if there is a pair of ZRegs, so a PReg can be selected for spill/fill.
37030fca6ea1SDimitry Andric     HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
37040fca6ea1SDimitry Andric                     SavedRegs.test(CSRegs[i ^ 1]));
37050fca6ea1SDimitry Andric   }
37060fca6ea1SDimitry Andric 
37070fca6ea1SDimitry Andric   if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
37080fca6ea1SDimitry Andric     AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
37090fca6ea1SDimitry Andric     // Find a suitable predicate register for the multi-vector spill/fill
37100fca6ea1SDimitry Andric     // instructions.
37110fca6ea1SDimitry Andric     unsigned PnReg = findFreePredicateReg(SavedRegs);
37120fca6ea1SDimitry Andric     if (PnReg != AArch64::NoRegister)
37130fca6ea1SDimitry Andric       AFI->setPredicateRegForFillSpill(PnReg);
37140fca6ea1SDimitry Andric     // If no free callee-saved register has been found, assign one.
37150fca6ea1SDimitry Andric     if (!AFI->getPredicateRegForFillSpill() &&
37160fca6ea1SDimitry Andric         MF.getFunction().getCallingConv() ==
37170fca6ea1SDimitry Andric             CallingConv::AArch64_SVE_VectorCall) {
37180fca6ea1SDimitry Andric       SavedRegs.set(AArch64::P8);
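      // PN8 is the predicate-as-counter view of P8, which was marked as saved
      // above.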
37190fca6ea1SDimitry Andric       AFI->setPredicateRegForFillSpill(AArch64::PN8);
37200fca6ea1SDimitry Andric     }
37210fca6ea1SDimitry Andric 
37220fca6ea1SDimitry Andric     assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
37230fca6ea1SDimitry Andric            "Predicate cannot be a reserved register");
37240b57cec5SDimitry Andric   }
37250b57cec5SDimitry Andric 
37265ffd83dbSDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
37275ffd83dbSDimitry Andric       !Subtarget.isTargetWindows()) {
37285ffd83dbSDimitry Andric     // For the Windows calling convention on a non-Windows OS, where X18 is treated
37295ffd83dbSDimitry Andric     // as reserved, back up X18 when entering non-windows code (marked with the
37305ffd83dbSDimitry Andric     // Windows calling convention) and restore when returning regardless of
37315ffd83dbSDimitry Andric     // whether the individual function uses it - it might call other functions
37325ffd83dbSDimitry Andric     // that clobber it.
37335ffd83dbSDimitry Andric     SavedRegs.set(AArch64::X18);
37345ffd83dbSDimitry Andric   }
37355ffd83dbSDimitry Andric 
37360b57cec5SDimitry Andric   // Calculate the callee-saved stack size.
37370b57cec5SDimitry Andric   unsigned CSStackSize = 0;
3738480093f4SDimitry Andric   unsigned SVECSStackSize = 0;
37390b57cec5SDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
37400b57cec5SDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
3741480093f4SDimitry Andric   for (unsigned Reg : SavedRegs.set_bits()) {
3742480093f4SDimitry Andric     auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
3743480093f4SDimitry Andric     if (AArch64::PPRRegClass.contains(Reg) ||
3744480093f4SDimitry Andric         AArch64::ZPRRegClass.contains(Reg))
3745480093f4SDimitry Andric       SVECSStackSize += RegSize;
3746480093f4SDimitry Andric     else
3747480093f4SDimitry Andric       CSStackSize += RegSize;
3748480093f4SDimitry Andric   }
37490b57cec5SDimitry Andric 
37500fca6ea1SDimitry Andric   // Increase the callee-saved stack size if the function has streaming mode
37510fca6ea1SDimitry Andric   // changes, as we will need to spill the value of the VG register.
37520fca6ea1SDimitry Andric   // For locally streaming functions, we spill both the streaming and
37530fca6ea1SDimitry Andric   // non-streaming VG values.
37540fca6ea1SDimitry Andric   const Function &F = MF.getFunction();
37550fca6ea1SDimitry Andric   SMEAttrs Attrs(F);
37560fca6ea1SDimitry Andric   if (AFI->hasStreamingModeChanges()) {
37570fca6ea1SDimitry Andric     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
37580fca6ea1SDimitry Andric       CSStackSize += 16;
37590fca6ea1SDimitry Andric     else
37600fca6ea1SDimitry Andric       CSStackSize += 8;
37610fca6ea1SDimitry Andric   }
37620fca6ea1SDimitry Andric 
37630fca6ea1SDimitry Andric   // Determine if a Hazard slot should be used, and increase the CSStackSize by
37640fca6ea1SDimitry Andric   // StackHazardSize if so.
37650fca6ea1SDimitry Andric   determineStackHazardSlot(MF, SavedRegs);
37660fca6ea1SDimitry Andric   if (AFI->hasStackHazardSlotIndex())
37670fca6ea1SDimitry Andric     CSStackSize += StackHazardSize;
37680fca6ea1SDimitry Andric 
37690b57cec5SDimitry Andric   // Save number of saved regs, so we can easily update CSStackSize later.
37700b57cec5SDimitry Andric   unsigned NumSavedRegs = SavedRegs.count();
37710b57cec5SDimitry Andric 
37720b57cec5SDimitry Andric   // The frame record needs to be created by saving the appropriate registers.
3773480093f4SDimitry Andric   uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
37740b57cec5SDimitry Andric   if (hasFP(MF) ||
37750b57cec5SDimitry Andric       windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
37760b57cec5SDimitry Andric     SavedRegs.set(AArch64::FP);
37770b57cec5SDimitry Andric     SavedRegs.set(AArch64::LR);
37780b57cec5SDimitry Andric   }
37790b57cec5SDimitry Andric 
37800fca6ea1SDimitry Andric   LLVM_DEBUG({
37810fca6ea1SDimitry Andric     dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
37820fca6ea1SDimitry Andric     for (unsigned Reg : SavedRegs.set_bits())
37830fca6ea1SDimitry Andric       dbgs() << ' ' << printReg(Reg, RegInfo);
37840fca6ea1SDimitry Andric     dbgs() << "\n";
37850fca6ea1SDimitry Andric   });
37860b57cec5SDimitry Andric 
37870b57cec5SDimitry Andric   // If any callee-saved registers are used, the frame cannot be eliminated.
37888bcb0991SDimitry Andric   int64_t SVEStackSize =
3789480093f4SDimitry Andric       alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
37908bcb0991SDimitry Andric   bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
37910b57cec5SDimitry Andric 
37920b57cec5SDimitry Andric   // The CSR spill slots have not been allocated yet, so estimateStackSize
37930b57cec5SDimitry Andric   // won't include them.
37940b57cec5SDimitry Andric   unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
37958bcb0991SDimitry Andric 
379606c3fb27SDimitry Andric   // We may address some of the stack above the canonical frame address, either
379706c3fb27SDimitry Andric   // for our own arguments or during a call. Include that in calculating whether
379806c3fb27SDimitry Andric   // we have complicated addressing concerns.
379906c3fb27SDimitry Andric   int64_t CalleeStackUsed = 0;
380006c3fb27SDimitry Andric   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
380106c3fb27SDimitry Andric     int64_t FixedOff = MFI.getObjectOffset(I);
38020fca6ea1SDimitry Andric     if (FixedOff > CalleeStackUsed)
38030fca6ea1SDimitry Andric       CalleeStackUsed = FixedOff;
380406c3fb27SDimitry Andric   }
380506c3fb27SDimitry Andric 
38068bcb0991SDimitry Andric   // Conservatively always assume BigStack when there are SVE spills.
380706c3fb27SDimitry Andric   bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
380806c3fb27SDimitry Andric                                    CalleeStackUsed) > EstimatedStackSizeLimit;
38090b57cec5SDimitry Andric   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
38100b57cec5SDimitry Andric     AFI->setHasStackFrame(true);
38110b57cec5SDimitry Andric 
38120b57cec5SDimitry Andric   // Estimate if we might need to scavenge a register at some point in order
38130b57cec5SDimitry Andric   // to materialize a stack offset. If so, either spill one additional
38140b57cec5SDimitry Andric   // callee-saved register or reserve a special spill slot to facilitate
38150b57cec5SDimitry Andric   // register scavenging. If we already spilled an extra callee-saved register
38160b57cec5SDimitry Andric   // above to keep the number of spills even, we don't need to do anything else
38170b57cec5SDimitry Andric   // here.
38180b57cec5SDimitry Andric   if (BigStack) {
38190b57cec5SDimitry Andric     if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
38200b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
38210b57cec5SDimitry Andric                         << " to get a scratch register.\n");
38220b57cec5SDimitry Andric       SavedRegs.set(UnspilledCSGPR);
38235f757f3fSDimitry Andric       ExtraCSSpill = UnspilledCSGPR;
38245f757f3fSDimitry Andric 
38250b57cec5SDimitry Andric       // MachO's compact unwind format relies on all registers being stored in
38260b57cec5SDimitry Andric       // pairs, so if we need to spill one extra for BigStack, then we need to
38270b57cec5SDimitry Andric       // store the pair.
38285f757f3fSDimitry Andric       if (producePairRegisters(MF)) {
38295f757f3fSDimitry Andric         if (UnspilledCSGPRPaired == AArch64::NoRegister) {
38305f757f3fSDimitry Andric           // Failed to make a pair for compact unwind format, revert spilling.
38315f757f3fSDimitry Andric           if (produceCompactUnwindFrame(MF)) {
38325f757f3fSDimitry Andric             SavedRegs.reset(UnspilledCSGPR);
38335f757f3fSDimitry Andric             ExtraCSSpill = AArch64::NoRegister;
38345f757f3fSDimitry Andric           }
38355f757f3fSDimitry Andric         } else
38360b57cec5SDimitry Andric           SavedRegs.set(UnspilledCSGPRPaired);
38375f757f3fSDimitry Andric       }
38380b57cec5SDimitry Andric     }
38390b57cec5SDimitry Andric 
38400b57cec5SDimitry Andric     // If we didn't find an extra callee-saved register to spill, create
38410b57cec5SDimitry Andric     // an emergency spill slot.
38420b57cec5SDimitry Andric     if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
38430b57cec5SDimitry Andric       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
38440b57cec5SDimitry Andric       const TargetRegisterClass &RC = AArch64::GPR64RegClass;
38450b57cec5SDimitry Andric       unsigned Size = TRI->getSpillSize(RC);
38465ffd83dbSDimitry Andric       Align Alignment = TRI->getSpillAlign(RC);
38475ffd83dbSDimitry Andric       int FI = MFI.CreateStackObject(Size, Alignment, false);
38480b57cec5SDimitry Andric       RS->addScavengingFrameIndex(FI);
38490b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
38500b57cec5SDimitry Andric                         << " as the emergency spill slot.\n");
38510b57cec5SDimitry Andric     }
38520b57cec5SDimitry Andric   }
38530b57cec5SDimitry Andric 
38540b57cec5SDimitry Andric   // Add the size of any additional 64-bit GPR saves.
38550b57cec5SDimitry Andric   CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
3856fe6060f1SDimitry Andric 
3857fe6060f1SDimitry Andric   // A Swift asynchronous context extends the frame record with a pointer
3858fe6060f1SDimitry Andric   // directly before FP.
3859fe6060f1SDimitry Andric   if (hasFP(MF) && AFI->hasSwiftAsyncContext())
3860fe6060f1SDimitry Andric     CSStackSize += 8;
3861fe6060f1SDimitry Andric 
3862480093f4SDimitry Andric   uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
38630b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
38640fca6ea1SDimitry Andric                     << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");
38650b57cec5SDimitry Andric 
3866480093f4SDimitry Andric   assert((!MFI.isCalleeSavedInfoValid() ||
3867480093f4SDimitry Andric           AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
3868480093f4SDimitry Andric          "Should not invalidate callee saved info");
3869480093f4SDimitry Andric 
38700b57cec5SDimitry Andric   // Round up to register pair alignment to avoid additional SP adjustment
38710b57cec5SDimitry Andric   // instructions.
38720b57cec5SDimitry Andric   AFI->setCalleeSavedStackSize(AlignedCSStackSize);
38730b57cec5SDimitry Andric   AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
3874480093f4SDimitry Andric   AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
38750b57cec5SDimitry Andric }
38760b57cec5SDimitry Andric 
3877e8d8bef9SDimitry Andric bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
3878fe6060f1SDimitry Andric     MachineFunction &MF, const TargetRegisterInfo *RegInfo,
3879fe6060f1SDimitry Andric     std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
3880fe6060f1SDimitry Andric     unsigned &MaxCSFrameIndex) const {
3881e8d8bef9SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
3882e8d8bef9SDimitry Andric   // To match the canonical Windows frame layout, reverse the list of
3883e8d8bef9SDimitry Andric   // callee saved registers to get them laid out by PrologEpilogInserter
3884e8d8bef9SDimitry Andric   // in the right order. (PrologEpilogInserter allocates stack objects top
3885e8d8bef9SDimitry Andric   // down. Windows canonical prologs store higher numbered registers at
3886e8d8bef9SDimitry Andric   // the top, thus have the CSI array start from the highest registers.)
3887e8d8bef9SDimitry Andric   if (NeedsWinCFI)
3888e8d8bef9SDimitry Andric     std::reverse(CSI.begin(), CSI.end());
3889fe6060f1SDimitry Andric 
3890fe6060f1SDimitry Andric   if (CSI.empty())
3891fe6060f1SDimitry Andric     return true; // Early exit if no callee saved registers are modified!
3892fe6060f1SDimitry Andric 
3893fe6060f1SDimitry Andric   // Now that we know which registers need to be saved and restored, allocate
3894fe6060f1SDimitry Andric   // stack slots for them.
3895fe6060f1SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
3896fe6060f1SDimitry Andric   auto *AFI = MF.getInfo<AArch64FunctionInfo>();
389781ad6265SDimitry Andric 
389881ad6265SDimitry Andric   bool UsesWinAAPCS = isTargetWindows(MF);
389981ad6265SDimitry Andric   if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
390081ad6265SDimitry Andric     int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
390181ad6265SDimitry Andric     AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
39020fca6ea1SDimitry Andric     if ((unsigned)FrameIdx < MinCSFrameIndex)
39030fca6ea1SDimitry Andric       MinCSFrameIndex = FrameIdx;
39040fca6ea1SDimitry Andric     if ((unsigned)FrameIdx > MaxCSFrameIndex)
39050fca6ea1SDimitry Andric       MaxCSFrameIndex = FrameIdx;
390681ad6265SDimitry Andric   }
390781ad6265SDimitry Andric 
39080fca6ea1SDimitry Andric   // Insert VG into the list of CSRs, immediately before LR if saved.
39090fca6ea1SDimitry Andric   if (AFI->hasStreamingModeChanges()) {
39100fca6ea1SDimitry Andric     std::vector<CalleeSavedInfo> VGSaves;
39110fca6ea1SDimitry Andric     SMEAttrs Attrs(MF.getFunction());
39120fca6ea1SDimitry Andric 
39130fca6ea1SDimitry Andric     auto VGInfo = CalleeSavedInfo(AArch64::VG);
39140fca6ea1SDimitry Andric     VGInfo.setRestored(false);
39150fca6ea1SDimitry Andric     VGSaves.push_back(VGInfo);
39160fca6ea1SDimitry Andric 
39170fca6ea1SDimitry Andric     // Add VG again if the function is locally-streaming, as we will spill two
39180fca6ea1SDimitry Andric     // values.
39190fca6ea1SDimitry Andric     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
39200fca6ea1SDimitry Andric       VGSaves.push_back(VGInfo);
39210fca6ea1SDimitry Andric 
39220fca6ea1SDimitry Andric     bool InsertBeforeLR = false;
39230fca6ea1SDimitry Andric 
39240fca6ea1SDimitry Andric     for (unsigned I = 0; I < CSI.size(); I++)
39250fca6ea1SDimitry Andric       if (CSI[I].getReg() == AArch64::LR) {
39260fca6ea1SDimitry Andric         InsertBeforeLR = true;
39270fca6ea1SDimitry Andric         CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
39280fca6ea1SDimitry Andric         break;
39290fca6ea1SDimitry Andric       }
39300fca6ea1SDimitry Andric 
39310fca6ea1SDimitry Andric     if (!InsertBeforeLR)
39320fca6ea1SDimitry Andric       CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
39330fca6ea1SDimitry Andric   }
39340fca6ea1SDimitry Andric 
39350fca6ea1SDimitry Andric   Register LastReg = 0;
39360fca6ea1SDimitry Andric   int HazardSlotIndex = std::numeric_limits<int>::max();
3937fe6060f1SDimitry Andric   for (auto &CS : CSI) {
3938fe6060f1SDimitry Andric     Register Reg = CS.getReg();
3939fe6060f1SDimitry Andric     const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
3940fe6060f1SDimitry Andric 
39410fca6ea1SDimitry Andric     // Create a hazard slot as we switch between GPR and FPR CSRs.
39420fca6ea1SDimitry Andric     if (AFI->hasStackHazardSlotIndex() &&
39430fca6ea1SDimitry Andric         (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
39440fca6ea1SDimitry Andric         AArch64InstrInfo::isFpOrNEON(Reg)) {
39450fca6ea1SDimitry Andric       assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
39460fca6ea1SDimitry Andric              "Unexpected register order for hazard slot");
39470fca6ea1SDimitry Andric       HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
39480fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
39490fca6ea1SDimitry Andric                         << "\n");
39500fca6ea1SDimitry Andric       AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
39510fca6ea1SDimitry Andric       if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
39520fca6ea1SDimitry Andric         MinCSFrameIndex = HazardSlotIndex;
39530fca6ea1SDimitry Andric       if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
39540fca6ea1SDimitry Andric         MaxCSFrameIndex = HazardSlotIndex;
39550fca6ea1SDimitry Andric     }
39560fca6ea1SDimitry Andric 
3957fe6060f1SDimitry Andric     unsigned Size = RegInfo->getSpillSize(*RC);
3958fe6060f1SDimitry Andric     Align Alignment(RegInfo->getSpillAlign(*RC));
3959fe6060f1SDimitry Andric     int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
3960fe6060f1SDimitry Andric     CS.setFrameIdx(FrameIdx);
3961fe6060f1SDimitry Andric 
39620fca6ea1SDimitry Andric     if ((unsigned)FrameIdx < MinCSFrameIndex)
39630fca6ea1SDimitry Andric       MinCSFrameIndex = FrameIdx;
39640fca6ea1SDimitry Andric     if ((unsigned)FrameIdx > MaxCSFrameIndex)
39650fca6ea1SDimitry Andric       MaxCSFrameIndex = FrameIdx;
3966fe6060f1SDimitry Andric 
3967fe6060f1SDimitry Andric     // Grab 8 bytes below FP for the extended asynchronous frame info.
396881ad6265SDimitry Andric     if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
396981ad6265SDimitry Andric         Reg == AArch64::FP) {
3970fe6060f1SDimitry Andric       FrameIdx = MFI.CreateStackObject(8, Alignment, true);
3971fe6060f1SDimitry Andric       AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
39720fca6ea1SDimitry Andric       if ((unsigned)FrameIdx < MinCSFrameIndex)
39730fca6ea1SDimitry Andric         MinCSFrameIndex = FrameIdx;
39740fca6ea1SDimitry Andric       if ((unsigned)FrameIdx > MaxCSFrameIndex)
39750fca6ea1SDimitry Andric         MaxCSFrameIndex = FrameIdx;
3976fe6060f1SDimitry Andric     }
39770fca6ea1SDimitry Andric     LastReg = Reg;
3978fe6060f1SDimitry Andric   }
39790fca6ea1SDimitry Andric 
39800fca6ea1SDimitry Andric   // Add a hazard slot in the case where no FPR CSRs are present.
39810fca6ea1SDimitry Andric   if (AFI->hasStackHazardSlotIndex() &&
39820fca6ea1SDimitry Andric       HazardSlotIndex == std::numeric_limits<int>::max()) {
39830fca6ea1SDimitry Andric     HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
39840fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
39850fca6ea1SDimitry Andric                       << "\n");
39860fca6ea1SDimitry Andric     AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
39870fca6ea1SDimitry Andric     if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
39880fca6ea1SDimitry Andric       MinCSFrameIndex = HazardSlotIndex;
39890fca6ea1SDimitry Andric     if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
39900fca6ea1SDimitry Andric       MaxCSFrameIndex = HazardSlotIndex;
39910fca6ea1SDimitry Andric   }
39920fca6ea1SDimitry Andric 
3993fe6060f1SDimitry Andric   return true;
3994e8d8bef9SDimitry Andric }
3995e8d8bef9SDimitry Andric 
39960b57cec5SDimitry Andric bool AArch64FrameLowering::enableStackSlotScavenging(
39970b57cec5SDimitry Andric     const MachineFunction &MF) const {
39980b57cec5SDimitry Andric   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
39995f757f3fSDimitry Andric   // If the function has streaming-mode changes, don't scavenge a
40005f757f3fSDimitry Andric   // spill slot in the callee-save area, as that might require an
40015f757f3fSDimitry Andric   // 'addvl' in the streaming-mode-changing call sequence when the
40025f757f3fSDimitry Andric   // function doesn't use an FP.
40035f757f3fSDimitry Andric   if (AFI->hasStreamingModeChanges() && !hasFP(MF))
40045f757f3fSDimitry Andric     return false;
40050fca6ea1SDimitry Andric   // Don't allow stack slot scavenging with hazard slots, in case it moves objects
40060fca6ea1SDimitry Andric   // into the wrong place.
40070fca6ea1SDimitry Andric   if (AFI->hasStackHazardSlotIndex())
40080fca6ea1SDimitry Andric     return false;
40090b57cec5SDimitry Andric   return AFI->hasCalleeSaveStackFreeSpace();
40100b57cec5SDimitry Andric }
40110b57cec5SDimitry Andric 
4012480093f4SDimitry Andric /// Returns true if there are any SVE callee saves.
4013480093f4SDimitry Andric static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
4014480093f4SDimitry Andric                                       int &Min, int &Max) {
4015480093f4SDimitry Andric   Min = std::numeric_limits<int>::max();
4016480093f4SDimitry Andric   Max = std::numeric_limits<int>::min();
4017480093f4SDimitry Andric 
4018480093f4SDimitry Andric   if (!MFI.isCalleeSavedInfoValid())
4019480093f4SDimitry Andric     return false;
4020480093f4SDimitry Andric 
4021480093f4SDimitry Andric   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
4022480093f4SDimitry Andric   for (auto &CS : CSI) {
4023480093f4SDimitry Andric     if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
4024480093f4SDimitry Andric         AArch64::PPRRegClass.contains(CS.getReg())) {
4025480093f4SDimitry Andric       assert((Max == std::numeric_limits<int>::min() ||
4026480093f4SDimitry Andric               Max + 1 == CS.getFrameIdx()) &&
4027480093f4SDimitry Andric              "SVE CalleeSaves are not consecutive");
4028480093f4SDimitry Andric 
4029480093f4SDimitry Andric       Min = std::min(Min, CS.getFrameIdx());
4030480093f4SDimitry Andric       Max = std::max(Max, CS.getFrameIdx());
4031480093f4SDimitry Andric     }
4032480093f4SDimitry Andric   }
4033480093f4SDimitry Andric   return Min != std::numeric_limits<int>::max();
4034480093f4SDimitry Andric }
4035480093f4SDimitry Andric 
4036480093f4SDimitry Andric // Process all the SVE stack objects and determine offsets for each
4037480093f4SDimitry Andric // object. If AssignOffsets is true, the offsets get assigned.
4038480093f4SDimitry Andric // Fills in the first and last callee-saved frame indices into
4039480093f4SDimitry Andric // Min/MaxCSFrameIndex, respectively.
4040480093f4SDimitry Andric // Returns the size of the SVE stack area.
4041480093f4SDimitry Andric static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
4042480093f4SDimitry Andric                                               int &MinCSFrameIndex,
4043480093f4SDimitry Andric                                               int &MaxCSFrameIndex,
4044480093f4SDimitry Andric                                               bool AssignOffsets) {
4045979e22ffSDimitry Andric #ifndef NDEBUG
4046480093f4SDimitry Andric   // First process all fixed stack objects.
40478bcb0991SDimitry Andric   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
4048e8d8bef9SDimitry Andric     assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
4049979e22ffSDimitry Andric            "SVE vectors should never be passed on the stack by value, only by "
4050979e22ffSDimitry Andric            "reference.");
4051979e22ffSDimitry Andric #endif
40528bcb0991SDimitry Andric 
4053480093f4SDimitry Andric   auto Assign = [&MFI](int FI, int64_t Offset) {
4054480093f4SDimitry Andric     LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
4055480093f4SDimitry Andric     MFI.setObjectOffset(FI, Offset);
4056480093f4SDimitry Andric   };
4057480093f4SDimitry Andric 
4058979e22ffSDimitry Andric   int64_t Offset = 0;
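  // Offset accumulates the running size of the SVE area; each object is
  // assigned the offset -Offset, i.e. successively further below the base of
  // the SVE region (see the Assign(..., -Offset) calls below).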
4059979e22ffSDimitry Andric 
4060480093f4SDimitry Andric   // Then process all callee saved slots.
4061480093f4SDimitry Andric   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
4062480093f4SDimitry Andric     // Assign offsets to the callee save slots.
4063480093f4SDimitry Andric     for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
4064480093f4SDimitry Andric       Offset += MFI.getObjectSize(I);
40655ffd83dbSDimitry Andric       Offset = alignTo(Offset, MFI.getObjectAlign(I));
4066480093f4SDimitry Andric       if (AssignOffsets)
4067480093f4SDimitry Andric         Assign(I, -Offset);
4068480093f4SDimitry Andric     }
4069480093f4SDimitry Andric   }
4070480093f4SDimitry Andric 
4071979e22ffSDimitry Andric   // Ensure that the callee-save area is aligned to 16 bytes.
4072979e22ffSDimitry Andric   Offset = alignTo(Offset, Align(16U));
4073979e22ffSDimitry Andric 
4074480093f4SDimitry Andric   // Create a buffer of SVE objects to allocate and sort it.
4075480093f4SDimitry Andric   SmallVector<int, 8> ObjectsToAllocate;
40760eae32dcSDimitry Andric   // If we have a stack protector, and we've previously decided that we have SVE
40770eae32dcSDimitry Andric   // objects on the stack and thus need it to go in the SVE stack area, then it
40780eae32dcSDimitry Andric   // needs to go first.
40790eae32dcSDimitry Andric   int StackProtectorFI = -1;
40800eae32dcSDimitry Andric   if (MFI.hasStackProtectorIndex()) {
40810eae32dcSDimitry Andric     StackProtectorFI = MFI.getStackProtectorIndex();
40820eae32dcSDimitry Andric     if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
40830eae32dcSDimitry Andric       ObjectsToAllocate.push_back(StackProtectorFI);
40840eae32dcSDimitry Andric   }
4085480093f4SDimitry Andric   for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
4086480093f4SDimitry Andric     unsigned StackID = MFI.getStackID(I);
4087e8d8bef9SDimitry Andric     if (StackID != TargetStackID::ScalableVector)
4088480093f4SDimitry Andric       continue;
40890eae32dcSDimitry Andric     if (I == StackProtectorFI)
40900eae32dcSDimitry Andric       continue;
4091480093f4SDimitry Andric     if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
4092480093f4SDimitry Andric       continue;
4093480093f4SDimitry Andric     if (MFI.isDeadObjectIndex(I))
4094480093f4SDimitry Andric       continue;
4095480093f4SDimitry Andric 
4096480093f4SDimitry Andric     ObjectsToAllocate.push_back(I);
4097480093f4SDimitry Andric   }
4098480093f4SDimitry Andric 
4099480093f4SDimitry Andric   // Allocate all SVE locals and spills
4100480093f4SDimitry Andric   for (unsigned FI : ObjectsToAllocate) {
41015ffd83dbSDimitry Andric     Align Alignment = MFI.getObjectAlign(FI);
4102480093f4SDimitry Andric     // FIXME: Given that the length of SVE vectors is not necessarily a power of
4103480093f4SDimitry Andric     // two, we'd need to align every object dynamically at runtime if the
4104480093f4SDimitry Andric     // alignment is larger than 16. This is not yet supported.
41055ffd83dbSDimitry Andric     if (Alignment > Align(16))
4106480093f4SDimitry Andric       report_fatal_error(
4107480093f4SDimitry Andric           "Alignment of scalable vectors > 16 bytes is not yet supported");
4108480093f4SDimitry Andric 
41095ffd83dbSDimitry Andric     Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
4110480093f4SDimitry Andric     if (AssignOffsets)
4111480093f4SDimitry Andric       Assign(FI, -Offset);
4112480093f4SDimitry Andric   }
4113480093f4SDimitry Andric 
41148bcb0991SDimitry Andric   return Offset;
41158bcb0991SDimitry Andric }
41168bcb0991SDimitry Andric 
4117480093f4SDimitry Andric int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
4118480093f4SDimitry Andric     MachineFrameInfo &MFI) const {
4119480093f4SDimitry Andric   int MinCSFrameIndex, MaxCSFrameIndex;
4120480093f4SDimitry Andric   return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
4121480093f4SDimitry Andric }
4122480093f4SDimitry Andric 
4123480093f4SDimitry Andric int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
4124480093f4SDimitry Andric     MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
4125480093f4SDimitry Andric   return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
4126480093f4SDimitry Andric                                         true);
4127480093f4SDimitry Andric }
4128480093f4SDimitry Andric 
41290b57cec5SDimitry Andric void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
41300b57cec5SDimitry Andric     MachineFunction &MF, RegScavenger *RS) const {
41318bcb0991SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
41328bcb0991SDimitry Andric 
41338bcb0991SDimitry Andric   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
41348bcb0991SDimitry Andric          "Upwards growing stack unsupported");
41358bcb0991SDimitry Andric 
4136480093f4SDimitry Andric   int MinCSFrameIndex, MaxCSFrameIndex;
4137480093f4SDimitry Andric   int64_t SVEStackSize =
4138480093f4SDimitry Andric       assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
41398bcb0991SDimitry Andric 
41408bcb0991SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
4141480093f4SDimitry Andric   AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
4142480093f4SDimitry Andric   AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
41438bcb0991SDimitry Andric 
41440b57cec5SDimitry Andric   // If this function isn't doing Win64-style C++ EH, we don't need to do
41450b57cec5SDimitry Andric   // anything.
41460b57cec5SDimitry Andric   if (!MF.hasEHFunclets())
41470b57cec5SDimitry Andric     return;
41480b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
41490b57cec5SDimitry Andric   WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
41500b57cec5SDimitry Andric 
41510b57cec5SDimitry Andric   MachineBasicBlock &MBB = MF.front();
41520b57cec5SDimitry Andric   auto MBBI = MBB.begin();
41530b57cec5SDimitry Andric   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
41540b57cec5SDimitry Andric     ++MBBI;
41550b57cec5SDimitry Andric 
41560b57cec5SDimitry Andric   // Create an UnwindHelp object.
415762cfcf62SDimitry Andric   // The UnwindHelp object is allocated at the start of the fixed object area.
415862cfcf62SDimitry Andric   int64_t FixedObject =
415962cfcf62SDimitry Andric       getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false);
416062cfcf62SDimitry Andric   int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8,
416162cfcf62SDimitry Andric                                            /*SPOffset*/ -FixedObject,
416262cfcf62SDimitry Andric                                            /*IsImmutable=*/false);
41630b57cec5SDimitry Andric   EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
416462cfcf62SDimitry Andric 
41650b57cec5SDimitry Andric   // We need to store -2 into the UnwindHelp object at the start of the
41660b57cec5SDimitry Andric   // function.
41670b57cec5SDimitry Andric   DebugLoc DL;
41680b57cec5SDimitry Andric   RS->enterBasicBlockEnd(MBB);
41695f757f3fSDimitry Andric   RS->backward(MBBI);
417004eeddc0SDimitry Andric   Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
41710b57cec5SDimitry Andric   assert(DstReg && "There must be a free register after frame setup");
41720b57cec5SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
41730b57cec5SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
41740b57cec5SDimitry Andric       .addReg(DstReg, getKillRegState(true))
41750b57cec5SDimitry Andric       .addFrameIndex(UnwindHelpFI)
41760b57cec5SDimitry Andric       .addImm(0);
41770b57cec5SDimitry Andric }
41780b57cec5SDimitry Andric 
41795ffd83dbSDimitry Andric namespace {
41805ffd83dbSDimitry Andric struct TagStoreInstr {
41815ffd83dbSDimitry Andric   MachineInstr *MI;
41825ffd83dbSDimitry Andric   int64_t Offset, Size;
41835ffd83dbSDimitry Andric   explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
41845ffd83dbSDimitry Andric       : MI(MI), Offset(Offset), Size(Size) {}
41855ffd83dbSDimitry Andric };
41865ffd83dbSDimitry Andric 
41875ffd83dbSDimitry Andric class TagStoreEdit {
41885ffd83dbSDimitry Andric   MachineFunction *MF;
41895ffd83dbSDimitry Andric   MachineBasicBlock *MBB;
41905ffd83dbSDimitry Andric   MachineRegisterInfo *MRI;
41915ffd83dbSDimitry Andric   // Tag store instructions that are being replaced.
41925ffd83dbSDimitry Andric   SmallVector<TagStoreInstr, 8> TagStores;
41935ffd83dbSDimitry Andric   // Combined memref arguments of the above instructions.
41945ffd83dbSDimitry Andric   SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
41955ffd83dbSDimitry Andric 
41965ffd83dbSDimitry Andric   // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
41975ffd83dbSDimitry Andric   // FrameRegOffset + Size) with the address tag of SP.
41985ffd83dbSDimitry Andric   Register FrameReg;
41995ffd83dbSDimitry Andric   StackOffset FrameRegOffset;
42005ffd83dbSDimitry Andric   int64_t Size;
420106c3fb27SDimitry Andric   // If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the
420206c3fb27SDimitry Andric   // end.
4203bdd1243dSDimitry Andric   std::optional<int64_t> FrameRegUpdate;
42045ffd83dbSDimitry Andric   // MIFlags for any FrameReg updating instructions.
42055ffd83dbSDimitry Andric   unsigned FrameRegUpdateFlags;
42065ffd83dbSDimitry Andric 
42075ffd83dbSDimitry Andric   // Use zeroing instruction variants.
42085ffd83dbSDimitry Andric   bool ZeroData;
42095ffd83dbSDimitry Andric   DebugLoc DL;
42105ffd83dbSDimitry Andric 
42115ffd83dbSDimitry Andric   void emitUnrolled(MachineBasicBlock::iterator InsertI);
42125ffd83dbSDimitry Andric   void emitLoop(MachineBasicBlock::iterator InsertI);
42135ffd83dbSDimitry Andric 
42145ffd83dbSDimitry Andric public:
42155ffd83dbSDimitry Andric   TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
42165ffd83dbSDimitry Andric       : MBB(MBB), ZeroData(ZeroData) {
42175ffd83dbSDimitry Andric     MF = MBB->getParent();
42185ffd83dbSDimitry Andric     MRI = &MF->getRegInfo();
42195ffd83dbSDimitry Andric   }
42205ffd83dbSDimitry Andric   // Add an instruction to be replaced. Instructions must be added in
42215ffd83dbSDimitry Andric   // ascending order of Offset and must be adjacent.
42225ffd83dbSDimitry Andric   void addInstruction(TagStoreInstr I) {
42235ffd83dbSDimitry Andric     assert((TagStores.empty() ||
42245ffd83dbSDimitry Andric             TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
42255ffd83dbSDimitry Andric            "Non-adjacent tag store instructions.");
42265ffd83dbSDimitry Andric     TagStores.push_back(I);
42275ffd83dbSDimitry Andric   }
42285ffd83dbSDimitry Andric   void clear() { TagStores.clear(); }
42295ffd83dbSDimitry Andric   // Emit equivalent code at the given location and erase the current set of
42305ffd83dbSDimitry Andric   // instructions. Emission may be skipped if the replacement is not
42315ffd83dbSDimitry Andric   // profitable. May invalidate the input iterator and replace it with a valid one.
42325ffd83dbSDimitry Andric   void emitCode(MachineBasicBlock::iterator &InsertI,
423381ad6265SDimitry Andric                 const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
42345ffd83dbSDimitry Andric };
42355ffd83dbSDimitry Andric 
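// emitUnrolled lowers the collected region into a straight-line sequence of
// ST2G/STG (or STZ2G/STZG) instructions. As an illustrative sketch (with x0
// standing in for BaseReg), tagging 48 bytes starting at BaseReg + 0 becomes:
//   stg  sp, [x0, #32]
//   st2g sp, [x0]
// The store at offset #0 is deliberately placed last so that a following SP
// adjustment in the epilogue has a chance to be folded into it.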
42365ffd83dbSDimitry Andric void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
42375ffd83dbSDimitry Andric   const AArch64InstrInfo *TII =
42385ffd83dbSDimitry Andric       MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
42395ffd83dbSDimitry Andric 
42405ffd83dbSDimitry Andric   const int64_t kMinOffset = -256 * 16;
42415ffd83dbSDimitry Andric   const int64_t kMaxOffset = 255 * 16;
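  // STG and ST2G take a signed 9-bit immediate scaled by 16, so the offsets
  // reachable from BaseReg without materializing a new base register span
  // [-256 * 16, 255 * 16] bytes, matching the constants above.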
42425ffd83dbSDimitry Andric 
42435ffd83dbSDimitry Andric   Register BaseReg = FrameReg;
4244e8d8bef9SDimitry Andric   int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
42455ffd83dbSDimitry Andric   if (BaseRegOffsetBytes < kMinOffset ||
424606c3fb27SDimitry Andric       BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
424706c3fb27SDimitry Andric       // BaseReg can be FP, which is not necessarily aligned to 16 bytes. In
424806c3fb27SDimitry Andric       // that case, BaseRegOffsetBytes will not be aligned to 16 bytes either,
424906c3fb27SDimitry Andric       // which is required for the offset of ST2G.
425006c3fb27SDimitry Andric       BaseRegOffsetBytes % 16 != 0) {
42515ffd83dbSDimitry Andric     Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
42525ffd83dbSDimitry Andric     emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
4253e8d8bef9SDimitry Andric                     StackOffset::getFixed(BaseRegOffsetBytes), TII);
42545ffd83dbSDimitry Andric     BaseReg = ScratchReg;
42555ffd83dbSDimitry Andric     BaseRegOffsetBytes = 0;
42565ffd83dbSDimitry Andric   }
42575ffd83dbSDimitry Andric 
42585ffd83dbSDimitry Andric   MachineInstr *LastI = nullptr;
42595ffd83dbSDimitry Andric   while (Size) {
42605ffd83dbSDimitry Andric     int64_t InstrSize = (Size > 16) ? 32 : 16;
42615ffd83dbSDimitry Andric     unsigned Opcode =
42625ffd83dbSDimitry Andric         InstrSize == 16
426306c3fb27SDimitry Andric             ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
426406c3fb27SDimitry Andric             : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
426506c3fb27SDimitry Andric     assert(BaseRegOffsetBytes % 16 == 0);
42665ffd83dbSDimitry Andric     MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
42675ffd83dbSDimitry Andric                           .addReg(AArch64::SP)
42685ffd83dbSDimitry Andric                           .addReg(BaseReg)
42695ffd83dbSDimitry Andric                           .addImm(BaseRegOffsetBytes / 16)
42705ffd83dbSDimitry Andric                           .setMemRefs(CombinedMemRefs);
42715ffd83dbSDimitry Andric     // A store to [BaseReg, #0] should go last for an opportunity to fold the
42725ffd83dbSDimitry Andric     // final SP adjustment in the epilogue.
42735ffd83dbSDimitry Andric     if (BaseRegOffsetBytes == 0)
42745ffd83dbSDimitry Andric       LastI = I;
42755ffd83dbSDimitry Andric     BaseRegOffsetBytes += InstrSize;
42765ffd83dbSDimitry Andric     Size -= InstrSize;
42775ffd83dbSDimitry Andric   }
42785ffd83dbSDimitry Andric 
42795ffd83dbSDimitry Andric   if (LastI)
42805ffd83dbSDimitry Andric     MBB->splice(InsertI, MBB, LastI);
42815ffd83dbSDimitry Andric }
42825ffd83dbSDimitry Andric 
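// emitLoop lowers the collected region into an STGloop/STZGloop pseudo (a
// minimal sketch of the intent, derived from the code below): the pseudo
// covers LoopSize bytes starting at FrameReg + FrameRegOffset, and when a
// base-register update is being folded in, the final 16 bytes are tagged with
// a post-indexed STG/STZG that also applies any extra SP adjustment.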
42835ffd83dbSDimitry Andric void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
42845ffd83dbSDimitry Andric   const AArch64InstrInfo *TII =
42855ffd83dbSDimitry Andric       MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
42865ffd83dbSDimitry Andric 
42875ffd83dbSDimitry Andric   Register BaseReg = FrameRegUpdate
42885ffd83dbSDimitry Andric                          ? FrameReg
42895ffd83dbSDimitry Andric                          : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
42905ffd83dbSDimitry Andric   Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
42915ffd83dbSDimitry Andric 
42925ffd83dbSDimitry Andric   emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
42935ffd83dbSDimitry Andric 
42945ffd83dbSDimitry Andric   int64_t LoopSize = Size;
42955ffd83dbSDimitry Andric   // If the loop size is not a multiple of 32, split off one 16-byte store at
42965ffd83dbSDimitry Andric   // the end to fold the BaseReg update into.
42975ffd83dbSDimitry Andric   if (FrameRegUpdate && *FrameRegUpdate)
42985ffd83dbSDimitry Andric     LoopSize -= LoopSize % 32;
42995ffd83dbSDimitry Andric   MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
43005ffd83dbSDimitry Andric                                 TII->get(ZeroData ? AArch64::STZGloop_wback
43015ffd83dbSDimitry Andric                                                   : AArch64::STGloop_wback))
43025ffd83dbSDimitry Andric                             .addDef(SizeReg)
43035ffd83dbSDimitry Andric                             .addDef(BaseReg)
43045ffd83dbSDimitry Andric                             .addImm(LoopSize)
43055ffd83dbSDimitry Andric                             .addReg(BaseReg)
43065ffd83dbSDimitry Andric                             .setMemRefs(CombinedMemRefs);
43075ffd83dbSDimitry Andric   if (FrameRegUpdate)
43085ffd83dbSDimitry Andric     LoopI->setFlags(FrameRegUpdateFlags);
43095ffd83dbSDimitry Andric 
43105ffd83dbSDimitry Andric   int64_t ExtraBaseRegUpdate =
4311e8d8bef9SDimitry Andric       FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
43125ffd83dbSDimitry Andric   if (LoopSize < Size) {
43135ffd83dbSDimitry Andric     assert(FrameRegUpdate);
43145ffd83dbSDimitry Andric     assert(Size - LoopSize == 16);
43155ffd83dbSDimitry Andric     // Tag 16 more bytes at BaseReg and update BaseReg.
43165ffd83dbSDimitry Andric     BuildMI(*MBB, InsertI, DL,
43175ffd83dbSDimitry Andric             TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
43185ffd83dbSDimitry Andric         .addDef(BaseReg)
43195ffd83dbSDimitry Andric         .addReg(BaseReg)
43205ffd83dbSDimitry Andric         .addReg(BaseReg)
43215ffd83dbSDimitry Andric         .addImm(1 + ExtraBaseRegUpdate / 16)
43225ffd83dbSDimitry Andric         .setMemRefs(CombinedMemRefs)
43235ffd83dbSDimitry Andric         .setMIFlags(FrameRegUpdateFlags);
43245ffd83dbSDimitry Andric   } else if (ExtraBaseRegUpdate) {
43255ffd83dbSDimitry Andric     // Update BaseReg.
43265ffd83dbSDimitry Andric     BuildMI(
43275ffd83dbSDimitry Andric         *MBB, InsertI, DL,
43285ffd83dbSDimitry Andric         TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
43295ffd83dbSDimitry Andric         .addDef(BaseReg)
43305ffd83dbSDimitry Andric         .addReg(BaseReg)
43315ffd83dbSDimitry Andric         .addImm(std::abs(ExtraBaseRegUpdate))
43325ffd83dbSDimitry Andric         .addImm(0)
43335ffd83dbSDimitry Andric         .setMIFlags(FrameRegUpdateFlags);
43345ffd83dbSDimitry Andric   }
43355ffd83dbSDimitry Andric }
43365ffd83dbSDimitry Andric 
43375ffd83dbSDimitry Andric // Check if *II is a register update that can be merged into the STGloop that
43385ffd83dbSDimitry Andric // ends at (Reg + Size). TotalOffset is the required adjustment to Reg after
43395ffd83dbSDimitry Andric // the end of the loop.
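// For example (illustrative values): a loop that tags Size = 64 bytes ending
// at Reg, followed by "add Reg, Reg, #80", can absorb the ADD: TotalOffset
// becomes 80, and the remaining 16-byte adjustment is acceptable because it
// is a multiple of 16 and well under the 0xFFF immediate limit.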
43405ffd83dbSDimitry Andric bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
43415ffd83dbSDimitry Andric                        int64_t Size, int64_t *TotalOffset) {
43425ffd83dbSDimitry Andric   MachineInstr &MI = *II;
43435ffd83dbSDimitry Andric   if ((MI.getOpcode() == AArch64::ADDXri ||
43445ffd83dbSDimitry Andric        MI.getOpcode() == AArch64::SUBXri) &&
43455ffd83dbSDimitry Andric       MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
43465ffd83dbSDimitry Andric     unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
43475ffd83dbSDimitry Andric     int64_t Offset = MI.getOperand(2).getImm() << Shift;
43485ffd83dbSDimitry Andric     if (MI.getOpcode() == AArch64::SUBXri)
43495ffd83dbSDimitry Andric       Offset = -Offset;
43505ffd83dbSDimitry Andric     int64_t AbsPostOffset = std::abs(Offset - Size);
43515ffd83dbSDimitry Andric     const int64_t kMaxOffset =
43525ffd83dbSDimitry Andric         0xFFF; // Max encoding for unshifted ADDXri / SUBXri
43535ffd83dbSDimitry Andric     if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
43545ffd83dbSDimitry Andric       *TotalOffset = Offset;
43555ffd83dbSDimitry Andric       return true;
43565ffd83dbSDimitry Andric     }
43575ffd83dbSDimitry Andric   }
43585ffd83dbSDimitry Andric   return false;
43595ffd83dbSDimitry Andric }
43605ffd83dbSDimitry Andric 
43615ffd83dbSDimitry Andric void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
43625ffd83dbSDimitry Andric                   SmallVectorImpl<MachineMemOperand *> &MemRefs) {
43635ffd83dbSDimitry Andric   MemRefs.clear();
43645ffd83dbSDimitry Andric   for (auto &TS : TSE) {
43655ffd83dbSDimitry Andric     MachineInstr *MI = TS.MI;
43665ffd83dbSDimitry Andric     // An instruction without memory operands may access anything. Be
43675ffd83dbSDimitry Andric     // conservative and return an empty list.
43685ffd83dbSDimitry Andric     if (MI->memoperands_empty()) {
43695ffd83dbSDimitry Andric       MemRefs.clear();
43705ffd83dbSDimitry Andric       return;
43715ffd83dbSDimitry Andric     }
43725ffd83dbSDimitry Andric     MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
43735ffd83dbSDimitry Andric   }
43745ffd83dbSDimitry Andric }
43755ffd83dbSDimitry Andric 
43765ffd83dbSDimitry Andric void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
437781ad6265SDimitry Andric                             const AArch64FrameLowering *TFI,
437881ad6265SDimitry Andric                             bool TryMergeSPUpdate) {
43795ffd83dbSDimitry Andric   if (TagStores.empty())
43805ffd83dbSDimitry Andric     return;
43815ffd83dbSDimitry Andric   TagStoreInstr &FirstTagStore = TagStores[0];
43825ffd83dbSDimitry Andric   TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
43835ffd83dbSDimitry Andric   Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
43845ffd83dbSDimitry Andric   DL = TagStores[0].MI->getDebugLoc();
43855ffd83dbSDimitry Andric 
43865ffd83dbSDimitry Andric   Register Reg;
43875ffd83dbSDimitry Andric   FrameRegOffset = TFI->resolveFrameOffsetReference(
43885ffd83dbSDimitry Andric       *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
43895ffd83dbSDimitry Andric       /*PreferFP=*/false, /*ForSimm=*/true);
43905ffd83dbSDimitry Andric   FrameReg = Reg;
4391bdd1243dSDimitry Andric   FrameRegUpdate = std::nullopt;
43925ffd83dbSDimitry Andric 
43935ffd83dbSDimitry Andric   mergeMemRefs(TagStores, CombinedMemRefs);
43945ffd83dbSDimitry Andric 
43950fca6ea1SDimitry Andric   LLVM_DEBUG({
43960fca6ea1SDimitry Andric     dbgs() << "Replacing adjacent STG instructions:\n";
43970fca6ea1SDimitry Andric     for (const auto &Instr : TagStores) {
43980fca6ea1SDimitry Andric       dbgs() << "  " << *Instr.MI;
43990fca6ea1SDimitry Andric     }
44000fca6ea1SDimitry Andric   });
44015ffd83dbSDimitry Andric 
44025ffd83dbSDimitry Andric   // Size threshold where a loop becomes shorter than a linear sequence of
44035ffd83dbSDimitry Andric   // tagging instructions.
44045ffd83dbSDimitry Andric   const int kSetTagLoopThreshold = 176;
44055ffd83dbSDimitry Andric   if (Size < kSetTagLoopThreshold) {
44065ffd83dbSDimitry Andric     if (TagStores.size() < 2)
44075ffd83dbSDimitry Andric       return;
44085ffd83dbSDimitry Andric     emitUnrolled(InsertI);
44095ffd83dbSDimitry Andric   } else {
44105ffd83dbSDimitry Andric     MachineInstr *UpdateInstr = nullptr;
441181ad6265SDimitry Andric     int64_t TotalOffset = 0;
441281ad6265SDimitry Andric     if (TryMergeSPUpdate) {
44135ffd83dbSDimitry Andric       // See if we can merge the base register update into the STGloop.
44145ffd83dbSDimitry Andric       // This is done in AArch64LoadStoreOptimizer for "normal" stores,
44155ffd83dbSDimitry Andric       // but STGloop is too unusual for that pass to handle, and this
44165ffd83dbSDimitry Andric       // situation only realistically occurs in the function epilogue.
44175ffd83dbSDimitry Andric       // Also, STGloop is expanded before that pass runs.
44185ffd83dbSDimitry Andric       if (InsertI != MBB->end() &&
4419e8d8bef9SDimitry Andric           canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
44205ffd83dbSDimitry Andric                             &TotalOffset)) {
44215ffd83dbSDimitry Andric         UpdateInstr = &*InsertI++;
44225ffd83dbSDimitry Andric         LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n  "
44235ffd83dbSDimitry Andric                           << *UpdateInstr);
44245ffd83dbSDimitry Andric       }
44255ffd83dbSDimitry Andric     }
44265ffd83dbSDimitry Andric 
44275ffd83dbSDimitry Andric     if (!UpdateInstr && TagStores.size() < 2)
44285ffd83dbSDimitry Andric       return;
44295ffd83dbSDimitry Andric 
44305ffd83dbSDimitry Andric     if (UpdateInstr) {
44315ffd83dbSDimitry Andric       FrameRegUpdate = TotalOffset;
44325ffd83dbSDimitry Andric       FrameRegUpdateFlags = UpdateInstr->getFlags();
44335ffd83dbSDimitry Andric     }
44345ffd83dbSDimitry Andric     emitLoop(InsertI);
44355ffd83dbSDimitry Andric     if (UpdateInstr)
44365ffd83dbSDimitry Andric       UpdateInstr->eraseFromParent();
44375ffd83dbSDimitry Andric   }
44385ffd83dbSDimitry Andric 
44395ffd83dbSDimitry Andric   for (auto &TS : TagStores)
44405ffd83dbSDimitry Andric     TS.MI->eraseFromParent();
44415ffd83dbSDimitry Andric }
44425ffd83dbSDimitry Andric 
44435ffd83dbSDimitry Andric bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
44445ffd83dbSDimitry Andric                                         int64_t &Size, bool &ZeroData) {
44455ffd83dbSDimitry Andric   MachineFunction &MF = *MI.getParent()->getParent();
44465ffd83dbSDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
44475ffd83dbSDimitry Andric 
44485ffd83dbSDimitry Andric   unsigned Opcode = MI.getOpcode();
444906c3fb27SDimitry Andric   ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
445006c3fb27SDimitry Andric               Opcode == AArch64::STZ2Gi);
44515ffd83dbSDimitry Andric 
44525ffd83dbSDimitry Andric   if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
44535ffd83dbSDimitry Andric     if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
44545ffd83dbSDimitry Andric       return false;
44555ffd83dbSDimitry Andric     if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
44565ffd83dbSDimitry Andric       return false;
44575ffd83dbSDimitry Andric     Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
44585ffd83dbSDimitry Andric     Size = MI.getOperand(2).getImm();
44595ffd83dbSDimitry Andric     return true;
44605ffd83dbSDimitry Andric   }
44615ffd83dbSDimitry Andric 
446206c3fb27SDimitry Andric   if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
44635ffd83dbSDimitry Andric     Size = 16;
446406c3fb27SDimitry Andric   else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
44655ffd83dbSDimitry Andric     Size = 32;
44665ffd83dbSDimitry Andric   else
44675ffd83dbSDimitry Andric     return false;
44685ffd83dbSDimitry Andric 
44695ffd83dbSDimitry Andric   if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
44705ffd83dbSDimitry Andric     return false;
44715ffd83dbSDimitry Andric 
44725ffd83dbSDimitry Andric   Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
44735ffd83dbSDimitry Andric            16 * MI.getOperand(2).getImm();
44745ffd83dbSDimitry Andric   return true;
44755ffd83dbSDimitry Andric }
44765ffd83dbSDimitry Andric 
44775ffd83dbSDimitry Andric // Detect a run of memory tagging instructions for adjacent stack frame slots,
44785ffd83dbSDimitry Andric // and replace them with a shorter instruction sequence:
44795ffd83dbSDimitry Andric // * replace STG + STG with ST2G
44805ffd83dbSDimitry Andric // * replace STGloop + STGloop with STGloop
44815ffd83dbSDimitry Andric // This code needs to run when stack slot offsets are already known, but before
44825ffd83dbSDimitry Andric // FrameIndex operands in STG instructions are eliminated.
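// As an illustrative sketch, two 16-byte tag stores for adjacent slots, e.g.
//   STGi  $sp, %stack.0, 0
//   STGi  $sp, %stack.1, 0
// where %stack.1 immediately follows %stack.0, are rewritten into a single
// ST2Gi covering both slots (the MIR operand order shown here is only
// indicative).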
44835ffd83dbSDimitry Andric MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
44845ffd83dbSDimitry Andric                                                 const AArch64FrameLowering *TFI,
44855ffd83dbSDimitry Andric                                                 RegScavenger *RS) {
44865ffd83dbSDimitry Andric   bool FirstZeroData;
44875ffd83dbSDimitry Andric   int64_t Size, Offset;
44885ffd83dbSDimitry Andric   MachineInstr &MI = *II;
44895ffd83dbSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
44905ffd83dbSDimitry Andric   MachineBasicBlock::iterator NextI = ++II;
44915ffd83dbSDimitry Andric   if (&MI == &MBB->instr_back())
44925ffd83dbSDimitry Andric     return II;
44935ffd83dbSDimitry Andric   if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
44945ffd83dbSDimitry Andric     return II;
44955ffd83dbSDimitry Andric 
44965ffd83dbSDimitry Andric   SmallVector<TagStoreInstr, 4> Instrs;
44975ffd83dbSDimitry Andric   Instrs.emplace_back(&MI, Offset, Size);
44985ffd83dbSDimitry Andric 
44995ffd83dbSDimitry Andric   constexpr int kScanLimit = 10;
45005ffd83dbSDimitry Andric   int Count = 0;
45015ffd83dbSDimitry Andric   for (MachineBasicBlock::iterator E = MBB->end();
45025ffd83dbSDimitry Andric        NextI != E && Count < kScanLimit; ++NextI) {
45035ffd83dbSDimitry Andric     MachineInstr &MI = *NextI;
45045ffd83dbSDimitry Andric     bool ZeroData;
45055ffd83dbSDimitry Andric     int64_t Size, Offset;
45065ffd83dbSDimitry Andric     // Collect instructions that update memory tags with a FrameIndex operand
45075ffd83dbSDimitry Andric     // and (when applicable) constant size, and whose output registers are dead
45085ffd83dbSDimitry Andric     // (the latter is almost always the case in practice). Since these
45095ffd83dbSDimitry Andric     // instructions effectively have no inputs or outputs, we are free to skip
45105ffd83dbSDimitry Andric     // any non-aliasing instructions in between without tracking used registers.
45115ffd83dbSDimitry Andric     if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
45125ffd83dbSDimitry Andric       if (ZeroData != FirstZeroData)
45135ffd83dbSDimitry Andric         break;
45145ffd83dbSDimitry Andric       Instrs.emplace_back(&MI, Offset, Size);
45155ffd83dbSDimitry Andric       continue;
45165ffd83dbSDimitry Andric     }
45175ffd83dbSDimitry Andric 
45185ffd83dbSDimitry Andric     // Only count non-transient, non-tagging instructions toward the scan
45195ffd83dbSDimitry Andric     // limit.
45205ffd83dbSDimitry Andric     if (!MI.isTransient())
45215ffd83dbSDimitry Andric       ++Count;
45225ffd83dbSDimitry Andric 
45235ffd83dbSDimitry Andric     // Just in case, stop before the epilogue code starts.
45245ffd83dbSDimitry Andric     if (MI.getFlag(MachineInstr::FrameSetup) ||
45255ffd83dbSDimitry Andric         MI.getFlag(MachineInstr::FrameDestroy))
45265ffd83dbSDimitry Andric       break;
45275ffd83dbSDimitry Andric 
45285ffd83dbSDimitry Andric     // Reject anything that may alias the collected instructions.
45295ffd83dbSDimitry Andric     if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
45305ffd83dbSDimitry Andric       break;
45315ffd83dbSDimitry Andric   }
45325ffd83dbSDimitry Andric 
45335ffd83dbSDimitry Andric   // New code will be inserted after the last tagging instruction we've found.
45345ffd83dbSDimitry Andric   MachineBasicBlock::iterator InsertI = Instrs.back().MI;
45355f757f3fSDimitry Andric 
45365f757f3fSDimitry Andric   // All the gathered stack tag instructions are merged and placed after the
45375f757f3fSDimitry Andric   // last tag store in the list. We must check whether the NZCV flag is live
45385f757f3fSDimitry Andric   // at the point where we are trying to insert; otherwise the NZCV flag
45395f757f3fSDimitry Andric   // might get clobbered if any STG loops are emitted.
45405f757f3fSDimitry Andric 
45415f757f3fSDimitry Andric   // FIXME: Bailing out of the merge like this is conservative: the liveness
45425f757f3fSDimitry Andric   // check is performed even when the merged sequence contains no STG loops,
45435f757f3fSDimitry Andric   // in which case it is not needed.
45445f757f3fSDimitry Andric   LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
45455f757f3fSDimitry Andric   LiveRegs.addLiveOuts(*MBB);
45465f757f3fSDimitry Andric   for (auto I = MBB->rbegin();; ++I) {
45475f757f3fSDimitry Andric     MachineInstr &MI = *I;
45485f757f3fSDimitry Andric     if (MI == InsertI)
45495f757f3fSDimitry Andric       break;
45505f757f3fSDimitry Andric     LiveRegs.stepBackward(*I);
45515f757f3fSDimitry Andric   }
45525ffd83dbSDimitry Andric   InsertI++;
45535f757f3fSDimitry Andric   if (LiveRegs.contains(AArch64::NZCV))
45545f757f3fSDimitry Andric     return InsertI;
45555ffd83dbSDimitry Andric 
45565ffd83dbSDimitry Andric   llvm::stable_sort(Instrs,
45575ffd83dbSDimitry Andric                     [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
45585ffd83dbSDimitry Andric                       return Left.Offset < Right.Offset;
45595ffd83dbSDimitry Andric                     });
45605ffd83dbSDimitry Andric 
45615ffd83dbSDimitry Andric   // Make sure that we don't have any overlapping stores.
45625ffd83dbSDimitry Andric   int64_t CurOffset = Instrs[0].Offset;
45635ffd83dbSDimitry Andric   for (auto &Instr : Instrs) {
45645ffd83dbSDimitry Andric     if (CurOffset > Instr.Offset)
45655ffd83dbSDimitry Andric       return NextI;
45665ffd83dbSDimitry Andric     CurOffset = Instr.Offset + Instr.Size;
45675ffd83dbSDimitry Andric   }
45685ffd83dbSDimitry Andric 
45695ffd83dbSDimitry Andric   // Find contiguous runs of tagged memory and emit shorter instruction
45705ffd83dbSDimitry Andric   // sequences for them when possible.
45715ffd83dbSDimitry Andric   TagStoreEdit TSE(MBB, FirstZeroData);
4572bdd1243dSDimitry Andric   std::optional<int64_t> EndOffset;
45735ffd83dbSDimitry Andric   for (auto &Instr : Instrs) {
45745ffd83dbSDimitry Andric     if (EndOffset && *EndOffset != Instr.Offset) {
45755ffd83dbSDimitry Andric       // Found a gap.
457681ad6265SDimitry Andric       TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
45775ffd83dbSDimitry Andric       TSE.clear();
45785ffd83dbSDimitry Andric     }
45795ffd83dbSDimitry Andric 
45805ffd83dbSDimitry Andric     TSE.addInstruction(Instr);
45815ffd83dbSDimitry Andric     EndOffset = Instr.Offset + Instr.Size;
45825ffd83dbSDimitry Andric   }
45835ffd83dbSDimitry Andric 
4584bdd1243dSDimitry Andric   const MachineFunction *MF = MBB->getParent();
458581ad6265SDimitry Andric   // Multiple FP/SP updates in a loop cannot be described by CFI instructions.
4586bdd1243dSDimitry Andric   TSE.emitCode(
4587bdd1243dSDimitry Andric       InsertI, TFI, /*TryMergeSPUpdate = */
4588bdd1243dSDimitry Andric       !MF->getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(*MF));
45895ffd83dbSDimitry Andric 
45905ffd83dbSDimitry Andric   return InsertI;
45915ffd83dbSDimitry Andric }
45925ffd83dbSDimitry Andric } // namespace
45935ffd83dbSDimitry Andric 
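// Lower a VGSavePseudo/VGRestorePseudo into a CFI instruction: the save
// becomes an offset rule for the VG register at its spill slot (conceptually
// ".cfi_offset vg, <offset>") and the restore becomes ".cfi_restore vg".
// The directive spellings are illustrative; what is actually emitted is a
// CFI_INSTRUCTION referencing the corresponding MCCFIInstruction.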
45940fca6ea1SDimitry Andric MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
45950fca6ea1SDimitry Andric                                               const AArch64FrameLowering *TFI) {
45960fca6ea1SDimitry Andric   MachineInstr &MI = *II;
45970fca6ea1SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
45980fca6ea1SDimitry Andric   MachineFunction *MF = MBB->getParent();
45990fca6ea1SDimitry Andric 
46000fca6ea1SDimitry Andric   if (MI.getOpcode() != AArch64::VGSavePseudo &&
46010fca6ea1SDimitry Andric       MI.getOpcode() != AArch64::VGRestorePseudo)
46020fca6ea1SDimitry Andric     return II;
46030fca6ea1SDimitry Andric 
46040fca6ea1SDimitry Andric   SMEAttrs FuncAttrs(MF->getFunction());
46050fca6ea1SDimitry Andric   bool LocallyStreaming =
46060fca6ea1SDimitry Andric       FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
46070fca6ea1SDimitry Andric   const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
46080fca6ea1SDimitry Andric   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
46090fca6ea1SDimitry Andric   const AArch64InstrInfo *TII =
46100fca6ea1SDimitry Andric       MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
46110fca6ea1SDimitry Andric 
46120fca6ea1SDimitry Andric   int64_t VGFrameIdx =
46130fca6ea1SDimitry Andric       LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
46140fca6ea1SDimitry Andric   assert(VGFrameIdx != std::numeric_limits<int>::max() &&
46150fca6ea1SDimitry Andric          "Expected FrameIdx for VG");
46160fca6ea1SDimitry Andric 
46170fca6ea1SDimitry Andric   unsigned CFIIndex;
46180fca6ea1SDimitry Andric   if (MI.getOpcode() == AArch64::VGSavePseudo) {
46190fca6ea1SDimitry Andric     const MachineFrameInfo &MFI = MF->getFrameInfo();
46200fca6ea1SDimitry Andric     int64_t Offset =
46210fca6ea1SDimitry Andric         MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
46220fca6ea1SDimitry Andric     CFIIndex = MF->addFrameInst(MCCFIInstruction::createOffset(
46230fca6ea1SDimitry Andric         nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset));
46240fca6ea1SDimitry Andric   } else
46250fca6ea1SDimitry Andric     CFIIndex = MF->addFrameInst(MCCFIInstruction::createRestore(
46260fca6ea1SDimitry Andric         nullptr, TRI->getDwarfRegNum(AArch64::VG, true)));
46270fca6ea1SDimitry Andric 
46280fca6ea1SDimitry Andric   MachineInstr *UnwindInst = BuildMI(*MBB, II, II->getDebugLoc(),
46290fca6ea1SDimitry Andric                                      TII->get(TargetOpcode::CFI_INSTRUCTION))
46300fca6ea1SDimitry Andric                                  .addCFIIndex(CFIIndex);
46310fca6ea1SDimitry Andric 
46320fca6ea1SDimitry Andric   MI.eraseFromParent();
46330fca6ea1SDimitry Andric   return UnwindInst->getIterator();
46340fca6ea1SDimitry Andric }
46350fca6ea1SDimitry Andric 
46365ffd83dbSDimitry Andric void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
46375ffd83dbSDimitry Andric     MachineFunction &MF, RegScavenger *RS = nullptr) const {
46380fca6ea1SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
46395ffd83dbSDimitry Andric   for (auto &BB : MF)
46400fca6ea1SDimitry Andric     for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
46410fca6ea1SDimitry Andric       if (AFI->hasStreamingModeChanges())
46420fca6ea1SDimitry Andric         II = emitVGSaveRestore(II, this);
46430fca6ea1SDimitry Andric       if (StackTaggingMergeSetTag)
46445ffd83dbSDimitry Andric         II = tryMergeAdjacentSTG(II, this, RS);
46455ffd83dbSDimitry Andric     }
46460fca6ea1SDimitry Andric }
46475ffd83dbSDimitry Andric 
46485ffd83dbSDimitry Andric /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
46495ffd83dbSDimitry Andric /// before the update.  This is easily retrieved as it is exactly the offset
46505ffd83dbSDimitry Andric /// that is set in processFunctionBeforeFrameFinalized.
4651e8d8bef9SDimitry Andric StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
46525ffd83dbSDimitry Andric     const MachineFunction &MF, int FI, Register &FrameReg,
46530b57cec5SDimitry Andric     bool IgnoreSPUpdates) const {
46540b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
465562cfcf62SDimitry Andric   if (IgnoreSPUpdates) {
46560b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
46570b57cec5SDimitry Andric                       << MFI.getObjectOffset(FI) << "\n");
46580b57cec5SDimitry Andric     FrameReg = AArch64::SP;
4659e8d8bef9SDimitry Andric     return StackOffset::getFixed(MFI.getObjectOffset(FI));
46600b57cec5SDimitry Andric   }
46610b57cec5SDimitry Andric 
4662349cc55cSDimitry Andric   // Go to common code if we cannot provide sp + offset.
4663349cc55cSDimitry Andric   if (MFI.hasVarSizedObjects() ||
4664349cc55cSDimitry Andric       MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
4665349cc55cSDimitry Andric       MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
466662cfcf62SDimitry Andric     return getFrameIndexReference(MF, FI, FrameReg);
4667349cc55cSDimitry Andric 
4668349cc55cSDimitry Andric   FrameReg = AArch64::SP;
4669349cc55cSDimitry Andric   return getStackOffset(MF, MFI.getObjectOffset(FI));
467062cfcf62SDimitry Andric }
467162cfcf62SDimitry Andric 
46720b57cec5SDimitry Andric /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
46730b57cec5SDimitry Andric /// the parent's frame pointer
46740b57cec5SDimitry Andric unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
46750b57cec5SDimitry Andric     const MachineFunction &MF) const {
46760b57cec5SDimitry Andric   return 0;
46770b57cec5SDimitry Andric }
46780b57cec5SDimitry Andric 
46790b57cec5SDimitry Andric /// Funclets only need to account for space for the callee saved registers,
46800b57cec5SDimitry Andric /// as the locals are accounted for in the parent's stack frame.
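/// For example (illustrative numbers): with 80 bytes of pushed callee saves
/// and a maximum call frame of 40 bytes, the funclet frame size is
/// alignTo(80 + 40, 16) = 128 bytes.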
46810b57cec5SDimitry Andric unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
46820b57cec5SDimitry Andric     const MachineFunction &MF) const {
46830b57cec5SDimitry Andric   // This is the size of the pushed CSRs.
46840b57cec5SDimitry Andric   unsigned CSSize =
46850b57cec5SDimitry Andric       MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
46860b57cec5SDimitry Andric   // This is the amount of stack a funclet needs to allocate.
46870b57cec5SDimitry Andric   return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
46885ffd83dbSDimitry Andric                  getStackAlign());
46890b57cec5SDimitry Andric }
4690e8d8bef9SDimitry Andric 
4691e8d8bef9SDimitry Andric namespace {
4692e8d8bef9SDimitry Andric struct FrameObject {
4693e8d8bef9SDimitry Andric   bool IsValid = false;
4694e8d8bef9SDimitry Andric   // Index of the object in MFI.
4695e8d8bef9SDimitry Andric   int ObjectIndex = 0;
4696e8d8bef9SDimitry Andric   // Group ID this object belongs to.
4697e8d8bef9SDimitry Andric   int GroupIndex = -1;
4698e8d8bef9SDimitry Andric   // This object should be placed first (closest to SP).
4699e8d8bef9SDimitry Andric   bool ObjectFirst = false;
4700e8d8bef9SDimitry Andric   // This object's group (which always contains the object with
4701e8d8bef9SDimitry Andric   // ObjectFirst==true) should be placed first.
4702e8d8bef9SDimitry Andric   bool GroupFirst = false;
47030fca6ea1SDimitry Andric 
47040fca6ea1SDimitry Andric   // Used to distinguish between FP and GPR accesses. The values are decided so
47050fca6ea1SDimitry Andric   // that they sort FPR < Hazard < GPR and they can be or'd together.
47060fca6ea1SDimitry Andric   unsigned Accesses = 0;
47070fca6ea1SDimitry Andric   enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
4708e8d8bef9SDimitry Andric };
4709e8d8bef9SDimitry Andric 
4710e8d8bef9SDimitry Andric class GroupBuilder {
4711e8d8bef9SDimitry Andric   SmallVector<int, 8> CurrentMembers;
4712e8d8bef9SDimitry Andric   int NextGroupIndex = 0;
4713e8d8bef9SDimitry Andric   std::vector<FrameObject> &Objects;
4714e8d8bef9SDimitry Andric 
4715e8d8bef9SDimitry Andric public:
4716e8d8bef9SDimitry Andric   GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
4717e8d8bef9SDimitry Andric   void AddMember(int Index) { CurrentMembers.push_back(Index); }
4718e8d8bef9SDimitry Andric   void EndCurrentGroup() {
4719e8d8bef9SDimitry Andric     if (CurrentMembers.size() > 1) {
4720e8d8bef9SDimitry Andric       // Create a new group with the current member list. This might remove them
4721e8d8bef9SDimitry Andric       // from their pre-existing groups. That's OK; dealing with overlapping
4722e8d8bef9SDimitry Andric       // groups is too hard and unlikely to make a difference.
4723e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "group:");
4724e8d8bef9SDimitry Andric       for (int Index : CurrentMembers) {
4725e8d8bef9SDimitry Andric         Objects[Index].GroupIndex = NextGroupIndex;
4726e8d8bef9SDimitry Andric         LLVM_DEBUG(dbgs() << " " << Index);
4727e8d8bef9SDimitry Andric       }
4728e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "\n");
4729e8d8bef9SDimitry Andric       NextGroupIndex++;
4730e8d8bef9SDimitry Andric     }
4731e8d8bef9SDimitry Andric     CurrentMembers.clear();
4732e8d8bef9SDimitry Andric   }
4733e8d8bef9SDimitry Andric };
4734e8d8bef9SDimitry Andric 
4735e8d8bef9SDimitry Andric bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
4736e8d8bef9SDimitry Andric   // Objects at a lower index are closer to FP; objects at a higher index are
4737e8d8bef9SDimitry Andric   // closer to SP.
4738e8d8bef9SDimitry Andric   //
4739e8d8bef9SDimitry Andric   // For consistency in our comparison, all invalid objects are placed
4740e8d8bef9SDimitry Andric   // at the end. This also allows us to stop walking when we hit the
4741e8d8bef9SDimitry Andric   // first invalid item after it's all sorted.
4742e8d8bef9SDimitry Andric   //
47430fca6ea1SDimitry Andric   // If we want to include a stack hazard region, order FPR accesses < the
47440fca6ea1SDimitry Andric   // hazard object < GPR accesses in order to create a separation between the
47450fca6ea1SDimitry Andric   // two. For the Accesses field, 1 = FPR, 2 = Hazard Object, 4 = GPR.
47460fca6ea1SDimitry Andric   //
47470fca6ea1SDimitry Andric   // Otherwise the "first" object goes first (closest to SP), followed by the
47480fca6ea1SDimitry Andric   // members of the "first" group.
4749e8d8bef9SDimitry Andric   //
4750e8d8bef9SDimitry Andric   // The rest are sorted by the group index to keep the groups together.
4751e8d8bef9SDimitry Andric   // Higher numbered groups are more likely to be around longer (i.e. untagged
4752e8d8bef9SDimitry Andric   // in the function epilogue and not at some earlier point). Place them closer
4753e8d8bef9SDimitry Andric   // to SP.
4754e8d8bef9SDimitry Andric   //
4755e8d8bef9SDimitry Andric   // If all else equal, sort by the object index to keep the objects in the
4756e8d8bef9SDimitry Andric   // original order.
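  // As an illustrative consequence of the Accesses encoding above, an
  // FPR-accessed object (1) sorts before the hazard slot (2), which in turn
  // sorts before any GPR-accessed object (4), creating the desired separation.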
47570fca6ea1SDimitry Andric   return std::make_tuple(!A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
47580fca6ea1SDimitry Andric                          A.GroupIndex, A.ObjectIndex) <
47590fca6ea1SDimitry Andric          std::make_tuple(!B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
47600fca6ea1SDimitry Andric                          B.GroupIndex, B.ObjectIndex);
4761e8d8bef9SDimitry Andric }
4762e8d8bef9SDimitry Andric } // namespace
4763e8d8bef9SDimitry Andric 
4764e8d8bef9SDimitry Andric void AArch64FrameLowering::orderFrameObjects(
4765e8d8bef9SDimitry Andric     const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4766e8d8bef9SDimitry Andric   if (!OrderFrameObjects || ObjectsToAllocate.empty())
4767e8d8bef9SDimitry Andric     return;
4768e8d8bef9SDimitry Andric 
47690fca6ea1SDimitry Andric   const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
4770e8d8bef9SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
4771e8d8bef9SDimitry Andric   std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
4772e8d8bef9SDimitry Andric   for (auto &Obj : ObjectsToAllocate) {
4773e8d8bef9SDimitry Andric     FrameObjects[Obj].IsValid = true;
4774e8d8bef9SDimitry Andric     FrameObjects[Obj].ObjectIndex = Obj;
4775e8d8bef9SDimitry Andric   }
4776e8d8bef9SDimitry Andric 
47770fca6ea1SDimitry Andric   // Identify FPR vs GPR slots for hazards, and stack slots that are tagged at
47780fca6ea1SDimitry Andric   // the same time.
4779e8d8bef9SDimitry Andric   GroupBuilder GB(FrameObjects);
4780e8d8bef9SDimitry Andric   for (auto &MBB : MF) {
4781e8d8bef9SDimitry Andric     for (auto &MI : MBB) {
4782e8d8bef9SDimitry Andric       if (MI.isDebugInstr())
4783e8d8bef9SDimitry Andric         continue;
47840fca6ea1SDimitry Andric 
47850fca6ea1SDimitry Andric       if (AFI.hasStackHazardSlotIndex()) {
47860fca6ea1SDimitry Andric         std::optional<int> FI = getLdStFrameID(MI, MFI);
47870fca6ea1SDimitry Andric         if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
47880fca6ea1SDimitry Andric           if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
47890fca6ea1SDimitry Andric               AArch64InstrInfo::isFpOrNEON(MI))
47900fca6ea1SDimitry Andric             FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
47910fca6ea1SDimitry Andric           else
47920fca6ea1SDimitry Andric             FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
47930fca6ea1SDimitry Andric         }
47940fca6ea1SDimitry Andric       }
47950fca6ea1SDimitry Andric 
4796e8d8bef9SDimitry Andric       int OpIndex;
4797e8d8bef9SDimitry Andric       switch (MI.getOpcode()) {
4798e8d8bef9SDimitry Andric       case AArch64::STGloop:
4799e8d8bef9SDimitry Andric       case AArch64::STZGloop:
4800e8d8bef9SDimitry Andric         OpIndex = 3;
4801e8d8bef9SDimitry Andric         break;
480206c3fb27SDimitry Andric       case AArch64::STGi:
480306c3fb27SDimitry Andric       case AArch64::STZGi:
480406c3fb27SDimitry Andric       case AArch64::ST2Gi:
480506c3fb27SDimitry Andric       case AArch64::STZ2Gi:
4806e8d8bef9SDimitry Andric         OpIndex = 1;
4807e8d8bef9SDimitry Andric         break;
4808e8d8bef9SDimitry Andric       default:
4809e8d8bef9SDimitry Andric         OpIndex = -1;
4810e8d8bef9SDimitry Andric       }
4811e8d8bef9SDimitry Andric 
4812e8d8bef9SDimitry Andric       int TaggedFI = -1;
4813e8d8bef9SDimitry Andric       if (OpIndex >= 0) {
4814e8d8bef9SDimitry Andric         const MachineOperand &MO = MI.getOperand(OpIndex);
4815e8d8bef9SDimitry Andric         if (MO.isFI()) {
4816e8d8bef9SDimitry Andric           int FI = MO.getIndex();
4817e8d8bef9SDimitry Andric           if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
4818e8d8bef9SDimitry Andric               FrameObjects[FI].IsValid)
4819e8d8bef9SDimitry Andric             TaggedFI = FI;
4820e8d8bef9SDimitry Andric         }
4821e8d8bef9SDimitry Andric       }
4822e8d8bef9SDimitry Andric 
4823e8d8bef9SDimitry Andric       // If this is a stack tagging instruction for a slot that is not part of a
4824e8d8bef9SDimitry Andric       // group yet, either start a new group or add it to the current one.
4825e8d8bef9SDimitry Andric       if (TaggedFI >= 0)
4826e8d8bef9SDimitry Andric         GB.AddMember(TaggedFI);
4827e8d8bef9SDimitry Andric       else
4828e8d8bef9SDimitry Andric         GB.EndCurrentGroup();
4829e8d8bef9SDimitry Andric     }
4830e8d8bef9SDimitry Andric     // Groups should never span multiple basic blocks.
4831e8d8bef9SDimitry Andric     GB.EndCurrentGroup();
4832e8d8bef9SDimitry Andric   }
4833e8d8bef9SDimitry Andric 
48340fca6ea1SDimitry Andric   if (AFI.hasStackHazardSlotIndex()) {
48350fca6ea1SDimitry Andric     FrameObjects[AFI.getStackHazardSlotIndex()].Accesses =
48360fca6ea1SDimitry Andric         FrameObject::AccessHazard;
48370fca6ea1SDimitry Andric     // If a stack object is unknown or both GPR and FPR, sort it into GPR.
48380fca6ea1SDimitry Andric     for (auto &Obj : FrameObjects)
48390fca6ea1SDimitry Andric       if (!Obj.Accesses ||
48400fca6ea1SDimitry Andric           Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
48410fca6ea1SDimitry Andric         Obj.Accesses = FrameObject::AccessGPR;
48420fca6ea1SDimitry Andric   }
48430fca6ea1SDimitry Andric 
4844e8d8bef9SDimitry Andric   // If the function's tagged base pointer is pinned to a stack slot, we want to
4845e8d8bef9SDimitry Andric   // put that slot first when possible. This will likely place it at SP + 0,
4846e8d8bef9SDimitry Andric   // and save one instruction when generating the base pointer because IRG does
4847e8d8bef9SDimitry Andric   // not allow an immediate offset.
4848bdd1243dSDimitry Andric   std::optional<int> TBPI = AFI.getTaggedBasePointerIndex();
4849e8d8bef9SDimitry Andric   if (TBPI) {
4850e8d8bef9SDimitry Andric     FrameObjects[*TBPI].ObjectFirst = true;
4851e8d8bef9SDimitry Andric     FrameObjects[*TBPI].GroupFirst = true;
4852e8d8bef9SDimitry Andric     int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
4853e8d8bef9SDimitry Andric     if (FirstGroupIndex >= 0)
4854e8d8bef9SDimitry Andric       for (FrameObject &Object : FrameObjects)
4855e8d8bef9SDimitry Andric         if (Object.GroupIndex == FirstGroupIndex)
4856e8d8bef9SDimitry Andric           Object.GroupFirst = true;
4857e8d8bef9SDimitry Andric   }
4858e8d8bef9SDimitry Andric 
4859e8d8bef9SDimitry Andric   llvm::stable_sort(FrameObjects, FrameObjectCompare);
4860e8d8bef9SDimitry Andric 
4861e8d8bef9SDimitry Andric   int i = 0;
4862e8d8bef9SDimitry Andric   for (auto &Obj : FrameObjects) {
4863e8d8bef9SDimitry Andric     // All invalid items are sorted at the end, so it's safe to stop.
4864e8d8bef9SDimitry Andric     if (!Obj.IsValid)
4865e8d8bef9SDimitry Andric       break;
4866e8d8bef9SDimitry Andric     ObjectsToAllocate[i++] = Obj.ObjectIndex;
4867e8d8bef9SDimitry Andric   }
4868e8d8bef9SDimitry Andric 
48690fca6ea1SDimitry Andric   LLVM_DEBUG({
48700fca6ea1SDimitry Andric     dbgs() << "Final frame order:\n";
48710fca6ea1SDimitry Andric     for (auto &Obj : FrameObjects) {
4872e8d8bef9SDimitry Andric       if (!Obj.IsValid)
4873e8d8bef9SDimitry Andric         break;
4874e8d8bef9SDimitry Andric       dbgs() << "  " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
4875e8d8bef9SDimitry Andric       if (Obj.ObjectFirst)
4876e8d8bef9SDimitry Andric         dbgs() << ", first";
4877e8d8bef9SDimitry Andric       if (Obj.GroupFirst)
4878e8d8bef9SDimitry Andric         dbgs() << ", group-first";
4879e8d8bef9SDimitry Andric       dbgs() << "\n";
48800fca6ea1SDimitry Andric     }
4881e8d8bef9SDimitry Andric   });
4882e8d8bef9SDimitry Andric }
48835f757f3fSDimitry Andric 
48845f757f3fSDimitry Andric /// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
48855f757f3fSDimitry Andric /// least every ProbeSize bytes. Returns an iterator to the first instruction
48865f757f3fSDimitry Andric /// after the loop. The difference between SP and TargetReg must be an exact
48875f757f3fSDimitry Andric /// multiple of ProbeSize.
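/// A sketch of the emitted sequence (illustrative; assumes ProbeSize is
/// encodable in a single SUB):
///   LoopMBB:
///     sub  sp, sp, #ProbeSize
///     str  xzr, [sp]
///     cmp  sp, TargetReg
///     b.ne LoopMBB
///   ExitMBB: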
48885f757f3fSDimitry Andric MachineBasicBlock::iterator
48895f757f3fSDimitry Andric AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
48905f757f3fSDimitry Andric     MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
48915f757f3fSDimitry Andric     Register TargetReg) const {
48925f757f3fSDimitry Andric   MachineBasicBlock &MBB = *MBBI->getParent();
48935f757f3fSDimitry Andric   MachineFunction &MF = *MBB.getParent();
48945f757f3fSDimitry Andric   const AArch64InstrInfo *TII =
48955f757f3fSDimitry Andric       MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
48965f757f3fSDimitry Andric   DebugLoc DL = MBB.findDebugLoc(MBBI);
48975f757f3fSDimitry Andric 
48985f757f3fSDimitry Andric   MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
48995f757f3fSDimitry Andric   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
49005f757f3fSDimitry Andric   MF.insert(MBBInsertPoint, LoopMBB);
49015f757f3fSDimitry Andric   MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
49025f757f3fSDimitry Andric   MF.insert(MBBInsertPoint, ExitMBB);
49035f757f3fSDimitry Andric 
49045f757f3fSDimitry Andric   // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
49055f757f3fSDimitry Andric   // in SUB).
49065f757f3fSDimitry Andric   emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
49075f757f3fSDimitry Andric                   StackOffset::getFixed(-ProbeSize), TII,
49085f757f3fSDimitry Andric                   MachineInstr::FrameSetup);
49095f757f3fSDimitry Andric   // STR XZR, [SP]
49105f757f3fSDimitry Andric   BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
49115f757f3fSDimitry Andric       .addReg(AArch64::XZR)
49125f757f3fSDimitry Andric       .addReg(AArch64::SP)
49135f757f3fSDimitry Andric       .addImm(0)
49145f757f3fSDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
49155f757f3fSDimitry Andric   // CMP SP, TargetReg
49165f757f3fSDimitry Andric   BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
49175f757f3fSDimitry Andric           AArch64::XZR)
49185f757f3fSDimitry Andric       .addReg(AArch64::SP)
49195f757f3fSDimitry Andric       .addReg(TargetReg)
49205f757f3fSDimitry Andric       .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
49215f757f3fSDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
49225f757f3fSDimitry Andric   // B.CC Loop
49235f757f3fSDimitry Andric   BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
49245f757f3fSDimitry Andric       .addImm(AArch64CC::NE)
49255f757f3fSDimitry Andric       .addMBB(LoopMBB)
49265f757f3fSDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
49275f757f3fSDimitry Andric 
49285f757f3fSDimitry Andric   LoopMBB->addSuccessor(ExitMBB);
49295f757f3fSDimitry Andric   LoopMBB->addSuccessor(LoopMBB);
49305f757f3fSDimitry Andric   // Synthesize the exit MBB.
49315f757f3fSDimitry Andric   ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
49325f757f3fSDimitry Andric   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
49335f757f3fSDimitry Andric   MBB.addSuccessor(LoopMBB);
49345f757f3fSDimitry Andric   // Update liveins.
49350fca6ea1SDimitry Andric   fullyRecomputeLiveIns({ExitMBB, LoopMBB});
49365f757f3fSDimitry Andric 
49375f757f3fSDimitry Andric   return ExitMBB->begin();
49385f757f3fSDimitry Andric }
49395f757f3fSDimitry Andric 
49405f757f3fSDimitry Andric void AArch64FrameLowering::inlineStackProbeFixed(
49415f757f3fSDimitry Andric     MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
49425f757f3fSDimitry Andric     StackOffset CFAOffset) const {
49435f757f3fSDimitry Andric   MachineBasicBlock *MBB = MBBI->getParent();
49445f757f3fSDimitry Andric   MachineFunction &MF = *MBB->getParent();
49455f757f3fSDimitry Andric   const AArch64InstrInfo *TII =
49465f757f3fSDimitry Andric       MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
49475f757f3fSDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
49485f757f3fSDimitry Andric   bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
49495f757f3fSDimitry Andric   bool HasFP = hasFP(MF);
49505f757f3fSDimitry Andric 
49515f757f3fSDimitry Andric   DebugLoc DL;
49525f757f3fSDimitry Andric   int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
49535f757f3fSDimitry Andric   int64_t NumBlocks = FrameSize / ProbeSize;
49545f757f3fSDimitry Andric   int64_t ResidualSize = FrameSize % ProbeSize;
49555f757f3fSDimitry Andric 
49565f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
49575f757f3fSDimitry Andric                     << NumBlocks << " blocks of " << ProbeSize
49585f757f3fSDimitry Andric                     << " bytes, plus " << ResidualSize << " bytes\n");
49595f757f3fSDimitry Andric 
49605f757f3fSDimitry Andric   // Decrement SP by NumBlocks * ProbeSize bytes, with either an unrolled
49615f757f3fSDimitry Andric   // sequence or an ordinary loop.
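  // For example (illustrative arithmetic only): with ProbeSize = 4096 and
  // FrameSize = 20000, NumBlocks = 4 and ResidualSize = 3616; the four
  // ProbeSize blocks are allocated and probed first, then the residual is
  // allocated below.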
49625f757f3fSDimitry Andric   if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
49635f757f3fSDimitry Andric     for (int i = 0; i < NumBlocks; ++i) {
49645f757f3fSDimitry Andric       // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not
49655f757f3fSDimitry Andric       // encodable in a SUB).
49665f757f3fSDimitry Andric       emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
49675f757f3fSDimitry Andric                       StackOffset::getFixed(-ProbeSize), TII,
49685f757f3fSDimitry Andric                       MachineInstr::FrameSetup, false, false, nullptr,
49695f757f3fSDimitry Andric                       EmitAsyncCFI && !HasFP, CFAOffset);
49705f757f3fSDimitry Andric       CFAOffset += StackOffset::getFixed(ProbeSize);
49715f757f3fSDimitry Andric       // STR XZR, [SP]
49725f757f3fSDimitry Andric       BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
49735f757f3fSDimitry Andric           .addReg(AArch64::XZR)
49745f757f3fSDimitry Andric           .addReg(AArch64::SP)
49755f757f3fSDimitry Andric           .addImm(0)
49765f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
49775f757f3fSDimitry Andric     }
49785f757f3fSDimitry Andric   } else if (NumBlocks != 0) {
49795f757f3fSDimitry Andric     // SUB ScratchReg, SP, #(NumBlocks * ProbeSize) (or equivalent if the offset
49805f757f3fSDimitry Andric     // is not encodable in SUB). ScratchReg may temporarily become the CFA register.
49815f757f3fSDimitry Andric     emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
49825f757f3fSDimitry Andric                     StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
49835f757f3fSDimitry Andric                     MachineInstr::FrameSetup, false, false, nullptr,
49845f757f3fSDimitry Andric                     EmitAsyncCFI && !HasFP, CFAOffset);
49855f757f3fSDimitry Andric     CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
49865f757f3fSDimitry Andric     MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
49875f757f3fSDimitry Andric     MBB = MBBI->getParent();
49885f757f3fSDimitry Andric     if (EmitAsyncCFI && !HasFP) {
49895f757f3fSDimitry Andric       // Set the CFA register back to SP.
49905f757f3fSDimitry Andric       const AArch64RegisterInfo &RegInfo =
49915f757f3fSDimitry Andric           *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
49925f757f3fSDimitry Andric       unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
49935f757f3fSDimitry Andric       unsigned CFIIndex =
49945f757f3fSDimitry Andric           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
49955f757f3fSDimitry Andric       BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
49965f757f3fSDimitry Andric           .addCFIIndex(CFIIndex)
49975f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
49985f757f3fSDimitry Andric     }
49995f757f3fSDimitry Andric   }
50005f757f3fSDimitry Andric 
50015f757f3fSDimitry Andric   if (ResidualSize != 0) {
50025f757f3fSDimitry Andric     // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable
50035f757f3fSDimitry Andric     // in SUB).
50045f757f3fSDimitry Andric     emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
50055f757f3fSDimitry Andric                     StackOffset::getFixed(-ResidualSize), TII,
50065f757f3fSDimitry Andric                     MachineInstr::FrameSetup, false, false, nullptr,
50075f757f3fSDimitry Andric                     EmitAsyncCFI && !HasFP, CFAOffset);
50085f757f3fSDimitry Andric     if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
50095f757f3fSDimitry Andric       // STR XZR, [SP]
50105f757f3fSDimitry Andric       BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
50115f757f3fSDimitry Andric           .addReg(AArch64::XZR)
50125f757f3fSDimitry Andric           .addReg(AArch64::SP)
50135f757f3fSDimitry Andric           .addImm(0)
50145f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
50155f757f3fSDimitry Andric     }
50165f757f3fSDimitry Andric   }
50175f757f3fSDimitry Andric }
50185f757f3fSDimitry Andric 
50195f757f3fSDimitry Andric void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
50205f757f3fSDimitry Andric                                             MachineBasicBlock &MBB) const {
50215f757f3fSDimitry Andric   // Get the instructions that need to be replaced. We emit at most two of
50225f757f3fSDimitry Andric   // these. Remember them in order to avoid complications coming from the need
50235f757f3fSDimitry Andric   // to traverse the block while potentially creating more blocks.
50245f757f3fSDimitry Andric   SmallVector<MachineInstr *, 4> ToReplace;
50255f757f3fSDimitry Andric   for (MachineInstr &MI : MBB)
50265f757f3fSDimitry Andric     if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
50275f757f3fSDimitry Andric         MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
50285f757f3fSDimitry Andric       ToReplace.push_back(&MI);
50295f757f3fSDimitry Andric 
50305f757f3fSDimitry Andric   for (MachineInstr *MI : ToReplace) {
50315f757f3fSDimitry Andric     if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
50325f757f3fSDimitry Andric       Register ScratchReg = MI->getOperand(0).getReg();
50335f757f3fSDimitry Andric       int64_t FrameSize = MI->getOperand(1).getImm();
50345f757f3fSDimitry Andric       StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
50355f757f3fSDimitry Andric                                                MI->getOperand(3).getImm());
50365f757f3fSDimitry Andric       inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
50375f757f3fSDimitry Andric                             CFAOffset);
50385f757f3fSDimitry Andric     } else {
50395f757f3fSDimitry Andric       assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
50405f757f3fSDimitry Andric              "Stack probe pseudo-instruction expected");
50415f757f3fSDimitry Andric       const AArch64InstrInfo *TII =
50425f757f3fSDimitry Andric           MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
50435f757f3fSDimitry Andric       Register TargetReg = MI->getOperand(0).getReg();
50445f757f3fSDimitry Andric       (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
50455f757f3fSDimitry Andric     }
50465f757f3fSDimitry Andric     MI->eraseFromParent();
50475f757f3fSDimitry Andric   }
50485f757f3fSDimitry Andric }
5049*62987288SDimitry Andric 
5050*62987288SDimitry Andric struct StackAccess {
5051*62987288SDimitry Andric   enum AccessType {
5052*62987288SDimitry Andric     NotAccessed = 0, // Stack object not accessed by load/store instructions.
5053*62987288SDimitry Andric     GPR = 1 << 0,    // A general purpose register.
5054*62987288SDimitry Andric     PPR = 1 << 1,    // A predicate register.
5055*62987288SDimitry Andric     FPR = 1 << 2,    // A floating-point/NEON/SVE register.
5056*62987288SDimitry Andric   };
5057*62987288SDimitry Andric 
5058*62987288SDimitry Andric   int Idx;
5059*62987288SDimitry Andric   StackOffset Offset;
5060*62987288SDimitry Andric   int64_t Size;
5061*62987288SDimitry Andric   unsigned AccessTypes;
5062*62987288SDimitry Andric 
5063*62987288SDimitry Andric   StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
5064*62987288SDimitry Andric 
5065*62987288SDimitry Andric   bool operator<(const StackAccess &Rhs) const {
5066*62987288SDimitry Andric     return std::make_tuple(start(), Idx) <
5067*62987288SDimitry Andric            std::make_tuple(Rhs.start(), Rhs.Idx);
5068*62987288SDimitry Andric   }
5069*62987288SDimitry Andric 
5070*62987288SDimitry Andric   bool isCPU() const {
5071*62987288SDimitry Andric     // Predicate register load and store instructions execute on the CPU.
5072*62987288SDimitry Andric     return AccessTypes & (AccessType::GPR | AccessType::PPR);
5073*62987288SDimitry Andric   }
5074*62987288SDimitry Andric   bool isSME() const { return AccessTypes & AccessType::FPR; }
5075*62987288SDimitry Andric   bool isMixed() const { return isCPU() && isSME(); }
5076*62987288SDimitry Andric 
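  // For sorting and distance calculations the scalable part of the offset is
  // simply added to the fixed part (i.e. vscale is treated as 1); this is only
  // an approximation used to order objects and estimate gaps for remarks.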
5077*62987288SDimitry Andric   int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
5078*62987288SDimitry Andric   int64_t end() const { return start() + Size; }
5079*62987288SDimitry Andric 
5080*62987288SDimitry Andric   std::string getTypeString() const {
5081*62987288SDimitry Andric     switch (AccessTypes) {
5082*62987288SDimitry Andric     case AccessType::FPR:
5083*62987288SDimitry Andric       return "FPR";
5084*62987288SDimitry Andric     case AccessType::PPR:
5085*62987288SDimitry Andric       return "PPR";
5086*62987288SDimitry Andric     case AccessType::GPR:
5087*62987288SDimitry Andric       return "GPR";
5088*62987288SDimitry Andric     case AccessType::NotAccessed:
5089*62987288SDimitry Andric       return "NA";
5090*62987288SDimitry Andric     default:
5091*62987288SDimitry Andric       return "Mixed";
5092*62987288SDimitry Andric     }
5093*62987288SDimitry Andric   }
5094*62987288SDimitry Andric 
5095*62987288SDimitry Andric   void print(raw_ostream &OS) const {
5096*62987288SDimitry Andric     OS << getTypeString() << " stack object at [SP"
5097*62987288SDimitry Andric        << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
5098*62987288SDimitry Andric     if (Offset.getScalable())
5099*62987288SDimitry Andric       OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
5100*62987288SDimitry Andric          << " * vscale";
5101*62987288SDimitry Andric     OS << "]";
5102*62987288SDimitry Andric   }
5103*62987288SDimitry Andric };
5104*62987288SDimitry Andric 
5105*62987288SDimitry Andric static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
5106*62987288SDimitry Andric   SA.print(OS);
5107*62987288SDimitry Andric   return OS;
5108*62987288SDimitry Andric }
5109*62987288SDimitry Andric 
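// Emit analysis remarks (pass name "sme", remark name "StackHazard") reporting
// stack objects accessed by FP/SVE instructions that lie within HazardSize
// bytes of objects accessed by GPR or predicate-register instructions, as well
// as objects accessed by both kinds of instruction.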
5110*62987288SDimitry Andric void AArch64FrameLowering::emitRemarks(
5111*62987288SDimitry Andric     const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
5112*62987288SDimitry Andric 
5113*62987288SDimitry Andric   SMEAttrs Attrs(MF.getFunction());
5114*62987288SDimitry Andric   if (Attrs.hasNonStreamingInterfaceAndBody())
5115*62987288SDimitry Andric     return;
5116*62987288SDimitry Andric 
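  // Prefer the configured hazard size; if it is zero, fall back to the
  // remark-only threshold StackHazardRemarkSize.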
5117*62987288SDimitry Andric   const uint64_t HazardSize =
5118*62987288SDimitry Andric       (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
5119*62987288SDimitry Andric 
5120*62987288SDimitry Andric   if (HazardSize == 0)
5121*62987288SDimitry Andric     return;
5122*62987288SDimitry Andric 
5123*62987288SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
5124*62987288SDimitry Andric   // Bail early if the function has no stack objects.
5125*62987288SDimitry Andric   if (!MFI.hasStackObjects())
5126*62987288SDimitry Andric     return;
5127*62987288SDimitry Andric 
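  // One entry per frame object. Fixed objects have negative frame indices, so
  // entries are indexed as FrameIdx + getNumFixedObjects() below.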
5128*62987288SDimitry Andric   std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
5129*62987288SDimitry Andric 
5130*62987288SDimitry Andric   size_t NumFPLdSt = 0;
5131*62987288SDimitry Andric   size_t NumNonFPLdSt = 0;
5132*62987288SDimitry Andric 
5133*62987288SDimitry Andric   // Collect stack accesses via Load/Store instructions.
5134*62987288SDimitry Andric   for (const MachineBasicBlock &MBB : MF) {
5135*62987288SDimitry Andric     for (const MachineInstr &MI : MBB) {
5136*62987288SDimitry Andric       if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
5137*62987288SDimitry Andric         continue;
5138*62987288SDimitry Andric       for (MachineMemOperand *MMO : MI.memoperands()) {
5139*62987288SDimitry Andric         std::optional<int> FI = getMMOFrameID(MMO, MFI);
5140*62987288SDimitry Andric         if (FI && !MFI.isDeadObjectIndex(*FI)) {
5141*62987288SDimitry Andric           int FrameIdx = *FI;
5142*62987288SDimitry Andric 
5143*62987288SDimitry Andric           size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
5144*62987288SDimitry Andric           if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
5145*62987288SDimitry Andric             StackAccesses[ArrIdx].Idx = FrameIdx;
5146*62987288SDimitry Andric             StackAccesses[ArrIdx].Offset =
5147*62987288SDimitry Andric                 getFrameIndexReferenceFromSP(MF, FrameIdx);
5148*62987288SDimitry Andric             StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
5149*62987288SDimitry Andric           }
5150*62987288SDimitry Andric 
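          // Classify the access. Scalable-vector slots accessed through a
          // predicate register count as PPR; other scalable accesses and any
          // FP/NEON access count as FPR; everything else is a GPR access.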
5151*62987288SDimitry Andric           unsigned RegTy = StackAccess::AccessType::GPR;
5152*62987288SDimitry Andric           if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
5153*62987288SDimitry Andric             if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
5154*62987288SDimitry Andric               RegTy = StackAccess::PPR;
5155*62987288SDimitry Andric             else
5156*62987288SDimitry Andric               RegTy = StackAccess::FPR;
5157*62987288SDimitry Andric           } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
5158*62987288SDimitry Andric             RegTy = StackAccess::FPR;
5159*62987288SDimitry Andric           }
5160*62987288SDimitry Andric 
5161*62987288SDimitry Andric           StackAccesses[ArrIdx].AccessTypes |= RegTy;
5162*62987288SDimitry Andric 
5163*62987288SDimitry Andric           if (RegTy == StackAccess::FPR)
5164*62987288SDimitry Andric             ++NumFPLdSt;
5165*62987288SDimitry Andric           else
5166*62987288SDimitry Andric             ++NumNonFPLdSt;
5167*62987288SDimitry Andric         }
5168*62987288SDimitry Andric       }
5169*62987288SDimitry Andric     }
5170*62987288SDimitry Andric   }
5171*62987288SDimitry Andric 
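  // A hazard requires both FP and non-FP accesses to the stack; if either kind
  // is absent there is nothing to report.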
5172*62987288SDimitry Andric   if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
5173*62987288SDimitry Andric     return;
5174*62987288SDimitry Andric 
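  // Sort by start offset so that neighbouring objects are adjacent in the
  // vector, then drop the entries that were never accessed.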
5175*62987288SDimitry Andric   llvm::sort(StackAccesses);
5176*62987288SDimitry Andric   StackAccesses.erase(llvm::remove_if(StackAccesses,
5177*62987288SDimitry Andric                                       [](const StackAccess &S) {
5178*62987288SDimitry Andric                                         return S.AccessTypes ==
5179*62987288SDimitry Andric                                                StackAccess::NotAccessed;
5180*62987288SDimitry Andric                                       }),
5181*62987288SDimitry Andric                       StackAccesses.end());
5182*62987288SDimitry Andric 
5183*62987288SDimitry Andric   SmallVector<const StackAccess *> MixedObjects;
5184*62987288SDimitry Andric   SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
5185*62987288SDimitry Andric 
5186*62987288SDimitry Andric   if (StackAccesses.front().isMixed())
5187*62987288SDimitry Andric     MixedObjects.push_back(&StackAccesses.front());
5188*62987288SDimitry Andric 
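  // Walk adjacent pairs in offset order and record a hazard whenever a
  // CPU-accessed (GPR/PPR) object and an SME-accessed (FPR) object are
  // separated by less than HazardSize bytes.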
5189*62987288SDimitry Andric   for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
5190*62987288SDimitry Andric        It != End; ++It) {
5191*62987288SDimitry Andric     const auto &First = *It;
5192*62987288SDimitry Andric     const auto &Second = *(It + 1);
5193*62987288SDimitry Andric 
5194*62987288SDimitry Andric     if (Second.isMixed())
5195*62987288SDimitry Andric       MixedObjects.push_back(&Second);
5196*62987288SDimitry Andric 
5197*62987288SDimitry Andric     if ((First.isSME() && Second.isCPU()) ||
5198*62987288SDimitry Andric         (First.isCPU() && Second.isSME())) {
5199*62987288SDimitry Andric       uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
5200*62987288SDimitry Andric       if (Distance < HazardSize)
5201*62987288SDimitry Andric         HazardPairs.emplace_back(&First, &Second);
5202*62987288SDimitry Andric     }
5203*62987288SDimitry Andric   }
5204*62987288SDimitry Andric 
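  // Each finding is emitted as a MachineOptimizationRemarkAnalysis (surfaced,
  // for example, with -Rpass-analysis=sme). A remark reads roughly like:
  //   stack hazard in 'foo': FPR stack object at [SP-16-32 * vscale] is too
  //   close to GPR stack object at [SP-8]
  // where 'foo' and the offsets above are purely illustrative.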
5205*62987288SDimitry Andric   auto EmitRemark = [&](llvm::StringRef Str) {
5206*62987288SDimitry Andric     ORE->emit([&]() {
5207*62987288SDimitry Andric       auto R = MachineOptimizationRemarkAnalysis(
5208*62987288SDimitry Andric           "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
5209*62987288SDimitry Andric       return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
5210*62987288SDimitry Andric     });
5211*62987288SDimitry Andric   };
5212*62987288SDimitry Andric 
5213*62987288SDimitry Andric   for (const auto &P : HazardPairs)
5214*62987288SDimitry Andric     EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
5215*62987288SDimitry Andric 
5216*62987288SDimitry Andric   for (const auto *Obj : MixedObjects)
5217*62987288SDimitry Andric     EmitRemark(
5218*62987288SDimitry Andric         formatv("{0} accessed by both GP and FP instructions", *Obj).str());
5219*62987288SDimitry Andric }
5220