10b57cec5SDimitry Andric //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file contains the AArch64 implementation of TargetFrameLowering class. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // On AArch64, stack frames are structured as follows: 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // The stack grows downward. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric // All of the individual frame areas on the frame below are optional, i.e. it's 160b57cec5SDimitry Andric // possible to create a function so that the particular area isn't present 170b57cec5SDimitry Andric // in the frame. 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // At function entry, the "frame" looks as follows: 200b57cec5SDimitry Andric // 210b57cec5SDimitry Andric // | | Higher address 220b57cec5SDimitry Andric // |-----------------------------------| 230b57cec5SDimitry Andric // | | 240b57cec5SDimitry Andric // | arguments passed on the stack | 250b57cec5SDimitry Andric // | | 260b57cec5SDimitry Andric // |-----------------------------------| <- sp 270b57cec5SDimitry Andric // | | Lower address 280b57cec5SDimitry Andric // 290b57cec5SDimitry Andric // 300b57cec5SDimitry Andric // After the prologue has run, the frame has the following general structure. 310b57cec5SDimitry Andric // Note that this doesn't depict the case where a red-zone is used. 
Also, 320b57cec5SDimitry Andric // technically the last frame area (VLAs) doesn't get created until in the 330b57cec5SDimitry Andric // main function body, after the prologue is run. However, it's depicted here 340b57cec5SDimitry Andric // for completeness. 350b57cec5SDimitry Andric // 360b57cec5SDimitry Andric // | | Higher address 370b57cec5SDimitry Andric // |-----------------------------------| 380b57cec5SDimitry Andric // | | 390b57cec5SDimitry Andric // | arguments passed on the stack | 400b57cec5SDimitry Andric // | | 410b57cec5SDimitry Andric // |-----------------------------------| 420b57cec5SDimitry Andric // | | 430b57cec5SDimitry Andric // | (Win64 only) varargs from reg | 440b57cec5SDimitry Andric // | | 450b57cec5SDimitry Andric // |-----------------------------------| 460b57cec5SDimitry Andric // | | 478bcb0991SDimitry Andric // | callee-saved gpr registers | <--. 488bcb0991SDimitry Andric // | | | On Darwin platforms these 498bcb0991SDimitry Andric // |- - - - - - - - - - - - - - - - - -| | callee saves are swapped, 50fe6060f1SDimitry Andric // | prev_lr | | (frame record first) 51fe6060f1SDimitry Andric // | prev_fp | <--' 52fe6060f1SDimitry Andric // | async context if needed | 530b57cec5SDimitry Andric // | (a.k.a. 
"frame record") | 540b57cec5SDimitry Andric // |-----------------------------------| <- fp(=x29) 550fca6ea1SDimitry Andric // | <hazard padding> | 560fca6ea1SDimitry Andric // |-----------------------------------| 570b57cec5SDimitry Andric // | | 588bcb0991SDimitry Andric // | callee-saved fp/simd/SVE regs | 598bcb0991SDimitry Andric // | | 608bcb0991SDimitry Andric // |-----------------------------------| 618bcb0991SDimitry Andric // | | 628bcb0991SDimitry Andric // | SVE stack objects | 630b57cec5SDimitry Andric // | | 640b57cec5SDimitry Andric // |-----------------------------------| 650b57cec5SDimitry Andric // |.empty.space.to.make.part.below....| 660b57cec5SDimitry Andric // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at 670b57cec5SDimitry Andric // |.the.standard.16-byte.alignment....| compile time; if present) 680b57cec5SDimitry Andric // |-----------------------------------| 690b57cec5SDimitry Andric // | local variables of fixed size | 700b57cec5SDimitry Andric // | including spill slots | 710fca6ea1SDimitry Andric // | <FPR> | 720fca6ea1SDimitry Andric // | <hazard padding> | 730fca6ea1SDimitry Andric // | <GPR> | 740b57cec5SDimitry Andric // |-----------------------------------| <- bp(not defined by ABI, 750b57cec5SDimitry Andric // |.variable-sized.local.variables....| LLVM chooses X19) 760b57cec5SDimitry Andric // |.(VLAs)............................| (size of this area is unknown at 770b57cec5SDimitry Andric // |...................................| compile time) 780b57cec5SDimitry Andric // |-----------------------------------| <- sp 790b57cec5SDimitry Andric // | | Lower address 800b57cec5SDimitry Andric // 810b57cec5SDimitry Andric // 820b57cec5SDimitry Andric // To access the data in a frame, at-compile time, a constant offset must be 830b57cec5SDimitry Andric // computable from one of the pointers (fp, bp, sp) to access it. 
The size 840b57cec5SDimitry Andric // of the areas with a dotted background cannot be computed at compile-time 850b57cec5SDimitry Andric // if they are present, making it required to have all three of fp, bp and 860b57cec5SDimitry Andric // sp to be set up to be able to access all contents in the frame areas, 870b57cec5SDimitry Andric // assuming all of the frame areas are non-empty. 880b57cec5SDimitry Andric // 890b57cec5SDimitry Andric // For most functions, some of the frame areas are empty. For those functions, 900b57cec5SDimitry Andric // it may not be necessary to set up fp or bp: 910b57cec5SDimitry Andric // * A base pointer is definitely needed when there are both VLAs and local 920b57cec5SDimitry Andric // variables with more-than-default alignment requirements. 930b57cec5SDimitry Andric // * A frame pointer is definitely needed when there are local variables with 940b57cec5SDimitry Andric // more-than-default alignment requirements. 950b57cec5SDimitry Andric // 968bcb0991SDimitry Andric // For Darwin platforms the frame-record (fp, lr) is stored at the top of the 978bcb0991SDimitry Andric // callee-saved area, since the unwind encoding does not allow for encoding 988bcb0991SDimitry Andric // this dynamically and existing tools depend on this layout. For other 998bcb0991SDimitry Andric // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved 1008bcb0991SDimitry Andric // area to allow SVE stack objects (allocated directly below the callee-saves, 1018bcb0991SDimitry Andric // if available) to be accessed directly from the framepointer. 1028bcb0991SDimitry Andric // The SVE spill/fill instructions have VL-scaled addressing modes such 1038bcb0991SDimitry Andric // as: 1048bcb0991SDimitry Andric // ldr z8, [fp, #-7 mul vl] 1058bcb0991SDimitry Andric // For SVE the size of the vector length (VL) is not known at compile-time, so 1068bcb0991SDimitry Andric // '#-7 mul vl' is an offset that can only be evaluated at runtime. 
With this
// layout, we don't need to add an unscaled offset to the framepointer before
// accessing the SVE object in the frame.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls. If we do have VLAs, then
// the stack pointer must be decremented and incremented around each call to
// make space for the arguments below the VLAs.
//
// FIXME: also explain the redzone concept.
//
// About stack hazards: Under some SME contexts, a coprocessor with its own
// separate cache can be used for FP operations. This can create hazards if the
// CPU and the SME unit try to access the same area of memory, including if the
// access is to an area of the stack. To try to alleviate this we attempt to
// introduce extra padding into the stack frame between FP and GPR accesses,
// controlled by the StackHazardSize option.
Without changing the layout of the 1300fca6ea1SDimitry Andric // stack frame in the diagram above, a stack object of size StackHazardSize is 1310fca6ea1SDimitry Andric // added between GPR and FPR CSRs. Another is added to the stack objects 1320fca6ea1SDimitry Andric // section, and stack objects are sorted so that FPR > Hazard padding slot > 1330fca6ea1SDimitry Andric // GPRs (where possible). Unfortunately some things are not handled well (VLA 1340fca6ea1SDimitry Andric // area, arguments on the stack, object with both GPR and FPR accesses), but if 1350fca6ea1SDimitry Andric // those are controlled by the user then the entire stack frame becomes GPR at 1360fca6ea1SDimitry Andric // the start/end with FPR in the middle, surrounded by Hazard padding. 1370fca6ea1SDimitry Andric // 13881ad6265SDimitry Andric // An example of the prologue: 13981ad6265SDimitry Andric // 14081ad6265SDimitry Andric // .globl __foo 14181ad6265SDimitry Andric // .align 2 14281ad6265SDimitry Andric // __foo: 14381ad6265SDimitry Andric // Ltmp0: 14481ad6265SDimitry Andric // .cfi_startproc 14581ad6265SDimitry Andric // .cfi_personality 155, ___gxx_personality_v0 14681ad6265SDimitry Andric // Leh_func_begin: 14781ad6265SDimitry Andric // .cfi_lsda 16, Lexception33 14881ad6265SDimitry Andric // 14981ad6265SDimitry Andric // stp xa,bx, [sp, -#offset]! 15081ad6265SDimitry Andric // ... 15181ad6265SDimitry Andric // stp x28, x27, [sp, #offset-32] 15281ad6265SDimitry Andric // stp fp, lr, [sp, #offset-16] 15381ad6265SDimitry Andric // add fp, sp, #offset - 16 15481ad6265SDimitry Andric // sub sp, sp, #1360 15581ad6265SDimitry Andric // 15681ad6265SDimitry Andric // The Stack: 15781ad6265SDimitry Andric // +-------------------------------------------+ 15881ad6265SDimitry Andric // 10000 | ........ | ........ | ........ | ........ | 15981ad6265SDimitry Andric // 10004 | ........ | ........ | ........ | ........ 
| 16081ad6265SDimitry Andric // +-------------------------------------------+ 16181ad6265SDimitry Andric // 10008 | ........ | ........ | ........ | ........ | 16281ad6265SDimitry Andric // 1000c | ........ | ........ | ........ | ........ | 16381ad6265SDimitry Andric // +===========================================+ 16481ad6265SDimitry Andric // 10010 | X28 Register | 16581ad6265SDimitry Andric // 10014 | X28 Register | 16681ad6265SDimitry Andric // +-------------------------------------------+ 16781ad6265SDimitry Andric // 10018 | X27 Register | 16881ad6265SDimitry Andric // 1001c | X27 Register | 16981ad6265SDimitry Andric // +===========================================+ 17081ad6265SDimitry Andric // 10020 | Frame Pointer | 17181ad6265SDimitry Andric // 10024 | Frame Pointer | 17281ad6265SDimitry Andric // +-------------------------------------------+ 17381ad6265SDimitry Andric // 10028 | Link Register | 17481ad6265SDimitry Andric // 1002c | Link Register | 17581ad6265SDimitry Andric // +===========================================+ 17681ad6265SDimitry Andric // 10030 | ........ | ........ | ........ | ........ | 17781ad6265SDimitry Andric // 10034 | ........ | ........ | ........ | ........ | 17881ad6265SDimitry Andric // +-------------------------------------------+ 17981ad6265SDimitry Andric // 10038 | ........ | ........ | ........ | ........ | 18081ad6265SDimitry Andric // 1003c | ........ | ........ | ........ | ........ | 18181ad6265SDimitry Andric // +-------------------------------------------+ 18281ad6265SDimitry Andric // 18381ad6265SDimitry Andric // [sp] = 10030 :: >>initial value<< 18481ad6265SDimitry Andric // sp = 10020 :: stp fp, lr, [sp, #-16]! 18581ad6265SDimitry Andric // fp = sp == 10020 :: mov fp, sp 18681ad6265SDimitry Andric // [sp] == 10020 :: stp x28, x27, [sp, #-16]! 18781ad6265SDimitry Andric // sp == 10010 :: >>final value<< 18881ad6265SDimitry Andric // 18981ad6265SDimitry Andric // The frame pointer (w29) points to address 10020. 
If we use an offset of 19081ad6265SDimitry Andric // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 19181ad6265SDimitry Andric // for w27, and -32 for w28: 19281ad6265SDimitry Andric // 19381ad6265SDimitry Andric // Ltmp1: 19481ad6265SDimitry Andric // .cfi_def_cfa w29, 16 19581ad6265SDimitry Andric // Ltmp2: 19681ad6265SDimitry Andric // .cfi_offset w30, -8 19781ad6265SDimitry Andric // Ltmp3: 19881ad6265SDimitry Andric // .cfi_offset w29, -16 19981ad6265SDimitry Andric // Ltmp4: 20081ad6265SDimitry Andric // .cfi_offset w27, -24 20181ad6265SDimitry Andric // Ltmp5: 20281ad6265SDimitry Andric // .cfi_offset w28, -32 20381ad6265SDimitry Andric // 2040b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric #include "AArch64FrameLowering.h" 2070b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 2080b57cec5SDimitry Andric #include "AArch64MachineFunctionInfo.h" 2090b57cec5SDimitry Andric #include "AArch64RegisterInfo.h" 2100b57cec5SDimitry Andric #include "AArch64Subtarget.h" 2110b57cec5SDimitry Andric #include "AArch64TargetMachine.h" 2120b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 21381ad6265SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h" 2140b57cec5SDimitry Andric #include "llvm/ADT/ScopeExit.h" 2150b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 2160b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 2170fca6ea1SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 2180b57cec5SDimitry Andric #include "llvm/CodeGen/LivePhysRegs.h" 2190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 2200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 2210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 2220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 2230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 
2240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 2250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 2260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 2270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 2280b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h" 2290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 2300b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 2310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 2320b57cec5SDimitry Andric #include "llvm/CodeGen/WinEHFuncInfo.h" 2330b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 2340b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h" 2350b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 2360b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 2370b57cec5SDimitry Andric #include "llvm/IR/Function.h" 2380b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h" 2390b57cec5SDimitry Andric #include "llvm/MC/MCDwarf.h" 2400b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 2410b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 2420b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 243*62987288SDimitry Andric #include "llvm/Support/FormatVariadic.h" 2440b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 2450b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 2460b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 2470b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h" 2480b57cec5SDimitry Andric #include <cassert> 2490b57cec5SDimitry Andric #include <cstdint> 2500b57cec5SDimitry Andric #include <iterator> 251bdd1243dSDimitry Andric #include <optional> 2520b57cec5SDimitry Andric #include <vector> 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric using namespace llvm; 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info" 2570b57cec5SDimitry Andric 
2580b57cec5SDimitry Andric static cl::opt<bool> EnableRedZone("aarch64-redzone", 2590b57cec5SDimitry Andric cl::desc("enable use of redzone on AArch64"), 2600b57cec5SDimitry Andric cl::init(false), cl::Hidden); 2610b57cec5SDimitry Andric 2625ffd83dbSDimitry Andric static cl::opt<bool> StackTaggingMergeSetTag( 2635ffd83dbSDimitry Andric "stack-tagging-merge-settag", 2645ffd83dbSDimitry Andric cl::desc("merge settag instruction in function epilog"), cl::init(true), 2655ffd83dbSDimitry Andric cl::Hidden); 2665ffd83dbSDimitry Andric 267e8d8bef9SDimitry Andric static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", 268e8d8bef9SDimitry Andric cl::desc("sort stack allocations"), 269e8d8bef9SDimitry Andric cl::init(true), cl::Hidden); 270e8d8bef9SDimitry Andric 271fe6060f1SDimitry Andric cl::opt<bool> EnableHomogeneousPrologEpilog( 27281ad6265SDimitry Andric "homogeneous-prolog-epilog", cl::Hidden, 273fe6060f1SDimitry Andric cl::desc("Emit homogeneous prologue and epilogue for the size " 274fe6060f1SDimitry Andric "optimization (default = off)")); 275fe6060f1SDimitry Andric 2760fca6ea1SDimitry Andric // Stack hazard padding size. 0 = disabled. 2770fca6ea1SDimitry Andric static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size", 2780fca6ea1SDimitry Andric cl::init(0), cl::Hidden); 279*62987288SDimitry Andric // Stack hazard size for analysis remarks. StackHazardSize takes precedence. 280*62987288SDimitry Andric static cl::opt<unsigned> 281*62987288SDimitry Andric StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), 282*62987288SDimitry Andric cl::Hidden); 2830fca6ea1SDimitry Andric // Whether to insert padding into non-streaming functions (for testing). 
2840fca6ea1SDimitry Andric static cl::opt<bool> 2850fca6ea1SDimitry Andric StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", 2860fca6ea1SDimitry Andric cl::init(false), cl::Hidden); 2870fca6ea1SDimitry Andric 2880b57cec5SDimitry Andric STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); 2890b57cec5SDimitry Andric 290fe6060f1SDimitry Andric /// Returns how much of the incoming argument stack area (in bytes) we should 291fe6060f1SDimitry Andric /// clean up in an epilogue. For the C calling convention this will be 0, for 292fe6060f1SDimitry Andric /// guaranteed tail call conventions it can be positive (a normal return or a 293fe6060f1SDimitry Andric /// tail call to a function that uses less stack space for arguments) or 294fe6060f1SDimitry Andric /// negative (for a tail call to a function that needs more stack space than us 295fe6060f1SDimitry Andric /// for arguments). 296fe6060f1SDimitry Andric static int64_t getArgumentStackToRestore(MachineFunction &MF, 2975ffd83dbSDimitry Andric MachineBasicBlock &MBB) { 2985ffd83dbSDimitry Andric MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 2995ffd83dbSDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 3005f757f3fSDimitry Andric bool IsTailCallReturn = (MBB.end() != MBBI) 3015f757f3fSDimitry Andric ? AArch64InstrInfo::isTailCallReturnInst(*MBBI) 3025f757f3fSDimitry Andric : false; 3035ffd83dbSDimitry Andric 304fe6060f1SDimitry Andric int64_t ArgumentPopSize = 0; 3055ffd83dbSDimitry Andric if (IsTailCallReturn) { 3065ffd83dbSDimitry Andric MachineOperand &StackAdjust = MBBI->getOperand(1); 3075ffd83dbSDimitry Andric 3085ffd83dbSDimitry Andric // For a tail-call in a callee-pops-arguments environment, some or all of 3095ffd83dbSDimitry Andric // the stack may actually be in use for the call's arguments, this is 3105ffd83dbSDimitry Andric // calculated during LowerCall and consumed here... 
3115ffd83dbSDimitry Andric ArgumentPopSize = StackAdjust.getImm(); 3125ffd83dbSDimitry Andric } else { 3135ffd83dbSDimitry Andric // ... otherwise the amount to pop is *all* of the argument space, 3145ffd83dbSDimitry Andric // conveniently stored in the MachineFunctionInfo by 3155ffd83dbSDimitry Andric // LowerFormalArguments. This will, of course, be zero for the C calling 3165ffd83dbSDimitry Andric // convention. 3175ffd83dbSDimitry Andric ArgumentPopSize = AFI->getArgumentStackToRestore(); 3185ffd83dbSDimitry Andric } 3195ffd83dbSDimitry Andric 3205ffd83dbSDimitry Andric return ArgumentPopSize; 3215ffd83dbSDimitry Andric } 3225ffd83dbSDimitry Andric 323fe6060f1SDimitry Andric static bool produceCompactUnwindFrame(MachineFunction &MF); 324fe6060f1SDimitry Andric static bool needsWinCFI(const MachineFunction &MF); 325fe6060f1SDimitry Andric static StackOffset getSVEStackSize(const MachineFunction &MF); 3260fca6ea1SDimitry Andric static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB); 327fe6060f1SDimitry Andric 328fe6060f1SDimitry Andric /// Returns true if a homogeneous prolog or epilog code can be emitted 329fe6060f1SDimitry Andric /// for the size optimization. If possible, a frame helper call is injected. 330fe6060f1SDimitry Andric /// When Exit block is given, this check is for epilog. 331fe6060f1SDimitry Andric bool AArch64FrameLowering::homogeneousPrologEpilog( 332fe6060f1SDimitry Andric MachineFunction &MF, MachineBasicBlock *Exit) const { 333fe6060f1SDimitry Andric if (!MF.getFunction().hasMinSize()) 334fe6060f1SDimitry Andric return false; 335fe6060f1SDimitry Andric if (!EnableHomogeneousPrologEpilog) 336fe6060f1SDimitry Andric return false; 337fe6060f1SDimitry Andric if (EnableRedZone) 338fe6060f1SDimitry Andric return false; 339fe6060f1SDimitry Andric 340fe6060f1SDimitry Andric // TODO: Window is supported yet. 
341fe6060f1SDimitry Andric if (needsWinCFI(MF)) 342fe6060f1SDimitry Andric return false; 343fe6060f1SDimitry Andric // TODO: SVE is not supported yet. 344fe6060f1SDimitry Andric if (getSVEStackSize(MF)) 345fe6060f1SDimitry Andric return false; 346fe6060f1SDimitry Andric 347fe6060f1SDimitry Andric // Bail on stack adjustment needed on return for simplicity. 348fe6060f1SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 349fe6060f1SDimitry Andric const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 350fe6060f1SDimitry Andric if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)) 351fe6060f1SDimitry Andric return false; 352fe6060f1SDimitry Andric if (Exit && getArgumentStackToRestore(MF, *Exit)) 353fe6060f1SDimitry Andric return false; 354fe6060f1SDimitry Andric 3555f757f3fSDimitry Andric auto *AFI = MF.getInfo<AArch64FunctionInfo>(); 3560fca6ea1SDimitry Andric if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges()) 3575f757f3fSDimitry Andric return false; 3585f757f3fSDimitry Andric 3595f757f3fSDimitry Andric // If there are an odd number of GPRs before LR and FP in the CSRs list, 3605f757f3fSDimitry Andric // they will not be paired into one RegPairInfo, which is incompatible with 3615f757f3fSDimitry Andric // the assumption made by the homogeneous prolog epilog pass. 
3625f757f3fSDimitry Andric const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 3635f757f3fSDimitry Andric unsigned NumGPRs = 0; 3645f757f3fSDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) { 3655f757f3fSDimitry Andric Register Reg = CSRegs[I]; 3665f757f3fSDimitry Andric if (Reg == AArch64::LR) { 3675f757f3fSDimitry Andric assert(CSRegs[I + 1] == AArch64::FP); 3685f757f3fSDimitry Andric if (NumGPRs % 2 != 0) 3695f757f3fSDimitry Andric return false; 3705f757f3fSDimitry Andric break; 3715f757f3fSDimitry Andric } 3725f757f3fSDimitry Andric if (AArch64::GPR64RegClass.contains(Reg)) 3735f757f3fSDimitry Andric ++NumGPRs; 3745f757f3fSDimitry Andric } 3755f757f3fSDimitry Andric 376fe6060f1SDimitry Andric return true; 377fe6060f1SDimitry Andric } 378fe6060f1SDimitry Andric 379fe6060f1SDimitry Andric /// Returns true if CSRs should be paired. 380fe6060f1SDimitry Andric bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const { 381fe6060f1SDimitry Andric return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF); 382fe6060f1SDimitry Andric } 383fe6060f1SDimitry Andric 3840b57cec5SDimitry Andric /// This is the biggest offset to the stack pointer we can encode in aarch64 3850b57cec5SDimitry Andric /// instructions (without using a separate calculation and a temp register). 3860b57cec5SDimitry Andric /// Note that the exception here are vector stores/loads which cannot encode any 3870b57cec5SDimitry Andric /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()). 3880b57cec5SDimitry Andric static const unsigned DefaultSafeSPDisplacement = 255; 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric /// Look at each instruction that references stack frames and return the stack 3910b57cec5SDimitry Andric /// size limit beyond which some of these instructions will require a scratch 3920b57cec5SDimitry Andric /// register during their expansion later. 
3930b57cec5SDimitry Andric static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { 3940b57cec5SDimitry Andric // FIXME: For now, just conservatively guestimate based on unscaled indexing 3950b57cec5SDimitry Andric // range. We'll end up allocating an unnecessary spill slot a lot, but 3960b57cec5SDimitry Andric // realistically that's not a big deal at this stage of the game. 3970b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 3980b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 3990b57cec5SDimitry Andric if (MI.isDebugInstr() || MI.isPseudo() || 4000b57cec5SDimitry Andric MI.getOpcode() == AArch64::ADDXri || 4010b57cec5SDimitry Andric MI.getOpcode() == AArch64::ADDSXri) 4020b57cec5SDimitry Andric continue; 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 4050b57cec5SDimitry Andric if (!MO.isFI()) 4060b57cec5SDimitry Andric continue; 4070b57cec5SDimitry Andric 4088bcb0991SDimitry Andric StackOffset Offset; 4090b57cec5SDimitry Andric if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == 4100b57cec5SDimitry Andric AArch64FrameOffsetCannotUpdate) 4110b57cec5SDimitry Andric return 0; 4120b57cec5SDimitry Andric } 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric return DefaultSafeSPDisplacement; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 418480093f4SDimitry Andric TargetStackID::Value 419480093f4SDimitry Andric AArch64FrameLowering::getStackIDForScalableVectors() const { 420e8d8bef9SDimitry Andric return TargetStackID::ScalableVector; 421480093f4SDimitry Andric } 422480093f4SDimitry Andric 42362cfcf62SDimitry Andric /// Returns the size of the fixed object area (allocated next to sp on entry) 42462cfcf62SDimitry Andric /// On Win64 this may include a var args area and an UnwindHelp object for EH. 
42562cfcf62SDimitry Andric static unsigned getFixedObjectSize(const MachineFunction &MF, 42662cfcf62SDimitry Andric const AArch64FunctionInfo *AFI, bool IsWin64, 42762cfcf62SDimitry Andric bool IsFunclet) { 42862cfcf62SDimitry Andric if (!IsWin64 || IsFunclet) { 429fe6060f1SDimitry Andric return AFI->getTailCallReservedStack(); 43062cfcf62SDimitry Andric } else { 4310fca6ea1SDimitry Andric if (AFI->getTailCallReservedStack() != 0 && 4320fca6ea1SDimitry Andric !MF.getFunction().getAttributes().hasAttrSomewhere( 4330fca6ea1SDimitry Andric Attribute::SwiftAsync)) 434fe6060f1SDimitry Andric report_fatal_error("cannot generate ABI-changing tail call for Win64"); 43562cfcf62SDimitry Andric // Var args are stored here in the primary function. 43662cfcf62SDimitry Andric const unsigned VarArgsArea = AFI->getVarArgsGPRSize(); 43762cfcf62SDimitry Andric // To support EH funclets we allocate an UnwindHelp object 43862cfcf62SDimitry Andric const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0); 4390fca6ea1SDimitry Andric return AFI->getTailCallReservedStack() + 4400fca6ea1SDimitry Andric alignTo(VarArgsArea + UnwindHelpObject, 16); 44162cfcf62SDimitry Andric } 44262cfcf62SDimitry Andric } 44362cfcf62SDimitry Andric 4448bcb0991SDimitry Andric /// Returns the size of the entire SVE stackframe (calleesaves + spills). 4458bcb0991SDimitry Andric static StackOffset getSVEStackSize(const MachineFunction &MF) { 4468bcb0991SDimitry Andric const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 447e8d8bef9SDimitry Andric return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); 4488bcb0991SDimitry Andric } 4498bcb0991SDimitry Andric 4500b57cec5SDimitry Andric bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { 4510b57cec5SDimitry Andric if (!EnableRedZone) 4520b57cec5SDimitry Andric return false; 453fe6060f1SDimitry Andric 4540b57cec5SDimitry Andric // Don't use the red zone if the function explicitly asks us not to. 
4550b57cec5SDimitry Andric // This is typically used for kernel code. 456fe6060f1SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 457fe6060f1SDimitry Andric const unsigned RedZoneSize = 458fe6060f1SDimitry Andric Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction()); 459fe6060f1SDimitry Andric if (!RedZoneSize) 4600b57cec5SDimitry Andric return false; 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 4630b57cec5SDimitry Andric const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 464480093f4SDimitry Andric uint64_t NumBytes = AFI->getLocalStackSize(); 4650b57cec5SDimitry Andric 4660fca6ea1SDimitry Andric // If neither NEON or SVE are available, a COPY from one Q-reg to 4670fca6ea1SDimitry Andric // another requires a spill -> reload sequence. We can do that 4680fca6ea1SDimitry Andric // using a pre-decrementing store/post-decrementing load, but 4690fca6ea1SDimitry Andric // if we do so, we can't use the Red Zone. 4700fca6ea1SDimitry Andric bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() && 4710fca6ea1SDimitry Andric !Subtarget.isNeonAvailable() && 4720fca6ea1SDimitry Andric !Subtarget.hasSVE(); 4730fca6ea1SDimitry Andric 474fe6060f1SDimitry Andric return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || 4750fca6ea1SDimitry Andric getSVEStackSize(MF) || LowerQRegCopyThroughMem); 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric /// hasFP - Return true if the specified function should have a dedicated frame 4790b57cec5SDimitry Andric /// pointer register. 
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  // Win64 EH requires a frame pointer if funclets are present, as the locals
  // are accessed off the frame pointer in both the parent function and the
  // funclets.
  if (MF.hasEHFunclets())
    return true;
  // Retain behavior of always omitting the FP for leaf functions when possible.
  if (MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;
  if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
      MFI.hasStackMap() || MFI.hasPatchPoint() ||
      RegInfo->hasStackRealignment(MF))
    return true;
  // With large callframes around we may need to use FP to access the scavenging
  // emergency spillslot.
  //
  // Unfortunately some calls to hasFP() like machine verifier ->
  // getReservedReg() -> hasFP in the middle of global isel are too early
  // to know the max call frame size. Hopefully conservatively returning "true"
  // in those cases is fine.
  // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
  if (!MFI.isMaxCallFrameSizeComputed() ||
      MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
    return true;

  return false;
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool AArch64FrameLowering::hasReservedCallFrame(
    const MachineFunction &MF) const {
  // The stack probing code for the dynamically allocated outgoing arguments
  // area assumes that the stack is probed at the top - either by the prologue
  // code, which issues a probe if `hasVarSizedObjects` return true, or by the
  // most recent variable-sized object allocation. Changing the condition here
  // may need to be followed up by changes to the probe issuing logic.
  return !MF.getFrameInfo().hasVarSizedObjects();
}

// Replace ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos with real SP adjustments
// (only needed when the call frame is not reserved, or the callee pops
// arguments), then erase the pseudo.
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  const AArch64TargetLowering *TLI =
      MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
  // Only referenced from the assert below.
  [[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo();
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, getStackAlign());
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");

      if (TLI->hasInlineStackProbe(MF) &&
          -Amount >= AArch64::StackProbeMaxUnprobedStack) {
        // When stack probing is enabled, the decrement of SP may need to be
        // probed. We only need to do this if the call site needs 1024 bytes of
        // space or more, because a region smaller than that is allowed to be
        // unprobed at an ABI boundary. We rely on the fact that SP has been
        // probed exactly at this point, either by the prologue or most recent
        // dynamic allocation.
        assert(MFI.hasVarSizedObjects() &&
               "non-reserved call frame without var sized objects?");
        Register ScratchReg =
            MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
        inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
      } else {
        emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
                        StackOffset::getFixed(Amount), TII);
      }
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
  }
  return MBB.erase(I);
}

// Emit .cfi_offset directives (FrameSetup) for all non-SVE callee-saved
// slots, describing where each callee-saved GPR was spilled.
void AArch64FrameLowering::emitCalleeSavedGPRLocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  SMEAttrs Attrs(MF.getFunction());
  bool LocallyStreaming =
      Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    // Scalable-vector slots are handled by emitCalleeSavedSVELocations.
    if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
    int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();

    // The location of VG will be emitted before each streaming-mode change in
    // the function. Only locally-streaming functions require emitting the
    // non-streaming VG location here.
    if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
        (!LocallyStreaming &&
         DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
      continue;

    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Emit CFA-relative offset directives (FrameSetup) for callee-saved slots on
// the scalable-vector (SVE) stack area.
void AArch64FrameLowering::emitCalleeSavedSVELocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();

  for (const auto &Info : CSI) {
    if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
      continue;

    // Not all unwinders may know about SVE registers, so assume the lowest
    // common denominator.
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    unsigned Reg = Info.getReg();
    if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
      continue;

    // Offset is scalable (object offset within the SVE area) minus the fixed
    // callee-saved area that sits between it and the CFA.
    StackOffset Offset =
        StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
        StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));

    unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Emit a single .cfi_same_value directive for DwarfReg at InsertPt.
static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator InsertPt,
                               unsigned DwarfReg) {
  unsigned CFIIndex =
      MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg));
  BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex);
}

// Reset the CFI state at the start of MBB to the function-entry state:
// CFA = SP + 0, RA sign state flipped back if return addresses are signed,
// and all callee-saved registers (plus X18 for shadow call stack) marked
// .cfi_same_value.
void AArch64FrameLowering::resetCFIToInitialState(
    MachineBasicBlock &MBB) const {

  MachineFunction &MF = *MBB.getParent();
  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  const auto &TRI =
      static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo());
  const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();

  const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
  DebugLoc DL;

  // Reset the CFA to `SP + 0`.
  MachineBasicBlock::iterator InsertPt = MBB.begin();
  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
      nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
  BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);

  // Flip the RA sign state.
  if (MFI.shouldSignReturnAddress(MF)) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
    BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
  }

  // Shadow call stack uses X18, reset it.
  if (MFI.needsShadowCallStackPrologueEpilogue(MF))
    insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
                       TRI.getDwarfRegNum(AArch64::X18, true));

  // Emit .cfi_same_value for callee-saved registers.
  const std::vector<CalleeSavedInfo> &CSI =
      MF.getFrameInfo().getCalleeSavedInfo();
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!TRI.regNeedsCFI(Reg, Reg))
      continue;
    insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
                       TRI.getDwarfRegNum(Reg, true));
  }
}

// Emit .cfi_restore directives (FrameDestroy) for callee-saved registers.
// The SVE flag selects whether scalable-vector or regular slots are handled.
static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    bool SVE) {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  for (const auto &Info : CSI) {
    // Process only the slots matching the requested (SVE / non-SVE) kind.
    if (SVE !=
        (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
      continue;

    unsigned Reg = Info.getReg();
    if (SVE &&
        !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
      continue;

    if (!Info.isRestored())
      continue;

    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
        nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameDestroy);
  }
}

// Convenience wrapper: restores for the non-SVE callee-saved registers.
void AArch64FrameLowering::emitCalleeSavedGPRRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  emitCalleeSavedRestores(MBB, MBBI, false);
}

// Convenience wrapper: restores for the SVE callee-saved registers.
void AArch64FrameLowering::emitCalleeSavedSVERestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  emitCalleeSavedRestores(MBB, MBBI, true);
}

// Return the maximum possible number of bytes for `Size` due to the
//
// architectural limit on the size of a SVE register.
static int64_t upperBound(StackOffset Size) {
  // An SVE register is architecturally at most 16x the minimum (128-bit)
  // granule, so one scalable byte is at most 16 real bytes.
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}

// Allocate AllocSize bytes of stack (plus RealignmentPadding for
// realignment), emitting stack probes when the target requires them and the
// CFI / WinCFI directives requested by the caller.
void AArch64FrameLowering::allocateStackSpace(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) const {

  if (!AllocSize)
    return;

  DebugLoc DL;
  MachineFunction &MF = *MBB.getParent();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  const int64_t MaxAlign = MFI.getMaxAlign().value();
  const uint64_t AndMask = ~(MaxAlign - 1);

  // No stack probing required: plain SP decrement (through a scratch register
  // if we also have to realign).
  if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
    Register TargetReg = RealignmentPadding
                             ? findScratchNonCalleeSaveRegister(&MBB)
                             : AArch64::SP;
    // SUB Xd/SP, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                    EmitCFI, InitialOffset);

    if (RealignmentPadding) {
      // AND SP, X9, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
          .addReg(TargetReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI.setStackRealigned(true);

      // No need for SEH instructions here; if we're realigning the stack,
      // we've set a frame pointer and already finished the SEH prologue.
      assert(!NeedsWinCFI);
    }
    return;
  }

  //
  // Stack probing allocation.
  //

  // Fixed length allocation. If we don't need to re-align the stack and don't
  // have SVE objects, we can use a more efficient sequence for stack probing.
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
    assert(ScratchReg != AArch64::NoRegister);
    BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
        .addDef(ScratchReg)
        .addImm(AllocSize.getFixed())
        .addImm(InitialOffset.getFixed())
        .addImm(InitialOffset.getScalable());
    // The fixed allocation may leave unprobed bytes at the top of the
    // stack. If we have subsequent allocation (e.g. if we have variable-sized
    // objects), we need to issue an extra probe, so these allocations start in
    // a known state.
    if (FollowupAllocs) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    return;
  }

  // Variable length allocation.

  // If the (unknown) allocation size cannot exceed the probe size, decrement
  // the stack pointer right away.
  int64_t ProbeSize = AFI.getStackProbeSize();
  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
                              ? findScratchNonCalleeSaveRegister(&MBB)
                              : AArch64::SP;
    assert(ScratchReg != AArch64::NoRegister);
    // SUB Xd, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
      // AND SP, Xn, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI.setStackRealigned(true);
    }
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
                              AArch64::StackProbeMaxUnprobedStack) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // Emit a variable-length allocation probing loop.
  // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
  // each of them guaranteed to adjust the stack by less than the probe size.
  Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
  assert(TargetReg != AArch64::NoRegister);
  // SUB Xd, SP, AllocSize
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
    // AND Xn, Xn, 0b11111...0000
    BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
        .addReg(TargetReg, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
        .setMIFlags(MachineInstr::FrameSetup);
  }

  BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
      .addReg(TargetReg);
  if (EmitCFI) {
    // Set the CFA register back to SP.
    unsigned Reg =
        Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
  if (RealignmentPadding)
    AFI.setStackRealigned(true);
}

// Map Reg to the register that should actually be zeroed: the 64-bit X
// register for GPR aliases, the widest FP/vector register (Z when SVE is
// available, else Q) for FP aliases, and 0 (no zeroing) for anything else.
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
  switch (Reg.id()) {
  default:
    // The called routine is expected to preserve r19-r28
    // r29 and r30 are used as frame pointer and link register resp.
    return 0;

    // GPRs
#define CASE(n)                                                                \
  case AArch64::W##n:                                                          \
  case AArch64::X##n:                                                          \
    return AArch64::X##n
    CASE(0);
    CASE(1);
    CASE(2);
    CASE(3);
    CASE(4);
    CASE(5);
    CASE(6);
    CASE(7);
    CASE(8);
    CASE(9);
    CASE(10);
    CASE(11);
    CASE(12);
    CASE(13);
    CASE(14);
    CASE(15);
    CASE(16);
    CASE(17);
    CASE(18);
#undef CASE

    // FPRs
#define CASE(n)                                                                \
  case AArch64::B##n:                                                          \
  case AArch64::H##n:                                                          \
  case AArch64::S##n:                                                          \
  case AArch64::D##n:                                                          \
  case AArch64::Q##n:                                                          \
    return HasSVE ? AArch64::Z##n : AArch64::Q##n
    CASE(0);
    CASE(1);
    CASE(2);
    CASE(3);
    CASE(4);
    CASE(5);
    CASE(6);
    CASE(7);
    CASE(8);
    CASE(9);
    CASE(10);
    CASE(11);
    CASE(12);
    CASE(13);
    CASE(14);
    CASE(15);
    CASE(16);
    CASE(17);
    CASE(18);
    CASE(19);
    CASE(20);
    CASE(21);
    CASE(22);
    CASE(23);
    CASE(24);
    CASE(25);
    CASE(26);
    CASE(27);
    CASE(28);
    CASE(29);
    CASE(30);
    CASE(31);
#undef CASE
  }
}

// Zero the requested call-used registers before the block's terminator:
// GPRs and FP/vector registers via buildClearRegister, and SVE predicate
// registers via PFALSE.
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                                MachineBasicBlock &MBB) const {
  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  const MachineFunction &MF = *MBB.getParent();
  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();

  BitVector GPRsToZero(TRI.getNumRegs());
  BitVector FPRsToZero(TRI.getNumRegs());
  bool HasSVE = STI.hasSVE();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (TRI.isGeneralPurposeRegister(MF, Reg)) {
      // For GPRs, we only care to clear out the 64-bit register.
      if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
        GPRsToZero.set(XReg);
    } else if (AArch64InstrInfo::isFpOrNEON(Reg)) {
      // For FPRs, clear the widest aliasing register (Z or Q).
      if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
        FPRsToZero.set(XReg);
    }
  }

  const AArch64InstrInfo &TII = *STI.getInstrInfo();

  // Zero out GPRs.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out FP/vector registers.
  for (MCRegister Reg : FPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  if (HasSVE) {
    for (MCRegister PReg :
         {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
          AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
          AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
          AArch64::P15}) {
      if (RegsToZero[PReg])
        BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
    }
  }
}

// Collect the registers that must be treated as unavailable at the start of
// the entry block: live-ins plus all callee-saved registers.
static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
                                   const MachineBasicBlock &MBB) {
  const MachineFunction *MF = MBB.getParent();
  LiveRegs.addLiveIns(MBB);
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
10570b57cec5SDimitry Andric // 10580b57cec5SDimitry Andric // FIXME: This is a bit conservative, since in the above case we could use one 10590b57cec5SDimitry Andric // of the callee-save registers as a scratch temp to re-align the stack pointer, 10600b57cec5SDimitry Andric // but we would then have to make sure that we were in fact saving at least one 10610b57cec5SDimitry Andric // callee-save register in the prologue, which is additional complexity that 10620b57cec5SDimitry Andric // doesn't seem worth the benefit. 10630fca6ea1SDimitry Andric static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { 10640b57cec5SDimitry Andric MachineFunction *MF = MBB->getParent(); 10650b57cec5SDimitry Andric 10660b57cec5SDimitry Andric // If MBB is an entry block, use X9 as the scratch register 10670fca6ea1SDimitry Andric // preserve_none functions may be using X9 to pass arguments, 10680fca6ea1SDimitry Andric // so prefer to pick an available register below. 10690fca6ea1SDimitry Andric if (&MF->front() == MBB && 10700fca6ea1SDimitry Andric MF->getFunction().getCallingConv() != CallingConv::PreserveNone) 10710b57cec5SDimitry Andric return AArch64::X9; 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 10740b57cec5SDimitry Andric const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); 10750b57cec5SDimitry Andric LivePhysRegs LiveRegs(TRI); 10765f757f3fSDimitry Andric getLiveRegsForEntryMBB(LiveRegs, *MBB); 10770b57cec5SDimitry Andric 10780b57cec5SDimitry Andric // Prefer X9 since it was historically used for the prologue scratch reg. 
10790b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 10800b57cec5SDimitry Andric if (LiveRegs.available(MRI, AArch64::X9)) 10810b57cec5SDimitry Andric return AArch64::X9; 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric for (unsigned Reg : AArch64::GPR64RegClass) { 10840b57cec5SDimitry Andric if (LiveRegs.available(MRI, Reg)) 10850b57cec5SDimitry Andric return Reg; 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric return AArch64::NoRegister; 10880b57cec5SDimitry Andric } 10890b57cec5SDimitry Andric 10900b57cec5SDimitry Andric bool AArch64FrameLowering::canUseAsPrologue( 10910b57cec5SDimitry Andric const MachineBasicBlock &MBB) const { 10920b57cec5SDimitry Andric const MachineFunction *MF = MBB.getParent(); 10930b57cec5SDimitry Andric MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 10940b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 10950b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 10965f757f3fSDimitry Andric const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); 10975f757f3fSDimitry Andric const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>(); 10980b57cec5SDimitry Andric 10995f757f3fSDimitry Andric if (AFI->hasSwiftAsyncContext()) { 11005f757f3fSDimitry Andric const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); 11015f757f3fSDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 11025f757f3fSDimitry Andric LivePhysRegs LiveRegs(TRI); 11035f757f3fSDimitry Andric getLiveRegsForEntryMBB(LiveRegs, MBB); 11045f757f3fSDimitry Andric // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are 11055f757f3fSDimitry Andric // available. 
11065f757f3fSDimitry Andric if (!LiveRegs.available(MRI, AArch64::X16) || 11075f757f3fSDimitry Andric !LiveRegs.available(MRI, AArch64::X17)) 11085f757f3fSDimitry Andric return false; 11095f757f3fSDimitry Andric } 11105f757f3fSDimitry Andric 11110fca6ea1SDimitry Andric // Certain stack probing sequences might clobber flags, then we can't use 11120fca6ea1SDimitry Andric // the block as a prologue if the flags register is a live-in. 11130fca6ea1SDimitry Andric if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() && 11140fca6ea1SDimitry Andric MBB.isLiveIn(AArch64::NZCV)) 11150fca6ea1SDimitry Andric return false; 11160fca6ea1SDimitry Andric 11175f757f3fSDimitry Andric // Don't need a scratch register if we're not going to re-align the stack or 11185f757f3fSDimitry Andric // emit stack probes. 11190fca6ea1SDimitry Andric if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF)) 11200b57cec5SDimitry Andric return true; 11210b57cec5SDimitry Andric // Otherwise, we can use any block as long as it has a scratch register 11220b57cec5SDimitry Andric // available. 11230b57cec5SDimitry Andric return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; 11240b57cec5SDimitry Andric } 11250b57cec5SDimitry Andric 11260b57cec5SDimitry Andric static bool windowsRequiresStackProbe(MachineFunction &MF, 1127480093f4SDimitry Andric uint64_t StackSizeInBytes) { 11280b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 11295f757f3fSDimitry Andric const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>(); 11300b57cec5SDimitry Andric // TODO: When implementing stack protectors, take that into account 11310b57cec5SDimitry Andric // for the probe threshold. 
11325f757f3fSDimitry Andric return Subtarget.isTargetWindows() && MFI.hasStackProbing() && 11335f757f3fSDimitry Andric StackSizeInBytes >= uint64_t(MFI.getStackProbeSize()); 11340b57cec5SDimitry Andric } 11350b57cec5SDimitry Andric 1136e8d8bef9SDimitry Andric static bool needsWinCFI(const MachineFunction &MF) { 1137e8d8bef9SDimitry Andric const Function &F = MF.getFunction(); 1138e8d8bef9SDimitry Andric return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && 1139e8d8bef9SDimitry Andric F.needsUnwindTableEntry(); 1140e8d8bef9SDimitry Andric } 1141e8d8bef9SDimitry Andric 11420b57cec5SDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( 1143480093f4SDimitry Andric MachineFunction &MF, uint64_t StackBumpBytes) const { 11440b57cec5SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 11450b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 11460b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 11470b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1148fe6060f1SDimitry Andric if (homogeneousPrologEpilog(MF)) 1149fe6060f1SDimitry Andric return false; 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric if (AFI->getLocalStackSize() == 0) 11520b57cec5SDimitry Andric return false; 11530b57cec5SDimitry Andric 1154e8d8bef9SDimitry Andric // For WinCFI, if optimizing for size, prefer to not combine the stack bump 1155e8d8bef9SDimitry Andric // (to force a stp with predecrement) to match the packed unwind format, 1156e8d8bef9SDimitry Andric // provided that there actually are any callee saved registers to merge the 1157e8d8bef9SDimitry Andric // decrement with. 1158e8d8bef9SDimitry Andric // This is potentially marginally slower, but allows using the packed 1159e8d8bef9SDimitry Andric // unwind format for functions that both have a local area and callee saved 1160e8d8bef9SDimitry Andric // registers. 
Using the packed unwind format notably reduces the size of 1161e8d8bef9SDimitry Andric // the unwind info. 1162e8d8bef9SDimitry Andric if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && 1163e8d8bef9SDimitry Andric MF.getFunction().hasOptSize()) 1164e8d8bef9SDimitry Andric return false; 1165e8d8bef9SDimitry Andric 11660b57cec5SDimitry Andric // 512 is the maximum immediate for stp/ldp that will be used for 11670b57cec5SDimitry Andric // callee-save save/restores 11680b57cec5SDimitry Andric if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) 11690b57cec5SDimitry Andric return false; 11700b57cec5SDimitry Andric 11710b57cec5SDimitry Andric if (MFI.hasVarSizedObjects()) 11720b57cec5SDimitry Andric return false; 11730b57cec5SDimitry Andric 1174fe6060f1SDimitry Andric if (RegInfo->hasStackRealignment(MF)) 11750b57cec5SDimitry Andric return false; 11760b57cec5SDimitry Andric 11770b57cec5SDimitry Andric // This isn't strictly necessary, but it simplifies things a bit since the 11780b57cec5SDimitry Andric // current RedZone handling code assumes the SP is adjusted by the 11790b57cec5SDimitry Andric // callee-save save/restore code. 11800b57cec5SDimitry Andric if (canUseRedZone(MF)) 11810b57cec5SDimitry Andric return false; 11820b57cec5SDimitry Andric 11838bcb0991SDimitry Andric // When there is an SVE area on the stack, always allocate the 11848bcb0991SDimitry Andric // callee-saves and spills/locals separately. 
11858bcb0991SDimitry Andric if (getSVEStackSize(MF)) 11868bcb0991SDimitry Andric return false; 11878bcb0991SDimitry Andric 11880b57cec5SDimitry Andric return true; 11890b57cec5SDimitry Andric } 11900b57cec5SDimitry Andric 11915ffd83dbSDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue( 11925ffd83dbSDimitry Andric MachineBasicBlock &MBB, unsigned StackBumpBytes) const { 11935ffd83dbSDimitry Andric if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes)) 11945ffd83dbSDimitry Andric return false; 11955ffd83dbSDimitry Andric 11965ffd83dbSDimitry Andric if (MBB.empty()) 11975ffd83dbSDimitry Andric return true; 11985ffd83dbSDimitry Andric 11995ffd83dbSDimitry Andric // Disable combined SP bump if the last instruction is an MTE tag store. It 12005ffd83dbSDimitry Andric // is almost always better to merge SP adjustment into those instructions. 12015ffd83dbSDimitry Andric MachineBasicBlock::iterator LastI = MBB.getFirstTerminator(); 12025ffd83dbSDimitry Andric MachineBasicBlock::iterator Begin = MBB.begin(); 12035ffd83dbSDimitry Andric while (LastI != Begin) { 12045ffd83dbSDimitry Andric --LastI; 12055ffd83dbSDimitry Andric if (LastI->isTransient()) 12065ffd83dbSDimitry Andric continue; 12075ffd83dbSDimitry Andric if (!LastI->getFlag(MachineInstr::FrameDestroy)) 12085ffd83dbSDimitry Andric break; 12095ffd83dbSDimitry Andric } 12105ffd83dbSDimitry Andric switch (LastI->getOpcode()) { 12115ffd83dbSDimitry Andric case AArch64::STGloop: 12125ffd83dbSDimitry Andric case AArch64::STZGloop: 121306c3fb27SDimitry Andric case AArch64::STGi: 121406c3fb27SDimitry Andric case AArch64::STZGi: 121506c3fb27SDimitry Andric case AArch64::ST2Gi: 121606c3fb27SDimitry Andric case AArch64::STZ2Gi: 12175ffd83dbSDimitry Andric return false; 12185ffd83dbSDimitry Andric default: 12195ffd83dbSDimitry Andric return true; 12205ffd83dbSDimitry Andric } 12215ffd83dbSDimitry Andric llvm_unreachable("unreachable"); 12225ffd83dbSDimitry Andric } 
12235ffd83dbSDimitry Andric 12240b57cec5SDimitry Andric // Given a load or a store instruction, generate an appropriate unwinding SEH 12250b57cec5SDimitry Andric // code on Windows. 12260b57cec5SDimitry Andric static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, 12270b57cec5SDimitry Andric const TargetInstrInfo &TII, 12280b57cec5SDimitry Andric MachineInstr::MIFlag Flag) { 12290b57cec5SDimitry Andric unsigned Opc = MBBI->getOpcode(); 12300b57cec5SDimitry Andric MachineBasicBlock *MBB = MBBI->getParent(); 12310b57cec5SDimitry Andric MachineFunction &MF = *MBB->getParent(); 12320b57cec5SDimitry Andric DebugLoc DL = MBBI->getDebugLoc(); 12330b57cec5SDimitry Andric unsigned ImmIdx = MBBI->getNumOperands() - 1; 12340b57cec5SDimitry Andric int Imm = MBBI->getOperand(ImmIdx).getImm(); 12350b57cec5SDimitry Andric MachineInstrBuilder MIB; 12360b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 12370b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric switch (Opc) { 12400b57cec5SDimitry Andric default: 12410b57cec5SDimitry Andric llvm_unreachable("No SEH Opcode for this instruction"); 12420b57cec5SDimitry Andric case AArch64::LDPDpost: 12430b57cec5SDimitry Andric Imm = -Imm; 1244bdd1243dSDimitry Andric [[fallthrough]]; 12450b57cec5SDimitry Andric case AArch64::STPDpre: { 12460b57cec5SDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12470b57cec5SDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); 12480b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X)) 12490b57cec5SDimitry Andric .addImm(Reg0) 12500b57cec5SDimitry Andric .addImm(Reg1) 12510b57cec5SDimitry Andric .addImm(Imm * 8) 12520b57cec5SDimitry Andric .setMIFlag(Flag); 12530b57cec5SDimitry Andric break; 12540b57cec5SDimitry Andric } 12550b57cec5SDimitry Andric case 
AArch64::LDPXpost: 12560b57cec5SDimitry Andric Imm = -Imm; 1257bdd1243dSDimitry Andric [[fallthrough]]; 12580b57cec5SDimitry Andric case AArch64::STPXpre: { 12598bcb0991SDimitry Andric Register Reg0 = MBBI->getOperand(1).getReg(); 12608bcb0991SDimitry Andric Register Reg1 = MBBI->getOperand(2).getReg(); 12610b57cec5SDimitry Andric if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) 12620b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) 12630b57cec5SDimitry Andric .addImm(Imm * 8) 12640b57cec5SDimitry Andric .setMIFlag(Flag); 12650b57cec5SDimitry Andric else 12660b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X)) 12670b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg0)) 12680b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg1)) 12690b57cec5SDimitry Andric .addImm(Imm * 8) 12700b57cec5SDimitry Andric .setMIFlag(Flag); 12710b57cec5SDimitry Andric break; 12720b57cec5SDimitry Andric } 12730b57cec5SDimitry Andric case AArch64::LDRDpost: 12740b57cec5SDimitry Andric Imm = -Imm; 1275bdd1243dSDimitry Andric [[fallthrough]]; 12760b57cec5SDimitry Andric case AArch64::STRDpre: { 12770b57cec5SDimitry Andric unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12780b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X)) 12790b57cec5SDimitry Andric .addImm(Reg) 12800b57cec5SDimitry Andric .addImm(Imm) 12810b57cec5SDimitry Andric .setMIFlag(Flag); 12820b57cec5SDimitry Andric break; 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric case AArch64::LDRXpost: 12850b57cec5SDimitry Andric Imm = -Imm; 1286bdd1243dSDimitry Andric [[fallthrough]]; 12870b57cec5SDimitry Andric case AArch64::STRXpre: { 12880b57cec5SDimitry Andric unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12890b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X)) 12900b57cec5SDimitry Andric .addImm(Reg) 12910b57cec5SDimitry Andric .addImm(Imm) 12920b57cec5SDimitry 
Andric .setMIFlag(Flag); 12930b57cec5SDimitry Andric break; 12940b57cec5SDimitry Andric } 12950b57cec5SDimitry Andric case AArch64::STPDi: 12960b57cec5SDimitry Andric case AArch64::LDPDi: { 12970b57cec5SDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 12980b57cec5SDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12990b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP)) 13000b57cec5SDimitry Andric .addImm(Reg0) 13010b57cec5SDimitry Andric .addImm(Reg1) 13020b57cec5SDimitry Andric .addImm(Imm * 8) 13030b57cec5SDimitry Andric .setMIFlag(Flag); 13040b57cec5SDimitry Andric break; 13050b57cec5SDimitry Andric } 13060b57cec5SDimitry Andric case AArch64::STPXi: 13070b57cec5SDimitry Andric case AArch64::LDPXi: { 13088bcb0991SDimitry Andric Register Reg0 = MBBI->getOperand(0).getReg(); 13098bcb0991SDimitry Andric Register Reg1 = MBBI->getOperand(1).getReg(); 13100b57cec5SDimitry Andric if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) 13110b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) 13120b57cec5SDimitry Andric .addImm(Imm * 8) 13130b57cec5SDimitry Andric .setMIFlag(Flag); 13140b57cec5SDimitry Andric else 13150b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP)) 13160b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg0)) 13170b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg1)) 13180b57cec5SDimitry Andric .addImm(Imm * 8) 13190b57cec5SDimitry Andric .setMIFlag(Flag); 13200b57cec5SDimitry Andric break; 13210b57cec5SDimitry Andric } 13220b57cec5SDimitry Andric case AArch64::STRXui: 13230b57cec5SDimitry Andric case AArch64::LDRXui: { 13240b57cec5SDimitry Andric int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 13250b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg)) 13260b57cec5SDimitry Andric .addImm(Reg) 13270b57cec5SDimitry Andric .addImm(Imm * 8) 13280b57cec5SDimitry Andric 
.setMIFlag(Flag); 13290b57cec5SDimitry Andric break; 13300b57cec5SDimitry Andric } 13310b57cec5SDimitry Andric case AArch64::STRDui: 13320b57cec5SDimitry Andric case AArch64::LDRDui: { 13330b57cec5SDimitry Andric unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 13340b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg)) 13350b57cec5SDimitry Andric .addImm(Reg) 13360b57cec5SDimitry Andric .addImm(Imm * 8) 13370b57cec5SDimitry Andric .setMIFlag(Flag); 13380b57cec5SDimitry Andric break; 13390b57cec5SDimitry Andric } 13407a6dacacSDimitry Andric case AArch64::STPQi: 13417a6dacacSDimitry Andric case AArch64::LDPQi: { 13427a6dacacSDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 13437a6dacacSDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 13447a6dacacSDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP)) 13457a6dacacSDimitry Andric .addImm(Reg0) 13467a6dacacSDimitry Andric .addImm(Reg1) 13477a6dacacSDimitry Andric .addImm(Imm * 16) 13487a6dacacSDimitry Andric .setMIFlag(Flag); 13497a6dacacSDimitry Andric break; 13507a6dacacSDimitry Andric } 13517a6dacacSDimitry Andric case AArch64::LDPQpost: 13527a6dacacSDimitry Andric Imm = -Imm; 13530fca6ea1SDimitry Andric [[fallthrough]]; 13547a6dacacSDimitry Andric case AArch64::STPQpre: { 13557a6dacacSDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 13567a6dacacSDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); 13577a6dacacSDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX)) 13587a6dacacSDimitry Andric .addImm(Reg0) 13597a6dacacSDimitry Andric .addImm(Reg1) 13607a6dacacSDimitry Andric .addImm(Imm * 16) 13617a6dacacSDimitry Andric .setMIFlag(Flag); 13627a6dacacSDimitry Andric break; 13637a6dacacSDimitry Andric } 13640b57cec5SDimitry Andric } 13650b57cec5SDimitry Andric auto I = MBB->insertAfter(MBBI, MIB); 
13660b57cec5SDimitry Andric return I; 13670b57cec5SDimitry Andric } 13680b57cec5SDimitry Andric 13690b57cec5SDimitry Andric // Fix up the SEH opcode associated with the save/restore instruction. 13700b57cec5SDimitry Andric static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, 13710b57cec5SDimitry Andric unsigned LocalStackSize) { 13720b57cec5SDimitry Andric MachineOperand *ImmOpnd = nullptr; 13730b57cec5SDimitry Andric unsigned ImmIdx = MBBI->getNumOperands() - 1; 13740b57cec5SDimitry Andric switch (MBBI->getOpcode()) { 13750b57cec5SDimitry Andric default: 13760b57cec5SDimitry Andric llvm_unreachable("Fix the offset in the SEH instruction"); 13770b57cec5SDimitry Andric case AArch64::SEH_SaveFPLR: 13780b57cec5SDimitry Andric case AArch64::SEH_SaveRegP: 13790b57cec5SDimitry Andric case AArch64::SEH_SaveReg: 13800b57cec5SDimitry Andric case AArch64::SEH_SaveFRegP: 13810b57cec5SDimitry Andric case AArch64::SEH_SaveFReg: 13827a6dacacSDimitry Andric case AArch64::SEH_SaveAnyRegQP: 13837a6dacacSDimitry Andric case AArch64::SEH_SaveAnyRegQPX: 13840b57cec5SDimitry Andric ImmOpnd = &MBBI->getOperand(ImmIdx); 13850b57cec5SDimitry Andric break; 13860b57cec5SDimitry Andric } 13870b57cec5SDimitry Andric if (ImmOpnd) 13880b57cec5SDimitry Andric ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric 13910fca6ea1SDimitry Andric bool requiresGetVGCall(MachineFunction &MF) { 13920fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 13930fca6ea1SDimitry Andric return AFI->hasStreamingModeChanges() && 13940fca6ea1SDimitry Andric !MF.getSubtarget<AArch64Subtarget>().hasSVE(); 13950fca6ea1SDimitry Andric } 13960fca6ea1SDimitry Andric 13970fca6ea1SDimitry Andric bool isVGInstruction(MachineBasicBlock::iterator MBBI) { 13980fca6ea1SDimitry Andric unsigned Opc = MBBI->getOpcode(); 13990fca6ea1SDimitry Andric if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI || 14000fca6ea1SDimitry Andric 
Opc == AArch64::UBFMXri) 14010fca6ea1SDimitry Andric return true; 14020fca6ea1SDimitry Andric 14030fca6ea1SDimitry Andric if (requiresGetVGCall(*MBBI->getMF())) { 14040fca6ea1SDimitry Andric if (Opc == AArch64::ORRXrr) 14050fca6ea1SDimitry Andric return true; 14060fca6ea1SDimitry Andric 14070fca6ea1SDimitry Andric if (Opc == AArch64::BL) { 14080fca6ea1SDimitry Andric auto Op1 = MBBI->getOperand(0); 14090fca6ea1SDimitry Andric return Op1.isSymbol() && 14100fca6ea1SDimitry Andric (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg"); 14110fca6ea1SDimitry Andric } 14120fca6ea1SDimitry Andric } 14130fca6ea1SDimitry Andric 14140fca6ea1SDimitry Andric return false; 14150fca6ea1SDimitry Andric } 14160fca6ea1SDimitry Andric 14170b57cec5SDimitry Andric // Convert callee-save register save/restore instruction to do stack pointer 14180b57cec5SDimitry Andric // decrement/increment to allocate/deallocate the callee-save stack area by 14190b57cec5SDimitry Andric // converting store/load to use pre/post increment version. 14200b57cec5SDimitry Andric static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( 14210b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 14220b57cec5SDimitry Andric const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, 142381ad6265SDimitry Andric bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, 142481ad6265SDimitry Andric MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup, 142581ad6265SDimitry Andric int CFAOffset = 0) { 14260b57cec5SDimitry Andric unsigned NewOpc; 14270fca6ea1SDimitry Andric 14280fca6ea1SDimitry Andric // If the function contains streaming mode changes, we expect instructions 14290fca6ea1SDimitry Andric // to calculate the value of VG before spilling. For locally-streaming 14300fca6ea1SDimitry Andric // functions, we need to do this for both the streaming and non-streaming 14310fca6ea1SDimitry Andric // vector length. Move past these instructions if necessary. 
14320fca6ea1SDimitry Andric MachineFunction &MF = *MBB.getParent(); 14330fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 14340fca6ea1SDimitry Andric if (AFI->hasStreamingModeChanges()) 14350fca6ea1SDimitry Andric while (isVGInstruction(MBBI)) 14360fca6ea1SDimitry Andric ++MBBI; 14370fca6ea1SDimitry Andric 14380b57cec5SDimitry Andric switch (MBBI->getOpcode()) { 14390b57cec5SDimitry Andric default: 14400b57cec5SDimitry Andric llvm_unreachable("Unexpected callee-save save/restore opcode!"); 14410b57cec5SDimitry Andric case AArch64::STPXi: 14420b57cec5SDimitry Andric NewOpc = AArch64::STPXpre; 14430b57cec5SDimitry Andric break; 14440b57cec5SDimitry Andric case AArch64::STPDi: 14450b57cec5SDimitry Andric NewOpc = AArch64::STPDpre; 14460b57cec5SDimitry Andric break; 14470b57cec5SDimitry Andric case AArch64::STPQi: 14480b57cec5SDimitry Andric NewOpc = AArch64::STPQpre; 14490b57cec5SDimitry Andric break; 14500b57cec5SDimitry Andric case AArch64::STRXui: 14510b57cec5SDimitry Andric NewOpc = AArch64::STRXpre; 14520b57cec5SDimitry Andric break; 14530b57cec5SDimitry Andric case AArch64::STRDui: 14540b57cec5SDimitry Andric NewOpc = AArch64::STRDpre; 14550b57cec5SDimitry Andric break; 14560b57cec5SDimitry Andric case AArch64::STRQui: 14570b57cec5SDimitry Andric NewOpc = AArch64::STRQpre; 14580b57cec5SDimitry Andric break; 14590b57cec5SDimitry Andric case AArch64::LDPXi: 14600b57cec5SDimitry Andric NewOpc = AArch64::LDPXpost; 14610b57cec5SDimitry Andric break; 14620b57cec5SDimitry Andric case AArch64::LDPDi: 14630b57cec5SDimitry Andric NewOpc = AArch64::LDPDpost; 14640b57cec5SDimitry Andric break; 14650b57cec5SDimitry Andric case AArch64::LDPQi: 14660b57cec5SDimitry Andric NewOpc = AArch64::LDPQpost; 14670b57cec5SDimitry Andric break; 14680b57cec5SDimitry Andric case AArch64::LDRXui: 14690b57cec5SDimitry Andric NewOpc = AArch64::LDRXpost; 14700b57cec5SDimitry Andric break; 14710b57cec5SDimitry Andric case AArch64::LDRDui: 14720b57cec5SDimitry 
Andric NewOpc = AArch64::LDRDpost; 14730b57cec5SDimitry Andric break; 14740b57cec5SDimitry Andric case AArch64::LDRQui: 14750b57cec5SDimitry Andric NewOpc = AArch64::LDRQpost; 14760b57cec5SDimitry Andric break; 14770b57cec5SDimitry Andric } 14780b57cec5SDimitry Andric // Get rid of the SEH code associated with the old instruction. 14790b57cec5SDimitry Andric if (NeedsWinCFI) { 14800b57cec5SDimitry Andric auto SEH = std::next(MBBI); 14810b57cec5SDimitry Andric if (AArch64InstrInfo::isSEHInstruction(*SEH)) 14820b57cec5SDimitry Andric SEH->eraseFromParent(); 14830b57cec5SDimitry Andric } 14840b57cec5SDimitry Andric 14855f757f3fSDimitry Andric TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0); 1486fe6060f1SDimitry Andric int64_t MinOffset, MaxOffset; 1487fe6060f1SDimitry Andric bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo( 1488fe6060f1SDimitry Andric NewOpc, Scale, Width, MinOffset, MaxOffset); 1489fe6060f1SDimitry Andric (void)Success; 1490fe6060f1SDimitry Andric assert(Success && "unknown load/store opcode"); 1491fe6060f1SDimitry Andric 1492fe6060f1SDimitry Andric // If the first store isn't right where we want SP then we can't fold the 1493fe6060f1SDimitry Andric // update in so create a normal arithmetic instruction instead. 1494fe6060f1SDimitry Andric if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 || 1495fe6060f1SDimitry Andric CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) { 14960fca6ea1SDimitry Andric // If we are destroying the frame, make sure we add the increment after the 14970fca6ea1SDimitry Andric // last frame operation. 
14980fca6ea1SDimitry Andric if (FrameFlag == MachineInstr::FrameDestroy) 14990fca6ea1SDimitry Andric ++MBBI; 1500fe6060f1SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, 150181ad6265SDimitry Andric StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag, 150281ad6265SDimitry Andric false, false, nullptr, EmitCFI, 150381ad6265SDimitry Andric StackOffset::getFixed(CFAOffset)); 150481ad6265SDimitry Andric 1505fe6060f1SDimitry Andric return std::prev(MBBI); 1506fe6060f1SDimitry Andric } 1507fe6060f1SDimitry Andric 15080b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); 15090b57cec5SDimitry Andric MIB.addReg(AArch64::SP, RegState::Define); 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric // Copy all operands other than the immediate offset. 15120b57cec5SDimitry Andric unsigned OpndIdx = 0; 15130b57cec5SDimitry Andric for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd; 15140b57cec5SDimitry Andric ++OpndIdx) 15150b57cec5SDimitry Andric MIB.add(MBBI->getOperand(OpndIdx)); 15160b57cec5SDimitry Andric 15170b57cec5SDimitry Andric assert(MBBI->getOperand(OpndIdx).getImm() == 0 && 15180b57cec5SDimitry Andric "Unexpected immediate offset in first/last callee-save save/restore " 15190b57cec5SDimitry Andric "instruction!"); 15200b57cec5SDimitry Andric assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP && 15210b57cec5SDimitry Andric "Unexpected base register in callee-save save/restore instruction!"); 15220b57cec5SDimitry Andric assert(CSStackSizeInc % Scale == 0); 1523fe6060f1SDimitry Andric MIB.addImm(CSStackSizeInc / (int)Scale); 15240b57cec5SDimitry Andric 15250b57cec5SDimitry Andric MIB.setMIFlags(MBBI->getFlags()); 15260b57cec5SDimitry Andric MIB.setMemRefs(MBBI->memoperands()); 15270b57cec5SDimitry Andric 15280b57cec5SDimitry Andric // Generate a new SEH code that corresponds to the new instruction. 
15290b57cec5SDimitry Andric if (NeedsWinCFI) { 15300b57cec5SDimitry Andric *HasWinCFI = true; 153181ad6265SDimitry Andric InsertSEH(*MIB, *TII, FrameFlag); 153281ad6265SDimitry Andric } 153381ad6265SDimitry Andric 153481ad6265SDimitry Andric if (EmitCFI) { 153581ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst( 153681ad6265SDimitry Andric MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc)); 153781ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 153881ad6265SDimitry Andric .addCFIIndex(CFIIndex) 153981ad6265SDimitry Andric .setMIFlags(FrameFlag); 15400b57cec5SDimitry Andric } 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric return std::prev(MBB.erase(MBBI)); 15430b57cec5SDimitry Andric } 15440b57cec5SDimitry Andric 15450b57cec5SDimitry Andric // Fixup callee-save register save/restore instructions to take into account 15460b57cec5SDimitry Andric // combined SP bump by adding the local stack size to the stack offsets. 
15470b57cec5SDimitry Andric static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, 1548480093f4SDimitry Andric uint64_t LocalStackSize, 15490b57cec5SDimitry Andric bool NeedsWinCFI, 15500b57cec5SDimitry Andric bool *HasWinCFI) { 15510b57cec5SDimitry Andric if (AArch64InstrInfo::isSEHInstruction(MI)) 15520b57cec5SDimitry Andric return; 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 15550b57cec5SDimitry Andric unsigned Scale; 15560b57cec5SDimitry Andric switch (Opc) { 15570b57cec5SDimitry Andric case AArch64::STPXi: 15580b57cec5SDimitry Andric case AArch64::STRXui: 15590b57cec5SDimitry Andric case AArch64::STPDi: 15600b57cec5SDimitry Andric case AArch64::STRDui: 15610b57cec5SDimitry Andric case AArch64::LDPXi: 15620b57cec5SDimitry Andric case AArch64::LDRXui: 15630b57cec5SDimitry Andric case AArch64::LDPDi: 15640b57cec5SDimitry Andric case AArch64::LDRDui: 15650b57cec5SDimitry Andric Scale = 8; 15660b57cec5SDimitry Andric break; 15670b57cec5SDimitry Andric case AArch64::STPQi: 15680b57cec5SDimitry Andric case AArch64::STRQui: 15690b57cec5SDimitry Andric case AArch64::LDPQi: 15700b57cec5SDimitry Andric case AArch64::LDRQui: 15710b57cec5SDimitry Andric Scale = 16; 15720b57cec5SDimitry Andric break; 15730b57cec5SDimitry Andric default: 15740b57cec5SDimitry Andric llvm_unreachable("Unexpected callee-save save/restore opcode!"); 15750b57cec5SDimitry Andric } 15760b57cec5SDimitry Andric 15770b57cec5SDimitry Andric unsigned OffsetIdx = MI.getNumExplicitOperands() - 1; 15780b57cec5SDimitry Andric assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP && 15790b57cec5SDimitry Andric "Unexpected base register in callee-save save/restore instruction!"); 15800b57cec5SDimitry Andric // Last operand is immediate offset that needs fixing. 15810b57cec5SDimitry Andric MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); 15820b57cec5SDimitry Andric // All generated opcodes have scaled offsets. 
15830b57cec5SDimitry Andric assert(LocalStackSize % Scale == 0); 15840b57cec5SDimitry Andric OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale); 15850b57cec5SDimitry Andric 15860b57cec5SDimitry Andric if (NeedsWinCFI) { 15870b57cec5SDimitry Andric *HasWinCFI = true; 15880b57cec5SDimitry Andric auto MBBI = std::next(MachineBasicBlock::iterator(MI)); 15890b57cec5SDimitry Andric assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction"); 15900b57cec5SDimitry Andric assert(AArch64InstrInfo::isSEHInstruction(*MBBI) && 15910b57cec5SDimitry Andric "Expecting a SEH instruction"); 15920b57cec5SDimitry Andric fixupSEHOpcode(MBBI, LocalStackSize); 15930b57cec5SDimitry Andric } 15940b57cec5SDimitry Andric } 15950b57cec5SDimitry Andric 1596480093f4SDimitry Andric static bool isTargetWindows(const MachineFunction &MF) { 1597480093f4SDimitry Andric return MF.getSubtarget<AArch64Subtarget>().isTargetWindows(); 1598480093f4SDimitry Andric } 1599480093f4SDimitry Andric 1600480093f4SDimitry Andric // Convenience function to determine whether I is an SVE callee save. 
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  // Predicate-as-counter setup and two-register strided loads/stores
  // (presumably used when spilling/filling Z-register pairs — TODO confirm
  // against the SVE callee-save spill code).
  case AArch64::PTRUE_C_B:
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  // Single Z (vector) and P (predicate) register spill/fill instructions.
  case AArch64::STR_ZXI:
  case AArch64::STR_PXI:
  case AArch64::LDR_ZXI:
  case AArch64::LDR_PXI:
    // The same opcodes can appear in ordinary code, so only count them when
    // flagged as part of the prologue or epilogue.
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  }
}

// Emit the shadow call stack prologue: push the return address (x30) onto the
// shadow stack addressed by x18, post-incrementing x18 by 8. If \p
// NeedsWinCFI, a SEH_Nop is emitted for the pushed instruction; if \p
// NeedsUnwindInfo, a CFI escape is emitted that lets the unwinder recompute
// x18's value for the previous frame.
static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
                                        MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, bool NeedsWinCFI,
                                        bool NeedsUnwindInfo) {
  // Shadow call stack prolog: str x30, [x18], #8
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
      .addReg(AArch64::X18, RegState::Define)
      .addReg(AArch64::LR)
      .addReg(AArch64::X18)
      .addImm(8)
      .setMIFlag(MachineInstr::FrameSetup);

  // This instruction also makes x18 live-in to the entry block.
  MBB.addLiveIn(AArch64::X18);

  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);

  if (NeedsUnwindInfo) {
    // Emit a CFI instruction that causes 8 to be subtracted from the value of
    // x18 when unwinding past this frame.
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
        nullptr, StringRef(CFIInst, sizeof(CFIInst))));
    BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlag(MachineInstr::FrameSetup);
  }
}

// Emit the shadow call stack epilogue: reload the return address (x30) from
// the shadow stack, pre-decrementing x18 by 8 to undo the prologue's push.
static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
                                        MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL) {
  // Shadow call stack epilog: ldr x30, [x18, #-8]!
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
      .addReg(AArch64::X18, RegState::Define)
      .addReg(AArch64::LR, RegState::Define)
      .addReg(AArch64::X18)
      .addImm(-8)
      .setMIFlag(MachineInstr::FrameDestroy);

  // With async unwind info, restore dwarf register 18 (x18) to its default
  // unwind rule now that the prologue's adjustment has been undone.
  if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF)) {
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameDestroy);
  }
}

// Define the current CFA rule to use the provided FP.
167906c3fb27SDimitry Andric static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB, 168006c3fb27SDimitry Andric MachineBasicBlock::iterator MBBI, 168106c3fb27SDimitry Andric const DebugLoc &DL, unsigned FixedObject) { 168206c3fb27SDimitry Andric const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); 168306c3fb27SDimitry Andric const AArch64RegisterInfo *TRI = STI.getRegisterInfo(); 168406c3fb27SDimitry Andric const TargetInstrInfo *TII = STI.getInstrInfo(); 168506c3fb27SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 168606c3fb27SDimitry Andric 168706c3fb27SDimitry Andric const int OffsetToFirstCalleeSaveFromFP = 168806c3fb27SDimitry Andric AFI->getCalleeSaveBaseToFrameRecordOffset() - 168906c3fb27SDimitry Andric AFI->getCalleeSavedStackSize(); 169006c3fb27SDimitry Andric Register FramePtr = TRI->getFrameRegister(MF); 169106c3fb27SDimitry Andric unsigned Reg = TRI->getDwarfRegNum(FramePtr, true); 169206c3fb27SDimitry Andric unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( 169306c3fb27SDimitry Andric nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); 169406c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 169506c3fb27SDimitry Andric .addCFIIndex(CFIIndex) 169606c3fb27SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 169706c3fb27SDimitry Andric } 169806c3fb27SDimitry Andric 16995f757f3fSDimitry Andric #ifndef NDEBUG 17005f757f3fSDimitry Andric /// Collect live registers from the end of \p MI's parent up to (including) \p 17015f757f3fSDimitry Andric /// MI in \p LiveRegs. 
17025f757f3fSDimitry Andric static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, 17035f757f3fSDimitry Andric LivePhysRegs &LiveRegs) { 17045f757f3fSDimitry Andric 17055f757f3fSDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 17065f757f3fSDimitry Andric LiveRegs.addLiveOuts(MBB); 17075f757f3fSDimitry Andric for (const MachineInstr &MI : 17085f757f3fSDimitry Andric reverse(make_range(MI.getIterator(), MBB.instr_end()))) 17095f757f3fSDimitry Andric LiveRegs.stepBackward(MI); 17105f757f3fSDimitry Andric } 17115f757f3fSDimitry Andric #endif 17125f757f3fSDimitry Andric 17130b57cec5SDimitry Andric void AArch64FrameLowering::emitPrologue(MachineFunction &MF, 17140b57cec5SDimitry Andric MachineBasicBlock &MBB) const { 17150b57cec5SDimitry Andric MachineBasicBlock::iterator MBBI = MBB.begin(); 17160b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 17170b57cec5SDimitry Andric const Function &F = MF.getFunction(); 17180b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 17190b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 17200b57cec5SDimitry Andric const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 17215f757f3fSDimitry Andric 17220b57cec5SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1723bdd1243dSDimitry Andric bool EmitCFI = AFI->needsDwarfUnwindInfo(MF); 172406c3fb27SDimitry Andric bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF); 17250b57cec5SDimitry Andric bool HasFP = hasFP(MF); 17260b57cec5SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 17270b57cec5SDimitry Andric bool HasWinCFI = false; 17280b57cec5SDimitry Andric auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); }); 17290b57cec5SDimitry Andric 17305f757f3fSDimitry Andric MachineBasicBlock::iterator End = MBB.end(); 17315f757f3fSDimitry Andric #ifndef NDEBUG 17325f757f3fSDimitry Andric const TargetRegisterInfo *TRI = 
MF.getSubtarget().getRegisterInfo(); 17335f757f3fSDimitry Andric // Collect live register from the end of MBB up to the start of the existing 17345f757f3fSDimitry Andric // frame setup instructions. 17355f757f3fSDimitry Andric MachineBasicBlock::iterator NonFrameStart = MBB.begin(); 17365f757f3fSDimitry Andric while (NonFrameStart != End && 17375f757f3fSDimitry Andric NonFrameStart->getFlag(MachineInstr::FrameSetup)) 17385f757f3fSDimitry Andric ++NonFrameStart; 17395f757f3fSDimitry Andric 17405f757f3fSDimitry Andric LivePhysRegs LiveRegs(*TRI); 17415f757f3fSDimitry Andric if (NonFrameStart != MBB.end()) { 17425f757f3fSDimitry Andric getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs); 17435f757f3fSDimitry Andric // Ignore registers used for stack management for now. 17445f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::SP); 17455f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::X19); 17465f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::FP); 17475f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::LR); 17480fca6ea1SDimitry Andric 17490fca6ea1SDimitry Andric // X0 will be clobbered by a call to __arm_get_current_vg in the prologue. 17500fca6ea1SDimitry Andric // This is necessary to spill VG if required where SVE is unavailable, but 17510fca6ea1SDimitry Andric // X0 is preserved around this call. 17520fca6ea1SDimitry Andric if (requiresGetVGCall(MF)) 17530fca6ea1SDimitry Andric LiveRegs.removeReg(AArch64::X0); 17545f757f3fSDimitry Andric } 17555f757f3fSDimitry Andric 17565f757f3fSDimitry Andric auto VerifyClobberOnExit = make_scope_exit([&]() { 17575f757f3fSDimitry Andric if (NonFrameStart == MBB.end()) 17585f757f3fSDimitry Andric return; 17595f757f3fSDimitry Andric // Check if any of the newly instructions clobber any of the live registers. 
17605f757f3fSDimitry Andric for (MachineInstr &MI : 17615f757f3fSDimitry Andric make_range(MBB.instr_begin(), NonFrameStart->getIterator())) { 17625f757f3fSDimitry Andric for (auto &Op : MI.operands()) 17635f757f3fSDimitry Andric if (Op.isReg() && Op.isDef()) 17645f757f3fSDimitry Andric assert(!LiveRegs.contains(Op.getReg()) && 17655f757f3fSDimitry Andric "live register clobbered by inserted prologue instructions"); 17665f757f3fSDimitry Andric } 17675f757f3fSDimitry Andric }); 17685f757f3fSDimitry Andric #endif 17695f757f3fSDimitry Andric 17700b57cec5SDimitry Andric bool IsFunclet = MBB.isEHFuncletEntry(); 17710b57cec5SDimitry Andric 17720b57cec5SDimitry Andric // At this point, we're going to decide whether or not the function uses a 17730b57cec5SDimitry Andric // redzone. In most cases, the function doesn't have a redzone so let's 17740b57cec5SDimitry Andric // assume that's false and set it to true in the case that there's a redzone. 17750b57cec5SDimitry Andric AFI->setHasRedZone(false); 17760b57cec5SDimitry Andric 17770b57cec5SDimitry Andric // Debug location must be unknown since the first debug location is used 17780b57cec5SDimitry Andric // to determine the end of the prologue. 
17790b57cec5SDimitry Andric DebugLoc DL; 17800b57cec5SDimitry Andric 1781e8d8bef9SDimitry Andric const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>(); 17825f757f3fSDimitry Andric if (MFnI.needsShadowCallStackPrologueEpilogue(MF)) 178381ad6265SDimitry Andric emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI, 1784bdd1243dSDimitry Andric MFnI.needsDwarfUnwindInfo(MF)); 1785fe6060f1SDimitry Andric 1786bdd1243dSDimitry Andric if (MFnI.shouldSignReturnAddress(MF)) { 17875f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE)) 17880b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 17895f757f3fSDimitry Andric if (NeedsWinCFI) 17905f757f3fSDimitry Andric HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR 17910b57cec5SDimitry Andric } 17920b57cec5SDimitry Andric 179381ad6265SDimitry Andric if (EmitCFI && MFnI.isMTETagged()) { 179481ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED)) 179581ad6265SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 179681ad6265SDimitry Andric } 17970b57cec5SDimitry Andric 1798fe6060f1SDimitry Andric // We signal the presence of a Swift extended frame to external tools by 1799fe6060f1SDimitry Andric // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple 1800fe6060f1SDimitry Andric // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI 1801fe6060f1SDimitry Andric // bits so that is still true. 1802fe6060f1SDimitry Andric if (HasFP && AFI->hasSwiftAsyncContext()) { 1803349cc55cSDimitry Andric switch (MF.getTarget().Options.SwiftAsyncFramePointer) { 1804349cc55cSDimitry Andric case SwiftAsyncFramePointerMode::DeploymentBased: 1805349cc55cSDimitry Andric if (Subtarget.swiftAsyncContextIsDynamicallySet()) { 1806349cc55cSDimitry Andric // The special symbol below is absolute and has a *value* that can be 1807349cc55cSDimitry Andric // combined with the frame pointer to signal an extended frame. 
1808349cc55cSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16) 1809349cc55cSDimitry Andric .addExternalSymbol("swift_async_extendedFramePointerFlags", 1810349cc55cSDimitry Andric AArch64II::MO_GOT); 18115f757f3fSDimitry Andric if (NeedsWinCFI) { 18125f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 18135f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 18145f757f3fSDimitry Andric HasWinCFI = true; 18155f757f3fSDimitry Andric } 1816349cc55cSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP) 1817349cc55cSDimitry Andric .addUse(AArch64::FP) 1818349cc55cSDimitry Andric .addUse(AArch64::X16) 1819349cc55cSDimitry Andric .addImm(Subtarget.isTargetILP32() ? 32 : 0); 18205f757f3fSDimitry Andric if (NeedsWinCFI) { 18215f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 18225f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 18235f757f3fSDimitry Andric HasWinCFI = true; 18245f757f3fSDimitry Andric } 1825349cc55cSDimitry Andric break; 1826349cc55cSDimitry Andric } 1827bdd1243dSDimitry Andric [[fallthrough]]; 1828349cc55cSDimitry Andric 1829349cc55cSDimitry Andric case SwiftAsyncFramePointerMode::Always: 1830fe6060f1SDimitry Andric // ORR x29, x29, #0x1000_0000_0000_0000 1831fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP) 1832fe6060f1SDimitry Andric .addUse(AArch64::FP) 1833fe6060f1SDimitry Andric .addImm(0x1100) 1834fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 18355f757f3fSDimitry Andric if (NeedsWinCFI) { 18365f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 18375f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 18385f757f3fSDimitry Andric HasWinCFI = true; 18395f757f3fSDimitry Andric } 1840349cc55cSDimitry Andric break; 1841349cc55cSDimitry Andric 1842349cc55cSDimitry Andric case SwiftAsyncFramePointerMode::Never: 1843349cc55cSDimitry Andric break; 
1844349cc55cSDimitry Andric } 1845fe6060f1SDimitry Andric } 1846fe6060f1SDimitry Andric 18470b57cec5SDimitry Andric // All calls are tail calls in GHC calling conv, and functions have no 18480b57cec5SDimitry Andric // prologue/epilogue. 18490b57cec5SDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::GHC) 18500b57cec5SDimitry Andric return; 18510b57cec5SDimitry Andric 1852e8d8bef9SDimitry Andric // Set tagged base pointer to the requested stack slot. 18530b57cec5SDimitry Andric // Ideally it should match SP value after prologue. 1854bdd1243dSDimitry Andric std::optional<int> TBPI = AFI->getTaggedBasePointerIndex(); 1855e8d8bef9SDimitry Andric if (TBPI) 1856e8d8bef9SDimitry Andric AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI)); 1857e8d8bef9SDimitry Andric else 18580b57cec5SDimitry Andric AFI->setTaggedBasePointerOffset(MFI.getStackSize()); 18590b57cec5SDimitry Andric 18608bcb0991SDimitry Andric const StackOffset &SVEStackSize = getSVEStackSize(MF); 18618bcb0991SDimitry Andric 18620b57cec5SDimitry Andric // getStackSize() includes all the locals in its size calculation. We don't 18630b57cec5SDimitry Andric // include these locals when computing the stack size of a funclet, as they 18640b57cec5SDimitry Andric // are allocated in the parent's stack frame and accessed via the frame 18650b57cec5SDimitry Andric // pointer from the funclet. We only save the callee saved registers in the 18660b57cec5SDimitry Andric // funclet, which are really the callee saved registers of the parent 18670b57cec5SDimitry Andric // function, including the funclet. 18680fca6ea1SDimitry Andric int64_t NumBytes = 18690fca6ea1SDimitry Andric IsFunclet ? 
getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); 18700b57cec5SDimitry Andric if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { 18710b57cec5SDimitry Andric assert(!HasFP && "unexpected function without stack frame but with FP"); 18728bcb0991SDimitry Andric assert(!SVEStackSize && 18738bcb0991SDimitry Andric "unexpected function without stack frame but with SVE objects"); 18740b57cec5SDimitry Andric // All of the stack allocation is for locals. 18750b57cec5SDimitry Andric AFI->setLocalStackSize(NumBytes); 18760b57cec5SDimitry Andric if (!NumBytes) 18770b57cec5SDimitry Andric return; 18780b57cec5SDimitry Andric // REDZONE: If the stack size is less than 128 bytes, we don't need 18790b57cec5SDimitry Andric // to actually allocate. 18800b57cec5SDimitry Andric if (canUseRedZone(MF)) { 18810b57cec5SDimitry Andric AFI->setHasRedZone(true); 18820b57cec5SDimitry Andric ++NumRedZoneFunctions; 18830b57cec5SDimitry Andric } else { 18848bcb0991SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, 1885e8d8bef9SDimitry Andric StackOffset::getFixed(-NumBytes), TII, 1886e8d8bef9SDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); 188781ad6265SDimitry Andric if (EmitCFI) { 18880b57cec5SDimitry Andric // Label used to tie together the PROLOG_LABEL and the MachineMoves. 18890fca6ea1SDimitry Andric MCSymbol *FrameLabel = MF.getContext().createTempSymbol(); 18900b57cec5SDimitry Andric // Encode the stack size of the leaf function. 
18910b57cec5SDimitry Andric unsigned CFIIndex = MF.addFrameInst( 18925ffd83dbSDimitry Andric MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes)); 18930b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 18940b57cec5SDimitry Andric .addCFIIndex(CFIIndex) 18950b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 18960b57cec5SDimitry Andric } 18970b57cec5SDimitry Andric } 18980b57cec5SDimitry Andric 18990b57cec5SDimitry Andric if (NeedsWinCFI) { 19000b57cec5SDimitry Andric HasWinCFI = true; 19010b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) 19020b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 19030b57cec5SDimitry Andric } 19040b57cec5SDimitry Andric 19050b57cec5SDimitry Andric return; 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric 19080fca6ea1SDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()); 190962cfcf62SDimitry Andric unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); 19100b57cec5SDimitry Andric 19110b57cec5SDimitry Andric auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; 19120b57cec5SDimitry Andric // All of the remaining stack allocations are for locals. 
19130b57cec5SDimitry Andric AFI->setLocalStackSize(NumBytes - PrologueSaveSize); 19140b57cec5SDimitry Andric bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); 1915fe6060f1SDimitry Andric bool HomPrologEpilog = homogeneousPrologEpilog(MF); 19160b57cec5SDimitry Andric if (CombineSPBump) { 19178bcb0991SDimitry Andric assert(!SVEStackSize && "Cannot combine SP bump with SVE"); 19188bcb0991SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, 1919e8d8bef9SDimitry Andric StackOffset::getFixed(-NumBytes), TII, 192081ad6265SDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI, 192106c3fb27SDimitry Andric EmitAsyncCFI); 19220b57cec5SDimitry Andric NumBytes = 0; 1923fe6060f1SDimitry Andric } else if (HomPrologEpilog) { 1924fe6060f1SDimitry Andric // Stack has been already adjusted. 1925fe6060f1SDimitry Andric NumBytes -= PrologueSaveSize; 19260b57cec5SDimitry Andric } else if (PrologueSaveSize != 0) { 19270b57cec5SDimitry Andric MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( 192881ad6265SDimitry Andric MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI, 192906c3fb27SDimitry Andric EmitAsyncCFI); 19300b57cec5SDimitry Andric NumBytes -= PrologueSaveSize; 19310b57cec5SDimitry Andric } 19320b57cec5SDimitry Andric assert(NumBytes >= 0 && "Negative stack allocation size!?"); 19330b57cec5SDimitry Andric 19340b57cec5SDimitry Andric // Move past the saves of the callee-saved registers, fixing up the offsets 19350b57cec5SDimitry Andric // and pre-inc if we decided to combine the callee-save and local stack 19360b57cec5SDimitry Andric // pointer bump above. 
1937480093f4SDimitry Andric while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && 1938480093f4SDimitry Andric !IsSVECalleeSave(MBBI)) { 19390fca6ea1SDimitry Andric // Move past instructions generated to calculate VG 19400fca6ea1SDimitry Andric if (AFI->hasStreamingModeChanges()) 19410fca6ea1SDimitry Andric while (isVGInstruction(MBBI)) 19420fca6ea1SDimitry Andric ++MBBI; 19430fca6ea1SDimitry Andric 19440b57cec5SDimitry Andric if (CombineSPBump) 19450b57cec5SDimitry Andric fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), 19460b57cec5SDimitry Andric NeedsWinCFI, &HasWinCFI); 19470b57cec5SDimitry Andric ++MBBI; 19480b57cec5SDimitry Andric } 19490b57cec5SDimitry Andric 195062cfcf62SDimitry Andric // For funclets the FP belongs to the containing function. 195162cfcf62SDimitry Andric if (!IsFunclet && HasFP) { 19528bcb0991SDimitry Andric // Only set up FP if we actually need to. 1953e8d8bef9SDimitry Andric int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); 19548bcb0991SDimitry Andric 19550b57cec5SDimitry Andric if (CombineSPBump) 19560b57cec5SDimitry Andric FPOffset += AFI->getLocalStackSize(); 19570b57cec5SDimitry Andric 1958fe6060f1SDimitry Andric if (AFI->hasSwiftAsyncContext()) { 1959fe6060f1SDimitry Andric // Before we update the live FP we have to ensure there's a valid (or 1960fe6060f1SDimitry Andric // null) asynchronous context in its slot just before FP in the frame 1961fe6060f1SDimitry Andric // record, so store it now. 1962fe6060f1SDimitry Andric const auto &Attrs = MF.getFunction().getAttributes(); 1963fe6060f1SDimitry Andric bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync); 1964fe6060f1SDimitry Andric if (HaveInitialContext) 1965fe6060f1SDimitry Andric MBB.addLiveIn(AArch64::X22); 19665f757f3fSDimitry Andric Register Reg = HaveInitialContext ? 
AArch64::X22 : AArch64::XZR; 1967fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext)) 19685f757f3fSDimitry Andric .addUse(Reg) 1969fe6060f1SDimitry Andric .addUse(AArch64::SP) 1970fe6060f1SDimitry Andric .addImm(FPOffset - 8) 1971fe6060f1SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 19725f757f3fSDimitry Andric if (NeedsWinCFI) { 19735f757f3fSDimitry Andric // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded 19745f757f3fSDimitry Andric // to multiple instructions, should be mutually-exclusive. 19755f757f3fSDimitry Andric assert(Subtarget.getTargetTriple().getArchName() != "arm64e"); 19765f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 19775f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 19785f757f3fSDimitry Andric HasWinCFI = true; 19795f757f3fSDimitry Andric } 1980fe6060f1SDimitry Andric } 1981fe6060f1SDimitry Andric 1982fe6060f1SDimitry Andric if (HomPrologEpilog) { 1983fe6060f1SDimitry Andric auto Prolog = MBBI; 1984fe6060f1SDimitry Andric --Prolog; 1985fe6060f1SDimitry Andric assert(Prolog->getOpcode() == AArch64::HOM_Prolog); 1986fe6060f1SDimitry Andric Prolog->addOperand(MachineOperand::CreateImm(FPOffset)); 1987fe6060f1SDimitry Andric } else { 19880b57cec5SDimitry Andric // Issue sub fp, sp, FPOffset or 19890b57cec5SDimitry Andric // mov fp,sp when FPOffset is zero. 19900b57cec5SDimitry Andric // Note: All stores of callee-saved registers are marked as "FrameSetup". 19910b57cec5SDimitry Andric // This code marks the instruction(s) that set the FP also. 
19928bcb0991SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, 1993e8d8bef9SDimitry Andric StackOffset::getFixed(FPOffset), TII, 1994e8d8bef9SDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); 1995bdd1243dSDimitry Andric if (NeedsWinCFI && HasWinCFI) { 1996bdd1243dSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) 1997bdd1243dSDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 1998bdd1243dSDimitry Andric // After setting up the FP, the rest of the prolog doesn't need to be 1999bdd1243dSDimitry Andric // included in the SEH unwind info. 2000bdd1243dSDimitry Andric NeedsWinCFI = false; 2001bdd1243dSDimitry Andric } 20020b57cec5SDimitry Andric } 200306c3fb27SDimitry Andric if (EmitAsyncCFI) 200406c3fb27SDimitry Andric emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject); 200581ad6265SDimitry Andric } 200681ad6265SDimitry Andric 200781ad6265SDimitry Andric // Now emit the moves for whatever callee saved regs we have (including FP, 200881ad6265SDimitry Andric // LR if those are saved). Frame instructions for SVE register are emitted 200981ad6265SDimitry Andric // later, after the instruction which actually save SVE regs. 201006c3fb27SDimitry Andric if (EmitAsyncCFI) 201181ad6265SDimitry Andric emitCalleeSavedGPRLocations(MBB, MBBI); 20120b57cec5SDimitry Andric 2013bdd1243dSDimitry Andric // Alignment is required for the parent frame, not the funclet 2014bdd1243dSDimitry Andric const bool NeedsRealignment = 2015bdd1243dSDimitry Andric NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF); 20165f757f3fSDimitry Andric const int64_t RealignmentPadding = 2017bdd1243dSDimitry Andric (NeedsRealignment && MFI.getMaxAlign() > Align(16)) 2018bdd1243dSDimitry Andric ? 
MFI.getMaxAlign().value() - 16 2019bdd1243dSDimitry Andric : 0; 2020bdd1243dSDimitry Andric 2021bdd1243dSDimitry Andric if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) { 2022bdd1243dSDimitry Andric uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4; 20230b57cec5SDimitry Andric if (NeedsWinCFI) { 20240b57cec5SDimitry Andric HasWinCFI = true; 20250b57cec5SDimitry Andric // alloc_l can hold at most 256MB, so assume that NumBytes doesn't 20260b57cec5SDimitry Andric // exceed this amount. We need to move at most 2^24 - 1 into x15. 20270b57cec5SDimitry Andric // This is at most two instructions, MOVZ follwed by MOVK. 20280b57cec5SDimitry Andric // TODO: Fix to use multiple stack alloc unwind codes for stacks 20290b57cec5SDimitry Andric // exceeding 256MB in size. 20300b57cec5SDimitry Andric if (NumBytes >= (1 << 28)) 20310b57cec5SDimitry Andric report_fatal_error("Stack size cannot exceed 256MB for stack " 20320b57cec5SDimitry Andric "unwinding purposes"); 20330b57cec5SDimitry Andric 20340b57cec5SDimitry Andric uint32_t LowNumWords = NumWords & 0xFFFF; 20350b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15) 20360b57cec5SDimitry Andric .addImm(LowNumWords) 20370b57cec5SDimitry Andric .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 20380b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20390b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 20400b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20410b57cec5SDimitry Andric if ((NumWords & 0xFFFF0000) != 0) { 20420b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15) 20430b57cec5SDimitry Andric .addReg(AArch64::X15) 20440b57cec5SDimitry Andric .addImm((NumWords & 0xFFFF0000) >> 16) // High half 20450b57cec5SDimitry Andric .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16)) 20460b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20470b57cec5SDimitry Andric BuildMI(MBB, 
                                 MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      // Probe count doesn't fit the MOVZ/MOVK expansion above; materialize it
      // with the generic 64-bit immediate pseudo instead.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Call the platform stack-probe helper. It implicitly reads X15 (the
    // probe amount) and clobbers X16, X17 and NZCV, per the addReg operands
    // below.
    const char *ChkStk = Subtarget.getChkStkName();
    switch (MF.getTarget().getCodeModel()) {
    case CodeModel::Tiny:
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      // Close code models: the helper is in direct-branch range, use BL.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
          .addExternalSymbol(ChkStk)
          .addReg(AArch64::X15, RegState::Implicit)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    case CodeModel::Large:
      // Large code model: materialize the helper's address in X16 (MOVaddrEXT
      // carries the symbol twice, for its hi/lo operand pair) and call
      // indirectly.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
          .addReg(AArch64::X16, RegState::Define)
          .addExternalSymbol(ChkStk)
          .addExternalSymbol(ChkStk)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }

      BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
          .addReg(AArch64::X16, RegState::Kill)
          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    }

    // SP -= X15 * 16 (UXTX with shift 4): carve out the probed allocation.
    // X15 therefore counts 16-byte units.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP, RegState::Kill)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }
    NumBytes = 0;

    if (RealignmentPadding > 0) {
      if (RealignmentPadding >= 4096) {
        // Padding is too large for an ADDXri immediate; go through X16.
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
            .addReg(AArch64::X16, RegState::Define)
            .addImm(RealignmentPadding)
            .setMIFlags(MachineInstr::FrameSetup);
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
            .addReg(AArch64::SP)
            .addReg(AArch64::X16, RegState::Kill)
            .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
            .addReg(AArch64::SP)
            .addImm(RealignmentPadding)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
      }

      // Round X15 = SP + padding down to the required alignment and make it
      // the new SP.
      uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(AArch64::X15, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
      AFI->setStackRealigned(true);

      // No need for SEH instructions here; if we're realigning the stack,
      // we've set a frame pointer and already finished the SEH prologue.
      assert(!NeedsWinCFI);
    }
  }

  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
  MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;

  // Process the SVE callee-saves to determine what space needs to be
  // allocated.
  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
                      << "\n");
    // Find callee save instructions in frame.
    CalleeSavesBegin = MBBI;
    assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
    while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
      ++MBBI;
    CalleeSavesEnd = MBBI;

    SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
  }

  // Allocate space for the callee saves (if any).
  StackOffset CFAOffset =
      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
  StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
  allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
                     nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
                     MFI.hasVarSizedObjects() || LocalsSize);
  CFAOffset += SVECalleeSavesSize;

  if (EmitAsyncCFI)
    emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);

  // Allocate space for the rest of the frame including SVE locals. Align the
  // stack as necessary.
  assert(!(canUseRedZone(MF) && NeedsRealignment) &&
         "Cannot use redzone with stack realignment");
  if (!canUseRedZone(MF)) {
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
    // the correct value here, as NumBytes also includes padding bytes,
    // which shouldn't be counted here.
    allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                       SVELocalsSize + StackOffset::getFixed(NumBytes),
                       NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
                       CFAOffset, MFI.hasVarSizedObjects());
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  // For funclets the BP belongs to the containing function.
  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // SEH funclets are passed the frame pointer in X1. If the parent
  // function uses the base register, then the base register is used
  // directly, and is not retrieved from X1.
  if (IsFunclet && F.hasPersonalityFn()) {
    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
    if (isAsynchronousEHPersonality(Per)) {
      // Asynchronous-EH funclet: re-establish FP from X1, which the runtime
      // passes in, and record X1 as live-in to this block.
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
          .addReg(AArch64::X1)
          .setMIFlag(MachineInstr::FrameSetup);
      MBB.addLiveIn(AArch64::X1);
    }
  }

  // Synchronous (non-async) CFI: emit the CFA definition and the callee-saved
  // register locations once here, at the end of the prologue, instead of
  // incrementally after each SP adjustment.
  if (EmitCFI && !EmitAsyncCFI) {
    if (HasFP) {
      emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
    } else {
      StackOffset TotalSize =
          SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
      unsigned CFIIndex = MF.addFrameInst(createDefCFA(
          *RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, TotalSize,
          /*LastAdjustmentWasScalable=*/false));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    emitCalleeSavedGPRLocations(MBB, MBBI);
    emitCalleeSavedSVELocations(MBB, MBBI);
  }
}

/// Returns true if \p MI terminates a WinEH funclet (CATCHRET / CLEANUPRET),
/// i.e. the block ending in it needs a funclet-style epilogue.
static bool isFuncletReturnInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }
}

/// Emit the epilogue: undo the stack adjustments made in emitPrologue,
/// restoring SP (and, where used, FP) before the terminator of \p MBB.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool NeedsWinCFI = needsWinCFI(MF);
  bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  bool HasWinCFI = false;
  bool IsFunclet = false;

  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    IsFunclet = isFuncletReturnInstr(*MBBI);
  }

  MachineBasicBlock::iterator EpilogStartI = MBB.end();

  // Work deferred to every exit path of this function: return-address
  // signing, shadow-call-stack epilogue, GPR restore CFI, and WinCFI
  // epilogue finalization / cleanup.
  auto FinishingTouches = make_scope_exit([&]() {
    if (AFI->shouldSignReturnAddress(MF)) {
      BuildMI(MBB, MBB.getFirstTerminator(), DL,
              TII->get(AArch64::PAUTH_EPILOGUE))
          .setMIFlag(MachineInstr::FrameDestroy);
      if (NeedsWinCFI)
        HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
    }
    if (AFI->needsShadowCallStackPrologueEpilogue(MF))
      emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
    if (EmitCFI)
      emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
    if (HasWinCFI) {
      BuildMI(MBB, MBB.getFirstTerminator(), DL,
              TII->get(AArch64::SEH_EpilogEnd))
          .setMIFlag(MachineInstr::FrameDestroy);
      if (!MF.hasWinCFI())
        MF.setHasWinCFI(true);
    }
    if (NeedsWinCFI) {
      // Drop the speculatively inserted SEH_EpilogStart if no SEH opcode was
      // actually emitted (see the insertion below).
      assert(EpilogStartI != MBB.end());
      if (!HasWinCFI)
        MBB.erase(EpilogStartI);
    }
  });

  int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
                               : MFI.getStackSize();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
                                              MF.getFunction().isVarArg());
  unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  int64_t AfterCSRPopSize = ArgumentStackToRestore;
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // We cannot rely on the local stack size set in emitPrologue if the function
  // has funclets, as funclets have different local stack size requirements, and
  // the current value set in emitPrologue may be that of the containing
  // function.
  if (MF.hasEHFunclets())
    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
  if (homogeneousPrologEpilog(MF, &MBB)) {
    // Homogeneous epilogue: the HOM_Epilog pseudo handles the callee-save
    // restores, so only the local-stack adjustment is emitted here.
    assert(!NeedsWinCFI);
    auto LastPopI = MBB.getFirstTerminator();
    if (LastPopI != MBB.begin()) {
      auto HomogeneousEpilog = std::prev(LastPopI);
      if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
        LastPopI = HomogeneousEpilog;
    }

    // Adjust local stack
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(AFI->getLocalStackSize()), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

    // SP has been already adjusted while restoring callee save regs.
    // We've bailed-out the case with adjusting SP for arguments.
    assert(AfterCSRPopSize == 0);
    return;
  }
  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
  // Assume we can't combine the last pop with the sp restore.

  bool CombineAfterCSRBump = false;
  if (!CombineSPBump && PrologueSaveSize != 0) {
    // Walk back over trailing CFI/SEH instructions to reach the last CSR pop.
    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
           AArch64InstrInfo::isSEHInstruction(*Pop))
      Pop = std::prev(Pop);
    // Converting the last ldp to a post-index ldp is valid only if the last
    // ldp's offset is 0.
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    // If the offset is 0 and the AfterCSR pop is not actually trying to
    // allocate more stack for arguments (in space that an untimely interrupt
    // may clobber), convert it to a post-index ldp.
    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
      convertCalleeSaveRestoreToSPPrePostIncDec(
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
          MachineInstr::FrameDestroy, PrologueSaveSize);
    } else {
      // If not, make sure to emit an add after the last ldp.
      // We're doing this by transfering the size to be restored from the
      // adjustment *before* the CSR pops to the adjustment *after* the CSR
      // pops.
      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;
    }
  }

  // Move past the restores of the callee-saved registers.
  // If we plan on combining the sp bump of the local stack size and the callee
  // save stack size, we might need to adjust the CSR save and restore offsets.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
        IsSVECalleeSave(LastPopI)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
                                        NeedsWinCFI, &HasWinCFI);
  }

  if (NeedsWinCFI) {
    // Note that there are cases where we insert SEH opcodes in the
    // epilogue when we had no SEH opcodes in the prologue. For
    // example, when there is no stack frame but there are stack
    // arguments. Insert the SEH_EpilogStart and remove it later if it
    // we didn't emit any SEH opcodes to avoid generating WinCFI for
    // functions that don't need it.
    BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
        .setMIFlag(MachineInstr::FrameDestroy);
    EpilogStartI = LastPopI;
    --EpilogStartI;
  }

  if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
    switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
    case SwiftAsyncFramePointerMode::DeploymentBased:
      // Avoid the reload as it is GOT relative, and instead fall back to the
      // hardcoded value below. This allows a mismatch between the OS and
      // application without immediately terminating on the difference.
      [[fallthrough]];
    case SwiftAsyncFramePointerMode::Always:
      // We need to reset FP to its untagged state on return. Bit 60 is
      // currently used to show the presence of an extended frame.

      // BIC x29, x29, #0x1000_0000_0000_0000
      // (0x10fe is the ANDXri-encoded logical immediate for that mask.)
      BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
              AArch64::FP)
          .addUse(AArch64::FP)
          .addImm(0x10fe)
          .setMIFlag(MachineInstr::FrameDestroy);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlags(MachineInstr::FrameDestroy);
        HasWinCFI = true;
      }
      break;

    case SwiftAsyncFramePointerMode::Never:
      break;
    }
  }

  const StackOffset &SVEStackSize = getSVEStackSize(MF);

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");

    // When we are about to restore the CSRs, the CFA register is SP again.
    if (EmitCFI && hasFP(MF)) {
      const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
      BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameDestroy);
    }

    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
                    TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
                    &HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes));
    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Process the SVE callee-saves to determine what space needs to be
  // deallocated.
  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
  MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
    RestoreBegin = std::prev(RestoreEnd);
    while (RestoreBegin != MBB.begin() &&
           IsSVECalleeSave(std::prev(RestoreBegin)))
      --RestoreBegin;

    assert(IsSVECalleeSave(RestoreBegin) &&
           IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");

    StackOffset CalleeSavedSizeAsOffset =
        StackOffset::getScalable(CalleeSavedSize);
    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
    DeallocateAfter = CalleeSavedSizeAsOffset;
  }

  // Deallocate the SVE area.
  if (SVEStackSize) {
    // If we have stack realignment or variable sized objects on the stack,
    // restore the stack pointer from the frame pointer prior to SVE CSR
    // restoration.
    if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
        // Set SP to start of SVE callee-save area from which they can
        // be reloaded. The code below will deallocate the stack space
        // space by moving FP -> SP.
        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
                        StackOffset::getScalable(-CalleeSavedSize), TII,
                        MachineInstr::FrameDestroy);
      }
    } else {
      if (AFI->getSVECalleeSavedStackSize()) {
        // Deallocate the non-SVE locals first before we can deallocate (and
        // restore callee saves) from the SVE area.
        emitFrameOffset(
            MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
            StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
            false, false, nullptr, EmitCFI && !hasFP(MF),
            SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
        NumBytes = 0;
      }

      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
                      DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
                      false, nullptr, EmitCFI && !hasFP(MF),
                      SVEStackSize +
                          StackOffset::getFixed(NumBytes + PrologueSaveSize));

      emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                      DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
                      false, nullptr, EmitCFI && !hasFP(MF),
                      DeallocateAfter +
                          StackOffset::getFixed(NumBytes + PrologueSaveSize));
    }
    if (EmitCFI)
      emitCalleeSavedSVERestores(MBB, RestoreEnd);
  }

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && AfterCSRPopSize == 0)
      return;

    // Pop the local variables off the stack. If there are no callee-saved
    // registers, it means we are actually positioned at the terminator and can
    // combine stack increment for the locals and the stack increment for
    // callee-popped arguments into (possibly) a single instruction and be done.
    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;

    emitFrameOffset(
        MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
        StackOffset::getFixed(StackRestoreBytes), TII,
        MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
        StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));

    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
      return;
    }

    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
    emitFrameOffset(
        MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
        StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
        TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
  } else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(NumBytes), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

  // When we are about to restore the CSRs, the CFA register is SP again.
  if (EmitCFI && hasFP(MF)) {
    const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
    unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize));
    BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameDestroy);
  }

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (AfterCSRPopSize) {
    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
                                  "interrupt may have clobbered");

    emitFrameOffset(
        MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
        StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
        false, NeedsWinCFI, &HasWinCFI, EmitCFI,
        StackOffset::getFixed(CombineAfterCSRBump ?
                              PrologueSaveSize : 0));
  }
}

// Only run the CFI fixup pass for functions that need async unwind info.
bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const {
  return TargetFrameLowering::enableCFIFixup(MF) &&
         MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
StackOffset
AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             Register &FrameReg) const {
  return resolveFrameIndexReference(
      MF, FI, FrameReg,
      // Prefer FP-relative addressing under HWASan/MemTag so that the
      // sanitizers see a stable base register.
      /*PreferFP=*/
      MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) ||
          MF.getFunction().hasFnAttribute(Attribute::SanitizeMemTag),
      /*ForSimm=*/false);
}

StackOffset
AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
                                                   int FI) const {
  // This function serves to provide a comparable offset from a single reference
  // point (the value of SP at function entry) that can be used for analysis,
  // e.g. the stack-frame-layout analysis pass. It is not guaranteed to be
  // correct for all objects in the presence of VLA-area objects or dynamic
  // stack re-alignment.

  const auto &MFI = MF.getFrameInfo();

  int64_t ObjectOffset = MFI.getObjectOffset(FI);
  StackOffset SVEStackSize = getSVEStackSize(MF);

  // For VLA-area objects, just emit an offset at the end of the stack frame.
  // Whilst not quite correct, these objects do live at the end of the frame and
  // so it is more useful for analysis for the offset to reflect this.
  if (MFI.isVariableSizedObjectIndex(FI)) {
    return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
  }

  // This is correct in the absence of any SVE stack objects.
  if (!SVEStackSize)
    return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());

  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
    // Scalable objects: fixed part is the (negated) GPR callee-save size,
    // scalable part is the object's own offset.
    return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
                            ObjectOffset);
  }

  bool IsFixed = MFI.isFixedObjectIndex(FI);
  bool IsCSR =
      !IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));

  // Locals (neither fixed nor CSR) sit below the SVE area, so shift them by
  // the whole scalable stack size.
  StackOffset ScalableOffset = {};
  if (!IsFixed && !IsCSR)
    ScalableOffset = -SVEStackSize;

  return StackOffset::getFixed(ObjectOffset) + ScalableOffset;
}

// Windows SEH: frame-index references for non-local uses are resolved via the
// SEH frame-index offset helper.
StackOffset
AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
                                                     int FI) const {
  return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
}

static StackOffset getFPOffset(const MachineFunction &MF,
                               int64_t ObjectOffset) {
  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const Function &F = MF.getFunction();
  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject =
266662cfcf62SDimitry Andric getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); 2667e8d8bef9SDimitry Andric int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); 2668e8d8bef9SDimitry Andric int64_t FPAdjust = 2669e8d8bef9SDimitry Andric CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); 2670e8d8bef9SDimitry Andric return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); 26710b57cec5SDimitry Andric } 26720b57cec5SDimitry Andric 2673e8d8bef9SDimitry Andric static StackOffset getStackOffset(const MachineFunction &MF, 2674e8d8bef9SDimitry Andric int64_t ObjectOffset) { 26750b57cec5SDimitry Andric const auto &MFI = MF.getFrameInfo(); 2676e8d8bef9SDimitry Andric return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); 26770b57cec5SDimitry Andric } 26780b57cec5SDimitry Andric 2679e8d8bef9SDimitry Andric // TODO: This function currently does not work for scalable vectors. 26800b57cec5SDimitry Andric int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, 26810b57cec5SDimitry Andric int FI) const { 26820b57cec5SDimitry Andric const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( 26830b57cec5SDimitry Andric MF.getSubtarget().getRegisterInfo()); 26840b57cec5SDimitry Andric int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); 26850b57cec5SDimitry Andric return RegInfo->getLocalAddressRegister(MF) == AArch64::FP 2686e8d8bef9SDimitry Andric ? 
getFPOffset(MF, ObjectOffset).getFixed() 2687e8d8bef9SDimitry Andric : getStackOffset(MF, ObjectOffset).getFixed(); 26880b57cec5SDimitry Andric } 26890b57cec5SDimitry Andric 26908bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameIndexReference( 26915ffd83dbSDimitry Andric const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, 26920b57cec5SDimitry Andric bool ForSimm) const { 26930b57cec5SDimitry Andric const auto &MFI = MF.getFrameInfo(); 2694480093f4SDimitry Andric int64_t ObjectOffset = MFI.getObjectOffset(FI); 26950b57cec5SDimitry Andric bool isFixed = MFI.isFixedObjectIndex(FI); 2696e8d8bef9SDimitry Andric bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector; 26978bcb0991SDimitry Andric return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, 26980b57cec5SDimitry Andric PreferFP, ForSimm); 26990b57cec5SDimitry Andric } 27000b57cec5SDimitry Andric 27018bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameOffsetReference( 2702480093f4SDimitry Andric const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE, 27035ffd83dbSDimitry Andric Register &FrameReg, bool PreferFP, bool ForSimm) const { 27040b57cec5SDimitry Andric const auto &MFI = MF.getFrameInfo(); 27050b57cec5SDimitry Andric const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( 27060b57cec5SDimitry Andric MF.getSubtarget().getRegisterInfo()); 27070b57cec5SDimitry Andric const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); 27080b57cec5SDimitry Andric const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 27090b57cec5SDimitry Andric 2710e8d8bef9SDimitry Andric int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed(); 2711e8d8bef9SDimitry Andric int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); 27120b57cec5SDimitry Andric bool isCSR = 2713480093f4SDimitry Andric !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); 27140b57cec5SDimitry Andric 27158bcb0991SDimitry 
  const StackOffset &SVEStackSize = getSVEStackSize(MF);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame() && !isSVE) {
    // We shouldn't prefer using the FP to access fixed-sized stack objects when
    // there are scalable (SVE) objects in between the FP and the fixed-sized
    // objects.
    PreferFP &= !SVEStackSize;

    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
      // References to the CSR area must use FP if we're re-aligning the stack
      // since the dynamically-sized alignment padding is between the SP/BP and
      // the CSR area.
      assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
      UseFP = true;
    } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
      // If the FPOffset is negative and we're producing a signed immediate, we
      // have to keep in mind that the available offset range for negative
      // offsets is smaller than for positive ones. If an offset is available
      // via the FP and the SP, use whichever is closest.
      bool FPOffsetFits = !ForSimm || FPOffset >= -256;
      PreferFP |= Offset > -FPOffset && !SVEStackSize;

      if (MFI.hasVarSizedObjects()) {
        // If we have variable sized objects, we can use either FP or BP, as the
        // SP offset is unknown. We can use the base pointer if we have one and
        // FP is not preferred. If not, we're stuck with using FP.
        bool CanUseBP = RegInfo->hasBasePointer(MF);
        if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
          UseFP = PreferFP;
        else if (!CanUseBP) // Can't use BP. Forced to use FP.
          UseFP = true;
        // else we can use BP and FP, but the offset from FP won't fit.
        // That will make us scavenge registers which we can probably avoid by
        // using BP. If it won't fit for BP either, we'll scavenge anyway.
      } else if (FPOffset >= 0) {
        // Use SP or FP, whichever gives us the best chance of the offset
        // being in range for direct access. If the FPOffset is positive,
        // that'll always be best, as the SP will be even further away.
        UseFP = true;
      } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
        // Funclets access the locals contained in the parent's stack frame
        // via the frame pointer, so we have to use the FP in the parent
        // function.
        (void) Subtarget;
        assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
                                            MF.getFunction().isVarArg()) &&
               "Funclets should only be present on Win64");
        UseFP = true;
      } else {
        // We have the choice between FP and (SP or BP).
        if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
          UseFP = true;
      }
    }
  }

  assert(
      ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
      "In the presence of dynamic stack pointer realignment, "
      "non-argument/CSR objects cannot be accessed through the frame pointer");

  if (isSVE) {
    // Candidate FP-relative offset: purely scalable distance from the frame
    // record base. Candidate SP-relative offset: whole SVE area plus the
    // fixed distance from SP to the bottom of the callee-save area.
    StackOffset FPOffset =
        StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
    StackOffset SPOffset =
        SVEStackSize +
        StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
                         ObjectOffset);
    // Always use the FP for SVE spills if available and beneficial.
    if (hasFP(MF) && (SPOffset.getFixed() ||
                      FPOffset.getScalable() < SPOffset.getScalable() ||
                      RegInfo->hasStackRealignment(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }

    FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
                                           : (unsigned)AArch64::SP;
    return SPOffset;
  }

  // Non-SVE object: add/subtract the SVE area size depending on whether the
  // chosen base register and the object lie on opposite sides of it.
  StackOffset ScalableOffset = {};
  if (UseFP && !(isFixed || isCSR))
    ScalableOffset = -SVEStackSize;
  if (!UseFP && (isFixed || isCSR))
    ScalableOffset = SVEStackSize;

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return StackOffset::getFixed(FPOffset) + ScalableOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    assert(!MFI.hasVarSizedObjects() &&
           "Can't use SP when we have var sized objects.");
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return StackOffset::getFixed(Offset) + ScalableOffset;
}

/// Compute the kill-flag reg-state to attach when spilling \p Reg in the
/// prologue.
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in.
This 28370b57cec5SDimitry Andric // happens with the @llvm-returnaddress intrinsic and with arguments passed in 28380b57cec5SDimitry Andric // callee saved registers. 28390b57cec5SDimitry Andric // Omitting the kill flags is conservatively correct even if the live-in 28400b57cec5SDimitry Andric // is not used after all. 28410b57cec5SDimitry Andric bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg); 28420b57cec5SDimitry Andric return getKillRegState(!IsLiveIn); 28430b57cec5SDimitry Andric } 28440b57cec5SDimitry Andric 28450b57cec5SDimitry Andric static bool produceCompactUnwindFrame(MachineFunction &MF) { 28460b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 28470b57cec5SDimitry Andric AttributeList Attrs = MF.getFunction().getAttributes(); 28480b57cec5SDimitry Andric return Subtarget.isTargetMachO() && 28490b57cec5SDimitry Andric !(Subtarget.getTargetLowering()->supportSwiftError() && 2850fe6060f1SDimitry Andric Attrs.hasAttrSomewhere(Attribute::SwiftError)) && 2851fe6060f1SDimitry Andric MF.getFunction().getCallingConv() != CallingConv::SwiftTail; 28520b57cec5SDimitry Andric } 28530b57cec5SDimitry Andric 28540b57cec5SDimitry Andric static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, 2855bdd1243dSDimitry Andric bool NeedsWinCFI, bool IsFirst, 2856bdd1243dSDimitry Andric const TargetRegisterInfo *TRI) { 28570b57cec5SDimitry Andric // If we are generating register pairs for a Windows function that requires 28580b57cec5SDimitry Andric // EH support, then pair consecutive registers only. There are no unwind 28590b57cec5SDimitry Andric // opcodes for saves/restores of non-consectuve register pairs. 2860e8d8bef9SDimitry Andric // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x, 2861e8d8bef9SDimitry Andric // save_lrpair. 
28620b57cec5SDimitry Andric // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling 28630b57cec5SDimitry Andric 2864480093f4SDimitry Andric if (Reg2 == AArch64::FP) 2865480093f4SDimitry Andric return true; 28660b57cec5SDimitry Andric if (!NeedsWinCFI) 28670b57cec5SDimitry Andric return false; 2868bdd1243dSDimitry Andric if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1) 28690b57cec5SDimitry Andric return false; 2870e8d8bef9SDimitry Andric // If pairing a GPR with LR, the pair can be described by the save_lrpair 2871e8d8bef9SDimitry Andric // opcode. If this is the first register pair, it would end up with a 2872e8d8bef9SDimitry Andric // predecrement, but there's no save_lrpair_x opcode, so we can only do this 2873e8d8bef9SDimitry Andric // if LR is paired with something else than the first register. 2874e8d8bef9SDimitry Andric // The save_lrpair opcode requires the first register to be an odd one. 2875e8d8bef9SDimitry Andric if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 && 2876e8d8bef9SDimitry Andric (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst) 2877e8d8bef9SDimitry Andric return false; 28780b57cec5SDimitry Andric return true; 28790b57cec5SDimitry Andric } 28800b57cec5SDimitry Andric 28818bcb0991SDimitry Andric /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction. 28828bcb0991SDimitry Andric /// WindowsCFI requires that only consecutive registers can be paired. 28838bcb0991SDimitry Andric /// LR and FP need to be allocated together when the frame needs to save 28848bcb0991SDimitry Andric /// the frame-record. This means any other register pairing with LR is invalid. 
28858bcb0991SDimitry Andric static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, 2886e8d8bef9SDimitry Andric bool UsesWinAAPCS, bool NeedsWinCFI, 2887bdd1243dSDimitry Andric bool NeedsFrameRecord, bool IsFirst, 2888bdd1243dSDimitry Andric const TargetRegisterInfo *TRI) { 2889480093f4SDimitry Andric if (UsesWinAAPCS) 2890bdd1243dSDimitry Andric return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst, 2891bdd1243dSDimitry Andric TRI); 28928bcb0991SDimitry Andric 28938bcb0991SDimitry Andric // If we need to store the frame record, don't pair any register 28948bcb0991SDimitry Andric // with LR other than FP. 28958bcb0991SDimitry Andric if (NeedsFrameRecord) 28968bcb0991SDimitry Andric return Reg2 == AArch64::LR; 28978bcb0991SDimitry Andric 28988bcb0991SDimitry Andric return false; 28998bcb0991SDimitry Andric } 29008bcb0991SDimitry Andric 29010b57cec5SDimitry Andric namespace { 29020b57cec5SDimitry Andric 29030b57cec5SDimitry Andric struct RegPairInfo { 29040b57cec5SDimitry Andric unsigned Reg1 = AArch64::NoRegister; 29050b57cec5SDimitry Andric unsigned Reg2 = AArch64::NoRegister; 29060b57cec5SDimitry Andric int FrameIdx; 29070b57cec5SDimitry Andric int Offset; 29080fca6ea1SDimitry Andric enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type; 29090b57cec5SDimitry Andric 29100b57cec5SDimitry Andric RegPairInfo() = default; 29110b57cec5SDimitry Andric 29120b57cec5SDimitry Andric bool isPaired() const { return Reg2 != AArch64::NoRegister; } 2913480093f4SDimitry Andric 2914480093f4SDimitry Andric unsigned getScale() const { 2915480093f4SDimitry Andric switch (Type) { 2916480093f4SDimitry Andric case PPR: 2917480093f4SDimitry Andric return 2; 2918480093f4SDimitry Andric case GPR: 2919480093f4SDimitry Andric case FPR64: 29200fca6ea1SDimitry Andric case VG: 2921480093f4SDimitry Andric return 8; 2922480093f4SDimitry Andric case ZPR: 2923480093f4SDimitry Andric case FPR128: 2924480093f4SDimitry Andric return 16; 2925480093f4SDimitry Andric } 
    llvm_unreachable("Unsupported type");
  }

  // Scalable (SVE) registers are allocated in a separate, scalable region.
  bool isScalable() const { return Type == PPR || Type == ZPR; }
};

} // end anonymous namespace

// Returns a saved predicate register from P8..P15 (as its PN alias) to use
// for SVE fill/spill, or NoRegister if none is saved.
// NOTE(review): despite the name, this selects a register that IS in
// SavedRegs (i.e. already preserved and therefore usable) — confirm intent.
unsigned findFreePredicateReg(BitVector &SavedRegs) {
  for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
    if (SavedRegs.test(PReg)) {
      unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
      return PNReg;
    }
  }
  return AArch64::NoRegister;
}

/// Walk the callee-saved register list and group registers into (possibly
/// paired) RegPairInfo entries with their frame offsets assigned. Handles the
/// normal top-down fill, the WinCFI bottom-up fill, scalable (SVE) saves,
/// Swift async-context padding, and stack-hazard padding.
static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
    bool NeedsFrameRecord) {

  if (CSI.empty())
    return;

  bool IsWindows = isTargetWindows(MF);
  bool NeedsWinCFI = needsWinCFI(MF);
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction().getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
          CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
          CC == CallingConv::Win64 || (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  int ByteOffset = AFI->getCalleeSavedStackSize();
  int StackFillDir = -1;
  int RegInc = 1;
  unsigned FirstReg = 0;
  if (NeedsWinCFI) {
    // For WinCFI, fill the stack from the bottom up.
    ByteOffset = 0;
    StackFillDir = 1;
    // As the CSI array is reversed to match PrologEpilogInserter, iterate
    // backwards, to pair up registers starting from lower numbered registers.
    RegInc = -1;
    FirstReg = Count - 1;
  }
  int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
  bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
  Register LastReg = 0;

  // When iterating backwards, the loop condition relies on unsigned wraparound.
  for (unsigned i = FirstReg; i < Count; i += RegInc) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    // Classify the register so we know its slot size and pairing rules.
    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::GPR;
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR64;
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR128;
    else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::ZPR;
    else if (AArch64::PPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::PPR;
    else if (RPI.Reg1 == AArch64::VG)
      RPI.Type = RegPairInfo::VG;
    else
      llvm_unreachable("Unsupported register class.");

    // Add the stack hazard size as we transition from GPR->FPR CSRs.
    if (AFI->hasStackHazardSlotIndex() &&
        (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
        AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
      ByteOffset += StackFillDir * StackHazardSize;
    LastReg = RPI.Reg1;

    // Add the next reg to the pair if it is in the same register class.
    // (Never pair across a stack-hazard padding boundary.)
    if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
      Register NextReg = CSI[i + RegInc].getReg();
      bool IsFirst = i == FirstReg;
      switch (RPI.Type) {
      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
            !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
                                       NeedsWinCFI, NeedsFrameRecord, IsFirst,
                                       TRI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
                                              IsFirst, TRI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::PPR:
        break;
      case RegPairInfo::ZPR:
        // ZPRs pair only when a predicate register is reserved for
        // fill/spill, Reg1 has an even index, and Reg2 is its successor.
        if (AFI->getPredicateRegForFillSpill() != 0)
          if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
            RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::VG:
        break;
      }
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
           "Out of order callee saved regs!");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
            RPI.Reg1 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

    // Windows AAPCS has FP and LR reversed.
    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
            RPI.Reg2 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
            CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
            CC == CallingConv::Win64 ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();
    if (NeedsWinCFI &&
        RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
      RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
    int Scale = RPI.getScale();

    int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPre % Scale == 0);

    // Advance the running offset in the appropriate region (scalable vs.
    // fixed) by one or two slots, in the current fill direction.
    if (RPI.isScalable())
      ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
    else
      ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);

    // Swift's async context is directly before FP, so allocate an extra
    // 8 bytes for it.
    if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
      ByteOffset += StackFillDir * 8;

    // Round up size of non-pair to pair size if we need to pad the
    // callee-save area to ensure 16-byte alignment.
    if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
        RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
        ByteOffset % 16 != 0) {
      ByteOffset += 8 * StackFillDir;
      assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
      // A stack frame with a gap looks like this, bottom up:
      // d9, d8. x21, gap, x20, x19.
      // Set extra alignment on the x21 object to create the gap above it.
      MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
      NeedGapToAlignStack = false;
    }

    int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPost % Scale == 0);
    // If filling top down (default), we want the offset after incrementing it.
    // If filling bottom up (WinCFI) we need the original offset.
    int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;

    // The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
    // Swift context can directly precede FP.
    if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
      Offset += 8;
    RPI.Offset = Offset / Scale;

    assert((!RPI.isPaired() ||
            (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
            (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
           "Offset out of bounds for LDP/STP immediate");

    // Save the offset to frame record so that the FP register can point to the
    // innermost frame record (spilled FP and LR registers).
    if (NeedsFrameRecord &&
        ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
      AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      i += RegInc;
  }
  if (NeedsWinCFI) {
    // If we need an alignment gap in the stack, align the topmost stack
    // object. A stack frame with a gap looks like this, bottom up:
    // x19, d8. d9, gap.
    // Set extra alignment on the topmost stack object (the first element in
    // CSI, which goes top down), to create the gap above it.
    if (AFI->hasCalleeSaveStackFreeSpace())
      MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
    // We iterated bottom up over the registers; flip RegPairs back to top
    // down order.
    std::reverse(RegPairs.begin(), RegPairs.end());
  }
}

bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool NeedsWinCFI = needsWinCFI(MF);
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  // A frame record is needed whenever this function keeps a frame pointer.
  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));

  MachineRegisterInfo &MRI = MF.getRegInfo();
  // Refresh the reserved regs in case there are any potential changes since the
  // last freeze.
31660fca6ea1SDimitry Andric MRI.freezeReservedRegs(); 31670fca6ea1SDimitry Andric 3168fe6060f1SDimitry Andric if (homogeneousPrologEpilog(MF)) { 3169fe6060f1SDimitry Andric auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog)) 3170fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 3171fe6060f1SDimitry Andric 3172fe6060f1SDimitry Andric for (auto &RPI : RegPairs) { 3173fe6060f1SDimitry Andric MIB.addReg(RPI.Reg1); 3174fe6060f1SDimitry Andric MIB.addReg(RPI.Reg2); 3175fe6060f1SDimitry Andric 3176fe6060f1SDimitry Andric // Update register live in. 3177fe6060f1SDimitry Andric if (!MRI.isReserved(RPI.Reg1)) 3178fe6060f1SDimitry Andric MBB.addLiveIn(RPI.Reg1); 31795f757f3fSDimitry Andric if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2)) 3180fe6060f1SDimitry Andric MBB.addLiveIn(RPI.Reg2); 3181fe6060f1SDimitry Andric } 3182fe6060f1SDimitry Andric return true; 3183fe6060f1SDimitry Andric } 31840fca6ea1SDimitry Andric bool PTrueCreated = false; 3185349cc55cSDimitry Andric for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) { 31860b57cec5SDimitry Andric unsigned Reg1 = RPI.Reg1; 31870b57cec5SDimitry Andric unsigned Reg2 = RPI.Reg2; 31880b57cec5SDimitry Andric unsigned StrOpc; 31890b57cec5SDimitry Andric 31900b57cec5SDimitry Andric // Issue sequence of spills for cs regs. The first spill may be converted 31910b57cec5SDimitry Andric // to a pre-decrement store later by emitPrologue if the callee-save stack 31920b57cec5SDimitry Andric // area allocation can't be combined with the local stack area allocation. 31930b57cec5SDimitry Andric // For example: 31940b57cec5SDimitry Andric // stp x22, x21, [sp, #0] // addImm(+0) 31950b57cec5SDimitry Andric // stp x20, x19, [sp, #16] // addImm(+2) 31960b57cec5SDimitry Andric // stp fp, lr, [sp, #32] // addImm(+4) 31970b57cec5SDimitry Andric // Rationale: This sequence saves uop updates compared to a sequence of 31980b57cec5SDimitry Andric // pre-increment spills like stp xi,xj,[sp,#-16]! 
31990b57cec5SDimitry Andric // Note: Similar rationale and sequence for restores in epilog. 32005ffd83dbSDimitry Andric unsigned Size; 32015ffd83dbSDimitry Andric Align Alignment; 32020b57cec5SDimitry Andric switch (RPI.Type) { 32030b57cec5SDimitry Andric case RegPairInfo::GPR: 32040b57cec5SDimitry Andric StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; 32050b57cec5SDimitry Andric Size = 8; 32065ffd83dbSDimitry Andric Alignment = Align(8); 32070b57cec5SDimitry Andric break; 32080b57cec5SDimitry Andric case RegPairInfo::FPR64: 32090b57cec5SDimitry Andric StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; 32100b57cec5SDimitry Andric Size = 8; 32115ffd83dbSDimitry Andric Alignment = Align(8); 32120b57cec5SDimitry Andric break; 32130b57cec5SDimitry Andric case RegPairInfo::FPR128: 32140b57cec5SDimitry Andric StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui; 32150b57cec5SDimitry Andric Size = 16; 32165ffd83dbSDimitry Andric Alignment = Align(16); 32170b57cec5SDimitry Andric break; 3218480093f4SDimitry Andric case RegPairInfo::ZPR: 32190fca6ea1SDimitry Andric StrOpc = RPI.isPaired() ? 
AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI; 3220480093f4SDimitry Andric Size = 16; 32215ffd83dbSDimitry Andric Alignment = Align(16); 3222480093f4SDimitry Andric break; 3223480093f4SDimitry Andric case RegPairInfo::PPR: 3224480093f4SDimitry Andric StrOpc = AArch64::STR_PXI; 3225480093f4SDimitry Andric Size = 2; 32265ffd83dbSDimitry Andric Alignment = Align(2); 3227480093f4SDimitry Andric break; 32280fca6ea1SDimitry Andric case RegPairInfo::VG: 32290fca6ea1SDimitry Andric StrOpc = AArch64::STRXui; 32300fca6ea1SDimitry Andric Size = 8; 32310fca6ea1SDimitry Andric Alignment = Align(8); 32320fca6ea1SDimitry Andric break; 32330b57cec5SDimitry Andric } 32340fca6ea1SDimitry Andric 32350fca6ea1SDimitry Andric unsigned X0Scratch = AArch64::NoRegister; 32360fca6ea1SDimitry Andric if (Reg1 == AArch64::VG) { 32370fca6ea1SDimitry Andric // Find an available register to store value of VG to. 32380fca6ea1SDimitry Andric Reg1 = findScratchNonCalleeSaveRegister(&MBB); 32390fca6ea1SDimitry Andric assert(Reg1 != AArch64::NoRegister); 32400fca6ea1SDimitry Andric SMEAttrs Attrs(MF.getFunction()); 32410fca6ea1SDimitry Andric 32420fca6ea1SDimitry Andric if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() && 32430fca6ea1SDimitry Andric AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) { 32440fca6ea1SDimitry Andric // For locally-streaming functions, we need to store both the streaming 32450fca6ea1SDimitry Andric // & non-streaming VG. Spill the streaming value first. 
32460fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1) 32470fca6ea1SDimitry Andric .addImm(1) 32480fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32490fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1) 32500fca6ea1SDimitry Andric .addReg(Reg1) 32510fca6ea1SDimitry Andric .addImm(3) 32520fca6ea1SDimitry Andric .addImm(63) 32530fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32540fca6ea1SDimitry Andric 32550fca6ea1SDimitry Andric AFI->setStreamingVGIdx(RPI.FrameIdx); 32560fca6ea1SDimitry Andric } else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) { 32570fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1) 32580fca6ea1SDimitry Andric .addImm(31) 32590fca6ea1SDimitry Andric .addImm(1) 32600fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32610fca6ea1SDimitry Andric AFI->setVGIdx(RPI.FrameIdx); 32620fca6ea1SDimitry Andric } else { 32630fca6ea1SDimitry Andric const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); 32640fca6ea1SDimitry Andric if (llvm::any_of( 32650fca6ea1SDimitry Andric MBB.liveins(), 32660fca6ea1SDimitry Andric [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { 32670fca6ea1SDimitry Andric return STI.getRegisterInfo()->isSuperOrSubRegisterEq( 32680fca6ea1SDimitry Andric AArch64::X0, LiveIn.PhysReg); 32690fca6ea1SDimitry Andric })) 32700fca6ea1SDimitry Andric X0Scratch = Reg1; 32710fca6ea1SDimitry Andric 32720fca6ea1SDimitry Andric if (X0Scratch != AArch64::NoRegister) 32730fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1) 32740fca6ea1SDimitry Andric .addReg(AArch64::XZR) 32750fca6ea1SDimitry Andric .addReg(AArch64::X0, RegState::Undef) 32760fca6ea1SDimitry Andric .addReg(AArch64::X0, RegState::Implicit) 32770fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32780fca6ea1SDimitry Andric 32790fca6ea1SDimitry Andric const uint32_t *RegMask = TRI->getCallPreservedMask( 
32800fca6ea1SDimitry Andric MF, 32810fca6ea1SDimitry Andric CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1); 32820fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::BL)) 32830fca6ea1SDimitry Andric .addExternalSymbol("__arm_get_current_vg") 32840fca6ea1SDimitry Andric .addRegMask(RegMask) 32850fca6ea1SDimitry Andric .addReg(AArch64::X0, RegState::ImplicitDefine) 32860fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32870fca6ea1SDimitry Andric Reg1 = AArch64::X0; 32880fca6ea1SDimitry Andric AFI->setVGIdx(RPI.FrameIdx); 32890fca6ea1SDimitry Andric } 32900fca6ea1SDimitry Andric } 32910fca6ea1SDimitry Andric 32920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI); 32930b57cec5SDimitry Andric if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); 32940b57cec5SDimitry Andric dbgs() << ") -> fi#(" << RPI.FrameIdx; 32950b57cec5SDimitry Andric if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; 32960b57cec5SDimitry Andric dbgs() << ")\n"); 32970b57cec5SDimitry Andric 32980b57cec5SDimitry Andric assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) && 32990b57cec5SDimitry Andric "Windows unwdinding requires a consecutive (FP,LR) pair"); 33000b57cec5SDimitry Andric // Windows unwind codes require consecutive registers if registers are 33010b57cec5SDimitry Andric // paired. Make the switch here, so that the code below will save (x,x+1) 33020b57cec5SDimitry Andric // and not (x+1,x). 
33030b57cec5SDimitry Andric unsigned FrameIdxReg1 = RPI.FrameIdx; 33040b57cec5SDimitry Andric unsigned FrameIdxReg2 = RPI.FrameIdx + 1; 33050b57cec5SDimitry Andric if (NeedsWinCFI && RPI.isPaired()) { 33060b57cec5SDimitry Andric std::swap(Reg1, Reg2); 33070b57cec5SDimitry Andric std::swap(FrameIdxReg1, FrameIdxReg2); 33080b57cec5SDimitry Andric } 33090fca6ea1SDimitry Andric 33100fca6ea1SDimitry Andric if (RPI.isPaired() && RPI.isScalable()) { 33110fca6ea1SDimitry Andric [[maybe_unused]] const AArch64Subtarget &Subtarget = 33120fca6ea1SDimitry Andric MF.getSubtarget<AArch64Subtarget>(); 33130fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 33140fca6ea1SDimitry Andric unsigned PnReg = AFI->getPredicateRegForFillSpill(); 33150fca6ea1SDimitry Andric assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) && 33160fca6ea1SDimitry Andric "Expects SVE2.1 or SME2 target and a predicate register"); 33170fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS 33180fca6ea1SDimitry Andric auto IsPPR = [](const RegPairInfo &c) { 33190fca6ea1SDimitry Andric return c.Reg1 == RegPairInfo::PPR; 33200fca6ea1SDimitry Andric }; 33210fca6ea1SDimitry Andric auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); 33220fca6ea1SDimitry Andric auto IsZPR = [](const RegPairInfo &c) { 33230fca6ea1SDimitry Andric return c.Type == RegPairInfo::ZPR; 33240fca6ea1SDimitry Andric }; 33250fca6ea1SDimitry Andric auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR); 33260fca6ea1SDimitry Andric assert(!(PPRBegin < ZPRBegin) && 33270fca6ea1SDimitry Andric "Expected callee save predicate to be handled first"); 33280fca6ea1SDimitry Andric #endif 33290fca6ea1SDimitry Andric if (!PTrueCreated) { 33300fca6ea1SDimitry Andric PTrueCreated = true; 33310fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::PTRUE_C_B), PnReg) 33320fca6ea1SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 33330fca6ea1SDimitry Andric } 
33340fca6ea1SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); 33350fca6ea1SDimitry Andric if (!MRI.isReserved(Reg1)) 33360fca6ea1SDimitry Andric MBB.addLiveIn(Reg1); 33370fca6ea1SDimitry Andric if (!MRI.isReserved(Reg2)) 33380fca6ea1SDimitry Andric MBB.addLiveIn(Reg2); 33390fca6ea1SDimitry Andric MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0)); 33400fca6ea1SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33410fca6ea1SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), 33420fca6ea1SDimitry Andric MachineMemOperand::MOStore, Size, Alignment)); 33430fca6ea1SDimitry Andric MIB.addReg(PnReg); 33440fca6ea1SDimitry Andric MIB.addReg(AArch64::SP) 33450fca6ea1SDimitry Andric .addImm(RPI.Offset) // [sp, #offset*scale], 33460fca6ea1SDimitry Andric // where factor*scale is implicit 33470fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 33480fca6ea1SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33490fca6ea1SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), 33500fca6ea1SDimitry Andric MachineMemOperand::MOStore, Size, Alignment)); 33510fca6ea1SDimitry Andric if (NeedsWinCFI) 33520fca6ea1SDimitry Andric InsertSEH(MIB, TII, MachineInstr::FrameSetup); 33530fca6ea1SDimitry Andric } else { // The code when the pair of ZReg is not present 33540b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); 33550b57cec5SDimitry Andric if (!MRI.isReserved(Reg1)) 33560b57cec5SDimitry Andric MBB.addLiveIn(Reg1); 33570b57cec5SDimitry Andric if (RPI.isPaired()) { 33580b57cec5SDimitry Andric if (!MRI.isReserved(Reg2)) 33590b57cec5SDimitry Andric MBB.addLiveIn(Reg2); 33600b57cec5SDimitry Andric MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); 33610b57cec5SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33620b57cec5SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), 33635ffd83dbSDimitry Andric MachineMemOperand::MOStore, Size, 
Alignment)); 33640b57cec5SDimitry Andric } 33650b57cec5SDimitry Andric MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) 33660b57cec5SDimitry Andric .addReg(AArch64::SP) 33670b57cec5SDimitry Andric .addImm(RPI.Offset) // [sp, #offset*scale], 33680b57cec5SDimitry Andric // where factor*scale is implicit 33690b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 33700b57cec5SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33710b57cec5SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), 33725ffd83dbSDimitry Andric MachineMemOperand::MOStore, Size, Alignment)); 33730b57cec5SDimitry Andric if (NeedsWinCFI) 33740b57cec5SDimitry Andric InsertSEH(MIB, TII, MachineInstr::FrameSetup); 33750fca6ea1SDimitry Andric } 3376480093f4SDimitry Andric // Update the StackIDs of the SVE stack slots. 3377480093f4SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 33780fca6ea1SDimitry Andric if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) { 33790fca6ea1SDimitry Andric MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector); 33800fca6ea1SDimitry Andric if (RPI.isPaired()) 33810fca6ea1SDimitry Andric MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector); 33820fca6ea1SDimitry Andric } 3383480093f4SDimitry Andric 33840fca6ea1SDimitry Andric if (X0Scratch != AArch64::NoRegister) 33850fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0) 33860fca6ea1SDimitry Andric .addReg(AArch64::XZR) 33870fca6ea1SDimitry Andric .addReg(X0Scratch, RegState::Undef) 33880fca6ea1SDimitry Andric .addReg(X0Scratch, RegState::Implicit) 33890fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 33900b57cec5SDimitry Andric } 33910b57cec5SDimitry Andric return true; 33920b57cec5SDimitry Andric } 33930b57cec5SDimitry Andric 33940b57cec5SDimitry Andric bool AArch64FrameLowering::restoreCalleeSavedRegisters( 339581ad6265SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 33965ffd83dbSDimitry Andric 
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;
  bool NeedsWinCFI = needsWinCFI(MF);

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Recompute the same pairing that was used for the spills so the reloads
  // read back from the matching slots.
  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
  // Homogeneous prolog/epilog: a single outlined helper pseudo restores all
  // callee saves; no individual loads are emitted.
  if (homogeneousPrologEpilog(MF, &MBB)) {
    auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
                   .setMIFlag(MachineInstr::FrameDestroy);
    for (auto &RPI : RegPairs) {
      MIB.addReg(RPI.Reg1, RegState::Define);
      MIB.addReg(RPI.Reg2, RegState::Define);
    }
    return true;
  }

  // For performance reasons restore SVE register in increasing order
  auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
  auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
  auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
  std::reverse(PPRBegin, PPREnd);
  auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
  auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
  auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
  std::reverse(ZPRBegin, ZPREnd);

  bool PTrueCreated = false;
  for (const RegPairInfo &RPI : RegPairs) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs. The last restore may be converted
    // to a post-increment load later by emitEpilogue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    //    ldp     x22, x21, [sp, #0]      // addImm(+0)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    unsigned Size;
    Align Alignment;
    switch (RPI.Type) {
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::ZPR:
      LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      Size = 2;
      Alignment = Align(2);
      break;
    case RegPairInfo::VG:
      // VG was spilled from a scratch register; there is nothing to reload.
      continue;
    }
    LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    // Windows unwind codes require consecutive registers if registers are
    // paired.  Make the switch here, so that the code below will save (x,x+1)
    // and not (x+1,x).
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
      std::swap(Reg1, Reg2);
      std::swap(FrameIdxReg1, FrameIdxReg2);
    }

    AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    if (RPI.isPaired() && RPI.isScalable()) {
      // Paired ZPR reload: predicated LD1B of a Z-register pair, mirroring
      // the ST1B used in the prologue. The all-true predicate is created
      // once and reused for all such reloads.
      [[maybe_unused]] const AArch64Subtarget &Subtarget =
          MF.getSubtarget<AArch64Subtarget>();
      unsigned PnReg = AFI->getPredicateRegForFillSpill();
      assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
#endif
      if (!PTrueCreated) {
        PTrueCreated = true;
        BuildMI(MBB, MBBI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
            .setMIFlags(MachineInstr::FrameDestroy);
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
      MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
                 getDefRegState(true));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
          MachineMemOperand::MOLoad, Size, Alignment));
      MIB.addReg(PnReg);
      MIB.addReg(AArch64::SP)
          .addImm(RPI.Offset) // [sp, #offset*scale]
                              // where factor*scale is implicit
          .setMIFlag(MachineInstr::FrameDestroy);
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
          MachineMemOperand::MOLoad, Size, Alignment));
      if (NeedsWinCFI)
        InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
    } else {
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
      if (RPI.isPaired()) {
        MIB.addReg(Reg2, getDefRegState(true));
        MIB.addMemOperand(MF.getMachineMemOperand(
            MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
            MachineMemOperand::MOLoad, Size, Alignment));
      }
      MIB.addReg(Reg1, getDefRegState(true));
      MIB.addReg(AArch64::SP)
          .addImm(RPI.Offset) // [sp, #offset*scale]
                              // where factor*scale is implicit
          .setMIFlag(MachineInstr::FrameDestroy);
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
          MachineMemOperand::MOLoad, Size, Alignment));
      if (NeedsWinCFI)
        InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
    }
  }
  return true;
}

// Return the FrameID for a MMO.
3544*62987288SDimitry Andric static std::optional<int> getMMOFrameID(MachineMemOperand *MMO, 35450fca6ea1SDimitry Andric const MachineFrameInfo &MFI) { 35460fca6ea1SDimitry Andric auto *PSV = 35470fca6ea1SDimitry Andric dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue()); 35480fca6ea1SDimitry Andric if (PSV) 35490fca6ea1SDimitry Andric return std::optional<int>(PSV->getFrameIndex()); 35500fca6ea1SDimitry Andric 35510fca6ea1SDimitry Andric if (MMO->getValue()) { 35520fca6ea1SDimitry Andric if (auto *Al = dyn_cast<AllocaInst>(getUnderlyingObject(MMO->getValue()))) { 35530fca6ea1SDimitry Andric for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); 35540fca6ea1SDimitry Andric FI++) 35550fca6ea1SDimitry Andric if (MFI.getObjectAllocation(FI) == Al) 35560fca6ea1SDimitry Andric return FI; 355781ad6265SDimitry Andric } 35580b57cec5SDimitry Andric } 35590b57cec5SDimitry Andric 35600fca6ea1SDimitry Andric return std::nullopt; 35610fca6ea1SDimitry Andric } 35620fca6ea1SDimitry Andric 3563*62987288SDimitry Andric // Return the FrameID for a Load/Store instruction by looking at the first MMO. 3564*62987288SDimitry Andric static std::optional<int> getLdStFrameID(const MachineInstr &MI, 3565*62987288SDimitry Andric const MachineFrameInfo &MFI) { 3566*62987288SDimitry Andric if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) 3567*62987288SDimitry Andric return std::nullopt; 3568*62987288SDimitry Andric 3569*62987288SDimitry Andric return getMMOFrameID(*MI.memoperands_begin(), MFI); 3570*62987288SDimitry Andric } 3571*62987288SDimitry Andric 35720fca6ea1SDimitry Andric // Check if a Hazard slot is needed for the current function, and if so create 35730fca6ea1SDimitry Andric // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex, 35740fca6ea1SDimitry Andric // which can be used to determine if any hazard padding is needed. 
void AArch64FrameLowering::determineStackHazardSlot(
    MachineFunction &MF, BitVector &SavedRegs) const {
  // Bail out if hazard padding is disabled, misconfigured (must be a multiple
  // of 16), or a slot has already been created for this function.
  if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
      MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
    return;

  // Stack hazards are only needed in streaming functions.
  SMEAttrs Attrs(MF.getFunction());
  if (!StackHazardInNonStreaming && Attrs.hasNonStreamingInterfaceAndBody())
    return;

  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Add a hazard slot if there are any CSR FPR registers, or there are any
  // FP-only stack objects.
  bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
    return AArch64::FPR64RegClass.contains(Reg) ||
           AArch64::FPR128RegClass.contains(Reg) ||
           AArch64::ZPRRegClass.contains(Reg) ||
           AArch64::PPRRegClass.contains(Reg);
  });
  bool HasFPRStackObjects = false;
  if (!HasFPRCSRs) {
    // Classify every frame object by how it is accessed: bit 1 marks an
    // FP/SVE access, bit 0 a GPR access. An object accessed exclusively by
    // FP instructions ((B & 3) == 2) triggers hazard padding.
    std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
    for (auto &MBB : MF) {
      for (auto &MI : MBB) {
        std::optional<int> FI = getLdStFrameID(MI, MFI);
        if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
          if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
              AArch64InstrInfo::isFpOrNEON(MI))
            FrameObjects[*FI] |= 2;
          else
            FrameObjects[*FI] |= 1;
        }
      }
    }
    HasFPRStackObjects =
        any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
  }

  if (HasFPRCSRs || HasFPRStackObjects) {
    int ID = MFI.CreateStackObject(StackHazardSize, Align(16), false);
    LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "
                      << StackHazardSize << "\n");
    MF.getInfo<AArch64FunctionInfo>()->setStackHazardSlotIndex(ID);
  }
}

void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
36280b57cec5SDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::GHC) 36290b57cec5SDimitry Andric return; 36300b57cec5SDimitry Andric 36310b57cec5SDimitry Andric TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 36320b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( 36330b57cec5SDimitry Andric MF.getSubtarget().getRegisterInfo()); 36345ffd83dbSDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 36350b57cec5SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 36360b57cec5SDimitry Andric unsigned UnspilledCSGPR = AArch64::NoRegister; 36370b57cec5SDimitry Andric unsigned UnspilledCSGPRPaired = AArch64::NoRegister; 36380b57cec5SDimitry Andric 36390b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 36400b57cec5SDimitry Andric const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 36410b57cec5SDimitry Andric 36420b57cec5SDimitry Andric unsigned BasePointerReg = RegInfo->hasBasePointer(MF) 36430b57cec5SDimitry Andric ? RegInfo->getBaseRegister() 36440b57cec5SDimitry Andric : (unsigned)AArch64::NoRegister; 36450b57cec5SDimitry Andric 36460b57cec5SDimitry Andric unsigned ExtraCSSpill = 0; 36475f757f3fSDimitry Andric bool HasUnpairedGPR64 = false; 36480fca6ea1SDimitry Andric bool HasPairZReg = false; 36490b57cec5SDimitry Andric // Figure out which callee-saved registers to save/restore. 36500b57cec5SDimitry Andric for (unsigned i = 0; CSRegs[i]; ++i) { 36510b57cec5SDimitry Andric const unsigned Reg = CSRegs[i]; 36520b57cec5SDimitry Andric 36530b57cec5SDimitry Andric // Add the base pointer register to SavedRegs if it is callee-save. 
36540b57cec5SDimitry Andric if (Reg == BasePointerReg) 36550b57cec5SDimitry Andric SavedRegs.set(Reg); 36560b57cec5SDimitry Andric 36570b57cec5SDimitry Andric bool RegUsed = SavedRegs.test(Reg); 3658480093f4SDimitry Andric unsigned PairedReg = AArch64::NoRegister; 36595f757f3fSDimitry Andric const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg); 36605f757f3fSDimitry Andric if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) || 36615f757f3fSDimitry Andric AArch64::FPR128RegClass.contains(Reg)) { 36625f757f3fSDimitry Andric // Compensate for odd numbers of GP CSRs. 36635f757f3fSDimitry Andric // For now, all the known cases of odd number of CSRs are of GPRs. 36645f757f3fSDimitry Andric if (HasUnpairedGPR64) 36655f757f3fSDimitry Andric PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1]; 36665f757f3fSDimitry Andric else 3667480093f4SDimitry Andric PairedReg = CSRegs[i ^ 1]; 36685f757f3fSDimitry Andric } 36695f757f3fSDimitry Andric 36705f757f3fSDimitry Andric // If the function requires all the GP registers to save (SavedRegs), 36715f757f3fSDimitry Andric // and there are an odd number of GP CSRs at the same time (CSRegs), 36725f757f3fSDimitry Andric // PairedReg could be in a different register class from Reg, which would 36735f757f3fSDimitry Andric // lead to a FPR (usually D8) accidentally being marked saved. 
36745f757f3fSDimitry Andric if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) { 36755f757f3fSDimitry Andric PairedReg = AArch64::NoRegister; 36765f757f3fSDimitry Andric HasUnpairedGPR64 = true; 36775f757f3fSDimitry Andric } 36785f757f3fSDimitry Andric assert(PairedReg == AArch64::NoRegister || 36795f757f3fSDimitry Andric AArch64::GPR64RegClass.contains(Reg, PairedReg) || 36805f757f3fSDimitry Andric AArch64::FPR64RegClass.contains(Reg, PairedReg) || 36815f757f3fSDimitry Andric AArch64::FPR128RegClass.contains(Reg, PairedReg)); 3682480093f4SDimitry Andric 36830b57cec5SDimitry Andric if (!RegUsed) { 36840b57cec5SDimitry Andric if (AArch64::GPR64RegClass.contains(Reg) && 36850b57cec5SDimitry Andric !RegInfo->isReservedReg(MF, Reg)) { 36860b57cec5SDimitry Andric UnspilledCSGPR = Reg; 36870b57cec5SDimitry Andric UnspilledCSGPRPaired = PairedReg; 36880b57cec5SDimitry Andric } 36890b57cec5SDimitry Andric continue; 36900b57cec5SDimitry Andric } 36910b57cec5SDimitry Andric 36920b57cec5SDimitry Andric // MachO's compact unwind format relies on all registers being stored in 36930b57cec5SDimitry Andric // pairs. 36940b57cec5SDimitry Andric // FIXME: the usual format is actually better if unwinding isn't needed. 
3695fe6060f1SDimitry Andric if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister && 36960b57cec5SDimitry Andric !SavedRegs.test(PairedReg)) { 36970b57cec5SDimitry Andric SavedRegs.set(PairedReg); 36980b57cec5SDimitry Andric if (AArch64::GPR64RegClass.contains(PairedReg) && 36990b57cec5SDimitry Andric !RegInfo->isReservedReg(MF, PairedReg)) 37000b57cec5SDimitry Andric ExtraCSSpill = PairedReg; 37010b57cec5SDimitry Andric } 37020fca6ea1SDimitry Andric // Check if there is a pair of ZRegs, so it can select PReg for spill/fill 37030fca6ea1SDimitry Andric HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) && 37040fca6ea1SDimitry Andric SavedRegs.test(CSRegs[i ^ 1])); 37050fca6ea1SDimitry Andric } 37060fca6ea1SDimitry Andric 37070fca6ea1SDimitry Andric if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) { 37080fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 37090fca6ea1SDimitry Andric // Find a suitable predicate register for the multi-vector spill/fill 37100fca6ea1SDimitry Andric // instructions. 37110fca6ea1SDimitry Andric unsigned PnReg = findFreePredicateReg(SavedRegs); 37120fca6ea1SDimitry Andric if (PnReg != AArch64::NoRegister) 37130fca6ea1SDimitry Andric AFI->setPredicateRegForFillSpill(PnReg); 37140fca6ea1SDimitry Andric // If no free callee-save has been found assign one. 
37150fca6ea1SDimitry Andric if (!AFI->getPredicateRegForFillSpill() && 37160fca6ea1SDimitry Andric MF.getFunction().getCallingConv() == 37170fca6ea1SDimitry Andric CallingConv::AArch64_SVE_VectorCall) { 37180fca6ea1SDimitry Andric SavedRegs.set(AArch64::P8); 37190fca6ea1SDimitry Andric AFI->setPredicateRegForFillSpill(AArch64::PN8); 37200fca6ea1SDimitry Andric } 37210fca6ea1SDimitry Andric 37220fca6ea1SDimitry Andric assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) && 37230fca6ea1SDimitry Andric "Predicate cannot be a reserved register"); 37240b57cec5SDimitry Andric } 37250b57cec5SDimitry Andric 37265ffd83dbSDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::Win64 && 37275ffd83dbSDimitry Andric !Subtarget.isTargetWindows()) { 37285ffd83dbSDimitry Andric // For Windows calling convention on a non-windows OS, where X18 is treated 37295ffd83dbSDimitry Andric // as reserved, back up X18 when entering non-windows code (marked with the 37305ffd83dbSDimitry Andric // Windows calling convention) and restore when returning regardless of 37315ffd83dbSDimitry Andric // whether the individual function uses it - it might call other functions 37325ffd83dbSDimitry Andric // that clobber it. 37335ffd83dbSDimitry Andric SavedRegs.set(AArch64::X18); 37345ffd83dbSDimitry Andric } 37355ffd83dbSDimitry Andric 37360b57cec5SDimitry Andric // Calculates the callee saved stack size. 
37370b57cec5SDimitry Andric unsigned CSStackSize = 0; 3738480093f4SDimitry Andric unsigned SVECSStackSize = 0; 37390b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 37400b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 3741480093f4SDimitry Andric for (unsigned Reg : SavedRegs.set_bits()) { 3742480093f4SDimitry Andric auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8; 3743480093f4SDimitry Andric if (AArch64::PPRRegClass.contains(Reg) || 3744480093f4SDimitry Andric AArch64::ZPRRegClass.contains(Reg)) 3745480093f4SDimitry Andric SVECSStackSize += RegSize; 3746480093f4SDimitry Andric else 3747480093f4SDimitry Andric CSStackSize += RegSize; 3748480093f4SDimitry Andric } 37490b57cec5SDimitry Andric 37500fca6ea1SDimitry Andric // Increase the callee-saved stack size if the function has streaming mode 37510fca6ea1SDimitry Andric // changes, as we will need to spill the value of the VG register. 37520fca6ea1SDimitry Andric // For locally streaming functions, we spill both the streaming and 37530fca6ea1SDimitry Andric // non-streaming VG value. 37540fca6ea1SDimitry Andric const Function &F = MF.getFunction(); 37550fca6ea1SDimitry Andric SMEAttrs Attrs(F); 37560fca6ea1SDimitry Andric if (AFI->hasStreamingModeChanges()) { 37570fca6ea1SDimitry Andric if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) 37580fca6ea1SDimitry Andric CSStackSize += 16; 37590fca6ea1SDimitry Andric else 37600fca6ea1SDimitry Andric CSStackSize += 8; 37610fca6ea1SDimitry Andric } 37620fca6ea1SDimitry Andric 37630fca6ea1SDimitry Andric // Determine if a Hazard slot should be used, and increase the CSStackSize by 37640fca6ea1SDimitry Andric // StackHazardSize if so. 
37650fca6ea1SDimitry Andric determineStackHazardSlot(MF, SavedRegs); 37660fca6ea1SDimitry Andric if (AFI->hasStackHazardSlotIndex()) 37670fca6ea1SDimitry Andric CSStackSize += StackHazardSize; 37680fca6ea1SDimitry Andric 37690b57cec5SDimitry Andric // Save number of saved regs, so we can easily update CSStackSize later. 37700b57cec5SDimitry Andric unsigned NumSavedRegs = SavedRegs.count(); 37710b57cec5SDimitry Andric 37720b57cec5SDimitry Andric // The frame record needs to be created by saving the appropriate registers 3773480093f4SDimitry Andric uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); 37740b57cec5SDimitry Andric if (hasFP(MF) || 37750b57cec5SDimitry Andric windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) { 37760b57cec5SDimitry Andric SavedRegs.set(AArch64::FP); 37770b57cec5SDimitry Andric SavedRegs.set(AArch64::LR); 37780b57cec5SDimitry Andric } 37790b57cec5SDimitry Andric 37800fca6ea1SDimitry Andric LLVM_DEBUG({ 37810fca6ea1SDimitry Andric dbgs() << "*** determineCalleeSaves\nSaved CSRs:"; 37820fca6ea1SDimitry Andric for (unsigned Reg : SavedRegs.set_bits()) 37830fca6ea1SDimitry Andric dbgs() << ' ' << printReg(Reg, RegInfo); 37840fca6ea1SDimitry Andric dbgs() << "\n"; 37850fca6ea1SDimitry Andric }); 37860b57cec5SDimitry Andric 37870b57cec5SDimitry Andric // If any callee-saved registers are used, the frame cannot be eliminated. 37888bcb0991SDimitry Andric int64_t SVEStackSize = 3789480093f4SDimitry Andric alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16); 37908bcb0991SDimitry Andric bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize; 37910b57cec5SDimitry Andric 37920b57cec5SDimitry Andric // The CSR spill slots have not been allocated yet, so estimateStackSize 37930b57cec5SDimitry Andric // won't include them. 
37940b57cec5SDimitry Andric unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); 37958bcb0991SDimitry Andric 379606c3fb27SDimitry Andric // We may address some of the stack above the canonical frame address, either 379706c3fb27SDimitry Andric // for our own arguments or during a call. Include that in calculating whether 379806c3fb27SDimitry Andric // we have complicated addressing concerns. 379906c3fb27SDimitry Andric int64_t CalleeStackUsed = 0; 380006c3fb27SDimitry Andric for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) { 380106c3fb27SDimitry Andric int64_t FixedOff = MFI.getObjectOffset(I); 38020fca6ea1SDimitry Andric if (FixedOff > CalleeStackUsed) 38030fca6ea1SDimitry Andric CalleeStackUsed = FixedOff; 380406c3fb27SDimitry Andric } 380506c3fb27SDimitry Andric 38068bcb0991SDimitry Andric // Conservatively always assume BigStack when there are SVE spills. 380706c3fb27SDimitry Andric bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize + 380806c3fb27SDimitry Andric CalleeStackUsed) > EstimatedStackSizeLimit; 38090b57cec5SDimitry Andric if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) 38100b57cec5SDimitry Andric AFI->setHasStackFrame(true); 38110b57cec5SDimitry Andric 38120b57cec5SDimitry Andric // Estimate if we might need to scavenge a register at some point in order 38130b57cec5SDimitry Andric // to materialize a stack offset. If so, either spill one additional 38140b57cec5SDimitry Andric // callee-saved register or reserve a special spill slot to facilitate 38150b57cec5SDimitry Andric // register scavenging. If we already spilled an extra callee-saved register 38160b57cec5SDimitry Andric // above to keep the number of spills even, we don't need to do anything else 38170b57cec5SDimitry Andric // here. 
38180b57cec5SDimitry Andric if (BigStack) { 38190b57cec5SDimitry Andric if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { 38200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo) 38210b57cec5SDimitry Andric << " to get a scratch register.\n"); 38220b57cec5SDimitry Andric SavedRegs.set(UnspilledCSGPR); 38235f757f3fSDimitry Andric ExtraCSSpill = UnspilledCSGPR; 38245f757f3fSDimitry Andric 38250b57cec5SDimitry Andric // MachO's compact unwind format relies on all registers being stored in 38260b57cec5SDimitry Andric // pairs, so if we need to spill one extra for BigStack, then we need to 38270b57cec5SDimitry Andric // store the pair. 38285f757f3fSDimitry Andric if (producePairRegisters(MF)) { 38295f757f3fSDimitry Andric if (UnspilledCSGPRPaired == AArch64::NoRegister) { 38305f757f3fSDimitry Andric // Failed to make a pair for compact unwind format, revert spilling. 38315f757f3fSDimitry Andric if (produceCompactUnwindFrame(MF)) { 38325f757f3fSDimitry Andric SavedRegs.reset(UnspilledCSGPR); 38335f757f3fSDimitry Andric ExtraCSSpill = AArch64::NoRegister; 38345f757f3fSDimitry Andric } 38355f757f3fSDimitry Andric } else 38360b57cec5SDimitry Andric SavedRegs.set(UnspilledCSGPRPaired); 38375f757f3fSDimitry Andric } 38380b57cec5SDimitry Andric } 38390b57cec5SDimitry Andric 38400b57cec5SDimitry Andric // If we didn't find an extra callee-saved register to spill, create 38410b57cec5SDimitry Andric // an emergency spill slot. 
38420b57cec5SDimitry Andric if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { 38430b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 38440b57cec5SDimitry Andric const TargetRegisterClass &RC = AArch64::GPR64RegClass; 38450b57cec5SDimitry Andric unsigned Size = TRI->getSpillSize(RC); 38465ffd83dbSDimitry Andric Align Alignment = TRI->getSpillAlign(RC); 38475ffd83dbSDimitry Andric int FI = MFI.CreateStackObject(Size, Alignment, false); 38480b57cec5SDimitry Andric RS->addScavengingFrameIndex(FI); 38490b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI 38500b57cec5SDimitry Andric << " as the emergency spill slot.\n"); 38510b57cec5SDimitry Andric } 38520b57cec5SDimitry Andric } 38530b57cec5SDimitry Andric 38540b57cec5SDimitry Andric // Adding the size of additional 64bit GPR saves. 38550b57cec5SDimitry Andric CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs); 3856fe6060f1SDimitry Andric 3857fe6060f1SDimitry Andric // A Swift asynchronous context extends the frame record with a pointer 3858fe6060f1SDimitry Andric // directly before FP. 3859fe6060f1SDimitry Andric if (hasFP(MF) && AFI->hasSwiftAsyncContext()) 3860fe6060f1SDimitry Andric CSStackSize += 8; 3861fe6060f1SDimitry Andric 3862480093f4SDimitry Andric uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16); 38630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Estimated stack frame size: " 38640fca6ea1SDimitry Andric << EstimatedStackSize + AlignedCSStackSize << " bytes.\n"); 38650b57cec5SDimitry Andric 3866480093f4SDimitry Andric assert((!MFI.isCalleeSavedInfoValid() || 3867480093f4SDimitry Andric AFI->getCalleeSavedStackSize() == AlignedCSStackSize) && 3868480093f4SDimitry Andric "Should not invalidate callee saved info"); 3869480093f4SDimitry Andric 38700b57cec5SDimitry Andric // Round up to register pair alignment to avoid additional SP adjustment 38710b57cec5SDimitry Andric // instructions. 
38720b57cec5SDimitry Andric AFI->setCalleeSavedStackSize(AlignedCSStackSize); 38730b57cec5SDimitry Andric AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize); 3874480093f4SDimitry Andric AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); 38750b57cec5SDimitry Andric } 38760b57cec5SDimitry Andric 3877e8d8bef9SDimitry Andric bool AArch64FrameLowering::assignCalleeSavedSpillSlots( 3878fe6060f1SDimitry Andric MachineFunction &MF, const TargetRegisterInfo *RegInfo, 3879fe6060f1SDimitry Andric std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex, 3880fe6060f1SDimitry Andric unsigned &MaxCSFrameIndex) const { 3881e8d8bef9SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 3882e8d8bef9SDimitry Andric // To match the canonical windows frame layout, reverse the list of 3883e8d8bef9SDimitry Andric // callee saved registers to get them laid out by PrologEpilogInserter 3884e8d8bef9SDimitry Andric // in the right order. (PrologEpilogInserter allocates stack objects top 3885e8d8bef9SDimitry Andric // down. Windows canonical prologs store higher numbered registers at 3886e8d8bef9SDimitry Andric // the top, thus have the CSI array start from the highest registers.) 3887e8d8bef9SDimitry Andric if (NeedsWinCFI) 3888e8d8bef9SDimitry Andric std::reverse(CSI.begin(), CSI.end()); 3889fe6060f1SDimitry Andric 3890fe6060f1SDimitry Andric if (CSI.empty()) 3891fe6060f1SDimitry Andric return true; // Early exit if no callee saved registers are modified! 3892fe6060f1SDimitry Andric 3893fe6060f1SDimitry Andric // Now that we know which registers need to be saved and restored, allocate 3894fe6060f1SDimitry Andric // stack slots for them. 
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *AFI = MF.getInfo<AArch64FunctionInfo>();

  bool UsesWinAAPCS = isTargetWindows(MF);
  // On Windows AAPCS, the Swift async context slot is created up front (before
  // any CSR slots) rather than next to FP as on other targets (see below).
  if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
    int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
    AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;
  }

  // Insert VG into the list of CSRs, immediately before LR if saved.
  if (AFI->hasStreamingModeChanges()) {
    std::vector<CalleeSavedInfo> VGSaves;
    SMEAttrs Attrs(MF.getFunction());

    // VG is saved for unwinding but not restored by the epilogue CSR code.
    auto VGInfo = CalleeSavedInfo(AArch64::VG);
    VGInfo.setRestored(false);
    VGSaves.push_back(VGInfo);

    // Add VG again if the function is locally-streaming, as we will spill two
    // values.
    if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
      VGSaves.push_back(VGInfo);

    bool InsertBeforeLR = false;

    // Place the VG save(s) directly before LR's entry if LR is saved;
    // otherwise append them at the end of the CSI list.
    for (unsigned I = 0; I < CSI.size(); I++)
      if (CSI[I].getReg() == AArch64::LR) {
        InsertBeforeLR = true;
        CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
        break;
      }

    if (!InsertBeforeLR)
      CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
  }

  // LastReg tracks the previously processed CSR so we can detect the single
  // GPR->FPR transition; HazardSlotIndex stays at max() until a slot is made.
  Register LastReg = 0;
  int HazardSlotIndex = std::numeric_limits<int>::max();
  for (auto &CS : CSI) {
    Register Reg = CS.getReg();
    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

    // Create a hazard slot as we switch between GPR and FPR CSRs.
    if (AFI->hasStackHazardSlotIndex() &&
        (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
        AArch64InstrInfo::isFpOrNEON(Reg)) {
      // The assert guards the assumption that CSRs are ordered with all GPRs
      // before all FPRs, so at most one transition (and one slot) occurs.
      assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
             "Unexpected register order for hazard slot");
      HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
      LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
                        << "\n");
      AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
      if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
        MinCSFrameIndex = HazardSlotIndex;
      if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
        MaxCSFrameIndex = HazardSlotIndex;
    }

    // Allocate this CSR's spill slot and record it in the CSI entry, widening
    // the [MinCSFrameIndex, MaxCSFrameIndex] range as needed.
    unsigned Size = RegInfo->getSpillSize(*RC);
    Align Alignment(RegInfo->getSpillAlign(*RC));
    int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
    CS.setFrameIdx(FrameIdx);

    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

    // Grab 8 bytes below FP for the extended asynchronous frame info.
    // Non-Windows targets: the Swift async context slot is allocated right
    // after FP's own CSR slot (i.e. directly below FP in the frame).
    if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
        Reg == AArch64::FP) {
      FrameIdx = MFI.CreateStackObject(8, Alignment, true);
      AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
      if ((unsigned)FrameIdx < MinCSFrameIndex)
        MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex)
        MaxCSFrameIndex = FrameIdx;
    }
    LastReg = Reg;
  }

  // Add hazard slot in the case where no FPR CSRs are present.
  if (AFI->hasStackHazardSlotIndex() &&
      HazardSlotIndex == std::numeric_limits<int>::max()) {
    HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
    LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
                      << "\n");
    AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
    if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
      MinCSFrameIndex = HazardSlotIndex;
    if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
      MaxCSFrameIndex = HazardSlotIndex;
  }

  return true;
}

/// Returns true if the (16-byte-aligned) callee-save area's spare bytes may be
/// reused as an emergency spill slot by the register scavenger.
bool AArch64FrameLowering::enableStackSlotScavenging(
    const MachineFunction &MF) const {
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // If the function has streaming-mode changes, don't scavenge a
  // spillslot in the callee-save area, as that might require an
  // 'addvl' in the streaming-mode-changing call-sequence when the
  // function doesn't use a FP.
  if (AFI->hasStreamingModeChanges() && !hasFP(MF))
    return false;
  // Don't allow register salvaging with hazard slots, in case it moves objects
  // into the wrong place.
  if (AFI->hasStackHazardSlotIndex())
    return false;
  return AFI->hasCalleeSaveStackFreeSpace();
}

/// returns true if there are any SVE callee saves.
/// On return, [Min, Max] is the (consecutive) frame-index range of the SVE
/// (ZPR/PPR) callee-save slots; Min stays at INT_MAX when none exist.
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
                                      int &Min, int &Max) {
  Min = std::numeric_limits<int>::max();
  Max = std::numeric_limits<int>::min();

  if (!MFI.isCalleeSavedInfoValid())
    return false;

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  for (auto &CS : CSI) {
    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
        AArch64::PPRRegClass.contains(CS.getReg())) {
      // The SVE CS slots must form one contiguous run of frame indices.
      assert((Max == std::numeric_limits<int>::min() ||
              Max + 1 == CS.getFrameIdx()) &&
             "SVE CalleeSaves are not consecutive");

      Min = std::min(Min, CS.getFrameIdx());
      Max = std::max(Max, CS.getFrameIdx());
    }
  }
  return Min != std::numeric_limits<int>::max();
}

// Process all the SVE stack objects and determine offsets for each
// object. If AssignOffsets is true, the offsets get assigned.
// Fills in the first and last callee-saved frame indices into
// Min/MaxCSFrameIndex, respectively.
// Returns the size of the stack.
// Note: offsets are recorded as negative values (objects grow downwards from
// the base of the SVE area); the returned size is in SVE "scalable" bytes.
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
                                              int &MinCSFrameIndex,
                                              int &MaxCSFrameIndex,
                                              bool AssignOffsets) {
#ifndef NDEBUG
  // First process all fixed stack objects.
  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
    assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
           "SVE vectors should never be passed on the stack by value, only by "
           "reference.");
#endif

  auto Assign = [&MFI](int FI, int64_t Offset) {
    LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
    MFI.setObjectOffset(FI, Offset);
  };

  int64_t Offset = 0;

  // Then process all callee saved slots.
  if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
    // Assign offsets to the callee save slots.
    for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
      Offset += MFI.getObjectSize(I);
      Offset = alignTo(Offset, MFI.getObjectAlign(I));
      if (AssignOffsets)
        Assign(I, -Offset);
    }
  }

  // Ensure that the Callee-save area is aligned to 16bytes.
  Offset = alignTo(Offset, Align(16U));

  // Create a buffer of SVE objects to allocate and sort it.
  SmallVector<int, 8> ObjectsToAllocate;
  // If we have a stack protector, and we've previously decided that we have SVE
  // objects on the stack and thus need it to go in the SVE stack area, then it
  // needs to go first.
  int StackProtectorFI = -1;
  if (MFI.hasStackProtectorIndex()) {
    StackProtectorFI = MFI.getStackProtectorIndex();
    if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
      ObjectsToAllocate.push_back(StackProtectorFI);
  }
  // Collect the remaining scalable-vector objects, skipping the stack
  // protector (already queued), the CS slots handled above, and dead objects.
  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
    unsigned StackID = MFI.getStackID(I);
    if (StackID != TargetStackID::ScalableVector)
      continue;
    if (I == StackProtectorFI)
      continue;
    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
      continue;
    if (MFI.isDeadObjectIndex(I))
      continue;

    ObjectsToAllocate.push_back(I);
  }

  // Allocate all SVE locals and spills
  for (unsigned FI : ObjectsToAllocate) {
    Align Alignment = MFI.getObjectAlign(FI);
    // FIXME: Given that the length of SVE vectors is not necessarily a power of
    // two, we'd need to align every object dynamically at runtime if the
    // alignment is larger than 16. This is not yet supported.
    if (Alignment > Align(16))
      report_fatal_error(
          "Alignment of scalable vectors > 16 bytes is not yet supported");

    Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
    if (AssignOffsets)
      Assign(FI, -Offset);
  }

  return Offset;
}

/// Size-only query: computes the SVE stack area size without writing any
/// object offsets (AssignOffsets = false).
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
    MachineFrameInfo &MFI) const {
  int MinCSFrameIndex, MaxCSFrameIndex;
  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
}

/// Assigns final offsets to all SVE stack objects and reports the SVE CS
/// frame-index range back to the caller.
int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
    MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
                                        true);
}

void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
         "Upwards growing stack unsupported");

  int MinCSFrameIndex, MaxCSFrameIndex;
  int64_t SVEStackSize =
      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex,
MaxCSFrameIndex); 41398bcb0991SDimitry Andric 41408bcb0991SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 4141480093f4SDimitry Andric AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U)); 4142480093f4SDimitry Andric AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex); 41438bcb0991SDimitry Andric 41440b57cec5SDimitry Andric // If this function isn't doing Win64-style C++ EH, we don't need to do 41450b57cec5SDimitry Andric // anything. 41460b57cec5SDimitry Andric if (!MF.hasEHFunclets()) 41470b57cec5SDimitry Andric return; 41480b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 41490b57cec5SDimitry Andric WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); 41500b57cec5SDimitry Andric 41510b57cec5SDimitry Andric MachineBasicBlock &MBB = MF.front(); 41520b57cec5SDimitry Andric auto MBBI = MBB.begin(); 41530b57cec5SDimitry Andric while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) 41540b57cec5SDimitry Andric ++MBBI; 41550b57cec5SDimitry Andric 41560b57cec5SDimitry Andric // Create an UnwindHelp object. 415762cfcf62SDimitry Andric // The UnwindHelp object is allocated at the start of the fixed object area 415862cfcf62SDimitry Andric int64_t FixedObject = 415962cfcf62SDimitry Andric getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false); 416062cfcf62SDimitry Andric int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8, 416162cfcf62SDimitry Andric /*SPOffset*/ -FixedObject, 416262cfcf62SDimitry Andric /*IsImmutable=*/false); 41630b57cec5SDimitry Andric EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; 416462cfcf62SDimitry Andric 41650b57cec5SDimitry Andric // We need to store -2 into the UnwindHelp object at the start of the 41660b57cec5SDimitry Andric // function. 
  DebugLoc DL;
  // Position the scavenger just before MBBI (end of the prologue) so
  // FindUnusedReg reflects liveness at the insertion point.
  RS->enterBasicBlockEnd(MBB);
  RS->backward(MBBI);
  Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
  assert(DstReg && "There must be a free register after frame setup");
  // Materialize -2 and store it to the UnwindHelp slot.
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
      .addReg(DstReg, getKillRegState(true))
      .addFrameIndex(UnwindHelpFI)
      .addImm(0);
}

namespace {
// One memory-tagging store (STG/ST2G/STGloop...) slated for replacement,
// described by its frame offset and the number of bytes it tags.
struct TagStoreInstr {
  MachineInstr *MI;
  int64_t Offset, Size;
  explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
      : MI(MI), Offset(Offset), Size(Size) {}
};

// Rewrites a run of adjacent tag stores into a shorter equivalent sequence:
// either an unrolled series of STG/ST2G instructions or an STGloop pseudo,
// optionally folding a trailing SP/FP update into the emitted code.
class TagStoreEdit {
  MachineFunction *MF;
  MachineBasicBlock *MBB;
  MachineRegisterInfo *MRI;
  // Tag store instructions that are being replaced.
  SmallVector<TagStoreInstr, 8> TagStores;
  // Combined memref arguments of the above instructions.
  SmallVector<MachineMemOperand *, 8> CombinedMemRefs;

  // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
  // FrameRegOffset + Size) with the address tag of SP.
  Register FrameReg;
  StackOffset FrameRegOffset;
  int64_t Size;
  // If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the
  // end.
  std::optional<int64_t> FrameRegUpdate;
  // MIFlags for any FrameReg updating instructions.
  unsigned FrameRegUpdateFlags;

  // Use zeroing instruction variants.
  bool ZeroData;
  DebugLoc DL;

  void emitUnrolled(MachineBasicBlock::iterator InsertI);
  void emitLoop(MachineBasicBlock::iterator InsertI);

public:
  TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
      : MBB(MBB), ZeroData(ZeroData) {
    MF = MBB->getParent();
    MRI = &MF->getRegInfo();
  }
  // Add an instruction to be replaced. Instructions must be added in the
  // ascending order of Offset, and have to be adjacent.
  void addInstruction(TagStoreInstr I) {
    assert((TagStores.empty() ||
            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
           "Non-adjacent tag store instructions.");
    TagStores.push_back(I);
  }
  void clear() { TagStores.clear(); }
  // Emit equivalent code at the given location, and erase the current set of
  // instructions. May skip if the replacement is not profitable. May invalidate
  // the input iterator and replace it with a valid one.
  void emitCode(MachineBasicBlock::iterator &InsertI,
                const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
};

// Emit a straight-line sequence of STG/ST2G (or zeroing variants) covering
// Size bytes starting at FrameReg + FrameRegOffset. Falls back to computing
// the base address into a scratch register when the offset is outside the
// immediate range of these instructions or not 16-byte aligned.
void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
  const AArch64InstrInfo *TII =
      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();

  // Scaled-immediate range used for the STG/ST2G offset operand below
  // (offset is encoded in units of 16 bytes).
  const int64_t kMinOffset = -256 * 16;
  const int64_t kMaxOffset = 255 * 16;

  Register BaseReg = FrameReg;
  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
  if (BaseRegOffsetBytes < kMinOffset ||
      BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
      // BaseReg can be FP, which is not necessarily aligned to 16-bytes. In
      // that case, BaseRegOffsetBytes will not be aligned to 16 bytes, which
      // is required for the offset of ST2G.
      BaseRegOffsetBytes % 16 != 0) {
    Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
                    StackOffset::getFixed(BaseRegOffsetBytes), TII);
    BaseReg = ScratchReg;
    BaseRegOffsetBytes = 0;
  }

  MachineInstr *LastI = nullptr;
  while (Size) {
    // Prefer 32-byte ST2G chunks; use a 16-byte STG for the remainder.
    int64_t InstrSize = (Size > 16) ? 32 : 16;
    unsigned Opcode =
        InstrSize == 16
            ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
            : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
    assert(BaseRegOffsetBytes % 16 == 0);
    MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
                          .addReg(AArch64::SP)
                          .addReg(BaseReg)
                          .addImm(BaseRegOffsetBytes / 16)
                          .setMemRefs(CombinedMemRefs);
    // A store to [BaseReg, #0] should go last for an opportunity to fold the
    // final SP adjustment in the epilogue.
    if (BaseRegOffsetBytes == 0)
      LastI = I;
    BaseRegOffsetBytes += InstrSize;
    Size -= InstrSize;
  }

  if (LastI)
    MBB->splice(InsertI, MBB, LastI);
}

// Emit an STGloop/STZGloop pseudo covering Size bytes, optionally splitting
// off a final post-indexed 16-byte store so the pending FrameReg update can
// be folded into it.
void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
  const AArch64InstrInfo *TII =
      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();

  // When a FrameReg update is pending, clobbering FrameReg itself is fine;
  // otherwise loop on a fresh virtual register.
  Register BaseReg = FrameRegUpdate
                         ? FrameReg
                         : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);

  int64_t LoopSize = Size;
  // If the loop size is not a multiple of 32, split off one 16-byte store at
  // the end to fold BaseReg update into.
  if (FrameRegUpdate && *FrameRegUpdate)
    LoopSize -= LoopSize % 32;
  MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
                                TII->get(ZeroData ? AArch64::STZGloop_wback
                                                  : AArch64::STGloop_wback))
                            .addDef(SizeReg)
                            .addDef(BaseReg)
                            .addImm(LoopSize)
                            .addReg(BaseReg)
                            .setMemRefs(CombinedMemRefs);
  if (FrameRegUpdate)
    LoopI->setFlags(FrameRegUpdateFlags);

  // Remaining adjustment needed on FrameReg after the loop has advanced it by
  // Size bytes.
  int64_t ExtraBaseRegUpdate =
      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
  if (LoopSize < Size) {
    assert(FrameRegUpdate);
    assert(Size - LoopSize == 16);
    // Tag 16 more bytes at BaseReg and update BaseReg.
    BuildMI(*MBB, InsertI, DL,
            TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
        .addDef(BaseReg)
        .addReg(BaseReg)
        .addReg(BaseReg)
        .addImm(1 + ExtraBaseRegUpdate / 16)
        .setMemRefs(CombinedMemRefs)
        .setMIFlags(FrameRegUpdateFlags);
  } else if (ExtraBaseRegUpdate) {
    // Update BaseReg.
    BuildMI(
        *MBB, InsertI, DL,
        TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
        .addDef(BaseReg)
        .addReg(BaseReg)
        .addImm(std::abs(ExtraBaseRegUpdate))
        .addImm(0)
        .setMIFlags(FrameRegUpdateFlags);
  }
}

// Check if *II is a register update that can be merged into STGloop that ends
// at (Reg + Size). On success, *TotalOffset receives the update's total signed
// offset relative to Reg's pre-loop value.
bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
                       int64_t Size, int64_t *TotalOffset) {
  MachineInstr &MI = *II;
  // Only a plain ADD/SUB immediate of Reg into itself qualifies.
  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
    unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
    int64_t Offset = MI.getOperand(2).getImm() << Shift;
    if (MI.getOpcode() == AArch64::SUBXri)
      Offset = -Offset;
    // The residual adjustment left after the loop must itself be encodable
    // and 16-byte granular.
    int64_t AbsPostOffset = std::abs(Offset - Size);
    const int64_t kMaxOffset =
        0xFFF; // Max encoding for unshifted ADDXri / SUBXri
    if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
      *TotalOffset = Offset;
      return true;
    }
  }
  return false;
}

// Collect the union of the memory operands of all instructions in TSE into
// MemRefs; an empty result means "may access anything".
void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
                  SmallVectorImpl<MachineMemOperand *> &MemRefs) {
  MemRefs.clear();
  for (auto &TS : TSE) {
    MachineInstr *MI = TS.MI;
    // An instruction without memory operands may access anything. Be
    // conservative and return an empty list.
    if (MI->memoperands_empty()) {
      MemRefs.clear();
      return;
    }
    MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
  }
}

// Replace the collected run of tag stores with either an unrolled STG/ST2G
// sequence (small sizes) or an STGloop (sizes >= kSetTagLoopThreshold),
// optionally folding a following SP update into the loop. Erases the original
// instructions on success; bails out early when a single store would not
// shrink.
void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
                            const AArch64FrameLowering *TFI,
                            bool TryMergeSPUpdate) {
  if (TagStores.empty())
    return;
  TagStoreInstr &FirstTagStore = TagStores[0];
  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
  // Instructions are adjacent (enforced in addInstruction), so the run's
  // total size is last end minus first start.
  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
  DL = TagStores[0].MI->getDebugLoc();

  Register Reg;
  FrameRegOffset = TFI->resolveFrameOffsetReference(
      *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
      /*PreferFP=*/false, /*ForSimm=*/true);
  FrameReg = Reg;
  FrameRegUpdate = std::nullopt;

  mergeMemRefs(TagStores, CombinedMemRefs);

  LLVM_DEBUG({
    dbgs() << "Replacing adjacent STG instructions:\n";
    for (const auto &Instr : TagStores) {
      dbgs() << " " << *Instr.MI;
    }
  });

  // Size threshold where a loop becomes shorter than a linear sequence of
  // tagging instructions.
  const int kSetTagLoopThreshold = 176;
  if (Size < kSetTagLoopThreshold) {
    if (TagStores.size() < 2)
      return;
    emitUnrolled(InsertI);
  } else {
    MachineInstr *UpdateInstr = nullptr;
    int64_t TotalOffset = 0;
    if (TryMergeSPUpdate) {
      // See if we can merge base register update into the STGloop.
      // This is done in AArch64LoadStoreOptimizer for "normal" stores,
      // but STGloop is way too unusual for that, and also it only
      // realistically happens in function epilogue. Also, STGloop is expanded
      // before that pass.
      if (InsertI != MBB->end() &&
          canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
                            &TotalOffset)) {
        UpdateInstr = &*InsertI++;
        LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
                          << *UpdateInstr);
      }
    }

    // A single loop store with no folded update is not an improvement.
    if (!UpdateInstr && TagStores.size() < 2)
      return;

    if (UpdateInstr) {
      FrameRegUpdate = TotalOffset;
      FrameRegUpdateFlags = UpdateInstr->getFlags();
    }
    emitLoop(InsertI);
    if (UpdateInstr)
      UpdateInstr->eraseFromParent();
  }

  // The new sequence is in place; remove the originals.
  for (auto &TS : TagStores)
    TS.MI->eraseFromParent();
}

// Recognize a mergeable stack-tagging instruction rooted at a FrameIndex and
// report the byte range it tags (Offset/Size, frame-relative) and whether it
// is a zeroing variant (ZeroData).
bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
                                        int64_t &Size, bool &ZeroData) {
  MachineFunction &MF = *MI.getParent()->getParent();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  unsigned Opcode = MI.getOpcode();
  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
              Opcode == AArch64::STZ2Gi);

  // Loop pseudos: both outputs must be dead, size must be a constant and the
  // address must be a frame index.
  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
      return false;
    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
      return false;
    Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
    Size = MI.getOperand(2).getImm();
    return true;
  }

  // Single stores tag a fixed 16 (STG) or 32 (ST2G) bytes.
  if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
    Size = 16;
  else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
    Size = 32;
  else
    return false;

  // Only SP-tagged stores addressed via a frame index are handled.
  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
    return false;

  // The immediate operand is scaled in 16-byte units.
  Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
           16 * MI.getOperand(2).getImm();
  return true;
}

// Detect a run of memory tagging instructions for adjacent stack frame slots,
// and replace them with a shorter instruction sequence:
// * replace STG + STG with ST2G
// * replace STGloop + STGloop with STGloop
// This code needs to run when stack slot offsets are already known, but before
// FrameIndex operands in STG instructions are eliminated.
MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
                                                const AArch64FrameLowering *TFI,
                                                RegScavenger *RS) {
  bool FirstZeroData;
  int64_t Size, Offset;
  MachineInstr &MI = *II;
  MachineBasicBlock *MBB = MI.getParent();
  // NextI points past MI; it is also the fallback return value when merging
  // cannot proceed.
  MachineBasicBlock::iterator NextI = ++II;
  if (&MI == &MBB->instr_back())
    return II;
  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
    return II;

  SmallVector<TagStoreInstr, 4> Instrs;
  Instrs.emplace_back(&MI, Offset, Size);

  // Scan forward a bounded distance collecting further mergeable tag stores.
  constexpr int kScanLimit = 10;
  int Count = 0;
  for (MachineBasicBlock::iterator E = MBB->end();
       NextI != E && Count < kScanLimit; ++NextI) {
    MachineInstr &MI = *NextI;
    bool ZeroData;
    int64_t Size, Offset;
    // Collect instructions that update memory tags with a FrameIndex operand
    // and (when applicable) constant size, and whose output registers are dead
    // (the latter is almost always the case in practice). Since these
    // instructions effectively have no inputs or outputs, we are free to skip
    // any non-aliasing instructions in between without tracking used registers.
    if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
      if (ZeroData != FirstZeroData)
        break;
      Instrs.emplace_back(&MI, Offset, Size);
      continue;
    }

    // Only count non-transient, non-tagging instructions toward the scan
    // limit.
    if (!MI.isTransient())
      ++Count;

    // Just in case, stop before the epilogue code starts.
    if (MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy))
      break;

    // Reject anything that may alias the collected instructions.
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
      break;
  }

  // New code will be inserted after the last tagging instruction we've found.
  MachineBasicBlock::iterator InsertI = Instrs.back().MI;

  // All the gathered stack tag instructions are merged and placed after
  // last tag store in the list. The check should be made if the nzcv
  // flag is live at the point where we are trying to insert. Otherwise
  // the nzcv flag might get clobbered if any stg loops are present.

  // FIXME : This approach of bailing out from merge is conservative in
  // some ways like even if stg loops are not present after merge the
  // insert list, this liveness check is done (which is not needed).
  LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
  LiveRegs.addLiveOuts(*MBB);
  // Walk backwards from the block end to InsertI to compute liveness there.
  for (auto I = MBB->rbegin();; ++I) {
    MachineInstr &MI = *I;
    if (MI == InsertI)
      break;
    LiveRegs.stepBackward(*I);
  }
  InsertI++;
  // Bail out of the merge if NZCV is live at the insertion point.
  if (LiveRegs.contains(AArch64::NZCV))
    return InsertI;

  // Sort by frame offset; stable so equal offsets keep program order.
  llvm::stable_sort(Instrs,
                    [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
                      return Left.Offset < Right.Offset;
                    });

  // Make sure that we don't have any overlapping stores.
  int64_t CurOffset = Instrs[0].Offset;
  for (auto &Instr : Instrs) {
    if (CurOffset > Instr.Offset)
      return NextI;
    CurOffset = Instr.Offset + Instr.Size;
  }

  // Find contiguous runs of tagged memory and emit shorter instruction
  // sequences for them when possible. A gap between runs flushes the current
  // run (without SP-update folding) and starts a new one.
  TagStoreEdit TSE(MBB, FirstZeroData);
  std::optional<int64_t> EndOffset;
  for (auto &Instr : Instrs) {
    if (EndOffset && *EndOffset != Instr.Offset) {
      // Found a gap.
      TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
      TSE.clear();
    }

    TSE.addInstruction(Instr);
    EndOffset = Instr.Offset + Instr.Size;
  }

  const MachineFunction *MF = MBB->getParent();
  // Multiple FP/SP updates in a loop cannot be described by CFI instructions.
  // Flush the final run; SP-update folding is allowed only when async DWARF
  // unwind info is not required (see comment above).
  TSE.emitCode(
      InsertI, TFI, /*TryMergeSPUpdate = */
      !MF->getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(*MF));

  return InsertI;
}
} // namespace

/// Lower a VGSavePseudo/VGRestorePseudo into the corresponding CFI
/// instruction describing where the Vector Granule (VG) register is saved
/// (offset of its stack slot) or that it is restored. Returns an iterator to
/// the emitted CFI instruction, or II unchanged for any other opcode.
MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
                                              const AArch64FrameLowering *TFI) {
  MachineInstr &MI = *II;
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();

  if (MI.getOpcode() != AArch64::VGSavePseudo &&
      MI.getOpcode() != AArch64::VGRestorePseudo)
    return II;

  SMEAttrs FuncAttrs(MF->getFunction());
  // Locally-streaming functions (streaming body, non-streaming interface) use
  // a dedicated streaming-VG slot.
  bool LocallyStreaming =
      FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
  const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  const AArch64InstrInfo *TII =
      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();

  int64_t VGFrameIdx =
      LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
  assert(VGFrameIdx != std::numeric_limits<int>::max() &&
         "Expected FrameIdx for VG");

  unsigned CFIIndex;
  if (MI.getOpcode() == AArch64::VGSavePseudo) {
    const MachineFrameInfo &MFI = MF->getFrameInfo();
    int64_t Offset =
        MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
    CFIIndex = MF->addFrameInst(MCCFIInstruction::createOffset(
        nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset));
  } else
    CFIIndex = MF->addFrameInst(MCCFIInstruction::createRestore(
        nullptr, TRI->getDwarfRegNum(AArch64::VG, true)));

  MachineInstr *UnwindInst = BuildMI(*MBB, II, II->getDebugLoc(),
                                     TII->get(TargetOpcode::CFI_INSTRUCTION))
                                 .addCFIIndex(CFIIndex);

  // Replace the pseudo with the CFI instruction just built.
  MI.eraseFromParent();
  return UnwindInst->getIterator();
}

/// Runs after frame layout but before frame indices are replaced: lowers
/// VG save/restore pseudos (when streaming-mode changes exist) and merges
/// adjacent stack-tagging stores (when StackTaggingMergeSetTag is enabled).
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS = nullptr) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  for (auto &BB : MF)
    // The helpers advance II themselves (they may erase/replace instructions),
    // so the loop header performs no increment.
    for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
      if (AFI->hasStreamingModeChanges())
        II = emitVGSaveRestore(II, this);
      if (StackTaggingMergeSetTag)
        II = tryMergeAdjacentSTG(II, this, RS);
    }
}

/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
/// before the update. This is easily retrieved as it is exactly the offset
/// that is set in processFunctionBeforeFrameFinalized.
StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
    const MachineFunction &MF, int FI, Register &FrameReg,
    bool IgnoreSPUpdates) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (IgnoreSPUpdates) {
    LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
                      << MFI.getObjectOffset(FI) << "\n");
    FrameReg = AArch64::SP;
    return StackOffset::getFixed(MFI.getObjectOffset(FI));
  }

  // Go to common code if we cannot provide sp + offset.
  // Variable-sized objects, an SVE area, or stack realignment make a plain
  // SP-relative offset unreliable; defer to the general lookup.
  if (MFI.hasVarSizedObjects() ||
      MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
      MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
    return getFrameIndexReference(MF, FI, FrameReg);

  FrameReg = AArch64::SP;
  return getStackOffset(MF, MFI.getObjectOffset(FI));
}

/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
/// the parent's frame pointer
unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
    const MachineFunction &MF) const {
  // Not used on AArch64.
  return 0;
}

/// Funclets only need to account for space for the callee saved registers,
/// as the locals are accounted for in the parent's stack frame.
unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
    const MachineFunction &MF) const {
  // This is the size of the pushed CSRs.
  unsigned CSSize =
      MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
  // This is the amount of stack a funclet needs to allocate.
  return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
                 getStackAlign());
}

namespace {
// Per-frame-index sorting record used by orderFrameObjects below.
struct FrameObject {
  bool IsValid = false;
  // Index of the object in MFI.
  int ObjectIndex = 0;
  // Group ID this object belongs to.
  int GroupIndex = -1;
  // This object should be placed first (closest to SP).
  bool ObjectFirst = false;
  // This object's group (which always contains the object with
  // ObjectFirst==true) should be placed first.
  bool GroupFirst = false;

  // Used to distinguish between FP and GPR accesses. The values are decided so
  // that they sort FPR < Hazard < GPR and they can be or'd together.
  unsigned Accesses = 0;
  enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
};

// Accumulates runs of frame-object indices and stamps each run of two or more
// members with a fresh GroupIndex so the sort keeps them together.
class GroupBuilder {
  SmallVector<int, 8> CurrentMembers;
  int NextGroupIndex = 0;
  std::vector<FrameObject> &Objects;

public:
  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
  void AddMember(int Index) { CurrentMembers.push_back(Index); }
  void EndCurrentGroup() {
    if (CurrentMembers.size() > 1) {
      // Create a new group with the current member list. This might remove them
      // from their pre-existing groups. That's OK, dealing with overlapping
      // groups is too hard and unlikely to make a difference.
      LLVM_DEBUG(dbgs() << "group:");
      for (int Index : CurrentMembers) {
        Objects[Index].GroupIndex = NextGroupIndex;
        LLVM_DEBUG(dbgs() << " " << Index);
      }
      LLVM_DEBUG(dbgs() << "\n");
      NextGroupIndex++;
    }
    // Single-member runs form no group; just reset for the next run.
    CurrentMembers.clear();
  }
};

// Strict-weak ordering for frame objects used by orderFrameObjects.
bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
  // Objects at a lower index are closer to FP; objects at a higher index are
  // closer to SP.
  //
  // For consistency in our comparison, all invalid objects are placed
  // at the end. This also allows us to stop walking when we hit the
  // first invalid item after it's all sorted.
  //
  // If we want to include a stack hazard region, order FPR accesses < the
  // hazard object < GPRs accesses in order to create a separation between the
  // two. For the Accesses field 1 = FPR, 2 = Hazard Object, 4 = GPR.
  //
  // Otherwise the "first" object goes first (closest to SP), followed by the
  // members of the "first" group.
  //
  // The rest are sorted by the group index to keep the groups together.
  // Higher numbered groups are more likely to be around longer (i.e. untagged
  // in the function epilogue and not at some earlier point). Place them closer
  // to SP.
  //
  // If all else equal, sort by the object index to keep the objects in the
  // original order.
  return std::make_tuple(!A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
                         A.GroupIndex, A.ObjectIndex) <
         std::make_tuple(!B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
                         B.GroupIndex, B.ObjectIndex);
}
} // namespace

/// Reorder stack objects (when OrderFrameObjects is enabled) to group related
/// slots and separate FPR/GPR accesses around a hazard region.
/// NOTE(review): only the head of this function is visible in this excerpt;
/// its body continues beyond this point.
void AArch64FrameLowering::orderFrameObjects(
    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  if (!OrderFrameObjects || ObjectsToAllocate.empty())
    return;

  const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // One slot per frame index; only those in ObjectsToAllocate become valid.
  std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
  for (auto &Obj : ObjectsToAllocate) {
    FrameObjects[Obj].IsValid = true;
    FrameObjects[Obj].ObjectIndex = Obj;
  }

  // Identify FPR vs GPR slots for hazards, and stack slots that are tagged at
  // the same time.
4779e8d8bef9SDimitry Andric GroupBuilder GB(FrameObjects); 4780e8d8bef9SDimitry Andric for (auto &MBB : MF) { 4781e8d8bef9SDimitry Andric for (auto &MI : MBB) { 4782e8d8bef9SDimitry Andric if (MI.isDebugInstr()) 4783e8d8bef9SDimitry Andric continue; 47840fca6ea1SDimitry Andric 47850fca6ea1SDimitry Andric if (AFI.hasStackHazardSlotIndex()) { 47860fca6ea1SDimitry Andric std::optional<int> FI = getLdStFrameID(MI, MFI); 47870fca6ea1SDimitry Andric if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) { 47880fca6ea1SDimitry Andric if (MFI.getStackID(*FI) == TargetStackID::ScalableVector || 47890fca6ea1SDimitry Andric AArch64InstrInfo::isFpOrNEON(MI)) 47900fca6ea1SDimitry Andric FrameObjects[*FI].Accesses |= FrameObject::AccessFPR; 47910fca6ea1SDimitry Andric else 47920fca6ea1SDimitry Andric FrameObjects[*FI].Accesses |= FrameObject::AccessGPR; 47930fca6ea1SDimitry Andric } 47940fca6ea1SDimitry Andric } 47950fca6ea1SDimitry Andric 4796e8d8bef9SDimitry Andric int OpIndex; 4797e8d8bef9SDimitry Andric switch (MI.getOpcode()) { 4798e8d8bef9SDimitry Andric case AArch64::STGloop: 4799e8d8bef9SDimitry Andric case AArch64::STZGloop: 4800e8d8bef9SDimitry Andric OpIndex = 3; 4801e8d8bef9SDimitry Andric break; 480206c3fb27SDimitry Andric case AArch64::STGi: 480306c3fb27SDimitry Andric case AArch64::STZGi: 480406c3fb27SDimitry Andric case AArch64::ST2Gi: 480506c3fb27SDimitry Andric case AArch64::STZ2Gi: 4806e8d8bef9SDimitry Andric OpIndex = 1; 4807e8d8bef9SDimitry Andric break; 4808e8d8bef9SDimitry Andric default: 4809e8d8bef9SDimitry Andric OpIndex = -1; 4810e8d8bef9SDimitry Andric } 4811e8d8bef9SDimitry Andric 4812e8d8bef9SDimitry Andric int TaggedFI = -1; 4813e8d8bef9SDimitry Andric if (OpIndex >= 0) { 4814e8d8bef9SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIndex); 4815e8d8bef9SDimitry Andric if (MO.isFI()) { 4816e8d8bef9SDimitry Andric int FI = MO.getIndex(); 4817e8d8bef9SDimitry Andric if (FI >= 0 && FI < MFI.getObjectIndexEnd() && 4818e8d8bef9SDimitry Andric 
FrameObjects[FI].IsValid) 4819e8d8bef9SDimitry Andric TaggedFI = FI; 4820e8d8bef9SDimitry Andric } 4821e8d8bef9SDimitry Andric } 4822e8d8bef9SDimitry Andric 4823e8d8bef9SDimitry Andric // If this is a stack tagging instruction for a slot that is not part of a 4824e8d8bef9SDimitry Andric // group yet, either start a new group or add it to the current one. 4825e8d8bef9SDimitry Andric if (TaggedFI >= 0) 4826e8d8bef9SDimitry Andric GB.AddMember(TaggedFI); 4827e8d8bef9SDimitry Andric else 4828e8d8bef9SDimitry Andric GB.EndCurrentGroup(); 4829e8d8bef9SDimitry Andric } 4830e8d8bef9SDimitry Andric // Groups should never span multiple basic blocks. 4831e8d8bef9SDimitry Andric GB.EndCurrentGroup(); 4832e8d8bef9SDimitry Andric } 4833e8d8bef9SDimitry Andric 48340fca6ea1SDimitry Andric if (AFI.hasStackHazardSlotIndex()) { 48350fca6ea1SDimitry Andric FrameObjects[AFI.getStackHazardSlotIndex()].Accesses = 48360fca6ea1SDimitry Andric FrameObject::AccessHazard; 48370fca6ea1SDimitry Andric // If a stack object is unknown or both GPR and FPR, sort it into GPR. 48380fca6ea1SDimitry Andric for (auto &Obj : FrameObjects) 48390fca6ea1SDimitry Andric if (!Obj.Accesses || 48400fca6ea1SDimitry Andric Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR)) 48410fca6ea1SDimitry Andric Obj.Accesses = FrameObject::AccessGPR; 48420fca6ea1SDimitry Andric } 48430fca6ea1SDimitry Andric 4844e8d8bef9SDimitry Andric // If the function's tagged base pointer is pinned to a stack slot, we want to 4845e8d8bef9SDimitry Andric // put that slot first when possible. This will likely place it at SP + 0, 4846e8d8bef9SDimitry Andric // and save one instruction when generating the base pointer because IRG does 4847e8d8bef9SDimitry Andric // not allow an immediate offset. 
4848bdd1243dSDimitry Andric std::optional<int> TBPI = AFI.getTaggedBasePointerIndex(); 4849e8d8bef9SDimitry Andric if (TBPI) { 4850e8d8bef9SDimitry Andric FrameObjects[*TBPI].ObjectFirst = true; 4851e8d8bef9SDimitry Andric FrameObjects[*TBPI].GroupFirst = true; 4852e8d8bef9SDimitry Andric int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex; 4853e8d8bef9SDimitry Andric if (FirstGroupIndex >= 0) 4854e8d8bef9SDimitry Andric for (FrameObject &Object : FrameObjects) 4855e8d8bef9SDimitry Andric if (Object.GroupIndex == FirstGroupIndex) 4856e8d8bef9SDimitry Andric Object.GroupFirst = true; 4857e8d8bef9SDimitry Andric } 4858e8d8bef9SDimitry Andric 4859e8d8bef9SDimitry Andric llvm::stable_sort(FrameObjects, FrameObjectCompare); 4860e8d8bef9SDimitry Andric 4861e8d8bef9SDimitry Andric int i = 0; 4862e8d8bef9SDimitry Andric for (auto &Obj : FrameObjects) { 4863e8d8bef9SDimitry Andric // All invalid items are sorted at the end, so it's safe to stop. 4864e8d8bef9SDimitry Andric if (!Obj.IsValid) 4865e8d8bef9SDimitry Andric break; 4866e8d8bef9SDimitry Andric ObjectsToAllocate[i++] = Obj.ObjectIndex; 4867e8d8bef9SDimitry Andric } 4868e8d8bef9SDimitry Andric 48690fca6ea1SDimitry Andric LLVM_DEBUG({ 48700fca6ea1SDimitry Andric dbgs() << "Final frame order:\n"; 48710fca6ea1SDimitry Andric for (auto &Obj : FrameObjects) { 4872e8d8bef9SDimitry Andric if (!Obj.IsValid) 4873e8d8bef9SDimitry Andric break; 4874e8d8bef9SDimitry Andric dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex; 4875e8d8bef9SDimitry Andric if (Obj.ObjectFirst) 4876e8d8bef9SDimitry Andric dbgs() << ", first"; 4877e8d8bef9SDimitry Andric if (Obj.GroupFirst) 4878e8d8bef9SDimitry Andric dbgs() << ", group-first"; 4879e8d8bef9SDimitry Andric dbgs() << "\n"; 48800fca6ea1SDimitry Andric } 4881e8d8bef9SDimitry Andric }); 4882e8d8bef9SDimitry Andric } 48835f757f3fSDimitry Andric 48845f757f3fSDimitry Andric /// Emit a loop to decrement SP until it is equal to TargetReg, with probes at 48855f757f3fSDimitry Andric 
/// least every ProbeSize bytes. Returns an iterator of the first instruction
/// after the loop. The difference between SP and TargetReg must be an exact
/// multiple of ProbeSize.
MachineBasicBlock::iterator
AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
    MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
    Register TargetReg) const {
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineFunction &MF = *MBB.getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Create the control flow MBB -> LoopMBB -> ExitMBB, with LoopMBB also
  // branching back to itself.
  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, LoopMBB);
  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, ExitMBB);

  // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
  // in SUB).
  emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-ProbeSize), TII,
                  MachineInstr::FrameSetup);
  // STR XZR, [SP]
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
      .addReg(AArch64::XZR)
      .addReg(AArch64::SP)
      .addImm(0)
      .setMIFlags(MachineInstr::FrameSetup);
  // CMP SP, TargetReg
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
          AArch64::XZR)
      .addReg(AArch64::SP)
      .addReg(TargetReg)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
      .setMIFlags(MachineInstr::FrameSetup);
  // B.CC Loop
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopMBB)
      .setMIFlags(MachineInstr::FrameSetup);

  LoopMBB->addSuccessor(ExitMBB);
  LoopMBB->addSuccessor(LoopMBB);
  // Synthesize the exit MBB: move the remainder of the original block after
  // the probe loop.
  ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(LoopMBB);
  // Update liveins.
  fullyRecomputeLiveIns({ExitMBB, LoopMBB});

  return ExitMBB->begin();
}

/// Allocate a fixed-size stack frame of \p FrameSize bytes, emitting a probe
/// (a store of XZR) at least every probe-size bytes. Small block counts are
/// fully unrolled; larger ones use the loop emitted by
/// inlineStackProbeLoopExactMultiple. \p CFAOffset is the CFA offset on entry,
/// used when emitting asynchronous CFI without a frame pointer.
void AArch64FrameLowering::inlineStackProbeFixed(
    MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
    StackOffset CFAOffset) const {
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  bool HasFP = hasFP(MF);

  DebugLoc DL;
  int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
  // Split the frame into whole probe-size blocks plus a residual tail.
  int64_t NumBlocks = FrameSize / ProbeSize;
  int64_t ResidualSize = FrameSize % ProbeSize;

  LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
                    << NumBlocks << " blocks of " << ProbeSize
                    << " bytes, plus " << ResidualSize << " bytes\n");

  // Decrement SP by NumBlock * ProbeSize bytes, with either unrolled or
  // ordinary loop.
  if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
    for (int i = 0; i < NumBlocks; ++i) {
      // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not
      // encodable in a SUB).
      emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                      StackOffset::getFixed(-ProbeSize), TII,
                      MachineInstr::FrameSetup, false, false, nullptr,
                      EmitAsyncCFI && !HasFP, CFAOffset);
      CFAOffset += StackOffset::getFixed(ProbeSize);
      // STR XZR, [SP]
      BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  } else if (NumBlocks != 0) {
    // SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not
    // encodable in ADD). ScratchReg may temporarily become the CFA register.
    emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
                    StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
                    MachineInstr::FrameSetup, false, false, nullptr,
                    EmitAsyncCFI && !HasFP, CFAOffset);
    CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
    MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
    // The loop created new blocks; re-fetch the current block.
    MBB = MBBI->getParent();
    if (EmitAsyncCFI && !HasFP) {
      // Set the CFA register back to SP.
      const AArch64RegisterInfo &RegInfo =
          *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
      BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  if (ResidualSize != 0) {
    // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable
    // in SUB).
    emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-ResidualSize), TII,
                    MachineInstr::FrameSetup, false, false, nullptr,
                    EmitAsyncCFI && !HasFP, CFAOffset);
    if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
      // STR XZR, [SP]
      BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
}

/// Expand the PROBED_STACKALLOC / PROBED_STACKALLOC_VAR pseudo-instructions in
/// \p MBB into real stack-probing sequences.
void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  // Get the instructions that need to be replaced.
  // We emit at most two of
  // these. Remember them in order to avoid complications coming from the need
  // to traverse the block while potentially creating more blocks.
  SmallVector<MachineInstr *, 4> ToReplace;
  for (MachineInstr &MI : MBB)
    if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
        MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
      ToReplace.push_back(&MI);

  for (MachineInstr *MI : ToReplace) {
    if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
      // Fixed-size allocation: operands are the scratch register, the frame
      // size, and the (fixed, scalable) parts of the CFA offset.
      Register ScratchReg = MI->getOperand(0).getReg();
      int64_t FrameSize = MI->getOperand(1).getImm();
      StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
                                               MI->getOperand(3).getImm());
      inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
                            CFAOffset);
    } else {
      assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
             "Stack probe pseudo-instruction expected");
      // Variable-size allocation: probe down to the target register.
      const AArch64InstrInfo *TII =
          MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
      Register TargetReg = MI->getOperand(0).getReg();
      (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
    }
    MI->eraseFromParent();
  }
}

// Describes how one stack object is accessed: its frame index, SP-relative
// offset, size, and which register kinds (GPR/PPR/FPR) load or store it.
// Used by emitRemarks to report potential SME stack hazards.
struct StackAccess {
  enum AccessType {
    NotAccessed = 0, // Stack object not accessed by load/store instructions.
    GPR = 1 << 0,    // A general purpose register.
    PPR = 1 << 1,    // A predicate register.
    FPR = 1 << 2,    // A floating point/Neon/SVE register.
  };

  int Idx;
  StackOffset Offset;
  int64_t Size;
  unsigned AccessTypes;

  StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}

  // Order by start offset, breaking ties by frame index.
  bool operator<(const StackAccess &Rhs) const {
    return std::make_tuple(start(), Idx) <
           std::make_tuple(Rhs.start(), Rhs.Idx);
  }

  bool isCPU() const {
    // Predicate register load and store instructions execute on the CPU.
    return AccessTypes & (AccessType::GPR | AccessType::PPR);
  }
  bool isSME() const { return AccessTypes & AccessType::FPR; }
  bool isMixed() const { return isCPU() && isSME(); }

  // Start/end of the object, folding fixed and scalable parts into one
  // comparable integer.
  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
  int64_t end() const { return start() + Size; }

  std::string getTypeString() const {
    switch (AccessTypes) {
    case AccessType::FPR:
      return "FPR";
    case AccessType::PPR:
      return "PPR";
    case AccessType::GPR:
      return "GPR";
    case AccessType::NotAccessed:
      return "NA";
    default:
      return "Mixed";
    }
  }

  void print(raw_ostream &OS) const {
    OS << getTypeString() << " stack object at [SP"
       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
    if (Offset.getScalable())
      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
         << " * vscale";
    OS << "]";
  }
};

static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
  SA.print(OS);
  return OS;
}

/// Emit optimization remarks flagging stack layouts that may cause SME stack
/// hazards: CPU-accessed and FPR/SVE-accessed objects that lie closer together
/// than the hazard size, and objects accessed by both kinds of instruction.
void AArch64FrameLowering::emitRemarks(
    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {

  SMEAttrs Attrs(MF.getFunction());
  if (Attrs.hasNonStreamingInterfaceAndBody())
    return;

  // Prefer the configured hazard padding size; otherwise fall back to the
  // remark-only threshold.
  const uint64_t HazardSize =
      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;

  if (HazardSize == 0)
    return;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // Bail if function has no stack objects.
  if (!MFI.hasStackObjects())
    return;

  // One entry per frame object; fixed objects occupy the leading entries.
  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());

  size_t NumFPLdSt = 0;
  size_t NumNonFPLdSt = 0;

  // Collect stack accesses via Load/Store instructions.
5134*62987288SDimitry Andric for (const MachineBasicBlock &MBB : MF) { 5135*62987288SDimitry Andric for (const MachineInstr &MI : MBB) { 5136*62987288SDimitry Andric if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) 5137*62987288SDimitry Andric continue; 5138*62987288SDimitry Andric for (MachineMemOperand *MMO : MI.memoperands()) { 5139*62987288SDimitry Andric std::optional<int> FI = getMMOFrameID(MMO, MFI); 5140*62987288SDimitry Andric if (FI && !MFI.isDeadObjectIndex(*FI)) { 5141*62987288SDimitry Andric int FrameIdx = *FI; 5142*62987288SDimitry Andric 5143*62987288SDimitry Andric size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects(); 5144*62987288SDimitry Andric if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) { 5145*62987288SDimitry Andric StackAccesses[ArrIdx].Idx = FrameIdx; 5146*62987288SDimitry Andric StackAccesses[ArrIdx].Offset = 5147*62987288SDimitry Andric getFrameIndexReferenceFromSP(MF, FrameIdx); 5148*62987288SDimitry Andric StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx); 5149*62987288SDimitry Andric } 5150*62987288SDimitry Andric 5151*62987288SDimitry Andric unsigned RegTy = StackAccess::AccessType::GPR; 5152*62987288SDimitry Andric if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) { 5153*62987288SDimitry Andric if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) 5154*62987288SDimitry Andric RegTy = StackAccess::PPR; 5155*62987288SDimitry Andric else 5156*62987288SDimitry Andric RegTy = StackAccess::FPR; 5157*62987288SDimitry Andric } else if (AArch64InstrInfo::isFpOrNEON(MI)) { 5158*62987288SDimitry Andric RegTy = StackAccess::FPR; 5159*62987288SDimitry Andric } 5160*62987288SDimitry Andric 5161*62987288SDimitry Andric StackAccesses[ArrIdx].AccessTypes |= RegTy; 5162*62987288SDimitry Andric 5163*62987288SDimitry Andric if (RegTy == StackAccess::FPR) 5164*62987288SDimitry Andric ++NumFPLdSt; 5165*62987288SDimitry Andric else 5166*62987288SDimitry Andric ++NumNonFPLdSt; 5167*62987288SDimitry 
Andric } 5168*62987288SDimitry Andric } 5169*62987288SDimitry Andric } 5170*62987288SDimitry Andric } 5171*62987288SDimitry Andric 5172*62987288SDimitry Andric if (NumFPLdSt == 0 || NumNonFPLdSt == 0) 5173*62987288SDimitry Andric return; 5174*62987288SDimitry Andric 5175*62987288SDimitry Andric llvm::sort(StackAccesses); 5176*62987288SDimitry Andric StackAccesses.erase(llvm::remove_if(StackAccesses, 5177*62987288SDimitry Andric [](const StackAccess &S) { 5178*62987288SDimitry Andric return S.AccessTypes == 5179*62987288SDimitry Andric StackAccess::NotAccessed; 5180*62987288SDimitry Andric }), 5181*62987288SDimitry Andric StackAccesses.end()); 5182*62987288SDimitry Andric 5183*62987288SDimitry Andric SmallVector<const StackAccess *> MixedObjects; 5184*62987288SDimitry Andric SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs; 5185*62987288SDimitry Andric 5186*62987288SDimitry Andric if (StackAccesses.front().isMixed()) 5187*62987288SDimitry Andric MixedObjects.push_back(&StackAccesses.front()); 5188*62987288SDimitry Andric 5189*62987288SDimitry Andric for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end()); 5190*62987288SDimitry Andric It != End; ++It) { 5191*62987288SDimitry Andric const auto &First = *It; 5192*62987288SDimitry Andric const auto &Second = *(It + 1); 5193*62987288SDimitry Andric 5194*62987288SDimitry Andric if (Second.isMixed()) 5195*62987288SDimitry Andric MixedObjects.push_back(&Second); 5196*62987288SDimitry Andric 5197*62987288SDimitry Andric if ((First.isSME() && Second.isCPU()) || 5198*62987288SDimitry Andric (First.isCPU() && Second.isSME())) { 5199*62987288SDimitry Andric uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end()); 5200*62987288SDimitry Andric if (Distance < HazardSize) 5201*62987288SDimitry Andric HazardPairs.emplace_back(&First, &Second); 5202*62987288SDimitry Andric } 5203*62987288SDimitry Andric } 5204*62987288SDimitry Andric 5205*62987288SDimitry Andric auto 
EmitRemark = [&](llvm::StringRef Str) { 5206*62987288SDimitry Andric ORE->emit([&]() { 5207*62987288SDimitry Andric auto R = MachineOptimizationRemarkAnalysis( 5208*62987288SDimitry Andric "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front()); 5209*62987288SDimitry Andric return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str; 5210*62987288SDimitry Andric }); 5211*62987288SDimitry Andric }; 5212*62987288SDimitry Andric 5213*62987288SDimitry Andric for (const auto &P : HazardPairs) 5214*62987288SDimitry Andric EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str()); 5215*62987288SDimitry Andric 5216*62987288SDimitry Andric for (const auto *Obj : MixedObjects) 5217*62987288SDimitry Andric EmitRemark( 5218*62987288SDimitry Andric formatv("{0} accessed by both GP and FP instructions", *Obj).str()); 5219*62987288SDimitry Andric } 5220