xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===-- RISCVFrameLowering.cpp - RISC-V Frame Information -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the RISC-V implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVFrameLowering.h"
14 #include "MCTargetDesc/RISCVBaseInfo.h"
15 #include "RISCVMachineFunctionInfo.h"
16 #include "RISCVSubtarget.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/CodeGen/CFIInstBuilder.h"
19 #include "llvm/CodeGen/LivePhysRegs.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/RegisterScavenging.h"
25 #include "llvm/IR/DiagnosticInfo.h"
26 #include "llvm/MC/MCDwarf.h"
27 #include "llvm/Support/LEB128.h"
28 
29 #include <algorithm>
30 
31 #define DEBUG_TYPE "riscv-frame"
32 
33 using namespace llvm;
34 
getABIStackAlignment(RISCVABI::ABI ABI)35 static Align getABIStackAlignment(RISCVABI::ABI ABI) {
36   if (ABI == RISCVABI::ABI_ILP32E)
37     return Align(4);
38   if (ABI == RISCVABI::ABI_LP64E)
39     return Align(8);
40   return Align(16);
41 }
42 
RISCVFrameLowering(const RISCVSubtarget & STI)43 RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI)
44     : TargetFrameLowering(
45           StackGrowsDown, getABIStackAlignment(STI.getTargetABI()),
46           /*LocalAreaOffset=*/0,
47           /*TransientStackAlignment=*/getABIStackAlignment(STI.getTargetABI())),
48       STI(STI) {}
49 
50 // The register used to hold the frame pointer.
51 static constexpr MCPhysReg FPReg = RISCV::X8;
52 
53 // The register used to hold the stack pointer.
54 static constexpr MCPhysReg SPReg = RISCV::X2;
55 
56 // The register used to hold the return address.
57 static constexpr MCPhysReg RAReg = RISCV::X1;
58 
59 // LIst of CSRs that are given a fixed location by save/restore libcalls or
60 // Zcmp/Xqccmp Push/Pop. The order in this table indicates the order the
61 // registers are saved on the stack. Zcmp uses the reverse order of save/restore
62 // and Xqccmp on the stack, but this is handled when offsets are calculated.
63 static const MCPhysReg FixedCSRFIMap[] = {
64     /*ra*/ RAReg,      /*s0*/ FPReg,      /*s1*/ RISCV::X9,
65     /*s2*/ RISCV::X18, /*s3*/ RISCV::X19, /*s4*/ RISCV::X20,
66     /*s5*/ RISCV::X21, /*s6*/ RISCV::X22, /*s7*/ RISCV::X23,
67     /*s8*/ RISCV::X24, /*s9*/ RISCV::X25, /*s10*/ RISCV::X26,
68     /*s11*/ RISCV::X27};
69 
70 // The number of stack bytes allocated by `QC.C.MIENTER(.NEST)` and popped by
71 // `QC.C.MILEAVERET`.
72 static constexpr uint64_t QCIInterruptPushAmount = 96;
73 
74 static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
75     /* -1 is a gap for mepc/mnepc */
76     {/*fp*/ FPReg, -2},
77     /* -3 is a gap for qc.mcause */
78     {/*ra*/ RAReg, -4},
79     /* -5 is reserved */
80     {/*t0*/ RISCV::X5, -6},
81     {/*t1*/ RISCV::X6, -7},
82     {/*t2*/ RISCV::X7, -8},
83     {/*a0*/ RISCV::X10, -9},
84     {/*a1*/ RISCV::X11, -10},
85     {/*a2*/ RISCV::X12, -11},
86     {/*a3*/ RISCV::X13, -12},
87     {/*a4*/ RISCV::X14, -13},
88     {/*a5*/ RISCV::X15, -14},
89     {/*a6*/ RISCV::X16, -15},
90     {/*a7*/ RISCV::X17, -16},
91     {/*t3*/ RISCV::X28, -17},
92     {/*t4*/ RISCV::X29, -18},
93     {/*t5*/ RISCV::X30, -19},
94     {/*t6*/ RISCV::X31, -20},
95     /* -21, -22, -23, -24 are reserved */
96 };
97 
98 // For now we use x3, a.k.a gp, as pointer to shadow call stack.
99 // User should not use x3 in their asm.
emitSCSPrologue(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const DebugLoc & DL)100 static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
101                             MachineBasicBlock::iterator MI,
102                             const DebugLoc &DL) {
103   const auto &STI = MF.getSubtarget<RISCVSubtarget>();
104   bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") &&
105                           STI.hasStdExtZicfiss();
106   bool HasSWShadowStack =
107       MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
108   if (!HasHWShadowStack && !HasSWShadowStack)
109     return;
110 
111   const llvm::RISCVRegisterInfo *TRI = STI.getRegisterInfo();
112 
113   // Do not save RA to the SCS if it's not saved to the regular stack,
114   // i.e. RA is not at risk of being overwritten.
115   std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
116   if (llvm::none_of(
117           CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
118     return;
119 
120   const RISCVInstrInfo *TII = STI.getInstrInfo();
121   if (HasHWShadowStack) {
122     BuildMI(MBB, MI, DL, TII->get(RISCV::SSPUSH)).addReg(RAReg);
123     return;
124   }
125 
126   Register SCSPReg = RISCVABI::getSCSPReg();
127 
128   bool IsRV64 = STI.is64Bit();
129   int64_t SlotSize = STI.getXLen() / 8;
130   // Store return address to shadow call stack
131   // addi    gp, gp, [4|8]
132   // s[w|d]  ra, -[4|8](gp)
133   BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI))
134       .addReg(SCSPReg, RegState::Define)
135       .addReg(SCSPReg)
136       .addImm(SlotSize)
137       .setMIFlag(MachineInstr::FrameSetup);
138   BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
139       .addReg(RAReg)
140       .addReg(SCSPReg)
141       .addImm(-SlotSize)
142       .setMIFlag(MachineInstr::FrameSetup);
143 
144   // Emit a CFI instruction that causes SlotSize to be subtracted from the value
145   // of the shadow stack pointer when unwinding past this frame.
146   char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true);
147   assert(DwarfSCSReg < 32 && "SCS Register should be < 32 (X3).");
148 
149   char Offset = static_cast<char>(-SlotSize) & 0x7f;
150   const char CFIInst[] = {
151       dwarf::DW_CFA_val_expression,
152       DwarfSCSReg, // register
153       2,           // length
154       static_cast<char>(unsigned(dwarf::DW_OP_breg0 + DwarfSCSReg)),
155       Offset, // addend (sleb128)
156   };
157 
158   CFIInstBuilder(MBB, MI, MachineInstr::FrameSetup)
159       .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
160 }
161 
emitSCSEpilogue(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const DebugLoc & DL)162 static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
163                             MachineBasicBlock::iterator MI,
164                             const DebugLoc &DL) {
165   const auto &STI = MF.getSubtarget<RISCVSubtarget>();
166   bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") &&
167                           STI.hasStdExtZicfiss();
168   bool HasSWShadowStack =
169       MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
170   if (!HasHWShadowStack && !HasSWShadowStack)
171     return;
172 
173   // See emitSCSPrologue() above.
174   std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
175   if (llvm::none_of(
176           CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
177     return;
178 
179   const RISCVInstrInfo *TII = STI.getInstrInfo();
180   if (HasHWShadowStack) {
181     BuildMI(MBB, MI, DL, TII->get(RISCV::SSPOPCHK)).addReg(RAReg);
182     return;
183   }
184 
185   Register SCSPReg = RISCVABI::getSCSPReg();
186 
187   bool IsRV64 = STI.is64Bit();
188   int64_t SlotSize = STI.getXLen() / 8;
189   // Load return address from shadow call stack
190   // l[w|d]  ra, -[4|8](gp)
191   // addi    gp, gp, -[4|8]
192   BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::LD : RISCV::LW))
193       .addReg(RAReg, RegState::Define)
194       .addReg(SCSPReg)
195       .addImm(-SlotSize)
196       .setMIFlag(MachineInstr::FrameDestroy);
197   BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI))
198       .addReg(SCSPReg, RegState::Define)
199       .addReg(SCSPReg)
200       .addImm(-SlotSize)
201       .setMIFlag(MachineInstr::FrameDestroy);
202   // Restore the SCS pointer
203   CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
204 }
205 
206 // Insert instruction to swap mscratchsw with sp
emitSiFiveCLICStackSwap(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL)207 static void emitSiFiveCLICStackSwap(MachineFunction &MF, MachineBasicBlock &MBB,
208                                     MachineBasicBlock::iterator MBBI,
209                                     const DebugLoc &DL) {
210   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
211 
212   if (!RVFI->isSiFiveStackSwapInterrupt(MF))
213     return;
214 
215   const auto &STI = MF.getSubtarget<RISCVSubtarget>();
216   const RISCVInstrInfo *TII = STI.getInstrInfo();
217 
218   assert(STI.hasVendorXSfmclic() && "Stack Swapping Requires XSfmclic");
219 
220   BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW))
221       .addReg(SPReg, RegState::Define)
222       .addImm(RISCVSysReg::sf_mscratchcsw)
223       .addReg(SPReg, RegState::Kill)
224       .setMIFlag(MachineInstr::FrameSetup);
225 
226   // FIXME: CFI Information for this swap.
227 }
228 
229 static void
createSiFivePreemptibleInterruptFrameEntries(MachineFunction & MF,RISCVMachineFunctionInfo & RVFI)230 createSiFivePreemptibleInterruptFrameEntries(MachineFunction &MF,
231                                              RISCVMachineFunctionInfo &RVFI) {
232   if (!RVFI.isSiFivePreemptibleInterrupt(MF))
233     return;
234 
235   const TargetRegisterClass &RC = RISCV::GPRRegClass;
236   const TargetRegisterInfo &TRI =
237       *MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
238   MachineFrameInfo &MFI = MF.getFrameInfo();
239 
240   // Create two frame objects for spilling X8 and X9, which will be done in
241   // `emitSiFiveCLICPreemptibleSaves`. This is in addition to any other stack
242   // objects we might have for X8 and X9, as they might be saved twice.
243   for (int I = 0; I < 2; ++I) {
244     int FI = MFI.CreateStackObject(TRI.getSpillSize(RC), TRI.getSpillAlign(RC),
245                                    true);
246     RVFI.pushInterruptCSRFrameIndex(FI);
247   }
248 }
249 
emitSiFiveCLICPreemptibleSaves(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL)250 static void emitSiFiveCLICPreemptibleSaves(MachineFunction &MF,
251                                            MachineBasicBlock &MBB,
252                                            MachineBasicBlock::iterator MBBI,
253                                            const DebugLoc &DL) {
254   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
255 
256   if (!RVFI->isSiFivePreemptibleInterrupt(MF))
257     return;
258 
259   const auto &STI = MF.getSubtarget<RISCVSubtarget>();
260   const RISCVInstrInfo *TII = STI.getInstrInfo();
261 
262   // FIXME: CFI Information here is nonexistent/wrong.
263 
264   // X8 and X9 might be stored into the stack twice, initially into the
265   // `interruptCSRFrameIndex` here, and then maybe again into their CSI frame
266   // index.
267   //
268   // This is done instead of telling the register allocator that we need two
269   // VRegs to store the value of `mcause` and `mepc` through the instruction,
270   // which affects other passes.
271   TII->storeRegToStackSlot(MBB, MBBI, RISCV::X8, /* IsKill=*/true,
272                            RVFI->getInterruptCSRFrameIndex(0),
273                            &RISCV::GPRRegClass, STI.getRegisterInfo(),
274                            Register(), MachineInstr::FrameSetup);
275   TII->storeRegToStackSlot(MBB, MBBI, RISCV::X9, /* IsKill=*/true,
276                            RVFI->getInterruptCSRFrameIndex(1),
277                            &RISCV::GPRRegClass, STI.getRegisterInfo(),
278                            Register(), MachineInstr::FrameSetup);
279 
280   // Put `mcause` into X8 (s0), and `mepc` into X9 (s1). If either of these are
281   // used in the function, then they will appear in `getUnmanagedCSI` and will
282   // be saved again.
283   BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRS))
284       .addReg(RISCV::X8, RegState::Define)
285       .addImm(RISCVSysReg::mcause)
286       .addReg(RISCV::X0)
287       .setMIFlag(MachineInstr::FrameSetup);
288   BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRS))
289       .addReg(RISCV::X9, RegState::Define)
290       .addImm(RISCVSysReg::mepc)
291       .addReg(RISCV::X0)
292       .setMIFlag(MachineInstr::FrameSetup);
293 
294   // Enable interrupts.
295   BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRSI))
296       .addReg(RISCV::X0, RegState::Define)
297       .addImm(RISCVSysReg::mstatus)
298       .addImm(8)
299       .setMIFlag(MachineInstr::FrameSetup);
300 }
301 
emitSiFiveCLICPreemptibleRestores(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL)302 static void emitSiFiveCLICPreemptibleRestores(MachineFunction &MF,
303                                               MachineBasicBlock &MBB,
304                                               MachineBasicBlock::iterator MBBI,
305                                               const DebugLoc &DL) {
306   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
307 
308   if (!RVFI->isSiFivePreemptibleInterrupt(MF))
309     return;
310 
311   const auto &STI = MF.getSubtarget<RISCVSubtarget>();
312   const RISCVInstrInfo *TII = STI.getInstrInfo();
313 
314   // FIXME: CFI Information here is nonexistent/wrong.
315 
316   // Disable interrupts.
317   BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRCI))
318       .addReg(RISCV::X0, RegState::Define)
319       .addImm(RISCVSysReg::mstatus)
320       .addImm(8)
321       .setMIFlag(MachineInstr::FrameSetup);
322 
323   // Restore `mepc` from x9 (s1), and `mcause` from x8 (s0). If either were used
324   // in the function, they have already been restored once, so now have the
325   // value stored in `emitSiFiveCLICPreemptibleSaves`.
326   BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW))
327       .addReg(RISCV::X0, RegState::Define)
328       .addImm(RISCVSysReg::mepc)
329       .addReg(RISCV::X9, RegState::Kill)
330       .setMIFlag(MachineInstr::FrameSetup);
331   BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW))
332       .addReg(RISCV::X0, RegState::Define)
333       .addImm(RISCVSysReg::mcause)
334       .addReg(RISCV::X8, RegState::Kill)
335       .setMIFlag(MachineInstr::FrameSetup);
336 
337   // X8 and X9 need to be restored to their values on function entry, which we
338   // saved onto the stack in `emitSiFiveCLICPreemptibleSaves`.
339   TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X9,
340                             RVFI->getInterruptCSRFrameIndex(1),
341                             &RISCV::GPRRegClass, STI.getRegisterInfo(),
342                             Register(), MachineInstr::FrameSetup);
343   TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X8,
344                             RVFI->getInterruptCSRFrameIndex(0),
345                             &RISCV::GPRRegClass, STI.getRegisterInfo(),
346                             Register(), MachineInstr::FrameSetup);
347 }
348 
349 // Get the ID of the libcall used for spilling and restoring callee saved
350 // registers. The ID is representative of the number of registers saved or
351 // restored by the libcall, except it is zero-indexed - ID 0 corresponds to a
352 // single register.
getLibCallID(const MachineFunction & MF,const std::vector<CalleeSavedInfo> & CSI)353 static int getLibCallID(const MachineFunction &MF,
354                         const std::vector<CalleeSavedInfo> &CSI) {
355   const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
356 
357   if (CSI.empty() || !RVFI->useSaveRestoreLibCalls(MF))
358     return -1;
359 
360   MCRegister MaxReg;
361   for (auto &CS : CSI)
362     // assignCalleeSavedSpillSlots assigns negative frame indexes to
363     // registers which can be saved by libcall.
364     if (CS.getFrameIdx() < 0)
365       MaxReg = std::max(MaxReg.id(), CS.getReg().id());
366 
367   if (!MaxReg)
368     return -1;
369 
370   switch (MaxReg.id()) {
371   default:
372     llvm_unreachable("Something has gone wrong!");
373     // clang-format off
374   case /*s11*/ RISCV::X27: return 12;
375   case /*s10*/ RISCV::X26: return 11;
376   case /*s9*/  RISCV::X25: return 10;
377   case /*s8*/  RISCV::X24: return 9;
378   case /*s7*/  RISCV::X23: return 8;
379   case /*s6*/  RISCV::X22: return 7;
380   case /*s5*/  RISCV::X21: return 6;
381   case /*s4*/  RISCV::X20: return 5;
382   case /*s3*/  RISCV::X19: return 4;
383   case /*s2*/  RISCV::X18: return 3;
384   case /*s1*/  RISCV::X9:  return 2;
385   case /*s0*/  FPReg:  return 1;
386   case /*ra*/  RAReg:  return 0;
387     // clang-format on
388   }
389 }
390 
391 // Get the name of the libcall used for spilling callee saved registers.
392 // If this function will not use save/restore libcalls, then return a nullptr.
393 static const char *
getSpillLibCallName(const MachineFunction & MF,const std::vector<CalleeSavedInfo> & CSI)394 getSpillLibCallName(const MachineFunction &MF,
395                     const std::vector<CalleeSavedInfo> &CSI) {
396   static const char *const SpillLibCalls[] = {
397     "__riscv_save_0",
398     "__riscv_save_1",
399     "__riscv_save_2",
400     "__riscv_save_3",
401     "__riscv_save_4",
402     "__riscv_save_5",
403     "__riscv_save_6",
404     "__riscv_save_7",
405     "__riscv_save_8",
406     "__riscv_save_9",
407     "__riscv_save_10",
408     "__riscv_save_11",
409     "__riscv_save_12"
410   };
411 
412   int LibCallID = getLibCallID(MF, CSI);
413   if (LibCallID == -1)
414     return nullptr;
415   return SpillLibCalls[LibCallID];
416 }
417 
418 // Get the name of the libcall used for restoring callee saved registers.
419 // If this function will not use save/restore libcalls, then return a nullptr.
420 static const char *
getRestoreLibCallName(const MachineFunction & MF,const std::vector<CalleeSavedInfo> & CSI)421 getRestoreLibCallName(const MachineFunction &MF,
422                       const std::vector<CalleeSavedInfo> &CSI) {
423   static const char *const RestoreLibCalls[] = {
424     "__riscv_restore_0",
425     "__riscv_restore_1",
426     "__riscv_restore_2",
427     "__riscv_restore_3",
428     "__riscv_restore_4",
429     "__riscv_restore_5",
430     "__riscv_restore_6",
431     "__riscv_restore_7",
432     "__riscv_restore_8",
433     "__riscv_restore_9",
434     "__riscv_restore_10",
435     "__riscv_restore_11",
436     "__riscv_restore_12"
437   };
438 
439   int LibCallID = getLibCallID(MF, CSI);
440   if (LibCallID == -1)
441     return nullptr;
442   return RestoreLibCalls[LibCallID];
443 }
444 
445 // Get the max reg of Push/Pop for restoring callee saved registers.
getNumPushPopRegs(const std::vector<CalleeSavedInfo> & CSI)446 static unsigned getNumPushPopRegs(const std::vector<CalleeSavedInfo> &CSI) {
447   unsigned NumPushPopRegs = 0;
448   for (auto &CS : CSI) {
449     auto *FII = llvm::find_if(FixedCSRFIMap,
450                               [&](MCPhysReg P) { return P == CS.getReg(); });
451     if (FII != std::end(FixedCSRFIMap)) {
452       unsigned RegNum = std::distance(std::begin(FixedCSRFIMap), FII);
453       NumPushPopRegs = std::max(NumPushPopRegs, RegNum + 1);
454     }
455   }
456   assert(NumPushPopRegs != 12 && "x26 requires x27 to also be pushed");
457   return NumPushPopRegs;
458 }
459 
460 // Return true if the specified function should have a dedicated frame
461 // pointer register.  This is true if frame pointer elimination is
462 // disabled, if it needs dynamic stack realignment, if the function has
463 // variable sized allocas, or if the frame address is taken.
hasFPImpl(const MachineFunction & MF) const464 bool RISCVFrameLowering::hasFPImpl(const MachineFunction &MF) const {
465   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
466 
467   const MachineFrameInfo &MFI = MF.getFrameInfo();
468   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
469          RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
470          MFI.isFrameAddressTaken();
471 }
472 
hasBP(const MachineFunction & MF) const473 bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
474   const MachineFrameInfo &MFI = MF.getFrameInfo();
475   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
476 
477   // If we do not reserve stack space for outgoing arguments in prologue,
478   // we will adjust the stack pointer before call instruction. After the
479   // adjustment, we can not use SP to access the stack objects for the
480   // arguments. Instead, use BP to access these stack objects.
481   return (MFI.hasVarSizedObjects() ||
482           (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() ||
483                                          MFI.getMaxCallFrameSize() != 0))) &&
484          TRI->hasStackRealignment(MF);
485 }
486 
487 // Determines the size of the frame and maximum call frame size.
determineFrameLayout(MachineFunction & MF) const488 void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
489   MachineFrameInfo &MFI = MF.getFrameInfo();
490   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
491 
492   // Get the number of bytes to allocate from the FrameInfo.
493   uint64_t FrameSize = MFI.getStackSize();
494 
495   // QCI Interrupts use at least 96 bytes of stack space
496   if (RVFI->useQCIInterrupt(MF))
497     FrameSize = std::max(FrameSize, QCIInterruptPushAmount);
498 
499   // Get the alignment.
500   Align StackAlign = getStackAlign();
501 
502   // Make sure the frame is aligned.
503   FrameSize = alignTo(FrameSize, StackAlign);
504 
505   // Update frame info.
506   MFI.setStackSize(FrameSize);
507 
508   // When using SP or BP to access stack objects, we may require extra padding
509   // to ensure the bottom of the RVV stack is correctly aligned within the main
510   // stack. We calculate this as the amount required to align the scalar local
511   // variable section up to the RVV alignment.
512   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
513   if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) {
514     int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() -
515                              RVFI->getVarArgsSaveSize();
516     if (auto RVVPadding =
517             offsetToAlignment(ScalarLocalVarSize, RVFI->getRVVStackAlign()))
518       RVFI->setRVVPadding(RVVPadding);
519   }
520 }
521 
522 // Returns the stack size including RVV padding (when required), rounded back
523 // up to the required stack alignment.
getStackSizeWithRVVPadding(const MachineFunction & MF) const524 uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding(
525     const MachineFunction &MF) const {
526   const MachineFrameInfo &MFI = MF.getFrameInfo();
527   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
528   return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign());
529 }
530 
531 static SmallVector<CalleeSavedInfo, 8>
getUnmanagedCSI(const MachineFunction & MF,const std::vector<CalleeSavedInfo> & CSI)532 getUnmanagedCSI(const MachineFunction &MF,
533                 const std::vector<CalleeSavedInfo> &CSI) {
534   const MachineFrameInfo &MFI = MF.getFrameInfo();
535   SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
536 
537   for (auto &CS : CSI) {
538     int FI = CS.getFrameIdx();
539     if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::Default)
540       NonLibcallCSI.push_back(CS);
541   }
542 
543   return NonLibcallCSI;
544 }
545 
546 static SmallVector<CalleeSavedInfo, 8>
getRVVCalleeSavedInfo(const MachineFunction & MF,const std::vector<CalleeSavedInfo> & CSI)547 getRVVCalleeSavedInfo(const MachineFunction &MF,
548                       const std::vector<CalleeSavedInfo> &CSI) {
549   const MachineFrameInfo &MFI = MF.getFrameInfo();
550   SmallVector<CalleeSavedInfo, 8> RVVCSI;
551 
552   for (auto &CS : CSI) {
553     int FI = CS.getFrameIdx();
554     if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector)
555       RVVCSI.push_back(CS);
556   }
557 
558   return RVVCSI;
559 }
560 
561 static SmallVector<CalleeSavedInfo, 8>
getPushOrLibCallsSavedInfo(const MachineFunction & MF,const std::vector<CalleeSavedInfo> & CSI)562 getPushOrLibCallsSavedInfo(const MachineFunction &MF,
563                            const std::vector<CalleeSavedInfo> &CSI) {
564   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
565 
566   SmallVector<CalleeSavedInfo, 8> PushOrLibCallsCSI;
567   if (!RVFI->useSaveRestoreLibCalls(MF) && !RVFI->isPushable(MF))
568     return PushOrLibCallsCSI;
569 
570   for (const auto &CS : CSI) {
571     if (RVFI->useQCIInterrupt(MF)) {
572       // Some registers are saved by both `QC.C.MIENTER(.NEST)` and
573       // `QC.CM.PUSH(FP)`. In these cases, prioritise the CFI info that points
574       // to the versions saved by `QC.C.MIENTER(.NEST)` which is what FP
575       // unwinding would use.
576       if (llvm::is_contained(llvm::make_first_range(FixedCSRFIQCIInterruptMap),
577                              CS.getReg()))
578         continue;
579     }
580 
581     if (llvm::is_contained(FixedCSRFIMap, CS.getReg()))
582       PushOrLibCallsCSI.push_back(CS);
583   }
584 
585   return PushOrLibCallsCSI;
586 }
587 
588 static SmallVector<CalleeSavedInfo, 8>
getQCISavedInfo(const MachineFunction & MF,const std::vector<CalleeSavedInfo> & CSI)589 getQCISavedInfo(const MachineFunction &MF,
590                 const std::vector<CalleeSavedInfo> &CSI) {
591   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
592 
593   SmallVector<CalleeSavedInfo, 8> QCIInterruptCSI;
594   if (!RVFI->useQCIInterrupt(MF))
595     return QCIInterruptCSI;
596 
597   for (const auto &CS : CSI) {
598     if (llvm::is_contained(llvm::make_first_range(FixedCSRFIQCIInterruptMap),
599                            CS.getReg()))
600       QCIInterruptCSI.push_back(CS);
601   }
602 
603   return QCIInterruptCSI;
604 }
605 
allocateAndProbeStackForRVV(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,int64_t Amount,MachineInstr::MIFlag Flag,bool EmitCFI,bool DynAllocation) const606 void RISCVFrameLowering::allocateAndProbeStackForRVV(
607     MachineFunction &MF, MachineBasicBlock &MBB,
608     MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount,
609     MachineInstr::MIFlag Flag, bool EmitCFI, bool DynAllocation) const {
610   assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");
611 
612   // Emit a variable-length allocation probing loop.
613 
614   // Get VLEN in TargetReg
615   const RISCVInstrInfo *TII = STI.getInstrInfo();
616   Register TargetReg = RISCV::X6;
617   uint32_t NumOfVReg = Amount / RISCV::RVVBytesPerBlock;
618   BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoReadVLENB), TargetReg)
619       .setMIFlag(Flag);
620   TII->mulImm(MF, MBB, MBBI, DL, TargetReg, NumOfVReg, Flag);
621 
622   CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
623   if (EmitCFI) {
624     // Set the CFA register to TargetReg.
625     CFIBuilder.buildDefCFA(TargetReg, -Amount);
626   }
627 
628   // It will be expanded to a probe loop in `inlineStackProbe`.
629   BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC_RVV))
630       .addReg(TargetReg);
631 
632   if (EmitCFI) {
633     // Set the CFA register back to SP.
634     CFIBuilder.buildDefCFARegister(SPReg);
635   }
636 
637   // SUB SP, SP, T1
638   BuildMI(MBB, MBBI, DL, TII->get(RISCV::SUB), SPReg)
639       .addReg(SPReg)
640       .addReg(TargetReg)
641       .setMIFlag(Flag);
642 
643   // If we have a dynamic allocation later we need to probe any residuals.
644   if (DynAllocation) {
645     BuildMI(MBB, MBBI, DL, TII->get(STI.is64Bit() ? RISCV::SD : RISCV::SW))
646         .addReg(RISCV::X0)
647         .addReg(SPReg)
648         .addImm(0)
649         .setMIFlags(MachineInstr::FrameSetup);
650   }
651 }
652 
appendScalableVectorExpression(const TargetRegisterInfo & TRI,SmallVectorImpl<char> & Expr,int FixedOffset,int ScalableOffset,llvm::raw_string_ostream & Comment)653 static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
654                                            SmallVectorImpl<char> &Expr,
655                                            int FixedOffset, int ScalableOffset,
656                                            llvm::raw_string_ostream &Comment) {
657   unsigned DwarfVLenB = TRI.getDwarfRegNum(RISCV::VLENB, true);
658   uint8_t Buffer[16];
659   if (FixedOffset) {
660     Expr.push_back(dwarf::DW_OP_consts);
661     Expr.append(Buffer, Buffer + encodeSLEB128(FixedOffset, Buffer));
662     Expr.push_back((uint8_t)dwarf::DW_OP_plus);
663     Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(FixedOffset);
664   }
665 
666   Expr.push_back((uint8_t)dwarf::DW_OP_consts);
667   Expr.append(Buffer, Buffer + encodeSLEB128(ScalableOffset, Buffer));
668 
669   Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
670   Expr.append(Buffer, Buffer + encodeULEB128(DwarfVLenB, Buffer));
671   Expr.push_back(0);
672 
673   Expr.push_back((uint8_t)dwarf::DW_OP_mul);
674   Expr.push_back((uint8_t)dwarf::DW_OP_plus);
675 
676   Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(ScalableOffset)
677           << " * vlenb";
678 }
679 
createDefCFAExpression(const TargetRegisterInfo & TRI,Register Reg,uint64_t FixedOffset,uint64_t ScalableOffset)680 static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
681                                                Register Reg,
682                                                uint64_t FixedOffset,
683                                                uint64_t ScalableOffset) {
684   assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV");
685   SmallString<64> Expr;
686   std::string CommentBuffer;
687   llvm::raw_string_ostream Comment(CommentBuffer);
688   // Build up the expression (Reg + FixedOffset + ScalableOffset * VLENB).
689   unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
690   Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
691   Expr.push_back(0);
692   if (Reg == SPReg)
693     Comment << "sp";
694   else
695     Comment << printReg(Reg, &TRI);
696 
697   appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
698                                  Comment);
699 
700   SmallString<64> DefCfaExpr;
701   uint8_t Buffer[16];
702   DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
703   DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
704   DefCfaExpr.append(Expr.str());
705 
706   return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
707                                         Comment.str());
708 }
709 
createDefCFAOffset(const TargetRegisterInfo & TRI,Register Reg,uint64_t FixedOffset,uint64_t ScalableOffset)710 static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
711                                            Register Reg, uint64_t FixedOffset,
712                                            uint64_t ScalableOffset) {
713   assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV");
714   SmallString<64> Expr;
715   std::string CommentBuffer;
716   llvm::raw_string_ostream Comment(CommentBuffer);
717   Comment << printReg(Reg, &TRI) << "  @ cfa";
718 
719   // Build up the expression (FixedOffset + ScalableOffset * VLENB).
720   appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
721                                  Comment);
722 
723   SmallString<64> DefCfaExpr;
724   uint8_t Buffer[16];
725   unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
726   DefCfaExpr.push_back(dwarf::DW_CFA_expression);
727   DefCfaExpr.append(Buffer, Buffer + encodeULEB128(DwarfReg, Buffer));
728   DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
729   DefCfaExpr.append(Expr.str());
730 
731   return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
732                                         Comment.str());
733 }
734 
735 // Allocate stack space and probe it if necessary.
allocateStack(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineFunction & MF,uint64_t Offset,uint64_t RealStackSize,bool EmitCFI,bool NeedProbe,uint64_t ProbeSize,bool DynAllocation,MachineInstr::MIFlag Flag) const736 void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
737                                        MachineBasicBlock::iterator MBBI,
738                                        MachineFunction &MF, uint64_t Offset,
739                                        uint64_t RealStackSize, bool EmitCFI,
740                                        bool NeedProbe, uint64_t ProbeSize,
741                                        bool DynAllocation,
742                                        MachineInstr::MIFlag Flag) const {
743   DebugLoc DL;
744   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
745   const RISCVInstrInfo *TII = STI.getInstrInfo();
746   bool IsRV64 = STI.is64Bit();
747   CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
748 
749   // Simply allocate the stack if it's not big enough to require a probe.
750   if (!NeedProbe || Offset <= ProbeSize) {
751     RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
752                   Flag, getStackAlign());
753 
754     if (EmitCFI)
755       CFIBuilder.buildDefCFAOffset(RealStackSize);
756 
757     if (NeedProbe && DynAllocation) {
758       // s[d|w] zero, 0(sp)
759       BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
760           .addReg(RISCV::X0)
761           .addReg(SPReg)
762           .addImm(0)
763           .setMIFlags(Flag);
764     }
765 
766     return;
767   }
768 
769   // Unroll the probe loop depending on the number of iterations.
770   if (Offset < ProbeSize * 5) {
771     uint64_t CFAAdjust = RealStackSize - Offset;
772 
773     uint64_t CurrentOffset = 0;
774     while (CurrentOffset + ProbeSize <= Offset) {
775       RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
776                     StackOffset::getFixed(-ProbeSize), Flag, getStackAlign());
777       // s[d|w] zero, 0(sp)
778       BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
779           .addReg(RISCV::X0)
780           .addReg(SPReg)
781           .addImm(0)
782           .setMIFlags(Flag);
783 
784       CurrentOffset += ProbeSize;
785       if (EmitCFI)
786         CFIBuilder.buildDefCFAOffset(CurrentOffset + CFAAdjust);
787     }
788 
789     uint64_t Residual = Offset - CurrentOffset;
790     if (Residual) {
791       RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
792                     StackOffset::getFixed(-Residual), Flag, getStackAlign());
793       if (EmitCFI)
794         CFIBuilder.buildDefCFAOffset(RealStackSize);
795 
796       if (DynAllocation) {
797         // s[d|w] zero, 0(sp)
798         BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
799             .addReg(RISCV::X0)
800             .addReg(SPReg)
801             .addImm(0)
802             .setMIFlags(Flag);
803       }
804     }
805 
806     return;
807   }
808 
809   // Emit a variable-length allocation probing loop.
810   uint64_t RoundedSize = alignDown(Offset, ProbeSize);
811   uint64_t Residual = Offset - RoundedSize;
812 
813   Register TargetReg = RISCV::X6;
814   // SUB TargetReg, SP, RoundedSize
815   RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
816                 StackOffset::getFixed(-RoundedSize), Flag, getStackAlign());
817 
818   if (EmitCFI) {
819     // Set the CFA register to TargetReg.
820     CFIBuilder.buildDefCFA(TargetReg, RoundedSize);
821   }
822 
823   // It will be expanded to a probe loop in `inlineStackProbe`.
824   BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC)).addReg(TargetReg);
825 
826   if (EmitCFI) {
827     // Set the CFA register back to SP.
828     CFIBuilder.buildDefCFARegister(SPReg);
829   }
830 
831   if (Residual) {
832     RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
833                   Flag, getStackAlign());
834     if (DynAllocation) {
835       // s[d|w] zero, 0(sp)
836       BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
837           .addReg(RISCV::X0)
838           .addReg(SPReg)
839           .addImm(0)
840           .setMIFlags(Flag);
841     }
842   }
843 
844   if (EmitCFI)
845     CFIBuilder.buildDefCFAOffset(Offset);
846 }
847 
isPush(unsigned Opcode)848 static bool isPush(unsigned Opcode) {
849   switch (Opcode) {
850   case RISCV::CM_PUSH:
851   case RISCV::QC_CM_PUSH:
852   case RISCV::QC_CM_PUSHFP:
853     return true;
854   default:
855     return false;
856   }
857 }
858 
isPop(unsigned Opcode)859 static bool isPop(unsigned Opcode) {
860   // There are other pops but these are the only ones introduced during this
861   // pass.
862   switch (Opcode) {
863   case RISCV::CM_POP:
864   case RISCV::QC_CM_POP:
865     return true;
866   default:
867     return false;
868   }
869 }
870 
getPushOpcode(RISCVMachineFunctionInfo::PushPopKind Kind,bool UpdateFP)871 static unsigned getPushOpcode(RISCVMachineFunctionInfo::PushPopKind Kind,
872                               bool UpdateFP) {
873   switch (Kind) {
874   case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
875     return RISCV::CM_PUSH;
876   case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
877     return UpdateFP ? RISCV::QC_CM_PUSHFP : RISCV::QC_CM_PUSH;
878   default:
879     llvm_unreachable("Unhandled PushPopKind");
880   }
881 }
882 
getPopOpcode(RISCVMachineFunctionInfo::PushPopKind Kind)883 static unsigned getPopOpcode(RISCVMachineFunctionInfo::PushPopKind Kind) {
884   // There are other pops but they are introduced later by the Push/Pop
885   // Optimizer.
886   switch (Kind) {
887   case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
888     return RISCV::CM_POP;
889   case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
890     return RISCV::QC_CM_POP;
891   default:
892     llvm_unreachable("Unhandled PushPopKind");
893   }
894 }
895 
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const896 void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
897                                       MachineBasicBlock &MBB) const {
898   MachineFrameInfo &MFI = MF.getFrameInfo();
899   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
900   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
901   MachineBasicBlock::iterator MBBI = MBB.begin();
902 
903   Register BPReg = RISCVABI::getBPReg();
904 
905   // Debug location must be unknown since the first debug location is used
906   // to determine the end of the prologue.
907   DebugLoc DL;
908 
909   // All calls are tail calls in GHC calling conv, and functions have no
910   // prologue/epilogue.
911   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
912     return;
913 
914   // SiFive CLIC needs to swap `sp` into `sf.mscratchcsw`
915   emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);
916 
917   // Emit prologue for shadow call stack.
918   emitSCSPrologue(MF, MBB, MBBI, DL);
919 
920   // We keep track of the first instruction because it might be a
921   // `(QC.)CM.PUSH(FP)`, and we may need to adjust the immediate rather than
922   // inserting an `addi sp, sp, -N*16`
923   auto PossiblePush = MBBI;
924 
925   // Skip past all callee-saved register spill instructions.
926   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
927     ++MBBI;
928 
929   // Determine the correct frame layout
930   determineFrameLayout(MF);
931 
932   const auto &CSI = MFI.getCalleeSavedInfo();
933 
934   // Skip to before the spills of scalar callee-saved registers
935   // FIXME: assumes exactly one instruction is used to restore each
936   // callee-saved register.
937   MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() +
938                              getUnmanagedCSI(MF, CSI).size());
939   CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
940 
941   // If libcalls are used to spill and restore callee-saved registers, the frame
942   // has two sections; the opaque section managed by the libcalls, and the
943   // section managed by MachineFrameInfo which can also hold callee saved
944   // registers in fixed stack slots, both of which have negative frame indices.
945   // This gets even more complicated when incoming arguments are passed via the
946   // stack, as these too have negative frame indices. An example is detailed
947   // below:
948   //
949   //  | incoming arg | <- FI[-3]
950   //  | libcallspill |
951   //  | calleespill  | <- FI[-2]
952   //  | calleespill  | <- FI[-1]
953   //  | this_frame   | <- FI[0]
954   //
955   // For negative frame indices, the offset from the frame pointer will differ
956   // depending on which of these groups the frame index applies to.
957   // The following calculates the correct offset knowing the number of callee
958   // saved registers spilt by the two methods.
959   if (int LibCallRegs = getLibCallID(MF, MFI.getCalleeSavedInfo()) + 1) {
960     // Calculate the size of the frame managed by the libcall. The stack
961     // alignment of these libcalls should be the same as how we set it in
962     // getABIStackAlignment.
963     unsigned LibCallFrameSize =
964         alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
965     RVFI->setLibCallStackSize(LibCallFrameSize);
966 
967     CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
968     for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
969       CFIBuilder.buildOffset(CS.getReg(),
970                              MFI.getObjectOffset(CS.getFrameIdx()));
971   }
972 
973   // FIXME (note copied from Lanai): This appears to be overallocating.  Needs
974   // investigation. Get the number of bytes to allocate from the FrameInfo.
975   uint64_t RealStackSize = getStackSizeWithRVVPadding(MF);
976   uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize();
977   uint64_t RVVStackSize = RVFI->getRVVStackSize();
978 
979   // Early exit if there is no need to allocate on the stack
980   if (RealStackSize == 0 && !MFI.adjustsStack() && RVVStackSize == 0)
981     return;
982 
983   // If the stack pointer has been marked as reserved, then produce an error if
984   // the frame requires stack allocation
985   if (STI.isRegisterReservedByUser(SPReg))
986     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
987         MF.getFunction(), "Stack pointer required, but has been reserved."});
988 
989   uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
990   // Split the SP adjustment to reduce the offsets of callee saved spill.
991   if (FirstSPAdjustAmount) {
992     StackSize = FirstSPAdjustAmount;
993     RealStackSize = FirstSPAdjustAmount;
994   }
995 
996   if (RVFI->useQCIInterrupt(MF)) {
997     // The function starts with `QC.C.MIENTER(.NEST)`, so the `(QC.)CM.PUSH(FP)`
998     // could only be the next instruction.
999     ++PossiblePush;
1000 
1001     // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
1002     // could be. The PUSH will also get its own CFI metadata for its own
1003     // modifications, which should come after the PUSH.
1004     CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup);
1005     PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
1006     for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
1007       PushCFIBuilder.buildOffset(CS.getReg(),
1008                                  MFI.getObjectOffset(CS.getFrameIdx()));
1009   }
1010 
1011   if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
1012       isPush(PossiblePush->getOpcode())) {
1013     // Use available stack adjustment in push instruction to allocate additional
1014     // stack space. Align the stack size down to a multiple of 16. This is
1015     // needed for RVE.
1016     // FIXME: Can we increase the stack size to a multiple of 16 instead?
1017     uint64_t StackAdj =
1018         std::min(alignDown(StackSize, 16), static_cast<uint64_t>(48));
1019     PossiblePush->getOperand(1).setImm(StackAdj);
1020     StackSize -= StackAdj;
1021 
1022     CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
1023     for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
1024       CFIBuilder.buildOffset(CS.getReg(),
1025                              MFI.getObjectOffset(CS.getFrameIdx()));
1026   }
1027 
1028   // Allocate space on the stack if necessary.
1029   auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
1030   const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
1031   bool NeedProbe = TLI->hasInlineStackProbe(MF);
1032   uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
1033   bool DynAllocation =
1034       MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
1035   if (StackSize != 0)
1036     allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
1037                   NeedProbe, ProbeSize, DynAllocation,
1038                   MachineInstr::FrameSetup);
1039 
1040   // Save SiFive CLIC CSRs into Stack
1041   emitSiFiveCLICPreemptibleSaves(MF, MBB, MBBI, DL);
1042 
1043   // The frame pointer is callee-saved, and code has been generated for us to
1044   // save it to the stack. We need to skip over the storing of callee-saved
1045   // registers as the frame pointer must be modified after it has been saved
1046   // to the stack, not before.
1047   // FIXME: assumes exactly one instruction is used to save each callee-saved
1048   // register.
1049   std::advance(MBBI, getUnmanagedCSI(MF, CSI).size());
1050   CFIBuilder.setInsertPoint(MBBI);
1051 
1052   // Iterate over list of callee-saved registers and emit .cfi_offset
1053   // directives.
1054   for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
1055     CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx()));
1056 
1057   // Generate new FP.
1058   if (hasFP(MF)) {
1059     if (STI.isRegisterReservedByUser(FPReg))
1060       MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
1061           MF.getFunction(), "Frame pointer required, but has been reserved."});
1062     // The frame pointer does need to be reserved from register allocation.
1063     assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");
1064 
1065     // Some stack management variants automatically keep FP updated, so we don't
1066     // need an instruction to do so.
1067     if (!RVFI->hasImplicitFPUpdates(MF)) {
1068       RI->adjustReg(
1069           MBB, MBBI, DL, FPReg, SPReg,
1070           StackOffset::getFixed(RealStackSize - RVFI->getVarArgsSaveSize()),
1071           MachineInstr::FrameSetup, getStackAlign());
1072     }
1073 
1074     CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
1075   }
1076 
1077   uint64_t SecondSPAdjustAmount = 0;
1078   // Emit the second SP adjustment after saving callee saved registers.
1079   if (FirstSPAdjustAmount) {
1080     SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
1081     assert(SecondSPAdjustAmount > 0 &&
1082            "SecondSPAdjustAmount should be greater than zero");
1083 
1084     allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
1085                   getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
1086                   ProbeSize, DynAllocation, MachineInstr::FrameSetup);
1087   }
1088 
1089   if (RVVStackSize) {
1090     if (NeedProbe) {
1091       allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
1092                                   MachineInstr::FrameSetup, !hasFP(MF),
1093                                   DynAllocation);
1094     } else {
1095       // We must keep the stack pointer aligned through any intermediate
1096       // updates.
1097       RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
1098                     StackOffset::getScalable(-RVVStackSize),
1099                     MachineInstr::FrameSetup, getStackAlign());
1100     }
1101 
1102     if (!hasFP(MF)) {
1103       // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
1104       CFIBuilder.insertCFIInst(createDefCFAExpression(
1105           *RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8));
1106     }
1107 
1108     std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
1109     emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
1110   }
1111 
1112   if (hasFP(MF)) {
1113     // Realign Stack
1114     const RISCVRegisterInfo *RI = STI.getRegisterInfo();
1115     if (RI->hasStackRealignment(MF)) {
1116       Align MaxAlignment = MFI.getMaxAlign();
1117 
1118       const RISCVInstrInfo *TII = STI.getInstrInfo();
1119       if (isInt<12>(-(int)MaxAlignment.value())) {
1120         BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
1121             .addReg(SPReg)
1122             .addImm(-(int)MaxAlignment.value())
1123             .setMIFlag(MachineInstr::FrameSetup);
1124       } else {
1125         unsigned ShiftAmount = Log2(MaxAlignment);
1126         Register VR =
1127             MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
1128         BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
1129             .addReg(SPReg)
1130             .addImm(ShiftAmount)
1131             .setMIFlag(MachineInstr::FrameSetup);
1132         BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
1133             .addReg(VR)
1134             .addImm(ShiftAmount)
1135             .setMIFlag(MachineInstr::FrameSetup);
1136       }
1137       if (NeedProbe && RVVStackSize == 0) {
1138         // Do a probe if the align + size allocated just passed the probe size
1139         // and was not yet probed.
1140         if (SecondSPAdjustAmount < ProbeSize &&
1141             SecondSPAdjustAmount + MaxAlignment.value() >= ProbeSize) {
1142           bool IsRV64 = STI.is64Bit();
1143           BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
1144               .addReg(RISCV::X0)
1145               .addReg(SPReg)
1146               .addImm(0)
1147               .setMIFlags(MachineInstr::FrameSetup);
1148         }
1149       }
1150       // FP will be used to restore the frame in the epilogue, so we need
1151       // another base register BP to record SP after re-alignment. SP will
1152       // track the current stack after allocating variable sized objects.
1153       if (hasBP(MF)) {
1154         // move BP, SP
1155         BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), BPReg)
1156             .addReg(SPReg)
1157             .addImm(0)
1158             .setMIFlag(MachineInstr::FrameSetup);
1159       }
1160     }
1161   }
1162 }
1163 
deallocateStack(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,uint64_t & StackSize,int64_t CFAOffset) const1164 void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
1165                                          MachineBasicBlock &MBB,
1166                                          MachineBasicBlock::iterator MBBI,
1167                                          const DebugLoc &DL,
1168                                          uint64_t &StackSize,
1169                                          int64_t CFAOffset) const {
1170   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
1171 
1172   RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
1173                 MachineInstr::FrameDestroy, getStackAlign());
1174   StackSize = 0;
1175 
1176   CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1177       .buildDefCFAOffset(CFAOffset);
1178 }
1179 
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1180 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
1181                                       MachineBasicBlock &MBB) const {
1182   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
1183   MachineFrameInfo &MFI = MF.getFrameInfo();
1184   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1185 
1186   // All calls are tail calls in GHC calling conv, and functions have no
1187   // prologue/epilogue.
1188   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1189     return;
1190 
1191   // Get the insert location for the epilogue. If there were no terminators in
1192   // the block, get the last instruction.
1193   MachineBasicBlock::iterator MBBI = MBB.end();
1194   DebugLoc DL;
1195   if (!MBB.empty()) {
1196     MBBI = MBB.getLastNonDebugInstr();
1197     if (MBBI != MBB.end())
1198       DL = MBBI->getDebugLoc();
1199 
1200     MBBI = MBB.getFirstTerminator();
1201 
1202     // Skip to before the restores of all callee-saved registers.
1203     while (MBBI != MBB.begin() &&
1204            std::prev(MBBI)->getFlag(MachineInstr::FrameDestroy))
1205       --MBBI;
1206   }
1207 
1208   const auto &CSI = MFI.getCalleeSavedInfo();
1209 
1210   // Skip to before the restores of scalar callee-saved registers
1211   // FIXME: assumes exactly one instruction is used to restore each
1212   // callee-saved register.
1213   auto FirstScalarCSRRestoreInsn =
1214       std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
1215   CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
1216                             MachineInstr::FrameDestroy);
1217 
1218   uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
1219   uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
1220                                                : getStackSizeWithRVVPadding(MF);
1221   uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
1222                                            : getStackSizeWithRVVPadding(MF) -
1223                                                  RVFI->getReservedSpillsSize();
1224   uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
1225   uint64_t RVVStackSize = RVFI->getRVVStackSize();
1226 
1227   bool RestoreSPFromFP = RI->hasStackRealignment(MF) ||
1228                          MFI.hasVarSizedObjects() || !hasReservedCallFrame(MF);
1229   if (RVVStackSize) {
1230     // If RestoreSPFromFP the stack pointer will be restored using the frame
1231     // pointer value.
1232     if (!RestoreSPFromFP)
1233       RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg,
1234                     StackOffset::getScalable(RVVStackSize),
1235                     MachineInstr::FrameDestroy, getStackAlign());
1236 
1237     if (!hasFP(MF))
1238       CFIBuilder.buildDefCFA(SPReg, RealStackSize);
1239 
1240     emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
1241   }
1242 
1243   if (FirstSPAdjustAmount) {
1244     uint64_t SecondSPAdjustAmount =
1245         getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
1246     assert(SecondSPAdjustAmount > 0 &&
1247            "SecondSPAdjustAmount should be greater than zero");
1248 
1249     // If RestoreSPFromFP the stack pointer will be restored using the frame
1250     // pointer value.
1251     if (!RestoreSPFromFP)
1252       RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg,
1253                     StackOffset::getFixed(SecondSPAdjustAmount),
1254                     MachineInstr::FrameDestroy, getStackAlign());
1255 
1256     if (!hasFP(MF))
1257       CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount);
1258   }
1259 
1260   // Restore the stack pointer using the value of the frame pointer. Only
1261   // necessary if the stack pointer was modified, meaning the stack size is
1262   // unknown.
1263   //
1264   // In order to make sure the stack point is right through the EH region,
1265   // we also need to restore stack pointer from the frame pointer if we
1266   // don't preserve stack space within prologue/epilogue for outgoing variables,
1267   // normally it's just checking the variable sized object is present or not
1268   // is enough, but we also don't preserve that at prologue/epilogue when
1269   // have vector objects in stack.
1270   if (RestoreSPFromFP) {
1271     assert(hasFP(MF) && "frame pointer should not have been eliminated");
1272     RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, FPReg,
1273                   StackOffset::getFixed(-FPOffset), MachineInstr::FrameDestroy,
1274                   getStackAlign());
1275   }
1276 
1277   if (hasFP(MF))
1278     CFIBuilder.buildDefCFA(SPReg, RealStackSize);
1279 
1280   // Skip to after the restores of scalar callee-saved registers
1281   // FIXME: assumes exactly one instruction is used to restore each
1282   // callee-saved register.
1283   MBBI = std::next(FirstScalarCSRRestoreInsn, getUnmanagedCSI(MF, CSI).size());
1284   CFIBuilder.setInsertPoint(MBBI);
1285 
1286   if (getLibCallID(MF, CSI) != -1) {
1287     // tail __riscv_restore_[0-12] instruction is considered as a terminator,
1288     // therefore it is unnecessary to place any CFI instructions after it. Just
1289     // deallocate stack if needed and return.
1290     if (StackSize != 0)
1291       deallocateStack(MF, MBB, MBBI, DL, StackSize,
1292                       RVFI->getLibCallStackSize());
1293 
1294     // Emit epilogue for shadow call stack.
1295     emitSCSEpilogue(MF, MBB, MBBI, DL);
1296     return;
1297   }
1298 
1299   // Recover callee-saved registers.
1300   for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
1301     CFIBuilder.buildRestore(CS.getReg());
1302 
1303   if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) {
1304     // Use available stack adjustment in pop instruction to deallocate stack
1305     // space. Align the stack size down to a multiple of 16. This is needed for
1306     // RVE.
1307     // FIXME: Can we increase the stack size to a multiple of 16 instead?
1308     uint64_t StackAdj =
1309         std::min(alignDown(StackSize, 16), static_cast<uint64_t>(48));
1310     MBBI->getOperand(1).setImm(StackAdj);
1311     StackSize -= StackAdj;
1312 
1313     if (StackSize != 0)
1314       deallocateStack(MF, MBB, MBBI, DL, StackSize,
1315                       /*stack_adj of cm.pop instr*/ RealStackSize - StackSize);
1316 
1317     auto NextI = next_nodbg(MBBI, MBB.end());
1318     if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
1319       ++MBBI;
1320       CFIBuilder.setInsertPoint(MBBI);
1321 
1322       for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
1323         CFIBuilder.buildRestore(CS.getReg());
1324 
1325       // Update CFA Offset. If this is a QCI interrupt function, there will be a
1326       // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise
1327       // getQCIInterruptStackSize() will be 0.
1328       CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
1329     }
1330   }
1331 
1332   emitSiFiveCLICPreemptibleRestores(MF, MBB, MBBI, DL);
1333 
1334   // Deallocate stack if StackSize isn't a zero yet. If this is a QCI interrupt
1335   // function, there will be a leftover offset which is deallocated by
1336   // `QC.C.MILEAVERET`, otherwise getQCIInterruptStackSize() will be 0.
1337   if (StackSize != 0)
1338     deallocateStack(MF, MBB, MBBI, DL, StackSize,
1339                     RVFI->getQCIInterruptStackSize());
1340 
1341   // Emit epilogue for shadow call stack.
1342   emitSCSEpilogue(MF, MBB, MBBI, DL);
1343 
1344   // SiFive CLIC needs to swap `sf.mscratchcsw` into `sp`
1345   emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);
1346 }
1347 
1348 StackOffset
getFrameIndexReference(const MachineFunction & MF,int FI,Register & FrameReg) const1349 RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
1350                                            Register &FrameReg) const {
1351   const MachineFrameInfo &MFI = MF.getFrameInfo();
1352   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1353   const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1354 
1355   // Callee-saved registers should be referenced relative to the stack
1356   // pointer (positive offset), otherwise use the frame pointer (negative
1357   // offset).
1358   const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
1359   int MinCSFI = 0;
1360   int MaxCSFI = -1;
1361   StackOffset Offset;
1362   auto StackID = MFI.getStackID(FI);
1363 
1364   assert((StackID == TargetStackID::Default ||
1365           StackID == TargetStackID::ScalableVector) &&
1366          "Unexpected stack ID for the frame object.");
1367   if (StackID == TargetStackID::Default) {
1368     assert(getOffsetOfLocalArea() == 0 && "LocalAreaOffset is not 0!");
1369     Offset = StackOffset::getFixed(MFI.getObjectOffset(FI) +
1370                                    MFI.getOffsetAdjustment());
1371   } else if (StackID == TargetStackID::ScalableVector) {
1372     Offset = StackOffset::getScalable(MFI.getObjectOffset(FI));
1373   }
1374 
1375   uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
1376 
1377   if (CSI.size()) {
1378     MinCSFI = CSI[0].getFrameIdx();
1379     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
1380   }
1381 
1382   if (FI >= MinCSFI && FI <= MaxCSFI) {
1383     FrameReg = SPReg;
1384 
1385     if (FirstSPAdjustAmount)
1386       Offset += StackOffset::getFixed(FirstSPAdjustAmount);
1387     else
1388       Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF));
1389     return Offset;
1390   }
1391 
1392   if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
1393     // If the stack was realigned, the frame pointer is set in order to allow
1394     // SP to be restored, so we need another base register to record the stack
1395     // after realignment.
1396     // |--------------------------| -- <-- FP
1397     // | callee-allocated save    | | <----|
1398     // | area for register varargs| |      |
1399     // |--------------------------| |      |
1400     // | callee-saved registers   | |      |
1401     // |--------------------------| --     |
1402     // | realignment (the size of | |      |
1403     // | this area is not counted | |      |
1404     // | in MFI.getStackSize())   | |      |
1405     // |--------------------------| --     |-- MFI.getStackSize()
1406     // | RVV alignment padding    | |      |
1407     // | (not counted in          | |      |
1408     // | MFI.getStackSize() but   | |      |
1409     // | counted in               | |      |
1410     // | RVFI.getRVVStackSize())  | |      |
1411     // |--------------------------| --     |
1412     // | RVV objects              | |      |
1413     // | (not counted in          | |      |
1414     // | MFI.getStackSize())      | |      |
1415     // |--------------------------| --     |
1416     // | padding before RVV       | |      |
1417     // | (not counted in          | |      |
1418     // | MFI.getStackSize() or in | |      |
1419     // | RVFI.getRVVStackSize())  | |      |
1420     // |--------------------------| --     |
1421     // | scalar local variables   | | <----'
1422     // |--------------------------| -- <-- BP (if var sized objects present)
1423     // | VarSize objects          | |
1424     // |--------------------------| -- <-- SP
1425     if (hasBP(MF)) {
1426       FrameReg = RISCVABI::getBPReg();
1427     } else {
1428       // VarSize objects must be empty in this case!
1429       assert(!MFI.hasVarSizedObjects());
1430       FrameReg = SPReg;
1431     }
1432   } else {
1433     FrameReg = RI->getFrameRegister(MF);
1434   }
1435 
1436   if (FrameReg == FPReg) {
1437     Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
1438     // When using FP to access scalable vector objects, we need to minus
1439     // the frame size.
1440     //
1441     // |--------------------------| -- <-- FP
1442     // | callee-allocated save    | |
1443     // | area for register varargs| |
1444     // |--------------------------| |
1445     // | callee-saved registers   | |
1446     // |--------------------------| | MFI.getStackSize()
1447     // | scalar local variables   | |
1448     // |--------------------------| -- (Offset of RVV objects is from here.)
1449     // | RVV objects              |
1450     // |--------------------------|
1451     // | VarSize objects          |
1452     // |--------------------------| <-- SP
1453     if (StackID == TargetStackID::ScalableVector) {
1454       assert(!RI->hasStackRealignment(MF) &&
1455              "Can't index across variable sized realign");
1456       // We don't expect any extra RVV alignment padding, as the stack size
1457       // and RVV object sections should be correct aligned in their own
1458       // right.
1459       assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) &&
1460              "Inconsistent stack layout");
1461       Offset -= StackOffset::getFixed(MFI.getStackSize());
1462     }
1463     return Offset;
1464   }
1465 
1466   // This case handles indexing off both SP and BP.
1467   // If indexing off SP, there must not be any var sized objects
1468   assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects());
1469 
1470   // When using SP to access frame objects, we need to add RVV stack size.
1471   //
1472   // |--------------------------| -- <-- FP
1473   // | callee-allocated save    | | <----|
1474   // | area for register varargs| |      |
1475   // |--------------------------| |      |
1476   // | callee-saved registers   | |      |
1477   // |--------------------------| --     |
1478   // | RVV alignment padding    | |      |
1479   // | (not counted in          | |      |
1480   // | MFI.getStackSize() but   | |      |
1481   // | counted in               | |      |
1482   // | RVFI.getRVVStackSize())  | |      |
1483   // |--------------------------| --     |
1484   // | RVV objects              | |      |-- MFI.getStackSize()
1485   // | (not counted in          | |      |
1486   // | MFI.getStackSize())      | |      |
1487   // |--------------------------| --     |
1488   // | padding before RVV       | |      |
1489   // | (not counted in          | |      |
1490   // | MFI.getStackSize())      | |      |
1491   // |--------------------------| --     |
1492   // | scalar local variables   | | <----'
1493   // |--------------------------| -- <-- BP (if var sized objects present)
1494   // | VarSize objects          | |
1495   // |--------------------------| -- <-- SP
1496   //
1497   // The total amount of padding surrounding RVV objects is described by
1498   // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV
1499   // objects to the required alignment.
1500   if (MFI.getStackID(FI) == TargetStackID::Default) {
1501     if (MFI.isFixedObjectIndex(FI)) {
1502       assert(!RI->hasStackRealignment(MF) &&
1503              "Can't index across variable sized realign");
1504       Offset += StackOffset::get(getStackSizeWithRVVPadding(MF),
1505                                  RVFI->getRVVStackSize());
1506     } else {
1507       Offset += StackOffset::getFixed(MFI.getStackSize());
1508     }
1509   } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
1510     // Ensure the base of the RVV stack is correctly aligned: add on the
1511     // alignment padding.
1512     int ScalarLocalVarSize = MFI.getStackSize() -
1513                              RVFI->getCalleeSavedStackSize() -
1514                              RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
1515     Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize());
1516   }
1517   return Offset;
1518 }
1519 
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const1520 void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
1521                                               BitVector &SavedRegs,
1522                                               RegScavenger *RS) const {
1523   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1524   // Unconditionally spill RA and FP only if the function uses a frame
1525   // pointer.
1526   if (hasFP(MF)) {
1527     SavedRegs.set(RAReg);
1528     SavedRegs.set(FPReg);
1529   }
1530   // Mark BP as used if function has dedicated base pointer.
1531   if (hasBP(MF))
1532     SavedRegs.set(RISCVABI::getBPReg());
1533 
1534   // When using cm.push/pop we must save X27 if we save X26.
1535   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1536   if (RVFI->isPushable(MF) && SavedRegs.test(RISCV::X26))
1537     SavedRegs.set(RISCV::X27);
1538 
1539   // SiFive Preemptible Interrupt Handlers need additional frame entries
1540   createSiFivePreemptibleInterruptFrameEntries(MF, *RVFI);
1541 }
1542 
1543 std::pair<int64_t, Align>
assignRVVStackObjectOffsets(MachineFunction & MF) const1544 RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
1545   MachineFrameInfo &MFI = MF.getFrameInfo();
1546   // Create a buffer of RVV objects to allocate.
1547   SmallVector<int, 8> ObjectsToAllocate;
1548   auto pushRVVObjects = [&](int FIBegin, int FIEnd) {
1549     for (int I = FIBegin, E = FIEnd; I != E; ++I) {
1550       unsigned StackID = MFI.getStackID(I);
1551       if (StackID != TargetStackID::ScalableVector)
1552         continue;
1553       if (MFI.isDeadObjectIndex(I))
1554         continue;
1555 
1556       ObjectsToAllocate.push_back(I);
1557     }
1558   };
1559   // First push RVV Callee Saved object, then push RVV stack object
1560   std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
1561   const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI);
1562   if (!RVVCSI.empty())
1563     pushRVVObjects(RVVCSI[0].getFrameIdx(),
1564                    RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1);
1565   pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size());
1566 
1567   // The minimum alignment is 16 bytes.
1568   Align RVVStackAlign(16);
1569   const auto &ST = MF.getSubtarget<RISCVSubtarget>();
1570 
1571   if (!ST.hasVInstructions()) {
1572     assert(ObjectsToAllocate.empty() &&
1573            "Can't allocate scalable-vector objects without V instructions");
1574     return std::make_pair(0, RVVStackAlign);
1575   }
1576 
1577   // Allocate all RVV locals and spills
1578   int64_t Offset = 0;
1579   for (int FI : ObjectsToAllocate) {
1580     // ObjectSize in bytes.
1581     int64_t ObjectSize = MFI.getObjectSize(FI);
1582     auto ObjectAlign =
1583         std::max(Align(RISCV::RVVBytesPerBlock), MFI.getObjectAlign(FI));
1584     // If the data type is the fractional vector type, reserve one vector
1585     // register for it.
1586     if (ObjectSize < RISCV::RVVBytesPerBlock)
1587       ObjectSize = RISCV::RVVBytesPerBlock;
1588     Offset = alignTo(Offset + ObjectSize, ObjectAlign);
1589     MFI.setObjectOffset(FI, -Offset);
1590     // Update the maximum alignment of the RVV stack section
1591     RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
1592   }
1593 
1594   uint64_t StackSize = Offset;
1595 
1596   // Ensure the alignment of the RVV stack. Since we want the most-aligned
1597   // object right at the bottom (i.e., any padding at the top of the frame),
1598   // readjust all RVV objects down by the alignment padding.
1599   // Stack size and offsets are multiples of vscale, stack alignment is in
1600   // bytes, we can divide stack alignment by minimum vscale to get a maximum
1601   // stack alignment multiple of vscale.
1602   auto VScale =
1603       std::max<uint64_t>(ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, 1);
1604   if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) {
1605     if (auto AlignmentPadding =
1606             offsetToAlignment(StackSize, Align(RVVStackAlignVScale))) {
1607       StackSize += AlignmentPadding;
1608       for (int FI : ObjectsToAllocate)
1609         MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
1610     }
1611   }
1612 
1613   return std::make_pair(StackSize, RVVStackAlign);
1614 }
1615 
getScavSlotsNumForRVV(MachineFunction & MF)1616 static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
1617   // For RVV spill, scalable stack offsets computing requires up to two scratch
1618   // registers
1619   static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;
1620 
1621   // For RVV spill, non-scalable stack offsets computing requires up to one
1622   // scratch register.
1623   static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;
1624 
1625   // ADDI instruction's destination register can be used for computing
1626   // offsets. So Scalable stack offsets require up to one scratch register.
1627   static constexpr unsigned ScavSlotsADDIScalableObject = 1;
1628 
1629   static constexpr unsigned MaxScavSlotsNumKnown =
1630       std::max({ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject,
1631                 ScavSlotsNumRVVSpillNonScalableObject});
1632 
1633   unsigned MaxScavSlotsNum = 0;
1634   if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
1635     return false;
1636   for (const MachineBasicBlock &MBB : MF)
1637     for (const MachineInstr &MI : MBB) {
1638       bool IsRVVSpill = RISCV::isRVVSpill(MI);
1639       for (auto &MO : MI.operands()) {
1640         if (!MO.isFI())
1641           continue;
1642         bool IsScalableVectorID = MF.getFrameInfo().getStackID(MO.getIndex()) ==
1643                                   TargetStackID::ScalableVector;
1644         if (IsRVVSpill) {
1645           MaxScavSlotsNum = std::max(
1646               MaxScavSlotsNum, IsScalableVectorID
1647                                    ? ScavSlotsNumRVVSpillScalableObject
1648                                    : ScavSlotsNumRVVSpillNonScalableObject);
1649         } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) {
1650           MaxScavSlotsNum =
1651               std::max(MaxScavSlotsNum, ScavSlotsADDIScalableObject);
1652         }
1653       }
1654       if (MaxScavSlotsNum == MaxScavSlotsNumKnown)
1655         return MaxScavSlotsNumKnown;
1656     }
1657   return MaxScavSlotsNum;
1658 }
1659 
hasRVVFrameObject(const MachineFunction & MF)1660 static bool hasRVVFrameObject(const MachineFunction &MF) {
1661   // Originally, the function will scan all the stack objects to check whether
1662   // if there is any scalable vector object on the stack or not. However, it
1663   // causes errors in the register allocator. In issue 53016, it returns false
1664   // before RA because there is no RVV stack objects. After RA, it returns true
1665   // because there are spilling slots for RVV values during RA. It will not
1666   // reserve BP during register allocation and generate BP access in the PEI
1667   // pass due to the inconsistent behavior of the function.
1668   //
1669   // The function is changed to use hasVInstructions() as the return value. It
1670   // is not precise, but it can make the register allocation correct.
1671   //
1672   // FIXME: Find a better way to make the decision or revisit the solution in
1673   // D103622.
1674   //
1675   // Refer to https://github.com/llvm/llvm-project/issues/53016.
1676   return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
1677 }
1678 
estimateFunctionSizeInBytes(const MachineFunction & MF,const RISCVInstrInfo & TII)1679 static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
1680                                             const RISCVInstrInfo &TII) {
1681   unsigned FnSize = 0;
1682   for (auto &MBB : MF) {
1683     for (auto &MI : MBB) {
1684       // Far branches over 20-bit offset will be relaxed in branch relaxation
1685       // pass. In the worst case, conditional branches will be relaxed into
1686       // the following instruction sequence. Unconditional branches are
1687       // relaxed in the same way, with the exception that there is no first
1688       // branch instruction.
1689       //
1690       //        foo
1691       //        bne     t5, t6, .rev_cond # `TII->getInstSizeInBytes(MI)` bytes
1692       //        sd      s11, 0(sp)        # 4 bytes, or 2 bytes with Zca
1693       //        jump    .restore, s11     # 8 bytes
1694       // .rev_cond
1695       //        bar
1696       //        j       .dest_bb          # 4 bytes, or 2 bytes with Zca
1697       // .restore:
1698       //        ld      s11, 0(sp)        # 4 bytes, or 2 bytes with Zca
1699       // .dest:
1700       //        baz
1701       if (MI.isConditionalBranch())
1702         FnSize += TII.getInstSizeInBytes(MI);
1703       if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
1704         if (MF.getSubtarget<RISCVSubtarget>().hasStdExtZca())
1705           FnSize += 2 + 8 + 2 + 2;
1706         else
1707           FnSize += 4 + 8 + 4 + 4;
1708         continue;
1709       }
1710 
1711       FnSize += TII.getInstSizeInBytes(MI);
1712     }
1713   }
1714   return FnSize;
1715 }
1716 
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const1717 void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
1718     MachineFunction &MF, RegScavenger *RS) const {
1719   const RISCVRegisterInfo *RegInfo =
1720       MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
1721   const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
1722   MachineFrameInfo &MFI = MF.getFrameInfo();
1723   const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1724   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1725 
1726   int64_t RVVStackSize;
1727   Align RVVStackAlign;
1728   std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF);
1729 
1730   RVFI->setRVVStackSize(RVVStackSize);
1731   RVFI->setRVVStackAlign(RVVStackAlign);
1732 
1733   if (hasRVVFrameObject(MF)) {
1734     // Ensure the entire stack is aligned to at least the RVV requirement: some
1735     // scalable-vector object alignments are not considered by the
1736     // target-independent code.
1737     MFI.ensureMaxAlignment(RVVStackAlign);
1738   }
1739 
1740   unsigned ScavSlotsNum = 0;
1741 
1742   // estimateStackSize has been observed to under-estimate the final stack
1743   // size, so give ourselves wiggle-room by checking for stack size
1744   // representable an 11-bit signed field rather than 12-bits.
1745   if (!isInt<11>(MFI.estimateStackSize(MF)))
1746     ScavSlotsNum = 1;
1747 
1748   // Far branches over 20-bit offset require a spill slot for scratch register.
1749   bool IsLargeFunction = !isInt<20>(estimateFunctionSizeInBytes(MF, *TII));
1750   if (IsLargeFunction)
1751     ScavSlotsNum = std::max(ScavSlotsNum, 1u);
1752 
1753   // RVV loads & stores have no capacity to hold the immediate address offsets
1754   // so we must always reserve an emergency spill slot if the MachineFunction
1755   // contains any RVV spills.
1756   ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF));
1757 
1758   for (unsigned I = 0; I < ScavSlotsNum; I++) {
1759     int FI = MFI.CreateSpillStackObject(RegInfo->getSpillSize(*RC),
1760                                         RegInfo->getSpillAlign(*RC));
1761     RS->addScavengingFrameIndex(FI);
1762 
1763     if (IsLargeFunction && RVFI->getBranchRelaxationScratchFrameIndex() == -1)
1764       RVFI->setBranchRelaxationScratchFrameIndex(FI);
1765   }
1766 
1767   unsigned Size = RVFI->getReservedSpillsSize();
1768   for (const auto &Info : MFI.getCalleeSavedInfo()) {
1769     int FrameIdx = Info.getFrameIdx();
1770     if (FrameIdx < 0 || MFI.getStackID(FrameIdx) != TargetStackID::Default)
1771       continue;
1772 
1773     Size += MFI.getObjectSize(FrameIdx);
1774   }
1775   RVFI->setCalleeSavedStackSize(Size);
1776 }
1777 
1778 // Not preserve stack space within prologue for outgoing variables when the
1779 // function contains variable size objects or there are vector objects accessed
1780 // by the frame pointer.
1781 // Let eliminateCallFramePseudoInstr preserve stack space for it.
hasReservedCallFrame(const MachineFunction & MF) const1782 bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
1783   return !MF.getFrameInfo().hasVarSizedObjects() &&
1784          !(hasFP(MF) && hasRVVFrameObject(MF));
1785 }
1786 
1787 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI) const1788 MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
1789     MachineFunction &MF, MachineBasicBlock &MBB,
1790     MachineBasicBlock::iterator MI) const {
1791   DebugLoc DL = MI->getDebugLoc();
1792 
1793   if (!hasReservedCallFrame(MF)) {
1794     // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
1795     // ADJCALLSTACKUP must be converted to instructions manipulating the stack
1796     // pointer. This is necessary when there is a variable length stack
1797     // allocation (e.g. alloca), which means it's not possible to allocate
1798     // space for outgoing arguments from within the function prologue.
1799     int64_t Amount = MI->getOperand(0).getImm();
1800 
1801     if (Amount != 0) {
1802       // Ensure the stack remains aligned after adjustment.
1803       Amount = alignSPAdjust(Amount);
1804 
1805       if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
1806         Amount = -Amount;
1807 
1808       const RISCVTargetLowering *TLI =
1809           MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
1810       int64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
1811       if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
1812         // When stack probing is enabled, the decrement of SP may need to be
1813         // probed. We can handle both the decrement and the probing in
1814         // allocateStack.
1815         bool DynAllocation =
1816             MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
1817         allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
1818                       /*NeedProbe=*/true, ProbeSize, DynAllocation,
1819                       MachineInstr::NoFlags);
1820       } else {
1821         const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
1822         RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
1823                      MachineInstr::NoFlags, getStackAlign());
1824       }
1825     }
1826   }
1827 
1828   return MBB.erase(MI);
1829 }
1830 
1831 // We would like to split the SP adjustment to reduce prologue/epilogue
1832 // as following instructions. In this way, the offset of the callee saved
1833 // register could fit in a single store. Supposed that the first sp adjust
1834 // amount is 2032.
1835 //   add     sp,sp,-2032
1836 //   sw      ra,2028(sp)
1837 //   sw      s0,2024(sp)
1838 //   sw      s1,2020(sp)
1839 //   sw      s3,2012(sp)
1840 //   sw      s4,2008(sp)
1841 //   add     sp,sp,-64
1842 uint64_t
getFirstSPAdjustAmount(const MachineFunction & MF) const1843 RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
1844   const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1845   const MachineFrameInfo &MFI = MF.getFrameInfo();
1846   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1847   uint64_t StackSize = getStackSizeWithRVVPadding(MF);
1848 
1849   // Disable SplitSPAdjust if save-restore libcall, push/pop or QCI interrupts
1850   // are used. The callee-saved registers will be pushed by the save-restore
1851   // libcalls, so we don't have to split the SP adjustment in this case.
1852   if (RVFI->getReservedSpillsSize())
1853     return 0;
1854 
1855   // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
1856   // 12-bit and there exists a callee-saved register needing to be pushed.
1857   if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
1858     // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because
1859     // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple
1860     // instructions. Offsets smaller than 2048 can fit in a single load/store
1861     // instruction, and we have to stick with the stack alignment. 2048 has
1862     // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
1863     // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
1864     const uint64_t StackAlign = getStackAlign().value();
1865 
1866     // Amount of (2048 - StackAlign) will prevent callee saved and restored
1867     // instructions be compressed, so try to adjust the amount to the largest
1868     // offset that stack compression instructions accept when target supports
1869     // compression instructions.
1870     if (STI.hasStdExtZca()) {
1871       // The compression extensions may support the following instructions:
1872       // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2)
1873       //          c.swsp rs2, offset[7:2] => 2^(6 + 2)
1874       //          c.flwsp rd, offset[7:2] => 2^(6 + 2)
1875       //          c.fswsp rs2, offset[7:2] => 2^(6 + 2)
1876       // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3)
1877       //          c.sdsp rs2, offset[8:3] => 2^(6 + 3)
1878       //          c.fldsp rd, offset[8:3] => 2^(6 + 3)
1879       //          c.fsdsp rs2, offset[8:3] => 2^(6 + 3)
1880       const uint64_t RVCompressLen = STI.getXLen() * 8;
1881       // Compared with amount (2048 - StackAlign), StackSize needs to
1882       // satisfy the following conditions to avoid using more instructions
1883       // to adjust the sp after adjusting the amount, such as
1884       // StackSize meets the condition (StackSize <= 2048 + RVCompressLen),
1885       // case1: Amount is 2048 - StackAlign: use addi + addi to adjust sp.
1886       // case2: Amount is RVCompressLen: use addi + addi to adjust sp.
1887       auto CanCompress = [&](uint64_t CompressLen) -> bool {
1888         if (StackSize <= 2047 + CompressLen ||
1889             (StackSize > 2048 * 2 - StackAlign &&
1890              StackSize <= 2047 * 2 + CompressLen) ||
1891             StackSize > 2048 * 3 - StackAlign)
1892           return true;
1893 
1894         return false;
1895       };
1896       // In the epilogue, addi sp, sp, 496 is used to recover the sp and it
1897       // can be compressed(C.ADDI16SP, offset can be [-512, 496]), but
1898       // addi sp, sp, 512 can not be compressed. So try to use 496 first.
1899       const uint64_t ADDI16SPCompressLen = 496;
1900       if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen))
1901         return ADDI16SPCompressLen;
1902       if (CanCompress(RVCompressLen))
1903         return RVCompressLen;
1904     }
1905     return 2048 - StackAlign;
1906   }
1907   return 0;
1908 }
1909 
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI,unsigned & MinCSFrameIndex,unsigned & MaxCSFrameIndex) const1910 bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
1911     MachineFunction &MF, const TargetRegisterInfo *TRI,
1912     std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
1913     unsigned &MaxCSFrameIndex) const {
1914   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1915 
1916   // Preemptible Interrupts have two additional Callee-save Frame Indexes,
1917   // not tracked by `CSI`.
1918   if (RVFI->isSiFivePreemptibleInterrupt(MF)) {
1919     for (int I = 0; I < 2; ++I) {
1920       int FI = RVFI->getInterruptCSRFrameIndex(I);
1921       MinCSFrameIndex = std::min<unsigned>(MinCSFrameIndex, FI);
1922       MaxCSFrameIndex = std::max<unsigned>(MaxCSFrameIndex, FI);
1923     }
1924   }
1925 
1926   // Early exit if no callee saved registers are modified!
1927   if (CSI.empty())
1928     return true;
1929 
1930   if (RVFI->useQCIInterrupt(MF)) {
1931     RVFI->setQCIInterruptStackSize(QCIInterruptPushAmount);
1932   }
1933 
1934   if (RVFI->isPushable(MF)) {
1935     // Determine how many GPRs we need to push and save it to RVFI.
1936     unsigned PushedRegNum = getNumPushPopRegs(CSI);
1937 
1938     // `QC.C.MIENTER(.NEST)` will save `ra` and `s0`, so we should only push if
1939     // we want to push more than 2 registers. Otherwise, we should push if we
1940     // want to push more than 0 registers.
1941     unsigned OnlyPushIfMoreThan = RVFI->useQCIInterrupt(MF) ? 2 : 0;
1942     if (PushedRegNum > OnlyPushIfMoreThan) {
1943       RVFI->setRVPushRegs(PushedRegNum);
1944       RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
1945     }
1946   }
1947 
1948   MachineFrameInfo &MFI = MF.getFrameInfo();
1949   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1950 
1951   for (auto &CS : CSI) {
1952     MCRegister Reg = CS.getReg();
1953     const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
1954     unsigned Size = RegInfo->getSpillSize(*RC);
1955 
1956     if (RVFI->useQCIInterrupt(MF)) {
1957       const auto *FFI = llvm::find_if(FixedCSRFIQCIInterruptMap, [&](auto P) {
1958         return P.first == CS.getReg();
1959       });
1960       if (FFI != std::end(FixedCSRFIQCIInterruptMap)) {
1961         int64_t Offset = FFI->second * (int64_t)Size;
1962 
1963         int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset);
1964         assert(FrameIdx < 0);
1965         CS.setFrameIdx(FrameIdx);
1966         continue;
1967       }
1968     }
1969 
1970     if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) {
1971       const auto *FII = llvm::find_if(
1972           FixedCSRFIMap, [&](MCPhysReg P) { return P == CS.getReg(); });
1973       unsigned RegNum = std::distance(std::begin(FixedCSRFIMap), FII);
1974 
1975       if (FII != std::end(FixedCSRFIMap)) {
1976         int64_t Offset;
1977         if (RVFI->getPushPopKind(MF) ==
1978             RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp)
1979           Offset = -int64_t(RVFI->getRVPushRegs() - RegNum) * Size;
1980         else
1981           Offset = -int64_t(RegNum + 1) * Size;
1982 
1983         if (RVFI->useQCIInterrupt(MF))
1984           Offset -= QCIInterruptPushAmount;
1985 
1986         int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset);
1987         assert(FrameIdx < 0);
1988         CS.setFrameIdx(FrameIdx);
1989         continue;
1990       }
1991     }
1992 
1993     // Not a fixed slot.
1994     Align Alignment = RegInfo->getSpillAlign(*RC);
1995     // We may not be able to satisfy the desired alignment specification of
1996     // the TargetRegisterClass if the stack alignment is smaller. Use the
1997     // min.
1998     Alignment = std::min(Alignment, getStackAlign());
1999     int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
2000     if ((unsigned)FrameIdx < MinCSFrameIndex)
2001       MinCSFrameIndex = FrameIdx;
2002     if ((unsigned)FrameIdx > MaxCSFrameIndex)
2003       MaxCSFrameIndex = FrameIdx;
2004     CS.setFrameIdx(FrameIdx);
2005     if (RISCVRegisterInfo::isRVVRegClass(RC))
2006       MFI.setStackID(FrameIdx, TargetStackID::ScalableVector);
2007   }
2008 
2009   if (RVFI->useQCIInterrupt(MF)) {
2010     // Allocate a fixed object that covers the entire QCI stack allocation,
2011     // because there are gaps which are reserved for future use.
2012     MFI.CreateFixedSpillStackObject(
2013         QCIInterruptPushAmount, -static_cast<int64_t>(QCIInterruptPushAmount));
2014   }
2015 
2016   if (RVFI->isPushable(MF)) {
2017     int64_t QCIOffset = RVFI->useQCIInterrupt(MF) ? QCIInterruptPushAmount : 0;
2018     // Allocate a fixed object that covers the full push.
2019     if (int64_t PushSize = RVFI->getRVPushStackSize())
2020       MFI.CreateFixedSpillStackObject(PushSize, -PushSize - QCIOffset);
2021   } else if (int LibCallRegs = getLibCallID(MF, CSI) + 1) {
2022     int64_t LibCallFrameSize =
2023         alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
2024     MFI.CreateFixedSpillStackObject(LibCallFrameSize, -LibCallFrameSize);
2025   }
2026 
2027   return true;
2028 }
2029 
spillCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,ArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI) const2030 bool RISCVFrameLowering::spillCalleeSavedRegisters(
2031     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2032     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2033   if (CSI.empty())
2034     return true;
2035 
2036   MachineFunction *MF = MBB.getParent();
2037   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
2038   DebugLoc DL;
2039   if (MI != MBB.end() && !MI->isDebugInstr())
2040     DL = MI->getDebugLoc();
2041 
2042   RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2043   if (RVFI->useQCIInterrupt(*MF)) {
2044     // Emit QC.C.MIENTER(.NEST)
2045     BuildMI(
2046         MBB, MI, DL,
2047         TII.get(RVFI->getInterruptStackKind(*MF) ==
2048                         RISCVMachineFunctionInfo::InterruptStackKind::QCINest
2049                     ? RISCV::QC_C_MIENTER_NEST
2050                     : RISCV::QC_C_MIENTER))
2051         .setMIFlag(MachineInstr::FrameSetup);
2052 
2053     for (auto [Reg, _Offset] : FixedCSRFIQCIInterruptMap)
2054       MBB.addLiveIn(Reg);
2055   }
2056 
2057   if (RVFI->isPushable(*MF)) {
2058     // Emit CM.PUSH with base StackAdj & evaluate Push stack
2059     unsigned PushedRegNum = RVFI->getRVPushRegs();
2060     if (PushedRegNum > 0) {
2061       // Use encoded number to represent registers to spill.
2062       unsigned Opcode = getPushOpcode(
2063           RVFI->getPushPopKind(*MF), hasFP(*MF) && !RVFI->useQCIInterrupt(*MF));
2064       unsigned RegEnc = RISCVZC::encodeRegListNumRegs(PushedRegNum);
2065       MachineInstrBuilder PushBuilder =
2066           BuildMI(MBB, MI, DL, TII.get(Opcode))
2067               .setMIFlag(MachineInstr::FrameSetup);
2068       PushBuilder.addImm(RegEnc);
2069       PushBuilder.addImm(0);
2070 
2071       for (unsigned i = 0; i < PushedRegNum; i++)
2072         PushBuilder.addUse(FixedCSRFIMap[i], RegState::Implicit);
2073     }
2074   } else if (const char *SpillLibCall = getSpillLibCallName(*MF, CSI)) {
2075     // Add spill libcall via non-callee-saved register t0.
2076     BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
2077         .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
2078         .setMIFlag(MachineInstr::FrameSetup);
2079 
2080     // Add registers spilled in libcall as liveins.
2081     for (auto &CS : CSI)
2082       MBB.addLiveIn(CS.getReg());
2083   }
2084 
2085   // Manually spill values not spilled by libcall & Push/Pop.
2086   const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
2087   const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);
2088 
2089   auto storeRegsToStackSlots = [&](decltype(UnmanagedCSI) CSInfo) {
2090     for (auto &CS : CSInfo) {
2091       // Insert the spill to the stack frame.
2092       MCRegister Reg = CS.getReg();
2093       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2094       TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg),
2095                               CS.getFrameIdx(), RC, TRI, Register(),
2096                               MachineInstr::FrameSetup);
2097     }
2098   };
2099   storeRegsToStackSlots(UnmanagedCSI);
2100   storeRegsToStackSlots(RVVCSI);
2101 
2102   return true;
2103 }
2104 
getCalleeSavedRVVNumRegs(const Register & BaseReg)2105 static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) {
2106   return RISCV::VRRegClass.contains(BaseReg)     ? 1
2107          : RISCV::VRM2RegClass.contains(BaseReg) ? 2
2108          : RISCV::VRM4RegClass.contains(BaseReg) ? 4
2109                                                  : 8;
2110 }
2111 
getRVVBaseRegister(const RISCVRegisterInfo & TRI,const Register & Reg)2112 static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
2113                                      const Register &Reg) {
2114   MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
2115   // If it's not a grouped vector register, it doesn't have subregister, so
2116   // the base register is just itself.
2117   if (BaseReg == RISCV::NoRegister)
2118     BaseReg = Reg;
2119   return BaseReg;
2120 }
2121 
emitCalleeSavedRVVPrologCFI(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,bool HasFP) const2122 void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
2123     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
2124   MachineFunction *MF = MBB.getParent();
2125   const MachineFrameInfo &MFI = MF->getFrameInfo();
2126   RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2127   const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
2128 
2129   const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo());
2130   if (RVVCSI.empty())
2131     return;
2132 
2133   uint64_t FixedSize = getStackSizeWithRVVPadding(*MF);
2134   if (!HasFP) {
2135     uint64_t ScalarLocalVarSize =
2136         MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
2137         RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
2138     FixedSize -= ScalarLocalVarSize;
2139   }
2140 
2141   CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameSetup);
2142   for (auto &CS : RVVCSI) {
2143     // Insert the spill to the stack frame.
2144     int FI = CS.getFrameIdx();
2145     MCRegister BaseReg = getRVVBaseRegister(TRI, CS.getReg());
2146     unsigned NumRegs = getCalleeSavedRVVNumRegs(CS.getReg());
2147     for (unsigned i = 0; i < NumRegs; ++i) {
2148       CFIBuilder.insertCFIInst(createDefCFAOffset(
2149           TRI, BaseReg + i, -FixedSize, MFI.getObjectOffset(FI) / 8 + i));
2150     }
2151   }
2152 }
2153 
emitCalleeSavedRVVEpilogCFI(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI) const2154 void RISCVFrameLowering::emitCalleeSavedRVVEpilogCFI(
2155     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
2156   MachineFunction *MF = MBB.getParent();
2157   const MachineFrameInfo &MFI = MF->getFrameInfo();
2158   const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
2159 
2160   CFIInstBuilder CFIHelper(MBB, MI, MachineInstr::FrameDestroy);
2161   const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo());
2162   for (auto &CS : RVVCSI) {
2163     MCRegister BaseReg = getRVVBaseRegister(TRI, CS.getReg());
2164     unsigned NumRegs = getCalleeSavedRVVNumRegs(CS.getReg());
2165     for (unsigned i = 0; i < NumRegs; ++i)
2166       CFIHelper.buildRestore(BaseReg + i);
2167   }
2168 }
2169 
restoreCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,MutableArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI) const2170 bool RISCVFrameLowering::restoreCalleeSavedRegisters(
2171     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2172     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2173   if (CSI.empty())
2174     return true;
2175 
2176   MachineFunction *MF = MBB.getParent();
2177   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
2178   DebugLoc DL;
2179   if (MI != MBB.end() && !MI->isDebugInstr())
2180     DL = MI->getDebugLoc();
2181 
2182   // Manually restore values not restored by libcall & Push/Pop.
2183   // Reverse the restore order in epilog.  In addition, the return
2184   // address will be restored first in the epilogue. It increases
2185   // the opportunity to avoid the load-to-use data hazard between
2186   // loading RA and return by RA.  loadRegFromStackSlot can insert
2187   // multiple instructions.
2188   const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
2189   const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);
2190 
2191   auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
2192     for (auto &CS : CSInfo) {
2193       MCRegister Reg = CS.getReg();
2194       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2195       TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
2196                                Register(), MachineInstr::FrameDestroy);
2197       assert(MI != MBB.begin() &&
2198              "loadRegFromStackSlot didn't insert any code!");
2199     }
2200   };
2201   loadRegFromStackSlot(RVVCSI);
2202   loadRegFromStackSlot(UnmanagedCSI);
2203 
2204   RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2205   if (RVFI->useQCIInterrupt(*MF)) {
2206     // Don't emit anything here because restoration is handled by
2207     // QC.C.MILEAVERET which we already inserted to return.
2208     assert(MI->getOpcode() == RISCV::QC_C_MILEAVERET &&
2209            "Unexpected QCI Interrupt Return Instruction");
2210   }
2211 
2212   if (RVFI->isPushable(*MF)) {
2213     unsigned PushedRegNum = RVFI->getRVPushRegs();
2214     if (PushedRegNum > 0) {
2215       unsigned Opcode = getPopOpcode(RVFI->getPushPopKind(*MF));
2216       unsigned RegEnc = RISCVZC::encodeRegListNumRegs(PushedRegNum);
2217       MachineInstrBuilder PopBuilder =
2218           BuildMI(MBB, MI, DL, TII.get(Opcode))
2219               .setMIFlag(MachineInstr::FrameDestroy);
2220       // Use encoded number to represent registers to restore.
2221       PopBuilder.addImm(RegEnc);
2222       PopBuilder.addImm(0);
2223 
2224       for (unsigned i = 0; i < RVFI->getRVPushRegs(); i++)
2225         PopBuilder.addDef(FixedCSRFIMap[i], RegState::ImplicitDefine);
2226     }
2227   } else {
2228     const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
2229     if (RestoreLibCall) {
2230       // Add restore libcall via tail call.
2231       MachineBasicBlock::iterator NewMI =
2232           BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
2233               .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
2234               .setMIFlag(MachineInstr::FrameDestroy);
2235 
2236       // Remove trailing returns, since the terminator is now a tail call to the
2237       // restore function.
2238       if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
2239         NewMI->copyImplicitOps(*MF, *MI);
2240         MI->eraseFromParent();
2241       }
2242     }
2243   }
2244   return true;
2245 }
2246 
enableShrinkWrapping(const MachineFunction & MF) const2247 bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2248   // Keep the conventional code flow when not optimizing.
2249   if (MF.getFunction().hasOptNone())
2250     return false;
2251 
2252   return true;
2253 }
2254 
canUseAsPrologue(const MachineBasicBlock & MBB) const2255 bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
2256   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
2257   const MachineFunction *MF = MBB.getParent();
2258   const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2259 
2260   // Make sure VTYPE and VL are not live-in since we will use vsetvli in the
2261   // prologue to get the VLEN, and that will clobber these registers.
2262   //
2263   // We may do also check the stack contains objects with scalable vector type,
2264   // but this will require iterating over all the stack objects, but this may
2265   // not worth since the situation is rare, we could do further check in future
2266   // if we find it is necessary.
2267   if (STI.preferVsetvliOverReadVLENB() &&
2268       (MBB.isLiveIn(RISCV::VTYPE) || MBB.isLiveIn(RISCV::VL)))
2269     return false;
2270 
2271   if (!RVFI->useSaveRestoreLibCalls(*MF))
2272     return true;
2273 
2274   // Inserting a call to a __riscv_save libcall requires the use of the register
2275   // t0 (X5) to hold the return address. Therefore if this register is already
2276   // used we can't insert the call.
2277 
2278   RegScavenger RS;
2279   RS.enterBasicBlock(*TmpMBB);
2280   return !RS.isRegUsed(RISCV::X5);
2281 }
2282 
canUseAsEpilogue(const MachineBasicBlock & MBB) const2283 bool RISCVFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
2284   const MachineFunction *MF = MBB.getParent();
2285   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
2286   const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2287 
2288   // We do not want QC.C.MILEAVERET to be subject to shrink-wrapping - it must
2289   // come in the final block of its function as it both pops and returns.
2290   if (RVFI->useQCIInterrupt(*MF))
2291     return MBB.succ_empty();
2292 
2293   if (!RVFI->useSaveRestoreLibCalls(*MF))
2294     return true;
2295 
2296   // Using the __riscv_restore libcalls to restore CSRs requires a tail call.
2297   // This means if we still need to continue executing code within this function
2298   // the restore cannot take place in this basic block.
2299 
2300   if (MBB.succ_size() > 1)
2301     return false;
2302 
2303   MachineBasicBlock *SuccMBB =
2304       MBB.succ_empty() ? TmpMBB->getFallThrough() : *MBB.succ_begin();
2305 
2306   // Doing a tail call should be safe if there are no successors, because either
2307   // we have a returning block or the end of the block is unreachable, so the
2308   // restore will be eliminated regardless.
2309   if (!SuccMBB)
2310     return true;
2311 
2312   // The successor can only contain a return, since we would effectively be
2313   // replacing the successor with our own tail return at the end of our block.
2314   return SuccMBB->isReturnBlock() && SuccMBB->size() == 1;
2315 }
2316 
isSupportedStackID(TargetStackID::Value ID) const2317 bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
2318   switch (ID) {
2319   case TargetStackID::Default:
2320   case TargetStackID::ScalableVector:
2321     return true;
2322   case TargetStackID::NoAlloc:
2323   case TargetStackID::SGPRSpill:
2324   case TargetStackID::WasmLocal:
2325     return false;
2326   }
2327   llvm_unreachable("Invalid TargetStackID::Value");
2328 }
2329 
getStackIDForScalableVectors() const2330 TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
2331   return TargetStackID::ScalableVector;
2332 }
2333 
2334 // Synthesize the probe loop.
emitStackProbeInline(MachineBasicBlock::iterator MBBI,DebugLoc DL,Register TargetReg,bool IsRVV)2335 static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
2336                                  Register TargetReg, bool IsRVV) {
2337   assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");
2338 
2339   MachineBasicBlock &MBB = *MBBI->getParent();
2340   MachineFunction &MF = *MBB.getParent();
2341 
2342   auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
2343   const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
2344   bool IsRV64 = Subtarget.is64Bit();
2345   Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
2346   const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
2347   uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
2348 
2349   MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
2350   MachineBasicBlock *LoopTestMBB =
2351       MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2352   MF.insert(MBBInsertPoint, LoopTestMBB);
2353   MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2354   MF.insert(MBBInsertPoint, ExitMBB);
2355   MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2356   Register ScratchReg = RISCV::X7;
2357 
2358   // ScratchReg = ProbeSize
2359   TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2360 
2361   // LoopTest:
2362   //   SUB SP, SP, ProbeSize
2363   BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
2364       .addReg(SPReg)
2365       .addReg(ScratchReg)
2366       .setMIFlags(Flags);
2367 
2368   //   s[d|w] zero, 0(sp)
2369   BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
2370           TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
2371       .addReg(RISCV::X0)
2372       .addReg(SPReg)
2373       .addImm(0)
2374       .setMIFlags(Flags);
2375 
2376   if (IsRVV) {
2377     //  SUB TargetReg, TargetReg, ProbeSize
2378     BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB),
2379             TargetReg)
2380         .addReg(TargetReg)
2381         .addReg(ScratchReg)
2382         .setMIFlags(Flags);
2383 
2384     //  BGE TargetReg, ProbeSize, LoopTest
2385     BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BGE))
2386         .addReg(TargetReg)
2387         .addReg(ScratchReg)
2388         .addMBB(LoopTestMBB)
2389         .setMIFlags(Flags);
2390 
2391   } else {
2392     //  BNE SP, TargetReg, LoopTest
2393     BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
2394         .addReg(SPReg)
2395         .addReg(TargetReg)
2396         .addMBB(LoopTestMBB)
2397         .setMIFlags(Flags);
2398   }
2399 
2400   ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
2401   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
2402 
2403   LoopTestMBB->addSuccessor(ExitMBB);
2404   LoopTestMBB->addSuccessor(LoopTestMBB);
2405   MBB.addSuccessor(LoopTestMBB);
2406   // Update liveins.
2407   fullyRecomputeLiveIns({ExitMBB, LoopTestMBB});
2408 }
2409 
inlineStackProbe(MachineFunction & MF,MachineBasicBlock & MBB) const2410 void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
2411                                           MachineBasicBlock &MBB) const {
2412   // Get the instructions that need to be replaced. We emit at most two of
2413   // these. Remember them in order to avoid complications coming from the need
2414   // to traverse the block while potentially creating more blocks.
2415   SmallVector<MachineInstr *, 4> ToReplace;
2416   for (MachineInstr &MI : MBB) {
2417     unsigned Opc = MI.getOpcode();
2418     if (Opc == RISCV::PROBED_STACKALLOC ||
2419         Opc == RISCV::PROBED_STACKALLOC_RVV) {
2420       ToReplace.push_back(&MI);
2421     }
2422   }
2423 
2424   for (MachineInstr *MI : ToReplace) {
2425     if (MI->getOpcode() == RISCV::PROBED_STACKALLOC ||
2426         MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV) {
2427       MachineBasicBlock::iterator MBBI = MI->getIterator();
2428       DebugLoc DL = MBB.findDebugLoc(MBBI);
2429       Register TargetReg = MI->getOperand(0).getReg();
2430       emitStackProbeInline(MBBI, DL, TargetReg,
2431                            (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
2432       MBBI->eraseFromParent();
2433     }
2434   }
2435 }
2436