xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===-- LoongArchFrameLowering.cpp - LoongArch Frame Information -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the LoongArch implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "LoongArchFrameLowering.h"
14 #include "LoongArchMachineFunctionInfo.h"
15 #include "LoongArchSubtarget.h"
16 #include "MCTargetDesc/LoongArchBaseInfo.h"
17 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/RegisterScavenging.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/MC/MCDwarf.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "loongarch-frame-lowering"
29 
30 // Return true if the specified function should have a dedicated frame
31 // pointer register.  This is true if frame pointer elimination is
32 // disabled, if it needs dynamic stack realignment, if the function has
33 // variable sized allocas, or if the frame address is taken.
34 bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const {
35   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
36 
37   const MachineFrameInfo &MFI = MF.getFrameInfo();
38   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
39          RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
40          MFI.isFrameAddressTaken();
41 }
42 
43 bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const {
44   const MachineFrameInfo &MFI = MF.getFrameInfo();
45   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
46 
47   return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF);
48 }
49 
50 void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB,
51                                        MachineBasicBlock::iterator MBBI,
52                                        const DebugLoc &DL, Register DestReg,
53                                        Register SrcReg, int64_t Val,
54                                        MachineInstr::MIFlag Flag) const {
55   const LoongArchInstrInfo *TII = STI.getInstrInfo();
56   bool IsLA64 = STI.is64Bit();
57   unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
58 
59   if (DestReg == SrcReg && Val == 0)
60     return;
61 
62   if (isInt<12>(Val)) {
63     // addi.w/d $DstReg, $SrcReg, Val
64     BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
65         .addReg(SrcReg)
66         .addImm(Val)
67         .setMIFlag(Flag);
68     return;
69   }
70 
71   // Try to split the offset across two ADDIs. We need to keep the stack pointer
72   // aligned after each ADDI. We need to determine the maximum value we can put
73   // in each ADDI. In the negative direction, we can use -2048 which is always
74   // sufficiently aligned. In the positive direction, we need to find the
75   // largest 12-bit immediate that is aligned. Exclude -4096 since it can be
76   // created with LU12I.W.
77   assert(getStackAlign().value() < 2048 && "Stack alignment too large");
78   int64_t MaxPosAdjStep = 2048 - getStackAlign().value();
79   if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) {
80     int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep;
81     Val -= FirstAdj;
82     BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
83         .addReg(SrcReg)
84         .addImm(FirstAdj)
85         .setMIFlag(Flag);
86     BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
87         .addReg(DestReg, RegState::Kill)
88         .addImm(Val)
89         .setMIFlag(Flag);
90     return;
91   }
92 
93   unsigned Opc = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
94   if (Val < 0) {
95     Val = -Val;
96     Opc = IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W;
97   }
98 
99   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
100   Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
101   TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag);
102   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
103       .addReg(SrcReg)
104       .addReg(ScratchReg, RegState::Kill)
105       .setMIFlag(Flag);
106 }
107 
108 // Determine the size of the frame and maximum call frame size.
109 void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const {
110   MachineFrameInfo &MFI = MF.getFrameInfo();
111 
112   // Get the number of bytes to allocate from the FrameInfo.
113   uint64_t FrameSize = MFI.getStackSize();
114 
115   // Make sure the frame is aligned.
116   FrameSize = alignTo(FrameSize, getStackAlign());
117 
118   // Update frame info.
119   MFI.setStackSize(FrameSize);
120 }
121 
122 static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII,
123                                             const MachineFunction &MF) {
124   uint64_t FuncSize = 0;
125   for (auto &MBB : MF)
126     for (auto &MI : MBB)
127       FuncSize += TII->getInstSizeInBytes(MI);
128   return FuncSize;
129 }
130 
131 static bool needScavSlotForCFR(MachineFunction &MF) {
132   if (!MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
133     return false;
134   for (auto &MBB : MF)
135     for (auto &MI : MBB)
136       if (MI.getOpcode() == LoongArch::PseudoST_CFR)
137         return true;
138   return false;
139 }
140 
141 void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
142     MachineFunction &MF, RegScavenger *RS) const {
143   const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
144   const TargetRegisterClass &RC = LoongArch::GPRRegClass;
145   const LoongArchInstrInfo *TII = STI.getInstrInfo();
146   LoongArchMachineFunctionInfo *LAFI =
147       MF.getInfo<LoongArchMachineFunctionInfo>();
148   MachineFrameInfo &MFI = MF.getFrameInfo();
149 
150   unsigned ScavSlotsNum = 0;
151 
152   // Far branches beyond 27-bit offset require a spill slot for scratch
153   // register.
154   bool IsLargeFunction = !isInt<27>(estimateFunctionSizeInBytes(TII, MF));
155   if (IsLargeFunction)
156     ScavSlotsNum = 1;
157 
158   // estimateStackSize has been observed to under-estimate the final stack
159   // size, so give ourselves wiggle-room by checking for stack size
160   // representable an 11-bit signed field rather than 12-bits.
161   if (!isInt<11>(MFI.estimateStackSize(MF)))
162     ScavSlotsNum = std::max(ScavSlotsNum, 1u);
163 
164   // For CFR spill.
165   if (needScavSlotForCFR(MF))
166     ++ScavSlotsNum;
167 
168   // Create emergency spill slots.
169   for (unsigned i = 0; i < ScavSlotsNum; ++i) {
170     int FI = MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC),
171                                    false);
172     RS->addScavengingFrameIndex(FI);
173     if (IsLargeFunction && LAFI->getBranchRelaxationSpillFrameIndex() == -1)
174       LAFI->setBranchRelaxationSpillFrameIndex(FI);
175     LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
176                       << ") as the emergency spill slot.\n");
177   }
178 }
179 
180 void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
181                                           MachineBasicBlock &MBB) const {
182   MachineFrameInfo &MFI = MF.getFrameInfo();
183   auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
184   const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
185   const LoongArchInstrInfo *TII = STI.getInstrInfo();
186   MachineBasicBlock::iterator MBBI = MBB.begin();
187   bool IsLA64 = STI.is64Bit();
188 
189   Register SPReg = LoongArch::R3;
190   Register FPReg = LoongArch::R22;
191 
192   // Debug location must be unknown since the first debug location is used
193   // to determine the end of the prologue.
194   DebugLoc DL;
195   // All calls are tail calls in GHC calling conv, and functions have no
196   // prologue/epilogue.
197   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
198     return;
199   // Determine the correct frame layout
200   determineFrameLayout(MF);
201 
202   // First, compute final stack size.
203   uint64_t StackSize = MFI.getStackSize();
204   uint64_t RealStackSize = StackSize;
205 
206   // Early exit if there is no need to allocate space in the stack.
207   if (StackSize == 0 && !MFI.adjustsStack())
208     return;
209 
210   uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
211   // Split the SP adjustment to reduce the offsets of callee saved spill.
212   if (FirstSPAdjustAmount)
213     StackSize = FirstSPAdjustAmount;
214 
215   // Adjust stack.
216   adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
217   // Emit ".cfi_def_cfa_offset StackSize".
218   unsigned CFIIndex =
219       MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
220   BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
221       .addCFIIndex(CFIIndex)
222       .setMIFlag(MachineInstr::FrameSetup);
223 
224   const auto &CSI = MFI.getCalleeSavedInfo();
225 
226   // The frame pointer is callee-saved, and code has been generated for us to
227   // save it to the stack. We need to skip over the storing of callee-saved
228   // registers as the frame pointer must be modified after it has been saved
229   // to the stack, not before.
230   std::advance(MBBI, CSI.size());
231 
232   // Iterate over list of callee-saved registers and emit .cfi_offset
233   // directives.
234   for (const auto &Entry : CSI) {
235     int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
236     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
237         nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset));
238     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
239         .addCFIIndex(CFIIndex)
240         .setMIFlag(MachineInstr::FrameSetup);
241   }
242 
243   // Generate new FP.
244   if (hasFP(MF)) {
245     adjustReg(MBB, MBBI, DL, FPReg, SPReg,
246               StackSize - LoongArchFI->getVarArgsSaveSize(),
247               MachineInstr::FrameSetup);
248 
249     // Emit ".cfi_def_cfa $fp, LoongArchFI->getVarArgsSaveSize()"
250     unsigned CFIIndex = MF.addFrameInst(
251         MCCFIInstruction::cfiDefCfa(nullptr, RI->getDwarfRegNum(FPReg, true),
252                                     LoongArchFI->getVarArgsSaveSize()));
253     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
254         .addCFIIndex(CFIIndex)
255         .setMIFlag(MachineInstr::FrameSetup);
256   }
257 
258   // Emit the second SP adjustment after saving callee saved registers.
259   if (FirstSPAdjustAmount) {
260     uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
261     assert(SecondSPAdjustAmount > 0 &&
262            "SecondSPAdjustAmount should be greater than zero");
263     adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
264               MachineInstr::FrameSetup);
265 
266     if (!hasFP(MF)) {
267       // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
268       // don't emit an sp-based .cfi_def_cfa_offset
269       // Emit ".cfi_def_cfa_offset RealStackSize"
270       unsigned CFIIndex = MF.addFrameInst(
271           MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
272       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
273           .addCFIIndex(CFIIndex)
274           .setMIFlag(MachineInstr::FrameSetup);
275     }
276   }
277 
278   if (hasFP(MF)) {
279     // Realign stack.
280     if (RI->hasStackRealignment(MF)) {
281       unsigned Align = Log2(MFI.getMaxAlign());
282       assert(Align > 0 && "The stack realignment size is invalid!");
283       BuildMI(MBB, MBBI, DL,
284               TII->get(IsLA64 ? LoongArch::BSTRINS_D : LoongArch::BSTRINS_W),
285               SPReg)
286           .addReg(SPReg)
287           .addReg(LoongArch::R0)
288           .addImm(Align - 1)
289           .addImm(0)
290           .setMIFlag(MachineInstr::FrameSetup);
291       // FP will be used to restore the frame in the epilogue, so we need
292       // another base register BP to record SP after re-alignment. SP will
293       // track the current stack after allocating variable sized objects.
294       if (hasBP(MF)) {
295         // move BP, $sp
296         BuildMI(MBB, MBBI, DL, TII->get(LoongArch::OR),
297                 LoongArchABI::getBPReg())
298             .addReg(SPReg)
299             .addReg(LoongArch::R0)
300             .setMIFlag(MachineInstr::FrameSetup);
301       }
302     }
303   }
304 }
305 
306 void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
307                                           MachineBasicBlock &MBB) const {
308   const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
309   MachineFrameInfo &MFI = MF.getFrameInfo();
310   auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
311   Register SPReg = LoongArch::R3;
312   // All calls are tail calls in GHC calling conv, and functions have no
313   // prologue/epilogue.
314   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
315     return;
316   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
317   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
318 
319   const auto &CSI = MFI.getCalleeSavedInfo();
320   // Skip to before the restores of callee-saved registers.
321   auto LastFrameDestroy = MBBI;
322   if (!CSI.empty())
323     LastFrameDestroy = std::prev(MBBI, CSI.size());
324 
325   // Get the number of bytes from FrameInfo.
326   uint64_t StackSize = MFI.getStackSize();
327 
328   // Restore the stack pointer.
329   if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
330     assert(hasFP(MF) && "frame pointer should not have been eliminated");
331     adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22,
332               -StackSize + LoongArchFI->getVarArgsSaveSize(),
333               MachineInstr::FrameDestroy);
334   }
335 
336   uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
337   if (FirstSPAdjustAmount) {
338     uint64_t SecondSPAdjustAmount = StackSize - FirstSPAdjustAmount;
339     assert(SecondSPAdjustAmount > 0 &&
340            "SecondSPAdjustAmount should be greater than zero");
341 
342     adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount,
343               MachineInstr::FrameDestroy);
344     StackSize = FirstSPAdjustAmount;
345   }
346 
347   // Deallocate stack
348   adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
349 }
350 
351 // We would like to split the SP adjustment to reduce prologue/epilogue
352 // as following instructions. In this way, the offset of the callee saved
353 // register could fit in a single store.
354 // e.g.
355 //   addi.d  $sp, $sp, -2032
356 //   st.d    $ra, $sp,  2024
357 //   st.d    $fp, $sp,  2016
358 //   addi.d  $sp, $sp,   -16
359 uint64_t LoongArchFrameLowering::getFirstSPAdjustAmount(
360     const MachineFunction &MF) const {
361   const MachineFrameInfo &MFI = MF.getFrameInfo();
362   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
363 
364   // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
365   // 12-bit and there exists a callee-saved register needing to be pushed.
366   if (!isInt<12>(MFI.getStackSize()) && (CSI.size() > 0)) {
367     // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
368     // cause sp = sp + 2048 in the epilogue to be split into multiple
369     // instructions. Offsets smaller than 2048 can fit in a single load/store
370     // instruction, and we have to stick with the stack alignment.
371     // So (2048 - StackAlign) will satisfy the stack alignment.
372     return 2048 - getStackAlign().value();
373   }
374   return 0;
375 }
376 
377 void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF,
378                                                   BitVector &SavedRegs,
379                                                   RegScavenger *RS) const {
380   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
381   // Unconditionally spill RA and FP only if the function uses a frame
382   // pointer.
383   if (hasFP(MF)) {
384     SavedRegs.set(LoongArch::R1);
385     SavedRegs.set(LoongArch::R22);
386   }
387   // Mark BP as used if function has dedicated base pointer.
388   if (hasBP(MF))
389     SavedRegs.set(LoongArchABI::getBPReg());
390 }
391 
392 // Do not preserve stack space within prologue for outgoing variables if the
393 // function contains variable size objects.
394 // Let eliminateCallFramePseudoInstr preserve stack space for it.
395 bool LoongArchFrameLowering::hasReservedCallFrame(
396     const MachineFunction &MF) const {
397   return !MF.getFrameInfo().hasVarSizedObjects();
398 }
399 
400 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
401 MachineBasicBlock::iterator
402 LoongArchFrameLowering::eliminateCallFramePseudoInstr(
403     MachineFunction &MF, MachineBasicBlock &MBB,
404     MachineBasicBlock::iterator MI) const {
405   Register SPReg = LoongArch::R3;
406   DebugLoc DL = MI->getDebugLoc();
407 
408   if (!hasReservedCallFrame(MF)) {
409     // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
410     // ADJCALLSTACKUP must be converted to instructions manipulating the stack
411     // pointer. This is necessary when there is a variable length stack
412     // allocation (e.g. alloca), which means it's not possible to allocate
413     // space for outgoing arguments from within the function prologue.
414     int64_t Amount = MI->getOperand(0).getImm();
415 
416     if (Amount != 0) {
417       // Ensure the stack remains aligned after adjustment.
418       Amount = alignSPAdjust(Amount);
419 
420       if (MI->getOpcode() == LoongArch::ADJCALLSTACKDOWN)
421         Amount = -Amount;
422 
423       adjustReg(MBB, MI, DL, SPReg, SPReg, Amount, MachineInstr::NoFlags);
424     }
425   }
426 
427   return MBB.erase(MI);
428 }
429 
430 bool LoongArchFrameLowering::spillCalleeSavedRegisters(
431     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
432     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
433   if (CSI.empty())
434     return true;
435 
436   MachineFunction *MF = MBB.getParent();
437   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
438 
439   // Insert the spill to the stack frame.
440   for (auto &CS : CSI) {
441     Register Reg = CS.getReg();
442     // If the register is RA and the return address is taken by method
443     // LoongArchTargetLowering::lowerRETURNADDR, don't set kill flag.
444     bool IsKill =
445         !(Reg == LoongArch::R1 && MF->getFrameInfo().isReturnAddressTaken());
446     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
447     TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC, TRI,
448                             Register());
449   }
450 
451   return true;
452 }
453 
454 StackOffset LoongArchFrameLowering::getFrameIndexReference(
455     const MachineFunction &MF, int FI, Register &FrameReg) const {
456   const MachineFrameInfo &MFI = MF.getFrameInfo();
457   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
458   auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
459   uint64_t StackSize = MFI.getStackSize();
460   uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
461 
462   // Callee-saved registers should be referenced relative to the stack
463   // pointer (positive offset), otherwise use the frame pointer (negative
464   // offset).
465   const auto &CSI = MFI.getCalleeSavedInfo();
466   int MinCSFI = 0;
467   int MaxCSFI = -1;
468   StackOffset Offset =
469       StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
470                             MFI.getOffsetAdjustment());
471 
472   if (CSI.size()) {
473     MinCSFI = CSI[0].getFrameIdx();
474     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
475   }
476 
477   if (FI >= MinCSFI && FI <= MaxCSFI) {
478     FrameReg = LoongArch::R3;
479     if (FirstSPAdjustAmount)
480       Offset += StackOffset::getFixed(FirstSPAdjustAmount);
481     else
482       Offset += StackOffset::getFixed(StackSize);
483   } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
484     // If the stack was realigned, the frame pointer is set in order to allow
485     // SP to be restored, so we need another base register to record the stack
486     // after realignment.
487     FrameReg = hasBP(MF) ? LoongArchABI::getBPReg() : LoongArch::R3;
488     Offset += StackOffset::getFixed(StackSize);
489   } else {
490     FrameReg = RI->getFrameRegister(MF);
491     if (hasFP(MF))
492       Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize());
493     else
494       Offset += StackOffset::getFixed(StackSize);
495   }
496 
497   return Offset;
498 }
499 
500 bool LoongArchFrameLowering::enableShrinkWrapping(
501     const MachineFunction &MF) const {
502   // Keep the conventional code flow when not optimizing.
503   if (MF.getFunction().hasOptNone())
504     return false;
505 
506   return true;
507 }
508