1 //===-- RISCVFrameLowering.cpp - RISC-V Frame Information -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the RISC-V implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVFrameLowering.h" 14 #include "MCTargetDesc/RISCVBaseInfo.h" 15 #include "RISCVMachineFunctionInfo.h" 16 #include "RISCVSubtarget.h" 17 #include "llvm/BinaryFormat/Dwarf.h" 18 #include "llvm/CodeGen/CFIInstBuilder.h" 19 #include "llvm/CodeGen/LivePhysRegs.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/RegisterScavenging.h" 25 #include "llvm/IR/DiagnosticInfo.h" 26 #include "llvm/MC/MCDwarf.h" 27 #include "llvm/Support/LEB128.h" 28 29 #include <algorithm> 30 31 #define DEBUG_TYPE "riscv-frame" 32 33 using namespace llvm; 34 35 static Align getABIStackAlignment(RISCVABI::ABI ABI) { 36 if (ABI == RISCVABI::ABI_ILP32E) 37 return Align(4); 38 if (ABI == RISCVABI::ABI_LP64E) 39 return Align(8); 40 return Align(16); 41 } 42 43 RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) 44 : TargetFrameLowering( 45 StackGrowsDown, getABIStackAlignment(STI.getTargetABI()), 46 /*LocalAreaOffset=*/0, 47 /*TransientStackAlignment=*/getABIStackAlignment(STI.getTargetABI())), 48 STI(STI) {} 49 50 // The register used to hold the frame pointer. 51 static constexpr MCPhysReg FPReg = RISCV::X8; 52 53 // The register used to hold the stack pointer. 
static constexpr MCPhysReg SPReg = RISCV::X2;

// The register used to hold the return address.
static constexpr MCPhysReg RAReg = RISCV::X1;

// List of CSRs that are given a fixed location by save/restore libcalls or
// Zcmp/Xqccmp Push/Pop. The order in this table indicates the order the
// registers are saved on the stack. Zcmp uses the reverse order of save/restore
// and Xqccmp on the stack, but this is handled when offsets are calculated.
static const MCPhysReg FixedCSRFIMap[] = {
    /*ra*/ RAReg,      /*s0*/ FPReg,      /*s1*/ RISCV::X9,
    /*s2*/ RISCV::X18, /*s3*/ RISCV::X19, /*s4*/ RISCV::X20,
    /*s5*/ RISCV::X21, /*s6*/ RISCV::X22, /*s7*/ RISCV::X23,
    /*s8*/ RISCV::X24, /*s9*/ RISCV::X25, /*s10*/ RISCV::X26,
    /*s11*/ RISCV::X27};

// The number of stack bytes allocated by `QC.C.MIENTER(.NEST)` and popped by
// `QC.C.MILEAVERET`.
static constexpr uint64_t QCIInterruptPushAmount = 96;

// Registers saved by `QC.C.MIENTER(.NEST)`, each paired with a negative slot
// number. Slot numbers with no entry are gaps, as noted inline.
// NOTE(review): the slot numbers look like word-sized indices counted down
// from the top of the 96-byte area (24 slots) -- confirm against the code that
// consumes `.second`.
static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
    /* -1 is a gap for mepc/mnepc */
    {/*fp*/ FPReg, -2},
    /* -3 is a gap for qc.mcause */
    {/*ra*/ RAReg, -4},
    /* -5 is reserved */
    {/*t0*/ RISCV::X5, -6},
    {/*t1*/ RISCV::X6, -7},
    {/*t2*/ RISCV::X7, -8},
    {/*a0*/ RISCV::X10, -9},
    {/*a1*/ RISCV::X11, -10},
    {/*a2*/ RISCV::X12, -11},
    {/*a3*/ RISCV::X13, -12},
    {/*a4*/ RISCV::X14, -13},
    {/*a5*/ RISCV::X15, -14},
    {/*a6*/ RISCV::X16, -15},
    {/*a7*/ RISCV::X17, -16},
    {/*t3*/ RISCV::X28, -17},
    {/*t4*/ RISCV::X29, -18},
    {/*t5*/ RISCV::X30, -19},
    {/*t6*/ RISCV::X31, -20},
    /* -21, -22, -23, -24 are reserved */
};

// For now we use x3, a.k.a gp, as pointer to shadow call stack.
// User should not use x3 in their asm.
100 static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, 101 MachineBasicBlock::iterator MI, 102 const DebugLoc &DL) { 103 const auto &STI = MF.getSubtarget<RISCVSubtarget>(); 104 bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") && 105 STI.hasStdExtZicfiss(); 106 bool HasSWShadowStack = 107 MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack); 108 if (!HasHWShadowStack && !HasSWShadowStack) 109 return; 110 111 const llvm::RISCVRegisterInfo *TRI = STI.getRegisterInfo(); 112 113 // Do not save RA to the SCS if it's not saved to the regular stack, 114 // i.e. RA is not at risk of being overwritten. 115 std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo(); 116 if (llvm::none_of( 117 CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) 118 return; 119 120 const RISCVInstrInfo *TII = STI.getInstrInfo(); 121 if (HasHWShadowStack) { 122 BuildMI(MBB, MI, DL, TII->get(RISCV::SSPUSH)).addReg(RAReg); 123 return; 124 } 125 126 Register SCSPReg = RISCVABI::getSCSPReg(); 127 128 bool IsRV64 = STI.is64Bit(); 129 int64_t SlotSize = STI.getXLen() / 8; 130 // Store return address to shadow call stack 131 // addi gp, gp, [4|8] 132 // s[w|d] ra, -[4|8](gp) 133 BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI)) 134 .addReg(SCSPReg, RegState::Define) 135 .addReg(SCSPReg) 136 .addImm(SlotSize) 137 .setMIFlag(MachineInstr::FrameSetup); 138 BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) 139 .addReg(RAReg) 140 .addReg(SCSPReg) 141 .addImm(-SlotSize) 142 .setMIFlag(MachineInstr::FrameSetup); 143 144 // Emit a CFI instruction that causes SlotSize to be subtracted from the value 145 // of the shadow stack pointer when unwinding past this frame. 
146 char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true); 147 assert(DwarfSCSReg < 32 && "SCS Register should be < 32 (X3)."); 148 149 char Offset = static_cast<char>(-SlotSize) & 0x7f; 150 const char CFIInst[] = { 151 dwarf::DW_CFA_val_expression, 152 DwarfSCSReg, // register 153 2, // length 154 static_cast<char>(unsigned(dwarf::DW_OP_breg0 + DwarfSCSReg)), 155 Offset, // addend (sleb128) 156 }; 157 158 CFIInstBuilder(MBB, MI, MachineInstr::FrameSetup) 159 .buildEscape(StringRef(CFIInst, sizeof(CFIInst))); 160 } 161 162 static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB, 163 MachineBasicBlock::iterator MI, 164 const DebugLoc &DL) { 165 const auto &STI = MF.getSubtarget<RISCVSubtarget>(); 166 bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") && 167 STI.hasStdExtZicfiss(); 168 bool HasSWShadowStack = 169 MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack); 170 if (!HasHWShadowStack && !HasSWShadowStack) 171 return; 172 173 // See emitSCSPrologue() above. 174 std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo(); 175 if (llvm::none_of( 176 CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) 177 return; 178 179 const RISCVInstrInfo *TII = STI.getInstrInfo(); 180 if (HasHWShadowStack) { 181 BuildMI(MBB, MI, DL, TII->get(RISCV::SSPOPCHK)).addReg(RAReg); 182 return; 183 } 184 185 Register SCSPReg = RISCVABI::getSCSPReg(); 186 187 bool IsRV64 = STI.is64Bit(); 188 int64_t SlotSize = STI.getXLen() / 8; 189 // Load return address from shadow call stack 190 // l[w|d] ra, -[4|8](gp) 191 // addi gp, gp, -[4|8] 192 BuildMI(MBB, MI, DL, TII->get(IsRV64 ? 
RISCV::LD : RISCV::LW)) 193 .addReg(RAReg, RegState::Define) 194 .addReg(SCSPReg) 195 .addImm(-SlotSize) 196 .setMIFlag(MachineInstr::FrameDestroy); 197 BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI)) 198 .addReg(SCSPReg, RegState::Define) 199 .addReg(SCSPReg) 200 .addImm(-SlotSize) 201 .setMIFlag(MachineInstr::FrameDestroy); 202 // Restore the SCS pointer 203 CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg); 204 } 205 206 // Insert instruction to swap mscratchsw with sp 207 static void emitSiFiveCLICStackSwap(MachineFunction &MF, MachineBasicBlock &MBB, 208 MachineBasicBlock::iterator MBBI, 209 const DebugLoc &DL) { 210 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 211 212 if (!RVFI->isSiFiveStackSwapInterrupt(MF)) 213 return; 214 215 const auto &STI = MF.getSubtarget<RISCVSubtarget>(); 216 const RISCVInstrInfo *TII = STI.getInstrInfo(); 217 218 assert(STI.hasVendorXSfmclic() && "Stack Swapping Requires XSfmclic"); 219 220 BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW)) 221 .addReg(SPReg, RegState::Define) 222 .addImm(RISCVSysReg::sf_mscratchcsw) 223 .addReg(SPReg, RegState::Kill) 224 .setMIFlag(MachineInstr::FrameSetup); 225 226 // FIXME: CFI Information for this swap. 227 } 228 229 static void 230 createSiFivePreemptibleInterruptFrameEntries(MachineFunction &MF, 231 RISCVMachineFunctionInfo &RVFI) { 232 if (!RVFI.isSiFivePreemptibleInterrupt(MF)) 233 return; 234 235 const TargetRegisterClass &RC = RISCV::GPRRegClass; 236 const TargetRegisterInfo &TRI = 237 *MF.getSubtarget<RISCVSubtarget>().getRegisterInfo(); 238 MachineFrameInfo &MFI = MF.getFrameInfo(); 239 240 // Create two frame objects for spilling X8 and X9, which will be done in 241 // `emitSiFiveCLICPreemptibleSaves`. This is in addition to any other stack 242 // objects we might have for X8 and X9, as they might be saved twice. 
243 for (int I = 0; I < 2; ++I) { 244 int FI = MFI.CreateStackObject(TRI.getSpillSize(RC), TRI.getSpillAlign(RC), 245 true); 246 RVFI.pushInterruptCSRFrameIndex(FI); 247 } 248 } 249 250 static void emitSiFiveCLICPreemptibleSaves(MachineFunction &MF, 251 MachineBasicBlock &MBB, 252 MachineBasicBlock::iterator MBBI, 253 const DebugLoc &DL) { 254 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 255 256 if (!RVFI->isSiFivePreemptibleInterrupt(MF)) 257 return; 258 259 const auto &STI = MF.getSubtarget<RISCVSubtarget>(); 260 const RISCVInstrInfo *TII = STI.getInstrInfo(); 261 262 // FIXME: CFI Information here is nonexistent/wrong. 263 264 // X8 and X9 might be stored into the stack twice, initially into the 265 // `interruptCSRFrameIndex` here, and then maybe again into their CSI frame 266 // index. 267 // 268 // This is done instead of telling the register allocator that we need two 269 // VRegs to store the value of `mcause` and `mepc` through the instruction, 270 // which affects other passes. 271 TII->storeRegToStackSlot(MBB, MBBI, RISCV::X8, /* IsKill=*/true, 272 RVFI->getInterruptCSRFrameIndex(0), 273 &RISCV::GPRRegClass, STI.getRegisterInfo(), 274 Register(), MachineInstr::FrameSetup); 275 TII->storeRegToStackSlot(MBB, MBBI, RISCV::X9, /* IsKill=*/true, 276 RVFI->getInterruptCSRFrameIndex(1), 277 &RISCV::GPRRegClass, STI.getRegisterInfo(), 278 Register(), MachineInstr::FrameSetup); 279 280 // Put `mcause` into X8 (s0), and `mepc` into X9 (s1). If either of these are 281 // used in the function, then they will appear in `getUnmanagedCSI` and will 282 // be saved again. 
283 BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRS)) 284 .addReg(RISCV::X8, RegState::Define) 285 .addImm(RISCVSysReg::mcause) 286 .addReg(RISCV::X0) 287 .setMIFlag(MachineInstr::FrameSetup); 288 BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRS)) 289 .addReg(RISCV::X9, RegState::Define) 290 .addImm(RISCVSysReg::mepc) 291 .addReg(RISCV::X0) 292 .setMIFlag(MachineInstr::FrameSetup); 293 294 // Enable interrupts. 295 BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRSI)) 296 .addReg(RISCV::X0, RegState::Define) 297 .addImm(RISCVSysReg::mstatus) 298 .addImm(8) 299 .setMIFlag(MachineInstr::FrameSetup); 300 } 301 302 static void emitSiFiveCLICPreemptibleRestores(MachineFunction &MF, 303 MachineBasicBlock &MBB, 304 MachineBasicBlock::iterator MBBI, 305 const DebugLoc &DL) { 306 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 307 308 if (!RVFI->isSiFivePreemptibleInterrupt(MF)) 309 return; 310 311 const auto &STI = MF.getSubtarget<RISCVSubtarget>(); 312 const RISCVInstrInfo *TII = STI.getInstrInfo(); 313 314 // FIXME: CFI Information here is nonexistent/wrong. 315 316 // Disable interrupts. 317 BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRCI)) 318 .addReg(RISCV::X0, RegState::Define) 319 .addImm(RISCVSysReg::mstatus) 320 .addImm(8) 321 .setMIFlag(MachineInstr::FrameSetup); 322 323 // Restore `mepc` from x9 (s1), and `mcause` from x8 (s0). If either were used 324 // in the function, they have already been restored once, so now have the 325 // value stored in `emitSiFiveCLICPreemptibleSaves`. 
326 BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW)) 327 .addReg(RISCV::X0, RegState::Define) 328 .addImm(RISCVSysReg::mepc) 329 .addReg(RISCV::X9, RegState::Kill) 330 .setMIFlag(MachineInstr::FrameSetup); 331 BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW)) 332 .addReg(RISCV::X0, RegState::Define) 333 .addImm(RISCVSysReg::mcause) 334 .addReg(RISCV::X8, RegState::Kill) 335 .setMIFlag(MachineInstr::FrameSetup); 336 337 // X8 and X9 need to be restored to their values on function entry, which we 338 // saved onto the stack in `emitSiFiveCLICPreemptibleSaves`. 339 TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X9, 340 RVFI->getInterruptCSRFrameIndex(1), 341 &RISCV::GPRRegClass, STI.getRegisterInfo(), 342 Register(), MachineInstr::FrameSetup); 343 TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X8, 344 RVFI->getInterruptCSRFrameIndex(0), 345 &RISCV::GPRRegClass, STI.getRegisterInfo(), 346 Register(), MachineInstr::FrameSetup); 347 } 348 349 // Get the ID of the libcall used for spilling and restoring callee saved 350 // registers. The ID is representative of the number of registers saved or 351 // restored by the libcall, except it is zero-indexed - ID 0 corresponds to a 352 // single register. 353 static int getLibCallID(const MachineFunction &MF, 354 const std::vector<CalleeSavedInfo> &CSI) { 355 const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 356 357 if (CSI.empty() || !RVFI->useSaveRestoreLibCalls(MF)) 358 return -1; 359 360 MCRegister MaxReg; 361 for (auto &CS : CSI) 362 // assignCalleeSavedSpillSlots assigns negative frame indexes to 363 // registers which can be saved by libcall. 
364 if (CS.getFrameIdx() < 0) 365 MaxReg = std::max(MaxReg.id(), CS.getReg().id()); 366 367 if (!MaxReg) 368 return -1; 369 370 switch (MaxReg.id()) { 371 default: 372 llvm_unreachable("Something has gone wrong!"); 373 // clang-format off 374 case /*s11*/ RISCV::X27: return 12; 375 case /*s10*/ RISCV::X26: return 11; 376 case /*s9*/ RISCV::X25: return 10; 377 case /*s8*/ RISCV::X24: return 9; 378 case /*s7*/ RISCV::X23: return 8; 379 case /*s6*/ RISCV::X22: return 7; 380 case /*s5*/ RISCV::X21: return 6; 381 case /*s4*/ RISCV::X20: return 5; 382 case /*s3*/ RISCV::X19: return 4; 383 case /*s2*/ RISCV::X18: return 3; 384 case /*s1*/ RISCV::X9: return 2; 385 case /*s0*/ FPReg: return 1; 386 case /*ra*/ RAReg: return 0; 387 // clang-format on 388 } 389 } 390 391 // Get the name of the libcall used for spilling callee saved registers. 392 // If this function will not use save/restore libcalls, then return a nullptr. 393 static const char * 394 getSpillLibCallName(const MachineFunction &MF, 395 const std::vector<CalleeSavedInfo> &CSI) { 396 static const char *const SpillLibCalls[] = { 397 "__riscv_save_0", 398 "__riscv_save_1", 399 "__riscv_save_2", 400 "__riscv_save_3", 401 "__riscv_save_4", 402 "__riscv_save_5", 403 "__riscv_save_6", 404 "__riscv_save_7", 405 "__riscv_save_8", 406 "__riscv_save_9", 407 "__riscv_save_10", 408 "__riscv_save_11", 409 "__riscv_save_12" 410 }; 411 412 int LibCallID = getLibCallID(MF, CSI); 413 if (LibCallID == -1) 414 return nullptr; 415 return SpillLibCalls[LibCallID]; 416 } 417 418 // Get the name of the libcall used for restoring callee saved registers. 419 // If this function will not use save/restore libcalls, then return a nullptr. 
420 static const char * 421 getRestoreLibCallName(const MachineFunction &MF, 422 const std::vector<CalleeSavedInfo> &CSI) { 423 static const char *const RestoreLibCalls[] = { 424 "__riscv_restore_0", 425 "__riscv_restore_1", 426 "__riscv_restore_2", 427 "__riscv_restore_3", 428 "__riscv_restore_4", 429 "__riscv_restore_5", 430 "__riscv_restore_6", 431 "__riscv_restore_7", 432 "__riscv_restore_8", 433 "__riscv_restore_9", 434 "__riscv_restore_10", 435 "__riscv_restore_11", 436 "__riscv_restore_12" 437 }; 438 439 int LibCallID = getLibCallID(MF, CSI); 440 if (LibCallID == -1) 441 return nullptr; 442 return RestoreLibCalls[LibCallID]; 443 } 444 445 // Get the max reg of Push/Pop for restoring callee saved registers. 446 static unsigned getNumPushPopRegs(const std::vector<CalleeSavedInfo> &CSI) { 447 unsigned NumPushPopRegs = 0; 448 for (auto &CS : CSI) { 449 auto *FII = llvm::find_if(FixedCSRFIMap, 450 [&](MCPhysReg P) { return P == CS.getReg(); }); 451 if (FII != std::end(FixedCSRFIMap)) { 452 unsigned RegNum = std::distance(std::begin(FixedCSRFIMap), FII); 453 NumPushPopRegs = std::max(NumPushPopRegs, RegNum + 1); 454 } 455 } 456 assert(NumPushPopRegs != 12 && "x26 requires x27 to also be pushed"); 457 return NumPushPopRegs; 458 } 459 460 // Return true if the specified function should have a dedicated frame 461 // pointer register. This is true if frame pointer elimination is 462 // disabled, if it needs dynamic stack realignment, if the function has 463 // variable sized allocas, or if the frame address is taken. 
464 bool RISCVFrameLowering::hasFPImpl(const MachineFunction &MF) const { 465 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 466 467 const MachineFrameInfo &MFI = MF.getFrameInfo(); 468 return MF.getTarget().Options.DisableFramePointerElim(MF) || 469 RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || 470 MFI.isFrameAddressTaken(); 471 } 472 473 bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { 474 const MachineFrameInfo &MFI = MF.getFrameInfo(); 475 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 476 477 // If we do not reserve stack space for outgoing arguments in prologue, 478 // we will adjust the stack pointer before call instruction. After the 479 // adjustment, we can not use SP to access the stack objects for the 480 // arguments. Instead, use BP to access these stack objects. 481 return (MFI.hasVarSizedObjects() || 482 (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() || 483 MFI.getMaxCallFrameSize() != 0))) && 484 TRI->hasStackRealignment(MF); 485 } 486 487 // Determines the size of the frame and maximum call frame size. 488 void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { 489 MachineFrameInfo &MFI = MF.getFrameInfo(); 490 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 491 492 // Get the number of bytes to allocate from the FrameInfo. 493 uint64_t FrameSize = MFI.getStackSize(); 494 495 // QCI Interrupts use at least 96 bytes of stack space 496 if (RVFI->useQCIInterrupt(MF)) 497 FrameSize = std::max(FrameSize, QCIInterruptPushAmount); 498 499 // Get the alignment. 500 Align StackAlign = getStackAlign(); 501 502 // Make sure the frame is aligned. 503 FrameSize = alignTo(FrameSize, StackAlign); 504 505 // Update frame info. 506 MFI.setStackSize(FrameSize); 507 508 // When using SP or BP to access stack objects, we may require extra padding 509 // to ensure the bottom of the RVV stack is correctly aligned within the main 510 // stack. 
We calculate this as the amount required to align the scalar local 511 // variable section up to the RVV alignment. 512 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 513 if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) { 514 int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() - 515 RVFI->getVarArgsSaveSize(); 516 if (auto RVVPadding = 517 offsetToAlignment(ScalarLocalVarSize, RVFI->getRVVStackAlign())) 518 RVFI->setRVVPadding(RVVPadding); 519 } 520 } 521 522 // Returns the stack size including RVV padding (when required), rounded back 523 // up to the required stack alignment. 524 uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding( 525 const MachineFunction &MF) const { 526 const MachineFrameInfo &MFI = MF.getFrameInfo(); 527 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 528 return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign()); 529 } 530 531 static SmallVector<CalleeSavedInfo, 8> 532 getUnmanagedCSI(const MachineFunction &MF, 533 const std::vector<CalleeSavedInfo> &CSI) { 534 const MachineFrameInfo &MFI = MF.getFrameInfo(); 535 SmallVector<CalleeSavedInfo, 8> NonLibcallCSI; 536 537 for (auto &CS : CSI) { 538 int FI = CS.getFrameIdx(); 539 if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::Default) 540 NonLibcallCSI.push_back(CS); 541 } 542 543 return NonLibcallCSI; 544 } 545 546 static SmallVector<CalleeSavedInfo, 8> 547 getRVVCalleeSavedInfo(const MachineFunction &MF, 548 const std::vector<CalleeSavedInfo> &CSI) { 549 const MachineFrameInfo &MFI = MF.getFrameInfo(); 550 SmallVector<CalleeSavedInfo, 8> RVVCSI; 551 552 for (auto &CS : CSI) { 553 int FI = CS.getFrameIdx(); 554 if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) 555 RVVCSI.push_back(CS); 556 } 557 558 return RVVCSI; 559 } 560 561 static SmallVector<CalleeSavedInfo, 8> 562 getPushOrLibCallsSavedInfo(const MachineFunction &MF, 563 const std::vector<CalleeSavedInfo> &CSI) { 564 auto *RVFI = 
MF.getInfo<RISCVMachineFunctionInfo>(); 565 566 SmallVector<CalleeSavedInfo, 8> PushOrLibCallsCSI; 567 if (!RVFI->useSaveRestoreLibCalls(MF) && !RVFI->isPushable(MF)) 568 return PushOrLibCallsCSI; 569 570 for (const auto &CS : CSI) { 571 if (RVFI->useQCIInterrupt(MF)) { 572 // Some registers are saved by both `QC.C.MIENTER(.NEST)` and 573 // `QC.CM.PUSH(FP)`. In these cases, prioritise the CFI info that points 574 // to the versions saved by `QC.C.MIENTER(.NEST)` which is what FP 575 // unwinding would use. 576 if (llvm::is_contained(llvm::make_first_range(FixedCSRFIQCIInterruptMap), 577 CS.getReg())) 578 continue; 579 } 580 581 if (llvm::is_contained(FixedCSRFIMap, CS.getReg())) 582 PushOrLibCallsCSI.push_back(CS); 583 } 584 585 return PushOrLibCallsCSI; 586 } 587 588 static SmallVector<CalleeSavedInfo, 8> 589 getQCISavedInfo(const MachineFunction &MF, 590 const std::vector<CalleeSavedInfo> &CSI) { 591 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 592 593 SmallVector<CalleeSavedInfo, 8> QCIInterruptCSI; 594 if (!RVFI->useQCIInterrupt(MF)) 595 return QCIInterruptCSI; 596 597 for (const auto &CS : CSI) { 598 if (llvm::is_contained(llvm::make_first_range(FixedCSRFIQCIInterruptMap), 599 CS.getReg())) 600 QCIInterruptCSI.push_back(CS); 601 } 602 603 return QCIInterruptCSI; 604 } 605 606 void RISCVFrameLowering::allocateAndProbeStackForRVV( 607 MachineFunction &MF, MachineBasicBlock &MBB, 608 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount, 609 MachineInstr::MIFlag Flag, bool EmitCFI, bool DynAllocation) const { 610 assert(Amount != 0 && "Did not need to adjust stack pointer for RVV."); 611 612 // Emit a variable-length allocation probing loop. 
613 614 // Get VLEN in TargetReg 615 const RISCVInstrInfo *TII = STI.getInstrInfo(); 616 Register TargetReg = RISCV::X6; 617 uint32_t NumOfVReg = Amount / RISCV::RVVBytesPerBlock; 618 BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoReadVLENB), TargetReg) 619 .setMIFlag(Flag); 620 TII->mulImm(MF, MBB, MBBI, DL, TargetReg, NumOfVReg, Flag); 621 622 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); 623 if (EmitCFI) { 624 // Set the CFA register to TargetReg. 625 CFIBuilder.buildDefCFA(TargetReg, -Amount); 626 } 627 628 // It will be expanded to a probe loop in `inlineStackProbe`. 629 BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC_RVV)) 630 .addReg(TargetReg); 631 632 if (EmitCFI) { 633 // Set the CFA register back to SP. 634 CFIBuilder.buildDefCFARegister(SPReg); 635 } 636 637 // SUB SP, SP, T1 638 BuildMI(MBB, MBBI, DL, TII->get(RISCV::SUB), SPReg) 639 .addReg(SPReg) 640 .addReg(TargetReg) 641 .setMIFlag(Flag); 642 643 // If we have a dynamic allocation later we need to probe any residuals. 644 if (DynAllocation) { 645 BuildMI(MBB, MBBI, DL, TII->get(STI.is64Bit() ? RISCV::SD : RISCV::SW)) 646 .addReg(RISCV::X0) 647 .addReg(SPReg) 648 .addImm(0) 649 .setMIFlags(MachineInstr::FrameSetup); 650 } 651 } 652 653 static void appendScalableVectorExpression(const TargetRegisterInfo &TRI, 654 SmallVectorImpl<char> &Expr, 655 int FixedOffset, int ScalableOffset, 656 llvm::raw_string_ostream &Comment) { 657 unsigned DwarfVLenB = TRI.getDwarfRegNum(RISCV::VLENB, true); 658 uint8_t Buffer[16]; 659 if (FixedOffset) { 660 Expr.push_back(dwarf::DW_OP_consts); 661 Expr.append(Buffer, Buffer + encodeSLEB128(FixedOffset, Buffer)); 662 Expr.push_back((uint8_t)dwarf::DW_OP_plus); 663 Comment << (FixedOffset < 0 ? 
" - " : " + ") << std::abs(FixedOffset); 664 } 665 666 Expr.push_back((uint8_t)dwarf::DW_OP_consts); 667 Expr.append(Buffer, Buffer + encodeSLEB128(ScalableOffset, Buffer)); 668 669 Expr.push_back((uint8_t)dwarf::DW_OP_bregx); 670 Expr.append(Buffer, Buffer + encodeULEB128(DwarfVLenB, Buffer)); 671 Expr.push_back(0); 672 673 Expr.push_back((uint8_t)dwarf::DW_OP_mul); 674 Expr.push_back((uint8_t)dwarf::DW_OP_plus); 675 676 Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(ScalableOffset) 677 << " * vlenb"; 678 } 679 680 static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, 681 Register Reg, 682 uint64_t FixedOffset, 683 uint64_t ScalableOffset) { 684 assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV"); 685 SmallString<64> Expr; 686 std::string CommentBuffer; 687 llvm::raw_string_ostream Comment(CommentBuffer); 688 // Build up the expression (Reg + FixedOffset + ScalableOffset * VLENB). 689 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); 690 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); 691 Expr.push_back(0); 692 if (Reg == SPReg) 693 Comment << "sp"; 694 else 695 Comment << printReg(Reg, &TRI); 696 697 appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset, 698 Comment); 699 700 SmallString<64> DefCfaExpr; 701 uint8_t Buffer[16]; 702 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); 703 DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer)); 704 DefCfaExpr.append(Expr.str()); 705 706 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(), 707 Comment.str()); 708 } 709 710 static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI, 711 Register Reg, uint64_t FixedOffset, 712 uint64_t ScalableOffset) { 713 assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV"); 714 SmallString<64> Expr; 715 std::string CommentBuffer; 716 llvm::raw_string_ostream Comment(CommentBuffer); 717 Comment << printReg(Reg, &TRI) << " @ cfa"; 718 719 
// Build up the expression (FixedOffset + ScalableOffset * VLENB). 720 appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset, 721 Comment); 722 723 SmallString<64> DefCfaExpr; 724 uint8_t Buffer[16]; 725 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); 726 DefCfaExpr.push_back(dwarf::DW_CFA_expression); 727 DefCfaExpr.append(Buffer, Buffer + encodeULEB128(DwarfReg, Buffer)); 728 DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer)); 729 DefCfaExpr.append(Expr.str()); 730 731 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(), 732 Comment.str()); 733 } 734 735 // Allocate stack space and probe it if necessary. 736 void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, 737 MachineBasicBlock::iterator MBBI, 738 MachineFunction &MF, uint64_t Offset, 739 uint64_t RealStackSize, bool EmitCFI, 740 bool NeedProbe, uint64_t ProbeSize, 741 bool DynAllocation, 742 MachineInstr::MIFlag Flag) const { 743 DebugLoc DL; 744 const RISCVRegisterInfo *RI = STI.getRegisterInfo(); 745 const RISCVInstrInfo *TII = STI.getInstrInfo(); 746 bool IsRV64 = STI.is64Bit(); 747 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); 748 749 // Simply allocate the stack if it's not big enough to require a probe. 750 if (!NeedProbe || Offset <= ProbeSize) { 751 RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset), 752 Flag, getStackAlign()); 753 754 if (EmitCFI) 755 CFIBuilder.buildDefCFAOffset(RealStackSize); 756 757 if (NeedProbe && DynAllocation) { 758 // s[d|w] zero, 0(sp) 759 BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) 760 .addReg(RISCV::X0) 761 .addReg(SPReg) 762 .addImm(0) 763 .setMIFlags(Flag); 764 } 765 766 return; 767 } 768 769 // Unroll the probe loop depending on the number of iterations. 
770 if (Offset < ProbeSize * 5) { 771 uint64_t CFAAdjust = RealStackSize - Offset; 772 773 uint64_t CurrentOffset = 0; 774 while (CurrentOffset + ProbeSize <= Offset) { 775 RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, 776 StackOffset::getFixed(-ProbeSize), Flag, getStackAlign()); 777 // s[d|w] zero, 0(sp) 778 BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) 779 .addReg(RISCV::X0) 780 .addReg(SPReg) 781 .addImm(0) 782 .setMIFlags(Flag); 783 784 CurrentOffset += ProbeSize; 785 if (EmitCFI) 786 CFIBuilder.buildDefCFAOffset(CurrentOffset + CFAAdjust); 787 } 788 789 uint64_t Residual = Offset - CurrentOffset; 790 if (Residual) { 791 RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, 792 StackOffset::getFixed(-Residual), Flag, getStackAlign()); 793 if (EmitCFI) 794 CFIBuilder.buildDefCFAOffset(RealStackSize); 795 796 if (DynAllocation) { 797 // s[d|w] zero, 0(sp) 798 BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) 799 .addReg(RISCV::X0) 800 .addReg(SPReg) 801 .addImm(0) 802 .setMIFlags(Flag); 803 } 804 } 805 806 return; 807 } 808 809 // Emit a variable-length allocation probing loop. 810 uint64_t RoundedSize = alignDown(Offset, ProbeSize); 811 uint64_t Residual = Offset - RoundedSize; 812 813 Register TargetReg = RISCV::X6; 814 // SUB TargetReg, SP, RoundedSize 815 RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg, 816 StackOffset::getFixed(-RoundedSize), Flag, getStackAlign()); 817 818 if (EmitCFI) { 819 // Set the CFA register to TargetReg. 820 CFIBuilder.buildDefCFA(TargetReg, RoundedSize); 821 } 822 823 // It will be expanded to a probe loop in `inlineStackProbe`. 824 BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC)).addReg(TargetReg); 825 826 if (EmitCFI) { 827 // Set the CFA register back to SP. 
828 CFIBuilder.buildDefCFARegister(SPReg); 829 } 830 831 if (Residual) { 832 RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual), 833 Flag, getStackAlign()); 834 if (DynAllocation) { 835 // s[d|w] zero, 0(sp) 836 BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) 837 .addReg(RISCV::X0) 838 .addReg(SPReg) 839 .addImm(0) 840 .setMIFlags(Flag); 841 } 842 } 843 844 if (EmitCFI) 845 CFIBuilder.buildDefCFAOffset(Offset); 846 } 847 848 static bool isPush(unsigned Opcode) { 849 switch (Opcode) { 850 case RISCV::CM_PUSH: 851 case RISCV::QC_CM_PUSH: 852 case RISCV::QC_CM_PUSHFP: 853 return true; 854 default: 855 return false; 856 } 857 } 858 859 static bool isPop(unsigned Opcode) { 860 // There are other pops but these are the only ones introduced during this 861 // pass. 862 switch (Opcode) { 863 case RISCV::CM_POP: 864 case RISCV::QC_CM_POP: 865 return true; 866 default: 867 return false; 868 } 869 } 870 871 static unsigned getPushOpcode(RISCVMachineFunctionInfo::PushPopKind Kind, 872 bool UpdateFP) { 873 switch (Kind) { 874 case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp: 875 return RISCV::CM_PUSH; 876 case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp: 877 return UpdateFP ? RISCV::QC_CM_PUSHFP : RISCV::QC_CM_PUSH; 878 default: 879 llvm_unreachable("Unhandled PushPopKind"); 880 } 881 } 882 883 static unsigned getPopOpcode(RISCVMachineFunctionInfo::PushPopKind Kind) { 884 // There are other pops but they are introduced later by the Push/Pop 885 // Optimizer. 
886 switch (Kind) { 887 case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp: 888 return RISCV::CM_POP; 889 case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp: 890 return RISCV::QC_CM_POP; 891 default: 892 llvm_unreachable("Unhandled PushPopKind"); 893 } 894 } 895 896 void RISCVFrameLowering::emitPrologue(MachineFunction &MF, 897 MachineBasicBlock &MBB) const { 898 MachineFrameInfo &MFI = MF.getFrameInfo(); 899 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 900 const RISCVRegisterInfo *RI = STI.getRegisterInfo(); 901 MachineBasicBlock::iterator MBBI = MBB.begin(); 902 903 Register BPReg = RISCVABI::getBPReg(); 904 905 // Debug location must be unknown since the first debug location is used 906 // to determine the end of the prologue. 907 DebugLoc DL; 908 909 // All calls are tail calls in GHC calling conv, and functions have no 910 // prologue/epilogue. 911 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 912 return; 913 914 // SiFive CLIC needs to swap `sp` into `sf.mscratchcsw` 915 emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL); 916 917 // Emit prologue for shadow call stack. 918 emitSCSPrologue(MF, MBB, MBBI, DL); 919 920 // We keep track of the first instruction because it might be a 921 // `(QC.)CM.PUSH(FP)`, and we may need to adjust the immediate rather than 922 // inserting an `addi sp, sp, -N*16` 923 auto PossiblePush = MBBI; 924 925 // Skip past all callee-saved register spill instructions. 926 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) 927 ++MBBI; 928 929 // Determine the correct frame layout 930 determineFrameLayout(MF); 931 932 const auto &CSI = MFI.getCalleeSavedInfo(); 933 934 // Skip to before the spills of scalar callee-saved registers 935 // FIXME: assumes exactly one instruction is used to restore each 936 // callee-saved register. 
937 MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() + 938 getUnmanagedCSI(MF, CSI).size()); 939 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); 940 941 // If libcalls are used to spill and restore callee-saved registers, the frame 942 // has two sections; the opaque section managed by the libcalls, and the 943 // section managed by MachineFrameInfo which can also hold callee saved 944 // registers in fixed stack slots, both of which have negative frame indices. 945 // This gets even more complicated when incoming arguments are passed via the 946 // stack, as these too have negative frame indices. An example is detailed 947 // below: 948 // 949 // | incoming arg | <- FI[-3] 950 // | libcallspill | 951 // | calleespill | <- FI[-2] 952 // | calleespill | <- FI[-1] 953 // | this_frame | <- FI[0] 954 // 955 // For negative frame indices, the offset from the frame pointer will differ 956 // depending on which of these groups the frame index applies to. 957 // The following calculates the correct offset knowing the number of callee 958 // saved registers spilt by the two methods. 959 if (int LibCallRegs = getLibCallID(MF, MFI.getCalleeSavedInfo()) + 1) { 960 // Calculate the size of the frame managed by the libcall. The stack 961 // alignment of these libcalls should be the same as how we set it in 962 // getABIStackAlignment. 963 unsigned LibCallFrameSize = 964 alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign()); 965 RVFI->setLibCallStackSize(LibCallFrameSize); 966 967 CFIBuilder.buildDefCFAOffset(LibCallFrameSize); 968 for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) 969 CFIBuilder.buildOffset(CS.getReg(), 970 MFI.getObjectOffset(CS.getFrameIdx())); 971 } 972 973 // FIXME (note copied from Lanai): This appears to be overallocating. Needs 974 // investigation. Get the number of bytes to allocate from the FrameInfo. 
975 uint64_t RealStackSize = getStackSizeWithRVVPadding(MF); 976 uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize(); 977 uint64_t RVVStackSize = RVFI->getRVVStackSize(); 978 979 // Early exit if there is no need to allocate on the stack 980 if (RealStackSize == 0 && !MFI.adjustsStack() && RVVStackSize == 0) 981 return; 982 983 // If the stack pointer has been marked as reserved, then produce an error if 984 // the frame requires stack allocation 985 if (STI.isRegisterReservedByUser(SPReg)) 986 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 987 MF.getFunction(), "Stack pointer required, but has been reserved."}); 988 989 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); 990 // Split the SP adjustment to reduce the offsets of callee saved spill. 991 if (FirstSPAdjustAmount) { 992 StackSize = FirstSPAdjustAmount; 993 RealStackSize = FirstSPAdjustAmount; 994 } 995 996 if (RVFI->useQCIInterrupt(MF)) { 997 // The function starts with `QC.C.MIENTER(.NEST)`, so the `(QC.)CM.PUSH(FP)` 998 // could only be the next instruction. 999 ++PossiblePush; 1000 1001 // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)` 1002 // could be. The PUSH will also get its own CFI metadata for its own 1003 // modifications, which should come after the PUSH. 1004 CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup); 1005 PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount); 1006 for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI)) 1007 PushCFIBuilder.buildOffset(CS.getReg(), 1008 MFI.getObjectOffset(CS.getFrameIdx())); 1009 } 1010 1011 if (RVFI->isPushable(MF) && PossiblePush != MBB.end() && 1012 isPush(PossiblePush->getOpcode())) { 1013 // Use available stack adjustment in push instruction to allocate additional 1014 // stack space. Align the stack size down to a multiple of 16. This is 1015 // needed for RVE. 1016 // FIXME: Can we increase the stack size to a multiple of 16 instead? 
1017 uint64_t StackAdj = 1018 std::min(alignDown(StackSize, 16), static_cast<uint64_t>(48)); 1019 PossiblePush->getOperand(1).setImm(StackAdj); 1020 StackSize -= StackAdj; 1021 1022 CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize); 1023 for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) 1024 CFIBuilder.buildOffset(CS.getReg(), 1025 MFI.getObjectOffset(CS.getFrameIdx())); 1026 } 1027 1028 // Allocate space on the stack if necessary. 1029 auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 1030 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); 1031 bool NeedProbe = TLI->hasInlineStackProbe(MF); 1032 uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign()); 1033 bool DynAllocation = 1034 MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation(); 1035 if (StackSize != 0) 1036 allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true, 1037 NeedProbe, ProbeSize, DynAllocation, 1038 MachineInstr::FrameSetup); 1039 1040 // Save SiFive CLIC CSRs into Stack 1041 emitSiFiveCLICPreemptibleSaves(MF, MBB, MBBI, DL); 1042 1043 // The frame pointer is callee-saved, and code has been generated for us to 1044 // save it to the stack. We need to skip over the storing of callee-saved 1045 // registers as the frame pointer must be modified after it has been saved 1046 // to the stack, not before. 1047 // FIXME: assumes exactly one instruction is used to save each callee-saved 1048 // register. 1049 std::advance(MBBI, getUnmanagedCSI(MF, CSI).size()); 1050 CFIBuilder.setInsertPoint(MBBI); 1051 1052 // Iterate over list of callee-saved registers and emit .cfi_offset 1053 // directives. 1054 for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) 1055 CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx())); 1056 1057 // Generate new FP. 
1058 if (hasFP(MF)) { 1059 if (STI.isRegisterReservedByUser(FPReg)) 1060 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 1061 MF.getFunction(), "Frame pointer required, but has been reserved."}); 1062 // The frame pointer does need to be reserved from register allocation. 1063 assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved"); 1064 1065 // Some stack management variants automatically keep FP updated, so we don't 1066 // need an instruction to do so. 1067 if (!RVFI->hasImplicitFPUpdates(MF)) { 1068 RI->adjustReg( 1069 MBB, MBBI, DL, FPReg, SPReg, 1070 StackOffset::getFixed(RealStackSize - RVFI->getVarArgsSaveSize()), 1071 MachineInstr::FrameSetup, getStackAlign()); 1072 } 1073 1074 CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize()); 1075 } 1076 1077 uint64_t SecondSPAdjustAmount = 0; 1078 // Emit the second SP adjustment after saving callee saved registers. 1079 if (FirstSPAdjustAmount) { 1080 SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; 1081 assert(SecondSPAdjustAmount > 0 && 1082 "SecondSPAdjustAmount should be greater than zero"); 1083 1084 allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount, 1085 getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe, 1086 ProbeSize, DynAllocation, MachineInstr::FrameSetup); 1087 } 1088 1089 if (RVVStackSize) { 1090 if (NeedProbe) { 1091 allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize, 1092 MachineInstr::FrameSetup, !hasFP(MF), 1093 DynAllocation); 1094 } else { 1095 // We must keep the stack pointer aligned through any intermediate 1096 // updates. 1097 RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, 1098 StackOffset::getScalable(-RVVStackSize), 1099 MachineInstr::FrameSetup, getStackAlign()); 1100 } 1101 1102 if (!hasFP(MF)) { 1103 // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb". 
1104 CFIBuilder.insertCFIInst(createDefCFAExpression( 1105 *RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8)); 1106 } 1107 1108 std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size()); 1109 emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF)); 1110 } 1111 1112 if (hasFP(MF)) { 1113 // Realign Stack 1114 const RISCVRegisterInfo *RI = STI.getRegisterInfo(); 1115 if (RI->hasStackRealignment(MF)) { 1116 Align MaxAlignment = MFI.getMaxAlign(); 1117 1118 const RISCVInstrInfo *TII = STI.getInstrInfo(); 1119 if (isInt<12>(-(int)MaxAlignment.value())) { 1120 BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg) 1121 .addReg(SPReg) 1122 .addImm(-(int)MaxAlignment.value()) 1123 .setMIFlag(MachineInstr::FrameSetup); 1124 } else { 1125 unsigned ShiftAmount = Log2(MaxAlignment); 1126 Register VR = 1127 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); 1128 BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR) 1129 .addReg(SPReg) 1130 .addImm(ShiftAmount) 1131 .setMIFlag(MachineInstr::FrameSetup); 1132 BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg) 1133 .addReg(VR) 1134 .addImm(ShiftAmount) 1135 .setMIFlag(MachineInstr::FrameSetup); 1136 } 1137 if (NeedProbe && RVVStackSize == 0) { 1138 // Do a probe if the align + size allocated just passed the probe size 1139 // and was not yet probed. 1140 if (SecondSPAdjustAmount < ProbeSize && 1141 SecondSPAdjustAmount + MaxAlignment.value() >= ProbeSize) { 1142 bool IsRV64 = STI.is64Bit(); 1143 BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) 1144 .addReg(RISCV::X0) 1145 .addReg(SPReg) 1146 .addImm(0) 1147 .setMIFlags(MachineInstr::FrameSetup); 1148 } 1149 } 1150 // FP will be used to restore the frame in the epilogue, so we need 1151 // another base register BP to record SP after re-alignment. SP will 1152 // track the current stack after allocating variable sized objects. 
1153 if (hasBP(MF)) { 1154 // move BP, SP 1155 BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), BPReg) 1156 .addReg(SPReg) 1157 .addImm(0) 1158 .setMIFlag(MachineInstr::FrameSetup); 1159 } 1160 } 1161 } 1162 } 1163 1164 void RISCVFrameLowering::deallocateStack(MachineFunction &MF, 1165 MachineBasicBlock &MBB, 1166 MachineBasicBlock::iterator MBBI, 1167 const DebugLoc &DL, 1168 uint64_t &StackSize, 1169 int64_t CFAOffset) const { 1170 const RISCVRegisterInfo *RI = STI.getRegisterInfo(); 1171 1172 RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize), 1173 MachineInstr::FrameDestroy, getStackAlign()); 1174 StackSize = 0; 1175 1176 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy) 1177 .buildDefCFAOffset(CFAOffset); 1178 } 1179 1180 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, 1181 MachineBasicBlock &MBB) const { 1182 const RISCVRegisterInfo *RI = STI.getRegisterInfo(); 1183 MachineFrameInfo &MFI = MF.getFrameInfo(); 1184 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 1185 1186 // All calls are tail calls in GHC calling conv, and functions have no 1187 // prologue/epilogue. 1188 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 1189 return; 1190 1191 // Get the insert location for the epilogue. If there were no terminators in 1192 // the block, get the last instruction. 1193 MachineBasicBlock::iterator MBBI = MBB.end(); 1194 DebugLoc DL; 1195 if (!MBB.empty()) { 1196 MBBI = MBB.getLastNonDebugInstr(); 1197 if (MBBI != MBB.end()) 1198 DL = MBBI->getDebugLoc(); 1199 1200 MBBI = MBB.getFirstTerminator(); 1201 1202 // Skip to before the restores of all callee-saved registers. 1203 while (MBBI != MBB.begin() && 1204 std::prev(MBBI)->getFlag(MachineInstr::FrameDestroy)) 1205 --MBBI; 1206 } 1207 1208 const auto &CSI = MFI.getCalleeSavedInfo(); 1209 1210 // Skip to before the restores of scalar callee-saved registers 1211 // FIXME: assumes exactly one instruction is used to restore each 1212 // callee-saved register. 
1213 auto FirstScalarCSRRestoreInsn = 1214 std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size()); 1215 CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn, 1216 MachineInstr::FrameDestroy); 1217 1218 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); 1219 uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount 1220 : getStackSizeWithRVVPadding(MF); 1221 uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount 1222 : getStackSizeWithRVVPadding(MF) - 1223 RVFI->getReservedSpillsSize(); 1224 uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); 1225 uint64_t RVVStackSize = RVFI->getRVVStackSize(); 1226 1227 bool RestoreSPFromFP = RI->hasStackRealignment(MF) || 1228 MFI.hasVarSizedObjects() || !hasReservedCallFrame(MF); 1229 if (RVVStackSize) { 1230 // If RestoreSPFromFP the stack pointer will be restored using the frame 1231 // pointer value. 1232 if (!RestoreSPFromFP) 1233 RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg, 1234 StackOffset::getScalable(RVVStackSize), 1235 MachineInstr::FrameDestroy, getStackAlign()); 1236 1237 if (!hasFP(MF)) 1238 CFIBuilder.buildDefCFA(SPReg, RealStackSize); 1239 1240 emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn); 1241 } 1242 1243 if (FirstSPAdjustAmount) { 1244 uint64_t SecondSPAdjustAmount = 1245 getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; 1246 assert(SecondSPAdjustAmount > 0 && 1247 "SecondSPAdjustAmount should be greater than zero"); 1248 1249 // If RestoreSPFromFP the stack pointer will be restored using the frame 1250 // pointer value. 1251 if (!RestoreSPFromFP) 1252 RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg, 1253 StackOffset::getFixed(SecondSPAdjustAmount), 1254 MachineInstr::FrameDestroy, getStackAlign()); 1255 1256 if (!hasFP(MF)) 1257 CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount); 1258 } 1259 1260 // Restore the stack pointer using the value of the frame pointer. 
Only 1261 // necessary if the stack pointer was modified, meaning the stack size is 1262 // unknown. 1263 // 1264 // In order to make sure the stack point is right through the EH region, 1265 // we also need to restore stack pointer from the frame pointer if we 1266 // don't preserve stack space within prologue/epilogue for outgoing variables, 1267 // normally it's just checking the variable sized object is present or not 1268 // is enough, but we also don't preserve that at prologue/epilogue when 1269 // have vector objects in stack. 1270 if (RestoreSPFromFP) { 1271 assert(hasFP(MF) && "frame pointer should not have been eliminated"); 1272 RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, FPReg, 1273 StackOffset::getFixed(-FPOffset), MachineInstr::FrameDestroy, 1274 getStackAlign()); 1275 } 1276 1277 if (hasFP(MF)) 1278 CFIBuilder.buildDefCFA(SPReg, RealStackSize); 1279 1280 // Skip to after the restores of scalar callee-saved registers 1281 // FIXME: assumes exactly one instruction is used to restore each 1282 // callee-saved register. 1283 MBBI = std::next(FirstScalarCSRRestoreInsn, getUnmanagedCSI(MF, CSI).size()); 1284 CFIBuilder.setInsertPoint(MBBI); 1285 1286 if (getLibCallID(MF, CSI) != -1) { 1287 // tail __riscv_restore_[0-12] instruction is considered as a terminator, 1288 // therefore it is unnecessary to place any CFI instructions after it. Just 1289 // deallocate stack if needed and return. 1290 if (StackSize != 0) 1291 deallocateStack(MF, MBB, MBBI, DL, StackSize, 1292 RVFI->getLibCallStackSize()); 1293 1294 // Emit epilogue for shadow call stack. 1295 emitSCSEpilogue(MF, MBB, MBBI, DL); 1296 return; 1297 } 1298 1299 // Recover callee-saved registers. 1300 for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) 1301 CFIBuilder.buildRestore(CS.getReg()); 1302 1303 if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) { 1304 // Use available stack adjustment in pop instruction to deallocate stack 1305 // space. 
Align the stack size down to a multiple of 16. This is needed for 1306 // RVE. 1307 // FIXME: Can we increase the stack size to a multiple of 16 instead? 1308 uint64_t StackAdj = 1309 std::min(alignDown(StackSize, 16), static_cast<uint64_t>(48)); 1310 MBBI->getOperand(1).setImm(StackAdj); 1311 StackSize -= StackAdj; 1312 1313 if (StackSize != 0) 1314 deallocateStack(MF, MBB, MBBI, DL, StackSize, 1315 /*stack_adj of cm.pop instr*/ RealStackSize - StackSize); 1316 1317 auto NextI = next_nodbg(MBBI, MBB.end()); 1318 if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) { 1319 ++MBBI; 1320 CFIBuilder.setInsertPoint(MBBI); 1321 1322 for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) 1323 CFIBuilder.buildRestore(CS.getReg()); 1324 1325 // Update CFA Offset. If this is a QCI interrupt function, there will be a 1326 // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise 1327 // getQCIInterruptStackSize() will be 0. 1328 CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize()); 1329 } 1330 } 1331 1332 emitSiFiveCLICPreemptibleRestores(MF, MBB, MBBI, DL); 1333 1334 // Deallocate stack if StackSize isn't a zero yet. If this is a QCI interrupt 1335 // function, there will be a leftover offset which is deallocated by 1336 // `QC.C.MILEAVERET`, otherwise getQCIInterruptStackSize() will be 0. 1337 if (StackSize != 0) 1338 deallocateStack(MF, MBB, MBBI, DL, StackSize, 1339 RVFI->getQCIInterruptStackSize()); 1340 1341 // Emit epilogue for shadow call stack. 
1342 emitSCSEpilogue(MF, MBB, MBBI, DL); 1343 1344 // SiFive CLIC needs to swap `sf.mscratchcsw` into `sp` 1345 emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL); 1346 } 1347 1348 StackOffset 1349 RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, 1350 Register &FrameReg) const { 1351 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1352 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 1353 const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 1354 1355 // Callee-saved registers should be referenced relative to the stack 1356 // pointer (positive offset), otherwise use the frame pointer (negative 1357 // offset). 1358 const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo()); 1359 int MinCSFI = 0; 1360 int MaxCSFI = -1; 1361 StackOffset Offset; 1362 auto StackID = MFI.getStackID(FI); 1363 1364 assert((StackID == TargetStackID::Default || 1365 StackID == TargetStackID::ScalableVector) && 1366 "Unexpected stack ID for the frame object."); 1367 if (StackID == TargetStackID::Default) { 1368 assert(getOffsetOfLocalArea() == 0 && "LocalAreaOffset is not 0!"); 1369 Offset = StackOffset::getFixed(MFI.getObjectOffset(FI) + 1370 MFI.getOffsetAdjustment()); 1371 } else if (StackID == TargetStackID::ScalableVector) { 1372 Offset = StackOffset::getScalable(MFI.getObjectOffset(FI)); 1373 } 1374 1375 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); 1376 1377 if (CSI.size()) { 1378 MinCSFI = CSI[0].getFrameIdx(); 1379 MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); 1380 } 1381 1382 if (FI >= MinCSFI && FI <= MaxCSFI) { 1383 FrameReg = SPReg; 1384 1385 if (FirstSPAdjustAmount) 1386 Offset += StackOffset::getFixed(FirstSPAdjustAmount); 1387 else 1388 Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF)); 1389 return Offset; 1390 } 1391 1392 if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { 1393 // If the stack was realigned, the frame pointer is set in order to allow 1394 // SP to be restored, so we 
need another base register to record the stack 1395 // after realignment. 1396 // |--------------------------| -- <-- FP 1397 // | callee-allocated save | | <----| 1398 // | area for register varargs| | | 1399 // |--------------------------| | | 1400 // | callee-saved registers | | | 1401 // |--------------------------| -- | 1402 // | realignment (the size of | | | 1403 // | this area is not counted | | | 1404 // | in MFI.getStackSize()) | | | 1405 // |--------------------------| -- |-- MFI.getStackSize() 1406 // | RVV alignment padding | | | 1407 // | (not counted in | | | 1408 // | MFI.getStackSize() but | | | 1409 // | counted in | | | 1410 // | RVFI.getRVVStackSize()) | | | 1411 // |--------------------------| -- | 1412 // | RVV objects | | | 1413 // | (not counted in | | | 1414 // | MFI.getStackSize()) | | | 1415 // |--------------------------| -- | 1416 // | padding before RVV | | | 1417 // | (not counted in | | | 1418 // | MFI.getStackSize() or in | | | 1419 // | RVFI.getRVVStackSize()) | | | 1420 // |--------------------------| -- | 1421 // | scalar local variables | | <----' 1422 // |--------------------------| -- <-- BP (if var sized objects present) 1423 // | VarSize objects | | 1424 // |--------------------------| -- <-- SP 1425 if (hasBP(MF)) { 1426 FrameReg = RISCVABI::getBPReg(); 1427 } else { 1428 // VarSize objects must be empty in this case! 1429 assert(!MFI.hasVarSizedObjects()); 1430 FrameReg = SPReg; 1431 } 1432 } else { 1433 FrameReg = RI->getFrameRegister(MF); 1434 } 1435 1436 if (FrameReg == FPReg) { 1437 Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); 1438 // When using FP to access scalable vector objects, we need to minus 1439 // the frame size. 
1440 // 1441 // |--------------------------| -- <-- FP 1442 // | callee-allocated save | | 1443 // | area for register varargs| | 1444 // |--------------------------| | 1445 // | callee-saved registers | | 1446 // |--------------------------| | MFI.getStackSize() 1447 // | scalar local variables | | 1448 // |--------------------------| -- (Offset of RVV objects is from here.) 1449 // | RVV objects | 1450 // |--------------------------| 1451 // | VarSize objects | 1452 // |--------------------------| <-- SP 1453 if (StackID == TargetStackID::ScalableVector) { 1454 assert(!RI->hasStackRealignment(MF) && 1455 "Can't index across variable sized realign"); 1456 // We don't expect any extra RVV alignment padding, as the stack size 1457 // and RVV object sections should be correct aligned in their own 1458 // right. 1459 assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) && 1460 "Inconsistent stack layout"); 1461 Offset -= StackOffset::getFixed(MFI.getStackSize()); 1462 } 1463 return Offset; 1464 } 1465 1466 // This case handles indexing off both SP and BP. 1467 // If indexing off SP, there must not be any var sized objects 1468 assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects()); 1469 1470 // When using SP to access frame objects, we need to add RVV stack size. 
1471 // 1472 // |--------------------------| -- <-- FP 1473 // | callee-allocated save | | <----| 1474 // | area for register varargs| | | 1475 // |--------------------------| | | 1476 // | callee-saved registers | | | 1477 // |--------------------------| -- | 1478 // | RVV alignment padding | | | 1479 // | (not counted in | | | 1480 // | MFI.getStackSize() but | | | 1481 // | counted in | | | 1482 // | RVFI.getRVVStackSize()) | | | 1483 // |--------------------------| -- | 1484 // | RVV objects | | |-- MFI.getStackSize() 1485 // | (not counted in | | | 1486 // | MFI.getStackSize()) | | | 1487 // |--------------------------| -- | 1488 // | padding before RVV | | | 1489 // | (not counted in | | | 1490 // | MFI.getStackSize()) | | | 1491 // |--------------------------| -- | 1492 // | scalar local variables | | <----' 1493 // |--------------------------| -- <-- BP (if var sized objects present) 1494 // | VarSize objects | | 1495 // |--------------------------| -- <-- SP 1496 // 1497 // The total amount of padding surrounding RVV objects is described by 1498 // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV 1499 // objects to the required alignment. 1500 if (MFI.getStackID(FI) == TargetStackID::Default) { 1501 if (MFI.isFixedObjectIndex(FI)) { 1502 assert(!RI->hasStackRealignment(MF) && 1503 "Can't index across variable sized realign"); 1504 Offset += StackOffset::get(getStackSizeWithRVVPadding(MF), 1505 RVFI->getRVVStackSize()); 1506 } else { 1507 Offset += StackOffset::getFixed(MFI.getStackSize()); 1508 } 1509 } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { 1510 // Ensure the base of the RVV stack is correctly aligned: add on the 1511 // alignment padding. 
1512 int ScalarLocalVarSize = MFI.getStackSize() - 1513 RVFI->getCalleeSavedStackSize() - 1514 RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding(); 1515 Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); 1516 } 1517 return Offset; 1518 } 1519 1520 void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, 1521 BitVector &SavedRegs, 1522 RegScavenger *RS) const { 1523 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1524 // Unconditionally spill RA and FP only if the function uses a frame 1525 // pointer. 1526 if (hasFP(MF)) { 1527 SavedRegs.set(RAReg); 1528 SavedRegs.set(FPReg); 1529 } 1530 // Mark BP as used if function has dedicated base pointer. 1531 if (hasBP(MF)) 1532 SavedRegs.set(RISCVABI::getBPReg()); 1533 1534 // When using cm.push/pop we must save X27 if we save X26. 1535 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 1536 if (RVFI->isPushable(MF) && SavedRegs.test(RISCV::X26)) 1537 SavedRegs.set(RISCV::X27); 1538 1539 // SiFive Preemptible Interrupt Handlers need additional frame entries 1540 createSiFivePreemptibleInterruptFrameEntries(MF, *RVFI); 1541 } 1542 1543 std::pair<int64_t, Align> 1544 RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { 1545 MachineFrameInfo &MFI = MF.getFrameInfo(); 1546 // Create a buffer of RVV objects to allocate. 
1547 SmallVector<int, 8> ObjectsToAllocate; 1548 auto pushRVVObjects = [&](int FIBegin, int FIEnd) { 1549 for (int I = FIBegin, E = FIEnd; I != E; ++I) { 1550 unsigned StackID = MFI.getStackID(I); 1551 if (StackID != TargetStackID::ScalableVector) 1552 continue; 1553 if (MFI.isDeadObjectIndex(I)) 1554 continue; 1555 1556 ObjectsToAllocate.push_back(I); 1557 } 1558 }; 1559 // First push RVV Callee Saved object, then push RVV stack object 1560 std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo(); 1561 const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI); 1562 if (!RVVCSI.empty()) 1563 pushRVVObjects(RVVCSI[0].getFrameIdx(), 1564 RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1); 1565 pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size()); 1566 1567 // The minimum alignment is 16 bytes. 1568 Align RVVStackAlign(16); 1569 const auto &ST = MF.getSubtarget<RISCVSubtarget>(); 1570 1571 if (!ST.hasVInstructions()) { 1572 assert(ObjectsToAllocate.empty() && 1573 "Can't allocate scalable-vector objects without V instructions"); 1574 return std::make_pair(0, RVVStackAlign); 1575 } 1576 1577 // Allocate all RVV locals and spills 1578 int64_t Offset = 0; 1579 for (int FI : ObjectsToAllocate) { 1580 // ObjectSize in bytes. 1581 int64_t ObjectSize = MFI.getObjectSize(FI); 1582 auto ObjectAlign = 1583 std::max(Align(RISCV::RVVBytesPerBlock), MFI.getObjectAlign(FI)); 1584 // If the data type is the fractional vector type, reserve one vector 1585 // register for it. 1586 if (ObjectSize < RISCV::RVVBytesPerBlock) 1587 ObjectSize = RISCV::RVVBytesPerBlock; 1588 Offset = alignTo(Offset + ObjectSize, ObjectAlign); 1589 MFI.setObjectOffset(FI, -Offset); 1590 // Update the maximum alignment of the RVV stack section 1591 RVVStackAlign = std::max(RVVStackAlign, ObjectAlign); 1592 } 1593 1594 uint64_t StackSize = Offset; 1595 1596 // Ensure the alignment of the RVV stack. 
Since we want the most-aligned 1597 // object right at the bottom (i.e., any padding at the top of the frame), 1598 // readjust all RVV objects down by the alignment padding. 1599 // Stack size and offsets are multiples of vscale, stack alignment is in 1600 // bytes, we can divide stack alignment by minimum vscale to get a maximum 1601 // stack alignment multiple of vscale. 1602 auto VScale = 1603 std::max<uint64_t>(ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, 1); 1604 if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) { 1605 if (auto AlignmentPadding = 1606 offsetToAlignment(StackSize, Align(RVVStackAlignVScale))) { 1607 StackSize += AlignmentPadding; 1608 for (int FI : ObjectsToAllocate) 1609 MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding); 1610 } 1611 } 1612 1613 return std::make_pair(StackSize, RVVStackAlign); 1614 } 1615 1616 static unsigned getScavSlotsNumForRVV(MachineFunction &MF) { 1617 // For RVV spill, scalable stack offsets computing requires up to two scratch 1618 // registers 1619 static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2; 1620 1621 // For RVV spill, non-scalable stack offsets computing requires up to one 1622 // scratch register. 1623 static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1; 1624 1625 // ADDI instruction's destination register can be used for computing 1626 // offsets. So Scalable stack offsets require up to one scratch register. 
1627 static constexpr unsigned ScavSlotsADDIScalableObject = 1; 1628 1629 static constexpr unsigned MaxScavSlotsNumKnown = 1630 std::max({ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject, 1631 ScavSlotsNumRVVSpillNonScalableObject}); 1632 1633 unsigned MaxScavSlotsNum = 0; 1634 if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions()) 1635 return false; 1636 for (const MachineBasicBlock &MBB : MF) 1637 for (const MachineInstr &MI : MBB) { 1638 bool IsRVVSpill = RISCV::isRVVSpill(MI); 1639 for (auto &MO : MI.operands()) { 1640 if (!MO.isFI()) 1641 continue; 1642 bool IsScalableVectorID = MF.getFrameInfo().getStackID(MO.getIndex()) == 1643 TargetStackID::ScalableVector; 1644 if (IsRVVSpill) { 1645 MaxScavSlotsNum = std::max( 1646 MaxScavSlotsNum, IsScalableVectorID 1647 ? ScavSlotsNumRVVSpillScalableObject 1648 : ScavSlotsNumRVVSpillNonScalableObject); 1649 } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) { 1650 MaxScavSlotsNum = 1651 std::max(MaxScavSlotsNum, ScavSlotsADDIScalableObject); 1652 } 1653 } 1654 if (MaxScavSlotsNum == MaxScavSlotsNumKnown) 1655 return MaxScavSlotsNumKnown; 1656 } 1657 return MaxScavSlotsNum; 1658 } 1659 1660 static bool hasRVVFrameObject(const MachineFunction &MF) { 1661 // Originally, the function will scan all the stack objects to check whether 1662 // if there is any scalable vector object on the stack or not. However, it 1663 // causes errors in the register allocator. In issue 53016, it returns false 1664 // before RA because there is no RVV stack objects. After RA, it returns true 1665 // because there are spilling slots for RVV values during RA. It will not 1666 // reserve BP during register allocation and generate BP access in the PEI 1667 // pass due to the inconsistent behavior of the function. 1668 // 1669 // The function is changed to use hasVInstructions() as the return value. It 1670 // is not precise, but it can make the register allocation correct. 
1671 // 1672 // FIXME: Find a better way to make the decision or revisit the solution in 1673 // D103622. 1674 // 1675 // Refer to https://github.com/llvm/llvm-project/issues/53016. 1676 return MF.getSubtarget<RISCVSubtarget>().hasVInstructions(); 1677 } 1678 1679 static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF, 1680 const RISCVInstrInfo &TII) { 1681 unsigned FnSize = 0; 1682 for (auto &MBB : MF) { 1683 for (auto &MI : MBB) { 1684 // Far branches over 20-bit offset will be relaxed in branch relaxation 1685 // pass. In the worst case, conditional branches will be relaxed into 1686 // the following instruction sequence. Unconditional branches are 1687 // relaxed in the same way, with the exception that there is no first 1688 // branch instruction. 1689 // 1690 // foo 1691 // bne t5, t6, .rev_cond # `TII->getInstSizeInBytes(MI)` bytes 1692 // sd s11, 0(sp) # 4 bytes, or 2 bytes with Zca 1693 // jump .restore, s11 # 8 bytes 1694 // .rev_cond 1695 // bar 1696 // j .dest_bb # 4 bytes, or 2 bytes with Zca 1697 // .restore: 1698 // ld s11, 0(sp) # 4 bytes, or 2 bytes with Zca 1699 // .dest: 1700 // baz 1701 if (MI.isConditionalBranch()) 1702 FnSize += TII.getInstSizeInBytes(MI); 1703 if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) { 1704 if (MF.getSubtarget<RISCVSubtarget>().hasStdExtZca()) 1705 FnSize += 2 + 8 + 2 + 2; 1706 else 1707 FnSize += 4 + 8 + 4 + 4; 1708 continue; 1709 } 1710 1711 FnSize += TII.getInstSizeInBytes(MI); 1712 } 1713 } 1714 return FnSize; 1715 } 1716 1717 void RISCVFrameLowering::processFunctionBeforeFrameFinalized( 1718 MachineFunction &MF, RegScavenger *RS) const { 1719 const RISCVRegisterInfo *RegInfo = 1720 MF.getSubtarget<RISCVSubtarget>().getRegisterInfo(); 1721 const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo(); 1722 MachineFrameInfo &MFI = MF.getFrameInfo(); 1723 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 1724 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 1725 1726 
int64_t RVVStackSize; 1727 Align RVVStackAlign; 1728 std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF); 1729 1730 RVFI->setRVVStackSize(RVVStackSize); 1731 RVFI->setRVVStackAlign(RVVStackAlign); 1732 1733 if (hasRVVFrameObject(MF)) { 1734 // Ensure the entire stack is aligned to at least the RVV requirement: some 1735 // scalable-vector object alignments are not considered by the 1736 // target-independent code. 1737 MFI.ensureMaxAlignment(RVVStackAlign); 1738 } 1739 1740 unsigned ScavSlotsNum = 0; 1741 1742 // estimateStackSize has been observed to under-estimate the final stack 1743 // size, so give ourselves wiggle-room by checking for stack size 1744 // representable an 11-bit signed field rather than 12-bits. 1745 if (!isInt<11>(MFI.estimateStackSize(MF))) 1746 ScavSlotsNum = 1; 1747 1748 // Far branches over 20-bit offset require a spill slot for scratch register. 1749 bool IsLargeFunction = !isInt<20>(estimateFunctionSizeInBytes(MF, *TII)); 1750 if (IsLargeFunction) 1751 ScavSlotsNum = std::max(ScavSlotsNum, 1u); 1752 1753 // RVV loads & stores have no capacity to hold the immediate address offsets 1754 // so we must always reserve an emergency spill slot if the MachineFunction 1755 // contains any RVV spills. 
1756 ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF)); 1757 1758 for (unsigned I = 0; I < ScavSlotsNum; I++) { 1759 int FI = MFI.CreateSpillStackObject(RegInfo->getSpillSize(*RC), 1760 RegInfo->getSpillAlign(*RC)); 1761 RS->addScavengingFrameIndex(FI); 1762 1763 if (IsLargeFunction && RVFI->getBranchRelaxationScratchFrameIndex() == -1) 1764 RVFI->setBranchRelaxationScratchFrameIndex(FI); 1765 } 1766 1767 unsigned Size = RVFI->getReservedSpillsSize(); 1768 for (const auto &Info : MFI.getCalleeSavedInfo()) { 1769 int FrameIdx = Info.getFrameIdx(); 1770 if (FrameIdx < 0 || MFI.getStackID(FrameIdx) != TargetStackID::Default) 1771 continue; 1772 1773 Size += MFI.getObjectSize(FrameIdx); 1774 } 1775 RVFI->setCalleeSavedStackSize(Size); 1776 } 1777 1778 // Not preserve stack space within prologue for outgoing variables when the 1779 // function contains variable size objects or there are vector objects accessed 1780 // by the frame pointer. 1781 // Let eliminateCallFramePseudoInstr preserve stack space for it. 1782 bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 1783 return !MF.getFrameInfo().hasVarSizedObjects() && 1784 !(hasFP(MF) && hasRVVFrameObject(MF)); 1785 } 1786 1787 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions. 1788 MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( 1789 MachineFunction &MF, MachineBasicBlock &MBB, 1790 MachineBasicBlock::iterator MI) const { 1791 DebugLoc DL = MI->getDebugLoc(); 1792 1793 if (!hasReservedCallFrame(MF)) { 1794 // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and 1795 // ADJCALLSTACKUP must be converted to instructions manipulating the stack 1796 // pointer. This is necessary when there is a variable length stack 1797 // allocation (e.g. alloca), which means it's not possible to allocate 1798 // space for outgoing arguments from within the function prologue. 
1799 int64_t Amount = MI->getOperand(0).getImm(); 1800 1801 if (Amount != 0) { 1802 // Ensure the stack remains aligned after adjustment. 1803 Amount = alignSPAdjust(Amount); 1804 1805 if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN) 1806 Amount = -Amount; 1807 1808 const RISCVTargetLowering *TLI = 1809 MF.getSubtarget<RISCVSubtarget>().getTargetLowering(); 1810 int64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign()); 1811 if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) { 1812 // When stack probing is enabled, the decrement of SP may need to be 1813 // probed. We can handle both the decrement and the probing in 1814 // allocateStack. 1815 bool DynAllocation = 1816 MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation(); 1817 allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF), 1818 /*NeedProbe=*/true, ProbeSize, DynAllocation, 1819 MachineInstr::NoFlags); 1820 } else { 1821 const RISCVRegisterInfo &RI = *STI.getRegisterInfo(); 1822 RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount), 1823 MachineInstr::NoFlags, getStackAlign()); 1824 } 1825 } 1826 } 1827 1828 return MBB.erase(MI); 1829 } 1830 1831 // We would like to split the SP adjustment to reduce prologue/epilogue 1832 // as following instructions. In this way, the offset of the callee saved 1833 // register could fit in a single store. Supposed that the first sp adjust 1834 // amount is 2032. 
1835 // add sp,sp,-2032 1836 // sw ra,2028(sp) 1837 // sw s0,2024(sp) 1838 // sw s1,2020(sp) 1839 // sw s3,2012(sp) 1840 // sw s4,2008(sp) 1841 // add sp,sp,-64 1842 uint64_t 1843 RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { 1844 const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 1845 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1846 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1847 uint64_t StackSize = getStackSizeWithRVVPadding(MF); 1848 1849 // Disable SplitSPAdjust if save-restore libcall, push/pop or QCI interrupts 1850 // are used. The callee-saved registers will be pushed by the save-restore 1851 // libcalls, so we don't have to split the SP adjustment in this case. 1852 if (RVFI->getReservedSpillsSize()) 1853 return 0; 1854 1855 // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed 1856 // 12-bit and there exists a callee-saved register needing to be pushed. 1857 if (!isInt<12>(StackSize) && (CSI.size() > 0)) { 1858 // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because 1859 // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple 1860 // instructions. Offsets smaller than 2048 can fit in a single load/store 1861 // instruction, and we have to stick with the stack alignment. 2048 has 1862 // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for 1863 // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment. 1864 const uint64_t StackAlign = getStackAlign().value(); 1865 1866 // Amount of (2048 - StackAlign) will prevent callee saved and restored 1867 // instructions be compressed, so try to adjust the amount to the largest 1868 // offset that stack compression instructions accept when target supports 1869 // compression instructions. 
1870 if (STI.hasStdExtZca()) { 1871 // The compression extensions may support the following instructions: 1872 // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2) 1873 // c.swsp rs2, offset[7:2] => 2^(6 + 2) 1874 // c.flwsp rd, offset[7:2] => 2^(6 + 2) 1875 // c.fswsp rs2, offset[7:2] => 2^(6 + 2) 1876 // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3) 1877 // c.sdsp rs2, offset[8:3] => 2^(6 + 3) 1878 // c.fldsp rd, offset[8:3] => 2^(6 + 3) 1879 // c.fsdsp rs2, offset[8:3] => 2^(6 + 3) 1880 const uint64_t RVCompressLen = STI.getXLen() * 8; 1881 // Compared with amount (2048 - StackAlign), StackSize needs to 1882 // satisfy the following conditions to avoid using more instructions 1883 // to adjust the sp after adjusting the amount, such as 1884 // StackSize meets the condition (StackSize <= 2048 + RVCompressLen), 1885 // case1: Amount is 2048 - StackAlign: use addi + addi to adjust sp. 1886 // case2: Amount is RVCompressLen: use addi + addi to adjust sp. 1887 auto CanCompress = [&](uint64_t CompressLen) -> bool { 1888 if (StackSize <= 2047 + CompressLen || 1889 (StackSize > 2048 * 2 - StackAlign && 1890 StackSize <= 2047 * 2 + CompressLen) || 1891 StackSize > 2048 * 3 - StackAlign) 1892 return true; 1893 1894 return false; 1895 }; 1896 // In the epilogue, addi sp, sp, 496 is used to recover the sp and it 1897 // can be compressed(C.ADDI16SP, offset can be [-512, 496]), but 1898 // addi sp, sp, 512 can not be compressed. So try to use 496 first. 
1899 const uint64_t ADDI16SPCompressLen = 496; 1900 if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen)) 1901 return ADDI16SPCompressLen; 1902 if (CanCompress(RVCompressLen)) 1903 return RVCompressLen; 1904 } 1905 return 2048 - StackAlign; 1906 } 1907 return 0; 1908 } 1909 1910 bool RISCVFrameLowering::assignCalleeSavedSpillSlots( 1911 MachineFunction &MF, const TargetRegisterInfo *TRI, 1912 std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex, 1913 unsigned &MaxCSFrameIndex) const { 1914 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 1915 1916 // Preemptible Interrupts have two additional Callee-save Frame Indexes, 1917 // not tracked by `CSI`. 1918 if (RVFI->isSiFivePreemptibleInterrupt(MF)) { 1919 for (int I = 0; I < 2; ++I) { 1920 int FI = RVFI->getInterruptCSRFrameIndex(I); 1921 MinCSFrameIndex = std::min<unsigned>(MinCSFrameIndex, FI); 1922 MaxCSFrameIndex = std::max<unsigned>(MaxCSFrameIndex, FI); 1923 } 1924 } 1925 1926 // Early exit if no callee saved registers are modified! 1927 if (CSI.empty()) 1928 return true; 1929 1930 if (RVFI->useQCIInterrupt(MF)) { 1931 RVFI->setQCIInterruptStackSize(QCIInterruptPushAmount); 1932 } 1933 1934 if (RVFI->isPushable(MF)) { 1935 // Determine how many GPRs we need to push and save it to RVFI. 1936 unsigned PushedRegNum = getNumPushPopRegs(CSI); 1937 1938 // `QC.C.MIENTER(.NEST)` will save `ra` and `s0`, so we should only push if 1939 // we want to push more than 2 registers. Otherwise, we should push if we 1940 // want to push more than 0 registers. 1941 unsigned OnlyPushIfMoreThan = RVFI->useQCIInterrupt(MF) ? 
2 : 0; 1942 if (PushedRegNum > OnlyPushIfMoreThan) { 1943 RVFI->setRVPushRegs(PushedRegNum); 1944 RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16)); 1945 } 1946 } 1947 1948 MachineFrameInfo &MFI = MF.getFrameInfo(); 1949 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 1950 1951 for (auto &CS : CSI) { 1952 MCRegister Reg = CS.getReg(); 1953 const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); 1954 unsigned Size = RegInfo->getSpillSize(*RC); 1955 1956 if (RVFI->useQCIInterrupt(MF)) { 1957 const auto *FFI = llvm::find_if(FixedCSRFIQCIInterruptMap, [&](auto P) { 1958 return P.first == CS.getReg(); 1959 }); 1960 if (FFI != std::end(FixedCSRFIQCIInterruptMap)) { 1961 int64_t Offset = FFI->second * (int64_t)Size; 1962 1963 int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset); 1964 assert(FrameIdx < 0); 1965 CS.setFrameIdx(FrameIdx); 1966 continue; 1967 } 1968 } 1969 1970 if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) { 1971 const auto *FII = llvm::find_if( 1972 FixedCSRFIMap, [&](MCPhysReg P) { return P == CS.getReg(); }); 1973 unsigned RegNum = std::distance(std::begin(FixedCSRFIMap), FII); 1974 1975 if (FII != std::end(FixedCSRFIMap)) { 1976 int64_t Offset; 1977 if (RVFI->getPushPopKind(MF) == 1978 RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp) 1979 Offset = -int64_t(RVFI->getRVPushRegs() - RegNum) * Size; 1980 else 1981 Offset = -int64_t(RegNum + 1) * Size; 1982 1983 if (RVFI->useQCIInterrupt(MF)) 1984 Offset -= QCIInterruptPushAmount; 1985 1986 int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset); 1987 assert(FrameIdx < 0); 1988 CS.setFrameIdx(FrameIdx); 1989 continue; 1990 } 1991 } 1992 1993 // Not a fixed slot. 1994 Align Alignment = RegInfo->getSpillAlign(*RC); 1995 // We may not be able to satisfy the desired alignment specification of 1996 // the TargetRegisterClass if the stack alignment is smaller. Use the 1997 // min. 
1998 Alignment = std::min(Alignment, getStackAlign()); 1999 int FrameIdx = MFI.CreateStackObject(Size, Alignment, true); 2000 if ((unsigned)FrameIdx < MinCSFrameIndex) 2001 MinCSFrameIndex = FrameIdx; 2002 if ((unsigned)FrameIdx > MaxCSFrameIndex) 2003 MaxCSFrameIndex = FrameIdx; 2004 CS.setFrameIdx(FrameIdx); 2005 if (RISCVRegisterInfo::isRVVRegClass(RC)) 2006 MFI.setStackID(FrameIdx, TargetStackID::ScalableVector); 2007 } 2008 2009 if (RVFI->useQCIInterrupt(MF)) { 2010 // Allocate a fixed object that covers the entire QCI stack allocation, 2011 // because there are gaps which are reserved for future use. 2012 MFI.CreateFixedSpillStackObject( 2013 QCIInterruptPushAmount, -static_cast<int64_t>(QCIInterruptPushAmount)); 2014 } 2015 2016 if (RVFI->isPushable(MF)) { 2017 int64_t QCIOffset = RVFI->useQCIInterrupt(MF) ? QCIInterruptPushAmount : 0; 2018 // Allocate a fixed object that covers the full push. 2019 if (int64_t PushSize = RVFI->getRVPushStackSize()) 2020 MFI.CreateFixedSpillStackObject(PushSize, -PushSize - QCIOffset); 2021 } else if (int LibCallRegs = getLibCallID(MF, CSI) + 1) { 2022 int64_t LibCallFrameSize = 2023 alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign()); 2024 MFI.CreateFixedSpillStackObject(LibCallFrameSize, -LibCallFrameSize); 2025 } 2026 2027 return true; 2028 } 2029 2030 bool RISCVFrameLowering::spillCalleeSavedRegisters( 2031 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2032 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2033 if (CSI.empty()) 2034 return true; 2035 2036 MachineFunction *MF = MBB.getParent(); 2037 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); 2038 DebugLoc DL; 2039 if (MI != MBB.end() && !MI->isDebugInstr()) 2040 DL = MI->getDebugLoc(); 2041 2042 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); 2043 if (RVFI->useQCIInterrupt(*MF)) { 2044 // Emit QC.C.MIENTER(.NEST) 2045 BuildMI( 2046 MBB, MI, DL, 2047 
TII.get(RVFI->getInterruptStackKind(*MF) == 2048 RISCVMachineFunctionInfo::InterruptStackKind::QCINest 2049 ? RISCV::QC_C_MIENTER_NEST 2050 : RISCV::QC_C_MIENTER)) 2051 .setMIFlag(MachineInstr::FrameSetup); 2052 2053 for (auto [Reg, _Offset] : FixedCSRFIQCIInterruptMap) 2054 MBB.addLiveIn(Reg); 2055 } 2056 2057 if (RVFI->isPushable(*MF)) { 2058 // Emit CM.PUSH with base StackAdj & evaluate Push stack 2059 unsigned PushedRegNum = RVFI->getRVPushRegs(); 2060 if (PushedRegNum > 0) { 2061 // Use encoded number to represent registers to spill. 2062 unsigned Opcode = getPushOpcode( 2063 RVFI->getPushPopKind(*MF), hasFP(*MF) && !RVFI->useQCIInterrupt(*MF)); 2064 unsigned RegEnc = RISCVZC::encodeRegListNumRegs(PushedRegNum); 2065 MachineInstrBuilder PushBuilder = 2066 BuildMI(MBB, MI, DL, TII.get(Opcode)) 2067 .setMIFlag(MachineInstr::FrameSetup); 2068 PushBuilder.addImm(RegEnc); 2069 PushBuilder.addImm(0); 2070 2071 for (unsigned i = 0; i < PushedRegNum; i++) 2072 PushBuilder.addUse(FixedCSRFIMap[i], RegState::Implicit); 2073 } 2074 } else if (const char *SpillLibCall = getSpillLibCallName(*MF, CSI)) { 2075 // Add spill libcall via non-callee-saved register t0. 2076 BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5) 2077 .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL) 2078 .setMIFlag(MachineInstr::FrameSetup); 2079 2080 // Add registers spilled in libcall as liveins. 2081 for (auto &CS : CSI) 2082 MBB.addLiveIn(CS.getReg()); 2083 } 2084 2085 // Manually spill values not spilled by libcall & Push/Pop. 2086 const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); 2087 const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); 2088 2089 auto storeRegsToStackSlots = [&](decltype(UnmanagedCSI) CSInfo) { 2090 for (auto &CS : CSInfo) { 2091 // Insert the spill to the stack frame. 
2092 MCRegister Reg = CS.getReg(); 2093 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2094 TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), 2095 CS.getFrameIdx(), RC, TRI, Register(), 2096 MachineInstr::FrameSetup); 2097 } 2098 }; 2099 storeRegsToStackSlots(UnmanagedCSI); 2100 storeRegsToStackSlots(RVVCSI); 2101 2102 return true; 2103 } 2104 2105 static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) { 2106 return RISCV::VRRegClass.contains(BaseReg) ? 1 2107 : RISCV::VRM2RegClass.contains(BaseReg) ? 2 2108 : RISCV::VRM4RegClass.contains(BaseReg) ? 4 2109 : 8; 2110 } 2111 2112 static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, 2113 const Register &Reg) { 2114 MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); 2115 // If it's not a grouped vector register, it doesn't have subregister, so 2116 // the base register is just itself. 2117 if (BaseReg == RISCV::NoRegister) 2118 BaseReg = Reg; 2119 return BaseReg; 2120 } 2121 2122 void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI( 2123 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const { 2124 MachineFunction *MF = MBB.getParent(); 2125 const MachineFrameInfo &MFI = MF->getFrameInfo(); 2126 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); 2127 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo(); 2128 2129 const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo()); 2130 if (RVVCSI.empty()) 2131 return; 2132 2133 uint64_t FixedSize = getStackSizeWithRVVPadding(*MF); 2134 if (!HasFP) { 2135 uint64_t ScalarLocalVarSize = 2136 MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - 2137 RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding(); 2138 FixedSize -= ScalarLocalVarSize; 2139 } 2140 2141 CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameSetup); 2142 for (auto &CS : RVVCSI) { 2143 // Insert the spill to the stack frame. 
2144 int FI = CS.getFrameIdx(); 2145 MCRegister BaseReg = getRVVBaseRegister(TRI, CS.getReg()); 2146 unsigned NumRegs = getCalleeSavedRVVNumRegs(CS.getReg()); 2147 for (unsigned i = 0; i < NumRegs; ++i) { 2148 CFIBuilder.insertCFIInst(createDefCFAOffset( 2149 TRI, BaseReg + i, -FixedSize, MFI.getObjectOffset(FI) / 8 + i)); 2150 } 2151 } 2152 } 2153 2154 void RISCVFrameLowering::emitCalleeSavedRVVEpilogCFI( 2155 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { 2156 MachineFunction *MF = MBB.getParent(); 2157 const MachineFrameInfo &MFI = MF->getFrameInfo(); 2158 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo(); 2159 2160 CFIInstBuilder CFIHelper(MBB, MI, MachineInstr::FrameDestroy); 2161 const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo()); 2162 for (auto &CS : RVVCSI) { 2163 MCRegister BaseReg = getRVVBaseRegister(TRI, CS.getReg()); 2164 unsigned NumRegs = getCalleeSavedRVVNumRegs(CS.getReg()); 2165 for (unsigned i = 0; i < NumRegs; ++i) 2166 CFIHelper.buildRestore(BaseReg + i); 2167 } 2168 } 2169 2170 bool RISCVFrameLowering::restoreCalleeSavedRegisters( 2171 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2172 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2173 if (CSI.empty()) 2174 return true; 2175 2176 MachineFunction *MF = MBB.getParent(); 2177 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); 2178 DebugLoc DL; 2179 if (MI != MBB.end() && !MI->isDebugInstr()) 2180 DL = MI->getDebugLoc(); 2181 2182 // Manually restore values not restored by libcall & Push/Pop. 2183 // Reverse the restore order in epilog. In addition, the return 2184 // address will be restored first in the epilogue. It increases 2185 // the opportunity to avoid the load-to-use data hazard between 2186 // loading RA and return by RA. loadRegFromStackSlot can insert 2187 // multiple instructions. 
2188 const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); 2189 const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); 2190 2191 auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) { 2192 for (auto &CS : CSInfo) { 2193 MCRegister Reg = CS.getReg(); 2194 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2195 TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, 2196 Register(), MachineInstr::FrameDestroy); 2197 assert(MI != MBB.begin() && 2198 "loadRegFromStackSlot didn't insert any code!"); 2199 } 2200 }; 2201 loadRegFromStackSlot(RVVCSI); 2202 loadRegFromStackSlot(UnmanagedCSI); 2203 2204 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); 2205 if (RVFI->useQCIInterrupt(*MF)) { 2206 // Don't emit anything here because restoration is handled by 2207 // QC.C.MILEAVERET which we already inserted to return. 2208 assert(MI->getOpcode() == RISCV::QC_C_MILEAVERET && 2209 "Unexpected QCI Interrupt Return Instruction"); 2210 } 2211 2212 if (RVFI->isPushable(*MF)) { 2213 unsigned PushedRegNum = RVFI->getRVPushRegs(); 2214 if (PushedRegNum > 0) { 2215 unsigned Opcode = getPopOpcode(RVFI->getPushPopKind(*MF)); 2216 unsigned RegEnc = RISCVZC::encodeRegListNumRegs(PushedRegNum); 2217 MachineInstrBuilder PopBuilder = 2218 BuildMI(MBB, MI, DL, TII.get(Opcode)) 2219 .setMIFlag(MachineInstr::FrameDestroy); 2220 // Use encoded number to represent registers to restore. 2221 PopBuilder.addImm(RegEnc); 2222 PopBuilder.addImm(0); 2223 2224 for (unsigned i = 0; i < RVFI->getRVPushRegs(); i++) 2225 PopBuilder.addDef(FixedCSRFIMap[i], RegState::ImplicitDefine); 2226 } 2227 } else { 2228 const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI); 2229 if (RestoreLibCall) { 2230 // Add restore libcall via tail call. 
2231 MachineBasicBlock::iterator NewMI = 2232 BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL)) 2233 .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL) 2234 .setMIFlag(MachineInstr::FrameDestroy); 2235 2236 // Remove trailing returns, since the terminator is now a tail call to the 2237 // restore function. 2238 if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) { 2239 NewMI->copyImplicitOps(*MF, *MI); 2240 MI->eraseFromParent(); 2241 } 2242 } 2243 } 2244 return true; 2245 } 2246 2247 bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2248 // Keep the conventional code flow when not optimizing. 2249 if (MF.getFunction().hasOptNone()) 2250 return false; 2251 2252 return true; 2253 } 2254 2255 bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 2256 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 2257 const MachineFunction *MF = MBB.getParent(); 2258 const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); 2259 2260 // Make sure VTYPE and VL are not live-in since we will use vsetvli in the 2261 // prologue to get the VLEN, and that will clobber these registers. 2262 // 2263 // We may do also check the stack contains objects with scalable vector type, 2264 // but this will require iterating over all the stack objects, but this may 2265 // not worth since the situation is rare, we could do further check in future 2266 // if we find it is necessary. 2267 if (STI.preferVsetvliOverReadVLENB() && 2268 (MBB.isLiveIn(RISCV::VTYPE) || MBB.isLiveIn(RISCV::VL))) 2269 return false; 2270 2271 if (!RVFI->useSaveRestoreLibCalls(*MF)) 2272 return true; 2273 2274 // Inserting a call to a __riscv_save libcall requires the use of the register 2275 // t0 (X5) to hold the return address. Therefore if this register is already 2276 // used we can't insert the call. 
2277 2278 RegScavenger RS; 2279 RS.enterBasicBlock(*TmpMBB); 2280 return !RS.isRegUsed(RISCV::X5); 2281 } 2282 2283 bool RISCVFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 2284 const MachineFunction *MF = MBB.getParent(); 2285 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 2286 const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); 2287 2288 // We do not want QC.C.MILEAVERET to be subject to shrink-wrapping - it must 2289 // come in the final block of its function as it both pops and returns. 2290 if (RVFI->useQCIInterrupt(*MF)) 2291 return MBB.succ_empty(); 2292 2293 if (!RVFI->useSaveRestoreLibCalls(*MF)) 2294 return true; 2295 2296 // Using the __riscv_restore libcalls to restore CSRs requires a tail call. 2297 // This means if we still need to continue executing code within this function 2298 // the restore cannot take place in this basic block. 2299 2300 if (MBB.succ_size() > 1) 2301 return false; 2302 2303 MachineBasicBlock *SuccMBB = 2304 MBB.succ_empty() ? TmpMBB->getFallThrough() : *MBB.succ_begin(); 2305 2306 // Doing a tail call should be safe if there are no successors, because either 2307 // we have a returning block or the end of the block is unreachable, so the 2308 // restore will be eliminated regardless. 2309 if (!SuccMBB) 2310 return true; 2311 2312 // The successor can only contain a return, since we would effectively be 2313 // replacing the successor with our own tail return at the end of our block. 
2314 return SuccMBB->isReturnBlock() && SuccMBB->size() == 1; 2315 } 2316 2317 bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { 2318 switch (ID) { 2319 case TargetStackID::Default: 2320 case TargetStackID::ScalableVector: 2321 return true; 2322 case TargetStackID::NoAlloc: 2323 case TargetStackID::SGPRSpill: 2324 case TargetStackID::WasmLocal: 2325 return false; 2326 } 2327 llvm_unreachable("Invalid TargetStackID::Value"); 2328 } 2329 2330 TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const { 2331 return TargetStackID::ScalableVector; 2332 } 2333 2334 // Synthesize the probe loop. 2335 static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL, 2336 Register TargetReg, bool IsRVV) { 2337 assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP"); 2338 2339 MachineBasicBlock &MBB = *MBBI->getParent(); 2340 MachineFunction &MF = *MBB.getParent(); 2341 2342 auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 2343 const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); 2344 bool IsRV64 = Subtarget.is64Bit(); 2345 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign(); 2346 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); 2347 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign); 2348 2349 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); 2350 MachineBasicBlock *LoopTestMBB = 2351 MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 2352 MF.insert(MBBInsertPoint, LoopTestMBB); 2353 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 2354 MF.insert(MBBInsertPoint, ExitMBB); 2355 MachineInstr::MIFlag Flags = MachineInstr::FrameSetup; 2356 Register ScratchReg = RISCV::X7; 2357 2358 // ScratchReg = ProbeSize 2359 TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags); 2360 2361 // LoopTest: 2362 // SUB SP, SP, ProbeSize 2363 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg) 2364 
.addReg(SPReg) 2365 .addReg(ScratchReg) 2366 .setMIFlags(Flags); 2367 2368 // s[d|w] zero, 0(sp) 2369 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, 2370 TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) 2371 .addReg(RISCV::X0) 2372 .addReg(SPReg) 2373 .addImm(0) 2374 .setMIFlags(Flags); 2375 2376 if (IsRVV) { 2377 // SUB TargetReg, TargetReg, ProbeSize 2378 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), 2379 TargetReg) 2380 .addReg(TargetReg) 2381 .addReg(ScratchReg) 2382 .setMIFlags(Flags); 2383 2384 // BGE TargetReg, ProbeSize, LoopTest 2385 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BGE)) 2386 .addReg(TargetReg) 2387 .addReg(ScratchReg) 2388 .addMBB(LoopTestMBB) 2389 .setMIFlags(Flags); 2390 2391 } else { 2392 // BNE SP, TargetReg, LoopTest 2393 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE)) 2394 .addReg(SPReg) 2395 .addReg(TargetReg) 2396 .addMBB(LoopTestMBB) 2397 .setMIFlags(Flags); 2398 } 2399 2400 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end()); 2401 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 2402 2403 LoopTestMBB->addSuccessor(ExitMBB); 2404 LoopTestMBB->addSuccessor(LoopTestMBB); 2405 MBB.addSuccessor(LoopTestMBB); 2406 // Update liveins. 2407 fullyRecomputeLiveIns({ExitMBB, LoopTestMBB}); 2408 } 2409 2410 void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF, 2411 MachineBasicBlock &MBB) const { 2412 // Get the instructions that need to be replaced. We emit at most two of 2413 // these. Remember them in order to avoid complications coming from the need 2414 // to traverse the block while potentially creating more blocks. 
2415 SmallVector<MachineInstr *, 4> ToReplace; 2416 for (MachineInstr &MI : MBB) { 2417 unsigned Opc = MI.getOpcode(); 2418 if (Opc == RISCV::PROBED_STACKALLOC || 2419 Opc == RISCV::PROBED_STACKALLOC_RVV) { 2420 ToReplace.push_back(&MI); 2421 } 2422 } 2423 2424 for (MachineInstr *MI : ToReplace) { 2425 if (MI->getOpcode() == RISCV::PROBED_STACKALLOC || 2426 MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV) { 2427 MachineBasicBlock::iterator MBBI = MI->getIterator(); 2428 DebugLoc DL = MBB.findDebugLoc(MBBI); 2429 Register TargetReg = MI->getOperand(0).getReg(); 2430 emitStackProbeInline(MBBI, DL, TargetReg, 2431 (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV)); 2432 MBBI->eraseFromParent(); 2433 } 2434 } 2435 } 2436