1 //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the Thumb1 implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "Thumb1FrameLowering.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMBaseRegisterInfo.h" 16 #include "ARMMachineFunctionInfo.h" 17 #include "ARMSubtarget.h" 18 #include "Thumb1InstrInfo.h" 19 #include "ThumbRegisterInfo.h" 20 #include "Utils/ARMBaseInfo.h" 21 #include "llvm/ADT/BitVector.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/CodeGen/CFIInstBuilder.h" 25 #include "llvm/CodeGen/LivePhysRegs.h" 26 #include "llvm/CodeGen/MachineBasicBlock.h" 27 #include "llvm/CodeGen/MachineFrameInfo.h" 28 #include "llvm/CodeGen/MachineFunction.h" 29 #include "llvm/CodeGen/MachineInstr.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineModuleInfo.h" 32 #include "llvm/CodeGen/MachineOperand.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/TargetInstrInfo.h" 35 #include "llvm/CodeGen/TargetSubtargetInfo.h" 36 #include "llvm/IR/DebugLoc.h" 37 #include "llvm/Support/Compiler.h" 38 #include "llvm/Support/ErrorHandling.h" 39 #include <cassert> 40 #include <iterator> 41 #include <vector> 42 43 #define DEBUG_TYPE "arm-frame-lowering" 44 45 using namespace llvm; 46 47 Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) 48 : ARMFrameLowering(sti) {} 49 50 bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ 51 const MachineFrameInfo &MFI = MF.getFrameInfo(); 52 unsigned CFSize = MFI.getMaxCallFrameSize(); 53 // It's not always a good idea to include the call frame as part of the 54 // stack frame. ARM (especially Thumb) has small immediate offset to 55 // address the stack frame. So a large call frame can cause poor codegen 56 // and may even makes it impossible to scavenge a register. 57 if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 58 return false; 59 60 return !MFI.hasVarSizedObjects(); 61 } 62 63 static void 64 emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, 65 MachineBasicBlock::iterator &MBBI, 66 const TargetInstrInfo &TII, const DebugLoc &dl, 67 const ThumbRegisterInfo &MRI, int NumBytes, 68 unsigned ScratchReg, unsigned MIFlags) { 69 // If it would take more than three instructions to adjust the stack pointer 70 // using tADDspi/tSUBspi, load an immediate instead. 71 if (std::abs(NumBytes) > 508 * 3) { 72 // We use a different codepath here from the normal 73 // emitThumbRegPlusImmediate so we don't have to deal with register 74 // scavenging. (Scavenging could try to use the emergency spill slot 75 // before we've actually finished setting up the stack.) 76 if (ScratchReg == ARM::NoRegister) 77 report_fatal_error("Failed to emit Thumb1 stack adjustment"); 78 MachineFunction &MF = *MBB.getParent(); 79 const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); 80 if (ST.genExecuteOnly()) { 81 unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm; 82 BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg) 83 .addImm(NumBytes).setMIFlags(MIFlags); 84 } else { 85 MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL, 86 0, MIFlags); 87 } 88 BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) 89 .addReg(ARM::SP) 90 .addReg(ScratchReg, RegState::Kill) 91 .add(predOps(ARMCC::AL)) 92 .setMIFlags(MIFlags); 93 return; 94 } 95 // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate 96 // won't change. 97 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 98 MRI, MIFlags); 99 100 } 101 102 static void emitCallSPUpdate(MachineBasicBlock &MBB, 103 MachineBasicBlock::iterator &MBBI, 104 const TargetInstrInfo &TII, const DebugLoc &dl, 105 const ThumbRegisterInfo &MRI, int NumBytes, 106 unsigned MIFlags = MachineInstr::NoFlags) { 107 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 108 MRI, MIFlags); 109 } 110 111 112 MachineBasicBlock::iterator Thumb1FrameLowering:: 113 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 114 MachineBasicBlock::iterator I) const { 115 const Thumb1InstrInfo &TII = 116 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 117 const ThumbRegisterInfo *RegInfo = 118 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 119 if (!hasReservedCallFrame(MF)) { 120 // If we have alloca, convert as follows: 121 // ADJCALLSTACKDOWN -> sub, sp, sp, amount 122 // ADJCALLSTACKUP -> add, sp, sp, amount 123 MachineInstr &Old = *I; 124 DebugLoc dl = Old.getDebugLoc(); 125 unsigned Amount = TII.getFrameSize(Old); 126 if (Amount != 0) { 127 // We need to keep the stack aligned properly. To do this, we round the 128 // amount of space needed for the outgoing arguments up to the next 129 // alignment boundary. 130 Amount = alignTo(Amount, getStackAlign()); 131 132 // Replace the pseudo instruction with a new instruction... 133 unsigned Opc = Old.getOpcode(); 134 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { 135 emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); 136 } else { 137 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); 138 emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); 139 } 140 } 141 } 142 return MBB.erase(I); 143 } 144 145 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, 146 MachineBasicBlock &MBB) const { 147 MachineBasicBlock::iterator MBBI = MBB.begin(); 148 MachineFrameInfo &MFI = MF.getFrameInfo(); 149 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 150 const ThumbRegisterInfo *RegInfo = 151 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 152 const Thumb1InstrInfo &TII = 153 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 154 155 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 156 unsigned NumBytes = MFI.getStackSize(); 157 assert(NumBytes >= ArgRegsSaveSize && 158 "ArgRegsSaveSize is included in NumBytes"); 159 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 160 assert(STI.getPushPopSplitVariation(MF) == ARMSubtarget::SplitR7 && 161 "Must use R7 spilt for Thumb1"); 162 163 // Debug location must be unknown since the first debug location is used 164 // to determine the end of the prologue. 165 DebugLoc dl; 166 167 Register FramePtr = RegInfo->getFrameRegister(MF); 168 Register BasePtr = RegInfo->getBaseRegister(); 169 int CFAOffset = 0; 170 171 // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. 172 NumBytes = (NumBytes + 3) & ~3; 173 MFI.setStackSize(NumBytes); 174 175 // Determine the sizes of each callee-save spill areas and record which frame 176 // belongs to which callee-save spill areas. 177 unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; 178 int FramePtrSpillFI = 0; 179 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); 180 181 if (ArgRegsSaveSize) { 182 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, 183 ARM::NoRegister, MachineInstr::FrameSetup); 184 CFAOffset += ArgRegsSaveSize; 185 CFIBuilder.buildDefCFAOffset(CFAOffset); 186 } 187 188 if (!AFI->hasStackFrame()) { 189 if (NumBytes - ArgRegsSaveSize != 0) { 190 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 191 -(NumBytes - ArgRegsSaveSize), 192 ARM::NoRegister, MachineInstr::FrameSetup); 193 CFAOffset += NumBytes - ArgRegsSaveSize; 194 CFIBuilder.buildDefCFAOffset(CFAOffset); 195 } 196 return; 197 } 198 199 bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); 200 201 for (const CalleeSavedInfo &I : CSI) { 202 MCRegister Reg = I.getReg(); 203 int FI = I.getFrameIdx(); 204 if (Reg == FramePtr.asMCReg()) 205 FramePtrSpillFI = FI; 206 switch (Reg) { 207 case ARM::R11: 208 if (HasFrameRecordArea) { 209 FRSize += 4; 210 break; 211 } 212 [[fallthrough]]; 213 case ARM::R8: 214 case ARM::R9: 215 case ARM::R10: 216 GPRCS2Size += 4; 217 break; 218 case ARM::LR: 219 if (HasFrameRecordArea) { 220 FRSize += 4; 221 break; 222 } 223 [[fallthrough]]; 224 case ARM::R4: 225 case ARM::R5: 226 case ARM::R6: 227 case ARM::R7: 228 GPRCS1Size += 4; 229 break; 230 default: 231 DPRCSSize += 8; 232 } 233 } 234 235 MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; 236 if (HasFrameRecordArea) { 237 // Skip Frame Record setup: 238 // push {lr} 239 // mov lr, r11 240 // push {lr} 241 std::advance(MBBI, 2); 242 FRPush = MBBI++; 243 } 244 245 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { 246 GPRCS1Push = MBBI; 247 ++MBBI; 248 } 249 250 // Find last push instruction for GPRCS2 - spilling of high registers 251 // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. 252 while (true) { 253 MachineBasicBlock::iterator OldMBBI = MBBI; 254 // Skip a run of tMOVr instructions 255 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && 256 MBBI->getFlag(MachineInstr::FrameSetup)) 257 MBBI++; 258 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && 259 MBBI->getFlag(MachineInstr::FrameSetup)) { 260 GPRCS2Push = MBBI; 261 MBBI++; 262 } else { 263 // We have reached an instruction which is not a push, so the previous 264 // run of tMOVr instructions (which may have been empty) was not part of 265 // the prologue. Reset MBBI back to the last PUSH of the prologue. 266 MBBI = OldMBBI; 267 break; 268 } 269 } 270 271 // Skip past this code sequence, which is emitted to restore the LR if it is 272 // live-in and clobbered by the frame record setup code: 273 // ldr rX, [sp, #Y] 274 // mov lr, rX 275 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tLDRspi && 276 MBBI->getFlag(MachineInstr::FrameSetup)) { 277 ++MBBI; 278 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && 279 MBBI->getOperand(0).getReg() == ARM::LR && 280 MBBI->getFlag(MachineInstr::FrameSetup)) { 281 ++MBBI; 282 } 283 } 284 285 // Determine starting offsets of spill areas. 286 unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - 287 (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); 288 unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; 289 unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; 290 bool HasFP = hasFP(MF); 291 if (HasFP) 292 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + 293 NumBytes); 294 if (HasFrameRecordArea) 295 AFI->setFrameRecordSavedAreaSize(FRSize); 296 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); 297 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); 298 AFI->setDPRCalleeSavedArea1Offset(DPRCSOffset); 299 NumBytes = DPRCSOffset; 300 301 int FramePtrOffsetInBlock = 0; 302 unsigned adjustedGPRCS1Size = GPRCS1Size; 303 if (GPRCS1Size > 0 && GPRCS2Size == 0 && 304 tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { 305 FramePtrOffsetInBlock = NumBytes; 306 adjustedGPRCS1Size += NumBytes; 307 NumBytes = 0; 308 } 309 CFAOffset += adjustedGPRCS1Size; 310 311 // Adjust FP so it point to the stack slot that contains the previous FP. 312 if (HasFP) { 313 MachineBasicBlock::iterator AfterPush = 314 HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); 315 if (HasFrameRecordArea) { 316 // We have just finished pushing the previous FP into the stack, 317 // so simply capture the SP value as the new Frame Pointer. 318 BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) 319 .addReg(ARM::SP) 320 .setMIFlags(MachineInstr::FrameSetup) 321 .add(predOps(ARMCC::AL)); 322 } else { 323 FramePtrOffsetInBlock += 324 MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; 325 BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) 326 .addReg(ARM::SP) 327 .addImm(FramePtrOffsetInBlock / 4) 328 .setMIFlags(MachineInstr::FrameSetup) 329 .add(predOps(ARMCC::AL)); 330 } 331 332 CFIBuilder.setInsertPoint(AfterPush); 333 if (FramePtrOffsetInBlock) 334 CFIBuilder.buildDefCFA(FramePtr, CFAOffset - FramePtrOffsetInBlock); 335 else 336 CFIBuilder.buildDefCFARegister(FramePtr); 337 if (NumBytes > 508) 338 // If offset is > 508 then sp cannot be adjusted in a single instruction, 339 // try restoring from fp instead. 340 AFI->setShouldRestoreSPFromFP(true); 341 } 342 343 // Emit call frame information for the callee-saved low registers. 344 if (GPRCS1Size > 0) { 345 CFIBuilder.setInsertPoint(std::next(GPRCS1Push)); 346 if (adjustedGPRCS1Size) 347 CFIBuilder.buildDefCFAOffset(CFAOffset); 348 for (const CalleeSavedInfo &I : CSI) { 349 switch (I.getReg()) { 350 case ARM::R8: 351 case ARM::R9: 352 case ARM::R10: 353 case ARM::R11: 354 case ARM::R12: 355 break; 356 case ARM::R0: 357 case ARM::R1: 358 case ARM::R2: 359 case ARM::R3: 360 case ARM::R4: 361 case ARM::R5: 362 case ARM::R6: 363 case ARM::R7: 364 case ARM::LR: 365 CFIBuilder.buildOffset(I.getReg(), 366 MFI.getObjectOffset(I.getFrameIdx())); 367 break; 368 } 369 } 370 } 371 372 // Emit call frame information for the callee-saved high registers. 373 if (GPRCS2Size > 0) { 374 CFIBuilder.setInsertPoint(std::next(GPRCS2Push)); 375 for (auto &I : CSI) { 376 switch (I.getReg()) { 377 case ARM::R8: 378 case ARM::R9: 379 case ARM::R10: 380 case ARM::R11: 381 case ARM::R12: 382 CFIBuilder.buildOffset(I.getReg(), 383 MFI.getObjectOffset(I.getFrameIdx())); 384 break; 385 default: 386 break; 387 } 388 } 389 } 390 391 if (NumBytes) { 392 // Insert it after all the callee-save spills. 393 // 394 // For a large stack frame, we might need a scratch register to store 395 // the size of the frame. We know all callee-save registers are free 396 // at this point in the prologue, so pick one. 397 unsigned ScratchRegister = ARM::NoRegister; 398 for (auto &I : CSI) { 399 MCRegister Reg = I.getReg(); 400 if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr.asMCReg())) { 401 ScratchRegister = Reg; 402 break; 403 } 404 } 405 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, 406 ScratchRegister, MachineInstr::FrameSetup); 407 if (!HasFP) { 408 CFAOffset += NumBytes; 409 CFIBuilder.buildDefCFAOffset(CFAOffset); 410 } 411 } 412 413 if (STI.isTargetELF() && HasFP) 414 MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - 415 AFI->getFramePtrSpillOffset()); 416 417 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); 418 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); 419 AFI->setDPRCalleeSavedArea1Size(DPRCSSize); 420 421 if (RegInfo->hasStackRealignment(MF)) { 422 const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); 423 // Emit the following sequence, using R4 as a temporary, since we cannot use 424 // SP as a source or destination register for the shifts: 425 // mov r4, sp 426 // lsrs r4, r4, #NrBitsToZero 427 // lsls r4, r4, #NrBitsToZero 428 // mov sp, r4 429 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) 430 .addReg(ARM::SP, RegState::Kill) 431 .add(predOps(ARMCC::AL)); 432 433 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) 434 .addDef(ARM::CPSR) 435 .addReg(ARM::R4, RegState::Kill) 436 .addImm(NrBitsToZero) 437 .add(predOps(ARMCC::AL)); 438 439 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) 440 .addDef(ARM::CPSR) 441 .addReg(ARM::R4, RegState::Kill) 442 .addImm(NrBitsToZero) 443 .add(predOps(ARMCC::AL)); 444 445 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 446 .addReg(ARM::R4, RegState::Kill) 447 .add(predOps(ARMCC::AL)); 448 449 AFI->setShouldRestoreSPFromFP(true); 450 } 451 452 // If we need a base pointer, set it up here. It's whatever the value 453 // of the stack pointer is at this point. Any variable size objects 454 // will be allocated after this, so we can still use the base pointer 455 // to reference locals. 456 if (RegInfo->hasBasePointer(MF)) 457 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) 458 .addReg(ARM::SP) 459 .add(predOps(ARMCC::AL)); 460 461 // If the frame has variable sized objects then the epilogue must restore 462 // the sp from fp. We can assume there's an FP here since hasFP already 463 // checks for hasVarSizedObjects. 464 if (MFI.hasVarSizedObjects()) 465 AFI->setShouldRestoreSPFromFP(true); 466 467 // In some cases, virtual registers have been introduced, e.g. by uses of 468 // emitThumbRegPlusImmInReg. 469 MF.getProperties().resetNoVRegs(); 470 } 471 472 void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, 473 MachineBasicBlock &MBB) const { 474 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 475 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); 476 MachineFrameInfo &MFI = MF.getFrameInfo(); 477 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 478 const ThumbRegisterInfo *RegInfo = 479 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 480 const Thumb1InstrInfo &TII = 481 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 482 483 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 484 int NumBytes = (int)MFI.getStackSize(); 485 assert((unsigned)NumBytes >= ArgRegsSaveSize && 486 "ArgRegsSaveSize is included in NumBytes"); 487 Register FramePtr = RegInfo->getFrameRegister(MF); 488 489 if (!AFI->hasStackFrame()) { 490 if (NumBytes - ArgRegsSaveSize != 0) 491 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 492 NumBytes - ArgRegsSaveSize, ARM::NoRegister, 493 MachineInstr::FrameDestroy); 494 } else { 495 // Unwind MBBI to point to first LDR / VLDRD. 496 if (MBBI != MBB.begin()) { 497 do 498 --MBBI; 499 while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy)); 500 if (!MBBI->getFlag(MachineInstr::FrameDestroy)) 501 ++MBBI; 502 } 503 504 // Move SP to start of FP callee save spill area. 505 NumBytes -= 506 (AFI->getFrameRecordSavedAreaSize() + 507 AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + 508 AFI->getDPRCalleeSavedArea1Size() + ArgRegsSaveSize); 509 510 // We are likely to need a scratch register and we know all callee-save 511 // registers are free at this point in the epilogue, so pick one. 512 unsigned ScratchRegister = ARM::NoRegister; 513 bool HasFP = hasFP(MF); 514 for (auto &I : MFI.getCalleeSavedInfo()) { 515 MCRegister Reg = I.getReg(); 516 if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr.asMCReg())) { 517 ScratchRegister = Reg; 518 break; 519 } 520 } 521 522 if (AFI->shouldRestoreSPFromFP()) { 523 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; 524 // Reset SP based on frame pointer only if the stack frame extends beyond 525 // frame pointer stack slot, the target is ELF and the function has FP, or 526 // the target uses var sized objects. 527 if (NumBytes) { 528 assert(ScratchRegister != ARM::NoRegister && 529 "No scratch register to restore SP from FP!"); 530 emitThumbRegPlusImmediate(MBB, MBBI, dl, ScratchRegister, FramePtr, -NumBytes, 531 TII, *RegInfo, MachineInstr::FrameDestroy); 532 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 533 .addReg(ScratchRegister) 534 .add(predOps(ARMCC::AL)) 535 .setMIFlag(MachineInstr::FrameDestroy); 536 } else 537 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 538 .addReg(FramePtr) 539 .add(predOps(ARMCC::AL)) 540 .setMIFlag(MachineInstr::FrameDestroy); 541 } else { 542 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && 543 &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { 544 MachineBasicBlock::iterator PMBBI = std::prev(MBBI); 545 if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) 546 emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes, 547 ScratchRegister, MachineInstr::FrameDestroy); 548 } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) 549 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes, 550 ScratchRegister, MachineInstr::FrameDestroy); 551 } 552 } 553 554 if (needPopSpecialFixUp(MF)) { 555 bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); 556 (void)Done; 557 assert(Done && "Emission of the special fixup failed!?"); 558 } 559 } 560 561 bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 562 if (!needPopSpecialFixUp(*MBB.getParent())) 563 return true; 564 565 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 566 return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); 567 } 568 569 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { 570 ARMFunctionInfo *AFI = 571 const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); 572 if (AFI->getArgRegsSaveSize()) 573 return true; 574 575 // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. 576 for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) 577 if (CSI.getReg() == ARM::LR) 578 return true; 579 580 return false; 581 } 582 583 static void findTemporariesForLR(const BitVector &GPRsNoLRSP, 584 const BitVector &PopFriendly, 585 const LiveRegUnits &UsedRegs, unsigned &PopReg, 586 unsigned &TmpReg, MachineRegisterInfo &MRI) { 587 PopReg = TmpReg = 0; 588 for (auto Reg : GPRsNoLRSP.set_bits()) { 589 if (UsedRegs.available(Reg)) { 590 // Remember the first pop-friendly register and exit. 591 if (PopFriendly.test(Reg)) { 592 PopReg = Reg; 593 TmpReg = 0; 594 break; 595 } 596 // Otherwise, remember that the register will be available to 597 // save a pop-friendly register. 598 TmpReg = Reg; 599 } 600 } 601 } 602 603 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, 604 bool DoIt) const { 605 MachineFunction &MF = *MBB.getParent(); 606 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 607 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 608 const TargetInstrInfo &TII = *STI.getInstrInfo(); 609 const ThumbRegisterInfo *RegInfo = 610 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 611 612 // If MBBI is a return instruction, or is a tPOP followed by a return 613 // instruction in the successor BB, we may be able to directly restore 614 // LR in the PC. 615 // This is only possible with v5T ops (v4T can't change the Thumb bit via 616 // a POP PC instruction), and only if we do not need to emit any SP update. 617 // Otherwise, we need a temporary register to pop the value 618 // and copy that value into LR. 619 auto MBBI = MBB.getFirstTerminator(); 620 bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; 621 if (CanRestoreDirectly) { 622 if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) 623 CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || 624 MBBI->getOpcode() == ARM::tPOP_RET); 625 else { 626 auto MBBI_prev = MBBI; 627 MBBI_prev--; 628 assert(MBBI_prev->getOpcode() == ARM::tPOP); 629 assert(MBB.succ_size() == 1); 630 if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) 631 MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. 632 else 633 CanRestoreDirectly = false; 634 } 635 } 636 637 if (CanRestoreDirectly) { 638 if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) 639 return true; 640 MachineInstrBuilder MIB = 641 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) 642 .add(predOps(ARMCC::AL)) 643 .setMIFlag(MachineInstr::FrameDestroy); 644 // Copy implicit ops and popped registers, if any. 645 for (auto MO: MBBI->operands()) 646 if (MO.isReg() && (MO.isImplicit() || MO.isDef())) 647 MIB.add(MO); 648 MIB.addReg(ARM::PC, RegState::Define); 649 // Erase the old instruction (tBX_RET or tPOP). 650 MBB.erase(MBBI); 651 return true; 652 } 653 654 // Look for a temporary register to use. 655 // First, compute the liveness information. 656 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 657 LiveRegUnits UsedRegs(TRI); 658 UsedRegs.addLiveOuts(MBB); 659 // The semantic of pristines changed recently and now, 660 // the callee-saved registers that are touched in the function 661 // are not part of the pristines set anymore. 662 // Add those callee-saved now. 663 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); 664 for (unsigned i = 0; CSRegs[i]; ++i) 665 UsedRegs.addReg(CSRegs[i]); 666 667 DebugLoc dl = DebugLoc(); 668 if (MBBI != MBB.end()) { 669 dl = MBBI->getDebugLoc(); 670 auto InstUpToMBBI = MBB.end(); 671 while (InstUpToMBBI != MBBI) 672 // The pre-decrement is on purpose here. 673 // We want to have the liveness right before MBBI. 674 UsedRegs.stepBackward(*--InstUpToMBBI); 675 } 676 677 // Look for a register that can be directly use in the POP. 678 unsigned PopReg = 0; 679 // And some temporary register, just in case. 680 unsigned TemporaryReg = 0; 681 BitVector PopFriendly = 682 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); 683 684 assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); 685 // Rebuild the GPRs from the high registers because they are removed 686 // form the GPR reg class for thumb1. 687 BitVector GPRsNoLRSP = 688 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); 689 GPRsNoLRSP |= PopFriendly; 690 GPRsNoLRSP.reset(ARM::LR); 691 GPRsNoLRSP.reset(ARM::SP); 692 GPRsNoLRSP.reset(ARM::PC); 693 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg, 694 MF.getRegInfo()); 695 696 // If we couldn't find a pop-friendly register, try restoring LR before 697 // popping the other callee-saved registers, so we could use one of them as a 698 // temporary. 699 bool UseLDRSP = false; 700 if (!PopReg && MBBI != MBB.begin()) { 701 auto PrevMBBI = MBBI; 702 PrevMBBI--; 703 if (PrevMBBI->getOpcode() == ARM::tPOP) { 704 UsedRegs.stepBackward(*PrevMBBI); 705 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, 706 TemporaryReg, MF.getRegInfo()); 707 if (PopReg) { 708 MBBI = PrevMBBI; 709 UseLDRSP = true; 710 } 711 } 712 } 713 714 if (!DoIt && !PopReg && !TemporaryReg) 715 return false; 716 717 assert((PopReg || TemporaryReg) && "Cannot get LR"); 718 719 if (UseLDRSP) { 720 assert(PopReg && "Do not know how to get LR"); 721 // Load the LR via LDR tmp, [SP, #off] 722 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) 723 .addReg(PopReg, RegState::Define) 724 .addReg(ARM::SP) 725 .addImm(MBBI->getNumExplicitOperands() - 2) 726 .add(predOps(ARMCC::AL)) 727 .setMIFlag(MachineInstr::FrameDestroy); 728 // Move from the temporary register to the LR. 729 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 730 .addReg(ARM::LR, RegState::Define) 731 .addReg(PopReg, RegState::Kill) 732 .add(predOps(ARMCC::AL)) 733 .setMIFlag(MachineInstr::FrameDestroy); 734 // Advance past the pop instruction. 735 MBBI++; 736 // Increment the SP. 737 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 738 ArgRegsSaveSize + 4, ARM::NoRegister, 739 MachineInstr::FrameDestroy); 740 return true; 741 } 742 743 if (TemporaryReg) { 744 assert(!PopReg && "Unnecessary MOV is about to be inserted"); 745 PopReg = PopFriendly.find_first(); 746 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 747 .addReg(TemporaryReg, RegState::Define) 748 .addReg(PopReg, RegState::Kill) 749 .add(predOps(ARMCC::AL)) 750 .setMIFlag(MachineInstr::FrameDestroy); 751 } 752 753 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { 754 // We couldn't use the direct restoration above, so 755 // perform the opposite conversion: tPOP_RET to tPOP. 756 MachineInstrBuilder MIB = 757 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) 758 .add(predOps(ARMCC::AL)) 759 .setMIFlag(MachineInstr::FrameDestroy); 760 bool Popped = false; 761 for (auto MO: MBBI->operands()) 762 if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && 763 MO.getReg() != ARM::PC) { 764 MIB.add(MO); 765 if (!MO.isImplicit()) 766 Popped = true; 767 } 768 // Is there anything left to pop? 769 if (!Popped) 770 MBB.erase(MIB.getInstr()); 771 // Erase the old instruction. 772 MBB.erase(MBBI); 773 MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) 774 .add(predOps(ARMCC::AL)) 775 .setMIFlag(MachineInstr::FrameDestroy); 776 } 777 778 assert(PopReg && "Do not know how to get LR"); 779 BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) 780 .add(predOps(ARMCC::AL)) 781 .addReg(PopReg, RegState::Define) 782 .setMIFlag(MachineInstr::FrameDestroy); 783 784 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, 785 ARM::NoRegister, MachineInstr::FrameDestroy); 786 787 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 788 .addReg(ARM::LR, RegState::Define) 789 .addReg(PopReg, RegState::Kill) 790 .add(predOps(ARMCC::AL)) 791 .setMIFlag(MachineInstr::FrameDestroy); 792 793 if (TemporaryReg) 794 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 795 .addReg(PopReg, RegState::Define) 796 .addReg(TemporaryReg, RegState::Kill) 797 .add(predOps(ARMCC::AL)) 798 .setMIFlag(MachineInstr::FrameDestroy); 799 800 return true; 801 } 802 803 static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, 804 ARM::R7, ARM::LR}; 805 static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9, 806 ARM::R10, ARM::R11}; 807 static const SmallVector<Register> OrderedCopyRegs = { 808 ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 809 ARM::R5, ARM::R6, ARM::R7, ARM::LR}; 810 811 static void splitLowAndHighRegs(const std::set<Register> &Regs, 812 std::set<Register> &LowRegs, 813 std::set<Register> &HighRegs) { 814 for (Register Reg : Regs) { 815 if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { 816 LowRegs.insert(Reg); 817 } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { 818 HighRegs.insert(Reg); 819 } else { 820 llvm_unreachable("callee-saved register of unexpected class"); 821 } 822 } 823 } 824 825 template <typename It> 826 It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, 827 const std::set<Register> &RegSet) { 828 return std::find_if(OrderedStartIt, OrderedEndIt, 829 [&](Register Reg) { return RegSet.count(Reg); }); 830 } 831 832 static void pushRegsToStack(MachineBasicBlock &MBB, 833 MachineBasicBlock::iterator MI, 834 const TargetInstrInfo &TII, 835 const std::set<Register> &RegsToSave, 836 const std::set<Register> &CopyRegs, 837 bool &UsedLRAsTemp) { 838 MachineFunction &MF = *MBB.getParent(); 839 const MachineRegisterInfo &MRI = MF.getRegInfo(); 840 DebugLoc DL; 841 842 std::set<Register> LowRegs, HighRegs; 843 splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); 844 845 // Push low regs first 846 if (!LowRegs.empty()) { 847 MachineInstrBuilder MIB = 848 BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 849 for (unsigned Reg : OrderedLowRegs) { 850 if (LowRegs.count(Reg)) { 851 bool isKill = !MRI.isLiveIn(Reg); 852 if (isKill && !MRI.isReserved(Reg)) 853 MBB.addLiveIn(Reg); 854 855 MIB.addReg(Reg, getKillRegState(isKill)); 856 } 857 } 858 MIB.setMIFlags(MachineInstr::FrameSetup); 859 } 860 861 // Now push the high registers 862 // There are no store instructions that can access high registers directly, 863 // so we have to move them to low registers, and push them. 864 // This might take multiple pushes, as it is possible for there to 865 // be fewer low registers available than high registers which need saving. 866 867 // Find the first register to save. 868 // Registers must be processed in reverse order so that in case we need to use 869 // multiple PUSH instructions, the order of the registers on the stack still 870 // matches the unwind info. They need to be swicthed back to ascending order 871 // before adding to the PUSH instruction. 872 auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), 873 OrderedHighRegs.rend(), 874 HighRegs); 875 876 while (HiRegToSave != OrderedHighRegs.rend()) { 877 // Find the first low register to use. 878 auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), 879 OrderedCopyRegs.rend(), 880 CopyRegs); 881 882 // Create the PUSH, but don't insert it yet (the MOVs need to come first). 883 MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) 884 .add(predOps(ARMCC::AL)) 885 .setMIFlags(MachineInstr::FrameSetup); 886 887 SmallVector<unsigned, 4> RegsToPush; 888 while (HiRegToSave != OrderedHighRegs.rend() && 889 CopyRegIt != OrderedCopyRegs.rend()) { 890 if (HighRegs.count(*HiRegToSave)) { 891 bool isKill = !MRI.isLiveIn(*HiRegToSave); 892 if (isKill && !MRI.isReserved(*HiRegToSave)) 893 MBB.addLiveIn(*HiRegToSave); 894 if (*CopyRegIt == ARM::LR) 895 UsedLRAsTemp = true; 896 897 // Emit a MOV from the high reg to the low reg. 898 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 899 .addReg(*CopyRegIt, RegState::Define) 900 .addReg(*HiRegToSave, getKillRegState(isKill)) 901 .add(predOps(ARMCC::AL)) 902 .setMIFlags(MachineInstr::FrameSetup); 903 904 // Record the register that must be added to the PUSH. 905 RegsToPush.push_back(*CopyRegIt); 906 907 CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), 908 OrderedCopyRegs.rend(), 909 CopyRegs); 910 HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), 911 OrderedHighRegs.rend(), 912 HighRegs); 913 } 914 } 915 916 // Add the low registers to the PUSH, in ascending order. 917 for (unsigned Reg : llvm::reverse(RegsToPush)) 918 PushMIB.addReg(Reg, RegState::Kill); 919 920 // Insert the PUSH instruction after the MOVs. 921 MBB.insert(MI, PushMIB); 922 } 923 } 924 925 static void popRegsFromStack(MachineBasicBlock &MBB, 926 MachineBasicBlock::iterator &MI, 927 const TargetInstrInfo &TII, 928 const std::set<Register> &RegsToRestore, 929 const std::set<Register> &AvailableCopyRegs, 930 bool IsVarArg, bool HasV5Ops) { 931 if (RegsToRestore.empty()) 932 return; 933 934 MachineFunction &MF = *MBB.getParent(); 935 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 936 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); 937 938 std::set<Register> LowRegs, HighRegs; 939 splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); 940 941 // Pop the high registers first 942 // There are no store instructions that can access high registers directly, 943 // so we have to pop into low registers and them move to the high registers. 944 // This might take multiple pops, as it is possible for there to 945 // be fewer low registers available than high registers which need restoring. 946 947 // Find the first register to restore. 948 auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), 949 OrderedHighRegs.end(), 950 HighRegs); 951 952 std::set<Register> CopyRegs = AvailableCopyRegs; 953 Register LowScratchReg; 954 if (!HighRegs.empty() && CopyRegs.empty()) { 955 // No copy regs are available to pop high regs. Let's make use of a return 956 // register and the scratch register (IP/R12) to copy things around. 957 LowScratchReg = ARM::R0; 958 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 959 .addReg(ARM::R12, RegState::Define) 960 .addReg(LowScratchReg, RegState::Kill) 961 .add(predOps(ARMCC::AL)) 962 .setMIFlag(MachineInstr::FrameDestroy); 963 CopyRegs.insert(LowScratchReg); 964 } 965 966 while (HiRegToRestore != OrderedHighRegs.end()) { 967 assert(!CopyRegs.empty()); 968 // Find the first low register to use. 969 auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), 970 OrderedCopyRegs.end(), 971 CopyRegs); 972 973 // Create the POP instruction. 974 MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) 975 .add(predOps(ARMCC::AL)) 976 .setMIFlag(MachineInstr::FrameDestroy); 977 978 while (HiRegToRestore != OrderedHighRegs.end() && 979 CopyReg != OrderedCopyRegs.end()) { 980 // Add the low register to the POP. 981 PopMIB.addReg(*CopyReg, RegState::Define); 982 983 // Create the MOV from low to high register. 984 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 985 .addReg(*HiRegToRestore, RegState::Define) 986 .addReg(*CopyReg, RegState::Kill) 987 .add(predOps(ARMCC::AL)) 988 .setMIFlag(MachineInstr::FrameDestroy); 989 990 CopyReg = getNextOrderedReg(std::next(CopyReg), 991 OrderedCopyRegs.end(), 992 CopyRegs); 993 HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), 994 OrderedHighRegs.end(), 995 HighRegs); 996 } 997 } 998 999 // Restore low register used as scratch if necessary 1000 if (LowScratchReg.isValid()) { 1001 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1002 .addReg(LowScratchReg, RegState::Define) 1003 .addReg(ARM::R12, RegState::Kill) 1004 .add(predOps(ARMCC::AL)) 1005 .setMIFlag(MachineInstr::FrameDestroy); 1006 } 1007 1008 // Now pop the low registers 1009 if (!LowRegs.empty()) { 1010 MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) 1011 .add(predOps(ARMCC::AL)) 1012 .setMIFlag(MachineInstr::FrameDestroy); 1013 1014 bool NeedsPop = false; 1015 for (Register Reg : OrderedLowRegs) { 1016 if (!LowRegs.count(Reg)) 1017 continue; 1018 1019 if (Reg == ARM::LR) { 1020 if (!MBB.succ_empty() || MI->getOpcode() == ARM::TCRETURNdi || 1021 MI->getOpcode() == ARM::TCRETURNri || 1022 MI->getOpcode() == ARM::TCRETURNrinotr12) 1023 // LR may only be popped into PC, as part of return sequence. 1024 // If this isn't the return sequence, we'll need emitPopSpecialFixUp 1025 // to restore LR the hard way. 1026 // FIXME: if we don't pass any stack arguments it would be actually 1027 // advantageous *and* correct to do the conversion to an ordinary call 1028 // instruction here. 1029 continue; 1030 // Special epilogue for vararg functions. See emitEpilogue 1031 if (IsVarArg) 1032 continue; 1033 // ARMv4T requires BX, see emitEpilogue 1034 if (!HasV5Ops) 1035 continue; 1036 1037 // CMSE entry functions must return via BXNS, see emitEpilogue. 1038 if (AFI->isCmseNSEntryFunction()) 1039 continue; 1040 1041 // Pop LR into PC. 1042 Reg = ARM::PC; 1043 (*MIB).setDesc(TII.get(ARM::tPOP_RET)); 1044 if (MI != MBB.end()) 1045 MIB.copyImplicitOps(*MI); 1046 MI = MBB.erase(MI); 1047 } 1048 MIB.addReg(Reg, getDefRegState(true)); 1049 NeedsPop = true; 1050 } 1051 1052 // It's illegal to emit pop instruction without operands. 1053 if (NeedsPop) 1054 MBB.insert(MI, &*MIB); 1055 else 1056 MF.deleteMachineInstr(MIB); 1057 } 1058 } 1059 1060 bool Thumb1FrameLowering::spillCalleeSavedRegisters( 1061 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1062 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1063 if (CSI.empty()) 1064 return false; 1065 1066 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1067 MachineFunction &MF = *MBB.getParent(); 1068 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1069 MF.getSubtarget().getRegisterInfo()); 1070 Register FPReg = RegInfo->getFrameRegister(MF); 1071 1072 // In case FP is a high reg, we need a separate push sequence to generate 1073 // a correct Frame Record 1074 bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1075 bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM::LR); 1076 bool UsedLRAsTemp = false; 1077 1078 std::set<Register> FrameRecord; 1079 std::set<Register> SpilledGPRs; 1080 for (const CalleeSavedInfo &I : CSI) { 1081 MCRegister Reg = I.getReg(); 1082 if (NeedsFrameRecordPush && (Reg == FPReg.asMCReg() || Reg == ARM::LR)) 1083 FrameRecord.insert(Reg); 1084 else 1085 SpilledGPRs.insert(Reg); 1086 } 1087 1088 // Determine intermediate registers which can be used for pushing the frame 1089 // record: 1090 // - Unused argument registers 1091 // - LR: This is possible because the first PUSH will save it on the stack, 1092 // so it is free to be used as a temporary for the second. However, it 1093 // is possible for LR to be live-in to the function, in which case we 1094 // will need to restore it later in the prologue, so we only use this 1095 // if there are no free argument registers. 1096 std::set<Register> FrameRecordCopyRegs; 1097 for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) 1098 if (!MF.getRegInfo().isLiveIn(ArgReg)) 1099 FrameRecordCopyRegs.insert(ArgReg); 1100 if (FrameRecordCopyRegs.empty()) 1101 FrameRecordCopyRegs.insert(ARM::LR); 1102 1103 pushRegsToStack(MBB, MI, TII, FrameRecord, FrameRecordCopyRegs, UsedLRAsTemp); 1104 1105 // Determine intermediate registers which can be used for pushing high regs: 1106 // - Spilled low regs 1107 // - Unused argument registers 1108 std::set<Register> CopyRegs; 1109 for (Register Reg : SpilledGPRs) 1110 if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && 1111 !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) 1112 CopyRegs.insert(Reg); 1113 for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) 1114 if (!MF.getRegInfo().isLiveIn(ArgReg)) 1115 CopyRegs.insert(ArgReg); 1116 1117 pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs, UsedLRAsTemp); 1118 1119 // If the push sequence used LR as a temporary, and LR is live-in (for 1120 // example because it is used by the llvm.returnaddress intrinsic), then we 1121 // need to reload it from the stack. Thumb1 does not have a load instruction 1122 // which can use LR, so we need to load into a temporary low register and 1123 // copy to LR. 1124 if (LRLiveIn && UsedLRAsTemp) { 1125 auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), 1126 OrderedCopyRegs.rend(), CopyRegs); 1127 assert(CopyRegIt != OrderedCopyRegs.rend()); 1128 unsigned NumRegsPushed = FrameRecord.size() + SpilledGPRs.size(); 1129 LLVM_DEBUG( 1130 dbgs() << "LR is live-in but clobbered in prologue, restoring via " 1131 << RegInfo->getName(*CopyRegIt) << "\n"); 1132 1133 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tLDRspi), *CopyRegIt) 1134 .addReg(ARM::SP) 1135 .addImm(NumRegsPushed - 1) 1136 .add(predOps(ARMCC::AL)) 1137 .setMIFlags(MachineInstr::FrameSetup); 1138 1139 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tMOVr), ARM::LR) 1140 .addReg(*CopyRegIt) 1141 .add(predOps(ARMCC::AL)) 1142 .setMIFlags(MachineInstr::FrameSetup); 1143 } 1144 1145 return true; 1146 } 1147 1148 bool Thumb1FrameLowering::restoreCalleeSavedRegisters( 1149 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1150 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1151 if (CSI.empty()) 1152 return false; 1153 1154 MachineFunction &MF = *MBB.getParent(); 1155 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1156 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1157 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1158 MF.getSubtarget().getRegisterInfo()); 1159 bool IsVarArg = AFI->getArgRegsSaveSize() > 0; 1160 Register FPReg = RegInfo->getFrameRegister(MF); 1161 1162 // In case FP is a high reg, we need a separate pop sequence to generate 1163 // a correct Frame Record 1164 bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1165 1166 std::set<Register> FrameRecord; 1167 std::set<Register> SpilledGPRs; 1168 for (CalleeSavedInfo &I : CSI) { 1169 MCRegister Reg = I.getReg(); 1170 if (NeedsFrameRecordPop && (Reg == FPReg.asMCReg() || Reg == ARM::LR)) 1171 FrameRecord.insert(Reg); 1172 else 1173 SpilledGPRs.insert(Reg); 1174 1175 if (Reg == ARM::LR) 1176 I.setRestored(false); 1177 } 1178 1179 // Determine intermidiate registers which can be used for popping high regs: 1180 // - Spilled low regs 1181 // - Unused return registers 1182 std::set<Register> CopyRegs; 1183 std::set<Register> UnusedReturnRegs; 1184 for (Register Reg : SpilledGPRs) 1185 if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) 1186 CopyRegs.insert(Reg); 1187 auto Terminator = MBB.getFirstTerminator(); 1188 if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { 1189 UnusedReturnRegs.insert(ARM::R0); 1190 UnusedReturnRegs.insert(ARM::R1); 1191 UnusedReturnRegs.insert(ARM::R2); 1192 UnusedReturnRegs.insert(ARM::R3); 1193 for (auto Op : Terminator->implicit_operands()) { 1194 if (Op.isReg()) 1195 UnusedReturnRegs.erase(Op.getReg()); 1196 } 1197 } 1198 CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); 1199 1200 // First pop regular spilled regs. 1201 popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, 1202 STI.hasV5TOps()); 1203 1204 // LR may only be popped into pc, as part of a return sequence. 1205 // Check that no other pop instructions are inserted after that. 1206 assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && 1207 "Can't insert pop after return sequence"); 1208 1209 // Now pop Frame Record regs. 1210 // Only unused return registers can be used as copy regs at this point. 1211 popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, 1212 STI.hasV5TOps()); 1213 1214 return true; 1215 } 1216