//===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the Thumb1 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "Thumb1FrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "ThumbRegisterInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cstdlib>
#include <iterator>
#include <set>
#include <vector>

using namespace llvm;

Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
    : ARMFrameLowering(sti) {}

bool
Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ 54 const MachineFrameInfo &MFI = MF.getFrameInfo(); 55 unsigned CFSize = MFI.getMaxCallFrameSize(); 56 // It's not always a good idea to include the call frame as part of the 57 // stack frame. ARM (especially Thumb) has small immediate offset to 58 // address the stack frame. So a large call frame can cause poor codegen 59 // and may even makes it impossible to scavenge a register. 60 if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 61 return false; 62 63 return !MFI.hasVarSizedObjects(); 64 } 65 66 static void 67 emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, 68 MachineBasicBlock::iterator &MBBI, 69 const TargetInstrInfo &TII, const DebugLoc &dl, 70 const ThumbRegisterInfo &MRI, int NumBytes, 71 unsigned ScratchReg, unsigned MIFlags) { 72 // If it would take more than three instructions to adjust the stack pointer 73 // using tADDspi/tSUBspi, load an immediate instead. 74 if (std::abs(NumBytes) > 508 * 3) { 75 // We use a different codepath here from the normal 76 // emitThumbRegPlusImmediate so we don't have to deal with register 77 // scavenging. (Scavenging could try to use the emergency spill slot 78 // before we've actually finished setting up the stack.) 
79 if (ScratchReg == ARM::NoRegister) 80 report_fatal_error("Failed to emit Thumb1 stack adjustment"); 81 MachineFunction &MF = *MBB.getParent(); 82 const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); 83 if (ST.genExecuteOnly()) { 84 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg) 85 .addImm(NumBytes).setMIFlags(MIFlags); 86 } else { 87 MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL, 88 0, MIFlags); 89 } 90 BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) 91 .addReg(ARM::SP) 92 .addReg(ScratchReg, RegState::Kill) 93 .add(predOps(ARMCC::AL)) 94 .setMIFlags(MIFlags); 95 return; 96 } 97 // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate 98 // won't change. 99 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 100 MRI, MIFlags); 101 102 } 103 104 static void emitCallSPUpdate(MachineBasicBlock &MBB, 105 MachineBasicBlock::iterator &MBBI, 106 const TargetInstrInfo &TII, const DebugLoc &dl, 107 const ThumbRegisterInfo &MRI, int NumBytes, 108 unsigned MIFlags = MachineInstr::NoFlags) { 109 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 110 MRI, MIFlags); 111 } 112 113 114 MachineBasicBlock::iterator Thumb1FrameLowering:: 115 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 116 MachineBasicBlock::iterator I) const { 117 const Thumb1InstrInfo &TII = 118 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 119 const ThumbRegisterInfo *RegInfo = 120 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 121 if (!hasReservedCallFrame(MF)) { 122 // If we have alloca, convert as follows: 123 // ADJCALLSTACKDOWN -> sub, sp, sp, amount 124 // ADJCALLSTACKUP -> add, sp, sp, amount 125 MachineInstr &Old = *I; 126 DebugLoc dl = Old.getDebugLoc(); 127 unsigned Amount = TII.getFrameSize(Old); 128 if (Amount != 0) { 129 // We need to keep the stack aligned properly. 
To do this, we round the 130 // amount of space needed for the outgoing arguments up to the next 131 // alignment boundary. 132 Amount = alignTo(Amount, getStackAlign()); 133 134 // Replace the pseudo instruction with a new instruction... 135 unsigned Opc = Old.getOpcode(); 136 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { 137 emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); 138 } else { 139 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); 140 emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); 141 } 142 } 143 } 144 return MBB.erase(I); 145 } 146 147 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, 148 MachineBasicBlock &MBB) const { 149 MachineBasicBlock::iterator MBBI = MBB.begin(); 150 MachineFrameInfo &MFI = MF.getFrameInfo(); 151 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 152 MachineModuleInfo &MMI = MF.getMMI(); 153 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 154 const ThumbRegisterInfo *RegInfo = 155 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 156 const Thumb1InstrInfo &TII = 157 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 158 159 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 160 unsigned NumBytes = MFI.getStackSize(); 161 assert(NumBytes >= ArgRegsSaveSize && 162 "ArgRegsSaveSize is included in NumBytes"); 163 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 164 165 // Debug location must be unknown since the first debug location is used 166 // to determine the end of the prologue. 167 DebugLoc dl; 168 169 Register FramePtr = RegInfo->getFrameRegister(MF); 170 Register BasePtr = RegInfo->getBaseRegister(); 171 int CFAOffset = 0; 172 173 // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. 174 NumBytes = (NumBytes + 3) & ~3; 175 MFI.setStackSize(NumBytes); 176 177 // Determine the sizes of each callee-save spill areas and record which frame 178 // belongs to which callee-save spill areas. 
179 unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; 180 int FramePtrSpillFI = 0; 181 182 if (ArgRegsSaveSize) { 183 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, 184 ARM::NoRegister, MachineInstr::FrameSetup); 185 CFAOffset += ArgRegsSaveSize; 186 unsigned CFIIndex = 187 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 188 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 189 .addCFIIndex(CFIIndex) 190 .setMIFlags(MachineInstr::FrameSetup); 191 } 192 193 if (!AFI->hasStackFrame()) { 194 if (NumBytes - ArgRegsSaveSize != 0) { 195 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 196 -(NumBytes - ArgRegsSaveSize), 197 ARM::NoRegister, MachineInstr::FrameSetup); 198 CFAOffset += NumBytes - ArgRegsSaveSize; 199 unsigned CFIIndex = MF.addFrameInst( 200 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 201 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 202 .addCFIIndex(CFIIndex) 203 .setMIFlags(MachineInstr::FrameSetup); 204 } 205 return; 206 } 207 208 bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); 209 210 for (const CalleeSavedInfo &I : CSI) { 211 Register Reg = I.getReg(); 212 int FI = I.getFrameIdx(); 213 if (Reg == FramePtr) 214 FramePtrSpillFI = FI; 215 switch (Reg) { 216 case ARM::R11: 217 if (HasFrameRecordArea) { 218 FRSize += 4; 219 break; 220 } 221 LLVM_FALLTHROUGH; 222 case ARM::R8: 223 case ARM::R9: 224 case ARM::R10: 225 if (STI.splitFramePushPop(MF)) { 226 GPRCS2Size += 4; 227 break; 228 } 229 LLVM_FALLTHROUGH; 230 case ARM::LR: 231 if (HasFrameRecordArea) { 232 FRSize += 4; 233 break; 234 } 235 LLVM_FALLTHROUGH; 236 case ARM::R4: 237 case ARM::R5: 238 case ARM::R6: 239 case ARM::R7: 240 GPRCS1Size += 4; 241 break; 242 default: 243 DPRCSSize += 8; 244 } 245 } 246 247 MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; 248 if (HasFrameRecordArea) { 249 // Skip Frame Record setup: 250 // push {lr} 251 // 
mov lr, r11 252 // push {lr} 253 std::advance(MBBI, 2); 254 FRPush = MBBI++; 255 } 256 257 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { 258 GPRCS1Push = MBBI; 259 ++MBBI; 260 } 261 262 // Find last push instruction for GPRCS2 - spilling of high registers 263 // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. 264 while (true) { 265 MachineBasicBlock::iterator OldMBBI = MBBI; 266 // Skip a run of tMOVr instructions 267 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && 268 MBBI->getFlag(MachineInstr::FrameSetup)) 269 MBBI++; 270 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && 271 MBBI->getFlag(MachineInstr::FrameSetup)) { 272 GPRCS2Push = MBBI; 273 MBBI++; 274 } else { 275 // We have reached an instruction which is not a push, so the previous 276 // run of tMOVr instructions (which may have been empty) was not part of 277 // the prologue. Reset MBBI back to the last PUSH of the prologue. 278 MBBI = OldMBBI; 279 break; 280 } 281 } 282 283 // Determine starting offsets of spill areas. 
284 unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - 285 (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); 286 unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; 287 unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; 288 bool HasFP = hasFP(MF); 289 if (HasFP) 290 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + 291 NumBytes); 292 if (HasFrameRecordArea) 293 AFI->setFrameRecordSavedAreaSize(FRSize); 294 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); 295 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); 296 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); 297 NumBytes = DPRCSOffset; 298 299 int FramePtrOffsetInBlock = 0; 300 unsigned adjustedGPRCS1Size = GPRCS1Size; 301 if (GPRCS1Size > 0 && GPRCS2Size == 0 && 302 tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { 303 FramePtrOffsetInBlock = NumBytes; 304 adjustedGPRCS1Size += NumBytes; 305 NumBytes = 0; 306 } 307 CFAOffset += adjustedGPRCS1Size; 308 309 // Adjust FP so it point to the stack slot that contains the previous FP. 310 if (HasFP) { 311 MachineBasicBlock::iterator AfterPush = 312 HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); 313 if (HasFrameRecordArea) { 314 // We have just finished pushing the previous FP into the stack, 315 // so simply capture the SP value as the new Frame Pointer. 
316 BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) 317 .addReg(ARM::SP) 318 .setMIFlags(MachineInstr::FrameSetup) 319 .add(predOps(ARMCC::AL)); 320 } else { 321 FramePtrOffsetInBlock += 322 MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; 323 BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) 324 .addReg(ARM::SP) 325 .addImm(FramePtrOffsetInBlock / 4) 326 .setMIFlags(MachineInstr::FrameSetup) 327 .add(predOps(ARMCC::AL)); 328 } 329 330 if(FramePtrOffsetInBlock) { 331 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( 332 nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); 333 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 334 .addCFIIndex(CFIIndex) 335 .setMIFlags(MachineInstr::FrameSetup); 336 } else { 337 unsigned CFIIndex = 338 MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( 339 nullptr, MRI->getDwarfRegNum(FramePtr, true))); 340 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 341 .addCFIIndex(CFIIndex) 342 .setMIFlags(MachineInstr::FrameSetup); 343 } 344 if (NumBytes > 508) 345 // If offset is > 508 then sp cannot be adjusted in a single instruction, 346 // try restoring from fp instead. 347 AFI->setShouldRestoreSPFromFP(true); 348 } 349 350 // Emit call frame information for the callee-saved low registers. 
351 if (GPRCS1Size > 0) { 352 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); 353 if (adjustedGPRCS1Size) { 354 unsigned CFIIndex = 355 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 356 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 357 .addCFIIndex(CFIIndex) 358 .setMIFlags(MachineInstr::FrameSetup); 359 } 360 for (const CalleeSavedInfo &I : CSI) { 361 Register Reg = I.getReg(); 362 int FI = I.getFrameIdx(); 363 switch (Reg) { 364 case ARM::R8: 365 case ARM::R9: 366 case ARM::R10: 367 case ARM::R11: 368 case ARM::R12: 369 if (STI.splitFramePushPop(MF)) 370 break; 371 LLVM_FALLTHROUGH; 372 case ARM::R0: 373 case ARM::R1: 374 case ARM::R2: 375 case ARM::R3: 376 case ARM::R4: 377 case ARM::R5: 378 case ARM::R6: 379 case ARM::R7: 380 case ARM::LR: 381 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 382 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); 383 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 384 .addCFIIndex(CFIIndex) 385 .setMIFlags(MachineInstr::FrameSetup); 386 break; 387 } 388 } 389 } 390 391 // Emit call frame information for the callee-saved high registers. 392 if (GPRCS2Size > 0) { 393 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); 394 for (auto &I : CSI) { 395 Register Reg = I.getReg(); 396 int FI = I.getFrameIdx(); 397 switch (Reg) { 398 case ARM::R8: 399 case ARM::R9: 400 case ARM::R10: 401 case ARM::R11: 402 case ARM::R12: { 403 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 404 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); 405 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 406 .addCFIIndex(CFIIndex) 407 .setMIFlags(MachineInstr::FrameSetup); 408 break; 409 } 410 default: 411 break; 412 } 413 } 414 } 415 416 if (NumBytes) { 417 // Insert it after all the callee-save spills. 
418 // 419 // For a large stack frame, we might need a scratch register to store 420 // the size of the frame. We know all callee-save registers are free 421 // at this point in the prologue, so pick one. 422 unsigned ScratchRegister = ARM::NoRegister; 423 for (auto &I : CSI) { 424 Register Reg = I.getReg(); 425 if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { 426 ScratchRegister = Reg; 427 break; 428 } 429 } 430 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, 431 ScratchRegister, MachineInstr::FrameSetup); 432 if (!HasFP) { 433 CFAOffset += NumBytes; 434 unsigned CFIIndex = MF.addFrameInst( 435 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 436 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 437 .addCFIIndex(CFIIndex) 438 .setMIFlags(MachineInstr::FrameSetup); 439 } 440 } 441 442 if (STI.isTargetELF() && HasFP) 443 MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - 444 AFI->getFramePtrSpillOffset()); 445 446 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); 447 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); 448 AFI->setDPRCalleeSavedAreaSize(DPRCSSize); 449 450 if (RegInfo->hasStackRealignment(MF)) { 451 const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); 452 // Emit the following sequence, using R4 as a temporary, since we cannot use 453 // SP as a source or destination register for the shifts: 454 // mov r4, sp 455 // lsrs r4, r4, #NrBitsToZero 456 // lsls r4, r4, #NrBitsToZero 457 // mov sp, r4 458 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) 459 .addReg(ARM::SP, RegState::Kill) 460 .add(predOps(ARMCC::AL)); 461 462 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) 463 .addDef(ARM::CPSR) 464 .addReg(ARM::R4, RegState::Kill) 465 .addImm(NrBitsToZero) 466 .add(predOps(ARMCC::AL)); 467 468 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) 469 .addDef(ARM::CPSR) 470 .addReg(ARM::R4, RegState::Kill) 471 .addImm(NrBitsToZero) 472 .add(predOps(ARMCC::AL)); 473 474 BuildMI(MBB, MBBI, dl, 
TII.get(ARM::tMOVr), ARM::SP) 475 .addReg(ARM::R4, RegState::Kill) 476 .add(predOps(ARMCC::AL)); 477 478 AFI->setShouldRestoreSPFromFP(true); 479 } 480 481 // If we need a base pointer, set it up here. It's whatever the value 482 // of the stack pointer is at this point. Any variable size objects 483 // will be allocated after this, so we can still use the base pointer 484 // to reference locals. 485 if (RegInfo->hasBasePointer(MF)) 486 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) 487 .addReg(ARM::SP) 488 .add(predOps(ARMCC::AL)); 489 490 // If the frame has variable sized objects then the epilogue must restore 491 // the sp from fp. We can assume there's an FP here since hasFP already 492 // checks for hasVarSizedObjects. 493 if (MFI.hasVarSizedObjects()) 494 AFI->setShouldRestoreSPFromFP(true); 495 496 // In some cases, virtual registers have been introduced, e.g. by uses of 497 // emitThumbRegPlusImmInReg. 498 MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs); 499 } 500 501 void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, 502 MachineBasicBlock &MBB) const { 503 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 504 DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); 505 MachineFrameInfo &MFI = MF.getFrameInfo(); 506 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 507 const ThumbRegisterInfo *RegInfo = 508 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 509 const Thumb1InstrInfo &TII = 510 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 511 512 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 513 int NumBytes = (int)MFI.getStackSize(); 514 assert((unsigned)NumBytes >= ArgRegsSaveSize && 515 "ArgRegsSaveSize is included in NumBytes"); 516 Register FramePtr = RegInfo->getFrameRegister(MF); 517 518 if (!AFI->hasStackFrame()) { 519 if (NumBytes - ArgRegsSaveSize != 0) 520 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 521 NumBytes - ArgRegsSaveSize, ARM::NoRegister, 522 MachineInstr::FrameDestroy); 523 } else { 524 // Unwind MBBI to point to first LDR / VLDRD. 525 if (MBBI != MBB.begin()) { 526 do 527 --MBBI; 528 while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy)); 529 if (!MBBI->getFlag(MachineInstr::FrameDestroy)) 530 ++MBBI; 531 } 532 533 // Move SP to start of FP callee save spill area. 534 NumBytes -= (AFI->getFrameRecordSavedAreaSize() + 535 AFI->getGPRCalleeSavedArea1Size() + 536 AFI->getGPRCalleeSavedArea2Size() + 537 AFI->getDPRCalleeSavedAreaSize() + 538 ArgRegsSaveSize); 539 540 if (AFI->shouldRestoreSPFromFP()) { 541 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; 542 // Reset SP based on frame pointer only if the stack frame extends beyond 543 // frame pointer stack slot, the target is ELF and the function has FP, or 544 // the target uses var sized objects. 
545 if (NumBytes) { 546 assert(!MFI.getPristineRegs(MF).test(ARM::R4) && 547 "No scratch register to restore SP from FP!"); 548 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, 549 TII, *RegInfo, MachineInstr::FrameDestroy); 550 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 551 .addReg(ARM::R4) 552 .add(predOps(ARMCC::AL)) 553 .setMIFlag(MachineInstr::FrameDestroy); 554 } else 555 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 556 .addReg(FramePtr) 557 .add(predOps(ARMCC::AL)) 558 .setMIFlag(MachineInstr::FrameDestroy); 559 } else { 560 // For a large stack frame, we might need a scratch register to store 561 // the size of the frame. We know all callee-save registers are free 562 // at this point in the epilogue, so pick one. 563 unsigned ScratchRegister = ARM::NoRegister; 564 bool HasFP = hasFP(MF); 565 for (auto &I : MFI.getCalleeSavedInfo()) { 566 Register Reg = I.getReg(); 567 if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { 568 ScratchRegister = Reg; 569 break; 570 } 571 } 572 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && 573 &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { 574 MachineBasicBlock::iterator PMBBI = std::prev(MBBI); 575 if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) 576 emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes, 577 ScratchRegister, MachineInstr::FrameDestroy); 578 } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) 579 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes, 580 ScratchRegister, MachineInstr::FrameDestroy); 581 } 582 } 583 584 if (needPopSpecialFixUp(MF)) { 585 bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); 586 (void)Done; 587 assert(Done && "Emission of the special fixup failed!?"); 588 } 589 } 590 591 bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 592 if (!needPopSpecialFixUp(*MBB.getParent())) 593 return true; 594 595 
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 596 return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); 597 } 598 599 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { 600 ARMFunctionInfo *AFI = 601 const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); 602 if (AFI->getArgRegsSaveSize()) 603 return true; 604 605 // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. 606 for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) 607 if (CSI.getReg() == ARM::LR) 608 return true; 609 610 return false; 611 } 612 613 static void findTemporariesForLR(const BitVector &GPRsNoLRSP, 614 const BitVector &PopFriendly, 615 const LivePhysRegs &UsedRegs, unsigned &PopReg, 616 unsigned &TmpReg, MachineRegisterInfo &MRI) { 617 PopReg = TmpReg = 0; 618 for (auto Reg : GPRsNoLRSP.set_bits()) { 619 if (UsedRegs.available(MRI, Reg)) { 620 // Remember the first pop-friendly register and exit. 621 if (PopFriendly.test(Reg)) { 622 PopReg = Reg; 623 TmpReg = 0; 624 break; 625 } 626 // Otherwise, remember that the register will be available to 627 // save a pop-friendly register. 628 TmpReg = Reg; 629 } 630 } 631 } 632 633 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, 634 bool DoIt) const { 635 MachineFunction &MF = *MBB.getParent(); 636 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 637 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 638 const TargetInstrInfo &TII = *STI.getInstrInfo(); 639 const ThumbRegisterInfo *RegInfo = 640 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 641 642 // If MBBI is a return instruction, or is a tPOP followed by a return 643 // instruction in the successor BB, we may be able to directly restore 644 // LR in the PC. 645 // This is only possible with v5T ops (v4T can't change the Thumb bit via 646 // a POP PC instruction), and only if we do not need to emit any SP update. 
647 // Otherwise, we need a temporary register to pop the value 648 // and copy that value into LR. 649 auto MBBI = MBB.getFirstTerminator(); 650 bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; 651 if (CanRestoreDirectly) { 652 if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) 653 CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || 654 MBBI->getOpcode() == ARM::tPOP_RET); 655 else { 656 auto MBBI_prev = MBBI; 657 MBBI_prev--; 658 assert(MBBI_prev->getOpcode() == ARM::tPOP); 659 assert(MBB.succ_size() == 1); 660 if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) 661 MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. 662 else 663 CanRestoreDirectly = false; 664 } 665 } 666 667 if (CanRestoreDirectly) { 668 if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) 669 return true; 670 MachineInstrBuilder MIB = 671 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) 672 .add(predOps(ARMCC::AL)) 673 .setMIFlag(MachineInstr::FrameDestroy); 674 // Copy implicit ops and popped registers, if any. 675 for (auto MO: MBBI->operands()) 676 if (MO.isReg() && (MO.isImplicit() || MO.isDef())) 677 MIB.add(MO); 678 MIB.addReg(ARM::PC, RegState::Define); 679 // Erase the old instruction (tBX_RET or tPOP). 680 MBB.erase(MBBI); 681 return true; 682 } 683 684 // Look for a temporary register to use. 685 // First, compute the liveness information. 686 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 687 LivePhysRegs UsedRegs(TRI); 688 UsedRegs.addLiveOuts(MBB); 689 // The semantic of pristines changed recently and now, 690 // the callee-saved registers that are touched in the function 691 // are not part of the pristines set anymore. 692 // Add those callee-saved now. 
693 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); 694 for (unsigned i = 0; CSRegs[i]; ++i) 695 UsedRegs.addReg(CSRegs[i]); 696 697 DebugLoc dl = DebugLoc(); 698 if (MBBI != MBB.end()) { 699 dl = MBBI->getDebugLoc(); 700 auto InstUpToMBBI = MBB.end(); 701 while (InstUpToMBBI != MBBI) 702 // The pre-decrement is on purpose here. 703 // We want to have the liveness right before MBBI. 704 UsedRegs.stepBackward(*--InstUpToMBBI); 705 } 706 707 // Look for a register that can be directly use in the POP. 708 unsigned PopReg = 0; 709 // And some temporary register, just in case. 710 unsigned TemporaryReg = 0; 711 BitVector PopFriendly = 712 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); 713 // R7 may be used as a frame pointer, hence marked as not generally 714 // allocatable, however there's no reason to not use it as a temporary for 715 // restoring LR. 716 if (STI.getFramePointerReg() == ARM::R7) 717 PopFriendly.set(ARM::R7); 718 719 assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); 720 // Rebuild the GPRs from the high registers because they are removed 721 // form the GPR reg class for thumb1. 722 BitVector GPRsNoLRSP = 723 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); 724 GPRsNoLRSP |= PopFriendly; 725 GPRsNoLRSP.reset(ARM::LR); 726 GPRsNoLRSP.reset(ARM::SP); 727 GPRsNoLRSP.reset(ARM::PC); 728 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg, 729 MF.getRegInfo()); 730 731 // If we couldn't find a pop-friendly register, try restoring LR before 732 // popping the other callee-saved registers, so we could use one of them as a 733 // temporary. 
734 bool UseLDRSP = false; 735 if (!PopReg && MBBI != MBB.begin()) { 736 auto PrevMBBI = MBBI; 737 PrevMBBI--; 738 if (PrevMBBI->getOpcode() == ARM::tPOP) { 739 UsedRegs.stepBackward(*PrevMBBI); 740 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, 741 TemporaryReg, MF.getRegInfo()); 742 if (PopReg) { 743 MBBI = PrevMBBI; 744 UseLDRSP = true; 745 } 746 } 747 } 748 749 if (!DoIt && !PopReg && !TemporaryReg) 750 return false; 751 752 assert((PopReg || TemporaryReg) && "Cannot get LR"); 753 754 if (UseLDRSP) { 755 assert(PopReg && "Do not know how to get LR"); 756 // Load the LR via LDR tmp, [SP, #off] 757 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) 758 .addReg(PopReg, RegState::Define) 759 .addReg(ARM::SP) 760 .addImm(MBBI->getNumExplicitOperands() - 2) 761 .add(predOps(ARMCC::AL)) 762 .setMIFlag(MachineInstr::FrameDestroy); 763 // Move from the temporary register to the LR. 764 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 765 .addReg(ARM::LR, RegState::Define) 766 .addReg(PopReg, RegState::Kill) 767 .add(predOps(ARMCC::AL)) 768 .setMIFlag(MachineInstr::FrameDestroy); 769 // Advance past the pop instruction. 770 MBBI++; 771 // Increment the SP. 772 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 773 ArgRegsSaveSize + 4, ARM::NoRegister, 774 MachineInstr::FrameDestroy); 775 return true; 776 } 777 778 if (TemporaryReg) { 779 assert(!PopReg && "Unnecessary MOV is about to be inserted"); 780 PopReg = PopFriendly.find_first(); 781 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 782 .addReg(TemporaryReg, RegState::Define) 783 .addReg(PopReg, RegState::Kill) 784 .add(predOps(ARMCC::AL)) 785 .setMIFlag(MachineInstr::FrameDestroy); 786 } 787 788 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { 789 // We couldn't use the direct restoration above, so 790 // perform the opposite conversion: tPOP_RET to tPOP. 
791 MachineInstrBuilder MIB = 792 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) 793 .add(predOps(ARMCC::AL)) 794 .setMIFlag(MachineInstr::FrameDestroy); 795 bool Popped = false; 796 for (auto MO: MBBI->operands()) 797 if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && 798 MO.getReg() != ARM::PC) { 799 MIB.add(MO); 800 if (!MO.isImplicit()) 801 Popped = true; 802 } 803 // Is there anything left to pop? 804 if (!Popped) 805 MBB.erase(MIB.getInstr()); 806 // Erase the old instruction. 807 MBB.erase(MBBI); 808 MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) 809 .add(predOps(ARMCC::AL)) 810 .setMIFlag(MachineInstr::FrameDestroy); 811 } 812 813 assert(PopReg && "Do not know how to get LR"); 814 BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) 815 .add(predOps(ARMCC::AL)) 816 .addReg(PopReg, RegState::Define) 817 .setMIFlag(MachineInstr::FrameDestroy); 818 819 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, 820 ARM::NoRegister, MachineInstr::FrameDestroy); 821 822 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 823 .addReg(ARM::LR, RegState::Define) 824 .addReg(PopReg, RegState::Kill) 825 .add(predOps(ARMCC::AL)) 826 .setMIFlag(MachineInstr::FrameDestroy); 827 828 if (TemporaryReg) 829 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 830 .addReg(PopReg, RegState::Define) 831 .addReg(TemporaryReg, RegState::Kill) 832 .add(predOps(ARMCC::AL)) 833 .setMIFlag(MachineInstr::FrameDestroy); 834 835 return true; 836 } 837 838 static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, 839 ARM::R7, ARM::LR}; 840 static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9, 841 ARM::R10, ARM::R11}; 842 static const SmallVector<Register> OrderedCopyRegs = { 843 ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 844 ARM::R5, ARM::R6, ARM::R7, ARM::LR}; 845 846 static void splitLowAndHighRegs(const std::set<Register> &Regs, 847 std::set<Register> &LowRegs, 848 std::set<Register> &HighRegs) { 849 for (Register Reg : Regs) { 
850 if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { 851 LowRegs.insert(Reg); 852 } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { 853 HighRegs.insert(Reg); 854 } else { 855 llvm_unreachable("callee-saved register of unexpected class"); 856 } 857 } 858 } 859 860 template <typename It> 861 It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, 862 const std::set<Register> &RegSet) { 863 return std::find_if(OrderedStartIt, OrderedEndIt, 864 [&](Register Reg) { return RegSet.count(Reg); }); 865 } 866 867 static void pushRegsToStack(MachineBasicBlock &MBB, 868 MachineBasicBlock::iterator MI, 869 const TargetInstrInfo &TII, 870 const std::set<Register> &RegsToSave, 871 const std::set<Register> &CopyRegs) { 872 MachineFunction &MF = *MBB.getParent(); 873 const MachineRegisterInfo &MRI = MF.getRegInfo(); 874 DebugLoc DL; 875 876 std::set<Register> LowRegs, HighRegs; 877 splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); 878 879 // Push low regs first 880 if (!LowRegs.empty()) { 881 MachineInstrBuilder MIB = 882 BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 883 for (unsigned Reg : OrderedLowRegs) { 884 if (LowRegs.count(Reg)) { 885 bool isKill = !MRI.isLiveIn(Reg); 886 if (isKill && !MRI.isReserved(Reg)) 887 MBB.addLiveIn(Reg); 888 889 MIB.addReg(Reg, getKillRegState(isKill)); 890 } 891 } 892 MIB.setMIFlags(MachineInstr::FrameSetup); 893 } 894 895 // Now push the high registers 896 // There are no store instructions that can access high registers directly, 897 // so we have to move them to low registers, and push them. 898 // This might take multiple pushes, as it is possible for there to 899 // be fewer low registers available than high registers which need saving. 900 901 // Find the first register to save. 902 // Registers must be processed in reverse order so that in case we need to use 903 // multiple PUSH instructions, the order of the registers on the stack still 904 // matches the unwind info. 
They need to be swicthed back to ascending order 905 // before adding to the PUSH instruction. 906 auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), 907 OrderedHighRegs.rend(), 908 HighRegs); 909 910 while (HiRegToSave != OrderedHighRegs.rend()) { 911 // Find the first low register to use. 912 auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), 913 OrderedCopyRegs.rend(), 914 CopyRegs); 915 916 // Create the PUSH, but don't insert it yet (the MOVs need to come first). 917 MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) 918 .add(predOps(ARMCC::AL)) 919 .setMIFlags(MachineInstr::FrameSetup); 920 921 SmallVector<unsigned, 4> RegsToPush; 922 while (HiRegToSave != OrderedHighRegs.rend() && 923 CopyRegIt != OrderedCopyRegs.rend()) { 924 if (HighRegs.count(*HiRegToSave)) { 925 bool isKill = !MRI.isLiveIn(*HiRegToSave); 926 if (isKill && !MRI.isReserved(*HiRegToSave)) 927 MBB.addLiveIn(*HiRegToSave); 928 929 // Emit a MOV from the high reg to the low reg. 930 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 931 .addReg(*CopyRegIt, RegState::Define) 932 .addReg(*HiRegToSave, getKillRegState(isKill)) 933 .add(predOps(ARMCC::AL)) 934 .setMIFlags(MachineInstr::FrameSetup); 935 936 // Record the register that must be added to the PUSH. 937 RegsToPush.push_back(*CopyRegIt); 938 939 CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), 940 OrderedCopyRegs.rend(), 941 CopyRegs); 942 HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), 943 OrderedHighRegs.rend(), 944 HighRegs); 945 } 946 } 947 948 // Add the low registers to the PUSH, in ascending order. 949 for (unsigned Reg : llvm::reverse(RegsToPush)) 950 PushMIB.addReg(Reg, RegState::Kill); 951 952 // Insert the PUSH instruction after the MOVs. 
953 MBB.insert(MI, PushMIB); 954 } 955 } 956 957 static void popRegsFromStack(MachineBasicBlock &MBB, 958 MachineBasicBlock::iterator &MI, 959 const TargetInstrInfo &TII, 960 const std::set<Register> &RegsToRestore, 961 const std::set<Register> &AvailableCopyRegs, 962 bool IsVarArg, bool HasV5Ops) { 963 if (RegsToRestore.empty()) 964 return; 965 966 MachineFunction &MF = *MBB.getParent(); 967 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 968 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); 969 970 std::set<Register> LowRegs, HighRegs; 971 splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); 972 973 // Pop the high registers first 974 // There are no store instructions that can access high registers directly, 975 // so we have to pop into low registers and them move to the high registers. 976 // This might take multiple pops, as it is possible for there to 977 // be fewer low registers available than high registers which need restoring. 978 979 // Find the first register to restore. 980 auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), 981 OrderedHighRegs.end(), 982 HighRegs); 983 984 std::set<Register> CopyRegs = AvailableCopyRegs; 985 Register LowScratchReg; 986 if (!HighRegs.empty() && CopyRegs.empty()) { 987 // No copy regs are available to pop high regs. Let's make use of a return 988 // register and the scratch register (IP/R12) to copy things around. 989 LowScratchReg = ARM::R0; 990 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 991 .addReg(ARM::R12, RegState::Define) 992 .addReg(LowScratchReg, RegState::Kill) 993 .add(predOps(ARMCC::AL)) 994 .setMIFlag(MachineInstr::FrameDestroy); 995 CopyRegs.insert(LowScratchReg); 996 } 997 998 while (HiRegToRestore != OrderedHighRegs.end()) { 999 assert(!CopyRegs.empty()); 1000 // Find the first low register to use. 1001 auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), 1002 OrderedCopyRegs.end(), 1003 CopyRegs); 1004 1005 // Create the POP instruction. 
1006 MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) 1007 .add(predOps(ARMCC::AL)) 1008 .setMIFlag(MachineInstr::FrameDestroy); 1009 1010 while (HiRegToRestore != OrderedHighRegs.end() && 1011 CopyReg != OrderedCopyRegs.end()) { 1012 // Add the low register to the POP. 1013 PopMIB.addReg(*CopyReg, RegState::Define); 1014 1015 // Create the MOV from low to high register. 1016 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1017 .addReg(*HiRegToRestore, RegState::Define) 1018 .addReg(*CopyReg, RegState::Kill) 1019 .add(predOps(ARMCC::AL)) 1020 .setMIFlag(MachineInstr::FrameDestroy); 1021 1022 CopyReg = getNextOrderedReg(std::next(CopyReg), 1023 OrderedCopyRegs.end(), 1024 CopyRegs); 1025 HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), 1026 OrderedHighRegs.end(), 1027 HighRegs); 1028 } 1029 } 1030 1031 // Restore low register used as scratch if necessary 1032 if (LowScratchReg.isValid()) { 1033 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1034 .addReg(LowScratchReg, RegState::Define) 1035 .addReg(ARM::R12, RegState::Kill) 1036 .add(predOps(ARMCC::AL)) 1037 .setMIFlag(MachineInstr::FrameDestroy); 1038 } 1039 1040 // Now pop the low registers 1041 if (!LowRegs.empty()) { 1042 MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) 1043 .add(predOps(ARMCC::AL)) 1044 .setMIFlag(MachineInstr::FrameDestroy); 1045 1046 bool NeedsPop = false; 1047 for (Register Reg : OrderedLowRegs) { 1048 if (!LowRegs.count(Reg)) 1049 continue; 1050 1051 if (Reg == ARM::LR) { 1052 if (!MBB.succ_empty() || 1053 MI->getOpcode() == ARM::TCRETURNdi || 1054 MI->getOpcode() == ARM::TCRETURNri) 1055 // LR may only be popped into PC, as part of return sequence. 1056 // If this isn't the return sequence, we'll need emitPopSpecialFixUp 1057 // to restore LR the hard way. 1058 // FIXME: if we don't pass any stack arguments it would be actually 1059 // advantageous *and* correct to do the conversion to an ordinary call 1060 // instruction here. 
1061 continue; 1062 // Special epilogue for vararg functions. See emitEpilogue 1063 if (IsVarArg) 1064 continue; 1065 // ARMv4T requires BX, see emitEpilogue 1066 if (!HasV5Ops) 1067 continue; 1068 1069 // CMSE entry functions must return via BXNS, see emitEpilogue. 1070 if (AFI->isCmseNSEntryFunction()) 1071 continue; 1072 1073 // Pop LR into PC. 1074 Reg = ARM::PC; 1075 (*MIB).setDesc(TII.get(ARM::tPOP_RET)); 1076 if (MI != MBB.end()) 1077 MIB.copyImplicitOps(*MI); 1078 MI = MBB.erase(MI); 1079 } 1080 MIB.addReg(Reg, getDefRegState(true)); 1081 NeedsPop = true; 1082 } 1083 1084 // It's illegal to emit pop instruction without operands. 1085 if (NeedsPop) 1086 MBB.insert(MI, &*MIB); 1087 else 1088 MF.deleteMachineInstr(MIB); 1089 } 1090 } 1091 1092 bool Thumb1FrameLowering::spillCalleeSavedRegisters( 1093 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1094 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1095 if (CSI.empty()) 1096 return false; 1097 1098 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1099 MachineFunction &MF = *MBB.getParent(); 1100 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1101 MF.getSubtarget().getRegisterInfo()); 1102 Register FPReg = RegInfo->getFrameRegister(MF); 1103 1104 // In case FP is a high reg, we need a separate push sequence to generate 1105 // a correct Frame Record 1106 bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1107 1108 std::set<Register> FrameRecord; 1109 std::set<Register> SpilledGPRs; 1110 for (const CalleeSavedInfo &I : CSI) { 1111 Register Reg = I.getReg(); 1112 if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) 1113 FrameRecord.insert(Reg); 1114 else 1115 SpilledGPRs.insert(Reg); 1116 } 1117 1118 pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); 1119 1120 // Determine intermediate registers which can be used for pushing high regs: 1121 // - Spilled low regs 1122 // - Unused argument registers 1123 
std::set<Register> CopyRegs; 1124 for (Register Reg : SpilledGPRs) 1125 if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && 1126 !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) 1127 CopyRegs.insert(Reg); 1128 for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) 1129 if (!MF.getRegInfo().isLiveIn(ArgReg)) 1130 CopyRegs.insert(ArgReg); 1131 1132 pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs); 1133 1134 return true; 1135 } 1136 1137 bool Thumb1FrameLowering::restoreCalleeSavedRegisters( 1138 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1139 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1140 if (CSI.empty()) 1141 return false; 1142 1143 MachineFunction &MF = *MBB.getParent(); 1144 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1145 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1146 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1147 MF.getSubtarget().getRegisterInfo()); 1148 bool IsVarArg = AFI->getArgRegsSaveSize() > 0; 1149 Register FPReg = RegInfo->getFrameRegister(MF); 1150 1151 // In case FP is a high reg, we need a separate pop sequence to generate 1152 // a correct Frame Record 1153 bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1154 1155 std::set<Register> FrameRecord; 1156 std::set<Register> SpilledGPRs; 1157 for (CalleeSavedInfo &I : CSI) { 1158 Register Reg = I.getReg(); 1159 if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) 1160 FrameRecord.insert(Reg); 1161 else 1162 SpilledGPRs.insert(Reg); 1163 1164 if (Reg == ARM::LR) 1165 I.setRestored(false); 1166 } 1167 1168 // Determine intermidiate registers which can be used for popping high regs: 1169 // - Spilled low regs 1170 // - Unused return registers 1171 std::set<Register> CopyRegs; 1172 std::set<Register> UnusedReturnRegs; 1173 for (Register Reg : SpilledGPRs) 1174 if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) 1175 
CopyRegs.insert(Reg); 1176 auto Terminator = MBB.getFirstTerminator(); 1177 if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { 1178 UnusedReturnRegs.insert(ARM::R0); 1179 UnusedReturnRegs.insert(ARM::R1); 1180 UnusedReturnRegs.insert(ARM::R2); 1181 UnusedReturnRegs.insert(ARM::R3); 1182 for (auto Op : Terminator->implicit_operands()) { 1183 if (Op.isReg()) 1184 UnusedReturnRegs.erase(Op.getReg()); 1185 } 1186 } 1187 CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); 1188 1189 // First pop regular spilled regs. 1190 popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, 1191 STI.hasV5TOps()); 1192 1193 // LR may only be popped into pc, as part of a return sequence. 1194 // Check that no other pop instructions are inserted after that. 1195 assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && 1196 "Can't insert pop after return sequence"); 1197 1198 // Now pop Frame Record regs. 1199 // Only unused return registers can be used as copy regs at this point. 1200 popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, 1201 STI.hasV5TOps()); 1202 1203 return true; 1204 } 1205