1 //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the Thumb1 implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "Thumb1FrameLowering.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMBaseRegisterInfo.h" 16 #include "ARMMachineFunctionInfo.h" 17 #include "ARMSubtarget.h" 18 #include "Thumb1InstrInfo.h" 19 #include "ThumbRegisterInfo.h" 20 #include "Utils/ARMBaseInfo.h" 21 #include "llvm/ADT/BitVector.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/CodeGen/LivePhysRegs.h" 25 #include "llvm/CodeGen/MachineBasicBlock.h" 26 #include "llvm/CodeGen/MachineFrameInfo.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineModuleInfo.h" 31 #include "llvm/CodeGen/MachineOperand.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/TargetInstrInfo.h" 34 #include "llvm/CodeGen/TargetOpcodes.h" 35 #include "llvm/CodeGen/TargetSubtargetInfo.h" 36 #include "llvm/IR/DebugLoc.h" 37 #include "llvm/MC/MCContext.h" 38 #include "llvm/MC/MCDwarf.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/Support/Compiler.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/MathExtras.h" 43 #include <bitset> 44 #include <cassert> 45 #include <iterator> 46 #include <vector> 47 48 using namespace llvm; 49 50 Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) 51 : ARMFrameLowering(sti) {} 52 53 bool 
Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ 54 const MachineFrameInfo &MFI = MF.getFrameInfo(); 55 unsigned CFSize = MFI.getMaxCallFrameSize(); 56 // It's not always a good idea to include the call frame as part of the 57 // stack frame. ARM (especially Thumb) has small immediate offset to 58 // address the stack frame. So a large call frame can cause poor codegen 59 // and may even makes it impossible to scavenge a register. 60 if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 61 return false; 62 63 return !MFI.hasVarSizedObjects(); 64 } 65 66 static void 67 emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, 68 MachineBasicBlock::iterator &MBBI, 69 const TargetInstrInfo &TII, const DebugLoc &dl, 70 const ThumbRegisterInfo &MRI, int NumBytes, 71 unsigned ScratchReg, unsigned MIFlags) { 72 // If it would take more than three instructions to adjust the stack pointer 73 // using tADDspi/tSUBspi, load an immediate instead. 74 if (std::abs(NumBytes) > 508 * 3) { 75 // We use a different codepath here from the normal 76 // emitThumbRegPlusImmediate so we don't have to deal with register 77 // scavenging. (Scavenging could try to use the emergency spill slot 78 // before we've actually finished setting up the stack.) 79 if (ScratchReg == ARM::NoRegister) 80 report_fatal_error("Failed to emit Thumb1 stack adjustment"); 81 MachineFunction &MF = *MBB.getParent(); 82 const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); 83 if (ST.genExecuteOnly()) { 84 unsigned XOInstr = ST.useMovt() ? 
ARM::t2MOVi32imm : ARM::tMOVi32imm; 85 BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg) 86 .addImm(NumBytes).setMIFlags(MIFlags); 87 } else { 88 MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL, 89 0, MIFlags); 90 } 91 BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) 92 .addReg(ARM::SP) 93 .addReg(ScratchReg, RegState::Kill) 94 .add(predOps(ARMCC::AL)) 95 .setMIFlags(MIFlags); 96 return; 97 } 98 // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate 99 // won't change. 100 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 101 MRI, MIFlags); 102 103 } 104 105 static void emitCallSPUpdate(MachineBasicBlock &MBB, 106 MachineBasicBlock::iterator &MBBI, 107 const TargetInstrInfo &TII, const DebugLoc &dl, 108 const ThumbRegisterInfo &MRI, int NumBytes, 109 unsigned MIFlags = MachineInstr::NoFlags) { 110 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 111 MRI, MIFlags); 112 } 113 114 115 MachineBasicBlock::iterator Thumb1FrameLowering:: 116 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 117 MachineBasicBlock::iterator I) const { 118 const Thumb1InstrInfo &TII = 119 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 120 const ThumbRegisterInfo *RegInfo = 121 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 122 if (!hasReservedCallFrame(MF)) { 123 // If we have alloca, convert as follows: 124 // ADJCALLSTACKDOWN -> sub, sp, sp, amount 125 // ADJCALLSTACKUP -> add, sp, sp, amount 126 MachineInstr &Old = *I; 127 DebugLoc dl = Old.getDebugLoc(); 128 unsigned Amount = TII.getFrameSize(Old); 129 if (Amount != 0) { 130 // We need to keep the stack aligned properly. To do this, we round the 131 // amount of space needed for the outgoing arguments up to the next 132 // alignment boundary. 133 Amount = alignTo(Amount, getStackAlign()); 134 135 // Replace the pseudo instruction with a new instruction... 
unsigned Opc = Old.getOpcode();
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        // Call frame setup: move SP down by the aligned amount.
        emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
      } else {
        // Call frame destroy: move SP back up by the same amount.
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
      }
    }
  }
  return MBB.erase(I);
}

/// Emit the Thumb1 prologue into \p MBB.
///
/// In order, this:
///  - rounds the stack size up to a multiple of 4 and stores it back,
///  - allocates the argument-register save area, if any,
///  - for functions without a stack frame, allocates the remaining bytes and
///    returns,
///  - otherwise classifies each callee-saved register into the frame record,
///    GPRCS1, GPRCS2 or DPRCS area and walks over the tPUSH / tMOVr
///    instructions found at the start of the block,
///  - records the area offsets and sizes in ARMFunctionInfo,
///  - sets up the frame pointer (when hasFP) and emits the CFI directives,
///  - allocates the remaining local area, picking a callee-saved low register
///    as scratch for large frames,
///  - optionally realigns SP and initialises the base pointer.
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  assert(NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);
  Register BasePtr = RegInfo->getBaseRegister();
  // Running CFA offset; bumped each time SP is moved below.
  int CFAOffset = 0;

  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
  NumBytes = (NumBytes + 3) & ~3;
  MFI.setStackSize(NumBytes);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;

  // Allocate the argument-register save area first. No scratch register is
  // passed for this adjustment.
  if (ArgRegsSaveSize) {
    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
                                 ARM::NoRegister, MachineInstr::FrameSetup);
    CFAOffset += ArgRegsSaveSize;
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  // Without a stack frame there are no callee-save areas to lay out: allocate
  // whatever is left of the stack size in one step and finish.
  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0) {
      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
                                   -(NumBytes - ArgRegsSaveSize),
                                   ARM::NoRegister, MachineInstr::FrameSetup);
      CFAOffset += NumBytes - ArgRegsSaveSize;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // A separate frame record area is used when the function has a frame
  // pointer and that frame pointer is a high register.
  bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr);

  // Attribute each callee-saved register to one spill area. The cases fall
  // through on purpose: R11 and LR go to the frame record when there is one,
  // R8-R11 go to GPRCS2 when pushes are split, everything else that is a
  // listed GPR goes to GPRCS1, and any other register is counted as an
  // 8-byte DPRCS entry.
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    int FI = I.getFrameIdx();
    if (Reg == FramePtr)
      FramePtrSpillFI = FI;
    switch (Reg) {
    case ARM::R11:
      if (HasFrameRecordArea) {
        FRSize += 4;
        break;
      }
      [[fallthrough]];
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
      if (STI.splitFramePushPop(MF)) {
        GPRCS2Size += 4;
        break;
      }
      [[fallthrough]];
    case ARM::LR:
      if (HasFrameRecordArea) {
        FRSize += 4;
        break;
      }
      [[fallthrough]];
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
      GPRCS1Size += 4;
      break;
    default:
      DPRCSSize += 8;
    }
  }

  // Iterators to the last push of each area; only assigned when the
  // corresponding push is actually found below.
  MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push;
  if (HasFrameRecordArea) {
    // Skip Frame Record setup:
    //   push {lr}
    //   mov lr, r11
    //   push {lr}
    std::advance(MBBI, 2);
    FRPush = MBBI++;
  }

  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
    GPRCS1Push = MBBI;
    ++MBBI;
  }

  // Find last push instruction for GPRCS2 - spilling of high registers
  // (r8-r11) could consist of multiple tPUSH and tMOVr instructions.
  while (true) {
    MachineBasicBlock::iterator OldMBBI = MBBI;
    // Skip a run of tMOVr instructions
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr &&
           MBBI->getFlag(MachineInstr::FrameSetup))
      MBBI++;
    if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH &&
        MBBI->getFlag(MachineInstr::FrameSetup)) {
      GPRCS2Push = MBBI;
      MBBI++;
    } else {
      // We have reached an instruction which is not a push, so the previous
      // run of tMOVr instructions (which may have been empty) was not part of
      // the prologue. Reset MBBI back to the last PUSH of the prologue.
      MBBI = OldMBBI;
      break;
    }
  }

  // Determine starting offsets of spill areas.
  unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize -
                         (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
  bool HasFP = hasFP(MF);
  if (HasFP)
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  if (HasFrameRecordArea)
    AFI->setFrameRecordSavedAreaSize(FRSize);
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
  // From here on NumBytes is what remains to be allocated below the
  // callee-save areas.
  NumBytes = DPRCSOffset;

  // If the remaining allocation can be folded into the GPRCS1 push, account
  // for it there and leave nothing for the explicit SP update further down.
  int FramePtrOffsetInBlock = 0;
  unsigned adjustedGPRCS1Size = GPRCS1Size;
  if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
      tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) {
    FramePtrOffsetInBlock = NumBytes;
    adjustedGPRCS1Size += NumBytes;
    NumBytes = 0;
  }
  CFAOffset += adjustedGPRCS1Size;

  // Adjust FP so it points to the stack slot that contains the previous FP.
  if (HasFP) {
    MachineBasicBlock::iterator AfterPush =
        HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push);
    if (HasFrameRecordArea) {
      // We have just finished pushing the previous FP into the stack,
      // so simply capture the SP value as the new Frame Pointer.
      BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr)
          .addReg(ARM::SP)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    } else {
      // FP = SP + offset; tADDrSPi takes the offset in units of 4 bytes,
      // hence the division.
      FramePtrOffsetInBlock +=
          MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
      BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr)
          .addReg(ARM::SP)
          .addImm(FramePtrOffsetInBlock / 4)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    }

    // Describe the CFA in terms of the frame pointer: register plus offset
    // when FP is not at the top of the pushed area, register only otherwise.
    if(FramePtrOffsetInBlock) {
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
          nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock)));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    if (NumBytes > 508)
      // If offset is > 508 then sp cannot be adjusted in a single instruction,
      // try restoring from fp instead.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Emit call frame information for the callee-saved low registers.
  if (GPRCS1Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
    if (adjustedGPRCS1Size) {
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        // With split pushes the high registers are described together with
        // the GPRCS2 push below instead.
        if (STI.splitFramePushPop(MF))
          break;
        [[fallthrough]];
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
    }
  }

  // Emit call frame information for the callee-saved high registers.
  if (GPRCS2Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
    for (auto &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12: {
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
      default:
        break;
      }
    }
  }

  if (NumBytes) {
    // Insert it after all the callee-save spills.
    //
    // For a large stack frame, we might need a scratch register to store
    // the size of the frame.  We know all callee-save registers are free
    // at this point in the prologue, so pick one.
    unsigned ScratchRegister = ARM::NoRegister;
    for (auto &I : CSI) {
      Register Reg = I.getReg();
      // The frame pointer already holds a live value when HasFP, so it is
      // skipped.
      if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
        ScratchRegister = Reg;
        break;
      }
    }
    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
                                 ScratchRegister, MachineInstr::FrameSetup);
    // With a frame pointer the CFA is already expressed relative to FP, so
    // only the SP-based description needs updating.
    if (!HasFP) {
      CFAOffset += NumBytes;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  if (STI.isTargetELF() && HasFP)
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  if (RegInfo->hasStackRealignment(MF)) {
    const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
    // Emit the following sequence, using R4 as a temporary, since we cannot use
    // SP as a source or destination register for the shifts:
    // mov  r4, sp
    // lsrs r4, r4, #NrBitsToZero
    // lsls r4, r4, #NrBitsToZero
    // mov  sp, r4
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
      .addReg(ARM::SP, RegState::Kill)
      .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
      .addDef(ARM::CPSR)
      .addReg(ARM::R4, RegState::Kill)
      .addImm(NrBitsToZero)
      .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
      .addDef(ARM::CPSR)
      .addReg(ARM::R4, RegState::Kill)
      .addImm(NrBitsToZero)
      .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
      .addReg(ARM::R4, RegState::Kill)
      .add(predOps(ARMCC::AL));

    // After realignment the distance between SP and the saved registers is
    // no longer a compile-time constant, so the epilogue must go through FP.
    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF))
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);

  // In some cases, virtual registers have been introduced, e.g. by uses of
  // emitThumbRegPlusImmInReg.
  MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}

/// Emit the Thumb1 epilogue into \p MBB, before its first terminator.
///
/// For functions without a stack frame this only releases the local area.
/// Otherwise SP is moved back to the start of the callee-save area, either by
/// recomputing it from the frame pointer (when ARMFunctionInfo says SP must
/// be restored from FP) or by adding the local area size, folding that update
/// into an existing pop where possible. Finally the special LR/vararg pop
/// fix-up is emitted when needPopSpecialFixUp() requires it.
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI.getStackSize();
  assert((unsigned)NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
  Register FramePtr = RegInfo->getFrameRegister(MF);

  if (!AFI->hasStackFrame()) {
    // Only the local area was allocated by the prologue; the argument
    // register save area is not released here.
    if (NumBytes - ArgRegsSaveSize != 0)
      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
                                   NumBytes - ArgRegsSaveSize, ARM::NoRegister,
                                   MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    // Concretely: step back over the trailing run of FrameDestroy-flagged
    // instructions so that MBBI ends up on the first one of that run.
    if (MBBI != MBB.begin()) {
      do
        --MBBI;
      while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(MachineInstr::FrameDestroy))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (AFI->getFrameRecordSavedAreaSize() +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedAreaSize() +
                 ArgRegsSaveSize);

    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      // Reset SP based on frame pointer only if the stack frame extends beyond
      // frame pointer stack slot, the target is ELF and the function has FP, or
      // the target uses var sized objects.
      if (NumBytes) {
        // R4 is used as the temporary for FP - NumBytes; it must have been
        // saved by this function for that to be safe.
        assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
               "No scratch register to restore SP from FP!");
        emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                  TII, *RegInfo, MachineInstr::FrameDestroy);
        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
            .addReg(ARM::R4)
            .add(predOps(ARMCC::AL))
            .setMIFlag(MachineInstr::FrameDestroy);
      } else
        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
            .addReg(FramePtr)
            .add(predOps(ARMCC::AL))
            .setMIFlag(MachineInstr::FrameDestroy);
    } else {
      // For a large stack frame, we might need a scratch register to store
      // the size of the frame.  We know all callee-save registers are free
      // at this point in the epilogue, so pick one.
      unsigned ScratchRegister = ARM::NoRegister;
      bool HasFP = hasFP(MF);
      for (auto &I : MFI.getCalleeSavedInfo()) {
        Register Reg = I.getReg();
        if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
          ScratchRegister = Reg;
          break;
        }
      }
      // When the block ends in "tPOP; tBX_RET", try to fold the SP update
      // into that tPOP (and insert before it otherwise); in every other case
      // work on the instruction MBBI points at.
      if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
          &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
        MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
        if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
          emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
                                       ScratchRegister, MachineInstr::FrameDestroy);
      } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
        emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
                                     ScratchRegister, MachineInstr::FrameDestroy);
    }
  }

  if (needPopSpecialFixUp(MF)) {
    bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
    (void)Done;
    assert(Done && "Emission of the special fixup failed!?");
  }
}

/// Return true if \p MBB can hold the epilogue: either no special pop fix-up
/// is needed for this function, or emitPopSpecialFixUp reports (with DoIt set
/// to false) that the fix-up can be performed in \p MBB.
bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  if (!needPopSpecialFixUp(*MBB.getParent()))
    return true;

MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 597 return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); 598 } 599 600 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { 601 ARMFunctionInfo *AFI = 602 const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); 603 if (AFI->getArgRegsSaveSize()) 604 return true; 605 606 // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. 607 for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) 608 if (CSI.getReg() == ARM::LR) 609 return true; 610 611 return false; 612 } 613 614 static void findTemporariesForLR(const BitVector &GPRsNoLRSP, 615 const BitVector &PopFriendly, 616 const LivePhysRegs &UsedRegs, unsigned &PopReg, 617 unsigned &TmpReg, MachineRegisterInfo &MRI) { 618 PopReg = TmpReg = 0; 619 for (auto Reg : GPRsNoLRSP.set_bits()) { 620 if (UsedRegs.available(MRI, Reg)) { 621 // Remember the first pop-friendly register and exit. 622 if (PopFriendly.test(Reg)) { 623 PopReg = Reg; 624 TmpReg = 0; 625 break; 626 } 627 // Otherwise, remember that the register will be available to 628 // save a pop-friendly register. 629 TmpReg = Reg; 630 } 631 } 632 } 633 634 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, 635 bool DoIt) const { 636 MachineFunction &MF = *MBB.getParent(); 637 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 638 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 639 const TargetInstrInfo &TII = *STI.getInstrInfo(); 640 const ThumbRegisterInfo *RegInfo = 641 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 642 643 // If MBBI is a return instruction, or is a tPOP followed by a return 644 // instruction in the successor BB, we may be able to directly restore 645 // LR in the PC. 646 // This is only possible with v5T ops (v4T can't change the Thumb bit via 647 // a POP PC instruction), and only if we do not need to emit any SP update. 
648 // Otherwise, we need a temporary register to pop the value 649 // and copy that value into LR. 650 auto MBBI = MBB.getFirstTerminator(); 651 bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; 652 if (CanRestoreDirectly) { 653 if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) 654 CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || 655 MBBI->getOpcode() == ARM::tPOP_RET); 656 else { 657 auto MBBI_prev = MBBI; 658 MBBI_prev--; 659 assert(MBBI_prev->getOpcode() == ARM::tPOP); 660 assert(MBB.succ_size() == 1); 661 if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) 662 MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. 663 else 664 CanRestoreDirectly = false; 665 } 666 } 667 668 if (CanRestoreDirectly) { 669 if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) 670 return true; 671 MachineInstrBuilder MIB = 672 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) 673 .add(predOps(ARMCC::AL)) 674 .setMIFlag(MachineInstr::FrameDestroy); 675 // Copy implicit ops and popped registers, if any. 676 for (auto MO: MBBI->operands()) 677 if (MO.isReg() && (MO.isImplicit() || MO.isDef())) 678 MIB.add(MO); 679 MIB.addReg(ARM::PC, RegState::Define); 680 // Erase the old instruction (tBX_RET or tPOP). 681 MBB.erase(MBBI); 682 return true; 683 } 684 685 // Look for a temporary register to use. 686 // First, compute the liveness information. 687 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 688 LivePhysRegs UsedRegs(TRI); 689 UsedRegs.addLiveOuts(MBB); 690 // The semantic of pristines changed recently and now, 691 // the callee-saved registers that are touched in the function 692 // are not part of the pristines set anymore. 693 // Add those callee-saved now. 
694 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); 695 for (unsigned i = 0; CSRegs[i]; ++i) 696 UsedRegs.addReg(CSRegs[i]); 697 698 DebugLoc dl = DebugLoc(); 699 if (MBBI != MBB.end()) { 700 dl = MBBI->getDebugLoc(); 701 auto InstUpToMBBI = MBB.end(); 702 while (InstUpToMBBI != MBBI) 703 // The pre-decrement is on purpose here. 704 // We want to have the liveness right before MBBI. 705 UsedRegs.stepBackward(*--InstUpToMBBI); 706 } 707 708 // Look for a register that can be directly use in the POP. 709 unsigned PopReg = 0; 710 // And some temporary register, just in case. 711 unsigned TemporaryReg = 0; 712 BitVector PopFriendly = 713 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); 714 // R7 may be used as a frame pointer, hence marked as not generally 715 // allocatable, however there's no reason to not use it as a temporary for 716 // restoring LR. 717 if (STI.getFramePointerReg() == ARM::R7) 718 PopFriendly.set(ARM::R7); 719 720 assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); 721 // Rebuild the GPRs from the high registers because they are removed 722 // form the GPR reg class for thumb1. 723 BitVector GPRsNoLRSP = 724 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); 725 GPRsNoLRSP |= PopFriendly; 726 GPRsNoLRSP.reset(ARM::LR); 727 GPRsNoLRSP.reset(ARM::SP); 728 GPRsNoLRSP.reset(ARM::PC); 729 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg, 730 MF.getRegInfo()); 731 732 // If we couldn't find a pop-friendly register, try restoring LR before 733 // popping the other callee-saved registers, so we could use one of them as a 734 // temporary. 
735 bool UseLDRSP = false; 736 if (!PopReg && MBBI != MBB.begin()) { 737 auto PrevMBBI = MBBI; 738 PrevMBBI--; 739 if (PrevMBBI->getOpcode() == ARM::tPOP) { 740 UsedRegs.stepBackward(*PrevMBBI); 741 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, 742 TemporaryReg, MF.getRegInfo()); 743 if (PopReg) { 744 MBBI = PrevMBBI; 745 UseLDRSP = true; 746 } 747 } 748 } 749 750 if (!DoIt && !PopReg && !TemporaryReg) 751 return false; 752 753 assert((PopReg || TemporaryReg) && "Cannot get LR"); 754 755 if (UseLDRSP) { 756 assert(PopReg && "Do not know how to get LR"); 757 // Load the LR via LDR tmp, [SP, #off] 758 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) 759 .addReg(PopReg, RegState::Define) 760 .addReg(ARM::SP) 761 .addImm(MBBI->getNumExplicitOperands() - 2) 762 .add(predOps(ARMCC::AL)) 763 .setMIFlag(MachineInstr::FrameDestroy); 764 // Move from the temporary register to the LR. 765 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 766 .addReg(ARM::LR, RegState::Define) 767 .addReg(PopReg, RegState::Kill) 768 .add(predOps(ARMCC::AL)) 769 .setMIFlag(MachineInstr::FrameDestroy); 770 // Advance past the pop instruction. 771 MBBI++; 772 // Increment the SP. 773 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 774 ArgRegsSaveSize + 4, ARM::NoRegister, 775 MachineInstr::FrameDestroy); 776 return true; 777 } 778 779 if (TemporaryReg) { 780 assert(!PopReg && "Unnecessary MOV is about to be inserted"); 781 PopReg = PopFriendly.find_first(); 782 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 783 .addReg(TemporaryReg, RegState::Define) 784 .addReg(PopReg, RegState::Kill) 785 .add(predOps(ARMCC::AL)) 786 .setMIFlag(MachineInstr::FrameDestroy); 787 } 788 789 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { 790 // We couldn't use the direct restoration above, so 791 // perform the opposite conversion: tPOP_RET to tPOP. 
792 MachineInstrBuilder MIB = 793 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) 794 .add(predOps(ARMCC::AL)) 795 .setMIFlag(MachineInstr::FrameDestroy); 796 bool Popped = false; 797 for (auto MO: MBBI->operands()) 798 if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && 799 MO.getReg() != ARM::PC) { 800 MIB.add(MO); 801 if (!MO.isImplicit()) 802 Popped = true; 803 } 804 // Is there anything left to pop? 805 if (!Popped) 806 MBB.erase(MIB.getInstr()); 807 // Erase the old instruction. 808 MBB.erase(MBBI); 809 MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) 810 .add(predOps(ARMCC::AL)) 811 .setMIFlag(MachineInstr::FrameDestroy); 812 } 813 814 assert(PopReg && "Do not know how to get LR"); 815 BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) 816 .add(predOps(ARMCC::AL)) 817 .addReg(PopReg, RegState::Define) 818 .setMIFlag(MachineInstr::FrameDestroy); 819 820 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, 821 ARM::NoRegister, MachineInstr::FrameDestroy); 822 823 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 824 .addReg(ARM::LR, RegState::Define) 825 .addReg(PopReg, RegState::Kill) 826 .add(predOps(ARMCC::AL)) 827 .setMIFlag(MachineInstr::FrameDestroy); 828 829 if (TemporaryReg) 830 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 831 .addReg(PopReg, RegState::Define) 832 .addReg(TemporaryReg, RegState::Kill) 833 .add(predOps(ARMCC::AL)) 834 .setMIFlag(MachineInstr::FrameDestroy); 835 836 return true; 837 } 838 839 static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, 840 ARM::R7, ARM::LR}; 841 static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9, 842 ARM::R10, ARM::R11}; 843 static const SmallVector<Register> OrderedCopyRegs = { 844 ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 845 ARM::R5, ARM::R6, ARM::R7, ARM::LR}; 846 847 static void splitLowAndHighRegs(const std::set<Register> &Regs, 848 std::set<Register> &LowRegs, 849 std::set<Register> &HighRegs) { 850 for (Register Reg : Regs) { 
851 if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { 852 LowRegs.insert(Reg); 853 } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { 854 HighRegs.insert(Reg); 855 } else { 856 llvm_unreachable("callee-saved register of unexpected class"); 857 } 858 } 859 } 860 861 template <typename It> 862 It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, 863 const std::set<Register> &RegSet) { 864 return std::find_if(OrderedStartIt, OrderedEndIt, 865 [&](Register Reg) { return RegSet.count(Reg); }); 866 } 867 868 static void pushRegsToStack(MachineBasicBlock &MBB, 869 MachineBasicBlock::iterator MI, 870 const TargetInstrInfo &TII, 871 const std::set<Register> &RegsToSave, 872 const std::set<Register> &CopyRegs) { 873 MachineFunction &MF = *MBB.getParent(); 874 const MachineRegisterInfo &MRI = MF.getRegInfo(); 875 DebugLoc DL; 876 877 std::set<Register> LowRegs, HighRegs; 878 splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); 879 880 // Push low regs first 881 if (!LowRegs.empty()) { 882 MachineInstrBuilder MIB = 883 BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 884 for (unsigned Reg : OrderedLowRegs) { 885 if (LowRegs.count(Reg)) { 886 bool isKill = !MRI.isLiveIn(Reg); 887 if (isKill && !MRI.isReserved(Reg)) 888 MBB.addLiveIn(Reg); 889 890 MIB.addReg(Reg, getKillRegState(isKill)); 891 } 892 } 893 MIB.setMIFlags(MachineInstr::FrameSetup); 894 } 895 896 // Now push the high registers 897 // There are no store instructions that can access high registers directly, 898 // so we have to move them to low registers, and push them. 899 // This might take multiple pushes, as it is possible for there to 900 // be fewer low registers available than high registers which need saving. 901 902 // Find the first register to save. 903 // Registers must be processed in reverse order so that in case we need to use 904 // multiple PUSH instructions, the order of the registers on the stack still 905 // matches the unwind info. 
They need to be swicthed back to ascending order 906 // before adding to the PUSH instruction. 907 auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), 908 OrderedHighRegs.rend(), 909 HighRegs); 910 911 while (HiRegToSave != OrderedHighRegs.rend()) { 912 // Find the first low register to use. 913 auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), 914 OrderedCopyRegs.rend(), 915 CopyRegs); 916 917 // Create the PUSH, but don't insert it yet (the MOVs need to come first). 918 MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) 919 .add(predOps(ARMCC::AL)) 920 .setMIFlags(MachineInstr::FrameSetup); 921 922 SmallVector<unsigned, 4> RegsToPush; 923 while (HiRegToSave != OrderedHighRegs.rend() && 924 CopyRegIt != OrderedCopyRegs.rend()) { 925 if (HighRegs.count(*HiRegToSave)) { 926 bool isKill = !MRI.isLiveIn(*HiRegToSave); 927 if (isKill && !MRI.isReserved(*HiRegToSave)) 928 MBB.addLiveIn(*HiRegToSave); 929 930 // Emit a MOV from the high reg to the low reg. 931 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 932 .addReg(*CopyRegIt, RegState::Define) 933 .addReg(*HiRegToSave, getKillRegState(isKill)) 934 .add(predOps(ARMCC::AL)) 935 .setMIFlags(MachineInstr::FrameSetup); 936 937 // Record the register that must be added to the PUSH. 938 RegsToPush.push_back(*CopyRegIt); 939 940 CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), 941 OrderedCopyRegs.rend(), 942 CopyRegs); 943 HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), 944 OrderedHighRegs.rend(), 945 HighRegs); 946 } 947 } 948 949 // Add the low registers to the PUSH, in ascending order. 950 for (unsigned Reg : llvm::reverse(RegsToPush)) 951 PushMIB.addReg(Reg, RegState::Kill); 952 953 // Insert the PUSH instruction after the MOVs. 
954 MBB.insert(MI, PushMIB); 955 } 956 } 957 958 static void popRegsFromStack(MachineBasicBlock &MBB, 959 MachineBasicBlock::iterator &MI, 960 const TargetInstrInfo &TII, 961 const std::set<Register> &RegsToRestore, 962 const std::set<Register> &AvailableCopyRegs, 963 bool IsVarArg, bool HasV5Ops) { 964 if (RegsToRestore.empty()) 965 return; 966 967 MachineFunction &MF = *MBB.getParent(); 968 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 969 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); 970 971 std::set<Register> LowRegs, HighRegs; 972 splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); 973 974 // Pop the high registers first 975 // There are no store instructions that can access high registers directly, 976 // so we have to pop into low registers and them move to the high registers. 977 // This might take multiple pops, as it is possible for there to 978 // be fewer low registers available than high registers which need restoring. 979 980 // Find the first register to restore. 981 auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), 982 OrderedHighRegs.end(), 983 HighRegs); 984 985 std::set<Register> CopyRegs = AvailableCopyRegs; 986 Register LowScratchReg; 987 if (!HighRegs.empty() && CopyRegs.empty()) { 988 // No copy regs are available to pop high regs. Let's make use of a return 989 // register and the scratch register (IP/R12) to copy things around. 990 LowScratchReg = ARM::R0; 991 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 992 .addReg(ARM::R12, RegState::Define) 993 .addReg(LowScratchReg, RegState::Kill) 994 .add(predOps(ARMCC::AL)) 995 .setMIFlag(MachineInstr::FrameDestroy); 996 CopyRegs.insert(LowScratchReg); 997 } 998 999 while (HiRegToRestore != OrderedHighRegs.end()) { 1000 assert(!CopyRegs.empty()); 1001 // Find the first low register to use. 1002 auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), 1003 OrderedCopyRegs.end(), 1004 CopyRegs); 1005 1006 // Create the POP instruction. 
1007 MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) 1008 .add(predOps(ARMCC::AL)) 1009 .setMIFlag(MachineInstr::FrameDestroy); 1010 1011 while (HiRegToRestore != OrderedHighRegs.end() && 1012 CopyReg != OrderedCopyRegs.end()) { 1013 // Add the low register to the POP. 1014 PopMIB.addReg(*CopyReg, RegState::Define); 1015 1016 // Create the MOV from low to high register. 1017 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1018 .addReg(*HiRegToRestore, RegState::Define) 1019 .addReg(*CopyReg, RegState::Kill) 1020 .add(predOps(ARMCC::AL)) 1021 .setMIFlag(MachineInstr::FrameDestroy); 1022 1023 CopyReg = getNextOrderedReg(std::next(CopyReg), 1024 OrderedCopyRegs.end(), 1025 CopyRegs); 1026 HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), 1027 OrderedHighRegs.end(), 1028 HighRegs); 1029 } 1030 } 1031 1032 // Restore low register used as scratch if necessary 1033 if (LowScratchReg.isValid()) { 1034 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1035 .addReg(LowScratchReg, RegState::Define) 1036 .addReg(ARM::R12, RegState::Kill) 1037 .add(predOps(ARMCC::AL)) 1038 .setMIFlag(MachineInstr::FrameDestroy); 1039 } 1040 1041 // Now pop the low registers 1042 if (!LowRegs.empty()) { 1043 MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) 1044 .add(predOps(ARMCC::AL)) 1045 .setMIFlag(MachineInstr::FrameDestroy); 1046 1047 bool NeedsPop = false; 1048 for (Register Reg : OrderedLowRegs) { 1049 if (!LowRegs.count(Reg)) 1050 continue; 1051 1052 if (Reg == ARM::LR) { 1053 if (!MBB.succ_empty() || 1054 MI->getOpcode() == ARM::TCRETURNdi || 1055 MI->getOpcode() == ARM::TCRETURNri) 1056 // LR may only be popped into PC, as part of return sequence. 1057 // If this isn't the return sequence, we'll need emitPopSpecialFixUp 1058 // to restore LR the hard way. 1059 // FIXME: if we don't pass any stack arguments it would be actually 1060 // advantageous *and* correct to do the conversion to an ordinary call 1061 // instruction here. 
1062 continue; 1063 // Special epilogue for vararg functions. See emitEpilogue 1064 if (IsVarArg) 1065 continue; 1066 // ARMv4T requires BX, see emitEpilogue 1067 if (!HasV5Ops) 1068 continue; 1069 1070 // CMSE entry functions must return via BXNS, see emitEpilogue. 1071 if (AFI->isCmseNSEntryFunction()) 1072 continue; 1073 1074 // Pop LR into PC. 1075 Reg = ARM::PC; 1076 (*MIB).setDesc(TII.get(ARM::tPOP_RET)); 1077 if (MI != MBB.end()) 1078 MIB.copyImplicitOps(*MI); 1079 MI = MBB.erase(MI); 1080 } 1081 MIB.addReg(Reg, getDefRegState(true)); 1082 NeedsPop = true; 1083 } 1084 1085 // It's illegal to emit pop instruction without operands. 1086 if (NeedsPop) 1087 MBB.insert(MI, &*MIB); 1088 else 1089 MF.deleteMachineInstr(MIB); 1090 } 1091 } 1092 1093 bool Thumb1FrameLowering::spillCalleeSavedRegisters( 1094 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1095 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1096 if (CSI.empty()) 1097 return false; 1098 1099 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1100 MachineFunction &MF = *MBB.getParent(); 1101 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1102 MF.getSubtarget().getRegisterInfo()); 1103 Register FPReg = RegInfo->getFrameRegister(MF); 1104 1105 // In case FP is a high reg, we need a separate push sequence to generate 1106 // a correct Frame Record 1107 bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1108 1109 std::set<Register> FrameRecord; 1110 std::set<Register> SpilledGPRs; 1111 for (const CalleeSavedInfo &I : CSI) { 1112 Register Reg = I.getReg(); 1113 if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) 1114 FrameRecord.insert(Reg); 1115 else 1116 SpilledGPRs.insert(Reg); 1117 } 1118 1119 pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); 1120 1121 // Determine intermediate registers which can be used for pushing high regs: 1122 // - Spilled low regs 1123 // - Unused argument registers 1124 
std::set<Register> CopyRegs; 1125 for (Register Reg : SpilledGPRs) 1126 if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && 1127 !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) 1128 CopyRegs.insert(Reg); 1129 for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) 1130 if (!MF.getRegInfo().isLiveIn(ArgReg)) 1131 CopyRegs.insert(ArgReg); 1132 1133 pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs); 1134 1135 return true; 1136 } 1137 1138 bool Thumb1FrameLowering::restoreCalleeSavedRegisters( 1139 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1140 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1141 if (CSI.empty()) 1142 return false; 1143 1144 MachineFunction &MF = *MBB.getParent(); 1145 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1146 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1147 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1148 MF.getSubtarget().getRegisterInfo()); 1149 bool IsVarArg = AFI->getArgRegsSaveSize() > 0; 1150 Register FPReg = RegInfo->getFrameRegister(MF); 1151 1152 // In case FP is a high reg, we need a separate pop sequence to generate 1153 // a correct Frame Record 1154 bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1155 1156 std::set<Register> FrameRecord; 1157 std::set<Register> SpilledGPRs; 1158 for (CalleeSavedInfo &I : CSI) { 1159 Register Reg = I.getReg(); 1160 if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) 1161 FrameRecord.insert(Reg); 1162 else 1163 SpilledGPRs.insert(Reg); 1164 1165 if (Reg == ARM::LR) 1166 I.setRestored(false); 1167 } 1168 1169 // Determine intermidiate registers which can be used for popping high regs: 1170 // - Spilled low regs 1171 // - Unused return registers 1172 std::set<Register> CopyRegs; 1173 std::set<Register> UnusedReturnRegs; 1174 for (Register Reg : SpilledGPRs) 1175 if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) 1176 
CopyRegs.insert(Reg); 1177 auto Terminator = MBB.getFirstTerminator(); 1178 if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { 1179 UnusedReturnRegs.insert(ARM::R0); 1180 UnusedReturnRegs.insert(ARM::R1); 1181 UnusedReturnRegs.insert(ARM::R2); 1182 UnusedReturnRegs.insert(ARM::R3); 1183 for (auto Op : Terminator->implicit_operands()) { 1184 if (Op.isReg()) 1185 UnusedReturnRegs.erase(Op.getReg()); 1186 } 1187 } 1188 CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); 1189 1190 // First pop regular spilled regs. 1191 popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, 1192 STI.hasV5TOps()); 1193 1194 // LR may only be popped into pc, as part of a return sequence. 1195 // Check that no other pop instructions are inserted after that. 1196 assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && 1197 "Can't insert pop after return sequence"); 1198 1199 // Now pop Frame Record regs. 1200 // Only unused return registers can be used as copy regs at this point. 1201 popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, 1202 STI.hasV5TOps()); 1203 1204 return true; 1205 } 1206