1 //===-- GCNPreRALongBranchReg.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // \file 9 // \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if 10 // there is a long branch. Branch size at this point is difficult to track since 11 // we have no idea what spills will be inserted later on. We just assume 8 bytes 12 // per instruction to compute approximations without computing the actual 13 // instruction size to see if we're in the neighborhood of the maximum branch 14 // distrance threshold tuning of what is considered "long" is handled through 15 // amdgpu-long-branch-factor cl argument which sets LongBranchFactor. 16 //===----------------------------------------------------------------------===// 17 #include "GCNPreRALongBranchReg.h" 18 #include "AMDGPU.h" 19 #include "GCNSubtarget.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/InitializePasses.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg" 27 28 namespace { 29 30 static cl::opt<double> LongBranchFactor( 31 "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden, 32 cl::desc("Factor to apply to what qualifies as a long branch " 33 "to reserve a pair of scalar registers. If this value " 34 "is 0 the long branch registers are never reserved. As this " 35 "value grows the greater chance the branch distance will fall " 36 "within the threshold and the registers will be marked to be " 37 "reserved. We lean towards always reserving a register for " 38 "long jumps")); 39 40 class GCNPreRALongBranchReg { 41 42 struct BasicBlockInfo { 43 // Offset - Distance from the beginning of the function to the beginning 44 // of this basic block. 45 uint64_t Offset = 0; 46 // Size - Size of the basic block in bytes 47 uint64_t Size = 0; 48 }; 49 void generateBlockInfo(MachineFunction &MF, 50 SmallVectorImpl<BasicBlockInfo> &BlockInfo); 51 52 public: 53 GCNPreRALongBranchReg() = default; 54 bool run(MachineFunction &MF); 55 }; 56 57 class GCNPreRALongBranchRegLegacy : public MachineFunctionPass { 58 public: 59 static char ID; 60 GCNPreRALongBranchRegLegacy() : MachineFunctionPass(ID) { 61 initializeGCNPreRALongBranchRegLegacyPass(*PassRegistry::getPassRegistry()); 62 } 63 64 bool runOnMachineFunction(MachineFunction &MF) override { 65 return GCNPreRALongBranchReg().run(MF); 66 } 67 68 StringRef getPassName() const override { 69 return "AMDGPU Pre-RA Long Branch Reg"; 70 } 71 72 void getAnalysisUsage(AnalysisUsage &AU) const override { 73 AU.setPreservesAll(); 74 MachineFunctionPass::getAnalysisUsage(AU); 75 } 76 }; 77 } // End anonymous namespace. 78 79 char GCNPreRALongBranchRegLegacy::ID = 0; 80 81 INITIALIZE_PASS(GCNPreRALongBranchRegLegacy, DEBUG_TYPE, 82 "AMDGPU Pre-RA Long Branch Reg", false, false) 83 84 char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchRegLegacy::ID; 85 void GCNPreRALongBranchReg::generateBlockInfo( 86 MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) { 87 88 BlockInfo.resize(MF.getNumBlockIDs()); 89 90 // Approximate the size of all basic blocks by just 91 // assuming 8 bytes per instruction 92 for (const MachineBasicBlock &MBB : MF) { 93 uint64_t NumInstr = 0; 94 // Loop through the basic block and add up all non-debug 95 // non-meta instructions 96 for (const MachineInstr &MI : MBB) { 97 // isMetaInstruction is a superset of isDebugIstr 98 if (MI.isMetaInstruction()) 99 continue; 100 NumInstr += 1; 101 } 102 // Approximate size as just 8 bytes per instruction 103 BlockInfo[MBB.getNumber()].Size = 8 * NumInstr; 104 } 105 uint64_t PrevNum = (&MF)->begin()->getNumber(); 106 for (auto &MBB : 107 make_range(std::next(MachineFunction::iterator((&MF)->begin())), 108 (&MF)->end())) { 109 uint64_t Num = MBB.getNumber(); 110 // Compute the offset immediately following this block. 111 BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size; 112 PrevNum = Num; 113 } 114 } 115 116 bool GCNPreRALongBranchReg::run(MachineFunction &MF) { 117 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 118 const SIInstrInfo *TII = STM.getInstrInfo(); 119 const SIRegisterInfo *TRI = STM.getRegisterInfo(); 120 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 121 MachineRegisterInfo &MRI = MF.getRegInfo(); 122 123 // For now, reserve highest available SGPR pair. After RA, 124 // shift down to a lower unused pair of SGPRs 125 // If all registers are used, then findUnusedRegister will return 126 // AMDGPU::NoRegister. 127 constexpr bool ReserveHighestRegister = true; 128 Register LongBranchReservedReg = TRI->findUnusedRegister( 129 MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister); 130 if (!LongBranchReservedReg) 131 return false; 132 133 // Approximate code size and offsets of each basic block 134 SmallVector<BasicBlockInfo, 16> BlockInfo; 135 generateBlockInfo(MF, BlockInfo); 136 137 for (const MachineBasicBlock &MBB : MF) { 138 MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr(); 139 if (Last == MBB.end() || !Last->isUnconditionalBranch()) 140 continue; 141 MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last); 142 uint64_t BlockDistance = static_cast<uint64_t>( 143 LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset); 144 // If the distance falls outside the threshold assume it is a long branch 145 // and we need to reserve the registers 146 if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) { 147 MFI->setLongBranchReservedReg(LongBranchReservedReg); 148 return true; 149 } 150 } 151 return false; 152 } 153 154 PreservedAnalyses 155 GCNPreRALongBranchRegPass::run(MachineFunction &MF, 156 MachineFunctionAnalysisManager &MFAM) { 157 GCNPreRALongBranchReg().run(MF); 158 return PreservedAnalyses::all(); 159 } 160