//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// \file
// \brief Pass to estimate pre-RA branch size and reserve a pair of SGPRs if
// there is a long branch. Branch size at this point is difficult to track
// since we have no idea what spills will be inserted later on. We just assume
// 8 bytes per instruction to compute an approximation, without computing the
// actual instruction size, and check whether we are in the neighborhood of the
// maximum branch distance threshold. Tuning of what is considered "long" is
// handled through the amdgpu-long-branch-factor cl argument, which sets
// LongBranchFactor.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"

namespace {

static cl::opt<double> LongBranchFactor(
    "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden,
    cl::desc("Factor to apply to what qualifies as a long branch "
             "to reserve a pair of scalar registers. If this value "
             "is 0 the long branch registers are never reserved. The "
             "larger the value, the greater the chance the estimated "
             "branch distance will be considered long and the registers "
             "will be marked to be reserved. We lean towards always "
             "reserving a register for long jumps."));

class GCNPreRALongBranchReg : public MachineFunctionPass {

  struct BasicBlockInfo {
    // Offset - Distance from the beginning of the function to the beginning
    // of this basic block.
    uint64_t Offset = 0;
    // Size - Size of the basic block in bytes.
    uint64_t Size = 0;
  };
  void generateBlockInfo(MachineFunction &MF,
                         SmallVectorImpl<BasicBlockInfo> &BlockInfo);

public:
  static char ID;
  GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
    initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;
  StringRef getPassName() const override {
    return "AMDGPU Pre-RA Long Branch Reg";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // End anonymous namespace.
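
// Worked example of the estimate described in the file header (the numbers
// are illustrative, not taken from a real function): a destination block
// preceded by 20,000 non-meta instructions gets an estimated offset of
// 20,000 * 8 = 160,000 bytes from the start of the function. With the default
// LongBranchFactor of 1.0 that distance is passed unchanged to
// SIInstrInfo::isBranchOffsetInRange; if the target reports it cannot be
// encoded in the branch's immediate, the SGPR pair is reserved for the later
// long-branch expansion.
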
char GCNPreRALongBranchReg::ID = 0;

INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
                "AMDGPU Pre-RA Long Branch Reg", false, false)

char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;

void GCNPreRALongBranchReg::generateBlockInfo(
    MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {

  BlockInfo.resize(MF.getNumBlockIDs());

  // Approximate the size of all basic blocks by just
  // assuming 8 bytes per instruction.
  for (const MachineBasicBlock &MBB : MF) {
    uint64_t NumInstr = 0;
    // Loop through the basic block and add up all non-debug,
    // non-meta instructions.
    for (const MachineInstr &MI : MBB) {
      // isMetaInstruction is a superset of isDebugInstr.
      if (MI.isMetaInstruction())
        continue;
      NumInstr += 1;
    }
    // Approximate size as just 8 bytes per instruction.
    BlockInfo[MBB.getNumber()].Size = 8 * NumInstr;
  }

  uint64_t PrevNum = (&MF)->begin()->getNumber();
  for (auto &MBB :
       make_range(std::next(MachineFunction::iterator((&MF)->begin())),
                  (&MF)->end())) {
    uint64_t Num = MBB.getNumber();
    // Compute the offset immediately following the previous block.
    BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size;
    PrevNum = Num;
  }
}

bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = STM.getInstrInfo();
  const SIRegisterInfo *TRI = STM.getRegisterInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // For now, reserve the highest available SGPR pair. After RA, shift down
  // to a lower unused pair of SGPRs.
  // If all registers are used, findUnusedRegister will return
  // AMDGPU::NoRegister.
  constexpr bool ReserveHighestRegister = true;
  Register LongBranchReservedReg = TRI->findUnusedRegister(
      MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister);
  if (!LongBranchReservedReg)
    return false;

  // Approximate code size and offsets of each basic block.
  SmallVector<BasicBlockInfo, 16> BlockInfo;
  generateBlockInfo(MF, BlockInfo);

  for (const MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
    if (Last == MBB.end() || !Last->isUnconditionalBranch())
      continue;
    MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last);
    uint64_t BlockDistance = static_cast<uint64_t>(
        LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset);
    // If the distance falls outside the threshold, assume it is a long branch
    // and we need to reserve the registers.
    if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) {
      MFI->setLongBranchReservedReg(LongBranchReservedReg);
      return true;
    }
  }
  return false;
}
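
// Usage note (a sketch, not part of the pass itself): the heuristic can be
// tuned from the command line via the hidden cl::opt declared above, e.g.
//   llc -mtriple=amdgcn-amd-amdhsa -amdgpu-long-branch-factor=2.0 input.ll
// A factor of 0 disables the reservation entirely, while values above 1.0
// inflate the estimated distance and make reserving the SGPR pair more likely.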