xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp (revision a03411e84728e9b267056fd31c7d1d9d1dc1b01e)
1 //===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // \file
9 // \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
10 // there is a long branch. Branch size at this point is difficult to track since
11 // we have no idea what spills will be inserted later on. We just assume 8 bytes
12 // per instruction to compute approximations without computing the actual
13 // instruction size to see if we're in the neighborhood of the maximum branch
14 // distance threshold. Tuning of what is considered "long" is handled through
15 // the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
16 //===----------------------------------------------------------------------===//
17 #include "AMDGPU.h"
18 #include "GCNSubtarget.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/InitializePasses.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
27 
28 namespace {
29 
// Command-line option "amdgpu-long-branch-factor" (marked cl::Hidden, default
// 1.0). runOnMachineFunction multiplies each estimated branch-destination
// offset by this value before asking whether the branch offset is in range.
30 static cl::opt<double> LongBranchFactor(
31     "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden,
32     cl::desc("Factor to apply to what qualifies as a long branch "
33              "to reserve a pair of scalar registers. If this value "
34              "is 0 the long branch registers are never reserved. As this "
35              "value grows the greater chance the branch distance will fall "
36              "within the threshold and the registers will be marked to be "
37              "reserved. We lean towards always reserving a register for  "
38              "long jumps"));
39 
40 class GCNPreRALongBranchReg : public MachineFunctionPass {
41 
42   struct BasicBlockInfo {
43     // Offset - Distance from the beginning of the function to the beginning
44     // of this basic block.
45     uint64_t Offset = 0;
46     // Size - Size of the basic block in bytes
47     uint64_t Size = 0;
48   };
49   void generateBlockInfo(MachineFunction &MF,
50                          SmallVectorImpl<BasicBlockInfo> &BlockInfo);
51 
52 public:
53   static char ID;
54   GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
55     initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
56   }
57   bool runOnMachineFunction(MachineFunction &MF) override;
58   StringRef getPassName() const override {
59     return "AMDGPU Pre-RA Long Branch Reg";
60   }
61   void getAnalysisUsage(AnalysisUsage &AU) const override {
62     AU.setPreservesAll();
63     MachineFunctionPass::getAnalysisUsage(AU);
64   }
65 };
66 } // End anonymous namespace.
// Out-of-class definition of the static pass ID member; the constructor hands
// it to MachineFunctionPass.
67 char GCNPreRALongBranchReg::ID = 0;
68 
// Generates the initializeGCNPreRALongBranchRegPass hook called from the
// constructor, using DEBUG_TYPE as the pass argument string.
// NOTE(review): the two trailing `false` flags are presumably the macro's
// "CFG only" and "is analysis" arguments - confirm against PassSupport.h.
69 INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
70                 "AMDGPU Pre-RA Long Branch Reg", false, false)
71 
// Exposes the pass ID in the llvm namespace so code outside this file can
// refer to the pass without seeing the class.
72 char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
73 void GCNPreRALongBranchReg::generateBlockInfo(
74     MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
75 
76   BlockInfo.resize(MF.getNumBlockIDs());
77 
78   // Approximate the size of all basic blocks by just
79   // assuming 8 bytes per instruction
80   for (const MachineBasicBlock &MBB : MF) {
81     uint64_t NumInstr = 0;
82     // Loop through the basic block and add up all non-debug
83     // non-meta instructions
84     for (const MachineInstr &MI : MBB) {
85       // isMetaInstruction is a superset of isDebugIstr
86       if (MI.isMetaInstruction())
87         continue;
88       NumInstr += 1;
89     }
90     // Approximate size as just 8 bytes per instruction
91     BlockInfo[MBB.getNumber()].Size = 8 * NumInstr;
92   }
93   uint64_t PrevNum = (&MF)->begin()->getNumber();
94   for (auto &MBB :
95        make_range(std::next(MachineFunction::iterator((&MF)->begin())),
96                   (&MF)->end())) {
97     uint64_t Num = MBB.getNumber();
98     // Compute the offset immediately following this block.
99     BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size;
100     PrevNum = Num;
101   }
102 }
103 bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
104   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
105   const SIInstrInfo *TII = STM.getInstrInfo();
106   const SIRegisterInfo *TRI = STM.getRegisterInfo();
107   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
108   MachineRegisterInfo &MRI = MF.getRegInfo();
109 
110   // For now, reserve highest available SGPR pair. After RA,
111   // shift down to a lower unused pair of SGPRs
112   // If all registers are used, then findUnusedRegister will return
113   // AMDGPU::NoRegister.
114   constexpr bool ReserveHighestRegister = true;
115   Register LongBranchReservedReg = TRI->findUnusedRegister(
116       MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister);
117   if (!LongBranchReservedReg)
118     return false;
119 
120   // Approximate code size and offsets of each basic block
121   SmallVector<BasicBlockInfo, 16> BlockInfo;
122   generateBlockInfo(MF, BlockInfo);
123 
124   for (const MachineBasicBlock &MBB : MF) {
125     MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
126     if (Last == MBB.end() || !Last->isUnconditionalBranch())
127       continue;
128     MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last);
129     uint64_t BlockDistance = static_cast<uint64_t>(
130         LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset);
131     // If the distance falls outside the threshold assume it is a long branch
132     // and we need to reserve the registers
133     if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) {
134       MFI->setLongBranchReservedReg(LongBranchReservedReg);
135       return true;
136     }
137   }
138   return false;
139 }
140