xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp (revision 2c2ec6bbc9cc7762a250ffe903bda6c2e44d25ff)
1 //===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // \file
9 // \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
10 // there is a long branch. Branch size at this point is difficult to track since
11 // we have no idea what spills will be inserted later on. We just assume 8 bytes
12 // per instruction to compute approximations without computing the actual
13 // instruction size to see if we're in the neighborhood of the maximum branch
14 // distance threshold. Tuning of what is considered "long" is handled through
15 // the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
16 //===----------------------------------------------------------------------===//
17 #include "GCNPreRALongBranchReg.h"
18 #include "AMDGPU.h"
19 #include "GCNSubtarget.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/InitializePasses.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
27 
28 namespace {
29 
30 static cl::opt<double> LongBranchFactor(
31     "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden,
32     cl::desc("Factor to apply to what qualifies as a long branch "
33              "to reserve a pair of scalar registers. If this value "
34              "is 0 the long branch registers are never reserved. As this "
35              "value grows the greater chance the branch distance will fall "
36              "within the threshold and the registers will be marked to be "
37              "reserved. We lean towards always reserving a register for  "
38              "long jumps"));
39 
40 class GCNPreRALongBranchReg {
41 
42   struct BasicBlockInfo {
43     // Offset - Distance from the beginning of the function to the beginning
44     // of this basic block.
45     uint64_t Offset = 0;
46     // Size - Size of the basic block in bytes
47     uint64_t Size = 0;
48   };
49   void generateBlockInfo(MachineFunction &MF,
50                          SmallVectorImpl<BasicBlockInfo> &BlockInfo);
51 
52 public:
53   GCNPreRALongBranchReg() = default;
54   bool run(MachineFunction &MF);
55 };
56 
57 class GCNPreRALongBranchRegLegacy : public MachineFunctionPass {
58 public:
59   static char ID;
60   GCNPreRALongBranchRegLegacy() : MachineFunctionPass(ID) {
61     initializeGCNPreRALongBranchRegLegacyPass(*PassRegistry::getPassRegistry());
62   }
63 
64   bool runOnMachineFunction(MachineFunction &MF) override {
65     return GCNPreRALongBranchReg().run(MF);
66   }
67 
68   StringRef getPassName() const override {
69     return "AMDGPU Pre-RA Long Branch Reg";
70   }
71 
72   void getAnalysisUsage(AnalysisUsage &AU) const override {
73     AU.setPreservesAll();
74     MachineFunctionPass::getAnalysisUsage(AU);
75   }
76 };
77 } // End anonymous namespace.
78 
79 char GCNPreRALongBranchRegLegacy::ID = 0;
80 
81 INITIALIZE_PASS(GCNPreRALongBranchRegLegacy, DEBUG_TYPE,
82                 "AMDGPU Pre-RA Long Branch Reg", false, false)
83 
84 char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchRegLegacy::ID;
85 void GCNPreRALongBranchReg::generateBlockInfo(
86     MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
87 
88   BlockInfo.resize(MF.getNumBlockIDs());
89 
90   // Approximate the size of all basic blocks by just
91   // assuming 8 bytes per instruction
92   for (const MachineBasicBlock &MBB : MF) {
93     uint64_t NumInstr = 0;
94     // Loop through the basic block and add up all non-debug
95     // non-meta instructions
96     for (const MachineInstr &MI : MBB) {
97       // isMetaInstruction is a superset of isDebugIstr
98       if (MI.isMetaInstruction())
99         continue;
100       NumInstr += 1;
101     }
102     // Approximate size as just 8 bytes per instruction
103     BlockInfo[MBB.getNumber()].Size = 8 * NumInstr;
104   }
105   uint64_t PrevNum = (&MF)->begin()->getNumber();
106   for (auto &MBB :
107        make_range(std::next(MachineFunction::iterator((&MF)->begin())),
108                   (&MF)->end())) {
109     uint64_t Num = MBB.getNumber();
110     // Compute the offset immediately following this block.
111     BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size;
112     PrevNum = Num;
113   }
114 }
115 
116 bool GCNPreRALongBranchReg::run(MachineFunction &MF) {
117   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
118   const SIInstrInfo *TII = STM.getInstrInfo();
119   const SIRegisterInfo *TRI = STM.getRegisterInfo();
120   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
121   MachineRegisterInfo &MRI = MF.getRegInfo();
122 
123   // For now, reserve highest available SGPR pair. After RA,
124   // shift down to a lower unused pair of SGPRs
125   // If all registers are used, then findUnusedRegister will return
126   // AMDGPU::NoRegister.
127   constexpr bool ReserveHighestRegister = true;
128   Register LongBranchReservedReg = TRI->findUnusedRegister(
129       MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister);
130   if (!LongBranchReservedReg)
131     return false;
132 
133   // Approximate code size and offsets of each basic block
134   SmallVector<BasicBlockInfo, 16> BlockInfo;
135   generateBlockInfo(MF, BlockInfo);
136 
137   for (const MachineBasicBlock &MBB : MF) {
138     MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
139     if (Last == MBB.end() || !Last->isUnconditionalBranch())
140       continue;
141     MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last);
142     uint64_t BlockDistance = static_cast<uint64_t>(
143         LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset);
144     // If the distance falls outside the threshold assume it is a long branch
145     // and we need to reserve the registers
146     if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) {
147       MFI->setLongBranchReservedReg(LongBranchReservedReg);
148       return true;
149     }
150   }
151   return false;
152 }
153 
154 PreservedAnalyses
155 GCNPreRALongBranchRegPass::run(MachineFunction &MF,
156                                MachineFunctionAnalysisManager &MFAM) {
157   GCNPreRALongBranchReg().run(MF);
158   return PreservedAnalyses::all();
159 }
160