xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- AMDGPUMCResourceInfo.cpp --- MC Resource Info ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief MC infrastructure to propagate the function level resource usage
11 /// info.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUMCResourceInfo.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/MC/MCAsmInfo.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCSymbol.h"
21 #include "llvm/Target/TargetMachine.h"
22 
23 #define DEBUG_TYPE "amdgpu-mc-resource-usage"
24 
25 using namespace llvm;
26 
getSymbol(StringRef FuncName,ResourceInfoKind RIK,MCContext & OutContext,bool IsLocal)27 MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
28                                     MCContext &OutContext, bool IsLocal) {
29   auto GOCS = [FuncName, &OutContext, IsLocal](StringRef Suffix) {
30     StringRef Prefix =
31         IsLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : "";
32     return OutContext.getOrCreateSymbol(Twine(Prefix) + FuncName +
33                                         Twine(Suffix));
34   };
35   switch (RIK) {
36   case RIK_NumVGPR:
37     return GOCS(".num_vgpr");
38   case RIK_NumAGPR:
39     return GOCS(".num_agpr");
40   case RIK_NumSGPR:
41     return GOCS(".numbered_sgpr");
42   case RIK_PrivateSegSize:
43     return GOCS(".private_seg_size");
44   case RIK_UsesVCC:
45     return GOCS(".uses_vcc");
46   case RIK_UsesFlatScratch:
47     return GOCS(".uses_flat_scratch");
48   case RIK_HasDynSizedStack:
49     return GOCS(".has_dyn_sized_stack");
50   case RIK_HasRecursion:
51     return GOCS(".has_recursion");
52   case RIK_HasIndirectCall:
53     return GOCS(".has_indirect_call");
54   }
55   llvm_unreachable("Unexpected ResourceInfoKind.");
56 }
57 
getSymRefExpr(StringRef FuncName,ResourceInfoKind RIK,MCContext & Ctx,bool IsLocal)58 const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName,
59                                             ResourceInfoKind RIK,
60                                             MCContext &Ctx, bool IsLocal) {
61   return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx, IsLocal), Ctx);
62 }
63 
assignMaxRegs(MCContext & OutContext)64 void MCResourceInfo::assignMaxRegs(MCContext &OutContext) {
65   // Assign expression to get the max register use to the max_num_Xgpr symbol.
66   MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
67   MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
68   MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
69 
70   auto assignMaxRegSym = [&OutContext](MCSymbol *Sym, int32_t RegCount) {
71     const MCExpr *MaxExpr = MCConstantExpr::create(RegCount, OutContext);
72     Sym->setVariableValue(MaxExpr);
73   };
74 
75   assignMaxRegSym(MaxVGPRSym, MaxVGPR);
76   assignMaxRegSym(MaxAGPRSym, MaxAGPR);
77   assignMaxRegSym(MaxSGPRSym, MaxSGPR);
78 }
79 
reset()80 void MCResourceInfo::reset() { *this = MCResourceInfo(); }
81 
finalize(MCContext & OutContext)82 void MCResourceInfo::finalize(MCContext &OutContext) {
83   assert(!Finalized && "Cannot finalize ResourceInfo again.");
84   Finalized = true;
85   assignMaxRegs(OutContext);
86 }
87 
getMaxVGPRSymbol(MCContext & OutContext)88 MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
89   return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr");
90 }
91 
getMaxAGPRSymbol(MCContext & OutContext)92 MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
93   return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr");
94 }
95 
getMaxSGPRSymbol(MCContext & OutContext)96 MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
97   return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
98 }
99 
100 // Tries to flatten recursive call register resource gathering. Simple cycle
101 // avoiding dfs to find the constants in the propagated symbols.
102 // Assumes:
103 // - RecSym has been confirmed to recurse (this means the callee symbols should
104 //   all be populated, started at RecSym).
105 // - Shape of the resource symbol's MCExpr (`max` args are order agnostic):
106 //   RecSym.MCExpr := max(<constant>+, <callee_symbol>*)
flattenedCycleMax(MCSymbol * RecSym,ResourceInfoKind RIK,MCContext & OutContext)107 const MCExpr *MCResourceInfo::flattenedCycleMax(MCSymbol *RecSym,
108                                                 ResourceInfoKind RIK,
109                                                 MCContext &OutContext) {
110   SmallPtrSet<const MCExpr *, 8> Seen;
111   SmallVector<const MCExpr *, 8> WorkList;
112   int64_t Maximum = 0;
113 
114   const MCExpr *RecExpr = RecSym->getVariableValue();
115   WorkList.push_back(RecExpr);
116 
117   while (!WorkList.empty()) {
118     const MCExpr *CurExpr = WorkList.pop_back_val();
119     switch (CurExpr->getKind()) {
120     default: {
121       // Assuming the recursion is of shape `max(<constant>, <callee_symbol>)`
122       // where <callee_symbol> will eventually recurse. If this condition holds,
123       // the recursion occurs within some other (possibly unresolvable) MCExpr,
124       // thus using the worst case value then.
125       if (!AMDGPUMCExpr::isSymbolUsedInExpression(RecSym, CurExpr)) {
126         LLVM_DEBUG(dbgs() << "MCResUse:   " << RecSym->getName()
127                           << ": Recursion in unexpected sub-expression, using "
128                              "module maximum\n");
129         switch (RIK) {
130         default:
131           break;
132         case RIK_NumVGPR:
133           return MCSymbolRefExpr::create(getMaxVGPRSymbol(OutContext),
134                                          OutContext);
135           break;
136         case RIK_NumSGPR:
137           return MCSymbolRefExpr::create(getMaxSGPRSymbol(OutContext),
138                                          OutContext);
139           break;
140         case RIK_NumAGPR:
141           return MCSymbolRefExpr::create(getMaxAGPRSymbol(OutContext),
142                                          OutContext);
143           break;
144         }
145       }
146       break;
147     }
148     case MCExpr::ExprKind::Constant: {
149       int64_t Val = cast<MCConstantExpr>(CurExpr)->getValue();
150       Maximum = std::max(Maximum, Val);
151       break;
152     }
153     case MCExpr::ExprKind::SymbolRef: {
154       const MCSymbolRefExpr *SymExpr = cast<MCSymbolRefExpr>(CurExpr);
155       const MCSymbol &SymRef = SymExpr->getSymbol();
156       if (SymRef.isVariable()) {
157         const MCExpr *SymVal = SymRef.getVariableValue();
158         if (Seen.insert(SymVal).second)
159           WorkList.push_back(SymVal);
160       }
161       break;
162     }
163     case MCExpr::ExprKind::Target: {
164       const AMDGPUMCExpr *TargetExpr = cast<AMDGPUMCExpr>(CurExpr);
165       if (TargetExpr->getKind() == AMDGPUMCExpr::VariantKind::AGVK_Max) {
166         for (auto &Arg : TargetExpr->getArgs())
167           WorkList.push_back(Arg);
168       }
169       break;
170     }
171     }
172   }
173 
174   LLVM_DEBUG(dbgs() << "MCResUse:   " << RecSym->getName()
175                     << ": Using flattened max: << " << Maximum << '\n');
176 
177   return MCConstantExpr::create(Maximum, OutContext);
178 }
179 
assignResourceInfoExpr(int64_t LocalValue,ResourceInfoKind RIK,AMDGPUMCExpr::VariantKind Kind,const MachineFunction & MF,const SmallVectorImpl<const Function * > & Callees,MCContext & OutContext)180 void MCResourceInfo::assignResourceInfoExpr(
181     int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
182     const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
183     MCContext &OutContext) {
184   const TargetMachine &TM = MF.getTarget();
185   bool IsLocal = MF.getFunction().hasLocalLinkage();
186   MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
187   const MCConstantExpr *LocalConstExpr =
188       MCConstantExpr::create(LocalValue, OutContext);
189   const MCExpr *SymVal = LocalConstExpr;
190   MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
191   LLVM_DEBUG(dbgs() << "MCResUse:   " << Sym->getName() << ": Adding "
192                     << LocalValue << " as function local usage\n");
193   if (!Callees.empty()) {
194     SmallVector<const MCExpr *, 8> ArgExprs;
195     SmallPtrSet<const Function *, 8> Seen;
196     ArgExprs.push_back(LocalConstExpr);
197 
198     for (const Function *Callee : Callees) {
199       if (!Seen.insert(Callee).second)
200         continue;
201 
202       bool IsCalleeLocal = Callee->hasLocalLinkage();
203       MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
204       MCSymbol *CalleeValSym =
205           getSymbol(CalleeFnSym->getName(), RIK, OutContext, IsCalleeLocal);
206 
207       // Avoid constructing recursive definitions by detecting whether `Sym` is
208       // found transitively within any of its `CalleeValSym`.
209       if (!CalleeValSym->isVariable() ||
210           !AMDGPUMCExpr::isSymbolUsedInExpression(
211               Sym, CalleeValSym->getVariableValue())) {
212         LLVM_DEBUG(dbgs() << "MCResUse:   " << Sym->getName() << ": Adding "
213                           << CalleeValSym->getName() << " as callee\n");
214         ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
215       } else {
216         LLVM_DEBUG(dbgs() << "MCResUse:   " << Sym->getName()
217                           << ": Recursion found, attempt flattening of cycle "
218                              "for resource usage\n");
219         // In case of recursion for vgpr/sgpr/agpr resource usage: try to
220         // flatten and use the max of the call cycle. May still end up emitting
221         // module max if not fully resolvable.
222         switch (RIK) {
223         default:
224           break;
225         case RIK_NumVGPR:
226         case RIK_NumSGPR:
227         case RIK_NumAGPR:
228           ArgExprs.push_back(flattenedCycleMax(CalleeValSym, RIK, OutContext));
229           break;
230         }
231       }
232     }
233     if (ArgExprs.size() > 1)
234       SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
235   }
236   Sym->setVariableValue(SymVal);
237 }
238 
gatherResourceInfo(const MachineFunction & MF,const AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo & FRI,MCContext & OutContext)239 void MCResourceInfo::gatherResourceInfo(
240     const MachineFunction &MF,
241     const AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo &FRI,
242     MCContext &OutContext) {
243   // Worst case VGPR use for non-hardware-entrypoints.
244   MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
245   MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
246   MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
247   bool IsLocal = MF.getFunction().hasLocalLinkage();
248 
249   if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())) {
250     addMaxVGPRCandidate(FRI.NumVGPR);
251     addMaxAGPRCandidate(FRI.NumAGPR);
252     addMaxSGPRCandidate(FRI.NumExplicitSGPR);
253   }
254 
255   const TargetMachine &TM = MF.getTarget();
256   MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
257 
258   LLVM_DEBUG(dbgs() << "MCResUse: Gathering resource information for "
259                     << FnSym->getName() << '\n');
260   LLVM_DEBUG({
261     if (!FRI.Callees.empty()) {
262       dbgs() << "MCResUse: Callees:\n";
263       for (const Function *Callee : FRI.Callees) {
264         MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
265         dbgs() << "MCResUse:   " << CalleeFnSym->getName() << '\n';
266       }
267     }
268   });
269 
270   auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs,
271                        ResourceInfoKind RIK) {
272     if (!FRI.HasIndirectCall) {
273       assignResourceInfoExpr(numRegs, RIK, AMDGPUMCExpr::AGVK_Max, MF,
274                              FRI.Callees, OutContext);
275     } else {
276       const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
277       MCSymbol *LocalNumSym =
278           getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
279       const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
280           {MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
281       LocalNumSym->setVariableValue(MaxWithLocal);
282       LLVM_DEBUG(dbgs() << "MCResUse:   " << LocalNumSym->getName()
283                         << ": Indirect callee within, using module maximum\n");
284     }
285   };
286 
287   LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n');
288   SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR);
289   SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR);
290   SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR);
291 
292   {
293     // The expression for private segment size should be: FRI.PrivateSegmentSize
294     // + max(FRI.Callees, FRI.CalleeSegmentSize)
295     SmallVector<const MCExpr *, 8> ArgExprs;
296     MCSymbol *Sym =
297         getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext, IsLocal);
298     if (FRI.CalleeSegmentSize) {
299       LLVM_DEBUG(dbgs() << "MCResUse:   " << Sym->getName() << ": Adding "
300                         << FRI.CalleeSegmentSize
301                         << " for indirect/recursive callees within\n");
302       ArgExprs.push_back(
303           MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
304     }
305 
306     SmallPtrSet<const Function *, 8> Seen;
307     Seen.insert(&MF.getFunction());
308     for (const Function *Callee : FRI.Callees) {
309       if (!Seen.insert(Callee).second)
310         continue;
311       if (!Callee->isDeclaration()) {
312         bool IsCalleeLocal = Callee->hasLocalLinkage();
313         MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
314         MCSymbol *CalleeValSym =
315             getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext,
316                       IsCalleeLocal);
317 
318         // Avoid constructing recursive definitions by detecting whether `Sym`
319         // is found transitively within any of its `CalleeValSym`.
320         if (!CalleeValSym->isVariable() ||
321             !AMDGPUMCExpr::isSymbolUsedInExpression(
322                 Sym, CalleeValSym->getVariableValue())) {
323           LLVM_DEBUG(dbgs() << "MCResUse:   " << Sym->getName() << ": Adding "
324                             << CalleeValSym->getName() << " as callee\n");
325           ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
326         }
327       }
328     }
329     const MCExpr *localConstExpr =
330         MCConstantExpr::create(FRI.PrivateSegmentSize, OutContext);
331     LLVM_DEBUG(dbgs() << "MCResUse:   " << Sym->getName() << ": Adding "
332                       << FRI.PrivateSegmentSize
333                       << " as function local usage\n");
334     if (!ArgExprs.empty()) {
335       const AMDGPUMCExpr *transitiveExpr =
336           AMDGPUMCExpr::createMax(ArgExprs, OutContext);
337       localConstExpr =
338           MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
339     }
340     Sym->setVariableValue(localConstExpr);
341   }
342 
343   auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
344     MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
345     LLVM_DEBUG(
346         dbgs() << "MCResUse:   " << Sym->getName() << ": Adding " << LocalValue
347                << ", no further propagation as indirect callee found within\n");
348     Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
349   };
350 
351   if (!FRI.HasIndirectCall) {
352     assignResourceInfoExpr(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC,
353                            AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
354     assignResourceInfoExpr(FRI.UsesFlatScratch,
355                            ResourceInfoKind::RIK_UsesFlatScratch,
356                            AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
357     assignResourceInfoExpr(FRI.HasDynamicallySizedStack,
358                            ResourceInfoKind::RIK_HasDynSizedStack,
359                            AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
360     assignResourceInfoExpr(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion,
361                            AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
362     assignResourceInfoExpr(FRI.HasIndirectCall,
363                            ResourceInfoKind::RIK_HasIndirectCall,
364                            AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
365   } else {
366     SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC);
367     SetToLocal(FRI.UsesFlatScratch, ResourceInfoKind::RIK_UsesFlatScratch);
368     SetToLocal(FRI.HasDynamicallySizedStack,
369                ResourceInfoKind::RIK_HasDynSizedStack);
370     SetToLocal(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion);
371     SetToLocal(FRI.HasIndirectCall, ResourceInfoKind::RIK_HasIndirectCall);
372   }
373 }
374 
createTotalNumVGPRs(const MachineFunction & MF,MCContext & Ctx)375 const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
376                                                   MCContext &Ctx) {
377   const TargetMachine &TM = MF.getTarget();
378   MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
379   bool IsLocal = MF.getFunction().hasLocalLinkage();
380   return AMDGPUMCExpr::createTotalNumVGPR(
381       getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx, IsLocal),
382       getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx, IsLocal), Ctx);
383 }
384 
createTotalNumSGPRs(const MachineFunction & MF,bool hasXnack,MCContext & Ctx)385 const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
386                                                   bool hasXnack,
387                                                   MCContext &Ctx) {
388   const TargetMachine &TM = MF.getTarget();
389   MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
390   bool IsLocal = MF.getFunction().hasLocalLinkage();
391   return MCBinaryExpr::createAdd(
392       getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx, IsLocal),
393       AMDGPUMCExpr::createExtraSGPRs(
394           getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx, IsLocal),
395           getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx, IsLocal),
396           hasXnack, Ctx),
397       Ctx);
398 }
399