1 //===- AMDGPUMCResourceInfo.cpp --- MC Resource Info ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// \brief MC infrastructure to propagate the function level resource usage 11 /// info. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUMCResourceInfo.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/MC/MCAsmInfo.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCSymbol.h" 21 #include "llvm/Target/TargetMachine.h" 22 23 #define DEBUG_TYPE "amdgpu-mc-resource-usage" 24 25 using namespace llvm; 26 27 MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK, 28 MCContext &OutContext, bool IsLocal) { 29 auto GOCS = [FuncName, &OutContext, IsLocal](StringRef Suffix) { 30 StringRef Prefix = 31 IsLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : ""; 32 return OutContext.getOrCreateSymbol(Twine(Prefix) + FuncName + 33 Twine(Suffix)); 34 }; 35 switch (RIK) { 36 case RIK_NumVGPR: 37 return GOCS(".num_vgpr"); 38 case RIK_NumAGPR: 39 return GOCS(".num_agpr"); 40 case RIK_NumSGPR: 41 return GOCS(".numbered_sgpr"); 42 case RIK_PrivateSegSize: 43 return GOCS(".private_seg_size"); 44 case RIK_UsesVCC: 45 return GOCS(".uses_vcc"); 46 case RIK_UsesFlatScratch: 47 return GOCS(".uses_flat_scratch"); 48 case RIK_HasDynSizedStack: 49 return GOCS(".has_dyn_sized_stack"); 50 case RIK_HasRecursion: 51 return GOCS(".has_recursion"); 52 case RIK_HasIndirectCall: 53 return GOCS(".has_indirect_call"); 54 } 55 llvm_unreachable("Unexpected ResourceInfoKind."); 56 } 57 58 const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName, 59 ResourceInfoKind RIK, 60 MCContext &Ctx, bool IsLocal) { 61 return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx, IsLocal), Ctx); 62 } 63 64 void MCResourceInfo::assignMaxRegs(MCContext &OutContext) { 65 // Assign expression to get the max register use to the max_num_Xgpr symbol. 66 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext); 67 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext); 68 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext); 69 70 auto assignMaxRegSym = [&OutContext](MCSymbol *Sym, int32_t RegCount) { 71 const MCExpr *MaxExpr = MCConstantExpr::create(RegCount, OutContext); 72 Sym->setVariableValue(MaxExpr); 73 }; 74 75 assignMaxRegSym(MaxVGPRSym, MaxVGPR); 76 assignMaxRegSym(MaxAGPRSym, MaxAGPR); 77 assignMaxRegSym(MaxSGPRSym, MaxSGPR); 78 } 79 80 void MCResourceInfo::reset() { *this = MCResourceInfo(); } 81 82 void MCResourceInfo::finalize(MCContext &OutContext) { 83 assert(!Finalized && "Cannot finalize ResourceInfo again."); 84 Finalized = true; 85 assignMaxRegs(OutContext); 86 } 87 88 MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) { 89 return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr"); 90 } 91 92 MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) { 93 return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr"); 94 } 95 96 MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) { 97 return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr"); 98 } 99 100 // Tries to flatten recursive call register resource gathering. Simple cycle 101 // avoiding dfs to find the constants in the propagated symbols. 102 // Assumes: 103 // - RecSym has been confirmed to recurse (this means the callee symbols should 104 // all be populated, started at RecSym). 105 // - Shape of the resource symbol's MCExpr (`max` args are order agnostic): 106 // RecSym.MCExpr := max(<constant>+, <callee_symbol>*) 107 const MCExpr *MCResourceInfo::flattenedCycleMax(MCSymbol *RecSym, 108 ResourceInfoKind RIK, 109 MCContext &OutContext) { 110 SmallPtrSet<const MCExpr *, 8> Seen; 111 SmallVector<const MCExpr *, 8> WorkList; 112 int64_t Maximum = 0; 113 114 const MCExpr *RecExpr = RecSym->getVariableValue(); 115 WorkList.push_back(RecExpr); 116 117 while (!WorkList.empty()) { 118 const MCExpr *CurExpr = WorkList.pop_back_val(); 119 switch (CurExpr->getKind()) { 120 default: { 121 // Assuming the recursion is of shape `max(<constant>, <callee_symbol>)` 122 // where <callee_symbol> will eventually recurse. If this condition holds, 123 // the recursion occurs within some other (possibly unresolvable) MCExpr, 124 // thus using the worst case value then. 125 if (!AMDGPUMCExpr::isSymbolUsedInExpression(RecSym, CurExpr)) { 126 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName() 127 << ": Recursion in unexpected sub-expression, using " 128 "module maximum\n"); 129 switch (RIK) { 130 default: 131 break; 132 case RIK_NumVGPR: 133 return MCSymbolRefExpr::create(getMaxVGPRSymbol(OutContext), 134 OutContext); 135 break; 136 case RIK_NumSGPR: 137 return MCSymbolRefExpr::create(getMaxSGPRSymbol(OutContext), 138 OutContext); 139 break; 140 case RIK_NumAGPR: 141 return MCSymbolRefExpr::create(getMaxAGPRSymbol(OutContext), 142 OutContext); 143 break; 144 } 145 } 146 break; 147 } 148 case MCExpr::ExprKind::Constant: { 149 int64_t Val = cast<MCConstantExpr>(CurExpr)->getValue(); 150 Maximum = std::max(Maximum, Val); 151 break; 152 } 153 case MCExpr::ExprKind::SymbolRef: { 154 const MCSymbolRefExpr *SymExpr = cast<MCSymbolRefExpr>(CurExpr); 155 const MCSymbol &SymRef = SymExpr->getSymbol(); 156 if (SymRef.isVariable()) { 157 const MCExpr *SymVal = SymRef.getVariableValue(); 158 if (Seen.insert(SymVal).second) 159 WorkList.push_back(SymVal); 160 } 161 break; 162 } 163 case MCExpr::ExprKind::Target: { 164 const AMDGPUMCExpr *TargetExpr = cast<AMDGPUMCExpr>(CurExpr); 165 if (TargetExpr->getKind() == AMDGPUMCExpr::VariantKind::AGVK_Max) { 166 for (auto &Arg : TargetExpr->getArgs()) 167 WorkList.push_back(Arg); 168 } 169 break; 170 } 171 } 172 } 173 174 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName() 175 << ": Using flattened max: << " << Maximum << '\n'); 176 177 return MCConstantExpr::create(Maximum, OutContext); 178 } 179 180 void MCResourceInfo::assignResourceInfoExpr( 181 int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind, 182 const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees, 183 MCContext &OutContext) { 184 const TargetMachine &TM = MF.getTarget(); 185 bool IsLocal = MF.getFunction().hasLocalLinkage(); 186 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction()); 187 const MCConstantExpr *LocalConstExpr = 188 MCConstantExpr::create(LocalValue, OutContext); 189 const MCExpr *SymVal = LocalConstExpr; 190 MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal); 191 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " 192 << LocalValue << " as function local usage\n"); 193 if (!Callees.empty()) { 194 SmallVector<const MCExpr *, 8> ArgExprs; 195 SmallPtrSet<const Function *, 8> Seen; 196 ArgExprs.push_back(LocalConstExpr); 197 198 for (const Function *Callee : Callees) { 199 if (!Seen.insert(Callee).second) 200 continue; 201 202 bool IsCalleeLocal = Callee->hasLocalLinkage(); 203 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction()); 204 MCSymbol *CalleeValSym = 205 getSymbol(CalleeFnSym->getName(), RIK, OutContext, IsCalleeLocal); 206 207 // Avoid constructing recursive definitions by detecting whether `Sym` is 208 // found transitively within any of its `CalleeValSym`. 209 if (!CalleeValSym->isVariable() || 210 !AMDGPUMCExpr::isSymbolUsedInExpression( 211 Sym, CalleeValSym->getVariableValue())) { 212 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " 213 << CalleeValSym->getName() << " as callee\n"); 214 ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext)); 215 } else { 216 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() 217 << ": Recursion found, attempt flattening of cycle " 218 "for resource usage\n"); 219 // In case of recursion for vgpr/sgpr/agpr resource usage: try to 220 // flatten and use the max of the call cycle. May still end up emitting 221 // module max if not fully resolvable. 222 switch (RIK) { 223 default: 224 break; 225 case RIK_NumVGPR: 226 case RIK_NumSGPR: 227 case RIK_NumAGPR: 228 ArgExprs.push_back(flattenedCycleMax(CalleeValSym, RIK, OutContext)); 229 break; 230 } 231 } 232 } 233 if (ArgExprs.size() > 1) 234 SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext); 235 } 236 Sym->setVariableValue(SymVal); 237 } 238 239 void MCResourceInfo::gatherResourceInfo( 240 const MachineFunction &MF, 241 const AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo &FRI, 242 MCContext &OutContext) { 243 // Worst case VGPR use for non-hardware-entrypoints. 244 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext); 245 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext); 246 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext); 247 bool IsLocal = MF.getFunction().hasLocalLinkage(); 248 249 if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())) { 250 addMaxVGPRCandidate(FRI.NumVGPR); 251 addMaxAGPRCandidate(FRI.NumAGPR); 252 addMaxSGPRCandidate(FRI.NumExplicitSGPR); 253 } 254 255 const TargetMachine &TM = MF.getTarget(); 256 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction()); 257 258 LLVM_DEBUG(dbgs() << "MCResUse: Gathering resource information for " 259 << FnSym->getName() << '\n'); 260 LLVM_DEBUG({ 261 if (!FRI.Callees.empty()) { 262 dbgs() << "MCResUse: Callees:\n"; 263 for (const Function *Callee : FRI.Callees) { 264 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction()); 265 dbgs() << "MCResUse: " << CalleeFnSym->getName() << '\n'; 266 } 267 } 268 }); 269 270 auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs, 271 ResourceInfoKind RIK) { 272 if (!FRI.HasIndirectCall) { 273 assignResourceInfoExpr(numRegs, RIK, AMDGPUMCExpr::AGVK_Max, MF, 274 FRI.Callees, OutContext); 275 } else { 276 const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext); 277 MCSymbol *LocalNumSym = 278 getSymbol(FnSym->getName(), RIK, OutContext, IsLocal); 279 const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax( 280 {MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext); 281 LocalNumSym->setVariableValue(MaxWithLocal); 282 LLVM_DEBUG(dbgs() << "MCResUse: " << LocalNumSym->getName() 283 << ": Indirect callee within, using module maximum\n"); 284 } 285 }; 286 287 LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n'); 288 SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR); 289 SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR); 290 SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR); 291 292 { 293 // The expression for private segment size should be: FRI.PrivateSegmentSize 294 // + max(FRI.Callees, FRI.CalleeSegmentSize) 295 SmallVector<const MCExpr *, 8> ArgExprs; 296 MCSymbol *Sym = 297 getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext, IsLocal); 298 if (FRI.CalleeSegmentSize) { 299 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " 300 << FRI.CalleeSegmentSize 301 << " for indirect/recursive callees within\n"); 302 ArgExprs.push_back( 303 MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext)); 304 } 305 306 SmallPtrSet<const Function *, 8> Seen; 307 Seen.insert(&MF.getFunction()); 308 for (const Function *Callee : FRI.Callees) { 309 if (!Seen.insert(Callee).second) 310 continue; 311 if (!Callee->isDeclaration()) { 312 bool IsCalleeLocal = Callee->hasLocalLinkage(); 313 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction()); 314 MCSymbol *CalleeValSym = 315 getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext, 316 IsCalleeLocal); 317 318 // Avoid constructing recursive definitions by detecting whether `Sym` 319 // is found transitively within any of its `CalleeValSym`. 320 if (!CalleeValSym->isVariable() || 321 !AMDGPUMCExpr::isSymbolUsedInExpression( 322 Sym, CalleeValSym->getVariableValue())) { 323 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " 324 << CalleeValSym->getName() << " as callee\n"); 325 ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext)); 326 } 327 } 328 } 329 const MCExpr *localConstExpr = 330 MCConstantExpr::create(FRI.PrivateSegmentSize, OutContext); 331 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " 332 << FRI.PrivateSegmentSize 333 << " as function local usage\n"); 334 if (!ArgExprs.empty()) { 335 const AMDGPUMCExpr *transitiveExpr = 336 AMDGPUMCExpr::createMax(ArgExprs, OutContext); 337 localConstExpr = 338 MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext); 339 } 340 Sym->setVariableValue(localConstExpr); 341 } 342 343 auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) { 344 MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal); 345 LLVM_DEBUG( 346 dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue 347 << ", no further propagation as indirect callee found within\n"); 348 Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext)); 349 }; 350 351 if (!FRI.HasIndirectCall) { 352 assignResourceInfoExpr(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC, 353 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext); 354 assignResourceInfoExpr(FRI.UsesFlatScratch, 355 ResourceInfoKind::RIK_UsesFlatScratch, 356 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext); 357 assignResourceInfoExpr(FRI.HasDynamicallySizedStack, 358 ResourceInfoKind::RIK_HasDynSizedStack, 359 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext); 360 assignResourceInfoExpr(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion, 361 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext); 362 assignResourceInfoExpr(FRI.HasIndirectCall, 363 ResourceInfoKind::RIK_HasIndirectCall, 364 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext); 365 } else { 366 SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC); 367 SetToLocal(FRI.UsesFlatScratch, ResourceInfoKind::RIK_UsesFlatScratch); 368 SetToLocal(FRI.HasDynamicallySizedStack, 369 ResourceInfoKind::RIK_HasDynSizedStack); 370 SetToLocal(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion); 371 SetToLocal(FRI.HasIndirectCall, ResourceInfoKind::RIK_HasIndirectCall); 372 } 373 } 374 375 const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF, 376 MCContext &Ctx) { 377 const TargetMachine &TM = MF.getTarget(); 378 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction()); 379 bool IsLocal = MF.getFunction().hasLocalLinkage(); 380 return AMDGPUMCExpr::createTotalNumVGPR( 381 getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx, IsLocal), 382 getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx, IsLocal), Ctx); 383 } 384 385 const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF, 386 bool hasXnack, 387 MCContext &Ctx) { 388 const TargetMachine &TM = MF.getTarget(); 389 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction()); 390 bool IsLocal = MF.getFunction().hasLocalLinkage(); 391 return MCBinaryExpr::createAdd( 392 getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx, IsLocal), 393 AMDGPUMCExpr::createExtraSGPRs( 394 getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx, IsLocal), 395 getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx, IsLocal), 396 hasXnack, Ctx), 397 Ctx); 398 } 399