//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
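
// Caches TargetMachine-derived queries (e.g. whether the subtarget has
// aperture registers) and, per Constant, whether it forces a function to need
// the queue pointer. For illustration only (not code from this pass), a cast
// out of the private or local address space such as
//   %flat = addrspacecast i8 addrspace(5)* %p to i8*
// needs the apertures, which are read via the queue pointer on subtargets
// without aperture registers.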
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;
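
// Deduces "uniform-work-group-size" top-down: kernels fix their state from an
// existing attribute, and every other function clamps its assumed value
// against all of its callers, so it only stays "true" when every reachable
// caller agrees.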
struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
                                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}
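
// Deduces which implicit kernel inputs (workitem/workgroup IDs, dispatch ptr,
// queue ptr, implicitarg ptr, ...) a function requires. Information flows
// bottom-up: intrinsic calls map to attribute names via intrinsicToAttrName,
// and attributes already deduced for callees are merged into their callers.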
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Don't add attributes to intrinsics.
    if (F->isIntrinsic()) {
      indicatePessimisticFixpoint();
      return;
    }

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }
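
  // Gather requirements from the call edges and the function body: intrinsic
  // callees, the AAAMDAttributes of regular callees, address space casts and
  // constant expressions that need the queue pointer, allocas
  // (amdgpu-stack-objects), and calls from entry functions (amdgpu-calls).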
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    bool HasCall = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
          AddAttribute("amdgpu-kernarg-segment-ptr");
          continue;
        }

        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      HasCall = true;
      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from the called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    HasCall |= AAEdges.hasUnknownCallee();
    if (!IsNonEntryFunc && HasCall)
      AddAttribute("amdgpu-calls");

    // Check the function body.
    auto CheckAlloca = [&](Instruction &I) {
      AddAttribute("amdgpu-stack-objects");
      return false;
    };

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
                              UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking every instruction
    // ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture registers are present.
    if (!HasApertureRegs)
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  DenseSet<StringRef> Attributes;
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
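
// Legacy pass manager wrapper. Seeds AAAMDAttributes and AAAMDWorkGroupSize
// for every function in the module and runs the Attributor to a fixpoint,
// manifesting the deduced attributes in the IR.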
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M)
      Functions.insert(&F);

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    for (Function &F : M) {
      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
      A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)