//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,

  // SGPRs
  DISPATCH_PTR = 1 << 0,
  QUEUE_PTR = 1 << 1,
  DISPATCH_ID = 1 << 2,
  IMPLICIT_ARG_PTR = 1 << 3,
  WORKGROUP_ID_X = 1 << 4,
  WORKGROUP_ID_Y = 1 << 5,
  WORKGROUP_ID_Z = 1 << 6,

  // VGPRs
  WORKITEM_ID_X = 1 << 7,
  WORKITEM_ID_Y = 1 << 8,
  WORKITEM_ID_Z = 1 << 9,
  ALL_ARGUMENT_MASK = (1 << 10) - 1
};

static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
        {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
        {QUEUE_PTR, "amdgpu-no-queue-ptr"},
        {DISPATCH_ID, "amdgpu-no-dispatch-id"},
        {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
        {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
        {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
        {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
        {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
        {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
        {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
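//
// Illustrative example (the IR below is made up for exposition): given
//
//   define amdgpu_kernel void @k() {
//     %id = call i32 @llvm.amdgcn.workitem.id.y()
//     ret void
//   }
//
// the pass can mark @k with "amdgpu-no-workitem-id-z", "amdgpu-no-queue-ptr",
// and so on, but not with "amdgpu-no-workitem-id-y", because the y workitem
// id really is used.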
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
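// Lowering a cast from the local or private address space to the flat address
// space needs the corresponding aperture base. On subtargets without aperture
// registers (pre-gfx9) that base is loaded from the queue pointer, so e.g. a
// function containing
//
//   %flat = addrspacecast i8 addrspace(5)* %p to i8*
//
// cannot be given "amdgpu-no-queue-ptr" on such subtargets.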
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresImplicitArgPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
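// Propagates "uniform-work-group-size" top-down. Kernels take the value from
// their own attribute and are fixed immediately; a non-kernel function can
// only keep the optimistic value "true" while every known caller agrees.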
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |= clampStateAndIndicateChange(this->getState(),
                                            CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
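// Deduces which implicit kernel inputs a function needs. Every bit starts out
// optimistically assumed "not needed" and is cleared as soon as a use is
// discovered; manifest() then emits an "amdgpu-no-*" attribute for each input
// that is still known to be unneeded.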
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    for (auto Attr : ImplicitAttrs) {
      if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }
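  // The update step below works in stages: first merge the states of all
  // callees (mapping intrinsic callees onto implicit-input masks), then look
  // for queue-ptr requirements coming from addrspacecast instructions, and
  // finally from address-space casts and DS globals hidden inside constant
  // operands.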
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking all instructions
    // ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    if (!IsNonEntryFunc && HasApertureRegs) {
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              removeAssumedBits(QUEUE_PTR);
              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                                 : ChangeStatus::UNCHANGED;
            }
          }
        }
      }
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
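/// For example (an illustrative scenario, not from a real test): if the only
/// caller of @helper is a kernel annotated with
/// "amdgpu-flat-work-group-size"="128,256", the assumed range [128, 256] is
/// clamped into @helper and manifested as the same attribute, unless it
/// matches the subtarget default range, in which case no attribute is added.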
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }
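  // Seed the deduction: every non-intrinsic function gets an AAAMDAttributes
  // and an AAUniformWorkGroupSize; non-entry functions additionally get an
  // AAAMDFlatWorkGroupSize. Running the Attributor to a fixpoint then
  // manifests the deduced attributes.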
  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)