//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

namespace llvm {
void initializeCycleInfoWrapperPassPass(PassRegistry &);
} // namespace llvm

using namespace llvm;

static cl::opt<unsigned> KernargPreloadCount(
    "amdgpu-kernarg-preload-count",
    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));

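// AMDGPUAttributes.def is an X-macro file: each AMDGPU_ATTRIBUTE(Name, Str)
// entry is expanded three times below to build the bit-position enum, the
// bit-mask enum, and the (mask, attribute-string) table. For illustration
// only (the exact entries live in the .def file), an entry roughly of the
// form AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr") yields
// DISPATCH_PTR_POS, the mask DISPATCH_PTR = 1 << DISPATCH_PTR_POS, and the
// table entry {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"}.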
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
#include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to
  // access queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR :
                                                      QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT :
                                                        QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

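/// \returns true if an address space cast from \p SrcAS may require the queue
/// pointer, i.e. the source is the local or private address space. Callers
/// still check whether aperture registers make the queue pointer unnecessary.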
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

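/// \returns true if \p C is a global value in the LDS (local) or region
/// address space.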
static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
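/// Target-specific InformationCache: caches the code object version, answers
/// GCNSubtarget queries (aperture registers, GetDoorbellID support, flat
/// work-group size and waves-per-EU ranges), and remembers whether a constant
/// forces a function to need the queue pointer.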
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
        CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}

  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const {
    return CodeObjectVersion;
  }

  /// Get the effective value of "amdgpu-waves-per-eu" for the function,
  /// accounting for the interaction with the passed value to use for
  /// "amdgpu-flat-work-group-size".
  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getWavesPerEU(F, FlatWorkGroupSize);
  }

  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
  }

  unsigned getMaxWavesPerEU(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC || !Visited.insert(OpC).second)
        continue;

      Result |= getConstantAccess(OpC, Visited);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue =
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
          "true";

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

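  /// Clamp the assumed "uniform-work-group-size" state against the state of
  /// every known caller; any unknown call site forces a pessimistic fixpoint.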
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo)
        return false;

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

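/// Function-position implementation of AAAMDAttributes: starts from the
/// optimistic assumption that no implicit kernel arguments are needed and
/// removes bits as uses of the corresponding intrinsics or implicit-argument
/// loads are discovered.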
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

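  /// Walk the optimistic call edges: intrinsic callees map to implicit
  /// argument bits via intrinsicToAttrMask(), other callees propagate their
  /// own AAAMDAttributes state, and direct accesses to the implicit kernel
  /// argument area (hostcall, heap, queue, multigrid-sync, default-queue,
  /// completion-action) clear the corresponding assumed bits.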
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges->getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        if (!AAAMD)
          return indicatePessimisticFixpoint();
        *this &= *AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (COV >= 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
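  /// \returns true if the associated function needs the queue pointer, either
  /// because it contains an address space cast from the local or private
  /// address space (and the subtarget has no aperture registers) or because
  /// one of its constant operands requires it.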
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking every
    // instruction ourselves, so try it first.

    // The queue pointer is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
      if (!PointerInfoAA)
        return false;

      return PointerInfoAA->forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Base class to derive different size ranges.
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

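  /// Clamp the assumed range against the assumed ranges of all callers of the
  /// associated function; any unknown call site forces a pessimistic fixpoint.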
  template <class AttributeImpl>
  ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo)
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
                                         unsigned Max) {
    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << getName() << '[';
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
};

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
    return emitAttributeIfNotDefault(A, Min, Max);
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  bool isValidState() const override {
    return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
            *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {

      unsigned Min, Max;
      std::tie(Min, Max) = InfoCache.getWavesPerEU(
          *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
               AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});

      ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
      intersectKnown(Range);
    }

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
      if (!CallerInfo || !AssumedGroupSize)
        return false;

      unsigned Min, Max;
      std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
          *Caller,
          {CallerInfo->getAssumed().getLower().getZExtValue(),
           CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
          {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
           AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
      ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
      IntegerRangeState CallerRangeState(CallerRange);
      Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Max = InfoCache.getMaxWavesPerEU(*F);
    return emitAttributeIfNotDefault(A, 1, Max);
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}

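/// \returns true if any constraint of inline assembly \p IA names an AGPR
/// ('a' register class), either as a bare constraint code or inside a '{...}'
/// register name.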
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
  for (const auto &CI : IA->ParseConstraints()) {
    for (StringRef Code : CI.Codes) {
      Code.consume_front("{");
      if (Code.starts_with("a"))
        return true;
    }
  }

  return false;
}

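/// Deduce the "amdgpu-no-agpr" attribute: assume a function never needs AGPRs
/// unless it (transitively) calls something that may use them, such as inline
/// assembly with an AGPR constraint or an unknown callee.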
struct AAAMDGPUNoAGPR
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>,
                         AAAMDGPUNoAGPR> {
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
                                           Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
    llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    if (F->hasFnAttribute("amdgpu-no-agpr"))
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    // TODO: Use AACallEdges, but then we need a way to inspect asm edges.

    auto CheckForNoAGPRs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Value *CalleeOp = CB.getCalledOperand();
      const Function *Callee = dyn_cast<Function>(CalleeOp);
      if (!Callee) {
        if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
          return !inlineAsmUsesAGPRs(IA);
        return false;
      }

      // Some intrinsics may use AGPRs, but if we have a choice, we are not
      // required to use AGPRs.
      if (Callee->isIntrinsic())
        return true;

      // TODO: Handle callsite attributes
      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      return CalleeInfo && CalleeInfo->getAssumed();
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!getAssumed())
      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "amdgpu-no-agpr")});
  }

  const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUNoAGPR
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  static const char ID;
};

const char AAAMDGPUNoAGPR::ID = 0;

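/// Mark the first few kernel arguments 'inreg' so the backend preloads them
/// into user SGPRs. The count is capped by the -amdgpu-kernarg-preload-count
/// option (default 0, i.e. disabled) and by the subtarget's maximum number of
/// user SGPRs, and stops early at the first byref or nest argument.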
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  for (unsigned I = 0;
       I < F.arg_size() &&
       I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
       ++I) {
    Argument &Arg = *F.getArg(I);
    // Check for incompatible attributes.
    if (Arg.hasByRefAttr() || Arg.hasNestAttr())
      break;

    Arg.addAttr(Attribute::InReg);
  }
}

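/// Run the Attributor over every non-intrinsic function in \p M with only the
/// AMDGPU-relevant abstract attributes enabled, and return true if anything
/// changed.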
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())
      Functions.insert(&F);
  }

  CallGraphUpdater CGUpdater;
  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
       &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
       &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
       &AAUnderlyingObjects::ID});

  AttributorConfig AC(CGUpdater);
  AC.Allowed = &Allowed;
  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  for (Function &F : M) {
    if (!F.isIntrinsic()) {
      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
      A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
      A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
      CallingConv::ID CC = F.getCallingConv();
      if (!AMDGPU::isEntryFunctionCC(CC)) {
        A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
      } else if (CC == CallingConv::AMDGPU_KERNEL) {
        addPreloadKernArgHint(F, TM);
      }
    }
  }

  ChangeStatus Change = A.run();
  return Change == ChangeStatus::CHANGED;
}

class AMDGPUAttributorLegacy : public ModulePass {
public:
  AMDGPUAttributorLegacy() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    AnalysisGetter AG(this);
    return runImpl(M, AG, *TM);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<CycleInfoWrapperPass>();
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
                                                  ModuleAnalysisManager &AM) {

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  AnalysisGetter AG(FAM);

  // TODO: Probably preserves CFG
  return runImpl(M, AG, TM) ? PreservedAnalyses::none()
                            : PreservedAnalyses::all();
}

char AMDGPUAttributorLegacy::ID = 0;

Pass *llvm::createAMDGPUAttributorLegacyPass() {
  return new AMDGPUAttributorLegacy();
}
INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);
INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
                    false, false)