//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

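// Bit positions for the implicit kernel inputs (preloaded SGPR and VGPR
// arguments) that a function may need. AAAMDAttributes below uses these as a
// bitmask: a bit that stays set means the corresponding input is known or
// assumed to be unneeded, so the matching "amdgpu-no-*" attribute can be added.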
enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,

  // SGPRs
  DISPATCH_PTR = 1 << 0,
  QUEUE_PTR = 1 << 1,
  DISPATCH_ID = 1 << 2,
  IMPLICIT_ARG_PTR = 1 << 3,
  WORKGROUP_ID_X = 1 << 4,
  WORKGROUP_ID_Y = 1 << 5,
  WORKGROUP_ID_Z = 1 << 6,

  // VGPRs
  WORKITEM_ID_X = 1 << 7,
  WORKITEM_ID_Y = 1 << 8,
  WORKITEM_ID_Z = 1 << 9,
  ALL_ARGUMENT_MASK = (1 << 10) - 1
};

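// Maps each implicit input to the attribute that records it is not needed.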
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
  {QUEUE_PTR, "amdgpu-no-queue-ptr"},
  {DISPATCH_ID, "amdgpu-no-dispatch-id"},
  {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
  {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
  {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
  {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
  {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
  {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
  {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
};

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

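// Casting a local or private pointer to the flat address space needs the
// aperture base addresses; without aperture registers those are read through
// the queue pointer.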
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

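/// Returns true if \p C is a global value in the LDS (local) or region address
/// space.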
static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresImplicitArgPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

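  /// Facts about a constant, computed by getConstantAccess(), that determine
  /// whether the queue pointer is required.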
  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

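  /// Get the effective (min, max) flat workgroup sizes for \p F, taking its
  /// attributes and the subtarget defaults into account.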
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

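  /// Get the widest flat workgroup size range the subtarget supports.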
  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }

    // Cache the result so constants shared between users are only walked once;
    // without this the lookup above can never hit.
    ConstantStatus[C] = Result;
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

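/// Abstract attribute tracking which implicit kernel inputs a function and its
/// transitive callees require. One state bit per ImplicitArgumentMask entry.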
struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

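/// Abstract attribute propagating the "uniform-work-group-size" function
/// attribute from kernels down to the functions they (transitively) call.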
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

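  // Meet the state with every caller's state: a callee can only keep
  // uniform-work-group-size=true if all of its known callers have it.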
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    for (auto Attr : ImplicitAttrs) {
      if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

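  // Deduce the required implicit inputs: propagate from known callees, map
  // intrinsic calls to their inputs, and check address space casts and
  // constant operands for anything that needs the queue pointer.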
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

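    // Returning false stops checkForAllInstructions as soon as one cast that
    // needs the queue pointer is found.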
    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

    if (!IsNonEntryFunc && HasApertureRegs) {
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

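    // Fall back to scanning every instruction operand for constants (LDS
    // globals, constant addrspacecast expressions) that require the queue
    // pointer.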
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              removeAssumedBits(QUEUE_PTR);
              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                                   ChangeStatus::UNCHANGED;
            }
          }
        }
      }
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                         ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

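  // Seed the range from the function's current flat workgroup size. Kernels
  // keep that range as-is; only non-entry functions are refined from callers.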
  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

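/// Legacy module pass that runs the Attributor with the AMDGPU-specific
/// abstract attributes defined above.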
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

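  // Seed the AMDGPU abstract attributes on every non-intrinsic function and
  // run the Attributor until a fixpoint is reached.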
  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)