xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (revision ba3c1f5972d7b90feb6e6da47905ff2757e0fe57)
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25                                                      const DataLayout &DL) {
26   Type *Ty = Arg.getType();
27   MaybeAlign ArgAlign;
28   if (Arg.hasByRefAttr()) {
29     Ty = Arg.getParamByRefType();
30     ArgAlign = Arg.getParamAlign();
31   }
32 
33   if (!ArgAlign)
34     ArgAlign = DL.getABITypeAlign(Ty);
35 
36   return std::pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
41 static cl::opt<bool> DumpHSAMetadata(
42     "amdgpu-dump-hsa-metadata",
43     cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45     "amdgpu-verify-hsa-metadata",
46     cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const {
55   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57 
58 void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const {
59   errs() << "AMDGPU HSA Metadata Parser Test: ";
60 
61   HSAMD::Metadata FromHSAMetadataString;
62   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63     errs() << "FAIL\n";
64     return;
65   }
66 
67   std::string ToHSAMetadataString;
68   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69     errs() << "FAIL\n";
70     return;
71   }
72 
73   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74          << '\n';
75   if (HSAMetadataString != ToHSAMetadataString) {
76     errs() << "Original input: " << HSAMetadataString << '\n'
77            << "Produced output: " << ToHSAMetadataString << '\n';
78   }
79 }
80 
81 AccessQualifier
82 MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const {
83   if (AccQual.empty())
84     return AccessQualifier::Unknown;
85 
86   return StringSwitch<AccessQualifier>(AccQual)
87              .Case("read_only",  AccessQualifier::ReadOnly)
88              .Case("write_only", AccessQualifier::WriteOnly)
89              .Case("read_write", AccessQualifier::ReadWrite)
90              .Default(AccessQualifier::Default);
91 }
92 
93 AddressSpaceQualifier
94 MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const {
95   switch (AddressSpace) {
96   case AMDGPUAS::PRIVATE_ADDRESS:
97     return AddressSpaceQualifier::Private;
98   case AMDGPUAS::GLOBAL_ADDRESS:
99     return AddressSpaceQualifier::Global;
100   case AMDGPUAS::CONSTANT_ADDRESS:
101     return AddressSpaceQualifier::Constant;
102   case AMDGPUAS::LOCAL_ADDRESS:
103     return AddressSpaceQualifier::Local;
104   case AMDGPUAS::FLAT_ADDRESS:
105     return AddressSpaceQualifier::Generic;
106   case AMDGPUAS::REGION_ADDRESS:
107     return AddressSpaceQualifier::Region;
108   default:
109     return AddressSpaceQualifier::Unknown;
110   }
111 }
112 
113 ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual,
114                                                StringRef BaseTypeName) const {
115   if (TypeQual.contains("pipe"))
116     return ValueKind::Pipe;
117 
118   return StringSwitch<ValueKind>(BaseTypeName)
119              .Case("image1d_t", ValueKind::Image)
120              .Case("image1d_array_t", ValueKind::Image)
121              .Case("image1d_buffer_t", ValueKind::Image)
122              .Case("image2d_t", ValueKind::Image)
123              .Case("image2d_array_t", ValueKind::Image)
124              .Case("image2d_array_depth_t", ValueKind::Image)
125              .Case("image2d_array_msaa_t", ValueKind::Image)
126              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
127              .Case("image2d_depth_t", ValueKind::Image)
128              .Case("image2d_msaa_t", ValueKind::Image)
129              .Case("image2d_msaa_depth_t", ValueKind::Image)
130              .Case("image3d_t", ValueKind::Image)
131              .Case("sampler_t", ValueKind::Sampler)
132              .Case("queue_t", ValueKind::Queue)
133              .Default(isa<PointerType>(Ty) ?
134                           (Ty->getPointerAddressSpace() ==
135                            AMDGPUAS::LOCAL_ADDRESS ?
136                            ValueKind::DynamicSharedPointer :
137                            ValueKind::GlobalBuffer) :
138                       ValueKind::ByValue);
139 }
140 
141 std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const {
142   switch (Ty->getTypeID()) {
143   case Type::IntegerTyID: {
144     if (!Signed)
145       return (Twine('u') + getTypeName(Ty, true)).str();
146 
147     auto BitWidth = Ty->getIntegerBitWidth();
148     switch (BitWidth) {
149     case 8:
150       return "char";
151     case 16:
152       return "short";
153     case 32:
154       return "int";
155     case 64:
156       return "long";
157     default:
158       return (Twine('i') + Twine(BitWidth)).str();
159     }
160   }
161   case Type::HalfTyID:
162     return "half";
163   case Type::FloatTyID:
164     return "float";
165   case Type::DoubleTyID:
166     return "double";
167   case Type::FixedVectorTyID: {
168     auto VecTy = cast<FixedVectorType>(Ty);
169     auto ElTy = VecTy->getElementType();
170     auto NumElements = VecTy->getNumElements();
171     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
172   }
173   default:
174     return "unknown";
175   }
176 }
177 
178 std::vector<uint32_t>
179 MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const {
180   std::vector<uint32_t> Dims;
181   if (Node->getNumOperands() != 3)
182     return Dims;
183 
184   for (auto &Op : Node->operands())
185     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
186   return Dims;
187 }
188 
189 Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps(
190     const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
191   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
192   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
193   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
194   const Function &F = MF.getFunction();
195 
196   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
197          F.getCallingConv() == CallingConv::SPIR_KERNEL);
198 
199   Align MaxKernArgAlign;
200   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
201                                                                MaxKernArgAlign);
202   HSACodeProps.mKernargSegmentAlign =
203     std::max(MaxKernArgAlign, Align(4)).value();
204 
205   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
206   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
207   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
208   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
209   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
210   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
211   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
212   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
213   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
214   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
215 
216   return HSACodeProps;
217 }
218 
219 Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps(
220     const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
221   return HSAMD::Kernel::DebugProps::Metadata();
222 }
223 
224 void MetadataStreamerYamlV2::emitVersion() {
225   auto &Version = HSAMetadata.mVersion;
226 
227   Version.push_back(VersionMajorV2);
228   Version.push_back(VersionMinorV2);
229 }
230 
231 void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) {
232   auto &Printf = HSAMetadata.mPrintf;
233 
234   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
235   if (!Node)
236     return;
237 
238   for (auto *Op : Node->operands())
239     if (Op->getNumOperands())
240       Printf.push_back(
241           std::string(cast<MDString>(Op->getOperand(0))->getString()));
242 }
243 
244 void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) {
245   auto &Kernel = HSAMetadata.mKernels.back();
246 
247   // TODO: What about other languages?
248   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
249   if (!Node || !Node->getNumOperands())
250     return;
251   auto Op0 = Node->getOperand(0);
252   if (Op0->getNumOperands() <= 1)
253     return;
254 
255   Kernel.mLanguage = "OpenCL C";
256   Kernel.mLanguageVersion.push_back(
257       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
258   Kernel.mLanguageVersion.push_back(
259       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
260 }
261 
262 void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) {
263   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
264 
265   if (auto Node = Func.getMetadata("reqd_work_group_size"))
266     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
267   if (auto Node = Func.getMetadata("work_group_size_hint"))
268     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
269   if (auto Node = Func.getMetadata("vec_type_hint")) {
270     Attrs.mVecTypeHint = getTypeName(
271         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
272         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
273   }
274   if (Func.hasFnAttribute("runtime-handle")) {
275     Attrs.mRuntimeHandle =
276         Func.getFnAttribute("runtime-handle").getValueAsString().str();
277   }
278 }
279 
280 void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func,
281                                             const GCNSubtarget &ST) {
282   for (auto &Arg : Func.args())
283     emitKernelArg(Arg);
284 
285   emitHiddenKernelArgs(Func, ST);
286 }
287 
288 void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) {
289   auto Func = Arg.getParent();
290   auto ArgNo = Arg.getArgNo();
291   const MDNode *Node;
292 
293   StringRef Name;
294   Node = Func->getMetadata("kernel_arg_name");
295   if (Node && ArgNo < Node->getNumOperands())
296     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
297   else if (Arg.hasName())
298     Name = Arg.getName();
299 
300   StringRef TypeName;
301   Node = Func->getMetadata("kernel_arg_type");
302   if (Node && ArgNo < Node->getNumOperands())
303     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
304 
305   StringRef BaseTypeName;
306   Node = Func->getMetadata("kernel_arg_base_type");
307   if (Node && ArgNo < Node->getNumOperands())
308     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
309 
310   StringRef AccQual;
311   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
312       Arg.hasNoAliasAttr()) {
313     AccQual = "read_only";
314   } else {
315     Node = Func->getMetadata("kernel_arg_access_qual");
316     if (Node && ArgNo < Node->getNumOperands())
317       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
318   }
319 
320   StringRef TypeQual;
321   Node = Func->getMetadata("kernel_arg_type_qual");
322   if (Node && ArgNo < Node->getNumOperands())
323     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
324 
325   const DataLayout &DL = Func->getParent()->getDataLayout();
326 
327   MaybeAlign PointeeAlign;
328   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
329     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
330       // FIXME: Should report this for all address spaces
331       PointeeAlign = Arg.getParamAlign().valueOrOne();
332     }
333   }
334 
335   Type *ArgTy;
336   Align ArgAlign;
337   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
338 
339   emitKernelArg(DL, ArgTy, ArgAlign,
340                 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
341                 TypeName, BaseTypeName, AccQual, TypeQual);
342 }
343 
344 void MetadataStreamerYamlV2::emitKernelArg(
345     const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind,
346     MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName,
347     StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) {
348   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
349   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
350 
351   Arg.mName = std::string(Name);
352   Arg.mTypeName = std::string(TypeName);
353   Arg.mSize = DL.getTypeAllocSize(Ty);
354   Arg.mAlign = Alignment.value();
355   Arg.mValueKind = ValueKind;
356   Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
357 
358   if (auto PtrTy = dyn_cast<PointerType>(Ty))
359     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
360 
361   Arg.mAccQual = getAccessQualifier(AccQual);
362 
363   // TODO: Emit Arg.mActualAccQual.
364 
365   SmallVector<StringRef, 1> SplitTypeQuals;
366   TypeQual.split(SplitTypeQuals, " ", -1, false);
367   for (StringRef Key : SplitTypeQuals) {
368     auto P = StringSwitch<bool*>(Key)
369                  .Case("const",    &Arg.mIsConst)
370                  .Case("restrict", &Arg.mIsRestrict)
371                  .Case("volatile", &Arg.mIsVolatile)
372                  .Case("pipe",     &Arg.mIsPipe)
373                  .Default(nullptr);
374     if (P)
375       *P = true;
376   }
377 }
378 
379 void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func,
380                                                   const GCNSubtarget &ST) {
381   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
382   if (!HiddenArgNumBytes)
383     return;
384 
385   auto &DL = Func.getParent()->getDataLayout();
386   auto Int64Ty = Type::getInt64Ty(Func.getContext());
387 
388   if (HiddenArgNumBytes >= 8)
389     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
390   if (HiddenArgNumBytes >= 16)
391     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
392   if (HiddenArgNumBytes >= 24)
393     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
394 
395   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
396                                       AMDGPUAS::GLOBAL_ADDRESS);
397 
398   if (HiddenArgNumBytes >= 32) {
399     // We forbid the use of features requiring hostcall when compiling OpenCL
400     // before code object V5, which makes the mutual exclusion between the
401     // "printf buffer" and "hostcall buffer" here sound.
402     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
403       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
404     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
405       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
406     else
407       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
408   }
409 
410   // Emit "default queue" and "completion action" arguments if enqueue kernel is
411   // used, otherwise emit dummy "none" arguments.
412   if (HiddenArgNumBytes >= 40) {
413     if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
414       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
415     } else {
416       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
417     }
418   }
419 
420   if (HiddenArgNumBytes >= 48) {
421     if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
422         // FIXME: Hack for runtime bug if we fail to optimize this out
423         Func.hasFnAttribute("calls-enqueue-kernel")) {
424       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
425     } else {
426       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
427     }
428   }
429 
430   // Emit the pointer argument for multi-grid object.
431   if (HiddenArgNumBytes >= 56) {
432     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
433       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
434     else
435       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
436   }
437 }
438 
439 bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
440   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
441 }
442 
443 void MetadataStreamerYamlV2::begin(const Module &Mod,
444                                    const IsaInfo::AMDGPUTargetID &TargetID) {
445   emitVersion();
446   emitPrintf(Mod);
447 }
448 
449 void MetadataStreamerYamlV2::end() {
450   std::string HSAMetadataString;
451   if (toString(HSAMetadata, HSAMetadataString))
452     return;
453 
454   if (DumpHSAMetadata)
455     dump(HSAMetadataString);
456   if (VerifyHSAMetadata)
457     verify(HSAMetadataString);
458 }
459 
460 void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF,
461                                         const SIProgramInfo &ProgramInfo) {
462   auto &Func = MF.getFunction();
463   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
464     return;
465 
466   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
467   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
468 
469   HSAMetadata.mKernels.push_back(Kernel::Metadata());
470   auto &Kernel = HSAMetadata.mKernels.back();
471 
472   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
473   Kernel.mName = std::string(Func.getName());
474   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
475   emitKernelLanguage(Func);
476   emitKernelAttrs(Func);
477   emitKernelArgs(Func, ST);
478   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
479   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
480 }
481 
482 //===----------------------------------------------------------------------===//
483 // HSAMetadataStreamerV3
484 //===----------------------------------------------------------------------===//
485 
486 void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
487   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
488 }
489 
490 void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
491   errs() << "AMDGPU HSA Metadata Parser Test: ";
492 
493   msgpack::Document FromHSAMetadataString;
494 
495   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
496     errs() << "FAIL\n";
497     return;
498   }
499 
500   std::string ToHSAMetadataString;
501   raw_string_ostream StrOS(ToHSAMetadataString);
502   FromHSAMetadataString.toYAML(StrOS);
503 
504   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
505   if (HSAMetadataString != ToHSAMetadataString) {
506     errs() << "Original input: " << HSAMetadataString << '\n'
507            << "Produced output: " << StrOS.str() << '\n';
508   }
509 }
510 
511 std::optional<StringRef>
512 MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
513   return StringSwitch<std::optional<StringRef>>(AccQual)
514       .Case("read_only", StringRef("read_only"))
515       .Case("write_only", StringRef("write_only"))
516       .Case("read_write", StringRef("read_write"))
517       .Default(std::nullopt);
518 }
519 
520 std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
521     unsigned AddressSpace) const {
522   switch (AddressSpace) {
523   case AMDGPUAS::PRIVATE_ADDRESS:
524     return StringRef("private");
525   case AMDGPUAS::GLOBAL_ADDRESS:
526     return StringRef("global");
527   case AMDGPUAS::CONSTANT_ADDRESS:
528     return StringRef("constant");
529   case AMDGPUAS::LOCAL_ADDRESS:
530     return StringRef("local");
531   case AMDGPUAS::FLAT_ADDRESS:
532     return StringRef("generic");
533   case AMDGPUAS::REGION_ADDRESS:
534     return StringRef("region");
535   default:
536     return std::nullopt;
537   }
538 }
539 
540 StringRef
541 MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
542                                         StringRef BaseTypeName) const {
543   if (TypeQual.contains("pipe"))
544     return "pipe";
545 
546   return StringSwitch<StringRef>(BaseTypeName)
547       .Case("image1d_t", "image")
548       .Case("image1d_array_t", "image")
549       .Case("image1d_buffer_t", "image")
550       .Case("image2d_t", "image")
551       .Case("image2d_array_t", "image")
552       .Case("image2d_array_depth_t", "image")
553       .Case("image2d_array_msaa_t", "image")
554       .Case("image2d_array_msaa_depth_t", "image")
555       .Case("image2d_depth_t", "image")
556       .Case("image2d_msaa_t", "image")
557       .Case("image2d_msaa_depth_t", "image")
558       .Case("image3d_t", "image")
559       .Case("sampler_t", "sampler")
560       .Case("queue_t", "queue")
561       .Default(isa<PointerType>(Ty)
562                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
563                           ? "dynamic_shared_pointer"
564                           : "global_buffer")
565                    : "by_value");
566 }
567 
568 std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
569                                                    bool Signed) const {
570   switch (Ty->getTypeID()) {
571   case Type::IntegerTyID: {
572     if (!Signed)
573       return (Twine('u') + getTypeName(Ty, true)).str();
574 
575     auto BitWidth = Ty->getIntegerBitWidth();
576     switch (BitWidth) {
577     case 8:
578       return "char";
579     case 16:
580       return "short";
581     case 32:
582       return "int";
583     case 64:
584       return "long";
585     default:
586       return (Twine('i') + Twine(BitWidth)).str();
587     }
588   }
589   case Type::HalfTyID:
590     return "half";
591   case Type::FloatTyID:
592     return "float";
593   case Type::DoubleTyID:
594     return "double";
595   case Type::FixedVectorTyID: {
596     auto VecTy = cast<FixedVectorType>(Ty);
597     auto ElTy = VecTy->getElementType();
598     auto NumElements = VecTy->getNumElements();
599     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
600   }
601   default:
602     return "unknown";
603   }
604 }
605 
606 msgpack::ArrayDocNode
607 MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
608   auto Dims = HSAMetadataDoc->getArrayNode();
609   if (Node->getNumOperands() != 3)
610     return Dims;
611 
612   for (auto &Op : Node->operands())
613     Dims.push_back(Dims.getDocument()->getNode(
614         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
615   return Dims;
616 }
617 
618 void MetadataStreamerMsgPackV3::emitVersion() {
619   auto Version = HSAMetadataDoc->getArrayNode();
620   Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
621   Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
622   getRootMetadata("amdhsa.version") = Version;
623 }
624 
625 void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
626   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
627   if (!Node)
628     return;
629 
630   auto Printf = HSAMetadataDoc->getArrayNode();
631   for (auto *Op : Node->operands())
632     if (Op->getNumOperands())
633       Printf.push_back(Printf.getDocument()->getNode(
634           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
635   getRootMetadata("amdhsa.printf") = Printf;
636 }
637 
638 void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
639                                                    msgpack::MapDocNode Kern) {
640   // TODO: What about other languages?
641   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
642   if (!Node || !Node->getNumOperands())
643     return;
644   auto Op0 = Node->getOperand(0);
645   if (Op0->getNumOperands() <= 1)
646     return;
647 
648   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
649   auto LanguageVersion = Kern.getDocument()->getArrayNode();
650   LanguageVersion.push_back(Kern.getDocument()->getNode(
651       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
652   LanguageVersion.push_back(Kern.getDocument()->getNode(
653       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
654   Kern[".language_version"] = LanguageVersion;
655 }
656 
657 void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
658                                                 msgpack::MapDocNode Kern) {
659 
660   if (auto Node = Func.getMetadata("reqd_work_group_size"))
661     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
662   if (auto Node = Func.getMetadata("work_group_size_hint"))
663     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
664   if (auto Node = Func.getMetadata("vec_type_hint")) {
665     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
666         getTypeName(
667             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
668             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
669         /*Copy=*/true);
670   }
671   if (Func.hasFnAttribute("runtime-handle")) {
672     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
673         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
674         /*Copy=*/true);
675   }
676   if (Func.hasFnAttribute("device-init"))
677     Kern[".kind"] = Kern.getDocument()->getNode("init");
678   else if (Func.hasFnAttribute("device-fini"))
679     Kern[".kind"] = Kern.getDocument()->getNode("fini");
680 }
681 
682 void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
683                                                msgpack::MapDocNode Kern) {
684   auto &Func = MF.getFunction();
685   unsigned Offset = 0;
686   auto Args = HSAMetadataDoc->getArrayNode();
687   for (auto &Arg : Func.args())
688     emitKernelArg(Arg, Offset, Args);
689 
690   emitHiddenKernelArgs(MF, Offset, Args);
691 
692   Kern[".args"] = Args;
693 }
694 
695 void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
696                                               unsigned &Offset,
697                                               msgpack::ArrayDocNode Args) {
698   auto Func = Arg.getParent();
699   auto ArgNo = Arg.getArgNo();
700   const MDNode *Node;
701 
702   StringRef Name;
703   Node = Func->getMetadata("kernel_arg_name");
704   if (Node && ArgNo < Node->getNumOperands())
705     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
706   else if (Arg.hasName())
707     Name = Arg.getName();
708 
709   StringRef TypeName;
710   Node = Func->getMetadata("kernel_arg_type");
711   if (Node && ArgNo < Node->getNumOperands())
712     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
713 
714   StringRef BaseTypeName;
715   Node = Func->getMetadata("kernel_arg_base_type");
716   if (Node && ArgNo < Node->getNumOperands())
717     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
718 
719   StringRef AccQual;
720   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
721       Arg.hasNoAliasAttr()) {
722     AccQual = "read_only";
723   } else {
724     Node = Func->getMetadata("kernel_arg_access_qual");
725     if (Node && ArgNo < Node->getNumOperands())
726       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
727   }
728 
729   StringRef TypeQual;
730   Node = Func->getMetadata("kernel_arg_type_qual");
731   if (Node && ArgNo < Node->getNumOperands())
732     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
733 
734   const DataLayout &DL = Func->getParent()->getDataLayout();
735 
736   MaybeAlign PointeeAlign;
737   Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
738 
739   // FIXME: Need to distinguish in memory alignment from pointer alignment.
740   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
741     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
742       PointeeAlign = Arg.getParamAlign().valueOrOne();
743   }
744 
745   // There's no distinction between byval aggregates and raw aggregates.
746   Type *ArgTy;
747   Align ArgAlign;
748   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
749 
750   emitKernelArg(DL, ArgTy, ArgAlign,
751                 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
752                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
753 }
754 
755 void MetadataStreamerMsgPackV3::emitKernelArg(
756     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
757     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
758     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
759     StringRef AccQual, StringRef TypeQual) {
760   auto Arg = Args.getDocument()->getMapNode();
761 
762   if (!Name.empty())
763     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
764   if (!TypeName.empty())
765     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
766   auto Size = DL.getTypeAllocSize(Ty);
767   Arg[".size"] = Arg.getDocument()->getNode(Size);
768   Offset = alignTo(Offset, Alignment);
769   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
770   Offset += Size;
771   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
772   if (PointeeAlign)
773     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
774 
775   if (auto PtrTy = dyn_cast<PointerType>(Ty))
776     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
777       // Limiting address space to emit only for a certain ValueKind.
778       if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
779         Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
780                                                            /*Copy=*/true);
781 
782   if (auto AQ = getAccessQualifier(AccQual))
783     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
784 
785   // TODO: Emit Arg[".actual_access"].
786 
787   SmallVector<StringRef, 1> SplitTypeQuals;
788   TypeQual.split(SplitTypeQuals, " ", -1, false);
789   for (StringRef Key : SplitTypeQuals) {
790     if (Key == "const")
791       Arg[".is_const"] = Arg.getDocument()->getNode(true);
792     else if (Key == "restrict")
793       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
794     else if (Key == "volatile")
795       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
796     else if (Key == "pipe")
797       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
798   }
799 
800   Args.push_back(Arg);
801 }
802 
803 void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
804     const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
805   auto &Func = MF.getFunction();
806   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
807 
808   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
809   if (!HiddenArgNumBytes)
810     return;
811 
812   const Module *M = Func.getParent();
813   auto &DL = M->getDataLayout();
814   auto Int64Ty = Type::getInt64Ty(Func.getContext());
815 
816   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
817 
818   if (HiddenArgNumBytes >= 8)
819     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
820                   Args);
821   if (HiddenArgNumBytes >= 16)
822     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
823                   Args);
824   if (HiddenArgNumBytes >= 24)
825     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
826                   Args);
827 
828   auto Int8PtrTy =
829       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
830 
831   if (HiddenArgNumBytes >= 32) {
832     // We forbid the use of features requiring hostcall when compiling OpenCL
833     // before code object V5, which makes the mutual exclusion between the
834     // "printf buffer" and "hostcall buffer" here sound.
835     if (M->getNamedMetadata("llvm.printf.fmts"))
836       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
837                     Args);
838     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
839       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
840                     Args);
841     else
842       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
843   }
844 
845   // Emit "default queue" and "completion action" arguments if enqueue kernel is
846   // used, otherwise emit dummy "none" arguments.
847   if (HiddenArgNumBytes >= 40) {
848     if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
849       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
850                     Args);
851     } else {
852       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
853     }
854   }
855 
856   if (HiddenArgNumBytes >= 48) {
857     if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
858         // FIXME: Hack for runtime bug if we fail to optimize this out
859         Func.hasFnAttribute("calls-enqueue-kernel")) {
860       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
861                     Args);
862     } else {
863       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
864     }
865   }
866 
867   // Emit the pointer argument for multi-grid object.
868   if (HiddenArgNumBytes >= 56) {
869     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
870       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
871                     Args);
872     } else {
873       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
874     }
875   }
876 }
877 
878 msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
879     const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
880   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
881   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
882   const Function &F = MF.getFunction();
883 
884   auto Kern = HSAMetadataDoc->getMapNode();
885 
886   Align MaxKernArgAlign;
887   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
888       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
889   Kern[".group_segment_fixed_size"] =
890       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
891   Kern[".private_segment_fixed_size"] =
892       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
893   if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
894     Kern[".uses_dynamic_stack"] =
895         Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
896   if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP())
897     Kern[".workgroup_processor_mode"] =
898         Kern.getDocument()->getNode(ProgramInfo.WgpMode);
899 
900   // FIXME: The metadata treats the minimum as 16?
901   Kern[".kernarg_segment_align"] =
902       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
903   Kern[".wavefront_size"] =
904       Kern.getDocument()->getNode(STM.getWavefrontSize());
905   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
906   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
907 
908   // Only add AGPR count to metadata for supported devices
909   if (STM.hasMAIInsts()) {
910     Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
911   }
912 
913   Kern[".max_flat_workgroup_size"] =
914       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
915   Kern[".sgpr_spill_count"] =
916       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
917   Kern[".vgpr_spill_count"] =
918       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
919 
920   return Kern;
921 }
922 
923 bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
924   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
925 }
926 
927 void MetadataStreamerMsgPackV3::begin(const Module &Mod,
928                                       const IsaInfo::AMDGPUTargetID &TargetID) {
929   emitVersion();
930   emitPrintf(Mod);
931   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
932 }
933 
934 void MetadataStreamerMsgPackV3::end() {
935   std::string HSAMetadataString;
936   raw_string_ostream StrOS(HSAMetadataString);
937   HSAMetadataDoc->toYAML(StrOS);
938 
939   if (DumpHSAMetadata)
940     dump(StrOS.str());
941   if (VerifyHSAMetadata)
942     verify(StrOS.str());
943 }
944 
945 void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
946                                            const SIProgramInfo &ProgramInfo) {
947   auto &Func = MF.getFunction();
948   auto Kern = getHSAKernelProps(MF, ProgramInfo);
949 
950   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
951          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
952 
953   auto Kernels =
954       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
955 
956   {
957     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
958     Kern[".symbol"] = Kern.getDocument()->getNode(
959         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
960     emitKernelLanguage(Func, Kern);
961     emitKernelAttrs(Func, Kern);
962     emitKernelArgs(MF, Kern);
963   }
964 
965   Kernels.push_back(Kern);
966 }
967 
968 //===----------------------------------------------------------------------===//
969 // HSAMetadataStreamerV4
970 //===----------------------------------------------------------------------===//
971 
972 void MetadataStreamerMsgPackV4::emitVersion() {
973   auto Version = HSAMetadataDoc->getArrayNode();
974   Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
975   Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
976   getRootMetadata("amdhsa.version") = Version;
977 }
978 
979 void MetadataStreamerMsgPackV4::emitTargetID(
980     const IsaInfo::AMDGPUTargetID &TargetID) {
981   getRootMetadata("amdhsa.target") =
982       HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
983 }
984 
985 void MetadataStreamerMsgPackV4::begin(const Module &Mod,
986                                       const IsaInfo::AMDGPUTargetID &TargetID) {
987   emitVersion();
988   emitTargetID(TargetID);
989   emitPrintf(Mod);
990   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
991 }
992 
993 //===----------------------------------------------------------------------===//
994 // HSAMetadataStreamerV5
995 //===----------------------------------------------------------------------===//
996 
997 void MetadataStreamerMsgPackV5::emitVersion() {
998   auto Version = HSAMetadataDoc->getArrayNode();
999   Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
1000   Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
1001   getRootMetadata("amdhsa.version") = Version;
1002 }
1003 
1004 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
1005     const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
1006   auto &Func = MF.getFunction();
1007   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1008 
1009   // No implicit kernel argument is used.
1010   if (ST.getImplicitArgNumBytes(Func) == 0)
1011     return;
1012 
1013   const Module *M = Func.getParent();
1014   auto &DL = M->getDataLayout();
1015   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1016 
1017   auto Int64Ty = Type::getInt64Ty(Func.getContext());
1018   auto Int32Ty = Type::getInt32Ty(Func.getContext());
1019   auto Int16Ty = Type::getInt16Ty(Func.getContext());
1020 
1021   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
1022   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
1023   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
1024   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
1025 
1026   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
1027   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
1028   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
1029 
1030   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
1031   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
1032   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
1033 
1034   // Reserved for hidden_tool_correlation_id.
1035   Offset += 8;
1036 
1037   Offset += 8; // Reserved.
1038 
1039   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
1040   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1041   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1042 
1043   emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1044 
1045   Offset += 6; // Reserved.
1046   auto Int8PtrTy =
1047       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1048 
1049   if (M->getNamedMetadata("llvm.printf.fmts")) {
1050     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1051                   Args);
1052   } else {
1053     Offset += 8; // Skipped.
1054   }
1055 
1056   if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
1057     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1058                   Args);
1059   } else {
1060     Offset += 8; // Skipped.
1061   }
1062 
1063   if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1064     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1065                 Args);
1066   } else {
1067     Offset += 8; // Skipped.
1068   }
1069 
1070   if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1071     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1072   else
1073     Offset += 8; // Skipped.
1074 
1075   if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
1076     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1077                   Args);
1078   } else {
1079     Offset += 8; // Skipped.
1080   }
1081 
1082   if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
1083       // FIXME: Hack for runtime bug
1084       Func.hasFnAttribute("calls-enqueue-kernel")) {
1085     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1086                   Args);
1087   } else {
1088     Offset += 8; // Skipped.
1089   }
1090 
1091   Offset += 72; // Reserved.
1092 
1093   // hidden_private_base and hidden_shared_base are only when the subtarget has
1094   // ApertureRegs.
1095   if (!ST.hasApertureRegs()) {
1096     emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1097     emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1098   } else {
1099     Offset += 8; // Skipped.
1100   }
1101 
1102   if (MFI.hasQueuePtr())
1103     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1104 }
1105 
1106 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
1107                                                 msgpack::MapDocNode Kern) {
1108   MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern);
1109 
1110   if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
1111     Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
1112 }
1113 
1114 
1115 } // end namespace HSAMD
1116 } // end namespace AMDGPU
1117 } // end namespace llvm
1118