xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (revision c9539b89010900499a200cdd6c0265ea5d950875)
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25                                                      const DataLayout &DL) {
26   Type *Ty = Arg.getType();
27   MaybeAlign ArgAlign;
28   if (Arg.hasByRefAttr()) {
29     Ty = Arg.getParamByRefType();
30     ArgAlign = Arg.getParamAlign();
31   }
32 
33   if (!ArgAlign)
34     ArgAlign = DL.getABITypeAlign(Ty);
35 
36   return std::make_pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
41 static cl::opt<bool> DumpHSAMetadata(
42     "amdgpu-dump-hsa-metadata",
43     cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45     "amdgpu-verify-hsa-metadata",
46     cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57 
58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59   errs() << "AMDGPU HSA Metadata Parser Test: ";
60 
61   HSAMD::Metadata FromHSAMetadataString;
62   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63     errs() << "FAIL\n";
64     return;
65   }
66 
67   std::string ToHSAMetadataString;
68   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69     errs() << "FAIL\n";
70     return;
71   }
72 
73   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74          << '\n';
75   if (HSAMetadataString != ToHSAMetadataString) {
76     errs() << "Original input: " << HSAMetadataString << '\n'
77            << "Produced output: " << ToHSAMetadataString << '\n';
78   }
79 }
80 
81 AccessQualifier
82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83   if (AccQual.empty())
84     return AccessQualifier::Unknown;
85 
86   return StringSwitch<AccessQualifier>(AccQual)
87              .Case("read_only",  AccessQualifier::ReadOnly)
88              .Case("write_only", AccessQualifier::WriteOnly)
89              .Case("read_write", AccessQualifier::ReadWrite)
90              .Default(AccessQualifier::Default);
91 }
92 
93 AddressSpaceQualifier
94 MetadataStreamerV2::getAddressSpaceQualifier(
95     unsigned AddressSpace) const {
96   switch (AddressSpace) {
97   case AMDGPUAS::PRIVATE_ADDRESS:
98     return AddressSpaceQualifier::Private;
99   case AMDGPUAS::GLOBAL_ADDRESS:
100     return AddressSpaceQualifier::Global;
101   case AMDGPUAS::CONSTANT_ADDRESS:
102     return AddressSpaceQualifier::Constant;
103   case AMDGPUAS::LOCAL_ADDRESS:
104     return AddressSpaceQualifier::Local;
105   case AMDGPUAS::FLAT_ADDRESS:
106     return AddressSpaceQualifier::Generic;
107   case AMDGPUAS::REGION_ADDRESS:
108     return AddressSpaceQualifier::Region;
109   default:
110     return AddressSpaceQualifier::Unknown;
111   }
112 }
113 
114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115                                            StringRef BaseTypeName) const {
116   if (TypeQual.contains("pipe"))
117     return ValueKind::Pipe;
118 
119   return StringSwitch<ValueKind>(BaseTypeName)
120              .Case("image1d_t", ValueKind::Image)
121              .Case("image1d_array_t", ValueKind::Image)
122              .Case("image1d_buffer_t", ValueKind::Image)
123              .Case("image2d_t", ValueKind::Image)
124              .Case("image2d_array_t", ValueKind::Image)
125              .Case("image2d_array_depth_t", ValueKind::Image)
126              .Case("image2d_array_msaa_t", ValueKind::Image)
127              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128              .Case("image2d_depth_t", ValueKind::Image)
129              .Case("image2d_msaa_t", ValueKind::Image)
130              .Case("image2d_msaa_depth_t", ValueKind::Image)
131              .Case("image3d_t", ValueKind::Image)
132              .Case("sampler_t", ValueKind::Sampler)
133              .Case("queue_t", ValueKind::Queue)
134              .Default(isa<PointerType>(Ty) ?
135                           (Ty->getPointerAddressSpace() ==
136                            AMDGPUAS::LOCAL_ADDRESS ?
137                            ValueKind::DynamicSharedPointer :
138                            ValueKind::GlobalBuffer) :
139                       ValueKind::ByValue);
140 }
141 
142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143   switch (Ty->getTypeID()) {
144   case Type::IntegerTyID: {
145     if (!Signed)
146       return (Twine('u') + getTypeName(Ty, true)).str();
147 
148     auto BitWidth = Ty->getIntegerBitWidth();
149     switch (BitWidth) {
150     case 8:
151       return "char";
152     case 16:
153       return "short";
154     case 32:
155       return "int";
156     case 64:
157       return "long";
158     default:
159       return (Twine('i') + Twine(BitWidth)).str();
160     }
161   }
162   case Type::HalfTyID:
163     return "half";
164   case Type::FloatTyID:
165     return "float";
166   case Type::DoubleTyID:
167     return "double";
168   case Type::FixedVectorTyID: {
169     auto VecTy = cast<FixedVectorType>(Ty);
170     auto ElTy = VecTy->getElementType();
171     auto NumElements = VecTy->getNumElements();
172     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
173   }
174   default:
175     return "unknown";
176   }
177 }
178 
179 std::vector<uint32_t>
180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181   std::vector<uint32_t> Dims;
182   if (Node->getNumOperands() != 3)
183     return Dims;
184 
185   for (auto &Op : Node->operands())
186     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187   return Dims;
188 }
189 
190 Kernel::CodeProps::Metadata
191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192                                     const SIProgramInfo &ProgramInfo) const {
193   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
194   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
195   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196   const Function &F = MF.getFunction();
197 
198   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199          F.getCallingConv() == CallingConv::SPIR_KERNEL);
200 
201   Align MaxKernArgAlign;
202   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203                                                                MaxKernArgAlign);
204   HSACodeProps.mKernargSegmentAlign =
205     std::max(MaxKernArgAlign, Align(4)).value();
206 
207   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
208   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
209   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
210   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
211   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
212   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
213   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
214   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
215   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
216   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
217 
218   return HSACodeProps;
219 }
220 
221 Kernel::DebugProps::Metadata
222 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
223                                      const SIProgramInfo &ProgramInfo) const {
224   return HSAMD::Kernel::DebugProps::Metadata();
225 }
226 
227 void MetadataStreamerV2::emitVersion() {
228   auto &Version = HSAMetadata.mVersion;
229 
230   Version.push_back(VersionMajorV2);
231   Version.push_back(VersionMinorV2);
232 }
233 
234 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
235   auto &Printf = HSAMetadata.mPrintf;
236 
237   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
238   if (!Node)
239     return;
240 
241   for (auto Op : Node->operands())
242     if (Op->getNumOperands())
243       Printf.push_back(
244           std::string(cast<MDString>(Op->getOperand(0))->getString()));
245 }
246 
247 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
248   auto &Kernel = HSAMetadata.mKernels.back();
249 
250   // TODO: What about other languages?
251   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
252   if (!Node || !Node->getNumOperands())
253     return;
254   auto Op0 = Node->getOperand(0);
255   if (Op0->getNumOperands() <= 1)
256     return;
257 
258   Kernel.mLanguage = "OpenCL C";
259   Kernel.mLanguageVersion.push_back(
260       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
261   Kernel.mLanguageVersion.push_back(
262       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
263 }
264 
265 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
266   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
267 
268   if (auto Node = Func.getMetadata("reqd_work_group_size"))
269     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
270   if (auto Node = Func.getMetadata("work_group_size_hint"))
271     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
272   if (auto Node = Func.getMetadata("vec_type_hint")) {
273     Attrs.mVecTypeHint = getTypeName(
274         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
275         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
276   }
277   if (Func.hasFnAttribute("runtime-handle")) {
278     Attrs.mRuntimeHandle =
279         Func.getFnAttribute("runtime-handle").getValueAsString().str();
280   }
281 }
282 
283 void MetadataStreamerV2::emitKernelArgs(const Function &Func,
284                                         const GCNSubtarget &ST) {
285   for (auto &Arg : Func.args())
286     emitKernelArg(Arg);
287 
288   emitHiddenKernelArgs(Func, ST);
289 }
290 
291 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
292   auto Func = Arg.getParent();
293   auto ArgNo = Arg.getArgNo();
294   const MDNode *Node;
295 
296   StringRef Name;
297   Node = Func->getMetadata("kernel_arg_name");
298   if (Node && ArgNo < Node->getNumOperands())
299     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
300   else if (Arg.hasName())
301     Name = Arg.getName();
302 
303   StringRef TypeName;
304   Node = Func->getMetadata("kernel_arg_type");
305   if (Node && ArgNo < Node->getNumOperands())
306     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
307 
308   StringRef BaseTypeName;
309   Node = Func->getMetadata("kernel_arg_base_type");
310   if (Node && ArgNo < Node->getNumOperands())
311     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
312 
313   StringRef AccQual;
314   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
315       Arg.hasNoAliasAttr()) {
316     AccQual = "read_only";
317   } else {
318     Node = Func->getMetadata("kernel_arg_access_qual");
319     if (Node && ArgNo < Node->getNumOperands())
320       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
321   }
322 
323   StringRef TypeQual;
324   Node = Func->getMetadata("kernel_arg_type_qual");
325   if (Node && ArgNo < Node->getNumOperands())
326     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
327 
328   const DataLayout &DL = Func->getParent()->getDataLayout();
329 
330   MaybeAlign PointeeAlign;
331   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
332     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
333       // FIXME: Should report this for all address spaces
334       PointeeAlign = Arg.getParamAlign().valueOrOne();
335     }
336   }
337 
338   Type *ArgTy;
339   Align ArgAlign;
340   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
341 
342   emitKernelArg(DL, ArgTy, ArgAlign,
343                 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
344                 TypeName, BaseTypeName, AccQual, TypeQual);
345 }
346 
347 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
348                                        Align Alignment, ValueKind ValueKind,
349                                        MaybeAlign PointeeAlign, StringRef Name,
350                                        StringRef TypeName,
351                                        StringRef BaseTypeName,
352                                        StringRef AccQual, StringRef TypeQual) {
353   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
354   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
355 
356   Arg.mName = std::string(Name);
357   Arg.mTypeName = std::string(TypeName);
358   Arg.mSize = DL.getTypeAllocSize(Ty);
359   Arg.mAlign = Alignment.value();
360   Arg.mValueKind = ValueKind;
361   Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
362 
363   if (auto PtrTy = dyn_cast<PointerType>(Ty))
364     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
365 
366   Arg.mAccQual = getAccessQualifier(AccQual);
367 
368   // TODO: Emit Arg.mActualAccQual.
369 
370   SmallVector<StringRef, 1> SplitTypeQuals;
371   TypeQual.split(SplitTypeQuals, " ", -1, false);
372   for (StringRef Key : SplitTypeQuals) {
373     auto P = StringSwitch<bool*>(Key)
374                  .Case("const",    &Arg.mIsConst)
375                  .Case("restrict", &Arg.mIsRestrict)
376                  .Case("volatile", &Arg.mIsVolatile)
377                  .Case("pipe",     &Arg.mIsPipe)
378                  .Default(nullptr);
379     if (P)
380       *P = true;
381   }
382 }
383 
384 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
385                                               const GCNSubtarget &ST) {
386   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
387   if (!HiddenArgNumBytes)
388     return;
389 
390   auto &DL = Func.getParent()->getDataLayout();
391   auto Int64Ty = Type::getInt64Ty(Func.getContext());
392 
393   if (HiddenArgNumBytes >= 8)
394     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395   if (HiddenArgNumBytes >= 16)
396     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397   if (HiddenArgNumBytes >= 24)
398     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
399 
400   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
401                                       AMDGPUAS::GLOBAL_ADDRESS);
402 
403   if (HiddenArgNumBytes >= 32) {
404     // We forbid the use of features requiring hostcall when compiling OpenCL
405     // before code object V5, which makes the mutual exclusion between the
406     // "printf buffer" and "hostcall buffer" here sound.
407     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
408       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
409     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
410       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
411     else
412       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
413   }
414 
415   // Emit "default queue" and "completion action" arguments if enqueue kernel is
416   // used, otherwise emit dummy "none" arguments.
417   if (HiddenArgNumBytes >= 48) {
418     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
419       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
420       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
421     } else {
422       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
423       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
424     }
425   }
426 
427   // Emit the pointer argument for multi-grid object.
428   if (HiddenArgNumBytes >= 56) {
429     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
430       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
431     else
432       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
433   }
434 }
435 
436 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
437   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
438 }
439 
440 void MetadataStreamerV2::begin(const Module &Mod,
441                                const IsaInfo::AMDGPUTargetID &TargetID) {
442   emitVersion();
443   emitPrintf(Mod);
444 }
445 
446 void MetadataStreamerV2::end() {
447   std::string HSAMetadataString;
448   if (toString(HSAMetadata, HSAMetadataString))
449     return;
450 
451   if (DumpHSAMetadata)
452     dump(HSAMetadataString);
453   if (VerifyHSAMetadata)
454     verify(HSAMetadataString);
455 }
456 
457 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
458                                     const SIProgramInfo &ProgramInfo) {
459   auto &Func = MF.getFunction();
460   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
461     return;
462 
463   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
464   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
465 
466   HSAMetadata.mKernels.push_back(Kernel::Metadata());
467   auto &Kernel = HSAMetadata.mKernels.back();
468 
469   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
470   Kernel.mName = std::string(Func.getName());
471   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
472   emitKernelLanguage(Func);
473   emitKernelAttrs(Func);
474   emitKernelArgs(Func, ST);
475   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
476   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
477 }
478 
479 //===----------------------------------------------------------------------===//
480 // HSAMetadataStreamerV3
481 //===----------------------------------------------------------------------===//
482 
483 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
484   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
485 }
486 
487 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
488   errs() << "AMDGPU HSA Metadata Parser Test: ";
489 
490   msgpack::Document FromHSAMetadataString;
491 
492   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
493     errs() << "FAIL\n";
494     return;
495   }
496 
497   std::string ToHSAMetadataString;
498   raw_string_ostream StrOS(ToHSAMetadataString);
499   FromHSAMetadataString.toYAML(StrOS);
500 
501   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
502   if (HSAMetadataString != ToHSAMetadataString) {
503     errs() << "Original input: " << HSAMetadataString << '\n'
504            << "Produced output: " << StrOS.str() << '\n';
505   }
506 }
507 
508 Optional<StringRef>
509 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
510   return StringSwitch<Optional<StringRef>>(AccQual)
511       .Case("read_only", StringRef("read_only"))
512       .Case("write_only", StringRef("write_only"))
513       .Case("read_write", StringRef("read_write"))
514       .Default(None);
515 }
516 
517 Optional<StringRef>
518 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
519   switch (AddressSpace) {
520   case AMDGPUAS::PRIVATE_ADDRESS:
521     return StringRef("private");
522   case AMDGPUAS::GLOBAL_ADDRESS:
523     return StringRef("global");
524   case AMDGPUAS::CONSTANT_ADDRESS:
525     return StringRef("constant");
526   case AMDGPUAS::LOCAL_ADDRESS:
527     return StringRef("local");
528   case AMDGPUAS::FLAT_ADDRESS:
529     return StringRef("generic");
530   case AMDGPUAS::REGION_ADDRESS:
531     return StringRef("region");
532   default:
533     return None;
534   }
535 }
536 
537 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
538                                            StringRef BaseTypeName) const {
539   if (TypeQual.contains("pipe"))
540     return "pipe";
541 
542   return StringSwitch<StringRef>(BaseTypeName)
543       .Case("image1d_t", "image")
544       .Case("image1d_array_t", "image")
545       .Case("image1d_buffer_t", "image")
546       .Case("image2d_t", "image")
547       .Case("image2d_array_t", "image")
548       .Case("image2d_array_depth_t", "image")
549       .Case("image2d_array_msaa_t", "image")
550       .Case("image2d_array_msaa_depth_t", "image")
551       .Case("image2d_depth_t", "image")
552       .Case("image2d_msaa_t", "image")
553       .Case("image2d_msaa_depth_t", "image")
554       .Case("image3d_t", "image")
555       .Case("sampler_t", "sampler")
556       .Case("queue_t", "queue")
557       .Default(isa<PointerType>(Ty)
558                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
559                           ? "dynamic_shared_pointer"
560                           : "global_buffer")
561                    : "by_value");
562 }
563 
564 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
565   switch (Ty->getTypeID()) {
566   case Type::IntegerTyID: {
567     if (!Signed)
568       return (Twine('u') + getTypeName(Ty, true)).str();
569 
570     auto BitWidth = Ty->getIntegerBitWidth();
571     switch (BitWidth) {
572     case 8:
573       return "char";
574     case 16:
575       return "short";
576     case 32:
577       return "int";
578     case 64:
579       return "long";
580     default:
581       return (Twine('i') + Twine(BitWidth)).str();
582     }
583   }
584   case Type::HalfTyID:
585     return "half";
586   case Type::FloatTyID:
587     return "float";
588   case Type::DoubleTyID:
589     return "double";
590   case Type::FixedVectorTyID: {
591     auto VecTy = cast<FixedVectorType>(Ty);
592     auto ElTy = VecTy->getElementType();
593     auto NumElements = VecTy->getNumElements();
594     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
595   }
596   default:
597     return "unknown";
598   }
599 }
600 
601 msgpack::ArrayDocNode
602 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
603   auto Dims = HSAMetadataDoc->getArrayNode();
604   if (Node->getNumOperands() != 3)
605     return Dims;
606 
607   for (auto &Op : Node->operands())
608     Dims.push_back(Dims.getDocument()->getNode(
609         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
610   return Dims;
611 }
612 
613 void MetadataStreamerV3::emitVersion() {
614   auto Version = HSAMetadataDoc->getArrayNode();
615   Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
616   Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
617   getRootMetadata("amdhsa.version") = Version;
618 }
619 
620 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
621   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
622   if (!Node)
623     return;
624 
625   auto Printf = HSAMetadataDoc->getArrayNode();
626   for (auto Op : Node->operands())
627     if (Op->getNumOperands())
628       Printf.push_back(Printf.getDocument()->getNode(
629           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
630   getRootMetadata("amdhsa.printf") = Printf;
631 }
632 
633 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
634                                             msgpack::MapDocNode Kern) {
635   // TODO: What about other languages?
636   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
637   if (!Node || !Node->getNumOperands())
638     return;
639   auto Op0 = Node->getOperand(0);
640   if (Op0->getNumOperands() <= 1)
641     return;
642 
643   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
644   auto LanguageVersion = Kern.getDocument()->getArrayNode();
645   LanguageVersion.push_back(Kern.getDocument()->getNode(
646       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
647   LanguageVersion.push_back(Kern.getDocument()->getNode(
648       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
649   Kern[".language_version"] = LanguageVersion;
650 }
651 
652 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
653                                          msgpack::MapDocNode Kern) {
654 
655   if (auto Node = Func.getMetadata("reqd_work_group_size"))
656     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
657   if (auto Node = Func.getMetadata("work_group_size_hint"))
658     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
659   if (auto Node = Func.getMetadata("vec_type_hint")) {
660     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
661         getTypeName(
662             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
663             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
664         /*Copy=*/true);
665   }
666   if (Func.hasFnAttribute("runtime-handle")) {
667     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
668         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
669         /*Copy=*/true);
670   }
671   if (Func.hasFnAttribute("device-init"))
672     Kern[".kind"] = Kern.getDocument()->getNode("init");
673   else if (Func.hasFnAttribute("device-fini"))
674     Kern[".kind"] = Kern.getDocument()->getNode("fini");
675 }
676 
677 void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF,
678                                         msgpack::MapDocNode Kern) {
679   auto &Func = MF.getFunction();
680   unsigned Offset = 0;
681   auto Args = HSAMetadataDoc->getArrayNode();
682   for (auto &Arg : Func.args())
683     emitKernelArg(Arg, Offset, Args);
684 
685   emitHiddenKernelArgs(MF, Offset, Args);
686 
687   Kern[".args"] = Args;
688 }
689 
690 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
691                                        msgpack::ArrayDocNode Args) {
692   auto Func = Arg.getParent();
693   auto ArgNo = Arg.getArgNo();
694   const MDNode *Node;
695 
696   StringRef Name;
697   Node = Func->getMetadata("kernel_arg_name");
698   if (Node && ArgNo < Node->getNumOperands())
699     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
700   else if (Arg.hasName())
701     Name = Arg.getName();
702 
703   StringRef TypeName;
704   Node = Func->getMetadata("kernel_arg_type");
705   if (Node && ArgNo < Node->getNumOperands())
706     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
707 
708   StringRef BaseTypeName;
709   Node = Func->getMetadata("kernel_arg_base_type");
710   if (Node && ArgNo < Node->getNumOperands())
711     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
712 
713   StringRef AccQual;
714   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
715       Arg.hasNoAliasAttr()) {
716     AccQual = "read_only";
717   } else {
718     Node = Func->getMetadata("kernel_arg_access_qual");
719     if (Node && ArgNo < Node->getNumOperands())
720       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
721   }
722 
723   StringRef TypeQual;
724   Node = Func->getMetadata("kernel_arg_type_qual");
725   if (Node && ArgNo < Node->getNumOperands())
726     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
727 
728   const DataLayout &DL = Func->getParent()->getDataLayout();
729 
730   MaybeAlign PointeeAlign;
731   Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
732 
733   // FIXME: Need to distinguish in memory alignment from pointer alignment.
734   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
735     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
736       PointeeAlign = Arg.getParamAlign().valueOrOne();
737   }
738 
739   // There's no distinction between byval aggregates and raw aggregates.
740   Type *ArgTy;
741   Align ArgAlign;
742   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
743 
744   emitKernelArg(DL, ArgTy, ArgAlign,
745                 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
746                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
747 }
748 
749 void MetadataStreamerV3::emitKernelArg(
750     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
751     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
752     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
753     StringRef AccQual, StringRef TypeQual) {
754   auto Arg = Args.getDocument()->getMapNode();
755 
756   if (!Name.empty())
757     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
758   if (!TypeName.empty())
759     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
760   auto Size = DL.getTypeAllocSize(Ty);
761   Arg[".size"] = Arg.getDocument()->getNode(Size);
762   Offset = alignTo(Offset, Alignment);
763   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
764   Offset += Size;
765   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
766   if (PointeeAlign)
767     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
768 
769   if (auto PtrTy = dyn_cast<PointerType>(Ty))
770     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
771       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
772 
773   if (auto AQ = getAccessQualifier(AccQual))
774     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
775 
776   // TODO: Emit Arg[".actual_access"].
777 
778   SmallVector<StringRef, 1> SplitTypeQuals;
779   TypeQual.split(SplitTypeQuals, " ", -1, false);
780   for (StringRef Key : SplitTypeQuals) {
781     if (Key == "const")
782       Arg[".is_const"] = Arg.getDocument()->getNode(true);
783     else if (Key == "restrict")
784       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
785     else if (Key == "volatile")
786       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
787     else if (Key == "pipe")
788       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
789   }
790 
791   Args.push_back(Arg);
792 }
793 
794 void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
795                                               unsigned &Offset,
796                                               msgpack::ArrayDocNode Args) {
797   auto &Func = MF.getFunction();
798   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
799 
800   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
801   if (!HiddenArgNumBytes)
802     return;
803 
804   const Module *M = Func.getParent();
805   auto &DL = M->getDataLayout();
806   auto Int64Ty = Type::getInt64Ty(Func.getContext());
807 
808   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
809 
810   if (HiddenArgNumBytes >= 8)
811     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
812                   Args);
813   if (HiddenArgNumBytes >= 16)
814     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
815                   Args);
816   if (HiddenArgNumBytes >= 24)
817     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
818                   Args);
819 
820   auto Int8PtrTy =
821       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
822 
823   if (HiddenArgNumBytes >= 32) {
824     // We forbid the use of features requiring hostcall when compiling OpenCL
825     // before code object V5, which makes the mutual exclusion between the
826     // "printf buffer" and "hostcall buffer" here sound.
827     if (M->getNamedMetadata("llvm.printf.fmts"))
828       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
829                     Args);
830     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
831       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
832                     Args);
833     else
834       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
835   }
836 
837   // Emit "default queue" and "completion action" arguments if enqueue kernel is
838   // used, otherwise emit dummy "none" arguments.
839   if (HiddenArgNumBytes >= 48) {
840     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
841       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
842                     Args);
843       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
844                     Args);
845     } else {
846       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
847       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
848     }
849   }
850 
851   // Emit the pointer argument for multi-grid object.
852   if (HiddenArgNumBytes >= 56) {
853     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
854       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
855                     Args);
856     } else {
857       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
858     }
859   }
860 }
861 
862 msgpack::MapDocNode
863 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
864                                       const SIProgramInfo &ProgramInfo) const {
865   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
866   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
867   const Function &F = MF.getFunction();
868 
869   auto Kern = HSAMetadataDoc->getMapNode();
870 
871   Align MaxKernArgAlign;
872   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
873       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
874   Kern[".group_segment_fixed_size"] =
875       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
876   Kern[".private_segment_fixed_size"] =
877       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
878   Kern[".uses_dynamic_stack"] =
879       Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
880 
881   // FIXME: The metadata treats the minimum as 16?
882   Kern[".kernarg_segment_align"] =
883       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
884   Kern[".wavefront_size"] =
885       Kern.getDocument()->getNode(STM.getWavefrontSize());
886   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
887   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
888 
889   // Only add AGPR count to metadata for supported devices
890   if (STM.hasMAIInsts()) {
891     Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
892   }
893 
894   Kern[".max_flat_workgroup_size"] =
895       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
896   Kern[".sgpr_spill_count"] =
897       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
898   Kern[".vgpr_spill_count"] =
899       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
900 
901   return Kern;
902 }
903 
904 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
905   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
906 }
907 
908 void MetadataStreamerV3::begin(const Module &Mod,
909                                const IsaInfo::AMDGPUTargetID &TargetID) {
910   emitVersion();
911   emitPrintf(Mod);
912   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
913 }
914 
915 void MetadataStreamerV3::end() {
916   std::string HSAMetadataString;
917   raw_string_ostream StrOS(HSAMetadataString);
918   HSAMetadataDoc->toYAML(StrOS);
919 
920   if (DumpHSAMetadata)
921     dump(StrOS.str());
922   if (VerifyHSAMetadata)
923     verify(StrOS.str());
924 }
925 
926 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
927                                     const SIProgramInfo &ProgramInfo) {
928   auto &Func = MF.getFunction();
929   auto Kern = getHSAKernelProps(MF, ProgramInfo);
930 
931   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
932          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
933 
934   auto Kernels =
935       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
936 
937   {
938     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
939     Kern[".symbol"] = Kern.getDocument()->getNode(
940         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
941     emitKernelLanguage(Func, Kern);
942     emitKernelAttrs(Func, Kern);
943     emitKernelArgs(MF, Kern);
944   }
945 
946   Kernels.push_back(Kern);
947 }
948 
949 //===----------------------------------------------------------------------===//
950 // HSAMetadataStreamerV4
951 //===----------------------------------------------------------------------===//
952 
953 void MetadataStreamerV4::emitVersion() {
954   auto Version = HSAMetadataDoc->getArrayNode();
955   Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
956   Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
957   getRootMetadata("amdhsa.version") = Version;
958 }
959 
960 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
961   getRootMetadata("amdhsa.target") =
962       HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
963 }
964 
965 void MetadataStreamerV4::begin(const Module &Mod,
966                                const IsaInfo::AMDGPUTargetID &TargetID) {
967   emitVersion();
968   emitTargetID(TargetID);
969   emitPrintf(Mod);
970   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
971 }
972 
973 //===----------------------------------------------------------------------===//
974 // HSAMetadataStreamerV5
975 //===----------------------------------------------------------------------===//
976 
977 void MetadataStreamerV5::emitVersion() {
978   auto Version = HSAMetadataDoc->getArrayNode();
979   Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
980   Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
981   getRootMetadata("amdhsa.version") = Version;
982 }
983 
984 void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
985                                               unsigned &Offset,
986                                               msgpack::ArrayDocNode Args) {
987   auto &Func = MF.getFunction();
988   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
989 
990   // No implicit kernel argument is used.
991   if (ST.getImplicitArgNumBytes(Func) == 0)
992     return;
993 
994   const Module *M = Func.getParent();
995   auto &DL = M->getDataLayout();
996   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
997 
998   auto Int64Ty = Type::getInt64Ty(Func.getContext());
999   auto Int32Ty = Type::getInt32Ty(Func.getContext());
1000   auto Int16Ty = Type::getInt16Ty(Func.getContext());
1001 
1002   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
1003   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
1004   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
1005   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
1006 
1007   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
1008   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
1009   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
1010 
1011   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
1012   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
1013   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
1014 
1015   // Reserved for hidden_tool_correlation_id.
1016   Offset += 8;
1017 
1018   Offset += 8; // Reserved.
1019 
1020   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
1021   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1022   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1023 
1024   emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1025 
1026   Offset += 6; // Reserved.
1027   auto Int8PtrTy =
1028       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1029 
1030   if (M->getNamedMetadata("llvm.printf.fmts")) {
1031     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1032                   Args);
1033   } else {
1034     Offset += 8; // Skipped.
1035   }
1036 
1037   if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
1038     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1039                   Args);
1040   } else {
1041     Offset += 8; // Skipped.
1042   }
1043 
1044   if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1045     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1046                 Args);
1047   } else {
1048     Offset += 8; // Skipped.
1049   }
1050 
1051   if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1052     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1053   else
1054     Offset += 8; // Skipped.
1055 
1056   if (Func.hasFnAttribute("calls-enqueue-kernel")) {
1057     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1058                   Args);
1059     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1060                   Args);
1061   } else {
1062     Offset += 16; // Skipped.
1063   }
1064 
1065   Offset += 72; // Reserved.
1066 
1067   // hidden_private_base and hidden_shared_base are only when the subtarget has
1068   // ApertureRegs.
1069   if (!ST.hasApertureRegs()) {
1070     emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1071     emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1072   } else {
1073     Offset += 8; // Skipped.
1074   }
1075 
1076   if (MFI.hasQueuePtr())
1077     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1078 }
1079 
1080 } // end namespace HSAMD
1081 } // end namespace AMDGPU
1082 } // end namespace llvm
1083