xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (revision 6966ac055c3b7a39266fb982493330df7a097997)
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44 }
45 
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47   errs() << "AMDGPU HSA Metadata Parser Test: ";
48 
49   HSAMD::Metadata FromHSAMetadataString;
50   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
51     errs() << "FAIL\n";
52     return;
53   }
54 
55   std::string ToHSAMetadataString;
56   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57     errs() << "FAIL\n";
58     return;
59   }
60 
61   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62          << '\n';
63   if (HSAMetadataString != ToHSAMetadataString) {
64     errs() << "Original input: " << HSAMetadataString << '\n'
65            << "Produced output: " << ToHSAMetadataString << '\n';
66   }
67 }
68 
69 AccessQualifier
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71   if (AccQual.empty())
72     return AccessQualifier::Unknown;
73 
74   return StringSwitch<AccessQualifier>(AccQual)
75              .Case("read_only",  AccessQualifier::ReadOnly)
76              .Case("write_only", AccessQualifier::WriteOnly)
77              .Case("read_write", AccessQualifier::ReadWrite)
78              .Default(AccessQualifier::Default);
79 }
80 
81 AddressSpaceQualifier
82 MetadataStreamerV2::getAddressSpaceQualifier(
83     unsigned AddressSpace) const {
84   switch (AddressSpace) {
85   case AMDGPUAS::PRIVATE_ADDRESS:
86     return AddressSpaceQualifier::Private;
87   case AMDGPUAS::GLOBAL_ADDRESS:
88     return AddressSpaceQualifier::Global;
89   case AMDGPUAS::CONSTANT_ADDRESS:
90     return AddressSpaceQualifier::Constant;
91   case AMDGPUAS::LOCAL_ADDRESS:
92     return AddressSpaceQualifier::Local;
93   case AMDGPUAS::FLAT_ADDRESS:
94     return AddressSpaceQualifier::Generic;
95   case AMDGPUAS::REGION_ADDRESS:
96     return AddressSpaceQualifier::Region;
97   default:
98     return AddressSpaceQualifier::Unknown;
99   }
100 }
101 
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                            StringRef BaseTypeName) const {
104   if (TypeQual.find("pipe") != StringRef::npos)
105     return ValueKind::Pipe;
106 
107   return StringSwitch<ValueKind>(BaseTypeName)
108              .Case("image1d_t", ValueKind::Image)
109              .Case("image1d_array_t", ValueKind::Image)
110              .Case("image1d_buffer_t", ValueKind::Image)
111              .Case("image2d_t", ValueKind::Image)
112              .Case("image2d_array_t", ValueKind::Image)
113              .Case("image2d_array_depth_t", ValueKind::Image)
114              .Case("image2d_array_msaa_t", ValueKind::Image)
115              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116              .Case("image2d_depth_t", ValueKind::Image)
117              .Case("image2d_msaa_t", ValueKind::Image)
118              .Case("image2d_msaa_depth_t", ValueKind::Image)
119              .Case("image3d_t", ValueKind::Image)
120              .Case("sampler_t", ValueKind::Sampler)
121              .Case("queue_t", ValueKind::Queue)
122              .Default(isa<PointerType>(Ty) ?
123                           (Ty->getPointerAddressSpace() ==
124                            AMDGPUAS::LOCAL_ADDRESS ?
125                            ValueKind::DynamicSharedPointer :
126                            ValueKind::GlobalBuffer) :
127                       ValueKind::ByValue);
128 }
129 
130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131   switch (Ty->getTypeID()) {
132   case Type::IntegerTyID: {
133     auto Signed = !TypeName.startswith("u");
134     switch (Ty->getIntegerBitWidth()) {
135     case 8:
136       return Signed ? ValueType::I8 : ValueType::U8;
137     case 16:
138       return Signed ? ValueType::I16 : ValueType::U16;
139     case 32:
140       return Signed ? ValueType::I32 : ValueType::U32;
141     case 64:
142       return Signed ? ValueType::I64 : ValueType::U64;
143     default:
144       return ValueType::Struct;
145     }
146   }
147   case Type::HalfTyID:
148     return ValueType::F16;
149   case Type::FloatTyID:
150     return ValueType::F32;
151   case Type::DoubleTyID:
152     return ValueType::F64;
153   case Type::PointerTyID:
154     return getValueType(Ty->getPointerElementType(), TypeName);
155   case Type::VectorTyID:
156     return getValueType(Ty->getVectorElementType(), TypeName);
157   default:
158     return ValueType::Struct;
159   }
160 }
161 
162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163   switch (Ty->getTypeID()) {
164   case Type::IntegerTyID: {
165     if (!Signed)
166       return (Twine('u') + getTypeName(Ty, true)).str();
167 
168     auto BitWidth = Ty->getIntegerBitWidth();
169     switch (BitWidth) {
170     case 8:
171       return "char";
172     case 16:
173       return "short";
174     case 32:
175       return "int";
176     case 64:
177       return "long";
178     default:
179       return (Twine('i') + Twine(BitWidth)).str();
180     }
181   }
182   case Type::HalfTyID:
183     return "half";
184   case Type::FloatTyID:
185     return "float";
186   case Type::DoubleTyID:
187     return "double";
188   case Type::VectorTyID: {
189     auto VecTy = cast<VectorType>(Ty);
190     auto ElTy = VecTy->getElementType();
191     auto NumElements = VecTy->getVectorNumElements();
192     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193   }
194   default:
195     return "unknown";
196   }
197 }
198 
199 std::vector<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201   std::vector<uint32_t> Dims;
202   if (Node->getNumOperands() != 3)
203     return Dims;
204 
205   for (auto &Op : Node->operands())
206     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207   return Dims;
208 }
209 
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212                                     const SIProgramInfo &ProgramInfo) const {
213   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216   const Function &F = MF.getFunction();
217 
218   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219          F.getCallingConv() == CallingConv::SPIR_KERNEL);
220 
221   unsigned MaxKernArgAlign;
222   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223                                                                MaxKernArgAlign);
224   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226   HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
227   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
228   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
229   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
230   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
231   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
232   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
233   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
234   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
235 
236   return HSACodeProps;
237 }
238 
239 Kernel::DebugProps::Metadata
240 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
241                                      const SIProgramInfo &ProgramInfo) const {
242   return HSAMD::Kernel::DebugProps::Metadata();
243 }
244 
245 void MetadataStreamerV2::emitVersion() {
246   auto &Version = HSAMetadata.mVersion;
247 
248   Version.push_back(VersionMajor);
249   Version.push_back(VersionMinor);
250 }
251 
252 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
253   auto &Printf = HSAMetadata.mPrintf;
254 
255   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
256   if (!Node)
257     return;
258 
259   for (auto Op : Node->operands())
260     if (Op->getNumOperands())
261       Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
262 }
263 
264 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
265   auto &Kernel = HSAMetadata.mKernels.back();
266 
267   // TODO: What about other languages?
268   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
269   if (!Node || !Node->getNumOperands())
270     return;
271   auto Op0 = Node->getOperand(0);
272   if (Op0->getNumOperands() <= 1)
273     return;
274 
275   Kernel.mLanguage = "OpenCL C";
276   Kernel.mLanguageVersion.push_back(
277       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
278   Kernel.mLanguageVersion.push_back(
279       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
280 }
281 
282 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
283   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
284 
285   if (auto Node = Func.getMetadata("reqd_work_group_size"))
286     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
287   if (auto Node = Func.getMetadata("work_group_size_hint"))
288     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
289   if (auto Node = Func.getMetadata("vec_type_hint")) {
290     Attrs.mVecTypeHint = getTypeName(
291         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
292         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
293   }
294   if (Func.hasFnAttribute("runtime-handle")) {
295     Attrs.mRuntimeHandle =
296         Func.getFnAttribute("runtime-handle").getValueAsString().str();
297   }
298 }
299 
300 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
301   for (auto &Arg : Func.args())
302     emitKernelArg(Arg);
303 
304   emitHiddenKernelArgs(Func);
305 }
306 
307 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
308   auto Func = Arg.getParent();
309   auto ArgNo = Arg.getArgNo();
310   const MDNode *Node;
311 
312   StringRef Name;
313   Node = Func->getMetadata("kernel_arg_name");
314   if (Node && ArgNo < Node->getNumOperands())
315     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
316   else if (Arg.hasName())
317     Name = Arg.getName();
318 
319   StringRef TypeName;
320   Node = Func->getMetadata("kernel_arg_type");
321   if (Node && ArgNo < Node->getNumOperands())
322     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
323 
324   StringRef BaseTypeName;
325   Node = Func->getMetadata("kernel_arg_base_type");
326   if (Node && ArgNo < Node->getNumOperands())
327     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
328 
329   StringRef AccQual;
330   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
331       Arg.hasNoAliasAttr()) {
332     AccQual = "read_only";
333   } else {
334     Node = Func->getMetadata("kernel_arg_access_qual");
335     if (Node && ArgNo < Node->getNumOperands())
336       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
337   }
338 
339   StringRef TypeQual;
340   Node = Func->getMetadata("kernel_arg_type_qual");
341   if (Node && ArgNo < Node->getNumOperands())
342     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
343 
344   Type *Ty = Arg.getType();
345   const DataLayout &DL = Func->getParent()->getDataLayout();
346 
347   unsigned PointeeAlign = 0;
348   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
349     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
350       PointeeAlign = Arg.getParamAlignment();
351       if (PointeeAlign == 0)
352         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
353     }
354   }
355 
356   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
357                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
358 }
359 
360 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
361                                        ValueKind ValueKind,
362                                        unsigned PointeeAlign, StringRef Name,
363                                        StringRef TypeName,
364                                        StringRef BaseTypeName,
365                                        StringRef AccQual, StringRef TypeQual) {
366   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
367   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
368 
369   Arg.mName = Name;
370   Arg.mTypeName = TypeName;
371   Arg.mSize = DL.getTypeAllocSize(Ty);
372   Arg.mAlign = DL.getABITypeAlignment(Ty);
373   Arg.mValueKind = ValueKind;
374   Arg.mValueType = getValueType(Ty, BaseTypeName);
375   Arg.mPointeeAlign = PointeeAlign;
376 
377   if (auto PtrTy = dyn_cast<PointerType>(Ty))
378     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
379 
380   Arg.mAccQual = getAccessQualifier(AccQual);
381 
382   // TODO: Emit Arg.mActualAccQual.
383 
384   SmallVector<StringRef, 1> SplitTypeQuals;
385   TypeQual.split(SplitTypeQuals, " ", -1, false);
386   for (StringRef Key : SplitTypeQuals) {
387     auto P = StringSwitch<bool*>(Key)
388                  .Case("const",    &Arg.mIsConst)
389                  .Case("restrict", &Arg.mIsRestrict)
390                  .Case("volatile", &Arg.mIsVolatile)
391                  .Case("pipe",     &Arg.mIsPipe)
392                  .Default(nullptr);
393     if (P)
394       *P = true;
395   }
396 }
397 
398 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
399   int HiddenArgNumBytes =
400       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
401 
402   if (!HiddenArgNumBytes)
403     return;
404 
405   auto &DL = Func.getParent()->getDataLayout();
406   auto Int64Ty = Type::getInt64Ty(Func.getContext());
407 
408   if (HiddenArgNumBytes >= 8)
409     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
410   if (HiddenArgNumBytes >= 16)
411     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
412   if (HiddenArgNumBytes >= 24)
413     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
414 
415   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
416                                       AMDGPUAS::GLOBAL_ADDRESS);
417 
418   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
419   // "none" argument.
420   if (HiddenArgNumBytes >= 32) {
421     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
422       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
423     else
424       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
425   }
426 
427   // Emit "default queue" and "completion action" arguments if enqueue kernel is
428   // used, otherwise emit dummy "none" arguments.
429   if (HiddenArgNumBytes >= 48) {
430     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
431       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
432       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
433     } else {
434       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
435       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
436     }
437   }
438 
439   // Emit the pointer argument for multi-grid object.
440   if (HiddenArgNumBytes >= 56)
441     emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
442 }
443 
444 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
445   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
446 }
447 
448 void MetadataStreamerV2::begin(const Module &Mod) {
449   emitVersion();
450   emitPrintf(Mod);
451 }
452 
453 void MetadataStreamerV2::end() {
454   std::string HSAMetadataString;
455   if (toString(HSAMetadata, HSAMetadataString))
456     return;
457 
458   if (DumpHSAMetadata)
459     dump(HSAMetadataString);
460   if (VerifyHSAMetadata)
461     verify(HSAMetadataString);
462 }
463 
464 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
465                                     const SIProgramInfo &ProgramInfo) {
466   auto &Func = MF.getFunction();
467   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
468     return;
469 
470   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
471   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
472 
473   HSAMetadata.mKernels.push_back(Kernel::Metadata());
474   auto &Kernel = HSAMetadata.mKernels.back();
475 
476   Kernel.mName = Func.getName();
477   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
478   emitKernelLanguage(Func);
479   emitKernelAttrs(Func);
480   emitKernelArgs(Func);
481   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
482   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
483 }
484 
485 //===----------------------------------------------------------------------===//
486 // HSAMetadataStreamerV3
487 //===----------------------------------------------------------------------===//
488 
489 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
490   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
491 }
492 
493 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
494   errs() << "AMDGPU HSA Metadata Parser Test: ";
495 
496   msgpack::Document FromHSAMetadataString;
497 
498   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
499     errs() << "FAIL\n";
500     return;
501   }
502 
503   std::string ToHSAMetadataString;
504   raw_string_ostream StrOS(ToHSAMetadataString);
505   FromHSAMetadataString.toYAML(StrOS);
506 
507   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
508   if (HSAMetadataString != ToHSAMetadataString) {
509     errs() << "Original input: " << HSAMetadataString << '\n'
510            << "Produced output: " << StrOS.str() << '\n';
511   }
512 }
513 
514 Optional<StringRef>
515 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
516   return StringSwitch<Optional<StringRef>>(AccQual)
517       .Case("read_only", StringRef("read_only"))
518       .Case("write_only", StringRef("write_only"))
519       .Case("read_write", StringRef("read_write"))
520       .Default(None);
521 }
522 
523 Optional<StringRef>
524 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
525   switch (AddressSpace) {
526   case AMDGPUAS::PRIVATE_ADDRESS:
527     return StringRef("private");
528   case AMDGPUAS::GLOBAL_ADDRESS:
529     return StringRef("global");
530   case AMDGPUAS::CONSTANT_ADDRESS:
531     return StringRef("constant");
532   case AMDGPUAS::LOCAL_ADDRESS:
533     return StringRef("local");
534   case AMDGPUAS::FLAT_ADDRESS:
535     return StringRef("generic");
536   case AMDGPUAS::REGION_ADDRESS:
537     return StringRef("region");
538   default:
539     return None;
540   }
541 }
542 
543 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
544                                            StringRef BaseTypeName) const {
545   if (TypeQual.find("pipe") != StringRef::npos)
546     return "pipe";
547 
548   return StringSwitch<StringRef>(BaseTypeName)
549       .Case("image1d_t", "image")
550       .Case("image1d_array_t", "image")
551       .Case("image1d_buffer_t", "image")
552       .Case("image2d_t", "image")
553       .Case("image2d_array_t", "image")
554       .Case("image2d_array_depth_t", "image")
555       .Case("image2d_array_msaa_t", "image")
556       .Case("image2d_array_msaa_depth_t", "image")
557       .Case("image2d_depth_t", "image")
558       .Case("image2d_msaa_t", "image")
559       .Case("image2d_msaa_depth_t", "image")
560       .Case("image3d_t", "image")
561       .Case("sampler_t", "sampler")
562       .Case("queue_t", "queue")
563       .Default(isa<PointerType>(Ty)
564                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
565                           ? "dynamic_shared_pointer"
566                           : "global_buffer")
567                    : "by_value");
568 }
569 
570 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
571   switch (Ty->getTypeID()) {
572   case Type::IntegerTyID: {
573     auto Signed = !TypeName.startswith("u");
574     switch (Ty->getIntegerBitWidth()) {
575     case 8:
576       return Signed ? "i8" : "u8";
577     case 16:
578       return Signed ? "i16" : "u16";
579     case 32:
580       return Signed ? "i32" : "u32";
581     case 64:
582       return Signed ? "i64" : "u64";
583     default:
584       return "struct";
585     }
586   }
587   case Type::HalfTyID:
588     return "f16";
589   case Type::FloatTyID:
590     return "f32";
591   case Type::DoubleTyID:
592     return "f64";
593   case Type::PointerTyID:
594     return getValueType(Ty->getPointerElementType(), TypeName);
595   case Type::VectorTyID:
596     return getValueType(Ty->getVectorElementType(), TypeName);
597   default:
598     return "struct";
599   }
600 }
601 
602 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
603   switch (Ty->getTypeID()) {
604   case Type::IntegerTyID: {
605     if (!Signed)
606       return (Twine('u') + getTypeName(Ty, true)).str();
607 
608     auto BitWidth = Ty->getIntegerBitWidth();
609     switch (BitWidth) {
610     case 8:
611       return "char";
612     case 16:
613       return "short";
614     case 32:
615       return "int";
616     case 64:
617       return "long";
618     default:
619       return (Twine('i') + Twine(BitWidth)).str();
620     }
621   }
622   case Type::HalfTyID:
623     return "half";
624   case Type::FloatTyID:
625     return "float";
626   case Type::DoubleTyID:
627     return "double";
628   case Type::VectorTyID: {
629     auto VecTy = cast<VectorType>(Ty);
630     auto ElTy = VecTy->getElementType();
631     auto NumElements = VecTy->getVectorNumElements();
632     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
633   }
634   default:
635     return "unknown";
636   }
637 }
638 
639 msgpack::ArrayDocNode
640 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
641   auto Dims = HSAMetadataDoc->getArrayNode();
642   if (Node->getNumOperands() != 3)
643     return Dims;
644 
645   for (auto &Op : Node->operands())
646     Dims.push_back(Dims.getDocument()->getNode(
647         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
648   return Dims;
649 }
650 
651 void MetadataStreamerV3::emitVersion() {
652   auto Version = HSAMetadataDoc->getArrayNode();
653   Version.push_back(Version.getDocument()->getNode(VersionMajor));
654   Version.push_back(Version.getDocument()->getNode(VersionMinor));
655   getRootMetadata("amdhsa.version") = Version;
656 }
657 
658 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
659   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
660   if (!Node)
661     return;
662 
663   auto Printf = HSAMetadataDoc->getArrayNode();
664   for (auto Op : Node->operands())
665     if (Op->getNumOperands())
666       Printf.push_back(Printf.getDocument()->getNode(
667           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
668   getRootMetadata("amdhsa.printf") = Printf;
669 }
670 
671 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
672                                             msgpack::MapDocNode Kern) {
673   // TODO: What about other languages?
674   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
675   if (!Node || !Node->getNumOperands())
676     return;
677   auto Op0 = Node->getOperand(0);
678   if (Op0->getNumOperands() <= 1)
679     return;
680 
681   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
682   auto LanguageVersion = Kern.getDocument()->getArrayNode();
683   LanguageVersion.push_back(Kern.getDocument()->getNode(
684       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
685   LanguageVersion.push_back(Kern.getDocument()->getNode(
686       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
687   Kern[".language_version"] = LanguageVersion;
688 }
689 
690 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
691                                          msgpack::MapDocNode Kern) {
692 
693   if (auto Node = Func.getMetadata("reqd_work_group_size"))
694     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
695   if (auto Node = Func.getMetadata("work_group_size_hint"))
696     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
697   if (auto Node = Func.getMetadata("vec_type_hint")) {
698     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
699         getTypeName(
700             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
701             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
702         /*Copy=*/true);
703   }
704   if (Func.hasFnAttribute("runtime-handle")) {
705     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
706         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
707         /*Copy=*/true);
708   }
709 }
710 
711 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
712                                         msgpack::MapDocNode Kern) {
713   unsigned Offset = 0;
714   auto Args = HSAMetadataDoc->getArrayNode();
715   for (auto &Arg : Func.args())
716     emitKernelArg(Arg, Offset, Args);
717 
718   emitHiddenKernelArgs(Func, Offset, Args);
719 
720   Kern[".args"] = Args;
721 }
722 
723 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
724                                        msgpack::ArrayDocNode Args) {
725   auto Func = Arg.getParent();
726   auto ArgNo = Arg.getArgNo();
727   const MDNode *Node;
728 
729   StringRef Name;
730   Node = Func->getMetadata("kernel_arg_name");
731   if (Node && ArgNo < Node->getNumOperands())
732     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
733   else if (Arg.hasName())
734     Name = Arg.getName();
735 
736   StringRef TypeName;
737   Node = Func->getMetadata("kernel_arg_type");
738   if (Node && ArgNo < Node->getNumOperands())
739     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
740 
741   StringRef BaseTypeName;
742   Node = Func->getMetadata("kernel_arg_base_type");
743   if (Node && ArgNo < Node->getNumOperands())
744     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
745 
746   StringRef AccQual;
747   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
748       Arg.hasNoAliasAttr()) {
749     AccQual = "read_only";
750   } else {
751     Node = Func->getMetadata("kernel_arg_access_qual");
752     if (Node && ArgNo < Node->getNumOperands())
753       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
754   }
755 
756   StringRef TypeQual;
757   Node = Func->getMetadata("kernel_arg_type_qual");
758   if (Node && ArgNo < Node->getNumOperands())
759     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
760 
761   Type *Ty = Arg.getType();
762   const DataLayout &DL = Func->getParent()->getDataLayout();
763 
764   unsigned PointeeAlign = 0;
765   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
766     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
767       PointeeAlign = Arg.getParamAlignment();
768       if (PointeeAlign == 0)
769         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
770     }
771   }
772 
773   emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
774                 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
775                 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
776                 TypeQual);
777 }
778 
779 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
780                                        StringRef ValueKind, unsigned &Offset,
781                                        msgpack::ArrayDocNode Args,
782                                        unsigned PointeeAlign, StringRef Name,
783                                        StringRef TypeName,
784                                        StringRef BaseTypeName,
785                                        StringRef AccQual, StringRef TypeQual) {
786   auto Arg = Args.getDocument()->getMapNode();
787 
788   if (!Name.empty())
789     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
790   if (!TypeName.empty())
791     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
792   auto Size = DL.getTypeAllocSize(Ty);
793   auto Align = DL.getABITypeAlignment(Ty);
794   Arg[".size"] = Arg.getDocument()->getNode(Size);
795   Offset = alignTo(Offset, Align);
796   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
797   Offset += Size;
798   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
799   Arg[".value_type"] =
800       Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
801   if (PointeeAlign)
802     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
803 
804   if (auto PtrTy = dyn_cast<PointerType>(Ty))
805     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
806       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
807 
808   if (auto AQ = getAccessQualifier(AccQual))
809     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
810 
811   // TODO: Emit Arg[".actual_access"].
812 
813   SmallVector<StringRef, 1> SplitTypeQuals;
814   TypeQual.split(SplitTypeQuals, " ", -1, false);
815   for (StringRef Key : SplitTypeQuals) {
816     if (Key == "const")
817       Arg[".is_const"] = Arg.getDocument()->getNode(true);
818     else if (Key == "restrict")
819       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
820     else if (Key == "volatile")
821       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
822     else if (Key == "pipe")
823       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
824   }
825 
826   Args.push_back(Arg);
827 }
828 
829 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
830                                               unsigned &Offset,
831                                               msgpack::ArrayDocNode Args) {
832   int HiddenArgNumBytes =
833       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
834 
835   if (!HiddenArgNumBytes)
836     return;
837 
838   auto &DL = Func.getParent()->getDataLayout();
839   auto Int64Ty = Type::getInt64Ty(Func.getContext());
840 
841   if (HiddenArgNumBytes >= 8)
842     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
843   if (HiddenArgNumBytes >= 16)
844     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
845   if (HiddenArgNumBytes >= 24)
846     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
847 
848   auto Int8PtrTy =
849       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
850 
851   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
852   // "none" argument.
853   if (HiddenArgNumBytes >= 32) {
854     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
855       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
856     else
857       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
858   }
859 
860   // Emit "default queue" and "completion action" arguments if enqueue kernel is
861   // used, otherwise emit dummy "none" arguments.
862   if (HiddenArgNumBytes >= 48) {
863     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
864       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
865       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
866     } else {
867       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
868       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
869     }
870   }
871 
872   // Emit the pointer argument for multi-grid object.
873   if (HiddenArgNumBytes >= 56)
874     emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
875 }
876 
877 msgpack::MapDocNode
878 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
879                                       const SIProgramInfo &ProgramInfo) const {
880   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
881   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
882   const Function &F = MF.getFunction();
883 
884   auto Kern = HSAMetadataDoc->getMapNode();
885 
886   unsigned MaxKernArgAlign;
887   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
888       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
889   Kern[".group_segment_fixed_size"] =
890       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
891   Kern[".private_segment_fixed_size"] =
892       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
893   Kern[".kernarg_segment_align"] =
894       Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign));
895   Kern[".wavefront_size"] =
896       Kern.getDocument()->getNode(STM.getWavefrontSize());
897   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
898   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
899   Kern[".max_flat_workgroup_size"] =
900       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
901   Kern[".sgpr_spill_count"] =
902       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
903   Kern[".vgpr_spill_count"] =
904       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
905 
906   return Kern;
907 }
908 
909 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
910   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
911 }
912 
913 void MetadataStreamerV3::begin(const Module &Mod) {
914   emitVersion();
915   emitPrintf(Mod);
916   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
917 }
918 
919 void MetadataStreamerV3::end() {
920   std::string HSAMetadataString;
921   raw_string_ostream StrOS(HSAMetadataString);
922   HSAMetadataDoc->toYAML(StrOS);
923 
924   if (DumpHSAMetadata)
925     dump(StrOS.str());
926   if (VerifyHSAMetadata)
927     verify(StrOS.str());
928 }
929 
930 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
931                                     const SIProgramInfo &ProgramInfo) {
932   auto &Func = MF.getFunction();
933   auto Kern = getHSAKernelProps(MF, ProgramInfo);
934 
935   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
936          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
937 
938   auto Kernels =
939       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
940 
941   {
942     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
943     Kern[".symbol"] = Kern.getDocument()->getNode(
944         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
945     emitKernelLanguage(Func, Kern);
946     emitKernelAttrs(Func, Kern);
947     emitKernelArgs(Func, Kern);
948   }
949 
950   Kernels.push_back(Kern);
951 }
952 
953 } // end namespace HSAMD
954 } // end namespace AMDGPU
955 } // end namespace llvm
956