xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (revision 1e4896b176ff664dc9c2fce5426bf2fdf8017a7d)
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44 }
45 
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47   errs() << "AMDGPU HSA Metadata Parser Test: ";
48 
49   HSAMD::Metadata FromHSAMetadataString;
50   if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
51     errs() << "FAIL\n";
52     return;
53   }
54 
55   std::string ToHSAMetadataString;
56   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57     errs() << "FAIL\n";
58     return;
59   }
60 
61   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62          << '\n';
63   if (HSAMetadataString != ToHSAMetadataString) {
64     errs() << "Original input: " << HSAMetadataString << '\n'
65            << "Produced output: " << ToHSAMetadataString << '\n';
66   }
67 }
68 
69 AccessQualifier
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71   if (AccQual.empty())
72     return AccessQualifier::Unknown;
73 
74   return StringSwitch<AccessQualifier>(AccQual)
75              .Case("read_only",  AccessQualifier::ReadOnly)
76              .Case("write_only", AccessQualifier::WriteOnly)
77              .Case("read_write", AccessQualifier::ReadWrite)
78              .Default(AccessQualifier::Default);
79 }
80 
81 AddressSpaceQualifier
82 MetadataStreamerV2::getAddressSpaceQualifier(
83     unsigned AddressSpace) const {
84   switch (AddressSpace) {
85   case AMDGPUAS::PRIVATE_ADDRESS:
86     return AddressSpaceQualifier::Private;
87   case AMDGPUAS::GLOBAL_ADDRESS:
88     return AddressSpaceQualifier::Global;
89   case AMDGPUAS::CONSTANT_ADDRESS:
90     return AddressSpaceQualifier::Constant;
91   case AMDGPUAS::LOCAL_ADDRESS:
92     return AddressSpaceQualifier::Local;
93   case AMDGPUAS::FLAT_ADDRESS:
94     return AddressSpaceQualifier::Generic;
95   case AMDGPUAS::REGION_ADDRESS:
96     return AddressSpaceQualifier::Region;
97   default:
98     return AddressSpaceQualifier::Unknown;
99   }
100 }
101 
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                            StringRef BaseTypeName) const {
104   if (TypeQual.find("pipe") != StringRef::npos)
105     return ValueKind::Pipe;
106 
107   return StringSwitch<ValueKind>(BaseTypeName)
108              .Case("image1d_t", ValueKind::Image)
109              .Case("image1d_array_t", ValueKind::Image)
110              .Case("image1d_buffer_t", ValueKind::Image)
111              .Case("image2d_t", ValueKind::Image)
112              .Case("image2d_array_t", ValueKind::Image)
113              .Case("image2d_array_depth_t", ValueKind::Image)
114              .Case("image2d_array_msaa_t", ValueKind::Image)
115              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116              .Case("image2d_depth_t", ValueKind::Image)
117              .Case("image2d_msaa_t", ValueKind::Image)
118              .Case("image2d_msaa_depth_t", ValueKind::Image)
119              .Case("image3d_t", ValueKind::Image)
120              .Case("sampler_t", ValueKind::Sampler)
121              .Case("queue_t", ValueKind::Queue)
122              .Default(isa<PointerType>(Ty) ?
123                           (Ty->getPointerAddressSpace() ==
124                            AMDGPUAS::LOCAL_ADDRESS ?
125                            ValueKind::DynamicSharedPointer :
126                            ValueKind::GlobalBuffer) :
127                       ValueKind::ByValue);
128 }
129 
130 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
131   switch (Ty->getTypeID()) {
132   case Type::IntegerTyID: {
133     if (!Signed)
134       return (Twine('u') + getTypeName(Ty, true)).str();
135 
136     auto BitWidth = Ty->getIntegerBitWidth();
137     switch (BitWidth) {
138     case 8:
139       return "char";
140     case 16:
141       return "short";
142     case 32:
143       return "int";
144     case 64:
145       return "long";
146     default:
147       return (Twine('i') + Twine(BitWidth)).str();
148     }
149   }
150   case Type::HalfTyID:
151     return "half";
152   case Type::FloatTyID:
153     return "float";
154   case Type::DoubleTyID:
155     return "double";
156   case Type::FixedVectorTyID: {
157     auto VecTy = cast<FixedVectorType>(Ty);
158     auto ElTy = VecTy->getElementType();
159     auto NumElements = VecTy->getNumElements();
160     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
161   }
162   default:
163     return "unknown";
164   }
165 }
166 
167 std::vector<uint32_t>
168 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
169   std::vector<uint32_t> Dims;
170   if (Node->getNumOperands() != 3)
171     return Dims;
172 
173   for (auto &Op : Node->operands())
174     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
175   return Dims;
176 }
177 
178 Kernel::CodeProps::Metadata
179 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
180                                     const SIProgramInfo &ProgramInfo) const {
181   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
182   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
183   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
184   const Function &F = MF.getFunction();
185 
186   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
187          F.getCallingConv() == CallingConv::SPIR_KERNEL);
188 
189   Align MaxKernArgAlign;
190   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
191                                                                MaxKernArgAlign);
192   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
193   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
194   HSACodeProps.mKernargSegmentAlign =
195       std::max(MaxKernArgAlign, Align(4)).value();
196   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
197   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
198   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
199   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
200   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
201   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
202   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
203   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
204 
205   return HSACodeProps;
206 }
207 
208 Kernel::DebugProps::Metadata
209 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
210                                      const SIProgramInfo &ProgramInfo) const {
211   return HSAMD::Kernel::DebugProps::Metadata();
212 }
213 
214 void MetadataStreamerV2::emitVersion() {
215   auto &Version = HSAMetadata.mVersion;
216 
217   Version.push_back(VersionMajor);
218   Version.push_back(VersionMinor);
219 }
220 
221 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
222   auto &Printf = HSAMetadata.mPrintf;
223 
224   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
225   if (!Node)
226     return;
227 
228   for (auto Op : Node->operands())
229     if (Op->getNumOperands())
230       Printf.push_back(
231           std::string(cast<MDString>(Op->getOperand(0))->getString()));
232 }
233 
234 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
235   auto &Kernel = HSAMetadata.mKernels.back();
236 
237   // TODO: What about other languages?
238   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
239   if (!Node || !Node->getNumOperands())
240     return;
241   auto Op0 = Node->getOperand(0);
242   if (Op0->getNumOperands() <= 1)
243     return;
244 
245   Kernel.mLanguage = "OpenCL C";
246   Kernel.mLanguageVersion.push_back(
247       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
248   Kernel.mLanguageVersion.push_back(
249       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
250 }
251 
252 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
253   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
254 
255   if (auto Node = Func.getMetadata("reqd_work_group_size"))
256     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
257   if (auto Node = Func.getMetadata("work_group_size_hint"))
258     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
259   if (auto Node = Func.getMetadata("vec_type_hint")) {
260     Attrs.mVecTypeHint = getTypeName(
261         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
262         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
263   }
264   if (Func.hasFnAttribute("runtime-handle")) {
265     Attrs.mRuntimeHandle =
266         Func.getFnAttribute("runtime-handle").getValueAsString().str();
267   }
268 }
269 
270 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
271   for (auto &Arg : Func.args())
272     emitKernelArg(Arg);
273 
274   emitHiddenKernelArgs(Func);
275 }
276 
277 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
278   auto Func = Arg.getParent();
279   auto ArgNo = Arg.getArgNo();
280   const MDNode *Node;
281 
282   StringRef Name;
283   Node = Func->getMetadata("kernel_arg_name");
284   if (Node && ArgNo < Node->getNumOperands())
285     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
286   else if (Arg.hasName())
287     Name = Arg.getName();
288 
289   StringRef TypeName;
290   Node = Func->getMetadata("kernel_arg_type");
291   if (Node && ArgNo < Node->getNumOperands())
292     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
293 
294   StringRef BaseTypeName;
295   Node = Func->getMetadata("kernel_arg_base_type");
296   if (Node && ArgNo < Node->getNumOperands())
297     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
298 
299   StringRef AccQual;
300   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
301       Arg.hasNoAliasAttr()) {
302     AccQual = "read_only";
303   } else {
304     Node = Func->getMetadata("kernel_arg_access_qual");
305     if (Node && ArgNo < Node->getNumOperands())
306       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
307   }
308 
309   StringRef TypeQual;
310   Node = Func->getMetadata("kernel_arg_type_qual");
311   if (Node && ArgNo < Node->getNumOperands())
312     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
313 
314   Type *Ty = Arg.getType();
315   const DataLayout &DL = Func->getParent()->getDataLayout();
316 
317   MaybeAlign PointeeAlign;
318   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
319     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
320       PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
321                                                    PtrTy->getElementType());
322     }
323   }
324 
325   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
326                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
327 }
328 
329 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
330                                        ValueKind ValueKind,
331                                        MaybeAlign PointeeAlign, StringRef Name,
332                                        StringRef TypeName,
333                                        StringRef BaseTypeName,
334                                        StringRef AccQual, StringRef TypeQual) {
335   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
336   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
337 
338   Arg.mName = std::string(Name);
339   Arg.mTypeName = std::string(TypeName);
340   Arg.mSize = DL.getTypeAllocSize(Ty);
341   Arg.mAlign = DL.getABITypeAlign(Ty).value();
342   Arg.mValueKind = ValueKind;
343   Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
344 
345   if (auto PtrTy = dyn_cast<PointerType>(Ty))
346     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
347 
348   Arg.mAccQual = getAccessQualifier(AccQual);
349 
350   // TODO: Emit Arg.mActualAccQual.
351 
352   SmallVector<StringRef, 1> SplitTypeQuals;
353   TypeQual.split(SplitTypeQuals, " ", -1, false);
354   for (StringRef Key : SplitTypeQuals) {
355     auto P = StringSwitch<bool*>(Key)
356                  .Case("const",    &Arg.mIsConst)
357                  .Case("restrict", &Arg.mIsRestrict)
358                  .Case("volatile", &Arg.mIsVolatile)
359                  .Case("pipe",     &Arg.mIsPipe)
360                  .Default(nullptr);
361     if (P)
362       *P = true;
363   }
364 }
365 
366 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
367   int HiddenArgNumBytes =
368       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
369 
370   if (!HiddenArgNumBytes)
371     return;
372 
373   auto &DL = Func.getParent()->getDataLayout();
374   auto Int64Ty = Type::getInt64Ty(Func.getContext());
375 
376   if (HiddenArgNumBytes >= 8)
377     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
378   if (HiddenArgNumBytes >= 16)
379     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
380   if (HiddenArgNumBytes >= 24)
381     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
382 
383   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
384                                       AMDGPUAS::GLOBAL_ADDRESS);
385 
386   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
387   // "none" argument.
388   if (HiddenArgNumBytes >= 32) {
389     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
390       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
391     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
392       // The printf runtime binding pass should have ensured that hostcall and
393       // printf are not used in the same module.
394       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
395       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
396     } else
397       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
398   }
399 
400   // Emit "default queue" and "completion action" arguments if enqueue kernel is
401   // used, otherwise emit dummy "none" arguments.
402   if (HiddenArgNumBytes >= 48) {
403     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
404       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
405       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
406     } else {
407       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
408       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
409     }
410   }
411 
412   // Emit the pointer argument for multi-grid object.
413   if (HiddenArgNumBytes >= 56)
414     emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
415 }
416 
417 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
418   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
419 }
420 
421 void MetadataStreamerV2::begin(const Module &Mod) {
422   emitVersion();
423   emitPrintf(Mod);
424 }
425 
426 void MetadataStreamerV2::end() {
427   std::string HSAMetadataString;
428   if (toString(HSAMetadata, HSAMetadataString))
429     return;
430 
431   if (DumpHSAMetadata)
432     dump(HSAMetadataString);
433   if (VerifyHSAMetadata)
434     verify(HSAMetadataString);
435 }
436 
437 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
438                                     const SIProgramInfo &ProgramInfo) {
439   auto &Func = MF.getFunction();
440   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
441     return;
442 
443   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
444   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
445 
446   HSAMetadata.mKernels.push_back(Kernel::Metadata());
447   auto &Kernel = HSAMetadata.mKernels.back();
448 
449   Kernel.mName = std::string(Func.getName());
450   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
451   emitKernelLanguage(Func);
452   emitKernelAttrs(Func);
453   emitKernelArgs(Func);
454   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
455   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
456 }
457 
458 //===----------------------------------------------------------------------===//
459 // HSAMetadataStreamerV3
460 //===----------------------------------------------------------------------===//
461 
462 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
463   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
464 }
465 
466 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
467   errs() << "AMDGPU HSA Metadata Parser Test: ";
468 
469   msgpack::Document FromHSAMetadataString;
470 
471   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
472     errs() << "FAIL\n";
473     return;
474   }
475 
476   std::string ToHSAMetadataString;
477   raw_string_ostream StrOS(ToHSAMetadataString);
478   FromHSAMetadataString.toYAML(StrOS);
479 
480   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
481   if (HSAMetadataString != ToHSAMetadataString) {
482     errs() << "Original input: " << HSAMetadataString << '\n'
483            << "Produced output: " << StrOS.str() << '\n';
484   }
485 }
486 
487 Optional<StringRef>
488 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
489   return StringSwitch<Optional<StringRef>>(AccQual)
490       .Case("read_only", StringRef("read_only"))
491       .Case("write_only", StringRef("write_only"))
492       .Case("read_write", StringRef("read_write"))
493       .Default(None);
494 }
495 
496 Optional<StringRef>
497 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
498   switch (AddressSpace) {
499   case AMDGPUAS::PRIVATE_ADDRESS:
500     return StringRef("private");
501   case AMDGPUAS::GLOBAL_ADDRESS:
502     return StringRef("global");
503   case AMDGPUAS::CONSTANT_ADDRESS:
504     return StringRef("constant");
505   case AMDGPUAS::LOCAL_ADDRESS:
506     return StringRef("local");
507   case AMDGPUAS::FLAT_ADDRESS:
508     return StringRef("generic");
509   case AMDGPUAS::REGION_ADDRESS:
510     return StringRef("region");
511   default:
512     return None;
513   }
514 }
515 
516 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
517                                            StringRef BaseTypeName) const {
518   if (TypeQual.find("pipe") != StringRef::npos)
519     return "pipe";
520 
521   return StringSwitch<StringRef>(BaseTypeName)
522       .Case("image1d_t", "image")
523       .Case("image1d_array_t", "image")
524       .Case("image1d_buffer_t", "image")
525       .Case("image2d_t", "image")
526       .Case("image2d_array_t", "image")
527       .Case("image2d_array_depth_t", "image")
528       .Case("image2d_array_msaa_t", "image")
529       .Case("image2d_array_msaa_depth_t", "image")
530       .Case("image2d_depth_t", "image")
531       .Case("image2d_msaa_t", "image")
532       .Case("image2d_msaa_depth_t", "image")
533       .Case("image3d_t", "image")
534       .Case("sampler_t", "sampler")
535       .Case("queue_t", "queue")
536       .Default(isa<PointerType>(Ty)
537                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
538                           ? "dynamic_shared_pointer"
539                           : "global_buffer")
540                    : "by_value");
541 }
542 
543 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
544   switch (Ty->getTypeID()) {
545   case Type::IntegerTyID: {
546     if (!Signed)
547       return (Twine('u') + getTypeName(Ty, true)).str();
548 
549     auto BitWidth = Ty->getIntegerBitWidth();
550     switch (BitWidth) {
551     case 8:
552       return "char";
553     case 16:
554       return "short";
555     case 32:
556       return "int";
557     case 64:
558       return "long";
559     default:
560       return (Twine('i') + Twine(BitWidth)).str();
561     }
562   }
563   case Type::HalfTyID:
564     return "half";
565   case Type::FloatTyID:
566     return "float";
567   case Type::DoubleTyID:
568     return "double";
569   case Type::FixedVectorTyID: {
570     auto VecTy = cast<FixedVectorType>(Ty);
571     auto ElTy = VecTy->getElementType();
572     auto NumElements = VecTy->getNumElements();
573     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
574   }
575   default:
576     return "unknown";
577   }
578 }
579 
580 msgpack::ArrayDocNode
581 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
582   auto Dims = HSAMetadataDoc->getArrayNode();
583   if (Node->getNumOperands() != 3)
584     return Dims;
585 
586   for (auto &Op : Node->operands())
587     Dims.push_back(Dims.getDocument()->getNode(
588         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
589   return Dims;
590 }
591 
592 void MetadataStreamerV3::emitVersion() {
593   auto Version = HSAMetadataDoc->getArrayNode();
594   Version.push_back(Version.getDocument()->getNode(VersionMajor));
595   Version.push_back(Version.getDocument()->getNode(VersionMinor));
596   getRootMetadata("amdhsa.version") = Version;
597 }
598 
599 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
600   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
601   if (!Node)
602     return;
603 
604   auto Printf = HSAMetadataDoc->getArrayNode();
605   for (auto Op : Node->operands())
606     if (Op->getNumOperands())
607       Printf.push_back(Printf.getDocument()->getNode(
608           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
609   getRootMetadata("amdhsa.printf") = Printf;
610 }
611 
612 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
613                                             msgpack::MapDocNode Kern) {
614   // TODO: What about other languages?
615   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
616   if (!Node || !Node->getNumOperands())
617     return;
618   auto Op0 = Node->getOperand(0);
619   if (Op0->getNumOperands() <= 1)
620     return;
621 
622   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
623   auto LanguageVersion = Kern.getDocument()->getArrayNode();
624   LanguageVersion.push_back(Kern.getDocument()->getNode(
625       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
626   LanguageVersion.push_back(Kern.getDocument()->getNode(
627       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
628   Kern[".language_version"] = LanguageVersion;
629 }
630 
631 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
632                                          msgpack::MapDocNode Kern) {
633 
634   if (auto Node = Func.getMetadata("reqd_work_group_size"))
635     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
636   if (auto Node = Func.getMetadata("work_group_size_hint"))
637     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
638   if (auto Node = Func.getMetadata("vec_type_hint")) {
639     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
640         getTypeName(
641             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
642             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
643         /*Copy=*/true);
644   }
645   if (Func.hasFnAttribute("runtime-handle")) {
646     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
647         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
648         /*Copy=*/true);
649   }
650 }
651 
652 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
653                                         msgpack::MapDocNode Kern) {
654   unsigned Offset = 0;
655   auto Args = HSAMetadataDoc->getArrayNode();
656   for (auto &Arg : Func.args())
657     emitKernelArg(Arg, Offset, Args);
658 
659   emitHiddenKernelArgs(Func, Offset, Args);
660 
661   Kern[".args"] = Args;
662 }
663 
664 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
665                                        msgpack::ArrayDocNode Args) {
666   auto Func = Arg.getParent();
667   auto ArgNo = Arg.getArgNo();
668   const MDNode *Node;
669 
670   StringRef Name;
671   Node = Func->getMetadata("kernel_arg_name");
672   if (Node && ArgNo < Node->getNumOperands())
673     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
674   else if (Arg.hasName())
675     Name = Arg.getName();
676 
677   StringRef TypeName;
678   Node = Func->getMetadata("kernel_arg_type");
679   if (Node && ArgNo < Node->getNumOperands())
680     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
681 
682   StringRef BaseTypeName;
683   Node = Func->getMetadata("kernel_arg_base_type");
684   if (Node && ArgNo < Node->getNumOperands())
685     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
686 
687   StringRef AccQual;
688   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
689       Arg.hasNoAliasAttr()) {
690     AccQual = "read_only";
691   } else {
692     Node = Func->getMetadata("kernel_arg_access_qual");
693     if (Node && ArgNo < Node->getNumOperands())
694       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
695   }
696 
697   StringRef TypeQual;
698   Node = Func->getMetadata("kernel_arg_type_qual");
699   if (Node && ArgNo < Node->getNumOperands())
700     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
701 
702   Type *Ty = Arg.getType();
703   const DataLayout &DL = Func->getParent()->getDataLayout();
704 
705   MaybeAlign PointeeAlign;
706   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
707     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
708       PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
709                                                    PtrTy->getElementType());
710     }
711   }
712 
713   emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
714                 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
715                 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
716                 TypeQual);
717 }
718 
719 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
720                                        StringRef ValueKind, unsigned &Offset,
721                                        msgpack::ArrayDocNode Args,
722                                        MaybeAlign PointeeAlign, StringRef Name,
723                                        StringRef TypeName,
724                                        StringRef BaseTypeName,
725                                        StringRef AccQual, StringRef TypeQual) {
726   auto Arg = Args.getDocument()->getMapNode();
727 
728   if (!Name.empty())
729     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
730   if (!TypeName.empty())
731     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
732   auto Size = DL.getTypeAllocSize(Ty);
733   Align Alignment = DL.getABITypeAlign(Ty);
734   Arg[".size"] = Arg.getDocument()->getNode(Size);
735   Offset = alignTo(Offset, Alignment);
736   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
737   Offset += Size;
738   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
739   if (PointeeAlign)
740     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
741 
742   if (auto PtrTy = dyn_cast<PointerType>(Ty))
743     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
744       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
745 
746   if (auto AQ = getAccessQualifier(AccQual))
747     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
748 
749   // TODO: Emit Arg[".actual_access"].
750 
751   SmallVector<StringRef, 1> SplitTypeQuals;
752   TypeQual.split(SplitTypeQuals, " ", -1, false);
753   for (StringRef Key : SplitTypeQuals) {
754     if (Key == "const")
755       Arg[".is_const"] = Arg.getDocument()->getNode(true);
756     else if (Key == "restrict")
757       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
758     else if (Key == "volatile")
759       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
760     else if (Key == "pipe")
761       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
762   }
763 
764   Args.push_back(Arg);
765 }
766 
767 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
768                                               unsigned &Offset,
769                                               msgpack::ArrayDocNode Args) {
770   int HiddenArgNumBytes =
771       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
772 
773   if (!HiddenArgNumBytes)
774     return;
775 
776   auto &DL = Func.getParent()->getDataLayout();
777   auto Int64Ty = Type::getInt64Ty(Func.getContext());
778 
779   if (HiddenArgNumBytes >= 8)
780     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
781   if (HiddenArgNumBytes >= 16)
782     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
783   if (HiddenArgNumBytes >= 24)
784     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
785 
786   auto Int8PtrTy =
787       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
788 
789   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
790   // "none" argument.
791   if (HiddenArgNumBytes >= 32) {
792     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
793       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
794     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
795       // The printf runtime binding pass should have ensured that hostcall and
796       // printf are not used in the same module.
797       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
798       emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
799     } else
800       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
801   }
802 
803   // Emit "default queue" and "completion action" arguments if enqueue kernel is
804   // used, otherwise emit dummy "none" arguments.
805   if (HiddenArgNumBytes >= 48) {
806     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
807       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
808       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
809     } else {
810       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
811       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
812     }
813   }
814 
815   // Emit the pointer argument for multi-grid object.
816   if (HiddenArgNumBytes >= 56)
817     emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
818 }
819 
820 msgpack::MapDocNode
821 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
822                                       const SIProgramInfo &ProgramInfo) const {
823   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
824   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
825   const Function &F = MF.getFunction();
826 
827   auto Kern = HSAMetadataDoc->getMapNode();
828 
829   Align MaxKernArgAlign;
830   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
831       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
832   Kern[".group_segment_fixed_size"] =
833       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
834   Kern[".private_segment_fixed_size"] =
835       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
836   Kern[".kernarg_segment_align"] =
837       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
838   Kern[".wavefront_size"] =
839       Kern.getDocument()->getNode(STM.getWavefrontSize());
840   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
841   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
842   Kern[".max_flat_workgroup_size"] =
843       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
844   Kern[".sgpr_spill_count"] =
845       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
846   Kern[".vgpr_spill_count"] =
847       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
848 
849   return Kern;
850 }
851 
852 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
853   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
854 }
855 
856 void MetadataStreamerV3::begin(const Module &Mod) {
857   emitVersion();
858   emitPrintf(Mod);
859   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
860 }
861 
862 void MetadataStreamerV3::end() {
863   std::string HSAMetadataString;
864   raw_string_ostream StrOS(HSAMetadataString);
865   HSAMetadataDoc->toYAML(StrOS);
866 
867   if (DumpHSAMetadata)
868     dump(StrOS.str());
869   if (VerifyHSAMetadata)
870     verify(StrOS.str());
871 }
872 
873 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
874                                     const SIProgramInfo &ProgramInfo) {
875   auto &Func = MF.getFunction();
876   auto Kern = getHSAKernelProps(MF, ProgramInfo);
877 
878   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
879          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
880 
881   auto Kernels =
882       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
883 
884   {
885     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
886     Kern[".symbol"] = Kern.getDocument()->getNode(
887         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
888     emitKernelLanguage(Func, Kern);
889     emitKernelAttrs(Func, Kern);
890     emitKernelArgs(Func, Kern);
891   }
892 
893   Kernels.push_back(Kern);
894 }
895 
896 } // end namespace HSAMD
897 } // end namespace AMDGPU
898 } // end namespace llvm
899