//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// AMDGPU HSA Metadata Streamer.
///
//
//===----------------------------------------------------------------------===//

#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

using namespace llvm;

/// Return the in-memory type and alignment to use for kernel argument \p Arg.
/// For byref arguments the pointee type is used, together with the explicit
/// parameter alignment if one is present; in all other cases the alignment
/// falls back to the ABI type alignment from \p DL.
static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
                                                     const DataLayout &DL) {
  Type *Ty = Arg.getType();
  MaybeAlign ArgAlign;
  if (Arg.hasByRefAttr()) {
    Ty = Arg.getParamByRefType();
    ArgAlign = Arg.getParamAlign();
  }

  if (!ArgAlign)
    ArgAlign = DL.getABITypeAlign(Ty);

  return std::pair(Ty, *ArgAlign);
}

/// Find the mangled symbol name for the runtime handle for \p EnqueuedBlock.
/// The handle is located via !associated metadata on the function; it must be
/// a GlobalVariable placed in the ".amdgpu.kernel.runtime.handle" section.
/// Returns the empty string if no such handle exists.
static std::string getEnqueuedBlockSymbolName(const AMDGPUTargetMachine &TM,
                                              const Function &EnqueuedBlock) {
  const MDNode *Associated =
      EnqueuedBlock.getMetadata(LLVMContext::MD_associated);
  if (!Associated)
    return "";

  auto *VM = cast<ValueAsMetadata>(Associated->getOperand(0));
  auto *RuntimeHandle =
      dyn_cast<GlobalVariable>(VM->getValue()->stripPointerCasts());
  if (!RuntimeHandle ||
      RuntimeHandle->getSection() != ".amdgpu.kernel.runtime.handle")
    return "";

  // Mangle the variable's name exactly as the object file emitter will, so
  // the metadata entry matches the emitted symbol.
  SmallString<128> Name;
  TM.getNameWithPrefix(Name, RuntimeHandle,
                       TM.getObjFileLowering()->getMangler());
  return Name.str().str();
}

namespace llvm {

static cl::opt<bool> DumpHSAMetadata(
    "amdgpu-dump-hsa-metadata",
    cl::desc("Dump AMDGPU HSA Metadata"));
static cl::opt<bool> VerifyHSAMetadata(
    "amdgpu-verify-hsa-metadata",
    cl::desc("Verify AMDGPU HSA Metadata"));

namespace AMDGPU::HSAMD {

//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV4
//===----------------------------------------------------------------------===//

/// Print the YAML-encoded metadata string to stderr (for
/// -amdgpu-dump-hsa-metadata).
void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}

/// Round-trip \p HSAMetadataString through the msgpack document YAML
/// parser/printer and report PASS/FAIL on stderr depending on whether the
/// output reproduces the input exactly (for -amdgpu-verify-hsa-metadata).
void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
  errs() << "AMDGPU HSA Metadata Parser Test: ";

  msgpack::Document FromHSAMetadataString;

  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
    errs() << "FAIL\n";
    return;
  }

  std::string ToHSAMetadataString;
  raw_string_ostream StrOS(ToHSAMetadataString);
  FromHSAMetadataString.toYAML(StrOS);

  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
  if (HSAMetadataString != ToHSAMetadataString) {
    errs() << "Original input: " << HSAMetadataString << '\n'
           << "Produced output: " << StrOS.str() << '\n';
  }
}

/// Map an OpenCL access-qualifier string to its metadata spelling, or
/// std::nullopt if \p AccQual is not one of the three recognized qualifiers.
std::optional<StringRef>
MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
  return StringSwitch<std::optional<StringRef>>(AccQual)
      .Case("read_only", StringRef("read_only"))
      .Case("write_only", StringRef("write_only"))
      .Case("read_write", StringRef("read_write"))
      .Default(std::nullopt);
}

/// Map an AMDGPU address-space number to its metadata name, or std::nullopt
/// for address spaces that have no metadata representation.
std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
    unsigned AddressSpace) const {
  switch (AddressSpace) {
  case AMDGPUAS::PRIVATE_ADDRESS:
    return StringRef("private");
  case AMDGPUAS::GLOBAL_ADDRESS:
    return StringRef("global");
  case AMDGPUAS::CONSTANT_ADDRESS:
    return StringRef("constant");
  case AMDGPUAS::LOCAL_ADDRESS:
    return StringRef("local");
  case AMDGPUAS::FLAT_ADDRESS:
    return StringRef("generic");
  case AMDGPUAS::REGION_ADDRESS:
    return StringRef("region");
  default:
    return std::nullopt;
  }
}

/// Classify a kernel argument for the ".value_kind" metadata key. Pipes and
/// the OpenCL image/sampler/queue opaque types are recognized by their base
/// type name; any other pointer is either a dynamic shared pointer (LDS) or a
/// global buffer, and everything else is passed by value.
StringRef
MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
                                        StringRef BaseTypeName) const {
  if (TypeQual.contains("pipe"))
    return "pipe";

  return StringSwitch<StringRef>(BaseTypeName)
      .Case("image1d_t", "image")
      .Case("image1d_array_t", "image")
      .Case("image1d_buffer_t", "image")
      .Case("image2d_t", "image")
      .Case("image2d_array_t", "image")
      .Case("image2d_array_depth_t", "image")
      .Case("image2d_array_msaa_t", "image")
      .Case("image2d_array_msaa_depth_t", "image")
      .Case("image2d_depth_t", "image")
      .Case("image2d_msaa_t", "image")
      .Case("image2d_msaa_depth_t", "image")
      .Case("image3d_t", "image")
      .Case("sampler_t", "sampler")
      .Case("queue_t", "queue")
      .Default(isa<PointerType>(Ty)
                   ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
                          ? "dynamic_shared_pointer"
                          : "global_buffer")
                   : "by_value");
}

/// Produce the OpenCL-style type name used in metadata for \p Ty. Integers
/// map to char/short/int/long (or iN for other widths) with a 'u' prefix when
/// \p Signed is false; vectors append the element count to the element name.
std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
                                                   bool Signed) const {
  switch (Ty->getTypeID()) {
  case Type::IntegerTyID: {
    if (!Signed)
      return (Twine('u') + getTypeName(Ty, true)).str();

    auto BitWidth = Ty->getIntegerBitWidth();
    switch (BitWidth) {
    case 8:
      return "char";
    case 16:
      return "short";
    case 32:
      return "int";
    case 64:
      return "long";
    default:
      return (Twine('i') + Twine(BitWidth)).str();
    }
  }
  case Type::HalfTyID:
    return "half";
  case Type::FloatTyID:
    return "float";
  case Type::DoubleTyID:
    return "double";
  case Type::FixedVectorTyID: {
    auto *VecTy = cast<FixedVectorType>(Ty);
    auto *ElTy = VecTy->getElementType();
    auto NumElements = VecTy->getNumElements();
    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
  }
  default:
    return "unknown";
  }
}

/// Convert a 3-operand MDNode of ConstantInts (e.g. reqd_work_group_size)
/// into a metadata array node. Returns an empty array if the node does not
/// have exactly three operands.
msgpack::ArrayDocNode
MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
  auto Dims = HSAMetadataDoc->getArrayNode();
  if (Node->getNumOperands() != 3)
    return Dims;

  for (auto &Op : Node->operands())
    Dims.push_back(Dims.getDocument()->getNode(
        uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
  return Dims;
}

/// Emit the "amdhsa.version" root key as [major, minor] for metadata V4.
void MetadataStreamerMsgPackV4::emitVersion() {
  auto Version = HSAMetadataDoc->getArrayNode();
  Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
  Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
  getRootMetadata("amdhsa.version") = Version;
}

/// Emit the "amdhsa.target" root key with the target-ID string.
void MetadataStreamerMsgPackV4::emitTargetID(
    const IsaInfo::AMDGPUTargetID &TargetID) {
  getRootMetadata("amdhsa.target") =
      HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
}

/// Collect the format strings from the module's "llvm.printf.fmts" named
/// metadata into the "amdhsa.printf" root key. Does nothing if the module
/// has no such metadata.
void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
  auto *Node = Mod.getNamedMetadata("llvm.printf.fmts");
  if (!Node)
    return;

  auto Printf = HSAMetadataDoc->getArrayNode();
  for (auto *Op : Node->operands())
    if (Op->getNumOperands())
      Printf.push_back(Printf.getDocument()->getNode(
          cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
  getRootMetadata("amdhsa.printf") = Printf;
}

/// Emit ".language"/".language_version" for \p Func from the module-level
/// "opencl.ocl.version" metadata, when it carries at least a major and a
/// minor version operand.
void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
                                                   msgpack::MapDocNode Kern) {
  // TODO: What about other languages?
  auto *Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
  if (!Node || !Node->getNumOperands())
    return;
  auto *Op0 = Node->getOperand(0);
  if (Op0->getNumOperands() <= 1)
    return;

  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
  auto LanguageVersion = Kern.getDocument()->getArrayNode();
  LanguageVersion.push_back(Kern.getDocument()->getNode(
      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
  LanguageVersion.push_back(Kern.getDocument()->getNode(
      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
  Kern[".language_version"] = LanguageVersion;
}

/// Emit per-kernel attribute keys derived from function metadata and
/// attributes: work-group size hints, vector type hint, the device-enqueue
/// runtime-handle symbol, and the init/fini kernel kind.
void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
                                                const Function &Func,
                                                msgpack::MapDocNode Kern) {

  if (auto *Node = Func.getMetadata("reqd_work_group_size"))
    Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
  if (auto *Node = Func.getMetadata("work_group_size_hint"))
    Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
  if (auto *Node = Func.getMetadata("vec_type_hint")) {
    Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
        getTypeName(
            cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
            mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
        /*Copy=*/true);
  }

  std::string HandleName = getEnqueuedBlockSymbolName(TM, Func);
  if (!HandleName.empty()) {
    Kern[".device_enqueue_symbol"] =
        Kern.getDocument()->getNode(std::move(HandleName), /*Copy=*/true);
  }

  if (Func.hasFnAttribute("device-init"))
    Kern[".kind"] = Kern.getDocument()->getNode("init");
  else if (Func.hasFnAttribute("device-fini"))
    Kern[".kind"] = Kern.getDocument()->getNode("fini");
}

/// Emit the ".args" array for the kernel: each explicit IR argument (except
/// those tagged "amdgpu-hidden-argument") followed by the hidden (implicit)
/// arguments. \p Offset tracking is shared across all arguments.
void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
                                               msgpack::MapDocNode Kern) {
  auto &Func = MF.getFunction();
  unsigned Offset = 0;
  auto Args = HSAMetadataDoc->getArrayNode();
  for (auto &Arg : Func.args()) {
    if (Arg.hasAttribute("amdgpu-hidden-argument"))
      continue;

    emitKernelArg(Arg, Offset, Args);
  }

  emitHiddenKernelArgs(MF, Offset, Args);

  Kern[".args"] = Args;
}

/// Gather name/type/qualifier strings for \p Arg from the kernel_arg_*
/// function metadata (indexed by argument number), derive the value kind and
/// alignment, and forward to the low-level emitKernelArg overload.
/// \p Offset is advanced past this argument.
void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
                                              unsigned &Offset,
                                              msgpack::ArrayDocNode Args) {
  const auto *Func = Arg.getParent();
  auto ArgNo = Arg.getArgNo();
  const MDNode *Node;

  StringRef Name;
  Node = Func->getMetadata("kernel_arg_name");
  if (Node && ArgNo < Node->getNumOperands())
    Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
  else if (Arg.hasName())
    Name = Arg.getName();

  StringRef TypeName;
  Node = Func->getMetadata("kernel_arg_type");
  if (Node && ArgNo < Node->getNumOperands())
    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();

  StringRef BaseTypeName;
  Node = Func->getMetadata("kernel_arg_base_type");
  if (Node && ArgNo < Node->getNumOperands())
    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();

  // Derive the actual access from the pointer attributes: readonly ->
  // read_only, writeonly -> write_only.
  StringRef ActAccQual;
  // Do we really need NoAlias check here?
  if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
    if (Arg.onlyReadsMemory())
      ActAccQual = "read_only";
    else if (Arg.hasAttribute(Attribute::WriteOnly))
      ActAccQual = "write_only";
  }

  StringRef AccQual;
  Node = Func->getMetadata("kernel_arg_access_qual");
  if (Node && ArgNo < Node->getNumOperands())
    AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();

  StringRef TypeQual;
  Node = Func->getMetadata("kernel_arg_type_qual");
  if (Node && ArgNo < Node->getNumOperands())
    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();

  const DataLayout &DL = Func->getDataLayout();

  // Only LDS pointers report a pointee alignment; it comes from the explicit
  // param alignment, defaulting to 1.
  MaybeAlign PointeeAlign;
  Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();

  // FIXME: Need to distinguish in memory alignment from pointer alignment.
  if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
      PointeeAlign = Arg.getParamAlign().valueOrOne();
  }

  // There's no distinction between byval aggregates and raw aggregates.
  Type *ArgTy;
  Align ArgAlign;
  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);

  emitKernelArg(DL, ArgTy, ArgAlign,
                getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
                PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
                AccQual, TypeQual);
}

/// Append one argument descriptor map to \p Args. Aligns \p Offset to
/// \p Alignment for the ".offset" key and then advances it by the argument's
/// alloc size, so successive calls lay out the kernarg segment.
void MetadataStreamerMsgPackV4::emitKernelArg(
    const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
    unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
    StringRef Name, StringRef TypeName, StringRef BaseTypeName,
    StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
  auto Arg = Args.getDocument()->getMapNode();

  if (!Name.empty())
    Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
  if (!TypeName.empty())
    Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
  auto Size = DL.getTypeAllocSize(Ty);
  Arg[".size"] = Arg.getDocument()->getNode(Size);
  Offset = alignTo(Offset, Alignment);
  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
  Offset += Size;
  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
  if (PointeeAlign)
    Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());

  if (auto *PtrTy = dyn_cast<PointerType>(Ty))
    if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
      // Limiting address space to emit only for a certain ValueKind.
      if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
        Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
                                                           /*Copy=*/true);

  if (auto AQ = getAccessQualifier(AccQual))
    Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);

  if (auto AAQ = getAccessQualifier(ActAccQual))
    Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);

  // The type-qualifier string is space-separated; each recognized qualifier
  // sets its own boolean key.
  SmallVector<StringRef, 1> SplitTypeQuals;
  TypeQual.split(SplitTypeQuals, " ", -1, false);
  for (StringRef Key : SplitTypeQuals) {
    if (Key == "const")
      Arg[".is_const"] = Arg.getDocument()->getNode(true);
    else if (Key == "restrict")
      Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
    else if (Key == "volatile")
      Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
    else if (Key == "pipe")
      Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
  }

  Args.push_back(Arg);
}

/// Emit the pre-COV5 hidden (implicit) kernel arguments. Which arguments are
/// present is gated on the subtarget's implicit-arg byte count; each 8-byte
/// slot past the global offsets is either a real pointer argument or a
/// "hidden_none" placeholder.
void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
    const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
  auto &Func = MF.getFunction();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
  if (!HiddenArgNumBytes)
    return;

  const Module *M = Func.getParent();
  auto &DL = M->getDataLayout();
  auto *Int64Ty = Type::getInt64Ty(Func.getContext());

  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());

  if (HiddenArgNumBytes >= 8)
    emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
                  Args);
  if (HiddenArgNumBytes >= 16)
    emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
                  Args);
  if (HiddenArgNumBytes >= 24)
    emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
                  Args);

  auto *Int8PtrTy =
      PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);

  if (HiddenArgNumBytes >= 32) {
    // We forbid the use of features requiring hostcall when compiling OpenCL
    // before code object V5, which makes the mutual exclusion between the
    // "printf buffer" and "hostcall buffer" here sound.
    if (M->getNamedMetadata("llvm.printf.fmts"))
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
                    Args);
    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
                    Args);
    else
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
  }

  // Emit "default queue" and "completion action" arguments if enqueue kernel is
  // used, otherwise emit dummy "none" arguments.
  if (HiddenArgNumBytes >= 40) {
    if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
                    Args);
    } else {
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
    }
  }

  if (HiddenArgNumBytes >= 48) {
    if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
                    Args);
    } else {
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
    }
  }

  // Emit the pointer argument for multi-grid object.
  if (HiddenArgNumBytes >= 56) {
    if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
                    Args);
    } else {
      emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
    }
  }
}

/// Build the per-kernel properties map: segment sizes, register counts,
/// workgroup limits, and spill counts. Values that depend on MC expressions
/// not yet resolvable (scratch size, SGPR/VGPR/AGPR counts, dynamic stack)
/// are recorded through DelayedExprs and filled in at emit time.
msgpack::MapDocNode
MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
                                             const SIProgramInfo &ProgramInfo,
                                             unsigned CodeObjectVersion) const {
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  const Function &F = MF.getFunction();

  auto Kern = HSAMetadataDoc->getMapNode();

  Align MaxKernArgAlign;
  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
      STM.getKernArgSegmentSize(F, MaxKernArgAlign));
  Kern[".group_segment_fixed_size"] =
      Kern.getDocument()->getNode(ProgramInfo.LDSSize);
  DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],
                              msgpack::Type::UInt, ProgramInfo.ScratchSize);
  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
    DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],
                                msgpack::Type::Boolean,
                                ProgramInfo.DynamicCallStack);
  }

  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
    Kern[".workgroup_processor_mode"] =
        Kern.getDocument()->getNode(ProgramInfo.WgpMode);

  // FIXME: The metadata treats the minimum as 16?
  Kern[".kernarg_segment_align"] =
      Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
  Kern[".wavefront_size"] =
      Kern.getDocument()->getNode(STM.getWavefrontSize());
  DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt,
                              ProgramInfo.NumSGPR);
  DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt,
                              ProgramInfo.NumVGPR);

  // Only add AGPR count to metadata for supported devices
  if (STM.hasMAIInsts()) {
    DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt,
                                ProgramInfo.NumAccVGPR);
  }

  Kern[".max_flat_workgroup_size"] =
      Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());

  uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
  uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
  uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();

  // TODO: Should consider 0 invalid and reject in IR verifier.
  if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)
    Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);

  if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)
    Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);

  if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)
    Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);

  Kern[".sgpr_spill_count"] =
      Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
  Kern[".vgpr_spill_count"] =
      Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());

  return Kern;
}

/// Resolve all delayed expressions and hand the finished document to the
/// target streamer. Returns whatever EmitHSAMetadata reports.
bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
  DelayedExprs->resolveDelayedExpressions();
  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
}

/// Start a metadata document for \p Mod: version, target ID, printf formats,
/// and an empty "amdhsa.kernels" array that emitKernel() appends to.
void MetadataStreamerMsgPackV4::begin(const Module &Mod,
                                      const IsaInfo::AMDGPUTargetID &TargetID) {
  emitVersion();
  emitTargetID(TargetID);
  emitPrintf(Mod);
  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
  DelayedExprs->clear();
}

/// Finish the document: resolve delayed expressions, then optionally dump
/// and/or verify the YAML form depending on the cl::opts.
void MetadataStreamerMsgPackV4::end() {
  DelayedExprs->resolveDelayedExpressions();
  std::string HSAMetadataString;
  raw_string_ostream StrOS(HSAMetadataString);
  HSAMetadataDoc->toYAML(StrOS);

  if (DumpHSAMetadata)
    dump(StrOS.str());
  if (VerifyHSAMetadata)
    verify(StrOS.str());
}

/// Emit the full metadata entry for one kernel and append it to
/// "amdhsa.kernels". Non-kernel calling conventions are ignored.
void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
                                           const SIProgramInfo &ProgramInfo) {
  auto &Func = MF.getFunction();
  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
      Func.getCallingConv() != CallingConv::SPIR_KERNEL)
    return;

  auto CodeObjectVersion =
      AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
  auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);

  auto Kernels =
      getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);

  auto &TM = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
  {
    Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
    // The kernel descriptor symbol is the function name plus ".kd".
    Kern[".symbol"] = Kern.getDocument()->getNode(
        (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
    emitKernelLanguage(Func, Kern);
    emitKernelAttrs(TM, Func, Kern);
    emitKernelArgs(MF, Kern);
  }

  Kernels.push_back(Kern);
}

//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV5
//===----------------------------------------------------------------------===//

/// Emit the "amdhsa.version" root key as [major, minor] for metadata V5.
void MetadataStreamerMsgPackV5::emitVersion() {
  auto Version = HSAMetadataDoc->getArrayNode();
  Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
  Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
  getRootMetadata("amdhsa.version") = Version;
}

/// Emit the COV5 hidden (implicit) kernel arguments. Unlike V4, the layout is
/// fixed: unused slots are skipped by advancing Offset (no "hidden_none"
/// placeholders), so the explicit Offset bumps below preserve the implicit-arg
/// struct layout.
void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
    const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
  auto &Func = MF.getFunction();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  // No implicit kernel argument is used.
  if (ST.getImplicitArgNumBytes(Func) == 0)
    return;

  const Module *M = Func.getParent();
  auto &DL = M->getDataLayout();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  auto *Int64Ty = Type::getInt64Ty(Func.getContext());
  auto *Int32Ty = Type::getInt32Ty(Func.getContext());
  auto *Int16Ty = Type::getInt16Ty(Func.getContext());

  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);

  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);

  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);

  // Reserved for hidden_tool_correlation_id.
  Offset += 8;

  Offset += 8; // Reserved.

  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);

  emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);

  Offset += 6; // Reserved.
  auto *Int8PtrTy =
      PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);

  if (M->getNamedMetadata("llvm.printf.fmts")) {
    emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
                  Args);
  } else {
    Offset += 8; // Skipped.
  }

  if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
    emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
                  Args);
  } else {
    Offset += 8; // Skipped.
  }

  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
    emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
                  Args);
  } else {
    Offset += 8; // Skipped.
  }

  if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
    emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
  else
    Offset += 8; // Skipped.

  if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
    emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
                  Args);
  } else {
    Offset += 8; // Skipped.
  }

  if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
    emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
                  Args);
  } else {
    Offset += 8; // Skipped.
  }

  // Emit argument for hidden dynamic lds size
  if (MFI.isDynamicLDSUsed()) {
    emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
                  Args);
  } else {
    Offset += 4; // skipped
  }

  Offset += 68; // Reserved.

  // hidden_private_base and hidden_shared_base are only when the subtarget has
  // ApertureRegs.
  if (!ST.hasApertureRegs()) {
    emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
    emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
  } else {
    Offset += 8; // Skipped.
  }

  if (MFI.getUserSGPRInfo().hasQueuePtr())
    emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
}

/// COV5 kernel attributes: everything V4 emits, plus the
/// ".uniform_work_group_size" flag from the function attribute of the same
/// name.
void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
                                                const Function &Func,
                                                msgpack::MapDocNode Kern) {
  MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern);

  if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
    Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
}

//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV6
//===----------------------------------------------------------------------===//

/// Emit the "amdhsa.version" root key as [major, minor] for metadata V6.
/// All other behavior is inherited from the V5 streamer.
void MetadataStreamerMsgPackV6::emitVersion() {
  auto Version = HSAMetadataDoc->getArrayNode();
  Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
  Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
  getRootMetadata("amdhsa.version") = Version;
}

} // end namespace AMDGPU::HSAMD
} // end namespace llvm