1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "llvm/IR/Module.h" 22 using namespace llvm; 23 24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 25 const DataLayout &DL) { 26 Type *Ty = Arg.getType(); 27 MaybeAlign ArgAlign; 28 if (Arg.hasByRefAttr()) { 29 Ty = Arg.getParamByRefType(); 30 ArgAlign = Arg.getParamAlign(); 31 } 32 33 if (!ArgAlign) 34 ArgAlign = DL.getABITypeAlign(Ty); 35 36 return std::pair(Ty, *ArgAlign); 37 } 38 39 namespace llvm { 40 41 static cl::opt<bool> DumpHSAMetadata( 42 "amdgpu-dump-hsa-metadata", 43 cl::desc("Dump AMDGPU HSA Metadata")); 44 static cl::opt<bool> VerifyHSAMetadata( 45 "amdgpu-verify-hsa-metadata", 46 cl::desc("Verify AMDGPU HSA Metadata")); 47 48 namespace AMDGPU { 49 namespace HSAMD { 50 51 //===----------------------------------------------------------------------===// 52 // HSAMetadataStreamerV4 53 //===----------------------------------------------------------------------===// 54 55 void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { 56 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 57 } 58 59 void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { 60 errs() << "AMDGPU HSA Metadata Parser Test: "; 61 62 msgpack::Document FromHSAMetadataString; 63 64 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 65 errs() << "FAIL\n"; 66 return; 67 } 68 69 std::string ToHSAMetadataString; 70 raw_string_ostream StrOS(ToHSAMetadataString); 71 FromHSAMetadataString.toYAML(StrOS); 72 73 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 74 if (HSAMetadataString != ToHSAMetadataString) { 75 errs() << "Original input: " << HSAMetadataString << '\n' 76 << "Produced output: " << StrOS.str() << '\n'; 77 } 78 } 79 80 std::optional<StringRef> 81 MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { 82 return StringSwitch<std::optional<StringRef>>(AccQual) 83 .Case("read_only", StringRef("read_only")) 84 .Case("write_only", StringRef("write_only")) 85 .Case("read_write", StringRef("read_write")) 86 .Default(std::nullopt); 87 } 88 89 std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier( 90 unsigned AddressSpace) const { 91 switch (AddressSpace) { 92 case AMDGPUAS::PRIVATE_ADDRESS: 93 return StringRef("private"); 94 case AMDGPUAS::GLOBAL_ADDRESS: 95 return StringRef("global"); 96 case AMDGPUAS::CONSTANT_ADDRESS: 97 return StringRef("constant"); 98 case AMDGPUAS::LOCAL_ADDRESS: 99 return StringRef("local"); 100 case AMDGPUAS::FLAT_ADDRESS: 101 return StringRef("generic"); 102 case AMDGPUAS::REGION_ADDRESS: 103 return StringRef("region"); 104 default: 105 return std::nullopt; 106 } 107 } 108 109 StringRef 110 MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, 111 StringRef BaseTypeName) const { 112 if (TypeQual.contains("pipe")) 113 return "pipe"; 114 115 return StringSwitch<StringRef>(BaseTypeName) 116 .Case("image1d_t", "image") 117 .Case("image1d_array_t", "image") 118 .Case("image1d_buffer_t", "image") 119 .Case("image2d_t", "image") 120 .Case("image2d_array_t", "image") 121 .Case("image2d_array_depth_t", "image") 122 .Case("image2d_array_msaa_t", "image") 123 .Case("image2d_array_msaa_depth_t", "image") 124 .Case("image2d_depth_t", "image") 125 .Case("image2d_msaa_t", "image") 126 .Case("image2d_msaa_depth_t", "image") 127 .Case("image3d_t", "image") 128 .Case("sampler_t", "sampler") 129 .Case("queue_t", "queue") 130 .Default(isa<PointerType>(Ty) 131 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 132 ? "dynamic_shared_pointer" 133 : "global_buffer") 134 : "by_value"); 135 } 136 137 std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, 138 bool Signed) const { 139 switch (Ty->getTypeID()) { 140 case Type::IntegerTyID: { 141 if (!Signed) 142 return (Twine('u') + getTypeName(Ty, true)).str(); 143 144 auto BitWidth = Ty->getIntegerBitWidth(); 145 switch (BitWidth) { 146 case 8: 147 return "char"; 148 case 16: 149 return "short"; 150 case 32: 151 return "int"; 152 case 64: 153 return "long"; 154 default: 155 return (Twine('i') + Twine(BitWidth)).str(); 156 } 157 } 158 case Type::HalfTyID: 159 return "half"; 160 case Type::FloatTyID: 161 return "float"; 162 case Type::DoubleTyID: 163 return "double"; 164 case Type::FixedVectorTyID: { 165 auto VecTy = cast<FixedVectorType>(Ty); 166 auto ElTy = VecTy->getElementType(); 167 auto NumElements = VecTy->getNumElements(); 168 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 169 } 170 default: 171 return "unknown"; 172 } 173 } 174 175 msgpack::ArrayDocNode 176 MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { 177 auto Dims = HSAMetadataDoc->getArrayNode(); 178 if (Node->getNumOperands() != 3) 179 return Dims; 180 181 for (auto &Op : Node->operands()) 182 Dims.push_back(Dims.getDocument()->getNode( 183 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 184 return Dims; 185 } 186 187 void MetadataStreamerMsgPackV4::emitVersion() { 188 auto Version = HSAMetadataDoc->getArrayNode(); 189 Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); 190 Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); 191 getRootMetadata("amdhsa.version") = Version; 192 } 193 194 void MetadataStreamerMsgPackV4::emitTargetID( 195 const IsaInfo::AMDGPUTargetID &TargetID) { 196 getRootMetadata("amdhsa.target") = 197 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); 198 } 199 200 void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { 201 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 202 if (!Node) 203 return; 204 205 auto Printf = HSAMetadataDoc->getArrayNode(); 206 for (auto *Op : Node->operands()) 207 if (Op->getNumOperands()) 208 Printf.push_back(Printf.getDocument()->getNode( 209 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 210 getRootMetadata("amdhsa.printf") = Printf; 211 } 212 213 void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, 214 msgpack::MapDocNode Kern) { 215 // TODO: What about other languages? 216 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 217 if (!Node || !Node->getNumOperands()) 218 return; 219 auto Op0 = Node->getOperand(0); 220 if (Op0->getNumOperands() <= 1) 221 return; 222 223 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 224 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 225 LanguageVersion.push_back(Kern.getDocument()->getNode( 226 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 227 LanguageVersion.push_back(Kern.getDocument()->getNode( 228 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 229 Kern[".language_version"] = LanguageVersion; 230 } 231 232 void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func, 233 msgpack::MapDocNode Kern) { 234 235 if (auto Node = Func.getMetadata("reqd_work_group_size")) 236 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 237 if (auto Node = Func.getMetadata("work_group_size_hint")) 238 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 239 if (auto Node = Func.getMetadata("vec_type_hint")) { 240 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 241 getTypeName( 242 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 243 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 244 /*Copy=*/true); 245 } 246 if (Func.hasFnAttribute("runtime-handle")) { 247 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 248 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 249 /*Copy=*/true); 250 } 251 if (Func.hasFnAttribute("device-init")) 252 Kern[".kind"] = Kern.getDocument()->getNode("init"); 253 else if (Func.hasFnAttribute("device-fini")) 254 Kern[".kind"] = Kern.getDocument()->getNode("fini"); 255 } 256 257 void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, 258 msgpack::MapDocNode Kern) { 259 auto &Func = MF.getFunction(); 260 unsigned Offset = 0; 261 auto Args = HSAMetadataDoc->getArrayNode(); 262 for (auto &Arg : Func.args()) 263 emitKernelArg(Arg, Offset, Args); 264 265 emitHiddenKernelArgs(MF, Offset, Args); 266 267 Kern[".args"] = Args; 268 } 269 270 void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, 271 unsigned &Offset, 272 msgpack::ArrayDocNode Args) { 273 auto Func = Arg.getParent(); 274 auto ArgNo = Arg.getArgNo(); 275 const MDNode *Node; 276 277 StringRef Name; 278 Node = Func->getMetadata("kernel_arg_name"); 279 if (Node && ArgNo < Node->getNumOperands()) 280 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 281 else if (Arg.hasName()) 282 Name = Arg.getName(); 283 284 StringRef TypeName; 285 Node = Func->getMetadata("kernel_arg_type"); 286 if (Node && ArgNo < Node->getNumOperands()) 287 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 288 289 StringRef BaseTypeName; 290 Node = Func->getMetadata("kernel_arg_base_type"); 291 if (Node && ArgNo < Node->getNumOperands()) 292 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 293 294 StringRef ActAccQual; 295 // Do we really need NoAlias check here? 296 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) { 297 if (Arg.onlyReadsMemory()) 298 ActAccQual = "read_only"; 299 else if (Arg.hasAttribute(Attribute::WriteOnly)) 300 ActAccQual = "write_only"; 301 } 302 303 StringRef AccQual; 304 Node = Func->getMetadata("kernel_arg_access_qual"); 305 if (Node && ArgNo < Node->getNumOperands()) 306 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 307 308 StringRef TypeQual; 309 Node = Func->getMetadata("kernel_arg_type_qual"); 310 if (Node && ArgNo < Node->getNumOperands()) 311 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 312 313 const DataLayout &DL = Func->getParent()->getDataLayout(); 314 315 MaybeAlign PointeeAlign; 316 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 317 318 // FIXME: Need to distinguish in memory alignment from pointer alignment. 319 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 320 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) 321 PointeeAlign = Arg.getParamAlign().valueOrOne(); 322 } 323 324 // There's no distinction between byval aggregates and raw aggregates. 325 Type *ArgTy; 326 Align ArgAlign; 327 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 328 329 emitKernelArg(DL, ArgTy, ArgAlign, 330 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 331 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual, 332 AccQual, TypeQual); 333 } 334 335 void MetadataStreamerMsgPackV4::emitKernelArg( 336 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 337 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 338 StringRef Name, StringRef TypeName, StringRef BaseTypeName, 339 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) { 340 auto Arg = Args.getDocument()->getMapNode(); 341 342 if (!Name.empty()) 343 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 344 if (!TypeName.empty()) 345 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 346 auto Size = DL.getTypeAllocSize(Ty); 347 Arg[".size"] = Arg.getDocument()->getNode(Size); 348 Offset = alignTo(Offset, Alignment); 349 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 350 Offset += Size; 351 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 352 if (PointeeAlign) 353 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 354 355 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 356 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 357 // Limiting address space to emit only for a certain ValueKind. 358 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer") 359 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, 360 /*Copy=*/true); 361 362 if (auto AQ = getAccessQualifier(AccQual)) 363 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 364 365 if (auto AAQ = getAccessQualifier(ActAccQual)) 366 Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true); 367 368 SmallVector<StringRef, 1> SplitTypeQuals; 369 TypeQual.split(SplitTypeQuals, " ", -1, false); 370 for (StringRef Key : SplitTypeQuals) { 371 if (Key == "const") 372 Arg[".is_const"] = Arg.getDocument()->getNode(true); 373 else if (Key == "restrict") 374 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 375 else if (Key == "volatile") 376 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 377 else if (Key == "pipe") 378 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 379 } 380 381 Args.push_back(Arg); 382 } 383 384 void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( 385 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 386 auto &Func = MF.getFunction(); 387 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 388 389 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 390 if (!HiddenArgNumBytes) 391 return; 392 393 const Module *M = Func.getParent(); 394 auto &DL = M->getDataLayout(); 395 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 396 397 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 398 399 if (HiddenArgNumBytes >= 8) 400 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 401 Args); 402 if (HiddenArgNumBytes >= 16) 403 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 404 Args); 405 if (HiddenArgNumBytes >= 24) 406 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 407 Args); 408 409 auto Int8PtrTy = 410 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 411 412 if (HiddenArgNumBytes >= 32) { 413 // We forbid the use of features requiring hostcall when compiling OpenCL 414 // before code object V5, which makes the mutual exclusion between the 415 // "printf buffer" and "hostcall buffer" here sound. 416 if (M->getNamedMetadata("llvm.printf.fmts")) 417 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 418 Args); 419 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 420 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 421 Args); 422 else 423 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 424 } 425 426 // Emit "default queue" and "completion action" arguments if enqueue kernel is 427 // used, otherwise emit dummy "none" arguments. 428 if (HiddenArgNumBytes >= 40) { 429 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 430 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 431 Args); 432 } else { 433 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 434 } 435 } 436 437 if (HiddenArgNumBytes >= 48) { 438 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { 439 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 440 Args); 441 } else { 442 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 443 } 444 } 445 446 // Emit the pointer argument for multi-grid object. 447 if (HiddenArgNumBytes >= 56) { 448 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 449 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 450 Args); 451 } else { 452 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 453 } 454 } 455 } 456 457 msgpack::MapDocNode 458 MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, 459 const SIProgramInfo &ProgramInfo, 460 unsigned CodeObjectVersion) const { 461 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 462 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 463 const Function &F = MF.getFunction(); 464 465 auto Kern = HSAMetadataDoc->getMapNode(); 466 467 Align MaxKernArgAlign; 468 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 469 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 470 Kern[".group_segment_fixed_size"] = 471 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 472 Kern[".private_segment_fixed_size"] = 473 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 474 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) 475 Kern[".uses_dynamic_stack"] = 476 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); 477 478 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP()) 479 Kern[".workgroup_processor_mode"] = 480 Kern.getDocument()->getNode(ProgramInfo.WgpMode); 481 482 // FIXME: The metadata treats the minimum as 16? 483 Kern[".kernarg_segment_align"] = 484 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 485 Kern[".wavefront_size"] = 486 Kern.getDocument()->getNode(STM.getWavefrontSize()); 487 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 488 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 489 490 // Only add AGPR count to metadata for supported devices 491 if (STM.hasMAIInsts()) { 492 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR); 493 } 494 495 Kern[".max_flat_workgroup_size"] = 496 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 497 Kern[".sgpr_spill_count"] = 498 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 499 Kern[".vgpr_spill_count"] = 500 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 501 502 return Kern; 503 } 504 505 bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 506 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 507 } 508 509 void MetadataStreamerMsgPackV4::begin(const Module &Mod, 510 const IsaInfo::AMDGPUTargetID &TargetID) { 511 emitVersion(); 512 emitTargetID(TargetID); 513 emitPrintf(Mod); 514 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 515 } 516 517 void MetadataStreamerMsgPackV4::end() { 518 std::string HSAMetadataString; 519 raw_string_ostream StrOS(HSAMetadataString); 520 HSAMetadataDoc->toYAML(StrOS); 521 522 if (DumpHSAMetadata) 523 dump(StrOS.str()); 524 if (VerifyHSAMetadata) 525 verify(StrOS.str()); 526 } 527 528 void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, 529 const SIProgramInfo &ProgramInfo) { 530 auto &Func = MF.getFunction(); 531 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && 532 Func.getCallingConv() != CallingConv::SPIR_KERNEL) 533 return; 534 535 auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent()); 536 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); 537 538 auto Kernels = 539 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 540 541 { 542 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 543 Kern[".symbol"] = Kern.getDocument()->getNode( 544 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 545 emitKernelLanguage(Func, Kern); 546 emitKernelAttrs(Func, Kern); 547 emitKernelArgs(MF, Kern); 548 } 549 550 Kernels.push_back(Kern); 551 } 552 553 //===----------------------------------------------------------------------===// 554 // HSAMetadataStreamerV5 555 //===----------------------------------------------------------------------===// 556 557 void MetadataStreamerMsgPackV5::emitVersion() { 558 auto Version = HSAMetadataDoc->getArrayNode(); 559 Version.push_back(Version.getDocument()->getNode(VersionMajorV5)); 560 Version.push_back(Version.getDocument()->getNode(VersionMinorV5)); 561 getRootMetadata("amdhsa.version") = Version; 562 } 563 564 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( 565 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 566 auto &Func = MF.getFunction(); 567 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 568 569 // No implicit kernel argument is used. 570 if (ST.getImplicitArgNumBytes(Func) == 0) 571 return; 572 573 const Module *M = Func.getParent(); 574 auto &DL = M->getDataLayout(); 575 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 576 577 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 578 auto Int32Ty = Type::getInt32Ty(Func.getContext()); 579 auto Int16Ty = Type::getInt16Ty(Func.getContext()); 580 581 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 582 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); 583 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); 584 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); 585 586 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args); 587 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args); 588 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args); 589 590 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args); 591 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args); 592 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args); 593 594 // Reserved for hidden_tool_correlation_id. 595 Offset += 8; 596 597 Offset += 8; // Reserved. 598 599 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); 600 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args); 601 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args); 602 603 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args); 604 605 Offset += 6; // Reserved. 606 auto Int8PtrTy = 607 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 608 609 if (M->getNamedMetadata("llvm.printf.fmts")) { 610 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 611 Args); 612 } else { 613 Offset += 8; // Skipped. 614 } 615 616 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { 617 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 618 Args); 619 } else { 620 Offset += 8; // Skipped. 621 } 622 623 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 624 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 625 Args); 626 } else { 627 Offset += 8; // Skipped. 628 } 629 630 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) 631 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); 632 else 633 Offset += 8; // Skipped. 634 635 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 636 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 637 Args); 638 } else { 639 Offset += 8; // Skipped. 640 } 641 642 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { 643 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 644 Args); 645 } else { 646 Offset += 8; // Skipped. 647 } 648 649 // Emit argument for hidden dynamic lds size 650 if (MFI.isDynamicLDSUsed()) { 651 emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset, 652 Args); 653 } else { 654 Offset += 4; // skipped 655 } 656 657 Offset += 68; // Reserved. 658 659 // hidden_private_base and hidden_shared_base are only when the subtarget has 660 // ApertureRegs. 661 if (!ST.hasApertureRegs()) { 662 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); 663 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); 664 } else { 665 Offset += 8; // Skipped. 666 } 667 668 if (MFI.getUserSGPRInfo().hasQueuePtr()) 669 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); 670 } 671 672 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, 673 msgpack::MapDocNode Kern) { 674 MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern); 675 676 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) 677 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); 678 } 679 680 681 } // end namespace HSAMD 682 } // end namespace AMDGPU 683 } // end namespace llvm 684