1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 static cl::opt<bool> DumpHSAMetadata( 30 "amdgpu-dump-hsa-metadata", 31 cl::desc("Dump AMDGPU HSA Metadata")); 32 static cl::opt<bool> VerifyHSAMetadata( 33 "amdgpu-verify-hsa-metadata", 34 cl::desc("Verify AMDGPU HSA Metadata")); 35 36 namespace AMDGPU { 37 namespace HSAMD { 38 39 //===----------------------------------------------------------------------===// 40 // HSAMetadataStreamerV2 41 //===----------------------------------------------------------------------===// 42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 44 } 45 46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 47 errs() << "AMDGPU HSA Metadata Parser Test: "; 48 49 HSAMD::Metadata FromHSAMetadataString; 50 if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) { 51 errs() << "FAIL\n"; 52 return; 53 } 54 55 std::string ToHSAMetadataString; 56 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 57 errs() << "FAIL\n"; 58 return; 59 } 60 61 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 62 << '\n'; 63 if (HSAMetadataString != ToHSAMetadataString) { 64 errs() << "Original input: " << HSAMetadataString << '\n' 65 << "Produced output: " << ToHSAMetadataString << '\n'; 66 } 67 } 68 69 AccessQualifier 70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 71 if (AccQual.empty()) 72 return AccessQualifier::Unknown; 73 74 return StringSwitch<AccessQualifier>(AccQual) 75 .Case("read_only", AccessQualifier::ReadOnly) 76 .Case("write_only", AccessQualifier::WriteOnly) 77 .Case("read_write", AccessQualifier::ReadWrite) 78 .Default(AccessQualifier::Default); 79 } 80 81 AddressSpaceQualifier 82 MetadataStreamerV2::getAddressSpaceQualifier( 83 unsigned AddressSpace) const { 84 switch (AddressSpace) { 85 case AMDGPUAS::PRIVATE_ADDRESS: 86 return AddressSpaceQualifier::Private; 87 case AMDGPUAS::GLOBAL_ADDRESS: 88 return AddressSpaceQualifier::Global; 89 case AMDGPUAS::CONSTANT_ADDRESS: 90 return AddressSpaceQualifier::Constant; 91 case AMDGPUAS::LOCAL_ADDRESS: 92 return AddressSpaceQualifier::Local; 93 case AMDGPUAS::FLAT_ADDRESS: 94 return AddressSpaceQualifier::Generic; 95 case AMDGPUAS::REGION_ADDRESS: 96 return AddressSpaceQualifier::Region; 97 default: 98 return AddressSpaceQualifier::Unknown; 99 } 100 } 101 102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 103 StringRef BaseTypeName) const { 104 if (TypeQual.find("pipe") != StringRef::npos) 105 return ValueKind::Pipe; 106 107 return StringSwitch<ValueKind>(BaseTypeName) 108 .Case("image1d_t", ValueKind::Image) 109 .Case("image1d_array_t", ValueKind::Image) 110 .Case("image1d_buffer_t", ValueKind::Image) 111 .Case("image2d_t", ValueKind::Image) 112 .Case("image2d_array_t", ValueKind::Image) 113 .Case("image2d_array_depth_t", ValueKind::Image) 114 .Case("image2d_array_msaa_t", ValueKind::Image) 115 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 116 .Case("image2d_depth_t", ValueKind::Image) 117 .Case("image2d_msaa_t", ValueKind::Image) 118 .Case("image2d_msaa_depth_t", ValueKind::Image) 119 .Case("image3d_t", ValueKind::Image) 120 .Case("sampler_t", ValueKind::Sampler) 121 .Case("queue_t", ValueKind::Queue) 122 .Default(isa<PointerType>(Ty) ? 123 (Ty->getPointerAddressSpace() == 124 AMDGPUAS::LOCAL_ADDRESS ? 125 ValueKind::DynamicSharedPointer : 126 ValueKind::GlobalBuffer) : 127 ValueKind::ByValue); 128 } 129 130 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 131 switch (Ty->getTypeID()) { 132 case Type::IntegerTyID: { 133 if (!Signed) 134 return (Twine('u') + getTypeName(Ty, true)).str(); 135 136 auto BitWidth = Ty->getIntegerBitWidth(); 137 switch (BitWidth) { 138 case 8: 139 return "char"; 140 case 16: 141 return "short"; 142 case 32: 143 return "int"; 144 case 64: 145 return "long"; 146 default: 147 return (Twine('i') + Twine(BitWidth)).str(); 148 } 149 } 150 case Type::HalfTyID: 151 return "half"; 152 case Type::FloatTyID: 153 return "float"; 154 case Type::DoubleTyID: 155 return "double"; 156 case Type::FixedVectorTyID: { 157 auto VecTy = cast<FixedVectorType>(Ty); 158 auto ElTy = VecTy->getElementType(); 159 auto NumElements = VecTy->getNumElements(); 160 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 161 } 162 default: 163 return "unknown"; 164 } 165 } 166 167 std::vector<uint32_t> 168 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 169 std::vector<uint32_t> Dims; 170 if (Node->getNumOperands() != 3) 171 return Dims; 172 173 for (auto &Op : Node->operands()) 174 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 175 return Dims; 176 } 177 178 Kernel::CodeProps::Metadata 179 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 180 const SIProgramInfo &ProgramInfo) const { 181 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 182 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 183 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 184 const Function &F = MF.getFunction(); 185 186 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 187 F.getCallingConv() == CallingConv::SPIR_KERNEL); 188 189 Align MaxKernArgAlign; 190 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 191 MaxKernArgAlign); 192 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 193 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 194 HSACodeProps.mKernargSegmentAlign = 195 std::max(MaxKernArgAlign, Align(4)).value(); 196 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 197 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 198 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 199 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 200 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 201 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 202 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 203 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 204 205 return HSACodeProps; 206 } 207 208 Kernel::DebugProps::Metadata 209 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 210 const SIProgramInfo &ProgramInfo) const { 211 return HSAMD::Kernel::DebugProps::Metadata(); 212 } 213 214 void MetadataStreamerV2::emitVersion() { 215 auto &Version = HSAMetadata.mVersion; 216 217 Version.push_back(VersionMajor); 218 Version.push_back(VersionMinor); 219 } 220 221 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 222 auto &Printf = HSAMetadata.mPrintf; 223 224 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 225 if (!Node) 226 return; 227 228 for (auto Op : Node->operands()) 229 if (Op->getNumOperands()) 230 Printf.push_back( 231 std::string(cast<MDString>(Op->getOperand(0))->getString())); 232 } 233 234 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 235 auto &Kernel = HSAMetadata.mKernels.back(); 236 237 // TODO: What about other languages? 238 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 239 if (!Node || !Node->getNumOperands()) 240 return; 241 auto Op0 = Node->getOperand(0); 242 if (Op0->getNumOperands() <= 1) 243 return; 244 245 Kernel.mLanguage = "OpenCL C"; 246 Kernel.mLanguageVersion.push_back( 247 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 248 Kernel.mLanguageVersion.push_back( 249 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 250 } 251 252 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 253 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 254 255 if (auto Node = Func.getMetadata("reqd_work_group_size")) 256 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 257 if (auto Node = Func.getMetadata("work_group_size_hint")) 258 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 259 if (auto Node = Func.getMetadata("vec_type_hint")) { 260 Attrs.mVecTypeHint = getTypeName( 261 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 262 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 263 } 264 if (Func.hasFnAttribute("runtime-handle")) { 265 Attrs.mRuntimeHandle = 266 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 267 } 268 } 269 270 void MetadataStreamerV2::emitKernelArgs(const Function &Func) { 271 for (auto &Arg : Func.args()) 272 emitKernelArg(Arg); 273 274 emitHiddenKernelArgs(Func); 275 } 276 277 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 278 auto Func = Arg.getParent(); 279 auto ArgNo = Arg.getArgNo(); 280 const MDNode *Node; 281 282 StringRef Name; 283 Node = Func->getMetadata("kernel_arg_name"); 284 if (Node && ArgNo < Node->getNumOperands()) 285 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 286 else if (Arg.hasName()) 287 Name = Arg.getName(); 288 289 StringRef TypeName; 290 Node = Func->getMetadata("kernel_arg_type"); 291 if (Node && ArgNo < Node->getNumOperands()) 292 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 293 294 StringRef BaseTypeName; 295 Node = Func->getMetadata("kernel_arg_base_type"); 296 if (Node && ArgNo < Node->getNumOperands()) 297 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 298 299 StringRef AccQual; 300 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 301 Arg.hasNoAliasAttr()) { 302 AccQual = "read_only"; 303 } else { 304 Node = Func->getMetadata("kernel_arg_access_qual"); 305 if (Node && ArgNo < Node->getNumOperands()) 306 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 307 } 308 309 StringRef TypeQual; 310 Node = Func->getMetadata("kernel_arg_type_qual"); 311 if (Node && ArgNo < Node->getNumOperands()) 312 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 313 314 Type *Ty = Arg.getType(); 315 const DataLayout &DL = Func->getParent()->getDataLayout(); 316 317 MaybeAlign PointeeAlign; 318 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 319 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 320 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(), 321 PtrTy->getElementType()); 322 } 323 } 324 325 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), 326 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 327 } 328 329 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 330 ValueKind ValueKind, 331 MaybeAlign PointeeAlign, StringRef Name, 332 StringRef TypeName, 333 StringRef BaseTypeName, 334 StringRef AccQual, StringRef TypeQual) { 335 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 336 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 337 338 Arg.mName = std::string(Name); 339 Arg.mTypeName = std::string(TypeName); 340 Arg.mSize = DL.getTypeAllocSize(Ty); 341 Arg.mAlign = DL.getABITypeAlign(Ty).value(); 342 Arg.mValueKind = ValueKind; 343 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; 344 345 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 346 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 347 348 Arg.mAccQual = getAccessQualifier(AccQual); 349 350 // TODO: Emit Arg.mActualAccQual. 351 352 SmallVector<StringRef, 1> SplitTypeQuals; 353 TypeQual.split(SplitTypeQuals, " ", -1, false); 354 for (StringRef Key : SplitTypeQuals) { 355 auto P = StringSwitch<bool*>(Key) 356 .Case("const", &Arg.mIsConst) 357 .Case("restrict", &Arg.mIsRestrict) 358 .Case("volatile", &Arg.mIsVolatile) 359 .Case("pipe", &Arg.mIsPipe) 360 .Default(nullptr); 361 if (P) 362 *P = true; 363 } 364 } 365 366 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { 367 int HiddenArgNumBytes = 368 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 369 370 if (!HiddenArgNumBytes) 371 return; 372 373 auto &DL = Func.getParent()->getDataLayout(); 374 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 375 376 if (HiddenArgNumBytes >= 8) 377 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); 378 if (HiddenArgNumBytes >= 16) 379 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); 380 if (HiddenArgNumBytes >= 24) 381 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); 382 383 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 384 AMDGPUAS::GLOBAL_ADDRESS); 385 386 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 387 // "none" argument. 388 if (HiddenArgNumBytes >= 32) { 389 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 390 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); 391 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 392 // The printf runtime binding pass should have ensured that hostcall and 393 // printf are not used in the same module. 394 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 395 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer); 396 } else 397 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 398 } 399 400 // Emit "default queue" and "completion action" arguments if enqueue kernel is 401 // used, otherwise emit dummy "none" arguments. 402 if (HiddenArgNumBytes >= 48) { 403 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 404 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); 405 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); 406 } else { 407 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 408 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 409 } 410 } 411 412 // Emit the pointer argument for multi-grid object. 413 if (HiddenArgNumBytes >= 56) 414 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg); 415 } 416 417 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 418 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 419 } 420 421 void MetadataStreamerV2::begin(const Module &Mod) { 422 emitVersion(); 423 emitPrintf(Mod); 424 } 425 426 void MetadataStreamerV2::end() { 427 std::string HSAMetadataString; 428 if (toString(HSAMetadata, HSAMetadataString)) 429 return; 430 431 if (DumpHSAMetadata) 432 dump(HSAMetadataString); 433 if (VerifyHSAMetadata) 434 verify(HSAMetadataString); 435 } 436 437 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 438 const SIProgramInfo &ProgramInfo) { 439 auto &Func = MF.getFunction(); 440 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 441 return; 442 443 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 444 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 445 446 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 447 auto &Kernel = HSAMetadata.mKernels.back(); 448 449 Kernel.mName = std::string(Func.getName()); 450 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 451 emitKernelLanguage(Func); 452 emitKernelAttrs(Func); 453 emitKernelArgs(Func); 454 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 455 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 456 } 457 458 //===----------------------------------------------------------------------===// 459 // HSAMetadataStreamerV3 460 //===----------------------------------------------------------------------===// 461 462 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 463 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 464 } 465 466 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 467 errs() << "AMDGPU HSA Metadata Parser Test: "; 468 469 msgpack::Document FromHSAMetadataString; 470 471 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 472 errs() << "FAIL\n"; 473 return; 474 } 475 476 std::string ToHSAMetadataString; 477 raw_string_ostream StrOS(ToHSAMetadataString); 478 FromHSAMetadataString.toYAML(StrOS); 479 480 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 481 if (HSAMetadataString != ToHSAMetadataString) { 482 errs() << "Original input: " << HSAMetadataString << '\n' 483 << "Produced output: " << StrOS.str() << '\n'; 484 } 485 } 486 487 Optional<StringRef> 488 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 489 return StringSwitch<Optional<StringRef>>(AccQual) 490 .Case("read_only", StringRef("read_only")) 491 .Case("write_only", StringRef("write_only")) 492 .Case("read_write", StringRef("read_write")) 493 .Default(None); 494 } 495 496 Optional<StringRef> 497 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 498 switch (AddressSpace) { 499 case AMDGPUAS::PRIVATE_ADDRESS: 500 return StringRef("private"); 501 case AMDGPUAS::GLOBAL_ADDRESS: 502 return StringRef("global"); 503 case AMDGPUAS::CONSTANT_ADDRESS: 504 return StringRef("constant"); 505 case AMDGPUAS::LOCAL_ADDRESS: 506 return StringRef("local"); 507 case AMDGPUAS::FLAT_ADDRESS: 508 return StringRef("generic"); 509 case AMDGPUAS::REGION_ADDRESS: 510 return StringRef("region"); 511 default: 512 return None; 513 } 514 } 515 516 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 517 StringRef BaseTypeName) const { 518 if (TypeQual.find("pipe") != StringRef::npos) 519 return "pipe"; 520 521 return StringSwitch<StringRef>(BaseTypeName) 522 .Case("image1d_t", "image") 523 .Case("image1d_array_t", "image") 524 .Case("image1d_buffer_t", "image") 525 .Case("image2d_t", "image") 526 .Case("image2d_array_t", "image") 527 .Case("image2d_array_depth_t", "image") 528 .Case("image2d_array_msaa_t", "image") 529 .Case("image2d_array_msaa_depth_t", "image") 530 .Case("image2d_depth_t", "image") 531 .Case("image2d_msaa_t", "image") 532 .Case("image2d_msaa_depth_t", "image") 533 .Case("image3d_t", "image") 534 .Case("sampler_t", "sampler") 535 .Case("queue_t", "queue") 536 .Default(isa<PointerType>(Ty) 537 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 538 ? "dynamic_shared_pointer" 539 : "global_buffer") 540 : "by_value"); 541 } 542 543 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 544 switch (Ty->getTypeID()) { 545 case Type::IntegerTyID: { 546 if (!Signed) 547 return (Twine('u') + getTypeName(Ty, true)).str(); 548 549 auto BitWidth = Ty->getIntegerBitWidth(); 550 switch (BitWidth) { 551 case 8: 552 return "char"; 553 case 16: 554 return "short"; 555 case 32: 556 return "int"; 557 case 64: 558 return "long"; 559 default: 560 return (Twine('i') + Twine(BitWidth)).str(); 561 } 562 } 563 case Type::HalfTyID: 564 return "half"; 565 case Type::FloatTyID: 566 return "float"; 567 case Type::DoubleTyID: 568 return "double"; 569 case Type::FixedVectorTyID: { 570 auto VecTy = cast<FixedVectorType>(Ty); 571 auto ElTy = VecTy->getElementType(); 572 auto NumElements = VecTy->getNumElements(); 573 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 574 } 575 default: 576 return "unknown"; 577 } 578 } 579 580 msgpack::ArrayDocNode 581 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 582 auto Dims = HSAMetadataDoc->getArrayNode(); 583 if (Node->getNumOperands() != 3) 584 return Dims; 585 586 for (auto &Op : Node->operands()) 587 Dims.push_back(Dims.getDocument()->getNode( 588 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 589 return Dims; 590 } 591 592 void MetadataStreamerV3::emitVersion() { 593 auto Version = HSAMetadataDoc->getArrayNode(); 594 Version.push_back(Version.getDocument()->getNode(VersionMajor)); 595 Version.push_back(Version.getDocument()->getNode(VersionMinor)); 596 getRootMetadata("amdhsa.version") = Version; 597 } 598 599 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 600 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 601 if (!Node) 602 return; 603 604 auto Printf = HSAMetadataDoc->getArrayNode(); 605 for (auto Op : Node->operands()) 606 if (Op->getNumOperands()) 607 Printf.push_back(Printf.getDocument()->getNode( 608 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 609 getRootMetadata("amdhsa.printf") = Printf; 610 } 611 612 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 613 msgpack::MapDocNode Kern) { 614 // TODO: What about other languages? 615 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 616 if (!Node || !Node->getNumOperands()) 617 return; 618 auto Op0 = Node->getOperand(0); 619 if (Op0->getNumOperands() <= 1) 620 return; 621 622 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 623 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 624 LanguageVersion.push_back(Kern.getDocument()->getNode( 625 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 626 LanguageVersion.push_back(Kern.getDocument()->getNode( 627 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 628 Kern[".language_version"] = LanguageVersion; 629 } 630 631 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 632 msgpack::MapDocNode Kern) { 633 634 if (auto Node = Func.getMetadata("reqd_work_group_size")) 635 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 636 if (auto Node = Func.getMetadata("work_group_size_hint")) 637 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 638 if (auto Node = Func.getMetadata("vec_type_hint")) { 639 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 640 getTypeName( 641 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 642 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 643 /*Copy=*/true); 644 } 645 if (Func.hasFnAttribute("runtime-handle")) { 646 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 647 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 648 /*Copy=*/true); 649 } 650 } 651 652 void MetadataStreamerV3::emitKernelArgs(const Function &Func, 653 msgpack::MapDocNode Kern) { 654 unsigned Offset = 0; 655 auto Args = HSAMetadataDoc->getArrayNode(); 656 for (auto &Arg : Func.args()) 657 emitKernelArg(Arg, Offset, Args); 658 659 emitHiddenKernelArgs(Func, Offset, Args); 660 661 Kern[".args"] = Args; 662 } 663 664 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 665 msgpack::ArrayDocNode Args) { 666 auto Func = Arg.getParent(); 667 auto ArgNo = Arg.getArgNo(); 668 const MDNode *Node; 669 670 StringRef Name; 671 Node = Func->getMetadata("kernel_arg_name"); 672 if (Node && ArgNo < Node->getNumOperands()) 673 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 674 else if (Arg.hasName()) 675 Name = Arg.getName(); 676 677 StringRef TypeName; 678 Node = Func->getMetadata("kernel_arg_type"); 679 if (Node && ArgNo < Node->getNumOperands()) 680 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 681 682 StringRef BaseTypeName; 683 Node = Func->getMetadata("kernel_arg_base_type"); 684 if (Node && ArgNo < Node->getNumOperands()) 685 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 686 687 StringRef AccQual; 688 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 689 Arg.hasNoAliasAttr()) { 690 AccQual = "read_only"; 691 } else { 692 Node = Func->getMetadata("kernel_arg_access_qual"); 693 if (Node && ArgNo < Node->getNumOperands()) 694 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 695 } 696 697 StringRef TypeQual; 698 Node = Func->getMetadata("kernel_arg_type_qual"); 699 if (Node && ArgNo < Node->getNumOperands()) 700 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 701 702 Type *Ty = Arg.getType(); 703 const DataLayout &DL = Func->getParent()->getDataLayout(); 704 705 MaybeAlign PointeeAlign; 706 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 707 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 708 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(), 709 PtrTy->getElementType()); 710 } 711 } 712 713 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), 714 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, 715 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, 716 TypeQual); 717 } 718 719 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, 720 StringRef ValueKind, unsigned &Offset, 721 msgpack::ArrayDocNode Args, 722 MaybeAlign PointeeAlign, StringRef Name, 723 StringRef TypeName, 724 StringRef BaseTypeName, 725 StringRef AccQual, StringRef TypeQual) { 726 auto Arg = Args.getDocument()->getMapNode(); 727 728 if (!Name.empty()) 729 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 730 if (!TypeName.empty()) 731 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 732 auto Size = DL.getTypeAllocSize(Ty); 733 Align Alignment = DL.getABITypeAlign(Ty); 734 Arg[".size"] = Arg.getDocument()->getNode(Size); 735 Offset = alignTo(Offset, Alignment); 736 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 737 Offset += Size; 738 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 739 if (PointeeAlign) 740 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 741 742 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 743 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 744 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); 745 746 if (auto AQ = getAccessQualifier(AccQual)) 747 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 748 749 // TODO: Emit Arg[".actual_access"]. 750 751 SmallVector<StringRef, 1> SplitTypeQuals; 752 TypeQual.split(SplitTypeQuals, " ", -1, false); 753 for (StringRef Key : SplitTypeQuals) { 754 if (Key == "const") 755 Arg[".is_const"] = Arg.getDocument()->getNode(true); 756 else if (Key == "restrict") 757 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 758 else if (Key == "volatile") 759 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 760 else if (Key == "pipe") 761 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 762 } 763 764 Args.push_back(Arg); 765 } 766 767 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, 768 unsigned &Offset, 769 msgpack::ArrayDocNode Args) { 770 int HiddenArgNumBytes = 771 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 772 773 if (!HiddenArgNumBytes) 774 return; 775 776 auto &DL = Func.getParent()->getDataLayout(); 777 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 778 779 if (HiddenArgNumBytes >= 8) 780 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); 781 if (HiddenArgNumBytes >= 16) 782 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); 783 if (HiddenArgNumBytes >= 24) 784 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); 785 786 auto Int8PtrTy = 787 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 788 789 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 790 // "none" argument. 791 if (HiddenArgNumBytes >= 32) { 792 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 793 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); 794 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 795 // The printf runtime binding pass should have ensured that hostcall and 796 // printf are not used in the same module. 797 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 798 emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args); 799 } else 800 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 801 } 802 803 // Emit "default queue" and "completion action" arguments if enqueue kernel is 804 // used, otherwise emit dummy "none" arguments. 805 if (HiddenArgNumBytes >= 48) { 806 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 807 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); 808 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); 809 } else { 810 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 811 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 812 } 813 } 814 815 // Emit the pointer argument for multi-grid object. 816 if (HiddenArgNumBytes >= 56) 817 emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args); 818 } 819 820 msgpack::MapDocNode 821 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 822 const SIProgramInfo &ProgramInfo) const { 823 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 824 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 825 const Function &F = MF.getFunction(); 826 827 auto Kern = HSAMetadataDoc->getMapNode(); 828 829 Align MaxKernArgAlign; 830 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 831 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 832 Kern[".group_segment_fixed_size"] = 833 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 834 Kern[".private_segment_fixed_size"] = 835 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 836 Kern[".kernarg_segment_align"] = 837 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 838 Kern[".wavefront_size"] = 839 Kern.getDocument()->getNode(STM.getWavefrontSize()); 840 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 841 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 842 Kern[".max_flat_workgroup_size"] = 843 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 844 Kern[".sgpr_spill_count"] = 845 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 846 Kern[".vgpr_spill_count"] = 847 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 848 849 return Kern; 850 } 851 852 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 853 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 854 } 855 856 void MetadataStreamerV3::begin(const Module &Mod) { 857 emitVersion(); 858 emitPrintf(Mod); 859 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 860 } 861 862 void MetadataStreamerV3::end() { 863 std::string HSAMetadataString; 864 raw_string_ostream StrOS(HSAMetadataString); 865 HSAMetadataDoc->toYAML(StrOS); 866 867 if (DumpHSAMetadata) 868 dump(StrOS.str()); 869 if (VerifyHSAMetadata) 870 verify(StrOS.str()); 871 } 872 873 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 874 const SIProgramInfo &ProgramInfo) { 875 auto &Func = MF.getFunction(); 876 auto Kern = getHSAKernelProps(MF, ProgramInfo); 877 878 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 879 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 880 881 auto Kernels = 882 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 883 884 { 885 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 886 Kern[".symbol"] = Kern.getDocument()->getNode( 887 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 888 emitKernelLanguage(Func, Kern); 889 emitKernelAttrs(Func, Kern); 890 emitKernelArgs(Func, Kern); 891 } 892 893 Kernels.push_back(Kern); 894 } 895 896 } // end namespace HSAMD 897 } // end namespace AMDGPU 898 } // end namespace llvm 899