1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "llvm/IR/Module.h" 22 using namespace llvm; 23 24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 25 const DataLayout &DL) { 26 Type *Ty = Arg.getType(); 27 MaybeAlign ArgAlign; 28 if (Arg.hasByRefAttr()) { 29 Ty = Arg.getParamByRefType(); 30 ArgAlign = Arg.getParamAlign(); 31 } 32 33 if (!ArgAlign) 34 ArgAlign = DL.getABITypeAlign(Ty); 35 36 return std::pair(Ty, *ArgAlign); 37 } 38 39 namespace llvm { 40 41 static cl::opt<bool> DumpHSAMetadata( 42 "amdgpu-dump-hsa-metadata", 43 cl::desc("Dump AMDGPU HSA Metadata")); 44 static cl::opt<bool> VerifyHSAMetadata( 45 "amdgpu-verify-hsa-metadata", 46 cl::desc("Verify AMDGPU HSA Metadata")); 47 48 namespace AMDGPU { 49 namespace HSAMD { 50 51 //===----------------------------------------------------------------------===// 52 // HSAMetadataStreamerV2 53 //===----------------------------------------------------------------------===// 54 void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const { 55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 56 } 57 58 void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const { 59 errs() << "AMDGPU HSA Metadata Parser Test: "; 60 61 HSAMD::Metadata FromHSAMetadataString; 62 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 63 errs() << "FAIL\n"; 64 return; 65 } 66 67 std::string ToHSAMetadataString; 68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 69 errs() << "FAIL\n"; 70 return; 71 } 72 73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 74 << '\n'; 75 if (HSAMetadataString != ToHSAMetadataString) { 76 errs() << "Original input: " << HSAMetadataString << '\n' 77 << "Produced output: " << ToHSAMetadataString << '\n'; 78 } 79 } 80 81 AccessQualifier 82 MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const { 83 if (AccQual.empty()) 84 return AccessQualifier::Unknown; 85 86 return StringSwitch<AccessQualifier>(AccQual) 87 .Case("read_only", AccessQualifier::ReadOnly) 88 .Case("write_only", AccessQualifier::WriteOnly) 89 .Case("read_write", AccessQualifier::ReadWrite) 90 .Default(AccessQualifier::Default); 91 } 92 93 AddressSpaceQualifier 94 MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const { 95 switch (AddressSpace) { 96 case AMDGPUAS::PRIVATE_ADDRESS: 97 return AddressSpaceQualifier::Private; 98 case AMDGPUAS::GLOBAL_ADDRESS: 99 return AddressSpaceQualifier::Global; 100 case AMDGPUAS::CONSTANT_ADDRESS: 101 return AddressSpaceQualifier::Constant; 102 case AMDGPUAS::LOCAL_ADDRESS: 103 return AddressSpaceQualifier::Local; 104 case AMDGPUAS::FLAT_ADDRESS: 105 return AddressSpaceQualifier::Generic; 106 case AMDGPUAS::REGION_ADDRESS: 107 return AddressSpaceQualifier::Region; 108 default: 109 return AddressSpaceQualifier::Unknown; 110 } 111 } 112 113 ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual, 114 StringRef BaseTypeName) const { 115 if (TypeQual.contains("pipe")) 116 return ValueKind::Pipe; 117 118 return StringSwitch<ValueKind>(BaseTypeName) 119 .Case("image1d_t", ValueKind::Image) 120 .Case("image1d_array_t", ValueKind::Image) 121 .Case("image1d_buffer_t", ValueKind::Image) 122 .Case("image2d_t", ValueKind::Image) 123 .Case("image2d_array_t", ValueKind::Image) 124 .Case("image2d_array_depth_t", ValueKind::Image) 125 .Case("image2d_array_msaa_t", ValueKind::Image) 126 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 127 .Case("image2d_depth_t", ValueKind::Image) 128 .Case("image2d_msaa_t", ValueKind::Image) 129 .Case("image2d_msaa_depth_t", ValueKind::Image) 130 .Case("image3d_t", ValueKind::Image) 131 .Case("sampler_t", ValueKind::Sampler) 132 .Case("queue_t", ValueKind::Queue) 133 .Default(isa<PointerType>(Ty) ? 134 (Ty->getPointerAddressSpace() == 135 AMDGPUAS::LOCAL_ADDRESS ? 136 ValueKind::DynamicSharedPointer : 137 ValueKind::GlobalBuffer) : 138 ValueKind::ByValue); 139 } 140 141 std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const { 142 switch (Ty->getTypeID()) { 143 case Type::IntegerTyID: { 144 if (!Signed) 145 return (Twine('u') + getTypeName(Ty, true)).str(); 146 147 auto BitWidth = Ty->getIntegerBitWidth(); 148 switch (BitWidth) { 149 case 8: 150 return "char"; 151 case 16: 152 return "short"; 153 case 32: 154 return "int"; 155 case 64: 156 return "long"; 157 default: 158 return (Twine('i') + Twine(BitWidth)).str(); 159 } 160 } 161 case Type::HalfTyID: 162 return "half"; 163 case Type::FloatTyID: 164 return "float"; 165 case Type::DoubleTyID: 166 return "double"; 167 case Type::FixedVectorTyID: { 168 auto VecTy = cast<FixedVectorType>(Ty); 169 auto ElTy = VecTy->getElementType(); 170 auto NumElements = VecTy->getNumElements(); 171 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 172 } 173 default: 174 return "unknown"; 175 } 176 } 177 178 std::vector<uint32_t> 179 MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const { 180 std::vector<uint32_t> Dims; 181 if (Node->getNumOperands() != 3) 182 return Dims; 183 184 for (auto &Op : Node->operands()) 185 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 186 return Dims; 187 } 188 189 Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps( 190 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 191 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 192 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 193 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 194 const Function &F = MF.getFunction(); 195 196 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 197 F.getCallingConv() == CallingConv::SPIR_KERNEL); 198 199 Align MaxKernArgAlign; 200 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 201 MaxKernArgAlign); 202 HSACodeProps.mKernargSegmentAlign = 203 std::max(MaxKernArgAlign, Align(4)).value(); 204 205 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 206 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 207 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 208 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 209 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 210 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 211 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 212 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 213 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 214 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 215 216 return HSACodeProps; 217 } 218 219 Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps( 220 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 221 return HSAMD::Kernel::DebugProps::Metadata(); 222 } 223 224 void MetadataStreamerYamlV2::emitVersion() { 225 auto &Version = HSAMetadata.mVersion; 226 227 Version.push_back(VersionMajorV2); 228 Version.push_back(VersionMinorV2); 229 } 230 231 void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) { 232 auto &Printf = HSAMetadata.mPrintf; 233 234 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 235 if (!Node) 236 return; 237 238 for (auto *Op : Node->operands()) 239 if (Op->getNumOperands()) 240 Printf.push_back( 241 std::string(cast<MDString>(Op->getOperand(0))->getString())); 242 } 243 244 void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) { 245 auto &Kernel = HSAMetadata.mKernels.back(); 246 247 // TODO: What about other languages? 248 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 249 if (!Node || !Node->getNumOperands()) 250 return; 251 auto Op0 = Node->getOperand(0); 252 if (Op0->getNumOperands() <= 1) 253 return; 254 255 Kernel.mLanguage = "OpenCL C"; 256 Kernel.mLanguageVersion.push_back( 257 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 258 Kernel.mLanguageVersion.push_back( 259 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 260 } 261 262 void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) { 263 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 264 265 if (auto Node = Func.getMetadata("reqd_work_group_size")) 266 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 267 if (auto Node = Func.getMetadata("work_group_size_hint")) 268 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 269 if (auto Node = Func.getMetadata("vec_type_hint")) { 270 Attrs.mVecTypeHint = getTypeName( 271 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 272 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 273 } 274 if (Func.hasFnAttribute("runtime-handle")) { 275 Attrs.mRuntimeHandle = 276 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 277 } 278 } 279 280 void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func, 281 const GCNSubtarget &ST) { 282 for (auto &Arg : Func.args()) 283 emitKernelArg(Arg); 284 285 emitHiddenKernelArgs(Func, ST); 286 } 287 288 void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) { 289 auto Func = Arg.getParent(); 290 auto ArgNo = Arg.getArgNo(); 291 const MDNode *Node; 292 293 StringRef Name; 294 Node = Func->getMetadata("kernel_arg_name"); 295 if (Node && ArgNo < Node->getNumOperands()) 296 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 297 else if (Arg.hasName()) 298 Name = Arg.getName(); 299 300 StringRef TypeName; 301 Node = Func->getMetadata("kernel_arg_type"); 302 if (Node && ArgNo < Node->getNumOperands()) 303 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 304 305 StringRef BaseTypeName; 306 Node = Func->getMetadata("kernel_arg_base_type"); 307 if (Node && ArgNo < Node->getNumOperands()) 308 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 309 310 StringRef AccQual; 311 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 312 Arg.hasNoAliasAttr()) { 313 AccQual = "read_only"; 314 } else { 315 Node = Func->getMetadata("kernel_arg_access_qual"); 316 if (Node && ArgNo < Node->getNumOperands()) 317 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 318 } 319 320 StringRef TypeQual; 321 Node = Func->getMetadata("kernel_arg_type_qual"); 322 if (Node && ArgNo < Node->getNumOperands()) 323 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 324 325 const DataLayout &DL = Func->getParent()->getDataLayout(); 326 327 MaybeAlign PointeeAlign; 328 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) { 329 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 330 // FIXME: Should report this for all address spaces 331 PointeeAlign = Arg.getParamAlign().valueOrOne(); 332 } 333 } 334 335 Type *ArgTy; 336 Align ArgAlign; 337 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 338 339 emitKernelArg(DL, ArgTy, ArgAlign, 340 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name, 341 TypeName, BaseTypeName, AccQual, TypeQual); 342 } 343 344 void MetadataStreamerYamlV2::emitKernelArg( 345 const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind, 346 MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName, 347 StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) { 348 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 349 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 350 351 Arg.mName = std::string(Name); 352 Arg.mTypeName = std::string(TypeName); 353 Arg.mSize = DL.getTypeAllocSize(Ty); 354 Arg.mAlign = Alignment.value(); 355 Arg.mValueKind = ValueKind; 356 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; 357 358 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 359 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 360 361 Arg.mAccQual = getAccessQualifier(AccQual); 362 363 // TODO: Emit Arg.mActualAccQual. 364 365 SmallVector<StringRef, 1> SplitTypeQuals; 366 TypeQual.split(SplitTypeQuals, " ", -1, false); 367 for (StringRef Key : SplitTypeQuals) { 368 auto P = StringSwitch<bool*>(Key) 369 .Case("const", &Arg.mIsConst) 370 .Case("restrict", &Arg.mIsRestrict) 371 .Case("volatile", &Arg.mIsVolatile) 372 .Case("pipe", &Arg.mIsPipe) 373 .Default(nullptr); 374 if (P) 375 *P = true; 376 } 377 } 378 379 void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func, 380 const GCNSubtarget &ST) { 381 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 382 if (!HiddenArgNumBytes) 383 return; 384 385 auto &DL = Func.getParent()->getDataLayout(); 386 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 387 388 if (HiddenArgNumBytes >= 8) 389 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX); 390 if (HiddenArgNumBytes >= 16) 391 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY); 392 if (HiddenArgNumBytes >= 24) 393 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ); 394 395 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 396 AMDGPUAS::GLOBAL_ADDRESS); 397 398 if (HiddenArgNumBytes >= 32) { 399 // We forbid the use of features requiring hostcall when compiling OpenCL 400 // before code object V5, which makes the mutual exclusion between the 401 // "printf buffer" and "hostcall buffer" here sound. 402 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 403 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); 404 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 405 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); 406 else 407 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 408 } 409 410 // Emit "default queue" and "completion action" arguments if enqueue kernel is 411 // used, otherwise emit dummy "none" arguments. 412 if (HiddenArgNumBytes >= 40) { 413 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 414 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); 415 } else { 416 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 417 } 418 } 419 420 if (HiddenArgNumBytes >= 48) { 421 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { 422 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); 423 } else { 424 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 425 } 426 } 427 428 // Emit the pointer argument for multi-grid object. 429 if (HiddenArgNumBytes >= 56) { 430 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) 431 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); 432 else 433 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 434 } 435 } 436 437 bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 438 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 439 } 440 441 void MetadataStreamerYamlV2::begin(const Module &Mod, 442 const IsaInfo::AMDGPUTargetID &TargetID) { 443 emitVersion(); 444 emitPrintf(Mod); 445 } 446 447 void MetadataStreamerYamlV2::end() { 448 std::string HSAMetadataString; 449 if (toString(HSAMetadata, HSAMetadataString)) 450 return; 451 452 if (DumpHSAMetadata) 453 dump(HSAMetadataString); 454 if (VerifyHSAMetadata) 455 verify(HSAMetadataString); 456 } 457 458 void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF, 459 const SIProgramInfo &ProgramInfo) { 460 auto &Func = MF.getFunction(); 461 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 462 return; 463 464 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 465 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 466 467 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 468 auto &Kernel = HSAMetadata.mKernels.back(); 469 470 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 471 Kernel.mName = std::string(Func.getName()); 472 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 473 emitKernelLanguage(Func); 474 emitKernelAttrs(Func); 475 emitKernelArgs(Func, ST); 476 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 477 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 478 } 479 480 //===----------------------------------------------------------------------===// 481 // HSAMetadataStreamerV3 482 //===----------------------------------------------------------------------===// 483 484 void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const { 485 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 486 } 487 488 void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const { 489 errs() << "AMDGPU HSA Metadata Parser Test: "; 490 491 msgpack::Document FromHSAMetadataString; 492 493 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 494 errs() << "FAIL\n"; 495 return; 496 } 497 498 std::string ToHSAMetadataString; 499 raw_string_ostream StrOS(ToHSAMetadataString); 500 FromHSAMetadataString.toYAML(StrOS); 501 502 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 503 if (HSAMetadataString != ToHSAMetadataString) { 504 errs() << "Original input: " << HSAMetadataString << '\n' 505 << "Produced output: " << StrOS.str() << '\n'; 506 } 507 } 508 509 std::optional<StringRef> 510 MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const { 511 return StringSwitch<std::optional<StringRef>>(AccQual) 512 .Case("read_only", StringRef("read_only")) 513 .Case("write_only", StringRef("write_only")) 514 .Case("read_write", StringRef("read_write")) 515 .Default(std::nullopt); 516 } 517 518 std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier( 519 unsigned AddressSpace) const { 520 switch (AddressSpace) { 521 case AMDGPUAS::PRIVATE_ADDRESS: 522 return StringRef("private"); 523 case AMDGPUAS::GLOBAL_ADDRESS: 524 return StringRef("global"); 525 case AMDGPUAS::CONSTANT_ADDRESS: 526 return StringRef("constant"); 527 case AMDGPUAS::LOCAL_ADDRESS: 528 return StringRef("local"); 529 case AMDGPUAS::FLAT_ADDRESS: 530 return StringRef("generic"); 531 case AMDGPUAS::REGION_ADDRESS: 532 return StringRef("region"); 533 default: 534 return std::nullopt; 535 } 536 } 537 538 StringRef 539 MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual, 540 StringRef BaseTypeName) const { 541 if (TypeQual.contains("pipe")) 542 return "pipe"; 543 544 return StringSwitch<StringRef>(BaseTypeName) 545 .Case("image1d_t", "image") 546 .Case("image1d_array_t", "image") 547 .Case("image1d_buffer_t", "image") 548 .Case("image2d_t", "image") 549 .Case("image2d_array_t", "image") 550 .Case("image2d_array_depth_t", "image") 551 .Case("image2d_array_msaa_t", "image") 552 .Case("image2d_array_msaa_depth_t", "image") 553 .Case("image2d_depth_t", "image") 554 .Case("image2d_msaa_t", "image") 555 .Case("image2d_msaa_depth_t", "image") 556 .Case("image3d_t", "image") 557 .Case("sampler_t", "sampler") 558 .Case("queue_t", "queue") 559 .Default(isa<PointerType>(Ty) 560 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 561 ? "dynamic_shared_pointer" 562 : "global_buffer") 563 : "by_value"); 564 } 565 566 std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty, 567 bool Signed) const { 568 switch (Ty->getTypeID()) { 569 case Type::IntegerTyID: { 570 if (!Signed) 571 return (Twine('u') + getTypeName(Ty, true)).str(); 572 573 auto BitWidth = Ty->getIntegerBitWidth(); 574 switch (BitWidth) { 575 case 8: 576 return "char"; 577 case 16: 578 return "short"; 579 case 32: 580 return "int"; 581 case 64: 582 return "long"; 583 default: 584 return (Twine('i') + Twine(BitWidth)).str(); 585 } 586 } 587 case Type::HalfTyID: 588 return "half"; 589 case Type::FloatTyID: 590 return "float"; 591 case Type::DoubleTyID: 592 return "double"; 593 case Type::FixedVectorTyID: { 594 auto VecTy = cast<FixedVectorType>(Ty); 595 auto ElTy = VecTy->getElementType(); 596 auto NumElements = VecTy->getNumElements(); 597 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 598 } 599 default: 600 return "unknown"; 601 } 602 } 603 604 msgpack::ArrayDocNode 605 MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const { 606 auto Dims = HSAMetadataDoc->getArrayNode(); 607 if (Node->getNumOperands() != 3) 608 return Dims; 609 610 for (auto &Op : Node->operands()) 611 Dims.push_back(Dims.getDocument()->getNode( 612 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 613 return Dims; 614 } 615 616 void MetadataStreamerMsgPackV3::emitVersion() { 617 auto Version = HSAMetadataDoc->getArrayNode(); 618 Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); 619 Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); 620 getRootMetadata("amdhsa.version") = Version; 621 } 622 623 void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) { 624 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 625 if (!Node) 626 return; 627 628 auto Printf = HSAMetadataDoc->getArrayNode(); 629 for (auto *Op : Node->operands()) 630 if (Op->getNumOperands()) 631 Printf.push_back(Printf.getDocument()->getNode( 632 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 633 getRootMetadata("amdhsa.printf") = Printf; 634 } 635 636 void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func, 637 msgpack::MapDocNode Kern) { 638 // TODO: What about other languages? 639 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 640 if (!Node || !Node->getNumOperands()) 641 return; 642 auto Op0 = Node->getOperand(0); 643 if (Op0->getNumOperands() <= 1) 644 return; 645 646 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 647 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 648 LanguageVersion.push_back(Kern.getDocument()->getNode( 649 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 650 LanguageVersion.push_back(Kern.getDocument()->getNode( 651 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 652 Kern[".language_version"] = LanguageVersion; 653 } 654 655 void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func, 656 msgpack::MapDocNode Kern) { 657 658 if (auto Node = Func.getMetadata("reqd_work_group_size")) 659 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 660 if (auto Node = Func.getMetadata("work_group_size_hint")) 661 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 662 if (auto Node = Func.getMetadata("vec_type_hint")) { 663 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 664 getTypeName( 665 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 666 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 667 /*Copy=*/true); 668 } 669 if (Func.hasFnAttribute("runtime-handle")) { 670 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 671 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 672 /*Copy=*/true); 673 } 674 if (Func.hasFnAttribute("device-init")) 675 Kern[".kind"] = Kern.getDocument()->getNode("init"); 676 else if (Func.hasFnAttribute("device-fini")) 677 Kern[".kind"] = Kern.getDocument()->getNode("fini"); 678 } 679 680 void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF, 681 msgpack::MapDocNode Kern) { 682 auto &Func = MF.getFunction(); 683 unsigned Offset = 0; 684 auto Args = HSAMetadataDoc->getArrayNode(); 685 for (auto &Arg : Func.args()) 686 emitKernelArg(Arg, Offset, Args); 687 688 emitHiddenKernelArgs(MF, Offset, Args); 689 690 Kern[".args"] = Args; 691 } 692 693 void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg, 694 unsigned &Offset, 695 msgpack::ArrayDocNode Args) { 696 auto Func = Arg.getParent(); 697 auto ArgNo = Arg.getArgNo(); 698 const MDNode *Node; 699 700 StringRef Name; 701 Node = Func->getMetadata("kernel_arg_name"); 702 if (Node && ArgNo < Node->getNumOperands()) 703 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 704 else if (Arg.hasName()) 705 Name = Arg.getName(); 706 707 StringRef TypeName; 708 Node = Func->getMetadata("kernel_arg_type"); 709 if (Node && ArgNo < Node->getNumOperands()) 710 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 711 712 StringRef BaseTypeName; 713 Node = Func->getMetadata("kernel_arg_base_type"); 714 if (Node && ArgNo < Node->getNumOperands()) 715 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 716 717 StringRef AccQual; 718 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 719 Arg.hasNoAliasAttr()) { 720 AccQual = "read_only"; 721 } else { 722 Node = Func->getMetadata("kernel_arg_access_qual"); 723 if (Node && ArgNo < Node->getNumOperands()) 724 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 725 } 726 727 StringRef TypeQual; 728 Node = Func->getMetadata("kernel_arg_type_qual"); 729 if (Node && ArgNo < Node->getNumOperands()) 730 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 731 732 const DataLayout &DL = Func->getParent()->getDataLayout(); 733 734 MaybeAlign PointeeAlign; 735 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 736 737 // FIXME: Need to distinguish in memory alignment from pointer alignment. 738 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 739 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) 740 PointeeAlign = Arg.getParamAlign().valueOrOne(); 741 } 742 743 // There's no distinction between byval aggregates and raw aggregates. 744 Type *ArgTy; 745 Align ArgAlign; 746 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 747 748 emitKernelArg(DL, ArgTy, ArgAlign, 749 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 750 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 751 } 752 753 void MetadataStreamerMsgPackV3::emitKernelArg( 754 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 755 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 756 StringRef Name, StringRef TypeName, StringRef BaseTypeName, 757 StringRef AccQual, StringRef TypeQual) { 758 auto Arg = Args.getDocument()->getMapNode(); 759 760 if (!Name.empty()) 761 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 762 if (!TypeName.empty()) 763 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 764 auto Size = DL.getTypeAllocSize(Ty); 765 Arg[".size"] = Arg.getDocument()->getNode(Size); 766 Offset = alignTo(Offset, Alignment); 767 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 768 Offset += Size; 769 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 770 if (PointeeAlign) 771 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 772 773 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 774 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 775 // Limiting address space to emit only for a certain ValueKind. 776 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer") 777 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, 778 /*Copy=*/true); 779 780 if (auto AQ = getAccessQualifier(AccQual)) 781 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 782 783 // TODO: Emit Arg[".actual_access"]. 784 785 SmallVector<StringRef, 1> SplitTypeQuals; 786 TypeQual.split(SplitTypeQuals, " ", -1, false); 787 for (StringRef Key : SplitTypeQuals) { 788 if (Key == "const") 789 Arg[".is_const"] = Arg.getDocument()->getNode(true); 790 else if (Key == "restrict") 791 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 792 else if (Key == "volatile") 793 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 794 else if (Key == "pipe") 795 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 796 } 797 798 Args.push_back(Arg); 799 } 800 801 void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( 802 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 803 auto &Func = MF.getFunction(); 804 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 805 806 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 807 if (!HiddenArgNumBytes) 808 return; 809 810 const Module *M = Func.getParent(); 811 auto &DL = M->getDataLayout(); 812 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 813 814 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 815 816 if (HiddenArgNumBytes >= 8) 817 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 818 Args); 819 if (HiddenArgNumBytes >= 16) 820 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 821 Args); 822 if (HiddenArgNumBytes >= 24) 823 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 824 Args); 825 826 auto Int8PtrTy = 827 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 828 829 if (HiddenArgNumBytes >= 32) { 830 // We forbid the use of features requiring hostcall when compiling OpenCL 831 // before code object V5, which makes the mutual exclusion between the 832 // "printf buffer" and "hostcall buffer" here sound. 833 if (M->getNamedMetadata("llvm.printf.fmts")) 834 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 835 Args); 836 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 837 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 838 Args); 839 else 840 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 841 } 842 843 // Emit "default queue" and "completion action" arguments if enqueue kernel is 844 // used, otherwise emit dummy "none" arguments. 845 if (HiddenArgNumBytes >= 40) { 846 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 847 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 848 Args); 849 } else { 850 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 851 } 852 } 853 854 if (HiddenArgNumBytes >= 48) { 855 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { 856 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 857 Args); 858 } else { 859 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 860 } 861 } 862 863 // Emit the pointer argument for multi-grid object. 864 if (HiddenArgNumBytes >= 56) { 865 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 866 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 867 Args); 868 } else { 869 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 870 } 871 } 872 } 873 874 msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( 875 const MachineFunction &MF, const SIProgramInfo &ProgramInfo, 876 unsigned CodeObjectVersion) const { 877 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 878 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 879 const Function &F = MF.getFunction(); 880 881 auto Kern = HSAMetadataDoc->getMapNode(); 882 883 Align MaxKernArgAlign; 884 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 885 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 886 Kern[".group_segment_fixed_size"] = 887 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 888 Kern[".private_segment_fixed_size"] = 889 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 890 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) 891 Kern[".uses_dynamic_stack"] = 892 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); 893 894 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP()) 895 Kern[".workgroup_processor_mode"] = 896 Kern.getDocument()->getNode(ProgramInfo.WgpMode); 897 898 // FIXME: The metadata treats the minimum as 16? 899 Kern[".kernarg_segment_align"] = 900 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 901 Kern[".wavefront_size"] = 902 Kern.getDocument()->getNode(STM.getWavefrontSize()); 903 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 904 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 905 906 // Only add AGPR count to metadata for supported devices 907 if (STM.hasMAIInsts()) { 908 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR); 909 } 910 911 Kern[".max_flat_workgroup_size"] = 912 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 913 Kern[".sgpr_spill_count"] = 914 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 915 Kern[".vgpr_spill_count"] = 916 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 917 918 return Kern; 919 } 920 921 bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 922 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 923 } 924 925 void MetadataStreamerMsgPackV3::begin(const Module &Mod, 926 const IsaInfo::AMDGPUTargetID &TargetID) { 927 emitVersion(); 928 emitPrintf(Mod); 929 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 930 } 931 932 void MetadataStreamerMsgPackV3::end() { 933 std::string HSAMetadataString; 934 raw_string_ostream StrOS(HSAMetadataString); 935 HSAMetadataDoc->toYAML(StrOS); 936 937 if (DumpHSAMetadata) 938 dump(StrOS.str()); 939 if (VerifyHSAMetadata) 940 verify(StrOS.str()); 941 } 942 943 void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, 944 const SIProgramInfo &ProgramInfo) { 945 auto &Func = MF.getFunction(); 946 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && 947 Func.getCallingConv() != CallingConv::SPIR_KERNEL) 948 return; 949 950 auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent()); 951 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); 952 953 auto Kernels = 954 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 955 956 { 957 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 958 Kern[".symbol"] = Kern.getDocument()->getNode( 959 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 960 emitKernelLanguage(Func, Kern); 961 emitKernelAttrs(Func, Kern); 962 emitKernelArgs(MF, Kern); 963 } 964 965 Kernels.push_back(Kern); 966 } 967 968 //===----------------------------------------------------------------------===// 969 // HSAMetadataStreamerV4 970 //===----------------------------------------------------------------------===// 971 972 void MetadataStreamerMsgPackV4::emitVersion() { 973 auto Version = HSAMetadataDoc->getArrayNode(); 974 Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); 975 Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); 976 getRootMetadata("amdhsa.version") = Version; 977 } 978 979 void MetadataStreamerMsgPackV4::emitTargetID( 980 const IsaInfo::AMDGPUTargetID &TargetID) { 981 getRootMetadata("amdhsa.target") = 982 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); 983 } 984 985 void MetadataStreamerMsgPackV4::begin(const Module &Mod, 986 const IsaInfo::AMDGPUTargetID &TargetID) { 987 emitVersion(); 988 emitTargetID(TargetID); 989 emitPrintf(Mod); 990 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 991 } 992 993 //===----------------------------------------------------------------------===// 994 // HSAMetadataStreamerV5 995 //===----------------------------------------------------------------------===// 996 997 void MetadataStreamerMsgPackV5::emitVersion() { 998 auto Version = HSAMetadataDoc->getArrayNode(); 999 Version.push_back(Version.getDocument()->getNode(VersionMajorV5)); 1000 Version.push_back(Version.getDocument()->getNode(VersionMinorV5)); 1001 getRootMetadata("amdhsa.version") = Version; 1002 } 1003 1004 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( 1005 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 1006 auto &Func = MF.getFunction(); 1007 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1008 1009 // No implicit kernel argument is used. 1010 if (ST.getImplicitArgNumBytes(Func) == 0) 1011 return; 1012 1013 const Module *M = Func.getParent(); 1014 auto &DL = M->getDataLayout(); 1015 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 1016 1017 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 1018 auto Int32Ty = Type::getInt32Ty(Func.getContext()); 1019 auto Int16Ty = Type::getInt16Ty(Func.getContext()); 1020 1021 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 1022 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); 1023 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); 1024 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); 1025 1026 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args); 1027 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args); 1028 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args); 1029 1030 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args); 1031 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args); 1032 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args); 1033 1034 // Reserved for hidden_tool_correlation_id. 1035 Offset += 8; 1036 1037 Offset += 8; // Reserved. 1038 1039 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); 1040 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args); 1041 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args); 1042 1043 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args); 1044 1045 Offset += 6; // Reserved. 1046 auto Int8PtrTy = 1047 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 1048 1049 if (M->getNamedMetadata("llvm.printf.fmts")) { 1050 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 1051 Args); 1052 } else { 1053 Offset += 8; // Skipped. 1054 } 1055 1056 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { 1057 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 1058 Args); 1059 } else { 1060 Offset += 8; // Skipped. 1061 } 1062 1063 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 1064 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 1065 Args); 1066 } else { 1067 Offset += 8; // Skipped. 1068 } 1069 1070 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) 1071 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); 1072 else 1073 Offset += 8; // Skipped. 1074 1075 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 1076 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 1077 Args); 1078 } else { 1079 Offset += 8; // Skipped. 1080 } 1081 1082 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { 1083 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 1084 Args); 1085 } else { 1086 Offset += 8; // Skipped. 1087 } 1088 1089 Offset += 72; // Reserved. 1090 1091 // hidden_private_base and hidden_shared_base are only when the subtarget has 1092 // ApertureRegs. 1093 if (!ST.hasApertureRegs()) { 1094 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); 1095 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); 1096 } else { 1097 Offset += 8; // Skipped. 1098 } 1099 1100 if (MFI.hasQueuePtr()) 1101 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); 1102 } 1103 1104 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, 1105 msgpack::MapDocNode Kern) { 1106 MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern); 1107 1108 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) 1109 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); 1110 } 1111 1112 1113 } // end namespace HSAMD 1114 } // end namespace AMDGPU 1115 } // end namespace llvm 1116