1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "llvm/IR/Module.h" 22 using namespace llvm; 23 24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 25 const DataLayout &DL) { 26 Type *Ty = Arg.getType(); 27 MaybeAlign ArgAlign; 28 if (Arg.hasByRefAttr()) { 29 Ty = Arg.getParamByRefType(); 30 ArgAlign = Arg.getParamAlign(); 31 } 32 33 if (!ArgAlign) 34 ArgAlign = DL.getABITypeAlign(Ty); 35 36 return std::pair(Ty, *ArgAlign); 37 } 38 39 namespace llvm { 40 41 static cl::opt<bool> DumpHSAMetadata( 42 "amdgpu-dump-hsa-metadata", 43 cl::desc("Dump AMDGPU HSA Metadata")); 44 static cl::opt<bool> VerifyHSAMetadata( 45 "amdgpu-verify-hsa-metadata", 46 cl::desc("Verify AMDGPU HSA Metadata")); 47 48 namespace AMDGPU { 49 namespace HSAMD { 50 51 //===----------------------------------------------------------------------===// 52 // HSAMetadataStreamerV2 53 //===----------------------------------------------------------------------===// 54 void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const { 55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 56 } 57 58 void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const { 59 errs() << "AMDGPU HSA Metadata Parser Test: "; 60 61 HSAMD::Metadata FromHSAMetadataString; 62 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 63 errs() << "FAIL\n"; 64 return; 65 } 66 67 std::string ToHSAMetadataString; 68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 69 errs() << "FAIL\n"; 70 return; 71 } 72 73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 74 << '\n'; 75 if (HSAMetadataString != ToHSAMetadataString) { 76 errs() << "Original input: " << HSAMetadataString << '\n' 77 << "Produced output: " << ToHSAMetadataString << '\n'; 78 } 79 } 80 81 AccessQualifier 82 MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const { 83 if (AccQual.empty()) 84 return AccessQualifier::Unknown; 85 86 return StringSwitch<AccessQualifier>(AccQual) 87 .Case("read_only", AccessQualifier::ReadOnly) 88 .Case("write_only", AccessQualifier::WriteOnly) 89 .Case("read_write", AccessQualifier::ReadWrite) 90 .Default(AccessQualifier::Default); 91 } 92 93 AddressSpaceQualifier 94 MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const { 95 switch (AddressSpace) { 96 case AMDGPUAS::PRIVATE_ADDRESS: 97 return AddressSpaceQualifier::Private; 98 case AMDGPUAS::GLOBAL_ADDRESS: 99 return AddressSpaceQualifier::Global; 100 case AMDGPUAS::CONSTANT_ADDRESS: 101 return AddressSpaceQualifier::Constant; 102 case AMDGPUAS::LOCAL_ADDRESS: 103 return AddressSpaceQualifier::Local; 104 case AMDGPUAS::FLAT_ADDRESS: 105 return AddressSpaceQualifier::Generic; 106 case AMDGPUAS::REGION_ADDRESS: 107 return AddressSpaceQualifier::Region; 108 default: 109 return AddressSpaceQualifier::Unknown; 110 } 111 } 112 113 ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual, 114 StringRef BaseTypeName) const { 115 if (TypeQual.contains("pipe")) 116 return ValueKind::Pipe; 117 118 return StringSwitch<ValueKind>(BaseTypeName) 119 .Case("image1d_t", ValueKind::Image) 120 .Case("image1d_array_t", ValueKind::Image) 121 .Case("image1d_buffer_t", ValueKind::Image) 122 .Case("image2d_t", ValueKind::Image) 123 .Case("image2d_array_t", ValueKind::Image) 124 .Case("image2d_array_depth_t", ValueKind::Image) 125 .Case("image2d_array_msaa_t", ValueKind::Image) 126 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 127 .Case("image2d_depth_t", ValueKind::Image) 128 .Case("image2d_msaa_t", ValueKind::Image) 129 .Case("image2d_msaa_depth_t", ValueKind::Image) 130 .Case("image3d_t", ValueKind::Image) 131 .Case("sampler_t", ValueKind::Sampler) 132 .Case("queue_t", ValueKind::Queue) 133 .Default(isa<PointerType>(Ty) ? 134 (Ty->getPointerAddressSpace() == 135 AMDGPUAS::LOCAL_ADDRESS ? 136 ValueKind::DynamicSharedPointer : 137 ValueKind::GlobalBuffer) : 138 ValueKind::ByValue); 139 } 140 141 std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const { 142 switch (Ty->getTypeID()) { 143 case Type::IntegerTyID: { 144 if (!Signed) 145 return (Twine('u') + getTypeName(Ty, true)).str(); 146 147 auto BitWidth = Ty->getIntegerBitWidth(); 148 switch (BitWidth) { 149 case 8: 150 return "char"; 151 case 16: 152 return "short"; 153 case 32: 154 return "int"; 155 case 64: 156 return "long"; 157 default: 158 return (Twine('i') + Twine(BitWidth)).str(); 159 } 160 } 161 case Type::HalfTyID: 162 return "half"; 163 case Type::FloatTyID: 164 return "float"; 165 case Type::DoubleTyID: 166 return "double"; 167 case Type::FixedVectorTyID: { 168 auto VecTy = cast<FixedVectorType>(Ty); 169 auto ElTy = VecTy->getElementType(); 170 auto NumElements = VecTy->getNumElements(); 171 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 172 } 173 default: 174 return "unknown"; 175 } 176 } 177 178 std::vector<uint32_t> 179 MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const { 180 std::vector<uint32_t> Dims; 181 if (Node->getNumOperands() != 3) 182 return Dims; 183 184 for (auto &Op : Node->operands()) 185 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 186 return Dims; 187 } 188 189 Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps( 190 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 191 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 192 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 193 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 194 const Function &F = MF.getFunction(); 195 196 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 197 F.getCallingConv() == CallingConv::SPIR_KERNEL); 198 199 Align MaxKernArgAlign; 200 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 201 MaxKernArgAlign); 202 HSACodeProps.mKernargSegmentAlign = 203 std::max(MaxKernArgAlign, Align(4)).value(); 204 205 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 206 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 207 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 208 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 209 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 210 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 211 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 212 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 213 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 214 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 215 216 return HSACodeProps; 217 } 218 219 Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps( 220 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 221 return HSAMD::Kernel::DebugProps::Metadata(); 222 } 223 224 void MetadataStreamerYamlV2::emitVersion() { 225 auto &Version = HSAMetadata.mVersion; 226 227 Version.push_back(VersionMajorV2); 228 Version.push_back(VersionMinorV2); 229 } 230 231 void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) { 232 auto &Printf = HSAMetadata.mPrintf; 233 234 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 235 if (!Node) 236 return; 237 238 for (auto *Op : Node->operands()) 239 if (Op->getNumOperands()) 240 Printf.push_back( 241 std::string(cast<MDString>(Op->getOperand(0))->getString())); 242 } 243 244 void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) { 245 auto &Kernel = HSAMetadata.mKernels.back(); 246 247 // TODO: What about other languages? 248 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 249 if (!Node || !Node->getNumOperands()) 250 return; 251 auto Op0 = Node->getOperand(0); 252 if (Op0->getNumOperands() <= 1) 253 return; 254 255 Kernel.mLanguage = "OpenCL C"; 256 Kernel.mLanguageVersion.push_back( 257 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 258 Kernel.mLanguageVersion.push_back( 259 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 260 } 261 262 void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) { 263 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 264 265 if (auto Node = Func.getMetadata("reqd_work_group_size")) 266 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 267 if (auto Node = Func.getMetadata("work_group_size_hint")) 268 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 269 if (auto Node = Func.getMetadata("vec_type_hint")) { 270 Attrs.mVecTypeHint = getTypeName( 271 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 272 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 273 } 274 if (Func.hasFnAttribute("runtime-handle")) { 275 Attrs.mRuntimeHandle = 276 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 277 } 278 } 279 280 void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func, 281 const GCNSubtarget &ST) { 282 for (auto &Arg : Func.args()) 283 emitKernelArg(Arg); 284 285 emitHiddenKernelArgs(Func, ST); 286 } 287 288 void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) { 289 auto Func = Arg.getParent(); 290 auto ArgNo = Arg.getArgNo(); 291 const MDNode *Node; 292 293 StringRef Name; 294 Node = Func->getMetadata("kernel_arg_name"); 295 if (Node && ArgNo < Node->getNumOperands()) 296 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 297 else if (Arg.hasName()) 298 Name = Arg.getName(); 299 300 StringRef TypeName; 301 Node = Func->getMetadata("kernel_arg_type"); 302 if (Node && ArgNo < Node->getNumOperands()) 303 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 304 305 StringRef BaseTypeName; 306 Node = Func->getMetadata("kernel_arg_base_type"); 307 if (Node && ArgNo < Node->getNumOperands()) 308 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 309 310 StringRef AccQual; 311 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 312 Arg.hasNoAliasAttr()) { 313 AccQual = "read_only"; 314 } else { 315 Node = Func->getMetadata("kernel_arg_access_qual"); 316 if (Node && ArgNo < Node->getNumOperands()) 317 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 318 } 319 320 StringRef TypeQual; 321 Node = Func->getMetadata("kernel_arg_type_qual"); 322 if (Node && ArgNo < Node->getNumOperands()) 323 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 324 325 const DataLayout &DL = Func->getParent()->getDataLayout(); 326 327 MaybeAlign PointeeAlign; 328 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) { 329 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 330 // FIXME: Should report this for all address spaces 331 PointeeAlign = Arg.getParamAlign().valueOrOne(); 332 } 333 } 334 335 Type *ArgTy; 336 Align ArgAlign; 337 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 338 339 emitKernelArg(DL, ArgTy, ArgAlign, 340 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name, 341 TypeName, BaseTypeName, AccQual, TypeQual); 342 } 343 344 void MetadataStreamerYamlV2::emitKernelArg( 345 const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind, 346 MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName, 347 StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) { 348 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 349 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 350 351 Arg.mName = std::string(Name); 352 Arg.mTypeName = std::string(TypeName); 353 Arg.mSize = DL.getTypeAllocSize(Ty); 354 Arg.mAlign = Alignment.value(); 355 Arg.mValueKind = ValueKind; 356 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; 357 358 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 359 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 360 361 Arg.mAccQual = getAccessQualifier(AccQual); 362 363 // TODO: Emit Arg.mActualAccQual. 364 365 SmallVector<StringRef, 1> SplitTypeQuals; 366 TypeQual.split(SplitTypeQuals, " ", -1, false); 367 for (StringRef Key : SplitTypeQuals) { 368 auto P = StringSwitch<bool*>(Key) 369 .Case("const", &Arg.mIsConst) 370 .Case("restrict", &Arg.mIsRestrict) 371 .Case("volatile", &Arg.mIsVolatile) 372 .Case("pipe", &Arg.mIsPipe) 373 .Default(nullptr); 374 if (P) 375 *P = true; 376 } 377 } 378 379 void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func, 380 const GCNSubtarget &ST) { 381 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 382 if (!HiddenArgNumBytes) 383 return; 384 385 auto &DL = Func.getParent()->getDataLayout(); 386 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 387 388 if (HiddenArgNumBytes >= 8) 389 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX); 390 if (HiddenArgNumBytes >= 16) 391 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY); 392 if (HiddenArgNumBytes >= 24) 393 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ); 394 395 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 396 AMDGPUAS::GLOBAL_ADDRESS); 397 398 if (HiddenArgNumBytes >= 32) { 399 // We forbid the use of features requiring hostcall when compiling OpenCL 400 // before code object V5, which makes the mutual exclusion between the 401 // "printf buffer" and "hostcall buffer" here sound. 402 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 403 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); 404 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 405 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); 406 else 407 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 408 } 409 410 // Emit "default queue" and "completion action" arguments if enqueue kernel is 411 // used, otherwise emit dummy "none" arguments. 412 if (HiddenArgNumBytes >= 40) { 413 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 414 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); 415 } else { 416 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 417 } 418 } 419 420 if (HiddenArgNumBytes >= 48) { 421 if (!Func.hasFnAttribute("amdgpu-no-completion-action") && 422 // FIXME: Hack for runtime bug if we fail to optimize this out 423 Func.hasFnAttribute("calls-enqueue-kernel")) { 424 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); 425 } else { 426 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 427 } 428 } 429 430 // Emit the pointer argument for multi-grid object. 431 if (HiddenArgNumBytes >= 56) { 432 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) 433 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); 434 else 435 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 436 } 437 } 438 439 bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 440 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 441 } 442 443 void MetadataStreamerYamlV2::begin(const Module &Mod, 444 const IsaInfo::AMDGPUTargetID &TargetID) { 445 emitVersion(); 446 emitPrintf(Mod); 447 } 448 449 void MetadataStreamerYamlV2::end() { 450 std::string HSAMetadataString; 451 if (toString(HSAMetadata, HSAMetadataString)) 452 return; 453 454 if (DumpHSAMetadata) 455 dump(HSAMetadataString); 456 if (VerifyHSAMetadata) 457 verify(HSAMetadataString); 458 } 459 460 void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF, 461 const SIProgramInfo &ProgramInfo) { 462 auto &Func = MF.getFunction(); 463 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 464 return; 465 466 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 467 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 468 469 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 470 auto &Kernel = HSAMetadata.mKernels.back(); 471 472 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 473 Kernel.mName = std::string(Func.getName()); 474 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 475 emitKernelLanguage(Func); 476 emitKernelAttrs(Func); 477 emitKernelArgs(Func, ST); 478 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 479 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 480 } 481 482 //===----------------------------------------------------------------------===// 483 // HSAMetadataStreamerV3 484 //===----------------------------------------------------------------------===// 485 486 void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const { 487 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 488 } 489 490 void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const { 491 errs() << "AMDGPU HSA Metadata Parser Test: "; 492 493 msgpack::Document FromHSAMetadataString; 494 495 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 496 errs() << "FAIL\n"; 497 return; 498 } 499 500 std::string ToHSAMetadataString; 501 raw_string_ostream StrOS(ToHSAMetadataString); 502 FromHSAMetadataString.toYAML(StrOS); 503 504 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 505 if (HSAMetadataString != ToHSAMetadataString) { 506 errs() << "Original input: " << HSAMetadataString << '\n' 507 << "Produced output: " << StrOS.str() << '\n'; 508 } 509 } 510 511 std::optional<StringRef> 512 MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const { 513 return StringSwitch<std::optional<StringRef>>(AccQual) 514 .Case("read_only", StringRef("read_only")) 515 .Case("write_only", StringRef("write_only")) 516 .Case("read_write", StringRef("read_write")) 517 .Default(std::nullopt); 518 } 519 520 std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier( 521 unsigned AddressSpace) const { 522 switch (AddressSpace) { 523 case AMDGPUAS::PRIVATE_ADDRESS: 524 return StringRef("private"); 525 case AMDGPUAS::GLOBAL_ADDRESS: 526 return StringRef("global"); 527 case AMDGPUAS::CONSTANT_ADDRESS: 528 return StringRef("constant"); 529 case AMDGPUAS::LOCAL_ADDRESS: 530 return StringRef("local"); 531 case AMDGPUAS::FLAT_ADDRESS: 532 return StringRef("generic"); 533 case AMDGPUAS::REGION_ADDRESS: 534 return StringRef("region"); 535 default: 536 return std::nullopt; 537 } 538 } 539 540 StringRef 541 MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual, 542 StringRef BaseTypeName) const { 543 if (TypeQual.contains("pipe")) 544 return "pipe"; 545 546 return StringSwitch<StringRef>(BaseTypeName) 547 .Case("image1d_t", "image") 548 .Case("image1d_array_t", "image") 549 .Case("image1d_buffer_t", "image") 550 .Case("image2d_t", "image") 551 .Case("image2d_array_t", "image") 552 .Case("image2d_array_depth_t", "image") 553 .Case("image2d_array_msaa_t", "image") 554 .Case("image2d_array_msaa_depth_t", "image") 555 .Case("image2d_depth_t", "image") 556 .Case("image2d_msaa_t", "image") 557 .Case("image2d_msaa_depth_t", "image") 558 .Case("image3d_t", "image") 559 .Case("sampler_t", "sampler") 560 .Case("queue_t", "queue") 561 .Default(isa<PointerType>(Ty) 562 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 563 ? "dynamic_shared_pointer" 564 : "global_buffer") 565 : "by_value"); 566 } 567 568 std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty, 569 bool Signed) const { 570 switch (Ty->getTypeID()) { 571 case Type::IntegerTyID: { 572 if (!Signed) 573 return (Twine('u') + getTypeName(Ty, true)).str(); 574 575 auto BitWidth = Ty->getIntegerBitWidth(); 576 switch (BitWidth) { 577 case 8: 578 return "char"; 579 case 16: 580 return "short"; 581 case 32: 582 return "int"; 583 case 64: 584 return "long"; 585 default: 586 return (Twine('i') + Twine(BitWidth)).str(); 587 } 588 } 589 case Type::HalfTyID: 590 return "half"; 591 case Type::FloatTyID: 592 return "float"; 593 case Type::DoubleTyID: 594 return "double"; 595 case Type::FixedVectorTyID: { 596 auto VecTy = cast<FixedVectorType>(Ty); 597 auto ElTy = VecTy->getElementType(); 598 auto NumElements = VecTy->getNumElements(); 599 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 600 } 601 default: 602 return "unknown"; 603 } 604 } 605 606 msgpack::ArrayDocNode 607 MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const { 608 auto Dims = HSAMetadataDoc->getArrayNode(); 609 if (Node->getNumOperands() != 3) 610 return Dims; 611 612 for (auto &Op : Node->operands()) 613 Dims.push_back(Dims.getDocument()->getNode( 614 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 615 return Dims; 616 } 617 618 void MetadataStreamerMsgPackV3::emitVersion() { 619 auto Version = HSAMetadataDoc->getArrayNode(); 620 Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); 621 Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); 622 getRootMetadata("amdhsa.version") = Version; 623 } 624 625 void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) { 626 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 627 if (!Node) 628 return; 629 630 auto Printf = HSAMetadataDoc->getArrayNode(); 631 for (auto *Op : Node->operands()) 632 if (Op->getNumOperands()) 633 Printf.push_back(Printf.getDocument()->getNode( 634 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 635 getRootMetadata("amdhsa.printf") = Printf; 636 } 637 638 void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func, 639 msgpack::MapDocNode Kern) { 640 // TODO: What about other languages? 641 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 642 if (!Node || !Node->getNumOperands()) 643 return; 644 auto Op0 = Node->getOperand(0); 645 if (Op0->getNumOperands() <= 1) 646 return; 647 648 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 649 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 650 LanguageVersion.push_back(Kern.getDocument()->getNode( 651 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 652 LanguageVersion.push_back(Kern.getDocument()->getNode( 653 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 654 Kern[".language_version"] = LanguageVersion; 655 } 656 657 void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func, 658 msgpack::MapDocNode Kern) { 659 660 if (auto Node = Func.getMetadata("reqd_work_group_size")) 661 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 662 if (auto Node = Func.getMetadata("work_group_size_hint")) 663 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 664 if (auto Node = Func.getMetadata("vec_type_hint")) { 665 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 666 getTypeName( 667 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 668 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 669 /*Copy=*/true); 670 } 671 if (Func.hasFnAttribute("runtime-handle")) { 672 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 673 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 674 /*Copy=*/true); 675 } 676 if (Func.hasFnAttribute("device-init")) 677 Kern[".kind"] = Kern.getDocument()->getNode("init"); 678 else if (Func.hasFnAttribute("device-fini")) 679 Kern[".kind"] = Kern.getDocument()->getNode("fini"); 680 } 681 682 void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF, 683 msgpack::MapDocNode Kern) { 684 auto &Func = MF.getFunction(); 685 unsigned Offset = 0; 686 auto Args = HSAMetadataDoc->getArrayNode(); 687 for (auto &Arg : Func.args()) 688 emitKernelArg(Arg, Offset, Args); 689 690 emitHiddenKernelArgs(MF, Offset, Args); 691 692 Kern[".args"] = Args; 693 } 694 695 void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg, 696 unsigned &Offset, 697 msgpack::ArrayDocNode Args) { 698 auto Func = Arg.getParent(); 699 auto ArgNo = Arg.getArgNo(); 700 const MDNode *Node; 701 702 StringRef Name; 703 Node = Func->getMetadata("kernel_arg_name"); 704 if (Node && ArgNo < Node->getNumOperands()) 705 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 706 else if (Arg.hasName()) 707 Name = Arg.getName(); 708 709 StringRef TypeName; 710 Node = Func->getMetadata("kernel_arg_type"); 711 if (Node && ArgNo < Node->getNumOperands()) 712 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 713 714 StringRef BaseTypeName; 715 Node = Func->getMetadata("kernel_arg_base_type"); 716 if (Node && ArgNo < Node->getNumOperands()) 717 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 718 719 StringRef AccQual; 720 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 721 Arg.hasNoAliasAttr()) { 722 AccQual = "read_only"; 723 } else { 724 Node = Func->getMetadata("kernel_arg_access_qual"); 725 if (Node && ArgNo < Node->getNumOperands()) 726 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 727 } 728 729 StringRef TypeQual; 730 Node = Func->getMetadata("kernel_arg_type_qual"); 731 if (Node && ArgNo < Node->getNumOperands()) 732 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 733 734 const DataLayout &DL = Func->getParent()->getDataLayout(); 735 736 MaybeAlign PointeeAlign; 737 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 738 739 // FIXME: Need to distinguish in memory alignment from pointer alignment. 740 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 741 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) 742 PointeeAlign = Arg.getParamAlign().valueOrOne(); 743 } 744 745 // There's no distinction between byval aggregates and raw aggregates. 746 Type *ArgTy; 747 Align ArgAlign; 748 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 749 750 emitKernelArg(DL, ArgTy, ArgAlign, 751 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 752 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 753 } 754 755 void MetadataStreamerMsgPackV3::emitKernelArg( 756 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 757 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 758 StringRef Name, StringRef TypeName, StringRef BaseTypeName, 759 StringRef AccQual, StringRef TypeQual) { 760 auto Arg = Args.getDocument()->getMapNode(); 761 762 if (!Name.empty()) 763 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 764 if (!TypeName.empty()) 765 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 766 auto Size = DL.getTypeAllocSize(Ty); 767 Arg[".size"] = Arg.getDocument()->getNode(Size); 768 Offset = alignTo(Offset, Alignment); 769 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 770 Offset += Size; 771 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 772 if (PointeeAlign) 773 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 774 775 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 776 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 777 // Limiting address space to emit only for a certain ValueKind. 778 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer") 779 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, 780 /*Copy=*/true); 781 782 if (auto AQ = getAccessQualifier(AccQual)) 783 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 784 785 // TODO: Emit Arg[".actual_access"]. 786 787 SmallVector<StringRef, 1> SplitTypeQuals; 788 TypeQual.split(SplitTypeQuals, " ", -1, false); 789 for (StringRef Key : SplitTypeQuals) { 790 if (Key == "const") 791 Arg[".is_const"] = Arg.getDocument()->getNode(true); 792 else if (Key == "restrict") 793 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 794 else if (Key == "volatile") 795 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 796 else if (Key == "pipe") 797 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 798 } 799 800 Args.push_back(Arg); 801 } 802 803 void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( 804 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 805 auto &Func = MF.getFunction(); 806 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 807 808 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 809 if (!HiddenArgNumBytes) 810 return; 811 812 const Module *M = Func.getParent(); 813 auto &DL = M->getDataLayout(); 814 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 815 816 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 817 818 if (HiddenArgNumBytes >= 8) 819 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 820 Args); 821 if (HiddenArgNumBytes >= 16) 822 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 823 Args); 824 if (HiddenArgNumBytes >= 24) 825 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 826 Args); 827 828 auto Int8PtrTy = 829 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 830 831 if (HiddenArgNumBytes >= 32) { 832 // We forbid the use of features requiring hostcall when compiling OpenCL 833 // before code object V5, which makes the mutual exclusion between the 834 // "printf buffer" and "hostcall buffer" here sound. 835 if (M->getNamedMetadata("llvm.printf.fmts")) 836 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 837 Args); 838 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 840 Args); 841 else 842 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 843 } 844 845 // Emit "default queue" and "completion action" arguments if enqueue kernel is 846 // used, otherwise emit dummy "none" arguments. 847 if (HiddenArgNumBytes >= 40) { 848 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 849 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 850 Args); 851 } else { 852 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 853 } 854 } 855 856 if (HiddenArgNumBytes >= 48) { 857 if (!Func.hasFnAttribute("amdgpu-no-completion-action") && 858 // FIXME: Hack for runtime bug if we fail to optimize this out 859 Func.hasFnAttribute("calls-enqueue-kernel")) { 860 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 861 Args); 862 } else { 863 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 864 } 865 } 866 867 // Emit the pointer argument for multi-grid object. 868 if (HiddenArgNumBytes >= 56) { 869 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 870 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 871 Args); 872 } else { 873 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 874 } 875 } 876 } 877 878 msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( 879 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 880 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 881 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 882 const Function &F = MF.getFunction(); 883 884 auto Kern = HSAMetadataDoc->getMapNode(); 885 886 Align MaxKernArgAlign; 887 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 888 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 889 Kern[".group_segment_fixed_size"] = 890 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 891 Kern[".private_segment_fixed_size"] = 892 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 893 if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) 894 Kern[".uses_dynamic_stack"] = 895 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); 896 if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP()) 897 Kern[".workgroup_processor_mode"] = 898 Kern.getDocument()->getNode(ProgramInfo.WgpMode); 899 900 // FIXME: The metadata treats the minimum as 16? 901 Kern[".kernarg_segment_align"] = 902 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 903 Kern[".wavefront_size"] = 904 Kern.getDocument()->getNode(STM.getWavefrontSize()); 905 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 906 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 907 908 // Only add AGPR count to metadata for supported devices 909 if (STM.hasMAIInsts()) { 910 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR); 911 } 912 913 Kern[".max_flat_workgroup_size"] = 914 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 915 Kern[".sgpr_spill_count"] = 916 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 917 Kern[".vgpr_spill_count"] = 918 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 919 920 return Kern; 921 } 922 923 bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 924 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 925 } 926 927 void MetadataStreamerMsgPackV3::begin(const Module &Mod, 928 const IsaInfo::AMDGPUTargetID &TargetID) { 929 emitVersion(); 930 emitPrintf(Mod); 931 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 932 } 933 934 void MetadataStreamerMsgPackV3::end() { 935 std::string HSAMetadataString; 936 raw_string_ostream StrOS(HSAMetadataString); 937 HSAMetadataDoc->toYAML(StrOS); 938 939 if (DumpHSAMetadata) 940 dump(StrOS.str()); 941 if (VerifyHSAMetadata) 942 verify(StrOS.str()); 943 } 944 945 void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, 946 const SIProgramInfo &ProgramInfo) { 947 auto &Func = MF.getFunction(); 948 auto Kern = getHSAKernelProps(MF, ProgramInfo); 949 950 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 951 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 952 953 auto Kernels = 954 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 955 956 { 957 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 958 Kern[".symbol"] = Kern.getDocument()->getNode( 959 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 960 emitKernelLanguage(Func, Kern); 961 emitKernelAttrs(Func, Kern); 962 emitKernelArgs(MF, Kern); 963 } 964 965 Kernels.push_back(Kern); 966 } 967 968 //===----------------------------------------------------------------------===// 969 // HSAMetadataStreamerV4 970 //===----------------------------------------------------------------------===// 971 972 void MetadataStreamerMsgPackV4::emitVersion() { 973 auto Version = HSAMetadataDoc->getArrayNode(); 974 Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); 975 Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); 976 getRootMetadata("amdhsa.version") = Version; 977 } 978 979 void MetadataStreamerMsgPackV4::emitTargetID( 980 const IsaInfo::AMDGPUTargetID &TargetID) { 981 getRootMetadata("amdhsa.target") = 982 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); 983 } 984 985 void MetadataStreamerMsgPackV4::begin(const Module &Mod, 986 const IsaInfo::AMDGPUTargetID &TargetID) { 987 emitVersion(); 988 emitTargetID(TargetID); 989 emitPrintf(Mod); 990 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 991 } 992 993 //===----------------------------------------------------------------------===// 994 // HSAMetadataStreamerV5 995 //===----------------------------------------------------------------------===// 996 997 void MetadataStreamerMsgPackV5::emitVersion() { 998 auto Version = HSAMetadataDoc->getArrayNode(); 999 Version.push_back(Version.getDocument()->getNode(VersionMajorV5)); 1000 Version.push_back(Version.getDocument()->getNode(VersionMinorV5)); 1001 getRootMetadata("amdhsa.version") = Version; 1002 } 1003 1004 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( 1005 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 1006 auto &Func = MF.getFunction(); 1007 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1008 1009 // No implicit kernel argument is used. 1010 if (ST.getImplicitArgNumBytes(Func) == 0) 1011 return; 1012 1013 const Module *M = Func.getParent(); 1014 auto &DL = M->getDataLayout(); 1015 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 1016 1017 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 1018 auto Int32Ty = Type::getInt32Ty(Func.getContext()); 1019 auto Int16Ty = Type::getInt16Ty(Func.getContext()); 1020 1021 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 1022 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); 1023 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); 1024 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); 1025 1026 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args); 1027 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args); 1028 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args); 1029 1030 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args); 1031 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args); 1032 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args); 1033 1034 // Reserved for hidden_tool_correlation_id. 1035 Offset += 8; 1036 1037 Offset += 8; // Reserved. 1038 1039 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); 1040 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args); 1041 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args); 1042 1043 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args); 1044 1045 Offset += 6; // Reserved. 1046 auto Int8PtrTy = 1047 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 1048 1049 if (M->getNamedMetadata("llvm.printf.fmts")) { 1050 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 1051 Args); 1052 } else { 1053 Offset += 8; // Skipped. 1054 } 1055 1056 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { 1057 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 1058 Args); 1059 } else { 1060 Offset += 8; // Skipped. 1061 } 1062 1063 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 1064 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 1065 Args); 1066 } else { 1067 Offset += 8; // Skipped. 1068 } 1069 1070 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) 1071 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); 1072 else 1073 Offset += 8; // Skipped. 1074 1075 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 1076 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 1077 Args); 1078 } else { 1079 Offset += 8; // Skipped. 1080 } 1081 1082 if (!Func.hasFnAttribute("amdgpu-no-completion-action") && 1083 // FIXME: Hack for runtime bug 1084 Func.hasFnAttribute("calls-enqueue-kernel")) { 1085 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 1086 Args); 1087 } else { 1088 Offset += 8; // Skipped. 1089 } 1090 1091 Offset += 72; // Reserved. 1092 1093 // hidden_private_base and hidden_shared_base are only when the subtarget has 1094 // ApertureRegs. 1095 if (!ST.hasApertureRegs()) { 1096 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); 1097 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); 1098 } else { 1099 Offset += 8; // Skipped. 1100 } 1101 1102 if (MFI.hasQueuePtr()) 1103 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); 1104 } 1105 1106 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, 1107 msgpack::MapDocNode Kern) { 1108 MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern); 1109 1110 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) 1111 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); 1112 } 1113 1114 1115 } // end namespace HSAMD 1116 } // end namespace AMDGPU 1117 } // end namespace llvm 1118