1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "llvm/IR/Module.h" 22 using namespace llvm; 23 24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 25 const DataLayout &DL) { 26 Type *Ty = Arg.getType(); 27 MaybeAlign ArgAlign; 28 if (Arg.hasByRefAttr()) { 29 Ty = Arg.getParamByRefType(); 30 ArgAlign = Arg.getParamAlign(); 31 } 32 33 if (!ArgAlign) 34 ArgAlign = DL.getABITypeAlign(Ty); 35 36 return std::make_pair(Ty, *ArgAlign); 37 } 38 39 namespace llvm { 40 41 static cl::opt<bool> DumpHSAMetadata( 42 "amdgpu-dump-hsa-metadata", 43 cl::desc("Dump AMDGPU HSA Metadata")); 44 static cl::opt<bool> VerifyHSAMetadata( 45 "amdgpu-verify-hsa-metadata", 46 cl::desc("Verify AMDGPU HSA Metadata")); 47 48 namespace AMDGPU { 49 namespace HSAMD { 50 51 //===----------------------------------------------------------------------===// 52 // HSAMetadataStreamerV2 53 //===----------------------------------------------------------------------===// 54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 56 } 57 58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 59 errs() << "AMDGPU HSA Metadata Parser Test: "; 60 61 HSAMD::Metadata FromHSAMetadataString; 62 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 63 errs() << "FAIL\n"; 64 return; 65 } 66 67 std::string ToHSAMetadataString; 68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 69 errs() << "FAIL\n"; 70 return; 71 } 72 73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 74 << '\n'; 75 if (HSAMetadataString != ToHSAMetadataString) { 76 errs() << "Original input: " << HSAMetadataString << '\n' 77 << "Produced output: " << ToHSAMetadataString << '\n'; 78 } 79 } 80 81 AccessQualifier 82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 83 if (AccQual.empty()) 84 return AccessQualifier::Unknown; 85 86 return StringSwitch<AccessQualifier>(AccQual) 87 .Case("read_only", AccessQualifier::ReadOnly) 88 .Case("write_only", AccessQualifier::WriteOnly) 89 .Case("read_write", AccessQualifier::ReadWrite) 90 .Default(AccessQualifier::Default); 91 } 92 93 AddressSpaceQualifier 94 MetadataStreamerV2::getAddressSpaceQualifier( 95 unsigned AddressSpace) const { 96 switch (AddressSpace) { 97 case AMDGPUAS::PRIVATE_ADDRESS: 98 return AddressSpaceQualifier::Private; 99 case AMDGPUAS::GLOBAL_ADDRESS: 100 return AddressSpaceQualifier::Global; 101 case AMDGPUAS::CONSTANT_ADDRESS: 102 return AddressSpaceQualifier::Constant; 103 case AMDGPUAS::LOCAL_ADDRESS: 104 return AddressSpaceQualifier::Local; 105 case AMDGPUAS::FLAT_ADDRESS: 106 return AddressSpaceQualifier::Generic; 107 case AMDGPUAS::REGION_ADDRESS: 108 return AddressSpaceQualifier::Region; 109 default: 110 return AddressSpaceQualifier::Unknown; 111 } 112 } 113 114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 115 StringRef BaseTypeName) const { 116 if (TypeQual.contains("pipe")) 117 return ValueKind::Pipe; 118 119 return StringSwitch<ValueKind>(BaseTypeName) 120 .Case("image1d_t", ValueKind::Image) 121 .Case("image1d_array_t", ValueKind::Image) 122 .Case("image1d_buffer_t", ValueKind::Image) 123 .Case("image2d_t", ValueKind::Image) 124 .Case("image2d_array_t", ValueKind::Image) 125 .Case("image2d_array_depth_t", ValueKind::Image) 126 .Case("image2d_array_msaa_t", ValueKind::Image) 127 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 128 .Case("image2d_depth_t", ValueKind::Image) 129 .Case("image2d_msaa_t", ValueKind::Image) 130 .Case("image2d_msaa_depth_t", ValueKind::Image) 131 .Case("image3d_t", ValueKind::Image) 132 .Case("sampler_t", ValueKind::Sampler) 133 .Case("queue_t", ValueKind::Queue) 134 .Default(isa<PointerType>(Ty) ? 135 (Ty->getPointerAddressSpace() == 136 AMDGPUAS::LOCAL_ADDRESS ? 137 ValueKind::DynamicSharedPointer : 138 ValueKind::GlobalBuffer) : 139 ValueKind::ByValue); 140 } 141 142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 143 switch (Ty->getTypeID()) { 144 case Type::IntegerTyID: { 145 if (!Signed) 146 return (Twine('u') + getTypeName(Ty, true)).str(); 147 148 auto BitWidth = Ty->getIntegerBitWidth(); 149 switch (BitWidth) { 150 case 8: 151 return "char"; 152 case 16: 153 return "short"; 154 case 32: 155 return "int"; 156 case 64: 157 return "long"; 158 default: 159 return (Twine('i') + Twine(BitWidth)).str(); 160 } 161 } 162 case Type::HalfTyID: 163 return "half"; 164 case Type::FloatTyID: 165 return "float"; 166 case Type::DoubleTyID: 167 return "double"; 168 case Type::FixedVectorTyID: { 169 auto VecTy = cast<FixedVectorType>(Ty); 170 auto ElTy = VecTy->getElementType(); 171 auto NumElements = VecTy->getNumElements(); 172 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 173 } 174 default: 175 return "unknown"; 176 } 177 } 178 179 std::vector<uint32_t> 180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 181 std::vector<uint32_t> Dims; 182 if (Node->getNumOperands() != 3) 183 return Dims; 184 185 for (auto &Op : Node->operands()) 186 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 187 return Dims; 188 } 189 190 Kernel::CodeProps::Metadata 191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 192 const SIProgramInfo &ProgramInfo) const { 193 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 194 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 195 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 196 const Function &F = MF.getFunction(); 197 198 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 199 F.getCallingConv() == CallingConv::SPIR_KERNEL); 200 201 Align MaxKernArgAlign; 202 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 203 MaxKernArgAlign); 204 HSACodeProps.mKernargSegmentAlign = 205 std::max(MaxKernArgAlign, Align(4)).value(); 206 207 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 208 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 209 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 210 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 211 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 212 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 213 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 214 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 215 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 216 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 217 218 return HSACodeProps; 219 } 220 221 Kernel::DebugProps::Metadata 222 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 223 const SIProgramInfo &ProgramInfo) const { 224 return HSAMD::Kernel::DebugProps::Metadata(); 225 } 226 227 void MetadataStreamerV2::emitVersion() { 228 auto &Version = HSAMetadata.mVersion; 229 230 Version.push_back(VersionMajorV2); 231 Version.push_back(VersionMinorV2); 232 } 233 234 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 235 auto &Printf = HSAMetadata.mPrintf; 236 237 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 238 if (!Node) 239 return; 240 241 for (auto Op : Node->operands()) 242 if (Op->getNumOperands()) 243 Printf.push_back( 244 std::string(cast<MDString>(Op->getOperand(0))->getString())); 245 } 246 247 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 248 auto &Kernel = HSAMetadata.mKernels.back(); 249 250 // TODO: What about other languages? 251 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 252 if (!Node || !Node->getNumOperands()) 253 return; 254 auto Op0 = Node->getOperand(0); 255 if (Op0->getNumOperands() <= 1) 256 return; 257 258 Kernel.mLanguage = "OpenCL C"; 259 Kernel.mLanguageVersion.push_back( 260 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 261 Kernel.mLanguageVersion.push_back( 262 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 263 } 264 265 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 266 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 267 268 if (auto Node = Func.getMetadata("reqd_work_group_size")) 269 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 270 if (auto Node = Func.getMetadata("work_group_size_hint")) 271 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 272 if (auto Node = Func.getMetadata("vec_type_hint")) { 273 Attrs.mVecTypeHint = getTypeName( 274 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 275 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 276 } 277 if (Func.hasFnAttribute("runtime-handle")) { 278 Attrs.mRuntimeHandle = 279 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 280 } 281 } 282 283 void MetadataStreamerV2::emitKernelArgs(const Function &Func, 284 const GCNSubtarget &ST) { 285 for (auto &Arg : Func.args()) 286 emitKernelArg(Arg); 287 288 emitHiddenKernelArgs(Func, ST); 289 } 290 291 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 292 auto Func = Arg.getParent(); 293 auto ArgNo = Arg.getArgNo(); 294 const MDNode *Node; 295 296 StringRef Name; 297 Node = Func->getMetadata("kernel_arg_name"); 298 if (Node && ArgNo < Node->getNumOperands()) 299 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 300 else if (Arg.hasName()) 301 Name = Arg.getName(); 302 303 StringRef TypeName; 304 Node = Func->getMetadata("kernel_arg_type"); 305 if (Node && ArgNo < Node->getNumOperands()) 306 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 307 308 StringRef BaseTypeName; 309 Node = Func->getMetadata("kernel_arg_base_type"); 310 if (Node && ArgNo < Node->getNumOperands()) 311 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 312 313 StringRef AccQual; 314 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 315 Arg.hasNoAliasAttr()) { 316 AccQual = "read_only"; 317 } else { 318 Node = Func->getMetadata("kernel_arg_access_qual"); 319 if (Node && ArgNo < Node->getNumOperands()) 320 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 321 } 322 323 StringRef TypeQual; 324 Node = Func->getMetadata("kernel_arg_type_qual"); 325 if (Node && ArgNo < Node->getNumOperands()) 326 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 327 328 const DataLayout &DL = Func->getParent()->getDataLayout(); 329 330 MaybeAlign PointeeAlign; 331 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) { 332 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 333 // FIXME: Should report this for all address spaces 334 PointeeAlign = Arg.getParamAlign().valueOrOne(); 335 } 336 } 337 338 Type *ArgTy; 339 Align ArgAlign; 340 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 341 342 emitKernelArg(DL, ArgTy, ArgAlign, 343 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name, 344 TypeName, BaseTypeName, AccQual, TypeQual); 345 } 346 347 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 348 Align Alignment, ValueKind ValueKind, 349 MaybeAlign PointeeAlign, StringRef Name, 350 StringRef TypeName, 351 StringRef BaseTypeName, 352 StringRef AccQual, StringRef TypeQual) { 353 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 354 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 355 356 Arg.mName = std::string(Name); 357 Arg.mTypeName = std::string(TypeName); 358 Arg.mSize = DL.getTypeAllocSize(Ty); 359 Arg.mAlign = Alignment.value(); 360 Arg.mValueKind = ValueKind; 361 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; 362 363 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 364 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 365 366 Arg.mAccQual = getAccessQualifier(AccQual); 367 368 // TODO: Emit Arg.mActualAccQual. 369 370 SmallVector<StringRef, 1> SplitTypeQuals; 371 TypeQual.split(SplitTypeQuals, " ", -1, false); 372 for (StringRef Key : SplitTypeQuals) { 373 auto P = StringSwitch<bool*>(Key) 374 .Case("const", &Arg.mIsConst) 375 .Case("restrict", &Arg.mIsRestrict) 376 .Case("volatile", &Arg.mIsVolatile) 377 .Case("pipe", &Arg.mIsPipe) 378 .Default(nullptr); 379 if (P) 380 *P = true; 381 } 382 } 383 384 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func, 385 const GCNSubtarget &ST) { 386 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 387 if (!HiddenArgNumBytes) 388 return; 389 390 auto &DL = Func.getParent()->getDataLayout(); 391 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 392 393 if (HiddenArgNumBytes >= 8) 394 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX); 395 if (HiddenArgNumBytes >= 16) 396 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY); 397 if (HiddenArgNumBytes >= 24) 398 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ); 399 400 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 401 AMDGPUAS::GLOBAL_ADDRESS); 402 403 if (HiddenArgNumBytes >= 32) { 404 // We forbid the use of features requiring hostcall when compiling OpenCL 405 // before code object V5, which makes the mutual exclusion between the 406 // "printf buffer" and "hostcall buffer" here sound. 407 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 408 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); 409 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 410 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); 411 else 412 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 413 } 414 415 // Emit "default queue" and "completion action" arguments if enqueue kernel is 416 // used, otherwise emit dummy "none" arguments. 417 if (HiddenArgNumBytes >= 48) { 418 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 419 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); 420 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); 421 } else { 422 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 423 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 424 } 425 } 426 427 // Emit the pointer argument for multi-grid object. 428 if (HiddenArgNumBytes >= 56) { 429 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) 430 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); 431 else 432 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 433 } 434 } 435 436 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 437 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 438 } 439 440 void MetadataStreamerV2::begin(const Module &Mod, 441 const IsaInfo::AMDGPUTargetID &TargetID) { 442 emitVersion(); 443 emitPrintf(Mod); 444 } 445 446 void MetadataStreamerV2::end() { 447 std::string HSAMetadataString; 448 if (toString(HSAMetadata, HSAMetadataString)) 449 return; 450 451 if (DumpHSAMetadata) 452 dump(HSAMetadataString); 453 if (VerifyHSAMetadata) 454 verify(HSAMetadataString); 455 } 456 457 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 458 const SIProgramInfo &ProgramInfo) { 459 auto &Func = MF.getFunction(); 460 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 461 return; 462 463 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 464 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 465 466 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 467 auto &Kernel = HSAMetadata.mKernels.back(); 468 469 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 470 Kernel.mName = std::string(Func.getName()); 471 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 472 emitKernelLanguage(Func); 473 emitKernelAttrs(Func); 474 emitKernelArgs(Func, ST); 475 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 476 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 477 } 478 479 //===----------------------------------------------------------------------===// 480 // HSAMetadataStreamerV3 481 //===----------------------------------------------------------------------===// 482 483 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 484 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 485 } 486 487 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 488 errs() << "AMDGPU HSA Metadata Parser Test: "; 489 490 msgpack::Document FromHSAMetadataString; 491 492 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 493 errs() << "FAIL\n"; 494 return; 495 } 496 497 std::string ToHSAMetadataString; 498 raw_string_ostream StrOS(ToHSAMetadataString); 499 FromHSAMetadataString.toYAML(StrOS); 500 501 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 502 if (HSAMetadataString != ToHSAMetadataString) { 503 errs() << "Original input: " << HSAMetadataString << '\n' 504 << "Produced output: " << StrOS.str() << '\n'; 505 } 506 } 507 508 Optional<StringRef> 509 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 510 return StringSwitch<Optional<StringRef>>(AccQual) 511 .Case("read_only", StringRef("read_only")) 512 .Case("write_only", StringRef("write_only")) 513 .Case("read_write", StringRef("read_write")) 514 .Default(None); 515 } 516 517 Optional<StringRef> 518 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 519 switch (AddressSpace) { 520 case AMDGPUAS::PRIVATE_ADDRESS: 521 return StringRef("private"); 522 case AMDGPUAS::GLOBAL_ADDRESS: 523 return StringRef("global"); 524 case AMDGPUAS::CONSTANT_ADDRESS: 525 return StringRef("constant"); 526 case AMDGPUAS::LOCAL_ADDRESS: 527 return StringRef("local"); 528 case AMDGPUAS::FLAT_ADDRESS: 529 return StringRef("generic"); 530 case AMDGPUAS::REGION_ADDRESS: 531 return StringRef("region"); 532 default: 533 return None; 534 } 535 } 536 537 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 538 StringRef BaseTypeName) const { 539 if (TypeQual.contains("pipe")) 540 return "pipe"; 541 542 return StringSwitch<StringRef>(BaseTypeName) 543 .Case("image1d_t", "image") 544 .Case("image1d_array_t", "image") 545 .Case("image1d_buffer_t", "image") 546 .Case("image2d_t", "image") 547 .Case("image2d_array_t", "image") 548 .Case("image2d_array_depth_t", "image") 549 .Case("image2d_array_msaa_t", "image") 550 .Case("image2d_array_msaa_depth_t", "image") 551 .Case("image2d_depth_t", "image") 552 .Case("image2d_msaa_t", "image") 553 .Case("image2d_msaa_depth_t", "image") 554 .Case("image3d_t", "image") 555 .Case("sampler_t", "sampler") 556 .Case("queue_t", "queue") 557 .Default(isa<PointerType>(Ty) 558 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 559 ? "dynamic_shared_pointer" 560 : "global_buffer") 561 : "by_value"); 562 } 563 564 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 565 switch (Ty->getTypeID()) { 566 case Type::IntegerTyID: { 567 if (!Signed) 568 return (Twine('u') + getTypeName(Ty, true)).str(); 569 570 auto BitWidth = Ty->getIntegerBitWidth(); 571 switch (BitWidth) { 572 case 8: 573 return "char"; 574 case 16: 575 return "short"; 576 case 32: 577 return "int"; 578 case 64: 579 return "long"; 580 default: 581 return (Twine('i') + Twine(BitWidth)).str(); 582 } 583 } 584 case Type::HalfTyID: 585 return "half"; 586 case Type::FloatTyID: 587 return "float"; 588 case Type::DoubleTyID: 589 return "double"; 590 case Type::FixedVectorTyID: { 591 auto VecTy = cast<FixedVectorType>(Ty); 592 auto ElTy = VecTy->getElementType(); 593 auto NumElements = VecTy->getNumElements(); 594 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 595 } 596 default: 597 return "unknown"; 598 } 599 } 600 601 msgpack::ArrayDocNode 602 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 603 auto Dims = HSAMetadataDoc->getArrayNode(); 604 if (Node->getNumOperands() != 3) 605 return Dims; 606 607 for (auto &Op : Node->operands()) 608 Dims.push_back(Dims.getDocument()->getNode( 609 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 610 return Dims; 611 } 612 613 void MetadataStreamerV3::emitVersion() { 614 auto Version = HSAMetadataDoc->getArrayNode(); 615 Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); 616 Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); 617 getRootMetadata("amdhsa.version") = Version; 618 } 619 620 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 621 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 622 if (!Node) 623 return; 624 625 auto Printf = HSAMetadataDoc->getArrayNode(); 626 for (auto Op : Node->operands()) 627 if (Op->getNumOperands()) 628 Printf.push_back(Printf.getDocument()->getNode( 629 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 630 getRootMetadata("amdhsa.printf") = Printf; 631 } 632 633 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 634 msgpack::MapDocNode Kern) { 635 // TODO: What about other languages? 636 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 637 if (!Node || !Node->getNumOperands()) 638 return; 639 auto Op0 = Node->getOperand(0); 640 if (Op0->getNumOperands() <= 1) 641 return; 642 643 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 644 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 645 LanguageVersion.push_back(Kern.getDocument()->getNode( 646 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 647 LanguageVersion.push_back(Kern.getDocument()->getNode( 648 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 649 Kern[".language_version"] = LanguageVersion; 650 } 651 652 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 653 msgpack::MapDocNode Kern) { 654 655 if (auto Node = Func.getMetadata("reqd_work_group_size")) 656 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 657 if (auto Node = Func.getMetadata("work_group_size_hint")) 658 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 659 if (auto Node = Func.getMetadata("vec_type_hint")) { 660 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 661 getTypeName( 662 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 663 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 664 /*Copy=*/true); 665 } 666 if (Func.hasFnAttribute("runtime-handle")) { 667 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 668 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 669 /*Copy=*/true); 670 } 671 if (Func.hasFnAttribute("device-init")) 672 Kern[".kind"] = Kern.getDocument()->getNode("init"); 673 else if (Func.hasFnAttribute("device-fini")) 674 Kern[".kind"] = Kern.getDocument()->getNode("fini"); 675 } 676 677 void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF, 678 msgpack::MapDocNode Kern) { 679 auto &Func = MF.getFunction(); 680 unsigned Offset = 0; 681 auto Args = HSAMetadataDoc->getArrayNode(); 682 for (auto &Arg : Func.args()) 683 emitKernelArg(Arg, Offset, Args); 684 685 emitHiddenKernelArgs(MF, Offset, Args); 686 687 Kern[".args"] = Args; 688 } 689 690 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 691 msgpack::ArrayDocNode Args) { 692 auto Func = Arg.getParent(); 693 auto ArgNo = Arg.getArgNo(); 694 const MDNode *Node; 695 696 StringRef Name; 697 Node = Func->getMetadata("kernel_arg_name"); 698 if (Node && ArgNo < Node->getNumOperands()) 699 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 700 else if (Arg.hasName()) 701 Name = Arg.getName(); 702 703 StringRef TypeName; 704 Node = Func->getMetadata("kernel_arg_type"); 705 if (Node && ArgNo < Node->getNumOperands()) 706 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 707 708 StringRef BaseTypeName; 709 Node = Func->getMetadata("kernel_arg_base_type"); 710 if (Node && ArgNo < Node->getNumOperands()) 711 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 712 713 StringRef AccQual; 714 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 715 Arg.hasNoAliasAttr()) { 716 AccQual = "read_only"; 717 } else { 718 Node = Func->getMetadata("kernel_arg_access_qual"); 719 if (Node && ArgNo < Node->getNumOperands()) 720 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 721 } 722 723 StringRef TypeQual; 724 Node = Func->getMetadata("kernel_arg_type_qual"); 725 if (Node && ArgNo < Node->getNumOperands()) 726 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 727 728 const DataLayout &DL = Func->getParent()->getDataLayout(); 729 730 MaybeAlign PointeeAlign; 731 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 732 733 // FIXME: Need to distinguish in memory alignment from pointer alignment. 734 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 735 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) 736 PointeeAlign = Arg.getParamAlign().valueOrOne(); 737 } 738 739 // There's no distinction between byval aggregates and raw aggregates. 740 Type *ArgTy; 741 Align ArgAlign; 742 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 743 744 emitKernelArg(DL, ArgTy, ArgAlign, 745 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 746 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 747 } 748 749 void MetadataStreamerV3::emitKernelArg( 750 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 751 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 752 StringRef Name, StringRef TypeName, StringRef BaseTypeName, 753 StringRef AccQual, StringRef TypeQual) { 754 auto Arg = Args.getDocument()->getMapNode(); 755 756 if (!Name.empty()) 757 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 758 if (!TypeName.empty()) 759 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 760 auto Size = DL.getTypeAllocSize(Ty); 761 Arg[".size"] = Arg.getDocument()->getNode(Size); 762 Offset = alignTo(Offset, Alignment); 763 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 764 Offset += Size; 765 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 766 if (PointeeAlign) 767 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 768 769 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 770 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 771 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); 772 773 if (auto AQ = getAccessQualifier(AccQual)) 774 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 775 776 // TODO: Emit Arg[".actual_access"]. 777 778 SmallVector<StringRef, 1> SplitTypeQuals; 779 TypeQual.split(SplitTypeQuals, " ", -1, false); 780 for (StringRef Key : SplitTypeQuals) { 781 if (Key == "const") 782 Arg[".is_const"] = Arg.getDocument()->getNode(true); 783 else if (Key == "restrict") 784 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 785 else if (Key == "volatile") 786 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 787 else if (Key == "pipe") 788 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 789 } 790 791 Args.push_back(Arg); 792 } 793 794 void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF, 795 unsigned &Offset, 796 msgpack::ArrayDocNode Args) { 797 auto &Func = MF.getFunction(); 798 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 799 800 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 801 if (!HiddenArgNumBytes) 802 return; 803 804 const Module *M = Func.getParent(); 805 auto &DL = M->getDataLayout(); 806 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 807 808 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 809 810 if (HiddenArgNumBytes >= 8) 811 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 812 Args); 813 if (HiddenArgNumBytes >= 16) 814 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 815 Args); 816 if (HiddenArgNumBytes >= 24) 817 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 818 Args); 819 820 auto Int8PtrTy = 821 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 822 823 if (HiddenArgNumBytes >= 32) { 824 // We forbid the use of features requiring hostcall when compiling OpenCL 825 // before code object V5, which makes the mutual exclusion between the 826 // "printf buffer" and "hostcall buffer" here sound. 827 if (M->getNamedMetadata("llvm.printf.fmts")) 828 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 829 Args); 830 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 831 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 832 Args); 833 else 834 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 835 } 836 837 // Emit "default queue" and "completion action" arguments if enqueue kernel is 838 // used, otherwise emit dummy "none" arguments. 839 if (HiddenArgNumBytes >= 48) { 840 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 841 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 842 Args); 843 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 844 Args); 845 } else { 846 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 847 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 848 } 849 } 850 851 // Emit the pointer argument for multi-grid object. 852 if (HiddenArgNumBytes >= 56) { 853 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 854 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 855 Args); 856 } else { 857 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 858 } 859 } 860 } 861 862 msgpack::MapDocNode 863 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 864 const SIProgramInfo &ProgramInfo) const { 865 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 866 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 867 const Function &F = MF.getFunction(); 868 869 auto Kern = HSAMetadataDoc->getMapNode(); 870 871 Align MaxKernArgAlign; 872 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 873 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 874 Kern[".group_segment_fixed_size"] = 875 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 876 Kern[".private_segment_fixed_size"] = 877 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 878 Kern[".uses_dynamic_stack"] = 879 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); 880 881 // FIXME: The metadata treats the minimum as 16? 882 Kern[".kernarg_segment_align"] = 883 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 884 Kern[".wavefront_size"] = 885 Kern.getDocument()->getNode(STM.getWavefrontSize()); 886 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 887 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 888 889 // Only add AGPR count to metadata for supported devices 890 if (STM.hasMAIInsts()) { 891 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR); 892 } 893 894 Kern[".max_flat_workgroup_size"] = 895 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 896 Kern[".sgpr_spill_count"] = 897 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 898 Kern[".vgpr_spill_count"] = 899 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 900 901 return Kern; 902 } 903 904 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 905 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 906 } 907 908 void MetadataStreamerV3::begin(const Module &Mod, 909 const IsaInfo::AMDGPUTargetID &TargetID) { 910 emitVersion(); 911 emitPrintf(Mod); 912 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 913 } 914 915 void MetadataStreamerV3::end() { 916 std::string HSAMetadataString; 917 raw_string_ostream StrOS(HSAMetadataString); 918 HSAMetadataDoc->toYAML(StrOS); 919 920 if (DumpHSAMetadata) 921 dump(StrOS.str()); 922 if (VerifyHSAMetadata) 923 verify(StrOS.str()); 924 } 925 926 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 927 const SIProgramInfo &ProgramInfo) { 928 auto &Func = MF.getFunction(); 929 auto Kern = getHSAKernelProps(MF, ProgramInfo); 930 931 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 932 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 933 934 auto Kernels = 935 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 936 937 { 938 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 939 Kern[".symbol"] = Kern.getDocument()->getNode( 940 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 941 emitKernelLanguage(Func, Kern); 942 emitKernelAttrs(Func, Kern); 943 emitKernelArgs(MF, Kern); 944 } 945 946 Kernels.push_back(Kern); 947 } 948 949 //===----------------------------------------------------------------------===// 950 // HSAMetadataStreamerV4 951 //===----------------------------------------------------------------------===// 952 953 void MetadataStreamerV4::emitVersion() { 954 auto Version = HSAMetadataDoc->getArrayNode(); 955 Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); 956 Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); 957 getRootMetadata("amdhsa.version") = Version; 958 } 959 960 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) { 961 getRootMetadata("amdhsa.target") = 962 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); 963 } 964 965 void MetadataStreamerV4::begin(const Module &Mod, 966 const IsaInfo::AMDGPUTargetID &TargetID) { 967 emitVersion(); 968 emitTargetID(TargetID); 969 emitPrintf(Mod); 970 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 971 } 972 973 //===----------------------------------------------------------------------===// 974 // HSAMetadataStreamerV5 975 //===----------------------------------------------------------------------===// 976 977 void MetadataStreamerV5::emitVersion() { 978 auto Version = HSAMetadataDoc->getArrayNode(); 979 Version.push_back(Version.getDocument()->getNode(VersionMajorV5)); 980 Version.push_back(Version.getDocument()->getNode(VersionMinorV5)); 981 getRootMetadata("amdhsa.version") = Version; 982 } 983 984 void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF, 985 unsigned &Offset, 986 msgpack::ArrayDocNode Args) { 987 auto &Func = MF.getFunction(); 988 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 989 990 // No implicit kernel argument is used. 991 if (ST.getImplicitArgNumBytes(Func) == 0) 992 return; 993 994 const Module *M = Func.getParent(); 995 auto &DL = M->getDataLayout(); 996 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 997 998 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 999 auto Int32Ty = Type::getInt32Ty(Func.getContext()); 1000 auto Int16Ty = Type::getInt16Ty(Func.getContext()); 1001 1002 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 1003 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); 1004 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); 1005 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); 1006 1007 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args); 1008 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args); 1009 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args); 1010 1011 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args); 1012 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args); 1013 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args); 1014 1015 // Reserved for hidden_tool_correlation_id. 1016 Offset += 8; 1017 1018 Offset += 8; // Reserved. 1019 1020 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); 1021 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args); 1022 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args); 1023 1024 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args); 1025 1026 Offset += 6; // Reserved. 1027 auto Int8PtrTy = 1028 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 1029 1030 if (M->getNamedMetadata("llvm.printf.fmts")) { 1031 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 1032 Args); 1033 } else { 1034 Offset += 8; // Skipped. 1035 } 1036 1037 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { 1038 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 1039 Args); 1040 } else { 1041 Offset += 8; // Skipped. 1042 } 1043 1044 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 1045 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 1046 Args); 1047 } else { 1048 Offset += 8; // Skipped. 1049 } 1050 1051 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) 1052 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); 1053 else 1054 Offset += 8; // Skipped. 1055 1056 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 1057 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 1058 Args); 1059 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 1060 Args); 1061 } else { 1062 Offset += 16; // Skipped. 1063 } 1064 1065 Offset += 72; // Reserved. 1066 1067 // hidden_private_base and hidden_shared_base are only when the subtarget has 1068 // ApertureRegs. 1069 if (!ST.hasApertureRegs()) { 1070 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); 1071 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); 1072 } else { 1073 Offset += 8; // Skipped. 1074 } 1075 1076 if (MFI.hasQueuePtr()) 1077 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); 1078 } 1079 1080 } // end namespace HSAMD 1081 } // end namespace AMDGPU 1082 } // end namespace llvm 1083