10b57cec5SDimitry Andric //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// AMDGPU HSA Metadata Streamer. 110b57cec5SDimitry Andric /// 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AMDGPUHSAMetadataStreamer.h" 160b57cec5SDimitry Andric #include "AMDGPU.h" 17e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 180b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUTargetStreamer.h" 190b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 200b57cec5SDimitry Andric #include "SIProgramInfo.h" 210b57cec5SDimitry Andric #include "llvm/IR/Module.h" 22e8d8bef9SDimitry Andric using namespace llvm; 23e8d8bef9SDimitry Andric 24e8d8bef9SDimitry Andric static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 25e8d8bef9SDimitry Andric const DataLayout &DL) { 26e8d8bef9SDimitry Andric Type *Ty = Arg.getType(); 27e8d8bef9SDimitry Andric MaybeAlign ArgAlign; 28e8d8bef9SDimitry Andric if (Arg.hasByRefAttr()) { 29e8d8bef9SDimitry Andric Ty = Arg.getParamByRefType(); 30e8d8bef9SDimitry Andric ArgAlign = Arg.getParamAlign(); 31e8d8bef9SDimitry Andric } 32e8d8bef9SDimitry Andric 33e8d8bef9SDimitry Andric if (!ArgAlign) 34e8d8bef9SDimitry Andric ArgAlign = DL.getABITypeAlign(Ty); 35e8d8bef9SDimitry Andric 36*bdd1243dSDimitry Andric return std::pair(Ty, *ArgAlign); 37e8d8bef9SDimitry Andric } 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric namespace llvm { 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric static cl::opt<bool> DumpHSAMetadata( 420b57cec5SDimitry Andric "amdgpu-dump-hsa-metadata", 430b57cec5SDimitry Andric cl::desc("Dump AMDGPU HSA Metadata")); 440b57cec5SDimitry Andric static cl::opt<bool> VerifyHSAMetadata( 450b57cec5SDimitry Andric "amdgpu-verify-hsa-metadata", 460b57cec5SDimitry Andric cl::desc("Verify AMDGPU HSA Metadata")); 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric namespace AMDGPU { 490b57cec5SDimitry Andric namespace HSAMD { 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 520b57cec5SDimitry Andric // HSAMetadataStreamerV2 530b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 54*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const { 550b57cec5SDimitry Andric errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 560b57cec5SDimitry Andric } 570b57cec5SDimitry Andric 58*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const { 590b57cec5SDimitry Andric errs() << "AMDGPU HSA Metadata Parser Test: "; 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric HSAMD::Metadata FromHSAMetadataString; 62e8d8bef9SDimitry Andric if (fromString(HSAMetadataString, FromHSAMetadataString)) { 630b57cec5SDimitry Andric errs() << "FAIL\n"; 640b57cec5SDimitry Andric return; 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric std::string ToHSAMetadataString; 680b57cec5SDimitry Andric if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 690b57cec5SDimitry Andric errs() << "FAIL\n"; 700b57cec5SDimitry Andric return; 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 740b57cec5SDimitry Andric << '\n'; 750b57cec5SDimitry Andric if (HSAMetadataString != ToHSAMetadataString) { 760b57cec5SDimitry Andric errs() << "Original input: " << HSAMetadataString << '\n' 770b57cec5SDimitry Andric << "Produced output: " << ToHSAMetadataString << '\n'; 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric AccessQualifier 82*bdd1243dSDimitry Andric MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const { 830b57cec5SDimitry Andric if (AccQual.empty()) 840b57cec5SDimitry Andric return AccessQualifier::Unknown; 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric return StringSwitch<AccessQualifier>(AccQual) 870b57cec5SDimitry Andric .Case("read_only", AccessQualifier::ReadOnly) 880b57cec5SDimitry Andric .Case("write_only", AccessQualifier::WriteOnly) 890b57cec5SDimitry Andric .Case("read_write", AccessQualifier::ReadWrite) 900b57cec5SDimitry Andric .Default(AccessQualifier::Default); 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric AddressSpaceQualifier 94*bdd1243dSDimitry Andric MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const { 950b57cec5SDimitry Andric switch (AddressSpace) { 960b57cec5SDimitry Andric case AMDGPUAS::PRIVATE_ADDRESS: 970b57cec5SDimitry Andric return AddressSpaceQualifier::Private; 980b57cec5SDimitry Andric case AMDGPUAS::GLOBAL_ADDRESS: 990b57cec5SDimitry Andric return AddressSpaceQualifier::Global; 1000b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_ADDRESS: 1010b57cec5SDimitry Andric return AddressSpaceQualifier::Constant; 1020b57cec5SDimitry Andric case AMDGPUAS::LOCAL_ADDRESS: 1030b57cec5SDimitry Andric return AddressSpaceQualifier::Local; 1040b57cec5SDimitry Andric case AMDGPUAS::FLAT_ADDRESS: 1050b57cec5SDimitry Andric return AddressSpaceQualifier::Generic; 1060b57cec5SDimitry Andric case AMDGPUAS::REGION_ADDRESS: 1070b57cec5SDimitry Andric return AddressSpaceQualifier::Region; 1080b57cec5SDimitry Andric default: 1090b57cec5SDimitry Andric return AddressSpaceQualifier::Unknown; 1100b57cec5SDimitry Andric } 1110b57cec5SDimitry Andric } 1120b57cec5SDimitry Andric 113*bdd1243dSDimitry Andric ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual, 1140b57cec5SDimitry Andric StringRef BaseTypeName) const { 115349cc55cSDimitry Andric if (TypeQual.contains("pipe")) 1160b57cec5SDimitry Andric return ValueKind::Pipe; 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric return StringSwitch<ValueKind>(BaseTypeName) 1190b57cec5SDimitry Andric .Case("image1d_t", ValueKind::Image) 1200b57cec5SDimitry Andric .Case("image1d_array_t", ValueKind::Image) 1210b57cec5SDimitry Andric .Case("image1d_buffer_t", ValueKind::Image) 1220b57cec5SDimitry Andric .Case("image2d_t", ValueKind::Image) 1230b57cec5SDimitry Andric .Case("image2d_array_t", ValueKind::Image) 1240b57cec5SDimitry Andric .Case("image2d_array_depth_t", ValueKind::Image) 1250b57cec5SDimitry Andric .Case("image2d_array_msaa_t", ValueKind::Image) 1260b57cec5SDimitry Andric .Case("image2d_array_msaa_depth_t", ValueKind::Image) 1270b57cec5SDimitry Andric .Case("image2d_depth_t", ValueKind::Image) 1280b57cec5SDimitry Andric .Case("image2d_msaa_t", ValueKind::Image) 1290b57cec5SDimitry Andric .Case("image2d_msaa_depth_t", ValueKind::Image) 1300b57cec5SDimitry Andric .Case("image3d_t", ValueKind::Image) 1310b57cec5SDimitry Andric .Case("sampler_t", ValueKind::Sampler) 1320b57cec5SDimitry Andric .Case("queue_t", ValueKind::Queue) 1330b57cec5SDimitry Andric .Default(isa<PointerType>(Ty) ? 1340b57cec5SDimitry Andric (Ty->getPointerAddressSpace() == 1350b57cec5SDimitry Andric AMDGPUAS::LOCAL_ADDRESS ? 1360b57cec5SDimitry Andric ValueKind::DynamicSharedPointer : 1370b57cec5SDimitry Andric ValueKind::GlobalBuffer) : 1380b57cec5SDimitry Andric ValueKind::ByValue); 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 141*bdd1243dSDimitry Andric std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const { 1420b57cec5SDimitry Andric switch (Ty->getTypeID()) { 1430b57cec5SDimitry Andric case Type::IntegerTyID: { 1440b57cec5SDimitry Andric if (!Signed) 1450b57cec5SDimitry Andric return (Twine('u') + getTypeName(Ty, true)).str(); 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric auto BitWidth = Ty->getIntegerBitWidth(); 1480b57cec5SDimitry Andric switch (BitWidth) { 1490b57cec5SDimitry Andric case 8: 1500b57cec5SDimitry Andric return "char"; 1510b57cec5SDimitry Andric case 16: 1520b57cec5SDimitry Andric return "short"; 1530b57cec5SDimitry Andric case 32: 1540b57cec5SDimitry Andric return "int"; 1550b57cec5SDimitry Andric case 64: 1560b57cec5SDimitry Andric return "long"; 1570b57cec5SDimitry Andric default: 1580b57cec5SDimitry Andric return (Twine('i') + Twine(BitWidth)).str(); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric case Type::HalfTyID: 1620b57cec5SDimitry Andric return "half"; 1630b57cec5SDimitry Andric case Type::FloatTyID: 1640b57cec5SDimitry Andric return "float"; 1650b57cec5SDimitry Andric case Type::DoubleTyID: 1660b57cec5SDimitry Andric return "double"; 1675ffd83dbSDimitry Andric case Type::FixedVectorTyID: { 1685ffd83dbSDimitry Andric auto VecTy = cast<FixedVectorType>(Ty); 1690b57cec5SDimitry Andric auto ElTy = VecTy->getElementType(); 1705ffd83dbSDimitry Andric auto NumElements = VecTy->getNumElements(); 1710b57cec5SDimitry Andric return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 1720b57cec5SDimitry Andric } 1730b57cec5SDimitry Andric default: 1740b57cec5SDimitry Andric return "unknown"; 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric std::vector<uint32_t> 179*bdd1243dSDimitry Andric MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const { 1800b57cec5SDimitry Andric std::vector<uint32_t> Dims; 1810b57cec5SDimitry Andric if (Node->getNumOperands() != 3) 1820b57cec5SDimitry Andric return Dims; 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric for (auto &Op : Node->operands()) 1850b57cec5SDimitry Andric Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 1860b57cec5SDimitry Andric return Dims; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 189*bdd1243dSDimitry Andric Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps( 190*bdd1243dSDimitry Andric const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 1910b57cec5SDimitry Andric const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 1920b57cec5SDimitry Andric const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 1930b57cec5SDimitry Andric HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 1940b57cec5SDimitry Andric const Function &F = MF.getFunction(); 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 1970b57cec5SDimitry Andric F.getCallingConv() == CallingConv::SPIR_KERNEL); 1980b57cec5SDimitry Andric 1998bcb0991SDimitry Andric Align MaxKernArgAlign; 2000b57cec5SDimitry Andric HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 2010b57cec5SDimitry Andric MaxKernArgAlign); 2028bcb0991SDimitry Andric HSACodeProps.mKernargSegmentAlign = 2038bcb0991SDimitry Andric std::max(MaxKernArgAlign, Align(4)).value(); 204349cc55cSDimitry Andric 205349cc55cSDimitry Andric HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 206349cc55cSDimitry Andric HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 2070b57cec5SDimitry Andric HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 2080b57cec5SDimitry Andric HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 2090b57cec5SDimitry Andric HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 2100b57cec5SDimitry Andric HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 2110b57cec5SDimitry Andric HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 2120b57cec5SDimitry Andric HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 2130b57cec5SDimitry Andric HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 2140b57cec5SDimitry Andric HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric return HSACodeProps; 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 219*bdd1243dSDimitry Andric Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps( 220*bdd1243dSDimitry Andric const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 2210b57cec5SDimitry Andric return HSAMD::Kernel::DebugProps::Metadata(); 2220b57cec5SDimitry Andric } 2230b57cec5SDimitry Andric 224*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitVersion() { 2250b57cec5SDimitry Andric auto &Version = HSAMetadata.mVersion; 2260b57cec5SDimitry Andric 227fe6060f1SDimitry Andric Version.push_back(VersionMajorV2); 228fe6060f1SDimitry Andric Version.push_back(VersionMinorV2); 2290b57cec5SDimitry Andric } 2300b57cec5SDimitry Andric 231*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) { 2320b57cec5SDimitry Andric auto &Printf = HSAMetadata.mPrintf; 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 2350b57cec5SDimitry Andric if (!Node) 2360b57cec5SDimitry Andric return; 2370b57cec5SDimitry Andric 238*bdd1243dSDimitry Andric for (auto *Op : Node->operands()) 2390b57cec5SDimitry Andric if (Op->getNumOperands()) 2405ffd83dbSDimitry Andric Printf.push_back( 2415ffd83dbSDimitry Andric std::string(cast<MDString>(Op->getOperand(0))->getString())); 2420b57cec5SDimitry Andric } 2430b57cec5SDimitry Andric 244*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) { 2450b57cec5SDimitry Andric auto &Kernel = HSAMetadata.mKernels.back(); 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric // TODO: What about other languages? 2480b57cec5SDimitry Andric auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 2490b57cec5SDimitry Andric if (!Node || !Node->getNumOperands()) 2500b57cec5SDimitry Andric return; 2510b57cec5SDimitry Andric auto Op0 = Node->getOperand(0); 2520b57cec5SDimitry Andric if (Op0->getNumOperands() <= 1) 2530b57cec5SDimitry Andric return; 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric Kernel.mLanguage = "OpenCL C"; 2560b57cec5SDimitry Andric Kernel.mLanguageVersion.push_back( 2570b57cec5SDimitry Andric mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 2580b57cec5SDimitry Andric Kernel.mLanguageVersion.push_back( 2590b57cec5SDimitry Andric mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric 262*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) { 2630b57cec5SDimitry Andric auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric if (auto Node = Func.getMetadata("reqd_work_group_size")) 2660b57cec5SDimitry Andric Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 2670b57cec5SDimitry Andric if (auto Node = Func.getMetadata("work_group_size_hint")) 2680b57cec5SDimitry Andric Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 2690b57cec5SDimitry Andric if (auto Node = Func.getMetadata("vec_type_hint")) { 2700b57cec5SDimitry Andric Attrs.mVecTypeHint = getTypeName( 2710b57cec5SDimitry Andric cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 2720b57cec5SDimitry Andric mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric if (Func.hasFnAttribute("runtime-handle")) { 2750b57cec5SDimitry Andric Attrs.mRuntimeHandle = 2760b57cec5SDimitry Andric Func.getFnAttribute("runtime-handle").getValueAsString().str(); 2770b57cec5SDimitry Andric } 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric 280*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func, 2810eae32dcSDimitry Andric const GCNSubtarget &ST) { 2820b57cec5SDimitry Andric for (auto &Arg : Func.args()) 2830b57cec5SDimitry Andric emitKernelArg(Arg); 2840b57cec5SDimitry Andric 2850eae32dcSDimitry Andric emitHiddenKernelArgs(Func, ST); 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric 288*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) { 2890b57cec5SDimitry Andric auto Func = Arg.getParent(); 2900b57cec5SDimitry Andric auto ArgNo = Arg.getArgNo(); 2910b57cec5SDimitry Andric const MDNode *Node; 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric StringRef Name; 2940b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_name"); 2950b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 2960b57cec5SDimitry Andric Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 2970b57cec5SDimitry Andric else if (Arg.hasName()) 2980b57cec5SDimitry Andric Name = Arg.getName(); 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric StringRef TypeName; 3010b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_type"); 3020b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 3030b57cec5SDimitry Andric TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric StringRef BaseTypeName; 3060b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_base_type"); 3070b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 3080b57cec5SDimitry Andric BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric StringRef AccQual; 3110b57cec5SDimitry Andric if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 3120b57cec5SDimitry Andric Arg.hasNoAliasAttr()) { 3130b57cec5SDimitry Andric AccQual = "read_only"; 3140b57cec5SDimitry Andric } else { 3150b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_access_qual"); 3160b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 3170b57cec5SDimitry Andric AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric StringRef TypeQual; 3210b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_type_qual"); 3220b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 3230b57cec5SDimitry Andric TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric const DataLayout &DL = Func->getParent()->getDataLayout(); 3260b57cec5SDimitry Andric 3275ffd83dbSDimitry Andric MaybeAlign PointeeAlign; 328e8d8bef9SDimitry Andric if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) { 3290b57cec5SDimitry Andric if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 330e8d8bef9SDimitry Andric // FIXME: Should report this for all address spaces 33104eeddc0SDimitry Andric PointeeAlign = Arg.getParamAlign().valueOrOne(); 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric 335e8d8bef9SDimitry Andric Type *ArgTy; 336e8d8bef9SDimitry Andric Align ArgAlign; 337e8d8bef9SDimitry Andric std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 338e8d8bef9SDimitry Andric 339e8d8bef9SDimitry Andric emitKernelArg(DL, ArgTy, ArgAlign, 340e8d8bef9SDimitry Andric getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name, 341e8d8bef9SDimitry Andric TypeName, BaseTypeName, AccQual, TypeQual); 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 344*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitKernelArg( 345*bdd1243dSDimitry Andric const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind, 346*bdd1243dSDimitry Andric MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName, 347*bdd1243dSDimitry Andric StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) { 3480b57cec5SDimitry Andric HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 3490b57cec5SDimitry Andric auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 3500b57cec5SDimitry Andric 3515ffd83dbSDimitry Andric Arg.mName = std::string(Name); 3525ffd83dbSDimitry Andric Arg.mTypeName = std::string(TypeName); 3530b57cec5SDimitry Andric Arg.mSize = DL.getTypeAllocSize(Ty); 354e8d8bef9SDimitry Andric Arg.mAlign = Alignment.value(); 3550b57cec5SDimitry Andric Arg.mValueKind = ValueKind; 3565ffd83dbSDimitry Andric Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric if (auto PtrTy = dyn_cast<PointerType>(Ty)) 3590b57cec5SDimitry Andric Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 3600b57cec5SDimitry Andric 3610b57cec5SDimitry Andric Arg.mAccQual = getAccessQualifier(AccQual); 3620b57cec5SDimitry Andric 3630b57cec5SDimitry Andric // TODO: Emit Arg.mActualAccQual. 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric SmallVector<StringRef, 1> SplitTypeQuals; 3660b57cec5SDimitry Andric TypeQual.split(SplitTypeQuals, " ", -1, false); 3670b57cec5SDimitry Andric for (StringRef Key : SplitTypeQuals) { 3680b57cec5SDimitry Andric auto P = StringSwitch<bool*>(Key) 3690b57cec5SDimitry Andric .Case("const", &Arg.mIsConst) 3700b57cec5SDimitry Andric .Case("restrict", &Arg.mIsRestrict) 3710b57cec5SDimitry Andric .Case("volatile", &Arg.mIsVolatile) 3720b57cec5SDimitry Andric .Case("pipe", &Arg.mIsPipe) 3730b57cec5SDimitry Andric .Default(nullptr); 3740b57cec5SDimitry Andric if (P) 3750b57cec5SDimitry Andric *P = true; 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric 379*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func, 3800eae32dcSDimitry Andric const GCNSubtarget &ST) { 3810eae32dcSDimitry Andric unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 3820b57cec5SDimitry Andric if (!HiddenArgNumBytes) 3830b57cec5SDimitry Andric return; 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric auto &DL = Func.getParent()->getDataLayout(); 3860b57cec5SDimitry Andric auto Int64Ty = Type::getInt64Ty(Func.getContext()); 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric if (HiddenArgNumBytes >= 8) 389e8d8bef9SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX); 3900b57cec5SDimitry Andric if (HiddenArgNumBytes >= 16) 391e8d8bef9SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY); 3920b57cec5SDimitry Andric if (HiddenArgNumBytes >= 24) 393e8d8bef9SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ); 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 3960b57cec5SDimitry Andric AMDGPUAS::GLOBAL_ADDRESS); 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric if (HiddenArgNumBytes >= 32) { 39981ad6265SDimitry Andric // We forbid the use of features requiring hostcall when compiling OpenCL 40081ad6265SDimitry Andric // before code object V5, which makes the mutual exclusion between the 40181ad6265SDimitry Andric // "printf buffer" and "hostcall buffer" here sound. 4020b57cec5SDimitry Andric if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 403e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); 40481ad6265SDimitry Andric else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 405e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); 40681ad6265SDimitry Andric else 407e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric // Emit "default queue" and "completion action" arguments if enqueue kernel is 4110b57cec5SDimitry Andric // used, otherwise emit dummy "none" arguments. 412*bdd1243dSDimitry Andric if (HiddenArgNumBytes >= 40) { 413*bdd1243dSDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 414e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); 4150b57cec5SDimitry Andric } else { 416e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 417*bdd1243dSDimitry Andric } 418*bdd1243dSDimitry Andric } 419*bdd1243dSDimitry Andric 420*bdd1243dSDimitry Andric if (HiddenArgNumBytes >= 48) { 421*bdd1243dSDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-completion-action") && 422*bdd1243dSDimitry Andric // FIXME: Hack for runtime bug if we fail to optimize this out 423*bdd1243dSDimitry Andric Func.hasFnAttribute("calls-enqueue-kernel")) { 424*bdd1243dSDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); 425*bdd1243dSDimitry Andric } else { 426e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric // Emit the pointer argument for multi-grid object. 43181ad6265SDimitry Andric if (HiddenArgNumBytes >= 56) { 43281ad6265SDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) 433e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); 43481ad6265SDimitry Andric else 43581ad6265SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 43681ad6265SDimitry Andric } 4370b57cec5SDimitry Andric } 4380b57cec5SDimitry Andric 439*bdd1243dSDimitry Andric bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 4400b57cec5SDimitry Andric return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 4410b57cec5SDimitry Andric } 4420b57cec5SDimitry Andric 443*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::begin(const Module &Mod, 444fe6060f1SDimitry Andric const IsaInfo::AMDGPUTargetID &TargetID) { 4450b57cec5SDimitry Andric emitVersion(); 4460b57cec5SDimitry Andric emitPrintf(Mod); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 449*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::end() { 4500b57cec5SDimitry Andric std::string HSAMetadataString; 4510b57cec5SDimitry Andric if (toString(HSAMetadata, HSAMetadataString)) 4520b57cec5SDimitry Andric return; 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric if (DumpHSAMetadata) 4550b57cec5SDimitry Andric dump(HSAMetadataString); 4560b57cec5SDimitry Andric if (VerifyHSAMetadata) 4570b57cec5SDimitry Andric verify(HSAMetadataString); 4580b57cec5SDimitry Andric } 4590b57cec5SDimitry Andric 460*bdd1243dSDimitry Andric void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF, 4610b57cec5SDimitry Andric const SIProgramInfo &ProgramInfo) { 4620b57cec5SDimitry Andric auto &Func = MF.getFunction(); 4630b57cec5SDimitry Andric if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 4640b57cec5SDimitry Andric return; 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric auto CodeProps = getHSACodeProps(MF, ProgramInfo); 4670b57cec5SDimitry Andric auto DebugProps = getHSADebugProps(MF, ProgramInfo); 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric HSAMetadata.mKernels.push_back(Kernel::Metadata()); 4700b57cec5SDimitry Andric auto &Kernel = HSAMetadata.mKernels.back(); 4710b57cec5SDimitry Andric 4720eae32dcSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 4735ffd83dbSDimitry Andric Kernel.mName = std::string(Func.getName()); 4740b57cec5SDimitry Andric Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 4750b57cec5SDimitry Andric emitKernelLanguage(Func); 4760b57cec5SDimitry Andric emitKernelAttrs(Func); 4770eae32dcSDimitry Andric emitKernelArgs(Func, ST); 4780b57cec5SDimitry Andric HSAMetadata.mKernels.back().mCodeProps = CodeProps; 4790b57cec5SDimitry Andric HSAMetadata.mKernels.back().mDebugProps = DebugProps; 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4830b57cec5SDimitry Andric // HSAMetadataStreamerV3 4840b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4850b57cec5SDimitry Andric 486*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const { 4870b57cec5SDimitry Andric errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 4880b57cec5SDimitry Andric } 4890b57cec5SDimitry Andric 490*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const { 4910b57cec5SDimitry Andric errs() << "AMDGPU HSA Metadata Parser Test: "; 4920b57cec5SDimitry Andric 4930b57cec5SDimitry Andric msgpack::Document FromHSAMetadataString; 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 4960b57cec5SDimitry Andric errs() << "FAIL\n"; 4970b57cec5SDimitry Andric return; 4980b57cec5SDimitry Andric } 4990b57cec5SDimitry Andric 5000b57cec5SDimitry Andric std::string ToHSAMetadataString; 5010b57cec5SDimitry Andric raw_string_ostream StrOS(ToHSAMetadataString); 5020b57cec5SDimitry Andric FromHSAMetadataString.toYAML(StrOS); 5030b57cec5SDimitry Andric 5040b57cec5SDimitry Andric errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 5050b57cec5SDimitry Andric if (HSAMetadataString != ToHSAMetadataString) { 5060b57cec5SDimitry Andric errs() << "Original input: " << HSAMetadataString << '\n' 5070b57cec5SDimitry Andric << "Produced output: " << StrOS.str() << '\n'; 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric } 5100b57cec5SDimitry Andric 511*bdd1243dSDimitry Andric std::optional<StringRef> 512*bdd1243dSDimitry Andric MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const { 513*bdd1243dSDimitry Andric return StringSwitch<std::optional<StringRef>>(AccQual) 5140b57cec5SDimitry Andric .Case("read_only", StringRef("read_only")) 5150b57cec5SDimitry Andric .Case("write_only", StringRef("write_only")) 5160b57cec5SDimitry Andric .Case("read_write", StringRef("read_write")) 517*bdd1243dSDimitry Andric .Default(std::nullopt); 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric 520*bdd1243dSDimitry Andric std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier( 521*bdd1243dSDimitry Andric unsigned AddressSpace) const { 5220b57cec5SDimitry Andric switch (AddressSpace) { 5230b57cec5SDimitry Andric case AMDGPUAS::PRIVATE_ADDRESS: 5240b57cec5SDimitry Andric return StringRef("private"); 5250b57cec5SDimitry Andric case AMDGPUAS::GLOBAL_ADDRESS: 5260b57cec5SDimitry Andric return StringRef("global"); 5270b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_ADDRESS: 5280b57cec5SDimitry Andric return StringRef("constant"); 5290b57cec5SDimitry Andric case AMDGPUAS::LOCAL_ADDRESS: 5300b57cec5SDimitry Andric return StringRef("local"); 5310b57cec5SDimitry Andric case AMDGPUAS::FLAT_ADDRESS: 5320b57cec5SDimitry Andric return StringRef("generic"); 5330b57cec5SDimitry Andric case AMDGPUAS::REGION_ADDRESS: 5340b57cec5SDimitry Andric return StringRef("region"); 5350b57cec5SDimitry Andric default: 536*bdd1243dSDimitry Andric return std::nullopt; 5370b57cec5SDimitry Andric } 5380b57cec5SDimitry Andric } 5390b57cec5SDimitry Andric 540*bdd1243dSDimitry Andric StringRef 541*bdd1243dSDimitry Andric MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual, 5420b57cec5SDimitry Andric StringRef BaseTypeName) const { 543349cc55cSDimitry Andric if (TypeQual.contains("pipe")) 5440b57cec5SDimitry Andric return "pipe"; 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric return StringSwitch<StringRef>(BaseTypeName) 5470b57cec5SDimitry Andric .Case("image1d_t", "image") 5480b57cec5SDimitry Andric .Case("image1d_array_t", "image") 5490b57cec5SDimitry Andric .Case("image1d_buffer_t", "image") 5500b57cec5SDimitry Andric .Case("image2d_t", "image") 5510b57cec5SDimitry Andric .Case("image2d_array_t", "image") 5520b57cec5SDimitry Andric .Case("image2d_array_depth_t", "image") 5530b57cec5SDimitry Andric .Case("image2d_array_msaa_t", "image") 5540b57cec5SDimitry Andric .Case("image2d_array_msaa_depth_t", "image") 5550b57cec5SDimitry Andric .Case("image2d_depth_t", "image") 5560b57cec5SDimitry Andric .Case("image2d_msaa_t", "image") 5570b57cec5SDimitry Andric .Case("image2d_msaa_depth_t", "image") 5580b57cec5SDimitry Andric .Case("image3d_t", "image") 5590b57cec5SDimitry Andric .Case("sampler_t", "sampler") 5600b57cec5SDimitry Andric .Case("queue_t", "queue") 5610b57cec5SDimitry Andric .Default(isa<PointerType>(Ty) 5620b57cec5SDimitry Andric ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 5630b57cec5SDimitry Andric ? "dynamic_shared_pointer" 5640b57cec5SDimitry Andric : "global_buffer") 5650b57cec5SDimitry Andric : "by_value"); 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric 568*bdd1243dSDimitry Andric std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty, 569*bdd1243dSDimitry Andric bool Signed) const { 5700b57cec5SDimitry Andric switch (Ty->getTypeID()) { 5710b57cec5SDimitry Andric case Type::IntegerTyID: { 5720b57cec5SDimitry Andric if (!Signed) 5730b57cec5SDimitry Andric return (Twine('u') + getTypeName(Ty, true)).str(); 5740b57cec5SDimitry Andric 5750b57cec5SDimitry Andric auto BitWidth = Ty->getIntegerBitWidth(); 5760b57cec5SDimitry Andric switch (BitWidth) { 5770b57cec5SDimitry Andric case 8: 5780b57cec5SDimitry Andric return "char"; 5790b57cec5SDimitry Andric case 16: 5800b57cec5SDimitry Andric return "short"; 5810b57cec5SDimitry Andric case 32: 5820b57cec5SDimitry Andric return "int"; 5830b57cec5SDimitry Andric case 64: 5840b57cec5SDimitry Andric return "long"; 5850b57cec5SDimitry Andric default: 5860b57cec5SDimitry Andric return (Twine('i') + Twine(BitWidth)).str(); 5870b57cec5SDimitry Andric } 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric case Type::HalfTyID: 5900b57cec5SDimitry Andric return "half"; 5910b57cec5SDimitry Andric case Type::FloatTyID: 5920b57cec5SDimitry Andric return "float"; 5930b57cec5SDimitry Andric case Type::DoubleTyID: 5940b57cec5SDimitry Andric return "double"; 5955ffd83dbSDimitry Andric case Type::FixedVectorTyID: { 5965ffd83dbSDimitry Andric auto VecTy = cast<FixedVectorType>(Ty); 5970b57cec5SDimitry Andric auto ElTy = VecTy->getElementType(); 5985ffd83dbSDimitry Andric auto NumElements = VecTy->getNumElements(); 5990b57cec5SDimitry Andric return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 6000b57cec5SDimitry Andric } 6010b57cec5SDimitry Andric default: 6020b57cec5SDimitry Andric return "unknown"; 6030b57cec5SDimitry Andric } 6040b57cec5SDimitry Andric } 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric msgpack::ArrayDocNode 607*bdd1243dSDimitry Andric MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const { 6080b57cec5SDimitry Andric auto Dims = HSAMetadataDoc->getArrayNode(); 6090b57cec5SDimitry Andric if (Node->getNumOperands() != 3) 6100b57cec5SDimitry Andric return Dims; 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric for (auto &Op : Node->operands()) 6130b57cec5SDimitry Andric Dims.push_back(Dims.getDocument()->getNode( 6140b57cec5SDimitry Andric uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 6150b57cec5SDimitry Andric return Dims; 6160b57cec5SDimitry Andric } 6170b57cec5SDimitry Andric 618*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitVersion() { 6190b57cec5SDimitry Andric auto Version = HSAMetadataDoc->getArrayNode(); 620fe6060f1SDimitry Andric Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); 621fe6060f1SDimitry Andric Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); 6220b57cec5SDimitry Andric getRootMetadata("amdhsa.version") = Version; 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric 625*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) { 6260b57cec5SDimitry Andric auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 6270b57cec5SDimitry Andric if (!Node) 6280b57cec5SDimitry Andric return; 6290b57cec5SDimitry Andric 6300b57cec5SDimitry Andric auto Printf = HSAMetadataDoc->getArrayNode(); 631*bdd1243dSDimitry Andric for (auto *Op : Node->operands()) 6320b57cec5SDimitry Andric if (Op->getNumOperands()) 6330b57cec5SDimitry Andric Printf.push_back(Printf.getDocument()->getNode( 6340b57cec5SDimitry Andric cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 6350b57cec5SDimitry Andric getRootMetadata("amdhsa.printf") = Printf; 6360b57cec5SDimitry Andric } 6370b57cec5SDimitry Andric 638*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func, 6390b57cec5SDimitry Andric msgpack::MapDocNode Kern) { 6400b57cec5SDimitry Andric // TODO: What about other languages? 6410b57cec5SDimitry Andric auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 6420b57cec5SDimitry Andric if (!Node || !Node->getNumOperands()) 6430b57cec5SDimitry Andric return; 6440b57cec5SDimitry Andric auto Op0 = Node->getOperand(0); 6450b57cec5SDimitry Andric if (Op0->getNumOperands() <= 1) 6460b57cec5SDimitry Andric return; 6470b57cec5SDimitry Andric 6480b57cec5SDimitry Andric Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 6490b57cec5SDimitry Andric auto LanguageVersion = Kern.getDocument()->getArrayNode(); 6500b57cec5SDimitry Andric LanguageVersion.push_back(Kern.getDocument()->getNode( 6510b57cec5SDimitry Andric mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 6520b57cec5SDimitry Andric LanguageVersion.push_back(Kern.getDocument()->getNode( 6530b57cec5SDimitry Andric mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 6540b57cec5SDimitry Andric Kern[".language_version"] = LanguageVersion; 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 657*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func, 6580b57cec5SDimitry Andric msgpack::MapDocNode Kern) { 6590b57cec5SDimitry Andric 6600b57cec5SDimitry Andric if (auto Node = Func.getMetadata("reqd_work_group_size")) 6610b57cec5SDimitry Andric Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 6620b57cec5SDimitry Andric if (auto Node = Func.getMetadata("work_group_size_hint")) 6630b57cec5SDimitry Andric Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 6640b57cec5SDimitry Andric if (auto Node = Func.getMetadata("vec_type_hint")) { 6650b57cec5SDimitry Andric Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 6660b57cec5SDimitry Andric getTypeName( 6670b57cec5SDimitry Andric cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 6680b57cec5SDimitry Andric mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 6690b57cec5SDimitry Andric /*Copy=*/true); 6700b57cec5SDimitry Andric } 6710b57cec5SDimitry Andric if (Func.hasFnAttribute("runtime-handle")) { 6720b57cec5SDimitry Andric Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 6730b57cec5SDimitry Andric Func.getFnAttribute("runtime-handle").getValueAsString().str(), 6740b57cec5SDimitry Andric /*Copy=*/true); 6750b57cec5SDimitry Andric } 676349cc55cSDimitry Andric if (Func.hasFnAttribute("device-init")) 677349cc55cSDimitry Andric Kern[".kind"] = Kern.getDocument()->getNode("init"); 678349cc55cSDimitry Andric else if (Func.hasFnAttribute("device-fini")) 679349cc55cSDimitry Andric Kern[".kind"] = Kern.getDocument()->getNode("fini"); 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric 682*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF, 6830b57cec5SDimitry Andric msgpack::MapDocNode Kern) { 6841fd87a68SDimitry Andric auto &Func = MF.getFunction(); 6850b57cec5SDimitry Andric unsigned Offset = 0; 6860b57cec5SDimitry Andric auto Args = HSAMetadataDoc->getArrayNode(); 6870b57cec5SDimitry Andric for (auto &Arg : Func.args()) 6880b57cec5SDimitry Andric emitKernelArg(Arg, Offset, Args); 6890b57cec5SDimitry Andric 6901fd87a68SDimitry Andric emitHiddenKernelArgs(MF, Offset, Args); 6910b57cec5SDimitry Andric 6920b57cec5SDimitry Andric Kern[".args"] = Args; 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric 695*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg, 696*bdd1243dSDimitry Andric unsigned &Offset, 6970b57cec5SDimitry Andric msgpack::ArrayDocNode Args) { 6980b57cec5SDimitry Andric auto Func = Arg.getParent(); 6990b57cec5SDimitry Andric auto ArgNo = Arg.getArgNo(); 7000b57cec5SDimitry Andric const MDNode *Node; 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric StringRef Name; 7030b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_name"); 7040b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 7050b57cec5SDimitry Andric Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 7060b57cec5SDimitry Andric else if (Arg.hasName()) 7070b57cec5SDimitry Andric Name = Arg.getName(); 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric StringRef TypeName; 7100b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_type"); 7110b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 7120b57cec5SDimitry Andric TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric StringRef BaseTypeName; 7150b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_base_type"); 7160b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 7170b57cec5SDimitry Andric BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 7180b57cec5SDimitry Andric 7190b57cec5SDimitry Andric StringRef AccQual; 7200b57cec5SDimitry Andric if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 7210b57cec5SDimitry Andric Arg.hasNoAliasAttr()) { 7220b57cec5SDimitry Andric AccQual = "read_only"; 7230b57cec5SDimitry Andric } else { 7240b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_access_qual"); 7250b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 7260b57cec5SDimitry Andric AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric StringRef TypeQual; 7300b57cec5SDimitry Andric Node = Func->getMetadata("kernel_arg_type_qual"); 7310b57cec5SDimitry Andric if (Node && ArgNo < Node->getNumOperands()) 7320b57cec5SDimitry Andric TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 7330b57cec5SDimitry Andric 7340b57cec5SDimitry Andric const DataLayout &DL = Func->getParent()->getDataLayout(); 7350b57cec5SDimitry Andric 7365ffd83dbSDimitry Andric MaybeAlign PointeeAlign; 737e8d8bef9SDimitry Andric Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 738e8d8bef9SDimitry Andric 739e8d8bef9SDimitry Andric // FIXME: Need to distinguish in memory alignment from pointer alignment. 7400b57cec5SDimitry Andric if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 74104eeddc0SDimitry Andric if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) 74204eeddc0SDimitry Andric PointeeAlign = Arg.getParamAlign().valueOrOne(); 7430b57cec5SDimitry Andric } 7440b57cec5SDimitry Andric 745e8d8bef9SDimitry Andric // There's no distinction between byval aggregates and raw aggregates. 746e8d8bef9SDimitry Andric Type *ArgTy; 747e8d8bef9SDimitry Andric Align ArgAlign; 748e8d8bef9SDimitry Andric std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 749e8d8bef9SDimitry Andric 750e8d8bef9SDimitry Andric emitKernelArg(DL, ArgTy, ArgAlign, 751e8d8bef9SDimitry Andric getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 752e8d8bef9SDimitry Andric PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 7530b57cec5SDimitry Andric } 7540b57cec5SDimitry Andric 755*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitKernelArg( 756e8d8bef9SDimitry Andric const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 757e8d8bef9SDimitry Andric unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 758e8d8bef9SDimitry Andric StringRef Name, StringRef TypeName, StringRef BaseTypeName, 7590b57cec5SDimitry Andric StringRef AccQual, StringRef TypeQual) { 7600b57cec5SDimitry Andric auto Arg = Args.getDocument()->getMapNode(); 7610b57cec5SDimitry Andric 7620b57cec5SDimitry Andric if (!Name.empty()) 7630b57cec5SDimitry Andric Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 7640b57cec5SDimitry Andric if (!TypeName.empty()) 7650b57cec5SDimitry Andric Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 7660b57cec5SDimitry Andric auto Size = DL.getTypeAllocSize(Ty); 7670b57cec5SDimitry Andric Arg[".size"] = Arg.getDocument()->getNode(Size); 7685ffd83dbSDimitry Andric Offset = alignTo(Offset, Alignment); 7690b57cec5SDimitry Andric Arg[".offset"] = Arg.getDocument()->getNode(Offset); 7700b57cec5SDimitry Andric Offset += Size; 7710b57cec5SDimitry Andric Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 7720b57cec5SDimitry Andric if (PointeeAlign) 7735ffd83dbSDimitry Andric Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 7740b57cec5SDimitry Andric 7750b57cec5SDimitry Andric if (auto PtrTy = dyn_cast<PointerType>(Ty)) 7760b57cec5SDimitry Andric if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 777*bdd1243dSDimitry Andric // Limiting address space to emit only for a certain ValueKind. 778*bdd1243dSDimitry Andric if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer") 779*bdd1243dSDimitry Andric Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, 780*bdd1243dSDimitry Andric /*Copy=*/true); 7810b57cec5SDimitry Andric 7820b57cec5SDimitry Andric if (auto AQ = getAccessQualifier(AccQual)) 7830b57cec5SDimitry Andric Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 7840b57cec5SDimitry Andric 7850b57cec5SDimitry Andric // TODO: Emit Arg[".actual_access"]. 7860b57cec5SDimitry Andric 7870b57cec5SDimitry Andric SmallVector<StringRef, 1> SplitTypeQuals; 7880b57cec5SDimitry Andric TypeQual.split(SplitTypeQuals, " ", -1, false); 7890b57cec5SDimitry Andric for (StringRef Key : SplitTypeQuals) { 7900b57cec5SDimitry Andric if (Key == "const") 7910b57cec5SDimitry Andric Arg[".is_const"] = Arg.getDocument()->getNode(true); 7920b57cec5SDimitry Andric else if (Key == "restrict") 7930b57cec5SDimitry Andric Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 7940b57cec5SDimitry Andric else if (Key == "volatile") 7950b57cec5SDimitry Andric Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 7960b57cec5SDimitry Andric else if (Key == "pipe") 7970b57cec5SDimitry Andric Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 7980b57cec5SDimitry Andric } 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric Args.push_back(Arg); 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric 803*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( 804*bdd1243dSDimitry Andric const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 8051fd87a68SDimitry Andric auto &Func = MF.getFunction(); 8061fd87a68SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 8071fd87a68SDimitry Andric 8080eae32dcSDimitry Andric unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 8090b57cec5SDimitry Andric if (!HiddenArgNumBytes) 8100b57cec5SDimitry Andric return; 8110b57cec5SDimitry Andric 812349cc55cSDimitry Andric const Module *M = Func.getParent(); 813349cc55cSDimitry Andric auto &DL = M->getDataLayout(); 8140b57cec5SDimitry Andric auto Int64Ty = Type::getInt64Ty(Func.getContext()); 8150b57cec5SDimitry Andric 81681ad6265SDimitry Andric Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 81781ad6265SDimitry Andric 8180b57cec5SDimitry Andric if (HiddenArgNumBytes >= 8) 819e8d8bef9SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 820e8d8bef9SDimitry Andric Args); 8210b57cec5SDimitry Andric if (HiddenArgNumBytes >= 16) 822e8d8bef9SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 823e8d8bef9SDimitry Andric Args); 8240b57cec5SDimitry Andric if (HiddenArgNumBytes >= 24) 825e8d8bef9SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 826e8d8bef9SDimitry Andric Args); 8270b57cec5SDimitry Andric 8280b57cec5SDimitry Andric auto Int8PtrTy = 8290b57cec5SDimitry Andric Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric if (HiddenArgNumBytes >= 32) { 83281ad6265SDimitry Andric // We forbid the use of features requiring hostcall when compiling OpenCL 83381ad6265SDimitry Andric // before code object V5, which makes the mutual exclusion between the 83481ad6265SDimitry Andric // "printf buffer" and "hostcall buffer" here sound. 835349cc55cSDimitry Andric if (M->getNamedMetadata("llvm.printf.fmts")) 836e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 837e8d8bef9SDimitry Andric Args); 83881ad6265SDimitry Andric else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 839e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 840e8d8bef9SDimitry Andric Args); 84181ad6265SDimitry Andric else 842e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric // Emit "default queue" and "completion action" arguments if enqueue kernel is 8460b57cec5SDimitry Andric // used, otherwise emit dummy "none" arguments. 847*bdd1243dSDimitry Andric if (HiddenArgNumBytes >= 40) { 848*bdd1243dSDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 849e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 850e8d8bef9SDimitry Andric Args); 8510b57cec5SDimitry Andric } else { 852e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 853*bdd1243dSDimitry Andric } 854*bdd1243dSDimitry Andric } 855*bdd1243dSDimitry Andric 856*bdd1243dSDimitry Andric if (HiddenArgNumBytes >= 48) { 857*bdd1243dSDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-completion-action") && 858*bdd1243dSDimitry Andric // FIXME: Hack for runtime bug if we fail to optimize this out 859*bdd1243dSDimitry Andric Func.hasFnAttribute("calls-enqueue-kernel")) { 860*bdd1243dSDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 861*bdd1243dSDimitry Andric Args); 862*bdd1243dSDimitry Andric } else { 863e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 8640b57cec5SDimitry Andric } 8650b57cec5SDimitry Andric } 8660b57cec5SDimitry Andric 8670b57cec5SDimitry Andric // Emit the pointer argument for multi-grid object. 86881ad6265SDimitry Andric if (HiddenArgNumBytes >= 56) { 86981ad6265SDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 870e8d8bef9SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 871e8d8bef9SDimitry Andric Args); 87281ad6265SDimitry Andric } else { 87381ad6265SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 87481ad6265SDimitry Andric } 87581ad6265SDimitry Andric } 8760b57cec5SDimitry Andric } 8770b57cec5SDimitry Andric 878*bdd1243dSDimitry Andric msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( 879*bdd1243dSDimitry Andric const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { 8800b57cec5SDimitry Andric const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 8810b57cec5SDimitry Andric const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 8820b57cec5SDimitry Andric const Function &F = MF.getFunction(); 8830b57cec5SDimitry Andric 8840b57cec5SDimitry Andric auto Kern = HSAMetadataDoc->getMapNode(); 8850b57cec5SDimitry Andric 8868bcb0991SDimitry Andric Align MaxKernArgAlign; 8870b57cec5SDimitry Andric Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 8880b57cec5SDimitry Andric STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 8890b57cec5SDimitry Andric Kern[".group_segment_fixed_size"] = 8900b57cec5SDimitry Andric Kern.getDocument()->getNode(ProgramInfo.LDSSize); 8910b57cec5SDimitry Andric Kern[".private_segment_fixed_size"] = 8920b57cec5SDimitry Andric Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 893*bdd1243dSDimitry Andric if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) 894fcaf7f86SDimitry Andric Kern[".uses_dynamic_stack"] = 895fcaf7f86SDimitry Andric Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); 896*bdd1243dSDimitry Andric if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP()) 897*bdd1243dSDimitry Andric Kern[".workgroup_processor_mode"] = 898*bdd1243dSDimitry Andric Kern.getDocument()->getNode(ProgramInfo.WgpMode); 899349cc55cSDimitry Andric 900349cc55cSDimitry Andric // FIXME: The metadata treats the minimum as 16? 9010b57cec5SDimitry Andric Kern[".kernarg_segment_align"] = 9028bcb0991SDimitry Andric Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 9030b57cec5SDimitry Andric Kern[".wavefront_size"] = 9040b57cec5SDimitry Andric Kern.getDocument()->getNode(STM.getWavefrontSize()); 9050b57cec5SDimitry Andric Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 9060b57cec5SDimitry Andric Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 90781ad6265SDimitry Andric 90881ad6265SDimitry Andric // Only add AGPR count to metadata for supported devices 90981ad6265SDimitry Andric if (STM.hasMAIInsts()) { 91081ad6265SDimitry Andric Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR); 91181ad6265SDimitry Andric } 91281ad6265SDimitry Andric 9130b57cec5SDimitry Andric Kern[".max_flat_workgroup_size"] = 9140b57cec5SDimitry Andric Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 9150b57cec5SDimitry Andric Kern[".sgpr_spill_count"] = 9160b57cec5SDimitry Andric Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 9170b57cec5SDimitry Andric Kern[".vgpr_spill_count"] = 9180b57cec5SDimitry Andric Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 9190b57cec5SDimitry Andric 9200b57cec5SDimitry Andric return Kern; 9210b57cec5SDimitry Andric } 9220b57cec5SDimitry Andric 923*bdd1243dSDimitry Andric bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 9240b57cec5SDimitry Andric return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric 927*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::begin(const Module &Mod, 928fe6060f1SDimitry Andric const IsaInfo::AMDGPUTargetID &TargetID) { 9290b57cec5SDimitry Andric emitVersion(); 9300b57cec5SDimitry Andric emitPrintf(Mod); 9310b57cec5SDimitry Andric getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 9320b57cec5SDimitry Andric } 9330b57cec5SDimitry Andric 934*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::end() { 9350b57cec5SDimitry Andric std::string HSAMetadataString; 9360b57cec5SDimitry Andric raw_string_ostream StrOS(HSAMetadataString); 9370b57cec5SDimitry Andric HSAMetadataDoc->toYAML(StrOS); 9380b57cec5SDimitry Andric 9390b57cec5SDimitry Andric if (DumpHSAMetadata) 9400b57cec5SDimitry Andric dump(StrOS.str()); 9410b57cec5SDimitry Andric if (VerifyHSAMetadata) 9420b57cec5SDimitry Andric verify(StrOS.str()); 9430b57cec5SDimitry Andric } 9440b57cec5SDimitry Andric 945*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, 9460b57cec5SDimitry Andric const SIProgramInfo &ProgramInfo) { 9470b57cec5SDimitry Andric auto &Func = MF.getFunction(); 9480b57cec5SDimitry Andric auto Kern = getHSAKernelProps(MF, ProgramInfo); 9490b57cec5SDimitry Andric 9500b57cec5SDimitry Andric assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 9510b57cec5SDimitry Andric Func.getCallingConv() == CallingConv::SPIR_KERNEL); 9520b57cec5SDimitry Andric 9530b57cec5SDimitry Andric auto Kernels = 9540b57cec5SDimitry Andric getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric { 9570b57cec5SDimitry Andric Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 9580b57cec5SDimitry Andric Kern[".symbol"] = Kern.getDocument()->getNode( 9590b57cec5SDimitry Andric (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 9600b57cec5SDimitry Andric emitKernelLanguage(Func, Kern); 9610b57cec5SDimitry Andric emitKernelAttrs(Func, Kern); 9621fd87a68SDimitry Andric emitKernelArgs(MF, Kern); 9630b57cec5SDimitry Andric } 9640b57cec5SDimitry Andric 9650b57cec5SDimitry Andric Kernels.push_back(Kern); 9660b57cec5SDimitry Andric } 9670b57cec5SDimitry Andric 968fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 969fe6060f1SDimitry Andric // HSAMetadataStreamerV4 970fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 971fe6060f1SDimitry Andric 972*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV4::emitVersion() { 973fe6060f1SDimitry Andric auto Version = HSAMetadataDoc->getArrayNode(); 974fe6060f1SDimitry Andric Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); 975fe6060f1SDimitry Andric Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); 976fe6060f1SDimitry Andric getRootMetadata("amdhsa.version") = Version; 977fe6060f1SDimitry Andric } 978fe6060f1SDimitry Andric 979*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV4::emitTargetID( 980*bdd1243dSDimitry Andric const IsaInfo::AMDGPUTargetID &TargetID) { 981fe6060f1SDimitry Andric getRootMetadata("amdhsa.target") = 982fe6060f1SDimitry Andric HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); 983fe6060f1SDimitry Andric } 984fe6060f1SDimitry Andric 985*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV4::begin(const Module &Mod, 986fe6060f1SDimitry Andric const IsaInfo::AMDGPUTargetID &TargetID) { 987fe6060f1SDimitry Andric emitVersion(); 988fe6060f1SDimitry Andric emitTargetID(TargetID); 989fe6060f1SDimitry Andric emitPrintf(Mod); 990fe6060f1SDimitry Andric getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 991fe6060f1SDimitry Andric } 992fe6060f1SDimitry Andric 9931fd87a68SDimitry Andric //===----------------------------------------------------------------------===// 9941fd87a68SDimitry Andric // HSAMetadataStreamerV5 9951fd87a68SDimitry Andric //===----------------------------------------------------------------------===// 9961fd87a68SDimitry Andric 997*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV5::emitVersion() { 9981fd87a68SDimitry Andric auto Version = HSAMetadataDoc->getArrayNode(); 9991fd87a68SDimitry Andric Version.push_back(Version.getDocument()->getNode(VersionMajorV5)); 10001fd87a68SDimitry Andric Version.push_back(Version.getDocument()->getNode(VersionMinorV5)); 10011fd87a68SDimitry Andric getRootMetadata("amdhsa.version") = Version; 10021fd87a68SDimitry Andric } 10031fd87a68SDimitry Andric 1004*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( 1005*bdd1243dSDimitry Andric const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 10061fd87a68SDimitry Andric auto &Func = MF.getFunction(); 10071fd87a68SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 100881ad6265SDimitry Andric 100981ad6265SDimitry Andric // No implicit kernel argument is used. 101081ad6265SDimitry Andric if (ST.getImplicitArgNumBytes(Func) == 0) 101181ad6265SDimitry Andric return; 101281ad6265SDimitry Andric 10131fd87a68SDimitry Andric const Module *M = Func.getParent(); 10141fd87a68SDimitry Andric auto &DL = M->getDataLayout(); 101581ad6265SDimitry Andric const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 10161fd87a68SDimitry Andric 10171fd87a68SDimitry Andric auto Int64Ty = Type::getInt64Ty(Func.getContext()); 10181fd87a68SDimitry Andric auto Int32Ty = Type::getInt32Ty(Func.getContext()); 10191fd87a68SDimitry Andric auto Int16Ty = Type::getInt16Ty(Func.getContext()); 10201fd87a68SDimitry Andric 102181ad6265SDimitry Andric Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 10221fd87a68SDimitry Andric emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); 10231fd87a68SDimitry Andric emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); 10241fd87a68SDimitry Andric emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); 10251fd87a68SDimitry Andric 10261fd87a68SDimitry Andric emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args); 10271fd87a68SDimitry Andric emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args); 10281fd87a68SDimitry Andric emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args); 10291fd87a68SDimitry Andric 10301fd87a68SDimitry Andric emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args); 10311fd87a68SDimitry Andric emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args); 10321fd87a68SDimitry Andric emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args); 10331fd87a68SDimitry Andric 10341fd87a68SDimitry Andric // Reserved for hidden_tool_correlation_id. 10351fd87a68SDimitry Andric Offset += 8; 10361fd87a68SDimitry Andric 10371fd87a68SDimitry Andric Offset += 8; // Reserved. 10381fd87a68SDimitry Andric 10391fd87a68SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); 10401fd87a68SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args); 10411fd87a68SDimitry Andric emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args); 10421fd87a68SDimitry Andric 10431fd87a68SDimitry Andric emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args); 10441fd87a68SDimitry Andric 10451fd87a68SDimitry Andric Offset += 6; // Reserved. 10461fd87a68SDimitry Andric auto Int8PtrTy = 10471fd87a68SDimitry Andric Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 10481fd87a68SDimitry Andric 10491fd87a68SDimitry Andric if (M->getNamedMetadata("llvm.printf.fmts")) { 10501fd87a68SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 10511fd87a68SDimitry Andric Args); 105281ad6265SDimitry Andric } else { 10531fd87a68SDimitry Andric Offset += 8; // Skipped. 105481ad6265SDimitry Andric } 10551fd87a68SDimitry Andric 105681ad6265SDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { 10571fd87a68SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 10581fd87a68SDimitry Andric Args); 105981ad6265SDimitry Andric } else { 10601fd87a68SDimitry Andric Offset += 8; // Skipped. 106181ad6265SDimitry Andric } 10621fd87a68SDimitry Andric 106381ad6265SDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 10641fd87a68SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 10651fd87a68SDimitry Andric Args); 106681ad6265SDimitry Andric } else { 106781ad6265SDimitry Andric Offset += 8; // Skipped. 106881ad6265SDimitry Andric } 10691fd87a68SDimitry Andric 107081ad6265SDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) 107181ad6265SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); 107281ad6265SDimitry Andric else 107381ad6265SDimitry Andric Offset += 8; // Skipped. 10741fd87a68SDimitry Andric 1075*bdd1243dSDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 10761fd87a68SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 10771fd87a68SDimitry Andric Args); 1078*bdd1243dSDimitry Andric } else { 1079*bdd1243dSDimitry Andric Offset += 8; // Skipped. 1080*bdd1243dSDimitry Andric } 1081*bdd1243dSDimitry Andric 1082*bdd1243dSDimitry Andric if (!Func.hasFnAttribute("amdgpu-no-completion-action") && 1083*bdd1243dSDimitry Andric // FIXME: Hack for runtime bug 1084*bdd1243dSDimitry Andric Func.hasFnAttribute("calls-enqueue-kernel")) { 10851fd87a68SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 10861fd87a68SDimitry Andric Args); 108781ad6265SDimitry Andric } else { 1088*bdd1243dSDimitry Andric Offset += 8; // Skipped. 108981ad6265SDimitry Andric } 10901fd87a68SDimitry Andric 10911fd87a68SDimitry Andric Offset += 72; // Reserved. 10921fd87a68SDimitry Andric 109381ad6265SDimitry Andric // hidden_private_base and hidden_shared_base are only when the subtarget has 109481ad6265SDimitry Andric // ApertureRegs. 109581ad6265SDimitry Andric if (!ST.hasApertureRegs()) { 10961fd87a68SDimitry Andric emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); 10971fd87a68SDimitry Andric emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); 109881ad6265SDimitry Andric } else { 10991fd87a68SDimitry Andric Offset += 8; // Skipped. 110081ad6265SDimitry Andric } 11011fd87a68SDimitry Andric 11021fd87a68SDimitry Andric if (MFI.hasQueuePtr()) 11031fd87a68SDimitry Andric emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); 11041fd87a68SDimitry Andric } 11051fd87a68SDimitry Andric 1106*bdd1243dSDimitry Andric void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, 1107*bdd1243dSDimitry Andric msgpack::MapDocNode Kern) { 1108*bdd1243dSDimitry Andric MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern); 1109*bdd1243dSDimitry Andric 1110*bdd1243dSDimitry Andric if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) 1111*bdd1243dSDimitry Andric Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); 1112*bdd1243dSDimitry Andric } 1113*bdd1243dSDimitry Andric 1114*bdd1243dSDimitry Andric 11150b57cec5SDimitry Andric } // end namespace HSAMD 11160b57cec5SDimitry Andric } // end namespace AMDGPU 11170b57cec5SDimitry Andric } // end namespace llvm 1118