1 //===- Utility.cpp ------ Collection of generic offloading utilities ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Frontend/Offloading/Utility.h" 10 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" 11 #include "llvm/BinaryFormat/ELF.h" 12 #include "llvm/BinaryFormat/MsgPackDocument.h" 13 #include "llvm/IR/Constants.h" 14 #include "llvm/IR/GlobalValue.h" 15 #include "llvm/IR/GlobalVariable.h" 16 #include "llvm/IR/Value.h" 17 #include "llvm/Object/ELFObjectFile.h" 18 #include "llvm/ObjectYAML/ELFYAML.h" 19 #include "llvm/ObjectYAML/yaml2obj.h" 20 #include "llvm/Support/MemoryBufferRef.h" 21 #include "llvm/Transforms/Utils/ModuleUtils.h" 22 23 using namespace llvm; 24 using namespace llvm::offloading; 25 26 StructType *offloading::getEntryTy(Module &M) { 27 LLVMContext &C = M.getContext(); 28 StructType *EntryTy = 29 StructType::getTypeByName(C, "struct.__tgt_offload_entry"); 30 if (!EntryTy) 31 EntryTy = StructType::create( 32 "struct.__tgt_offload_entry", Type::getInt64Ty(C), Type::getInt16Ty(C), 33 Type::getInt16Ty(C), Type::getInt32Ty(C), PointerType::getUnqual(C), 34 PointerType::getUnqual(C), Type::getInt64Ty(C), Type::getInt64Ty(C), 35 PointerType::getUnqual(C)); 36 return EntryTy; 37 } 38 39 std::pair<Constant *, GlobalVariable *> 40 offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind, 41 Constant *Addr, StringRef Name, 42 uint64_t Size, uint32_t Flags, 43 uint64_t Data, Constant *AuxAddr) { 44 const llvm::Triple &Triple = M.getTargetTriple(); 45 Type *PtrTy = PointerType::getUnqual(M.getContext()); 46 Type *Int64Ty = Type::getInt64Ty(M.getContext()); 47 Type *Int32Ty = Type::getInt32Ty(M.getContext()); 48 Type *Int16Ty = Type::getInt16Ty(M.getContext()); 49 50 Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name); 51 52 StringRef Prefix = 53 Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name"; 54 55 // Create the constant string used to look up the symbol in the device. 56 auto *Str = 57 new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true, 58 GlobalValue::InternalLinkage, AddrName, Prefix); 59 StringRef SectionName = ".llvm.rodata.offloading"; 60 Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 61 Str->setSection(SectionName); 62 Str->setAlignment(Align(1)); 63 64 // Make a metadata node for these constants so it can be queried from IR. 65 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols"); 66 Metadata *MDVals[] = {ConstantAsMetadata::get(Str)}; 67 MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals)); 68 69 // Construct the offloading entry. 70 Constant *EntryData[] = { 71 ConstantExpr::getNullValue(Int64Ty), 72 ConstantInt::get(Int16Ty, 1), 73 ConstantInt::get(Int16Ty, Kind), 74 ConstantInt::get(Int32Ty, Flags), 75 ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, PtrTy), 76 ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, PtrTy), 77 ConstantInt::get(Int64Ty, Size), 78 ConstantInt::get(Int64Ty, Data), 79 AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(AuxAddr, PtrTy) 80 : ConstantExpr::getNullValue(PtrTy)}; 81 Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData); 82 return {EntryInitializer, Str}; 83 } 84 85 void offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind, 86 Constant *Addr, StringRef Name, 87 uint64_t Size, uint32_t Flags, 88 uint64_t Data, Constant *AuxAddr, 89 StringRef SectionName) { 90 const llvm::Triple &Triple = M.getTargetTriple(); 91 92 auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer( 93 M, Kind, Addr, Name, Size, Flags, Data, AuxAddr); 94 95 StringRef Prefix = 96 Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry."; 97 auto *Entry = new GlobalVariable( 98 M, getEntryTy(M), 99 /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer, 100 Prefix + Name, nullptr, GlobalValue::NotThreadLocal, 101 M.getDataLayout().getDefaultGlobalsAddressSpace()); 102 103 // The entry has to be created in the section the linker expects it to be. 104 if (Triple.isOSBinFormatCOFF()) 105 Entry->setSection((SectionName + "$OE").str()); 106 else 107 Entry->setSection(SectionName); 108 Entry->setAlignment(Align(object::OffloadBinary::getAlignment())); 109 } 110 111 std::pair<GlobalVariable *, GlobalVariable *> 112 offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { 113 const llvm::Triple &Triple = M.getTargetTriple(); 114 115 auto *ZeroInitilaizer = 116 ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); 117 auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr; 118 auto *EntryType = ArrayType::get(getEntryTy(M), 0); 119 auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage 120 : GlobalValue::ExternalLinkage; 121 122 auto *EntriesB = 123 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, 124 "__start_" + SectionName); 125 EntriesB->setVisibility(GlobalValue::HiddenVisibility); 126 auto *EntriesE = 127 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, 128 "__stop_" + SectionName); 129 EntriesE->setVisibility(GlobalValue::HiddenVisibility); 130 131 if (Triple.isOSBinFormatELF()) { 132 // We assume that external begin/end symbols that we have created above will 133 // be defined by the linker. This is done whenever a section name with a 134 // valid C-identifier is present. We define a dummy variable here to force 135 // the linker to always provide these symbols. 136 auto *DummyEntry = new GlobalVariable( 137 M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage, 138 ZeroInitilaizer, "__dummy." + SectionName); 139 DummyEntry->setSection(SectionName); 140 DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment())); 141 appendToCompilerUsed(M, DummyEntry); 142 } else { 143 // The COFF linker will merge sections containing a '$' together into a 144 // single section. The order of entries in this section will be sorted 145 // alphabetically by the characters following the '$' in the name. Set the 146 // sections here to ensure that the beginning and end symbols are sorted. 147 EntriesB->setSection((SectionName + "$OA").str()); 148 EntriesE->setSection((SectionName + "$OZ").str()); 149 } 150 151 return std::make_pair(EntriesB, EntriesE); 152 } 153 154 bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch, 155 uint32_t ImageFlags, 156 StringRef EnvTargetID) { 157 using namespace llvm::ELF; 158 StringRef EnvArch = EnvTargetID.split(":").first; 159 160 // Trivial check if the base processors match. 161 if (EnvArch != ImageArch) 162 return false; 163 164 // Check if the image is requesting xnack on or off. 165 switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) { 166 case EF_AMDGPU_FEATURE_XNACK_OFF_V4: 167 // The image is 'xnack-' so the environment must be 'xnack-'. 168 if (!EnvTargetID.contains("xnack-")) 169 return false; 170 break; 171 case EF_AMDGPU_FEATURE_XNACK_ON_V4: 172 // The image is 'xnack+' so the environment must be 'xnack+'. 173 if (!EnvTargetID.contains("xnack+")) 174 return false; 175 break; 176 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4: 177 case EF_AMDGPU_FEATURE_XNACK_ANY_V4: 178 default: 179 break; 180 } 181 182 // Check if the image is requesting sramecc on or off. 183 switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) { 184 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4: 185 // The image is 'sramecc-' so the environment must be 'sramecc-'. 186 if (!EnvTargetID.contains("sramecc-")) 187 return false; 188 break; 189 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4: 190 // The image is 'sramecc+' so the environment must be 'sramecc+'. 191 if (!EnvTargetID.contains("sramecc+")) 192 return false; 193 break; 194 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4: 195 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4: 196 break; 197 } 198 199 return true; 200 } 201 202 namespace { 203 /// Reads the AMDGPU specific per-kernel-metadata from an image. 204 class KernelInfoReader { 205 public: 206 KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM) 207 : KernelInfoMap(KIM) {} 208 209 /// Process ELF note to read AMDGPU metadata from respective information 210 /// fields. 211 Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) { 212 if (Note.getName() != "AMDGPU") 213 return Error::success(); // We are not interested in other things 214 215 assert(Note.getType() == ELF::NT_AMDGPU_METADATA && 216 "Parse AMDGPU MetaData"); 217 auto Desc = Note.getDesc(Align); 218 StringRef MsgPackString = 219 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size()); 220 msgpack::Document MsgPackDoc; 221 if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false)) 222 return Error::success(); 223 224 AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); 225 if (!Verifier.verify(MsgPackDoc.getRoot())) 226 return Error::success(); 227 228 auto RootMap = MsgPackDoc.getRoot().getMap(true); 229 230 if (auto Err = iterateAMDKernels(RootMap)) 231 return Err; 232 233 return Error::success(); 234 } 235 236 private: 237 /// Extracts the relevant information via simple string look-up in the msgpack 238 /// document elements. 239 Error 240 extractKernelData(msgpack::MapDocNode::MapTy::value_type V, 241 std::string &KernelName, 242 offloading::amdgpu::AMDGPUKernelMetaData &KernelData) { 243 if (!V.first.isString()) 244 return Error::success(); 245 246 const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) { 247 return DK.getString() == SK; 248 }; 249 250 const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN, 251 uint32_t *Vals) { 252 assert(DN.isArray() && "MsgPack DocNode is an array node"); 253 auto DNA = DN.getArray(); 254 assert(DNA.size() == 3 && "ArrayNode has at most three elements"); 255 256 int I = 0; 257 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd; 258 ++DNABegin) { 259 Vals[I++] = DNABegin->getUInt(); 260 } 261 }; 262 263 if (IsKey(V.first, ".name")) { 264 KernelName = V.second.toString(); 265 } else if (IsKey(V.first, ".sgpr_count")) { 266 KernelData.SGPRCount = V.second.getUInt(); 267 } else if (IsKey(V.first, ".sgpr_spill_count")) { 268 KernelData.SGPRSpillCount = V.second.getUInt(); 269 } else if (IsKey(V.first, ".vgpr_count")) { 270 KernelData.VGPRCount = V.second.getUInt(); 271 } else if (IsKey(V.first, ".vgpr_spill_count")) { 272 KernelData.VGPRSpillCount = V.second.getUInt(); 273 } else if (IsKey(V.first, ".agpr_count")) { 274 KernelData.AGPRCount = V.second.getUInt(); 275 } else if (IsKey(V.first, ".private_segment_fixed_size")) { 276 KernelData.PrivateSegmentSize = V.second.getUInt(); 277 } else if (IsKey(V.first, ".group_segment_fixed_size")) { 278 KernelData.GroupSegmentList = V.second.getUInt(); 279 } else if (IsKey(V.first, ".reqd_workgroup_size")) { 280 GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize); 281 } else if (IsKey(V.first, ".workgroup_size_hint")) { 282 GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint); 283 } else if (IsKey(V.first, ".wavefront_size")) { 284 KernelData.WavefrontSize = V.second.getUInt(); 285 } else if (IsKey(V.first, ".max_flat_workgroup_size")) { 286 KernelData.MaxFlatWorkgroupSize = V.second.getUInt(); 287 } 288 289 return Error::success(); 290 } 291 292 /// Get the "amdhsa.kernels" element from the msgpack Document 293 Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) { 294 auto Res = MDN.find("amdhsa.kernels"); 295 if (Res == MDN.end()) 296 return createStringError(inconvertibleErrorCode(), 297 "Could not find amdhsa.kernels key"); 298 299 auto Pair = *Res; 300 assert(Pair.second.isArray() && 301 "AMDGPU kernel entries are arrays of entries"); 302 303 return Pair.second.getArray(); 304 } 305 306 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a 307 /// MapDocNode that either maps a string to a single value (most of them) or 308 /// to another array of things. Currently, we only handle the case that maps 309 /// to scalar value. 310 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) { 311 offloading::amdgpu::AMDGPUKernelMetaData KernelData; 312 std::string KernelName; 313 auto Entry = (*It).getMap(); 314 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI) 315 if (auto Err = extractKernelData(*MI, KernelName, KernelData)) 316 return Err; 317 318 KernelInfoMap.insert({KernelName, KernelData}); 319 return Error::success(); 320 } 321 322 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry 323 Error iterateAMDKernels(msgpack::MapDocNode &MDN) { 324 auto KernelsOrErr = getAMDKernelsArray(MDN); 325 if (auto Err = KernelsOrErr.takeError()) 326 return Err; 327 328 auto KernelsArr = *KernelsOrErr; 329 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) { 330 if (!It->isMap()) 331 continue; // we expect <key,value> pairs 332 333 // Obtain the value for the different entries. Each array entry is a 334 // MapDocNode 335 if (auto Err = generateKernelInfo(It)) 336 return Err; 337 } 338 return Error::success(); 339 } 340 341 // Kernel names are the keys 342 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap; 343 }; 344 } // namespace 345 346 Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( 347 MemoryBufferRef MemBuffer, 348 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap, 349 uint16_t &ELFABIVersion) { 350 Error Err = Error::success(); // Used later as out-parameter 351 352 auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer()); 353 if (auto Err = ELFOrError.takeError()) 354 return Err; 355 356 const object::ELF64LEFile ELFObj = ELFOrError.get(); 357 Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections(); 358 if (!Sections) 359 return Sections.takeError(); 360 KernelInfoReader Reader(KernelInfoMap); 361 362 // Read the code object version from ELF image header 363 auto Header = ELFObj.getHeader(); 364 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]); 365 for (const auto &S : *Sections) { 366 if (S.sh_type != ELF::SHT_NOTE) 367 continue; 368 369 for (const auto N : ELFObj.notes(S, Err)) { 370 if (Err) 371 return Err; 372 // Fills the KernelInfoTabel entries in the reader 373 if ((Err = Reader.processNote(N, S.sh_addralign))) 374 return Err; 375 } 376 } 377 return Error::success(); 378 } 379 Error offloading::intel::containerizeOpenMPSPIRVImage( 380 std::unique_ptr<MemoryBuffer> &Img) { 381 constexpr char INTEL_ONEOMP_OFFLOAD_VERSION[] = "1.0"; 382 constexpr int NT_INTEL_ONEOMP_OFFLOAD_VERSION = 1; 383 constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT = 2; 384 constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX = 3; 385 386 // Start creating notes for the ELF container. 387 std::vector<ELFYAML::NoteEntry> Notes; 388 std::string Version = toHex(INTEL_ONEOMP_OFFLOAD_VERSION); 389 Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD", 390 yaml::BinaryRef(Version), 391 NT_INTEL_ONEOMP_OFFLOAD_VERSION}); 392 393 // The AuxInfo string will hold auxiliary information for the image. 394 // ELFYAML::NoteEntry structures will hold references to the 395 // string, so we have to make sure the string is valid. 396 std::string AuxInfo; 397 398 // TODO: Pass compile/link opts 399 StringRef CompileOpts = ""; 400 StringRef LinkOpts = ""; 401 402 unsigned ImageFmt = 1; // SPIR-V format 403 404 AuxInfo = toHex((Twine(0) + Twine('\0') + Twine(ImageFmt) + Twine('\0') + 405 CompileOpts + Twine('\0') + LinkOpts) 406 .str()); 407 Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD", 408 yaml::BinaryRef(AuxInfo), 409 NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX}); 410 411 std::string ImgCount = toHex(Twine(1).str()); // always one image per ELF 412 Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD", 413 yaml::BinaryRef(ImgCount), 414 NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT}); 415 416 std::string YamlFile; 417 llvm::raw_string_ostream YamlFileStream(YamlFile); 418 419 // Write the YAML template file. 420 421 // We use 64-bit little-endian ELF currently. 422 ELFYAML::FileHeader Header{}; 423 Header.Class = ELF::ELFCLASS64; 424 Header.Data = ELF::ELFDATA2LSB; 425 Header.Type = ELF::ET_DYN; 426 // Use an existing Intel machine type as there is not one specifically for 427 // Intel GPUs. 428 Header.Machine = ELF::EM_IA_64; 429 430 // Create a section with notes. 431 ELFYAML::NoteSection Section{}; 432 Section.Type = ELF::SHT_NOTE; 433 Section.AddressAlign = 0; 434 Section.Name = ".note.inteloneompoffload"; 435 Section.Notes.emplace(std::move(Notes)); 436 437 ELFYAML::Object Object{}; 438 Object.Header = Header; 439 Object.Chunks.push_back( 440 std::make_unique<ELFYAML::NoteSection>(std::move(Section))); 441 442 // Create the section that will hold the image 443 ELFYAML::RawContentSection ImageSection{}; 444 ImageSection.Type = ELF::SHT_PROGBITS; 445 ImageSection.AddressAlign = 0; 446 std::string Name = "__openmp_offload_spirv_0"; 447 ImageSection.Name = Name; 448 ImageSection.Content = 449 llvm::yaml::BinaryRef(arrayRefFromStringRef(Img->getBuffer())); 450 Object.Chunks.push_back( 451 std::make_unique<ELFYAML::RawContentSection>(std::move(ImageSection))); 452 Error Err = Error::success(); 453 llvm::yaml::yaml2elf( 454 Object, YamlFileStream, 455 [&Err](const Twine &Msg) { Err = createStringError(Msg); }, UINT64_MAX); 456 if (Err) 457 return Err; 458 459 Img = MemoryBuffer::getMemBufferCopy(YamlFile); 460 return Error::success(); 461 } 462