xref: /freebsd/contrib/llvm-project/llvm/lib/Frontend/Offloading/Utility.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- Utility.cpp ------ Collection of generic offloading utilities ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/Offloading/Utility.h"
10 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
11 #include "llvm/BinaryFormat/ELF.h"
12 #include "llvm/BinaryFormat/MsgPackDocument.h"
13 #include "llvm/IR/Constants.h"
14 #include "llvm/IR/GlobalValue.h"
15 #include "llvm/IR/GlobalVariable.h"
16 #include "llvm/IR/Value.h"
17 #include "llvm/Object/ELFObjectFile.h"
18 #include "llvm/ObjectYAML/ELFYAML.h"
19 #include "llvm/ObjectYAML/yaml2obj.h"
20 #include "llvm/Support/MemoryBufferRef.h"
21 #include "llvm/Transforms/Utils/ModuleUtils.h"
22 
23 using namespace llvm;
24 using namespace llvm::offloading;
25 
26 StructType *offloading::getEntryTy(Module &M) {
27   LLVMContext &C = M.getContext();
28   StructType *EntryTy =
29       StructType::getTypeByName(C, "struct.__tgt_offload_entry");
30   if (!EntryTy)
31     EntryTy = StructType::create(
32         "struct.__tgt_offload_entry", Type::getInt64Ty(C), Type::getInt16Ty(C),
33         Type::getInt16Ty(C), Type::getInt32Ty(C), PointerType::getUnqual(C),
34         PointerType::getUnqual(C), Type::getInt64Ty(C), Type::getInt64Ty(C),
35         PointerType::getUnqual(C));
36   return EntryTy;
37 }
38 
39 std::pair<Constant *, GlobalVariable *>
40 offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
41                                           Constant *Addr, StringRef Name,
42                                           uint64_t Size, uint32_t Flags,
43                                           uint64_t Data, Constant *AuxAddr) {
44   const llvm::Triple &Triple = M.getTargetTriple();
45   Type *PtrTy = PointerType::getUnqual(M.getContext());
46   Type *Int64Ty = Type::getInt64Ty(M.getContext());
47   Type *Int32Ty = Type::getInt32Ty(M.getContext());
48   Type *Int16Ty = Type::getInt16Ty(M.getContext());
49 
50   Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
51 
52   StringRef Prefix =
53       Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
54 
55   // Create the constant string used to look up the symbol in the device.
56   auto *Str =
57       new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
58                          GlobalValue::InternalLinkage, AddrName, Prefix);
59   StringRef SectionName = ".llvm.rodata.offloading";
60   Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
61   Str->setSection(SectionName);
62   Str->setAlignment(Align(1));
63 
64   // Make a metadata node for these constants so it can be queried from IR.
65   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
66   Metadata *MDVals[] = {ConstantAsMetadata::get(Str)};
67   MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals));
68 
69   // Construct the offloading entry.
70   Constant *EntryData[] = {
71       ConstantExpr::getNullValue(Int64Ty),
72       ConstantInt::get(Int16Ty, 1),
73       ConstantInt::get(Int16Ty, Kind),
74       ConstantInt::get(Int32Ty, Flags),
75       ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, PtrTy),
76       ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, PtrTy),
77       ConstantInt::get(Int64Ty, Size),
78       ConstantInt::get(Int64Ty, Data),
79       AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(AuxAddr, PtrTy)
80               : ConstantExpr::getNullValue(PtrTy)};
81   Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData);
82   return {EntryInitializer, Str};
83 }
84 
85 void offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
86                                      Constant *Addr, StringRef Name,
87                                      uint64_t Size, uint32_t Flags,
88                                      uint64_t Data, Constant *AuxAddr,
89                                      StringRef SectionName) {
90   const llvm::Triple &Triple = M.getTargetTriple();
91 
92   auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
93       M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
94 
95   StringRef Prefix =
96       Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
97   auto *Entry = new GlobalVariable(
98       M, getEntryTy(M),
99       /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
100       Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
101       M.getDataLayout().getDefaultGlobalsAddressSpace());
102 
103   // The entry has to be created in the section the linker expects it to be.
104   if (Triple.isOSBinFormatCOFF())
105     Entry->setSection((SectionName + "$OE").str());
106   else
107     Entry->setSection(SectionName);
108   Entry->setAlignment(Align(object::OffloadBinary::getAlignment()));
109 }
110 
111 std::pair<GlobalVariable *, GlobalVariable *>
112 offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
113   const llvm::Triple &Triple = M.getTargetTriple();
114 
115   auto *ZeroInitilaizer =
116       ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
117   auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
118   auto *EntryType = ArrayType::get(getEntryTy(M), 0);
119   auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
120                                             : GlobalValue::ExternalLinkage;
121 
122   auto *EntriesB =
123       new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
124                          "__start_" + SectionName);
125   EntriesB->setVisibility(GlobalValue::HiddenVisibility);
126   auto *EntriesE =
127       new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
128                          "__stop_" + SectionName);
129   EntriesE->setVisibility(GlobalValue::HiddenVisibility);
130 
131   if (Triple.isOSBinFormatELF()) {
132     // We assume that external begin/end symbols that we have created above will
133     // be defined by the linker. This is done whenever a section name with a
134     // valid C-identifier is present. We define a dummy variable here to force
135     // the linker to always provide these symbols.
136     auto *DummyEntry = new GlobalVariable(
137         M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
138         ZeroInitilaizer, "__dummy." + SectionName);
139     DummyEntry->setSection(SectionName);
140     DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
141     appendToCompilerUsed(M, DummyEntry);
142   } else {
143     // The COFF linker will merge sections containing a '$' together into a
144     // single section. The order of entries in this section will be sorted
145     // alphabetically by the characters following the '$' in the name. Set the
146     // sections here to ensure that the beginning and end symbols are sorted.
147     EntriesB->setSection((SectionName + "$OA").str());
148     EntriesE->setSection((SectionName + "$OZ").str());
149   }
150 
151   return std::make_pair(EntriesB, EntriesE);
152 }
153 
154 bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
155                                                         uint32_t ImageFlags,
156                                                         StringRef EnvTargetID) {
157   using namespace llvm::ELF;
158   StringRef EnvArch = EnvTargetID.split(":").first;
159 
160   // Trivial check if the base processors match.
161   if (EnvArch != ImageArch)
162     return false;
163 
164   // Check if the image is requesting xnack on or off.
165   switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
166   case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
167     // The image is 'xnack-' so the environment must be 'xnack-'.
168     if (!EnvTargetID.contains("xnack-"))
169       return false;
170     break;
171   case EF_AMDGPU_FEATURE_XNACK_ON_V4:
172     // The image is 'xnack+' so the environment must be 'xnack+'.
173     if (!EnvTargetID.contains("xnack+"))
174       return false;
175     break;
176   case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
177   case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
178   default:
179     break;
180   }
181 
182   // Check if the image is requesting sramecc on or off.
183   switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
184   case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
185     // The image is 'sramecc-' so the environment must be 'sramecc-'.
186     if (!EnvTargetID.contains("sramecc-"))
187       return false;
188     break;
189   case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
190     // The image is 'sramecc+' so the environment must be 'sramecc+'.
191     if (!EnvTargetID.contains("sramecc+"))
192       return false;
193     break;
194   case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
195   case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
196     break;
197   }
198 
199   return true;
200 }
201 
202 namespace {
203 /// Reads the AMDGPU specific per-kernel-metadata from an image.
204 class KernelInfoReader {
205 public:
206   KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
207       : KernelInfoMap(KIM) {}
208 
209   /// Process ELF note to read AMDGPU metadata from respective information
210   /// fields.
211   Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
212     if (Note.getName() != "AMDGPU")
213       return Error::success(); // We are not interested in other things
214 
215     assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
216            "Parse AMDGPU MetaData");
217     auto Desc = Note.getDesc(Align);
218     StringRef MsgPackString =
219         StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
220     msgpack::Document MsgPackDoc;
221     if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
222       return Error::success();
223 
224     AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
225     if (!Verifier.verify(MsgPackDoc.getRoot()))
226       return Error::success();
227 
228     auto RootMap = MsgPackDoc.getRoot().getMap(true);
229 
230     if (auto Err = iterateAMDKernels(RootMap))
231       return Err;
232 
233     return Error::success();
234   }
235 
236 private:
237   /// Extracts the relevant information via simple string look-up in the msgpack
238   /// document elements.
239   Error
240   extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
241                     std::string &KernelName,
242                     offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
243     if (!V.first.isString())
244       return Error::success();
245 
246     const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
247       return DK.getString() == SK;
248     };
249 
250     const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
251                                            uint32_t *Vals) {
252       assert(DN.isArray() && "MsgPack DocNode is an array node");
253       auto DNA = DN.getArray();
254       assert(DNA.size() == 3 && "ArrayNode has at most three elements");
255 
256       int I = 0;
257       for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
258            ++DNABegin) {
259         Vals[I++] = DNABegin->getUInt();
260       }
261     };
262 
263     if (IsKey(V.first, ".name")) {
264       KernelName = V.second.toString();
265     } else if (IsKey(V.first, ".sgpr_count")) {
266       KernelData.SGPRCount = V.second.getUInt();
267     } else if (IsKey(V.first, ".sgpr_spill_count")) {
268       KernelData.SGPRSpillCount = V.second.getUInt();
269     } else if (IsKey(V.first, ".vgpr_count")) {
270       KernelData.VGPRCount = V.second.getUInt();
271     } else if (IsKey(V.first, ".vgpr_spill_count")) {
272       KernelData.VGPRSpillCount = V.second.getUInt();
273     } else if (IsKey(V.first, ".agpr_count")) {
274       KernelData.AGPRCount = V.second.getUInt();
275     } else if (IsKey(V.first, ".private_segment_fixed_size")) {
276       KernelData.PrivateSegmentSize = V.second.getUInt();
277     } else if (IsKey(V.first, ".group_segment_fixed_size")) {
278       KernelData.GroupSegmentList = V.second.getUInt();
279     } else if (IsKey(V.first, ".reqd_workgroup_size")) {
280       GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
281     } else if (IsKey(V.first, ".workgroup_size_hint")) {
282       GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
283     } else if (IsKey(V.first, ".wavefront_size")) {
284       KernelData.WavefrontSize = V.second.getUInt();
285     } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
286       KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
287     }
288 
289     return Error::success();
290   }
291 
292   /// Get the "amdhsa.kernels" element from the msgpack Document
293   Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
294     auto Res = MDN.find("amdhsa.kernels");
295     if (Res == MDN.end())
296       return createStringError(inconvertibleErrorCode(),
297                                "Could not find amdhsa.kernels key");
298 
299     auto Pair = *Res;
300     assert(Pair.second.isArray() &&
301            "AMDGPU kernel entries are arrays of entries");
302 
303     return Pair.second.getArray();
304   }
305 
306   /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
307   /// MapDocNode that either maps a string to a single value (most of them) or
308   /// to another array of things. Currently, we only handle the case that maps
309   /// to scalar value.
310   Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
311     offloading::amdgpu::AMDGPUKernelMetaData KernelData;
312     std::string KernelName;
313     auto Entry = (*It).getMap();
314     for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
315       if (auto Err = extractKernelData(*MI, KernelName, KernelData))
316         return Err;
317 
318     KernelInfoMap.insert({KernelName, KernelData});
319     return Error::success();
320   }
321 
322   /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
323   Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
324     auto KernelsOrErr = getAMDKernelsArray(MDN);
325     if (auto Err = KernelsOrErr.takeError())
326       return Err;
327 
328     auto KernelsArr = *KernelsOrErr;
329     for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
330       if (!It->isMap())
331         continue; // we expect <key,value> pairs
332 
333       // Obtain the value for the different entries. Each array entry is a
334       // MapDocNode
335       if (auto Err = generateKernelInfo(It))
336         return Err;
337     }
338     return Error::success();
339   }
340 
341   // Kernel names are the keys
342   StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
343 };
344 } // namespace
345 
346 Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
347     MemoryBufferRef MemBuffer,
348     StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
349     uint16_t &ELFABIVersion) {
350   Error Err = Error::success(); // Used later as out-parameter
351 
352   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
353   if (auto Err = ELFOrError.takeError())
354     return Err;
355 
356   const object::ELF64LEFile ELFObj = ELFOrError.get();
357   Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
358   if (!Sections)
359     return Sections.takeError();
360   KernelInfoReader Reader(KernelInfoMap);
361 
362   // Read the code object version from ELF image header
363   auto Header = ELFObj.getHeader();
364   ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
365   for (const auto &S : *Sections) {
366     if (S.sh_type != ELF::SHT_NOTE)
367       continue;
368 
369     for (const auto N : ELFObj.notes(S, Err)) {
370       if (Err)
371         return Err;
372       // Fills the KernelInfoTabel entries in the reader
373       if ((Err = Reader.processNote(N, S.sh_addralign)))
374         return Err;
375     }
376   }
377   return Error::success();
378 }
379 Error offloading::intel::containerizeOpenMPSPIRVImage(
380     std::unique_ptr<MemoryBuffer> &Img) {
381   constexpr char INTEL_ONEOMP_OFFLOAD_VERSION[] = "1.0";
382   constexpr int NT_INTEL_ONEOMP_OFFLOAD_VERSION = 1;
383   constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT = 2;
384   constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX = 3;
385 
386   // Start creating notes for the ELF container.
387   std::vector<ELFYAML::NoteEntry> Notes;
388   std::string Version = toHex(INTEL_ONEOMP_OFFLOAD_VERSION);
389   Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
390                                         yaml::BinaryRef(Version),
391                                         NT_INTEL_ONEOMP_OFFLOAD_VERSION});
392 
393   // The AuxInfo string will hold auxiliary information for the image.
394   // ELFYAML::NoteEntry structures will hold references to the
395   // string, so we have to make sure the string is valid.
396   std::string AuxInfo;
397 
398   // TODO: Pass compile/link opts
399   StringRef CompileOpts = "";
400   StringRef LinkOpts = "";
401 
402   unsigned ImageFmt = 1; // SPIR-V format
403 
404   AuxInfo = toHex((Twine(0) + Twine('\0') + Twine(ImageFmt) + Twine('\0') +
405                    CompileOpts + Twine('\0') + LinkOpts)
406                       .str());
407   Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
408                                         yaml::BinaryRef(AuxInfo),
409                                         NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX});
410 
411   std::string ImgCount = toHex(Twine(1).str()); // always one image per ELF
412   Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
413                                         yaml::BinaryRef(ImgCount),
414                                         NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT});
415 
416   std::string YamlFile;
417   llvm::raw_string_ostream YamlFileStream(YamlFile);
418 
419   // Write the YAML template file.
420 
421   // We use 64-bit little-endian ELF currently.
422   ELFYAML::FileHeader Header{};
423   Header.Class = ELF::ELFCLASS64;
424   Header.Data = ELF::ELFDATA2LSB;
425   Header.Type = ELF::ET_DYN;
426   // Use an existing Intel machine type as there is not one specifically for
427   // Intel GPUs.
428   Header.Machine = ELF::EM_IA_64;
429 
430   // Create a section with notes.
431   ELFYAML::NoteSection Section{};
432   Section.Type = ELF::SHT_NOTE;
433   Section.AddressAlign = 0;
434   Section.Name = ".note.inteloneompoffload";
435   Section.Notes.emplace(std::move(Notes));
436 
437   ELFYAML::Object Object{};
438   Object.Header = Header;
439   Object.Chunks.push_back(
440       std::make_unique<ELFYAML::NoteSection>(std::move(Section)));
441 
442   // Create the section that will hold the image
443   ELFYAML::RawContentSection ImageSection{};
444   ImageSection.Type = ELF::SHT_PROGBITS;
445   ImageSection.AddressAlign = 0;
446   std::string Name = "__openmp_offload_spirv_0";
447   ImageSection.Name = Name;
448   ImageSection.Content =
449       llvm::yaml::BinaryRef(arrayRefFromStringRef(Img->getBuffer()));
450   Object.Chunks.push_back(
451       std::make_unique<ELFYAML::RawContentSection>(std::move(ImageSection)));
452   Error Err = Error::success();
453   llvm::yaml::yaml2elf(
454       Object, YamlFileStream,
455       [&Err](const Twine &Msg) { Err = createStringError(Msg); }, UINT64_MAX);
456   if (Err)
457     return Err;
458 
459   Img = MemoryBuffer::getMemBufferCopy(YamlFile);
460   return Error::success();
461 }
462