xref: /freebsd/contrib/llvm-project/clang/lib/Driver/OffloadBundler.cpp (revision b9128a37faafede823eb456aa65a11ac69997284)
1 //===- OffloadBundler.cpp - File Bundling and Unbundling ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// This file implements an offload bundling API that bundles different files
/// that relate to the same source code but different targets into a single
/// one. It also implements the opposite functionality, i.e. unbundling files
/// previously created by this API.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "clang/Driver/OffloadBundler.h"
18 #include "clang/Basic/Cuda.h"
19 #include "clang/Basic/TargetID.h"
20 #include "clang/Basic/Version.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/Object/Archive.h"
29 #include "llvm/Object/ArchiveWriter.h"
30 #include "llvm/Object/Binary.h"
31 #include "llvm/Object/ObjectFile.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Compression.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/EndianStream.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorOr.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/MD5.h"
41 #include "llvm/Support/MemoryBuffer.h"
42 #include "llvm/Support/Path.h"
43 #include "llvm/Support/Program.h"
44 #include "llvm/Support/Signals.h"
45 #include "llvm/Support/StringSaver.h"
46 #include "llvm/Support/Timer.h"
47 #include "llvm/Support/WithColor.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include "llvm/TargetParser/Host.h"
50 #include "llvm/TargetParser/Triple.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstddef>
54 #include <cstdint>
55 #include <forward_list>
56 #include <llvm/Support/Process.h>
57 #include <memory>
58 #include <set>
59 #include <string>
60 #include <system_error>
61 #include <utility>
62 
63 using namespace llvm;
64 using namespace llvm::object;
65 using namespace clang;
66 
/// Timer group shared by the timers created in this file (for example the
/// hash-calculation timer started while compressing a bundle).
static llvm::TimerGroup
    ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group",
                                  "Timer group for clang offload bundler");

/// Magic string that marks the existence of offloading data. It starts the
/// header of binary bundles, prefixes the names of offload sections in object
/// files, and is embedded in the start/end markers of text bundles.
#define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
73 
74 OffloadTargetInfo::OffloadTargetInfo(const StringRef Target,
75                                      const OffloadBundlerConfig &BC)
76     : BundlerConfig(BC) {
77 
78   // TODO: Add error checking from ClangOffloadBundler.cpp
79   auto TargetFeatures = Target.split(':');
80   auto TripleOrGPU = TargetFeatures.first.rsplit('-');
81 
82   if (clang::StringToCudaArch(TripleOrGPU.second) != clang::CudaArch::UNKNOWN) {
83     auto KindTriple = TripleOrGPU.first.split('-');
84     this->OffloadKind = KindTriple.first;
85 
86     // Enforce optional env field to standardize bundles
87     llvm::Triple t = llvm::Triple(KindTriple.second);
88     this->Triple = llvm::Triple(t.getArchName(), t.getVendorName(),
89                                 t.getOSName(), t.getEnvironmentName());
90 
91     this->TargetID = Target.substr(Target.find(TripleOrGPU.second));
92   } else {
93     auto KindTriple = TargetFeatures.first.split('-');
94     this->OffloadKind = KindTriple.first;
95 
96     // Enforce optional env field to standardize bundles
97     llvm::Triple t = llvm::Triple(KindTriple.second);
98     this->Triple = llvm::Triple(t.getArchName(), t.getVendorName(),
99                                 t.getOSName(), t.getEnvironmentName());
100 
101     this->TargetID = "";
102   }
103 }
104 
105 bool OffloadTargetInfo::hasHostKind() const {
106   return this->OffloadKind == "host";
107 }
108 
109 bool OffloadTargetInfo::isOffloadKindValid() const {
110   return OffloadKind == "host" || OffloadKind == "openmp" ||
111          OffloadKind == "hip" || OffloadKind == "hipv4";
112 }
113 
114 bool OffloadTargetInfo::isOffloadKindCompatible(
115     const StringRef TargetOffloadKind) const {
116   if (OffloadKind == TargetOffloadKind)
117     return true;
118   if (BundlerConfig.HipOpenmpCompatible) {
119     bool HIPCompatibleWithOpenMP = OffloadKind.starts_with_insensitive("hip") &&
120                                    TargetOffloadKind == "openmp";
121     bool OpenMPCompatibleWithHIP =
122         OffloadKind == "openmp" &&
123         TargetOffloadKind.starts_with_insensitive("hip");
124     return HIPCompatibleWithOpenMP || OpenMPCompatibleWithHIP;
125   }
126   return false;
127 }
128 
129 bool OffloadTargetInfo::isTripleValid() const {
130   return !Triple.str().empty() && Triple.getArch() != Triple::UnknownArch;
131 }
132 
133 bool OffloadTargetInfo::operator==(const OffloadTargetInfo &Target) const {
134   return OffloadKind == Target.OffloadKind &&
135          Triple.isCompatibleWith(Target.Triple) && TargetID == Target.TargetID;
136 }
137 
138 std::string OffloadTargetInfo::str() const {
139   return Twine(OffloadKind + "-" + Triple.str() + "-" + TargetID).str();
140 }
141 
142 static StringRef getDeviceFileExtension(StringRef Device,
143                                         StringRef BundleFileName) {
144   if (Device.contains("gfx"))
145     return ".bc";
146   if (Device.contains("sm_"))
147     return ".cubin";
148   return sys::path::extension(BundleFileName);
149 }
150 
151 static std::string getDeviceLibraryFileName(StringRef BundleFileName,
152                                             StringRef Device) {
153   StringRef LibName = sys::path::stem(BundleFileName);
154   StringRef Extension = getDeviceFileExtension(Device, BundleFileName);
155 
156   std::string Result;
157   Result += LibName;
158   Result += Extension;
159   return Result;
160 }
161 
162 namespace {
163 /// Generic file handler interface.
164 class FileHandler {
165 public:
166   struct BundleInfo {
167     StringRef BundleID;
168   };
169 
170   FileHandler() {}
171 
172   virtual ~FileHandler() {}
173 
174   /// Update the file handler with information from the header of the bundled
175   /// file.
176   virtual Error ReadHeader(MemoryBuffer &Input) = 0;
177 
178   /// Read the marker of the next bundled to be read in the file. The bundle
179   /// name is returned if there is one in the file, or `std::nullopt` if there
180   /// are no more bundles to be read.
181   virtual Expected<std::optional<StringRef>>
182   ReadBundleStart(MemoryBuffer &Input) = 0;
183 
184   /// Read the marker that closes the current bundle.
185   virtual Error ReadBundleEnd(MemoryBuffer &Input) = 0;
186 
187   /// Read the current bundle and write the result into the stream \a OS.
188   virtual Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
189 
190   /// Write the header of the bundled file to \a OS based on the information
191   /// gathered from \a Inputs.
192   virtual Error WriteHeader(raw_ostream &OS,
193                             ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
194 
195   /// Write the marker that initiates a bundle for the triple \a TargetTriple to
196   /// \a OS.
197   virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0;
198 
199   /// Write the marker that closes a bundle for the triple \a TargetTriple to \a
200   /// OS.
201   virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0;
202 
203   /// Write the bundle from \a Input into \a OS.
204   virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
205 
206   /// Finalize output file.
207   virtual Error finalizeOutputFile() { return Error::success(); }
208 
209   /// List bundle IDs in \a Input.
210   virtual Error listBundleIDs(MemoryBuffer &Input) {
211     if (Error Err = ReadHeader(Input))
212       return Err;
213     return forEachBundle(Input, [&](const BundleInfo &Info) -> Error {
214       llvm::outs() << Info.BundleID << '\n';
215       Error Err = listBundleIDsCallback(Input, Info);
216       if (Err)
217         return Err;
218       return Error::success();
219     });
220   }
221 
222   /// Get bundle IDs in \a Input in \a BundleIds.
223   virtual Error getBundleIDs(MemoryBuffer &Input,
224                              std::set<StringRef> &BundleIds) {
225     if (Error Err = ReadHeader(Input))
226       return Err;
227     return forEachBundle(Input, [&](const BundleInfo &Info) -> Error {
228       BundleIds.insert(Info.BundleID);
229       Error Err = listBundleIDsCallback(Input, Info);
230       if (Err)
231         return Err;
232       return Error::success();
233     });
234   }
235 
236   /// For each bundle in \a Input, do \a Func.
237   Error forEachBundle(MemoryBuffer &Input,
238                       std::function<Error(const BundleInfo &)> Func) {
239     while (true) {
240       Expected<std::optional<StringRef>> CurTripleOrErr =
241           ReadBundleStart(Input);
242       if (!CurTripleOrErr)
243         return CurTripleOrErr.takeError();
244 
245       // No more bundles.
246       if (!*CurTripleOrErr)
247         break;
248 
249       StringRef CurTriple = **CurTripleOrErr;
250       assert(!CurTriple.empty());
251 
252       BundleInfo Info{CurTriple};
253       if (Error Err = Func(Info))
254         return Err;
255     }
256     return Error::success();
257   }
258 
259 protected:
260   virtual Error listBundleIDsCallback(MemoryBuffer &Input,
261                                       const BundleInfo &Info) {
262     return Error::success();
263   }
264 };
265 
266 /// Handler for binary files. The bundled file will have the following format
267 /// (all integers are stored in little-endian format):
268 ///
269 /// "OFFLOAD_BUNDLER_MAGIC_STR" (ASCII encoding of the string)
270 ///
271 /// NumberOfOffloadBundles (8-byte integer)
272 ///
273 /// OffsetOfBundle1 (8-byte integer)
274 /// SizeOfBundle1 (8-byte integer)
275 /// NumberOfBytesInTripleOfBundle1 (8-byte integer)
276 /// TripleOfBundle1 (byte length defined before)
277 ///
278 /// ...
279 ///
280 /// OffsetOfBundleN (8-byte integer)
281 /// SizeOfBundleN (8-byte integer)
282 /// NumberOfBytesInTripleOfBundleN (8-byte integer)
283 /// TripleOfBundleN (byte length defined before)
284 ///
285 /// Bundle1
286 /// ...
287 /// BundleN
288 
289 /// Read 8-byte integers from a buffer in little-endian format.
290 static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
291   return llvm::support::endian::read64le(Buffer.data() + pos);
292 }
293 
294 /// Write 8-byte integers to a buffer in little-endian format.
295 static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) {
296   llvm::support::endian::write(OS, Val, llvm::endianness::little);
297 }
298 
299 class BinaryFileHandler final : public FileHandler {
300   /// Information about the bundles extracted from the header.
301   struct BinaryBundleInfo final : public BundleInfo {
302     /// Size of the bundle.
303     uint64_t Size = 0u;
304     /// Offset at which the bundle starts in the bundled file.
305     uint64_t Offset = 0u;
306 
307     BinaryBundleInfo() {}
308     BinaryBundleInfo(uint64_t Size, uint64_t Offset)
309         : Size(Size), Offset(Offset) {}
310   };
311 
312   /// Map between a triple and the corresponding bundle information.
313   StringMap<BinaryBundleInfo> BundlesInfo;
314 
315   /// Iterator for the bundle information that is being read.
316   StringMap<BinaryBundleInfo>::iterator CurBundleInfo;
317   StringMap<BinaryBundleInfo>::iterator NextBundleInfo;
318 
319   /// Current bundle target to be written.
320   std::string CurWriteBundleTarget;
321 
322   /// Configuration options and arrays for this bundler job
323   const OffloadBundlerConfig &BundlerConfig;
324 
325 public:
326   // TODO: Add error checking from ClangOffloadBundler.cpp
327   BinaryFileHandler(const OffloadBundlerConfig &BC) : BundlerConfig(BC) {}
328 
329   ~BinaryFileHandler() final {}
330 
331   Error ReadHeader(MemoryBuffer &Input) final {
332     StringRef FC = Input.getBuffer();
333 
334     // Initialize the current bundle with the end of the container.
335     CurBundleInfo = BundlesInfo.end();
336 
337     // Check if buffer is smaller than magic string.
338     size_t ReadChars = sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
339     if (ReadChars > FC.size())
340       return Error::success();
341 
342     // Check if no magic was found.
343     if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle)
344       return Error::success();
345 
346     // Read number of bundles.
347     if (ReadChars + 8 > FC.size())
348       return Error::success();
349 
350     uint64_t NumberOfBundles = Read8byteIntegerFromBuffer(FC, ReadChars);
351     ReadChars += 8;
352 
353     // Read bundle offsets, sizes and triples.
354     for (uint64_t i = 0; i < NumberOfBundles; ++i) {
355 
356       // Read offset.
357       if (ReadChars + 8 > FC.size())
358         return Error::success();
359 
360       uint64_t Offset = Read8byteIntegerFromBuffer(FC, ReadChars);
361       ReadChars += 8;
362 
363       // Read size.
364       if (ReadChars + 8 > FC.size())
365         return Error::success();
366 
367       uint64_t Size = Read8byteIntegerFromBuffer(FC, ReadChars);
368       ReadChars += 8;
369 
370       // Read triple size.
371       if (ReadChars + 8 > FC.size())
372         return Error::success();
373 
374       uint64_t TripleSize = Read8byteIntegerFromBuffer(FC, ReadChars);
375       ReadChars += 8;
376 
377       // Read triple.
378       if (ReadChars + TripleSize > FC.size())
379         return Error::success();
380 
381       StringRef Triple(&FC.data()[ReadChars], TripleSize);
382       ReadChars += TripleSize;
383 
384       // Check if the offset and size make sense.
385       if (!Offset || Offset + Size > FC.size())
386         return Error::success();
387 
388       assert(!BundlesInfo.contains(Triple) && "Triple is duplicated??");
389       BundlesInfo[Triple] = BinaryBundleInfo(Size, Offset);
390     }
391     // Set the iterator to where we will start to read.
392     CurBundleInfo = BundlesInfo.end();
393     NextBundleInfo = BundlesInfo.begin();
394     return Error::success();
395   }
396 
397   Expected<std::optional<StringRef>>
398   ReadBundleStart(MemoryBuffer &Input) final {
399     if (NextBundleInfo == BundlesInfo.end())
400       return std::nullopt;
401     CurBundleInfo = NextBundleInfo++;
402     return CurBundleInfo->first();
403   }
404 
405   Error ReadBundleEnd(MemoryBuffer &Input) final {
406     assert(CurBundleInfo != BundlesInfo.end() && "Invalid reader info!");
407     return Error::success();
408   }
409 
410   Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final {
411     assert(CurBundleInfo != BundlesInfo.end() && "Invalid reader info!");
412     StringRef FC = Input.getBuffer();
413     OS.write(FC.data() + CurBundleInfo->second.Offset,
414              CurBundleInfo->second.Size);
415     return Error::success();
416   }
417 
418   Error WriteHeader(raw_ostream &OS,
419                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
420 
421     // Compute size of the header.
422     uint64_t HeaderSize = 0;
423 
424     HeaderSize += sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
425     HeaderSize += 8; // Number of Bundles
426 
427     for (auto &T : BundlerConfig.TargetNames) {
428       HeaderSize += 3 * 8; // Bundle offset, Size of bundle and size of triple.
429       HeaderSize += T.size(); // The triple.
430     }
431 
432     // Write to the buffer the header.
433     OS << OFFLOAD_BUNDLER_MAGIC_STR;
434 
435     Write8byteIntegerToBuffer(OS, BundlerConfig.TargetNames.size());
436 
437     unsigned Idx = 0;
438     for (auto &T : BundlerConfig.TargetNames) {
439       MemoryBuffer &MB = *Inputs[Idx++];
440       HeaderSize = alignTo(HeaderSize, BundlerConfig.BundleAlignment);
441       // Bundle offset.
442       Write8byteIntegerToBuffer(OS, HeaderSize);
443       // Size of the bundle (adds to the next bundle's offset)
444       Write8byteIntegerToBuffer(OS, MB.getBufferSize());
445       BundlesInfo[T] = BinaryBundleInfo(MB.getBufferSize(), HeaderSize);
446       HeaderSize += MB.getBufferSize();
447       // Size of the triple
448       Write8byteIntegerToBuffer(OS, T.size());
449       // Triple
450       OS << T;
451     }
452     return Error::success();
453   }
454 
455   Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
456     CurWriteBundleTarget = TargetTriple.str();
457     return Error::success();
458   }
459 
460   Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
461     return Error::success();
462   }
463 
464   Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
465     auto BI = BundlesInfo[CurWriteBundleTarget];
466 
467     // Pad with 0 to reach specified offset.
468     size_t CurrentPos = OS.tell();
469     size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0;
470     for (size_t I = 0; I < PaddingSize; ++I)
471       OS.write('\0');
472     assert(OS.tell() == BI.Offset);
473 
474     OS.write(Input.getBufferStart(), Input.getBufferSize());
475 
476     return Error::success();
477   }
478 };
479 
480 // This class implements a list of temporary files that are removed upon
481 // object destruction.
482 class TempFileHandlerRAII {
483 public:
484   ~TempFileHandlerRAII() {
485     for (const auto &File : Files)
486       sys::fs::remove(File);
487   }
488 
489   // Creates temporary file with given contents.
490   Expected<StringRef> Create(std::optional<ArrayRef<char>> Contents) {
491     SmallString<128u> File;
492     if (std::error_code EC =
493             sys::fs::createTemporaryFile("clang-offload-bundler", "tmp", File))
494       return createFileError(File, EC);
495     Files.push_front(File);
496 
497     if (Contents) {
498       std::error_code EC;
499       raw_fd_ostream OS(File, EC);
500       if (EC)
501         return createFileError(File, EC);
502       OS.write(Contents->data(), Contents->size());
503     }
504     return Files.front().str();
505   }
506 
507 private:
508   std::forward_list<SmallString<128u>> Files;
509 };
510 
511 /// Handler for object files. The bundles are organized by sections with a
512 /// designated name.
513 ///
514 /// To unbundle, we just copy the contents of the designated section.
515 class ObjectFileHandler final : public FileHandler {
516 
517   /// The object file we are currently dealing with.
518   std::unique_ptr<ObjectFile> Obj;
519 
520   /// Return the input file contents.
521   StringRef getInputFileContents() const { return Obj->getData(); }
522 
523   /// Return bundle name (<kind>-<triple>) if the provided section is an offload
524   /// section.
525   static Expected<std::optional<StringRef>>
526   IsOffloadSection(SectionRef CurSection) {
527     Expected<StringRef> NameOrErr = CurSection.getName();
528     if (!NameOrErr)
529       return NameOrErr.takeError();
530 
531     // If it does not start with the reserved suffix, just skip this section.
532     if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle)
533       return std::nullopt;
534 
535     // Return the triple that is right after the reserved prefix.
536     return NameOrErr->substr(sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
537   }
538 
539   /// Total number of inputs.
540   unsigned NumberOfInputs = 0;
541 
542   /// Total number of processed inputs, i.e, inputs that were already
543   /// read from the buffers.
544   unsigned NumberOfProcessedInputs = 0;
545 
546   /// Iterator of the current and next section.
547   section_iterator CurrentSection;
548   section_iterator NextSection;
549 
550   /// Configuration options and arrays for this bundler job
551   const OffloadBundlerConfig &BundlerConfig;
552 
553 public:
554   // TODO: Add error checking from ClangOffloadBundler.cpp
555   ObjectFileHandler(std::unique_ptr<ObjectFile> ObjIn,
556                     const OffloadBundlerConfig &BC)
557       : Obj(std::move(ObjIn)), CurrentSection(Obj->section_begin()),
558         NextSection(Obj->section_begin()), BundlerConfig(BC) {}
559 
560   ~ObjectFileHandler() final {}
561 
562   Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); }
563 
564   Expected<std::optional<StringRef>>
565   ReadBundleStart(MemoryBuffer &Input) final {
566     while (NextSection != Obj->section_end()) {
567       CurrentSection = NextSection;
568       ++NextSection;
569 
570       // Check if the current section name starts with the reserved prefix. If
571       // so, return the triple.
572       Expected<std::optional<StringRef>> TripleOrErr =
573           IsOffloadSection(*CurrentSection);
574       if (!TripleOrErr)
575         return TripleOrErr.takeError();
576       if (*TripleOrErr)
577         return **TripleOrErr;
578     }
579     return std::nullopt;
580   }
581 
582   Error ReadBundleEnd(MemoryBuffer &Input) final { return Error::success(); }
583 
584   Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final {
585     Expected<StringRef> ContentOrErr = CurrentSection->getContents();
586     if (!ContentOrErr)
587       return ContentOrErr.takeError();
588     StringRef Content = *ContentOrErr;
589 
590     // Copy fat object contents to the output when extracting host bundle.
591     if (Content.size() == 1u && Content.front() == 0)
592       Content = StringRef(Input.getBufferStart(), Input.getBufferSize());
593 
594     OS.write(Content.data(), Content.size());
595     return Error::success();
596   }
597 
598   Error WriteHeader(raw_ostream &OS,
599                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
600     assert(BundlerConfig.HostInputIndex != ~0u &&
601            "Host input index not defined.");
602 
603     // Record number of inputs.
604     NumberOfInputs = Inputs.size();
605     return Error::success();
606   }
607 
608   Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
609     ++NumberOfProcessedInputs;
610     return Error::success();
611   }
612 
613   Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
614     return Error::success();
615   }
616 
617   Error finalizeOutputFile() final {
618     assert(NumberOfProcessedInputs <= NumberOfInputs &&
619            "Processing more inputs that actually exist!");
620     assert(BundlerConfig.HostInputIndex != ~0u &&
621            "Host input index not defined.");
622 
623     // If this is not the last output, we don't have to do anything.
624     if (NumberOfProcessedInputs != NumberOfInputs)
625       return Error::success();
626 
627     // We will use llvm-objcopy to add target objects sections to the output
628     // fat object. These sections should have 'exclude' flag set which tells
629     // link editor to remove them from linker inputs when linking executable or
630     // shared library.
631 
632     assert(BundlerConfig.ObjcopyPath != "" &&
633            "llvm-objcopy path not specified");
634 
635     // Temporary files that need to be removed.
636     TempFileHandlerRAII TempFiles;
637 
638     // Compose llvm-objcopy command line for add target objects' sections with
639     // appropriate flags.
640     BumpPtrAllocator Alloc;
641     StringSaver SS{Alloc};
642     SmallVector<StringRef, 8u> ObjcopyArgs{"llvm-objcopy"};
643 
644     for (unsigned I = 0; I < NumberOfInputs; ++I) {
645       StringRef InputFile = BundlerConfig.InputFileNames[I];
646       if (I == BundlerConfig.HostInputIndex) {
647         // Special handling for the host bundle. We do not need to add a
648         // standard bundle for the host object since we are going to use fat
649         // object as a host object. Therefore use dummy contents (one zero byte)
650         // when creating section for the host bundle.
651         Expected<StringRef> TempFileOrErr = TempFiles.Create(ArrayRef<char>(0));
652         if (!TempFileOrErr)
653           return TempFileOrErr.takeError();
654         InputFile = *TempFileOrErr;
655       }
656 
657       ObjcopyArgs.push_back(
658           SS.save(Twine("--add-section=") + OFFLOAD_BUNDLER_MAGIC_STR +
659                   BundlerConfig.TargetNames[I] + "=" + InputFile));
660       ObjcopyArgs.push_back(
661           SS.save(Twine("--set-section-flags=") + OFFLOAD_BUNDLER_MAGIC_STR +
662                   BundlerConfig.TargetNames[I] + "=readonly,exclude"));
663     }
664     ObjcopyArgs.push_back("--");
665     ObjcopyArgs.push_back(
666         BundlerConfig.InputFileNames[BundlerConfig.HostInputIndex]);
667     ObjcopyArgs.push_back(BundlerConfig.OutputFileNames.front());
668 
669     if (Error Err = executeObjcopy(BundlerConfig.ObjcopyPath, ObjcopyArgs))
670       return Err;
671 
672     return Error::success();
673   }
674 
675   Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
676     return Error::success();
677   }
678 
679 private:
680   Error executeObjcopy(StringRef Objcopy, ArrayRef<StringRef> Args) {
681     // If the user asked for the commands to be printed out, we do that
682     // instead of executing it.
683     if (BundlerConfig.PrintExternalCommands) {
684       errs() << "\"" << Objcopy << "\"";
685       for (StringRef Arg : drop_begin(Args, 1))
686         errs() << " \"" << Arg << "\"";
687       errs() << "\n";
688     } else {
689       if (sys::ExecuteAndWait(Objcopy, Args))
690         return createStringError(inconvertibleErrorCode(),
691                                  "'llvm-objcopy' tool failed");
692     }
693     return Error::success();
694   }
695 };
696 
697 /// Handler for text files. The bundled file will have the following format.
698 ///
699 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
700 /// Bundle 1
701 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
702 /// ...
703 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
704 /// Bundle N
705 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
706 class TextFileHandler final : public FileHandler {
707   /// String that begins a line comment.
708   StringRef Comment;
709 
710   /// String that initiates a bundle.
711   std::string BundleStartString;
712 
713   /// String that closes a bundle.
714   std::string BundleEndString;
715 
716   /// Number of chars read from input.
717   size_t ReadChars = 0u;
718 
719 protected:
720   Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); }
721 
722   Expected<std::optional<StringRef>>
723   ReadBundleStart(MemoryBuffer &Input) final {
724     StringRef FC = Input.getBuffer();
725 
726     // Find start of the bundle.
727     ReadChars = FC.find(BundleStartString, ReadChars);
728     if (ReadChars == FC.npos)
729       return std::nullopt;
730 
731     // Get position of the triple.
732     size_t TripleStart = ReadChars = ReadChars + BundleStartString.size();
733 
734     // Get position that closes the triple.
735     size_t TripleEnd = ReadChars = FC.find("\n", ReadChars);
736     if (TripleEnd == FC.npos)
737       return std::nullopt;
738 
739     // Next time we read after the new line.
740     ++ReadChars;
741 
742     return StringRef(&FC.data()[TripleStart], TripleEnd - TripleStart);
743   }
744 
745   Error ReadBundleEnd(MemoryBuffer &Input) final {
746     StringRef FC = Input.getBuffer();
747 
748     // Read up to the next new line.
749     assert(FC[ReadChars] == '\n' && "The bundle should end with a new line.");
750 
751     size_t TripleEnd = ReadChars = FC.find("\n", ReadChars + 1);
752     if (TripleEnd != FC.npos)
753       // Next time we read after the new line.
754       ++ReadChars;
755 
756     return Error::success();
757   }
758 
759   Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final {
760     StringRef FC = Input.getBuffer();
761     size_t BundleStart = ReadChars;
762 
763     // Find end of the bundle.
764     size_t BundleEnd = ReadChars = FC.find(BundleEndString, ReadChars);
765 
766     StringRef Bundle(&FC.data()[BundleStart], BundleEnd - BundleStart);
767     OS << Bundle;
768 
769     return Error::success();
770   }
771 
772   Error WriteHeader(raw_ostream &OS,
773                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
774     return Error::success();
775   }
776 
777   Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
778     OS << BundleStartString << TargetTriple << "\n";
779     return Error::success();
780   }
781 
782   Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
783     OS << BundleEndString << TargetTriple << "\n";
784     return Error::success();
785   }
786 
787   Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
788     OS << Input.getBuffer();
789     return Error::success();
790   }
791 
792 public:
793   TextFileHandler(StringRef Comment) : Comment(Comment), ReadChars(0) {
794     BundleStartString =
795         "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__START__ ";
796     BundleEndString =
797         "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__END__ ";
798   }
799 
800   Error listBundleIDsCallback(MemoryBuffer &Input,
801                               const BundleInfo &Info) final {
802     // TODO: To list bundle IDs in a bundled text file we need to go through
803     // all bundles. The format of bundled text file may need to include a
804     // header if the performance of listing bundle IDs of bundled text file is
805     // important.
806     ReadChars = Input.getBuffer().find(BundleEndString, ReadChars);
807     if (Error Err = ReadBundleEnd(Input))
808       return Err;
809     return Error::success();
810   }
811 };
812 } // namespace
813 
814 /// Return an appropriate object file handler. We use the specific object
815 /// handler if we know how to deal with that format, otherwise we use a default
816 /// binary file handler.
817 static std::unique_ptr<FileHandler>
818 CreateObjectFileHandler(MemoryBuffer &FirstInput,
819                         const OffloadBundlerConfig &BundlerConfig) {
820   // Check if the input file format is one that we know how to deal with.
821   Expected<std::unique_ptr<Binary>> BinaryOrErr = createBinary(FirstInput);
822 
823   // We only support regular object files. If failed to open the input as a
824   // known binary or this is not an object file use the default binary handler.
825   if (errorToBool(BinaryOrErr.takeError()) || !isa<ObjectFile>(*BinaryOrErr))
826     return std::make_unique<BinaryFileHandler>(BundlerConfig);
827 
828   // Otherwise create an object file handler. The handler will be owned by the
829   // client of this function.
830   return std::make_unique<ObjectFileHandler>(
831       std::unique_ptr<ObjectFile>(cast<ObjectFile>(BinaryOrErr->release())),
832       BundlerConfig);
833 }
834 
835 /// Return an appropriate handler given the input files and options.
836 static Expected<std::unique_ptr<FileHandler>>
837 CreateFileHandler(MemoryBuffer &FirstInput,
838                   const OffloadBundlerConfig &BundlerConfig) {
839   std::string FilesType = BundlerConfig.FilesType;
840 
841   if (FilesType == "i")
842     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
843   if (FilesType == "ii")
844     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
845   if (FilesType == "cui")
846     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
847   if (FilesType == "hipi")
848     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
849   // TODO: `.d` should be eventually removed once `-M` and its variants are
850   // handled properly in offload compilation.
851   if (FilesType == "d")
852     return std::make_unique<TextFileHandler>(/*Comment=*/"#");
853   if (FilesType == "ll")
854     return std::make_unique<TextFileHandler>(/*Comment=*/";");
855   if (FilesType == "bc")
856     return std::make_unique<BinaryFileHandler>(BundlerConfig);
857   if (FilesType == "s")
858     return std::make_unique<TextFileHandler>(/*Comment=*/"#");
859   if (FilesType == "o")
860     return CreateObjectFileHandler(FirstInput, BundlerConfig);
861   if (FilesType == "a")
862     return CreateObjectFileHandler(FirstInput, BundlerConfig);
863   if (FilesType == "gch")
864     return std::make_unique<BinaryFileHandler>(BundlerConfig);
865   if (FilesType == "ast")
866     return std::make_unique<BinaryFileHandler>(BundlerConfig);
867 
868   return createStringError(errc::invalid_argument,
869                            "'" + FilesType + "': invalid file type specified");
870 }
871 
872 OffloadBundlerConfig::OffloadBundlerConfig() {
873   auto IgnoreEnvVarOpt =
874       llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
875   if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
876     return;
877 
878   auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
879   if (VerboseEnvVarOpt.has_value())
880     Verbose = VerboseEnvVarOpt.value() == "1";
881 
882   auto CompressEnvVarOpt =
883       llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
884   if (CompressEnvVarOpt.has_value())
885     Compress = CompressEnvVarOpt.value() == "1";
886 }
887 
888 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
889 CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
890                                   bool Verbose) {
891   llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
892                         ClangOffloadBundlerTimerGroup);
893   if (Verbose)
894     HashTimer.startTimer();
895   llvm::MD5 Hash;
896   llvm::MD5::MD5Result Result;
897   Hash.update(Input.getBuffer());
898   Hash.final(Result);
899   uint64_t TruncatedHash = Result.low();
900   if (Verbose)
901     HashTimer.stopTimer();
902 
903   SmallVector<uint8_t, 0> CompressedBuffer;
904   auto BufferUint8 = llvm::ArrayRef<uint8_t>(
905       reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
906       Input.getBuffer().size());
907 
908   llvm::compression::Format CompressionFormat;
909 
910   if (llvm::compression::zstd::isAvailable())
911     CompressionFormat = llvm::compression::Format::Zstd;
912   else if (llvm::compression::zlib::isAvailable())
913     CompressionFormat = llvm::compression::Format::Zlib;
914   else
915     return createStringError(llvm::inconvertibleErrorCode(),
916                              "Compression not supported");
917 
918   llvm::Timer CompressTimer("Compression Timer", "Compression time",
919                             ClangOffloadBundlerTimerGroup);
920   if (Verbose)
921     CompressTimer.startTimer();
922   llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
923   if (Verbose)
924     CompressTimer.stopTimer();
925 
926   uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
927   uint32_t UncompressedSize = Input.getBuffer().size();
928 
929   SmallVector<char, 0> FinalBuffer;
930   llvm::raw_svector_ostream OS(FinalBuffer);
931   OS << MagicNumber;
932   OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
933   OS.write(reinterpret_cast<const char *>(&CompressionMethod),
934            sizeof(CompressionMethod));
935   OS.write(reinterpret_cast<const char *>(&UncompressedSize),
936            sizeof(UncompressedSize));
937   OS.write(reinterpret_cast<const char *>(&TruncatedHash),
938            sizeof(TruncatedHash));
939   OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
940            CompressedBuffer.size());
941 
942   if (Verbose) {
943     auto MethodUsed =
944         CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
945     llvm::errs() << "Compressed bundle format version: " << Version << "\n"
946                  << "Compression method used: " << MethodUsed << "\n"
947                  << "Binary size before compression: " << UncompressedSize
948                  << " bytes\n"
949                  << "Binary size after compression: " << CompressedBuffer.size()
950                  << " bytes\n"
951                  << "Truncated MD5 hash: "
952                  << llvm::format_hex(TruncatedHash, 16) << "\n";
953   }
954 
955   return llvm::MemoryBuffer::getMemBufferCopy(
956       llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
957 }
958 
959 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
960 CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
961                                     bool Verbose) {
962 
963   StringRef Blob = Input.getBuffer();
964 
965   if (Blob.size() < HeaderSize) {
966     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
967   }
968   if (llvm::identify_magic(Blob) !=
969       llvm::file_magic::offload_bundle_compressed) {
970     if (Verbose)
971       llvm::errs() << "Uncompressed bundle.\n";
972     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
973   }
974 
975   uint16_t ThisVersion;
976   uint16_t CompressionMethod;
977   uint32_t UncompressedSize;
978   uint64_t StoredHash;
979   memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(),
980          sizeof(uint16_t));
981   memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize,
982          sizeof(uint16_t));
983   memcpy(&UncompressedSize,
984          Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize,
985          sizeof(uint32_t));
986   memcpy(&StoredHash,
987          Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
988              SizeFieldSize,
989          sizeof(uint64_t));
990 
991   llvm::compression::Format CompressionFormat;
992   if (CompressionMethod ==
993       static_cast<uint16_t>(llvm::compression::Format::Zlib))
994     CompressionFormat = llvm::compression::Format::Zlib;
995   else if (CompressionMethod ==
996            static_cast<uint16_t>(llvm::compression::Format::Zstd))
997     CompressionFormat = llvm::compression::Format::Zstd;
998   else
999     return createStringError(inconvertibleErrorCode(),
1000                              "Unknown compressing method");
1001 
1002   llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
1003                               ClangOffloadBundlerTimerGroup);
1004   if (Verbose)
1005     DecompressTimer.startTimer();
1006 
1007   SmallVector<uint8_t, 0> DecompressedData;
1008   StringRef CompressedData = Blob.substr(HeaderSize);
1009   if (llvm::Error DecompressionError = llvm::compression::decompress(
1010           CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
1011           DecompressedData, UncompressedSize))
1012     return createStringError(inconvertibleErrorCode(),
1013                              "Could not decompress embedded file contents: " +
1014                                  llvm::toString(std::move(DecompressionError)));
1015 
1016   if (Verbose) {
1017     DecompressTimer.stopTimer();
1018 
1019     // Recalculate MD5 hash
1020     llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
1021                                 "Hash recalculation time",
1022                                 ClangOffloadBundlerTimerGroup);
1023     HashRecalcTimer.startTimer();
1024     llvm::MD5 Hash;
1025     llvm::MD5::MD5Result Result;
1026     Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
1027                                         DecompressedData.size()));
1028     Hash.final(Result);
1029     uint64_t RecalculatedHash = Result.low();
1030     HashRecalcTimer.stopTimer();
1031     bool HashMatch = (StoredHash == RecalculatedHash);
1032 
1033     llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
1034                  << "Decompression method: "
1035                  << (CompressionFormat == llvm::compression::Format::Zlib
1036                          ? "zlib"
1037                          : "zstd")
1038                  << "\n"
1039                  << "Size before decompression: " << CompressedData.size()
1040                  << " bytes\n"
1041                  << "Size after decompression: " << UncompressedSize
1042                  << " bytes\n"
1043                  << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
1044                  << "Recalculated hash: "
1045                  << llvm::format_hex(RecalculatedHash, 16) << "\n"
1046                  << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
1047   }
1048 
1049   return llvm::MemoryBuffer::getMemBufferCopy(
1050       llvm::toStringRef(DecompressedData));
1051 }
1052 
1053 // List bundle IDs. Return true if an error was found.
1054 Error OffloadBundler::ListBundleIDsInFile(
1055     StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
1056   // Open Input file.
1057   ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
1058       MemoryBuffer::getFileOrSTDIN(InputFileName);
1059   if (std::error_code EC = CodeOrErr.getError())
1060     return createFileError(InputFileName, EC);
1061 
1062   // Decompress the input if necessary.
1063   Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
1064       CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
1065   if (!DecompressedBufferOrErr)
1066     return createStringError(
1067         inconvertibleErrorCode(),
1068         "Failed to decompress input: " +
1069             llvm::toString(DecompressedBufferOrErr.takeError()));
1070 
1071   MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
1072 
1073   // Select the right files handler.
1074   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1075       CreateFileHandler(DecompressedInput, BundlerConfig);
1076   if (!FileHandlerOrErr)
1077     return FileHandlerOrErr.takeError();
1078 
1079   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
1080   assert(FH);
1081   return FH->listBundleIDs(DecompressedInput);
1082 }
1083 
1084 /// @brief Checks if a code object \p CodeObjectInfo is compatible with a given
1085 /// target \p TargetInfo.
1086 /// @link https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id
1087 bool isCodeObjectCompatible(const OffloadTargetInfo &CodeObjectInfo,
1088                             const OffloadTargetInfo &TargetInfo) {
1089 
1090   // Compatible in case of exact match.
1091   if (CodeObjectInfo == TargetInfo) {
1092     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1093                     dbgs() << "Compatible: Exact match: \t[CodeObject: "
1094                            << CodeObjectInfo.str()
1095                            << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1096     return true;
1097   }
1098 
1099   // Incompatible if Kinds or Triples mismatch.
1100   if (!CodeObjectInfo.isOffloadKindCompatible(TargetInfo.OffloadKind) ||
1101       !CodeObjectInfo.Triple.isCompatibleWith(TargetInfo.Triple)) {
1102     DEBUG_WITH_TYPE(
1103         "CodeObjectCompatibility",
1104         dbgs() << "Incompatible: Kind/Triple mismatch \t[CodeObject: "
1105                << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str()
1106                << "]\n");
1107     return false;
1108   }
1109 
1110   // Incompatible if Processors mismatch.
1111   llvm::StringMap<bool> CodeObjectFeatureMap, TargetFeatureMap;
1112   std::optional<StringRef> CodeObjectProc = clang::parseTargetID(
1113       CodeObjectInfo.Triple, CodeObjectInfo.TargetID, &CodeObjectFeatureMap);
1114   std::optional<StringRef> TargetProc = clang::parseTargetID(
1115       TargetInfo.Triple, TargetInfo.TargetID, &TargetFeatureMap);
1116 
1117   // Both TargetProc and CodeObjectProc can't be empty here.
1118   if (!TargetProc || !CodeObjectProc ||
1119       CodeObjectProc.value() != TargetProc.value()) {
1120     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1121                     dbgs() << "Incompatible: Processor mismatch \t[CodeObject: "
1122                            << CodeObjectInfo.str()
1123                            << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1124     return false;
1125   }
1126 
1127   // Incompatible if CodeObject has more features than Target, irrespective of
1128   // type or sign of features.
1129   if (CodeObjectFeatureMap.getNumItems() > TargetFeatureMap.getNumItems()) {
1130     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1131                     dbgs() << "Incompatible: CodeObject has more features "
1132                               "than target \t[CodeObject: "
1133                            << CodeObjectInfo.str()
1134                            << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1135     return false;
1136   }
1137 
1138   // Compatible if each target feature specified by target is compatible with
1139   // target feature of code object. The target feature is compatible if the
1140   // code object does not specify it (meaning Any), or if it specifies it
1141   // with the same value (meaning On or Off).
1142   for (const auto &CodeObjectFeature : CodeObjectFeatureMap) {
1143     auto TargetFeature = TargetFeatureMap.find(CodeObjectFeature.getKey());
1144     if (TargetFeature == TargetFeatureMap.end()) {
1145       DEBUG_WITH_TYPE(
1146           "CodeObjectCompatibility",
1147           dbgs()
1148               << "Incompatible: Value of CodeObject's non-ANY feature is "
1149                  "not matching with Target feature's ANY value \t[CodeObject: "
1150               << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str()
1151               << "]\n");
1152       return false;
1153     } else if (TargetFeature->getValue() != CodeObjectFeature.getValue()) {
1154       DEBUG_WITH_TYPE(
1155           "CodeObjectCompatibility",
1156           dbgs() << "Incompatible: Value of CodeObject's non-ANY feature is "
1157                     "not matching with Target feature's non-ANY value "
1158                     "\t[CodeObject: "
1159                  << CodeObjectInfo.str()
1160                  << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1161       return false;
1162     }
1163   }
1164 
1165   // CodeObject is compatible if all features of Target are:
1166   //   - either, present in the Code Object's features map with the same sign,
1167   //   - or, the feature is missing from CodeObjects's features map i.e. it is
1168   //   set to ANY
1169   DEBUG_WITH_TYPE(
1170       "CodeObjectCompatibility",
1171       dbgs() << "Compatible: Target IDs are compatible \t[CodeObject: "
1172              << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str()
1173              << "]\n");
1174   return true;
1175 }
1176 
1177 /// Bundle the files. Return true if an error was found.
1178 Error OffloadBundler::BundleFiles() {
1179   std::error_code EC;
1180 
1181   // Create a buffer to hold the content before compressing.
1182   SmallVector<char, 0> Buffer;
1183   llvm::raw_svector_ostream BufferStream(Buffer);
1184 
1185   // Open input files.
1186   SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
1187   InputBuffers.reserve(BundlerConfig.InputFileNames.size());
1188   for (auto &I : BundlerConfig.InputFileNames) {
1189     ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
1190         MemoryBuffer::getFileOrSTDIN(I);
1191     if (std::error_code EC = CodeOrErr.getError())
1192       return createFileError(I, EC);
1193     InputBuffers.emplace_back(std::move(*CodeOrErr));
1194   }
1195 
1196   // Get the file handler. We use the host buffer as reference.
1197   assert((BundlerConfig.HostInputIndex != ~0u || BundlerConfig.AllowNoHost) &&
1198          "Host input index undefined??");
1199   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr = CreateFileHandler(
1200       *InputBuffers[BundlerConfig.AllowNoHost ? 0
1201                                               : BundlerConfig.HostInputIndex],
1202       BundlerConfig);
1203   if (!FileHandlerOrErr)
1204     return FileHandlerOrErr.takeError();
1205 
1206   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
1207   assert(FH);
1208 
1209   // Write header.
1210   if (Error Err = FH->WriteHeader(BufferStream, InputBuffers))
1211     return Err;
1212 
1213   // Write all bundles along with the start/end markers. If an error was found
1214   // writing the end of the bundle component, abort the bundle writing.
1215   auto Input = InputBuffers.begin();
1216   for (auto &Triple : BundlerConfig.TargetNames) {
1217     if (Error Err = FH->WriteBundleStart(BufferStream, Triple))
1218       return Err;
1219     if (Error Err = FH->WriteBundle(BufferStream, **Input))
1220       return Err;
1221     if (Error Err = FH->WriteBundleEnd(BufferStream, Triple))
1222       return Err;
1223     ++Input;
1224   }
1225 
1226   raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
1227                             sys::fs::OF_None);
1228   if (EC)
1229     return createFileError(BundlerConfig.OutputFileNames.front(), EC);
1230 
1231   SmallVector<char, 0> CompressedBuffer;
1232   if (BundlerConfig.Compress) {
1233     std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
1234         llvm::MemoryBuffer::getMemBufferCopy(
1235             llvm::StringRef(Buffer.data(), Buffer.size()));
1236     auto CompressionResult =
1237         CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
1238     if (auto Error = CompressionResult.takeError())
1239       return Error;
1240 
1241     auto CompressedMemBuffer = std::move(CompressionResult.get());
1242     CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(),
1243                             CompressedMemBuffer->getBufferEnd());
1244   } else
1245     CompressedBuffer = Buffer;
1246 
1247   OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size());
1248 
1249   return FH->finalizeOutputFile();
1250 }
1251 
1252 // Unbundle the files. Return true if an error was found.
1253 Error OffloadBundler::UnbundleFiles() {
1254   // Open Input file.
1255   ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
1256       MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front());
1257   if (std::error_code EC = CodeOrErr.getError())
1258     return createFileError(BundlerConfig.InputFileNames.front(), EC);
1259 
1260   // Decompress the input if necessary.
1261   Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
1262       CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
1263   if (!DecompressedBufferOrErr)
1264     return createStringError(
1265         inconvertibleErrorCode(),
1266         "Failed to decompress input: " +
1267             llvm::toString(DecompressedBufferOrErr.takeError()));
1268 
1269   MemoryBuffer &Input = **DecompressedBufferOrErr;
1270 
1271   // Select the right files handler.
1272   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1273       CreateFileHandler(Input, BundlerConfig);
1274   if (!FileHandlerOrErr)
1275     return FileHandlerOrErr.takeError();
1276 
1277   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
1278   assert(FH);
1279 
1280   // Read the header of the bundled file.
1281   if (Error Err = FH->ReadHeader(Input))
1282     return Err;
1283 
1284   // Create a work list that consist of the map triple/output file.
1285   StringMap<StringRef> Worklist;
1286   auto Output = BundlerConfig.OutputFileNames.begin();
1287   for (auto &Triple : BundlerConfig.TargetNames) {
1288     Worklist[Triple] = *Output;
1289     ++Output;
1290   }
1291 
1292   // Read all the bundles that are in the work list. If we find no bundles we
1293   // assume the file is meant for the host target.
1294   bool FoundHostBundle = false;
1295   while (!Worklist.empty()) {
1296     Expected<std::optional<StringRef>> CurTripleOrErr =
1297         FH->ReadBundleStart(Input);
1298     if (!CurTripleOrErr)
1299       return CurTripleOrErr.takeError();
1300 
1301     // We don't have more bundles.
1302     if (!*CurTripleOrErr)
1303       break;
1304 
1305     StringRef CurTriple = **CurTripleOrErr;
1306     assert(!CurTriple.empty());
1307 
1308     auto Output = Worklist.begin();
1309     for (auto E = Worklist.end(); Output != E; Output++) {
1310       if (isCodeObjectCompatible(
1311               OffloadTargetInfo(CurTriple, BundlerConfig),
1312               OffloadTargetInfo((*Output).first(), BundlerConfig))) {
1313         break;
1314       }
1315     }
1316 
1317     if (Output == Worklist.end())
1318       continue;
1319     // Check if the output file can be opened and copy the bundle to it.
1320     std::error_code EC;
1321     raw_fd_ostream OutputFile((*Output).second, EC, sys::fs::OF_None);
1322     if (EC)
1323       return createFileError((*Output).second, EC);
1324     if (Error Err = FH->ReadBundle(OutputFile, Input))
1325       return Err;
1326     if (Error Err = FH->ReadBundleEnd(Input))
1327       return Err;
1328     Worklist.erase(Output);
1329 
1330     // Record if we found the host bundle.
1331     auto OffloadInfo = OffloadTargetInfo(CurTriple, BundlerConfig);
1332     if (OffloadInfo.hasHostKind())
1333       FoundHostBundle = true;
1334   }
1335 
1336   if (!BundlerConfig.AllowMissingBundles && !Worklist.empty()) {
1337     std::string ErrMsg = "Can't find bundles for";
1338     std::set<StringRef> Sorted;
1339     for (auto &E : Worklist)
1340       Sorted.insert(E.first());
1341     unsigned I = 0;
1342     unsigned Last = Sorted.size() - 1;
1343     for (auto &E : Sorted) {
1344       if (I != 0 && Last > 1)
1345         ErrMsg += ",";
1346       ErrMsg += " ";
1347       if (I == Last && I != 0)
1348         ErrMsg += "and ";
1349       ErrMsg += E.str();
1350       ++I;
1351     }
1352     return createStringError(inconvertibleErrorCode(), ErrMsg);
1353   }
1354 
1355   // If no bundles were found, assume the input file is the host bundle and
1356   // create empty files for the remaining targets.
1357   if (Worklist.size() == BundlerConfig.TargetNames.size()) {
1358     for (auto &E : Worklist) {
1359       std::error_code EC;
1360       raw_fd_ostream OutputFile(E.second, EC, sys::fs::OF_None);
1361       if (EC)
1362         return createFileError(E.second, EC);
1363 
1364       // If this entry has a host kind, copy the input file to the output file.
1365       auto OffloadInfo = OffloadTargetInfo(E.getKey(), BundlerConfig);
1366       if (OffloadInfo.hasHostKind())
1367         OutputFile.write(Input.getBufferStart(), Input.getBufferSize());
1368     }
1369     return Error::success();
1370   }
1371 
1372   // If we found elements, we emit an error if none of those were for the host
1373   // in case host bundle name was provided in command line.
1374   if (!(FoundHostBundle || BundlerConfig.HostInputIndex == ~0u ||
1375         BundlerConfig.AllowMissingBundles))
1376     return createStringError(inconvertibleErrorCode(),
1377                              "Can't find bundle for the host target");
1378 
1379   // If we still have any elements in the worklist, create empty files for them.
1380   for (auto &E : Worklist) {
1381     std::error_code EC;
1382     raw_fd_ostream OutputFile(E.second, EC, sys::fs::OF_None);
1383     if (EC)
1384       return createFileError(E.second, EC);
1385   }
1386 
1387   return Error::success();
1388 }
1389 
1390 static Archive::Kind getDefaultArchiveKindForHost() {
1391   return Triple(sys::getDefaultTargetTriple()).isOSDarwin() ? Archive::K_DARWIN
1392                                                             : Archive::K_GNU;
1393 }
1394 
1395 /// @brief Computes a list of targets among all given targets which are
1396 /// compatible with this code object
1397 /// @param [in] CodeObjectInfo Code Object
1398 /// @param [out] CompatibleTargets List of all compatible targets among all
1399 /// given targets
1400 /// @return false, if no compatible target is found.
1401 static bool
1402 getCompatibleOffloadTargets(OffloadTargetInfo &CodeObjectInfo,
1403                             SmallVectorImpl<StringRef> &CompatibleTargets,
1404                             const OffloadBundlerConfig &BundlerConfig) {
1405   if (!CompatibleTargets.empty()) {
1406     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1407                     dbgs() << "CompatibleTargets list should be empty\n");
1408     return false;
1409   }
1410   for (auto &Target : BundlerConfig.TargetNames) {
1411     auto TargetInfo = OffloadTargetInfo(Target, BundlerConfig);
1412     if (isCodeObjectCompatible(CodeObjectInfo, TargetInfo))
1413       CompatibleTargets.push_back(Target);
1414   }
1415   return !CompatibleTargets.empty();
1416 }
1417 
1418 // Check that each code object file in the input archive conforms to following
1419 // rule: for a specific processor, a feature either shows up in all target IDs,
1420 // or does not show up in any target IDs. Otherwise the target ID combination is
1421 // invalid.
1422 static Error
1423 CheckHeterogeneousArchive(StringRef ArchiveName,
1424                           const OffloadBundlerConfig &BundlerConfig) {
1425   std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
1426   ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
1427       MemoryBuffer::getFileOrSTDIN(ArchiveName, true, false);
1428   if (std::error_code EC = BufOrErr.getError())
1429     return createFileError(ArchiveName, EC);
1430 
1431   ArchiveBuffers.push_back(std::move(*BufOrErr));
1432   Expected<std::unique_ptr<llvm::object::Archive>> LibOrErr =
1433       Archive::create(ArchiveBuffers.back()->getMemBufferRef());
1434   if (!LibOrErr)
1435     return LibOrErr.takeError();
1436 
1437   auto Archive = std::move(*LibOrErr);
1438 
1439   Error ArchiveErr = Error::success();
1440   auto ChildEnd = Archive->child_end();
1441 
1442   /// Iterate over all bundled code object files in the input archive.
1443   for (auto ArchiveIter = Archive->child_begin(ArchiveErr);
1444        ArchiveIter != ChildEnd; ++ArchiveIter) {
1445     if (ArchiveErr)
1446       return ArchiveErr;
1447     auto ArchiveChildNameOrErr = (*ArchiveIter).getName();
1448     if (!ArchiveChildNameOrErr)
1449       return ArchiveChildNameOrErr.takeError();
1450 
1451     auto CodeObjectBufferRefOrErr = (*ArchiveIter).getMemoryBufferRef();
1452     if (!CodeObjectBufferRefOrErr)
1453       return CodeObjectBufferRefOrErr.takeError();
1454 
1455     auto CodeObjectBuffer =
1456         MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
1457 
1458     Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1459         CreateFileHandler(*CodeObjectBuffer, BundlerConfig);
1460     if (!FileHandlerOrErr)
1461       return FileHandlerOrErr.takeError();
1462 
1463     std::unique_ptr<FileHandler> &FileHandler = *FileHandlerOrErr;
1464     assert(FileHandler);
1465 
1466     std::set<StringRef> BundleIds;
1467     auto CodeObjectFileError =
1468         FileHandler->getBundleIDs(*CodeObjectBuffer, BundleIds);
1469     if (CodeObjectFileError)
1470       return CodeObjectFileError;
1471 
1472     auto &&ConflictingArchs = clang::getConflictTargetIDCombination(BundleIds);
1473     if (ConflictingArchs) {
1474       std::string ErrMsg =
1475           Twine("conflicting TargetIDs [" + ConflictingArchs.value().first +
1476                 ", " + ConflictingArchs.value().second + "] found in " +
1477                 ArchiveChildNameOrErr.get() + " of " + ArchiveName)
1478               .str();
1479       return createStringError(inconvertibleErrorCode(), ErrMsg);
1480     }
1481   }
1482 
1483   return ArchiveErr;
1484 }
1485 
1486 /// UnbundleArchive takes an archive file (".a") as input containing bundled
1487 /// code object files, and a list of offload targets (not host), and extracts
1488 /// the code objects into a new archive file for each offload target. Each
1489 /// resulting archive file contains all code object files corresponding to that
1490 /// particular offload target. The created archive file does not
1491 /// contain an index of the symbols and code object files are named as
1492 /// <<Parent Bundle Name>-<CodeObject's TargetID>>, with ':' replaced with '_'.
1493 Error OffloadBundler::UnbundleArchive() {
1494   std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
1495 
1496   /// Map of target names with list of object files that will form the device
1497   /// specific archive for that target
1498   StringMap<std::vector<NewArchiveMember>> OutputArchivesMap;
1499 
1500   // Map of target names and output archive filenames
1501   StringMap<StringRef> TargetOutputFileNameMap;
1502 
1503   auto Output = BundlerConfig.OutputFileNames.begin();
1504   for (auto &Target : BundlerConfig.TargetNames) {
1505     TargetOutputFileNameMap[Target] = *Output;
1506     ++Output;
1507   }
1508 
1509   StringRef IFName = BundlerConfig.InputFileNames.front();
1510 
1511   if (BundlerConfig.CheckInputArchive) {
1512     // For a specific processor, a feature either shows up in all target IDs, or
1513     // does not show up in any target IDs. Otherwise the target ID combination
1514     // is invalid.
1515     auto ArchiveError = CheckHeterogeneousArchive(IFName, BundlerConfig);
1516     if (ArchiveError) {
1517       return ArchiveError;
1518     }
1519   }
1520 
1521   ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
1522       MemoryBuffer::getFileOrSTDIN(IFName, true, false);
1523   if (std::error_code EC = BufOrErr.getError())
1524     return createFileError(BundlerConfig.InputFileNames.front(), EC);
1525 
1526   ArchiveBuffers.push_back(std::move(*BufOrErr));
1527   Expected<std::unique_ptr<llvm::object::Archive>> LibOrErr =
1528       Archive::create(ArchiveBuffers.back()->getMemBufferRef());
1529   if (!LibOrErr)
1530     return LibOrErr.takeError();
1531 
1532   auto Archive = std::move(*LibOrErr);
1533 
1534   Error ArchiveErr = Error::success();
1535   auto ChildEnd = Archive->child_end();
1536 
1537   /// Iterate over all bundled code object files in the input archive.
1538   for (auto ArchiveIter = Archive->child_begin(ArchiveErr);
1539        ArchiveIter != ChildEnd; ++ArchiveIter) {
1540     if (ArchiveErr)
1541       return ArchiveErr;
1542     auto ArchiveChildNameOrErr = (*ArchiveIter).getName();
1543     if (!ArchiveChildNameOrErr)
1544       return ArchiveChildNameOrErr.takeError();
1545 
1546     StringRef BundledObjectFile = sys::path::filename(*ArchiveChildNameOrErr);
1547 
1548     auto CodeObjectBufferRefOrErr = (*ArchiveIter).getMemoryBufferRef();
1549     if (!CodeObjectBufferRefOrErr)
1550       return CodeObjectBufferRefOrErr.takeError();
1551 
1552     auto TempCodeObjectBuffer =
1553         MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
1554 
1555     // Decompress the buffer if necessary.
1556     Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
1557         CompressedOffloadBundle::decompress(*TempCodeObjectBuffer,
1558                                             BundlerConfig.Verbose);
1559     if (!DecompressedBufferOrErr)
1560       return createStringError(
1561           inconvertibleErrorCode(),
1562           "Failed to decompress code object: " +
1563               llvm::toString(DecompressedBufferOrErr.takeError()));
1564 
1565     MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr;
1566 
1567     Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1568         CreateFileHandler(CodeObjectBuffer, BundlerConfig);
1569     if (!FileHandlerOrErr)
1570       return FileHandlerOrErr.takeError();
1571 
1572     std::unique_ptr<FileHandler> &FileHandler = *FileHandlerOrErr;
1573     assert(FileHandler &&
1574            "FileHandle creation failed for file in the archive!");
1575 
1576     if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer))
1577       return ReadErr;
1578 
1579     Expected<std::optional<StringRef>> CurBundleIDOrErr =
1580         FileHandler->ReadBundleStart(CodeObjectBuffer);
1581     if (!CurBundleIDOrErr)
1582       return CurBundleIDOrErr.takeError();
1583 
1584     std::optional<StringRef> OptionalCurBundleID = *CurBundleIDOrErr;
1585     // No device code in this child, skip.
1586     if (!OptionalCurBundleID)
1587       continue;
1588     StringRef CodeObject = *OptionalCurBundleID;
1589 
1590     // Process all bundle entries (CodeObjects) found in this child of input
1591     // archive.
1592     while (!CodeObject.empty()) {
1593       SmallVector<StringRef> CompatibleTargets;
1594       auto CodeObjectInfo = OffloadTargetInfo(CodeObject, BundlerConfig);
1595       if (CodeObjectInfo.hasHostKind()) {
1596         // Do nothing, we don't extract host code yet.
1597       } else if (getCompatibleOffloadTargets(CodeObjectInfo, CompatibleTargets,
1598                                              BundlerConfig)) {
1599         std::string BundleData;
1600         raw_string_ostream DataStream(BundleData);
1601         if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer))
1602           return Err;
1603 
1604         for (auto &CompatibleTarget : CompatibleTargets) {
1605           SmallString<128> BundledObjectFileName;
1606           BundledObjectFileName.assign(BundledObjectFile);
1607           auto OutputBundleName =
1608               Twine(llvm::sys::path::stem(BundledObjectFileName) + "-" +
1609                     CodeObject +
1610                     getDeviceLibraryFileName(BundledObjectFileName,
1611                                              CodeObjectInfo.TargetID))
1612                   .str();
1613           // Replace ':' in optional target feature list with '_' to ensure
1614           // cross-platform validity.
1615           std::replace(OutputBundleName.begin(), OutputBundleName.end(), ':',
1616                        '_');
1617 
1618           std::unique_ptr<MemoryBuffer> MemBuf = MemoryBuffer::getMemBufferCopy(
1619               DataStream.str(), OutputBundleName);
1620           ArchiveBuffers.push_back(std::move(MemBuf));
1621           llvm::MemoryBufferRef MemBufRef =
1622               MemoryBufferRef(*(ArchiveBuffers.back()));
1623 
1624           // For inserting <CompatibleTarget, list<CodeObject>> entry in
1625           // OutputArchivesMap.
1626           if (!OutputArchivesMap.contains(CompatibleTarget)) {
1627 
1628             std::vector<NewArchiveMember> ArchiveMembers;
1629             ArchiveMembers.push_back(NewArchiveMember(MemBufRef));
1630             OutputArchivesMap.insert_or_assign(CompatibleTarget,
1631                                                std::move(ArchiveMembers));
1632           } else {
1633             OutputArchivesMap[CompatibleTarget].push_back(
1634                 NewArchiveMember(MemBufRef));
1635           }
1636         }
1637       }
1638 
1639       if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer))
1640         return Err;
1641 
1642       Expected<std::optional<StringRef>> NextTripleOrErr =
1643           FileHandler->ReadBundleStart(CodeObjectBuffer);
1644       if (!NextTripleOrErr)
1645         return NextTripleOrErr.takeError();
1646 
1647       CodeObject = ((*NextTripleOrErr).has_value()) ? **NextTripleOrErr : "";
1648     } // End of processing of all bundle entries of this child of input archive.
1649   }   // End of while over children of input archive.
1650 
1651   assert(!ArchiveErr && "Error occurred while reading archive!");
1652 
1653   /// Write out an archive for each target
1654   for (auto &Target : BundlerConfig.TargetNames) {
1655     StringRef FileName = TargetOutputFileNameMap[Target];
1656     StringMapIterator<std::vector<llvm::NewArchiveMember>> CurArchiveMembers =
1657         OutputArchivesMap.find(Target);
1658     if (CurArchiveMembers != OutputArchivesMap.end()) {
1659       if (Error WriteErr = writeArchive(FileName, CurArchiveMembers->getValue(),
1660                                         SymtabWritingMode::NormalSymtab,
1661                                         getDefaultArchiveKindForHost(), true,
1662                                         false, nullptr))
1663         return WriteErr;
1664     } else if (!BundlerConfig.AllowMissingBundles) {
1665       std::string ErrMsg =
1666           Twine("no compatible code object found for the target '" + Target +
1667                 "' in heterogeneous archive library: " + IFName)
1668               .str();
1669       return createStringError(inconvertibleErrorCode(), ErrMsg);
1670     } else { // Create an empty archive file if no compatible code object is
1671              // found and "allow-missing-bundles" is enabled. It ensures that
1672              // the linker using output of this step doesn't complain about
1673              // the missing input file.
1674       std::vector<llvm::NewArchiveMember> EmptyArchive;
1675       EmptyArchive.clear();
1676       if (Error WriteErr = writeArchive(
1677               FileName, EmptyArchive, SymtabWritingMode::NormalSymtab,
1678               getDefaultArchiveKindForHost(), true, false, nullptr))
1679         return WriteErr;
1680     }
1681   }
1682 
1683   return Error::success();
1684 }
1685