xref: /freebsd/contrib/llvm-project/lld/MachO/InputFiles.h (revision 753f127f3ace09432b2baeffd71a308760641a62)
15ffd83dbSDimitry Andric //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric 
95ffd83dbSDimitry Andric #ifndef LLD_MACHO_INPUT_FILES_H
105ffd83dbSDimitry Andric #define LLD_MACHO_INPUT_FILES_H
115ffd83dbSDimitry Andric 
125ffd83dbSDimitry Andric #include "MachOStructs.h"
13fe6060f1SDimitry Andric #include "Target.h"
145ffd83dbSDimitry Andric 
1581ad6265SDimitry Andric #include "lld/Common/DWARF.h"
165ffd83dbSDimitry Andric #include "lld/Common/LLVM.h"
17e8d8bef9SDimitry Andric #include "lld/Common/Memory.h"
18349cc55cSDimitry Andric #include "llvm/ADT/CachedHashString.h"
195ffd83dbSDimitry Andric #include "llvm/ADT/DenseSet.h"
20e8d8bef9SDimitry Andric #include "llvm/ADT/SetVector.h"
215ffd83dbSDimitry Andric #include "llvm/BinaryFormat/MachO.h"
22e8d8bef9SDimitry Andric #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
235ffd83dbSDimitry Andric #include "llvm/Object/Archive.h"
245ffd83dbSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
2581ad6265SDimitry Andric #include "llvm/Support/Threading.h"
26fe6060f1SDimitry Andric #include "llvm/TextAPI/TextAPIReader.h"
275ffd83dbSDimitry Andric 
285ffd83dbSDimitry Andric #include <vector>
295ffd83dbSDimitry Andric 
30e8d8bef9SDimitry Andric namespace llvm {
31e8d8bef9SDimitry Andric namespace lto {
32e8d8bef9SDimitry Andric class InputFile;
33e8d8bef9SDimitry Andric } // namespace lto
34fe6060f1SDimitry Andric namespace MachO {
35fe6060f1SDimitry Andric class InterfaceFile;
36fe6060f1SDimitry Andric } // namespace MachO
37e8d8bef9SDimitry Andric class TarWriter;
38e8d8bef9SDimitry Andric } // namespace llvm
39e8d8bef9SDimitry Andric 
405ffd83dbSDimitry Andric namespace lld {
415ffd83dbSDimitry Andric namespace macho {
425ffd83dbSDimitry Andric 
43fe6060f1SDimitry Andric struct PlatformInfo;
44fe6060f1SDimitry Andric class ConcatInputSection;
455ffd83dbSDimitry Andric class Symbol;
46349cc55cSDimitry Andric class Defined;
475ffd83dbSDimitry Andric struct Reloc;
48e8d8bef9SDimitry Andric enum class RefState : uint8_t;
49e8d8bef9SDimitry Andric 
50e8d8bef9SDimitry Andric // If --reproduce option is given, all input files are written
51e8d8bef9SDimitry Andric // to this tar archive.
52e8d8bef9SDimitry Andric extern std::unique_ptr<llvm::TarWriter> tar;
535ffd83dbSDimitry Andric 
545ffd83dbSDimitry Andric // If .subsections_via_symbols is set, each InputSection will be split along
55fe6060f1SDimitry Andric // symbol boundaries. The field offset represents the offset of the subsection
56fe6060f1SDimitry Andric // from the start of the original pre-split InputSection.
57349cc55cSDimitry Andric struct Subsection {
58349cc55cSDimitry Andric   uint64_t offset = 0;
59349cc55cSDimitry Andric   InputSection *isec = nullptr;
60fe6060f1SDimitry Andric };
61349cc55cSDimitry Andric 
62349cc55cSDimitry Andric using Subsections = std::vector<Subsection>;
6381ad6265SDimitry Andric class InputFile;
64349cc55cSDimitry Andric 
6581ad6265SDimitry Andric class Section {
6681ad6265SDimitry Andric public:
6781ad6265SDimitry Andric   InputFile *file;
6881ad6265SDimitry Andric   StringRef segname;
6981ad6265SDimitry Andric   StringRef name;
7081ad6265SDimitry Andric   uint32_t flags;
7181ad6265SDimitry Andric   uint64_t addr;
72349cc55cSDimitry Andric   Subsections subsections;
7381ad6265SDimitry Andric 
7481ad6265SDimitry Andric   Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
7581ad6265SDimitry Andric           uint64_t addr)
7681ad6265SDimitry Andric       : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
7781ad6265SDimitry Andric   // Ensure pointers to Sections are never invalidated.
7881ad6265SDimitry Andric   Section(const Section &) = delete;
7981ad6265SDimitry Andric   Section &operator=(const Section &) = delete;
8081ad6265SDimitry Andric   Section(Section &&) = delete;
8181ad6265SDimitry Andric   Section &operator=(Section &&) = delete;
8281ad6265SDimitry Andric 
8381ad6265SDimitry Andric private:
8481ad6265SDimitry Andric   // Whether we have already split this section into individual subsections.
8581ad6265SDimitry Andric   // For sections that cannot be split (e.g. literal sections), this is always
8681ad6265SDimitry Andric   // false.
8781ad6265SDimitry Andric   bool doneSplitting = false;
8881ad6265SDimitry Andric   friend class ObjFile;
89349cc55cSDimitry Andric };
905ffd83dbSDimitry Andric 
// Represents a call graph profile edge, directed from caller to callee.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  // Member-wise constructor: stores the three arguments unchanged.
  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
10304eeddc0SDimitry Andric 
// Common base class for the linker inputs declared in this header. The
// concrete subclass is identified by the Kind given at construction and
// returned by kind().
class InputFile {
public:
  // One enumerator per subclass; each subclass's classof() compares kind()
  // against its own enumerator.
  enum Kind {
    ObjKind,
    OpaqueKind,
    DylibKind,
    ArchiveKind,
    BitcodeKind,
  };

  virtual ~InputFile() = default;
  Kind kind() const { return fileKind; }
  StringRef getName() const { return name; }
  // Restarts the counter from which `id` values are handed out.
  static void resetIdCount() { idCount = 0; }

  // Set from the constructor argument by the MemoryBufferRef-taking
  // constructor below.
  MemoryBufferRef mb;

  // Symbols and sections associated with this file. Nothing in this header
  // fills them in; that happens in code defined elsewhere.
  std::vector<Symbol *> symbols;
  std::vector<Section *> sections;

  // If not empty, this stores the name of the archive containing this file.
  // We use this string for creating error messages.
  std::string archiveName;

  // Provides an easy way to sort InputFiles deterministically.
  // The MemoryBufferRef-taking constructor assigns the current value of
  // idCount and then increments the counter.
  const int id;

  // True if this is a lazy ObjFile or BitcodeFile.
  bool lazy = false;

protected:
  // Only subclasses construct an InputFile. The file's name is taken from
  // mb.getBufferIdentifier().
  InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
      : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
        name(mb.getBufferIdentifier()) {}

  // Construction from a TextAPI interface file; defined outside this header.
  InputFile(Kind, const llvm::MachO::InterfaceFile &);

private:
  const Kind fileKind;
  const StringRef name;

  // Source of `id` values; see resetIdCount().
  static int idCount;
};
1475ffd83dbSDimitry Andric 
14881ad6265SDimitry Andric struct FDE {
14981ad6265SDimitry Andric   uint32_t funcLength;
15081ad6265SDimitry Andric   Symbol *personality;
15181ad6265SDimitry Andric   InputSection *lsda;
15281ad6265SDimitry Andric };
15381ad6265SDimitry Andric 
// .o file
class ObjFile final : public InputFile {
public:
  // Defined outside this header. `lazy` defaults to false.
  ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
          bool lazy = false);
  ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
  template <class LP> void parse();

  // LLVM-style RTTI hook: true when `f` reports ObjKind from kind().
  static bool classof(const InputFile *f) { return f->kind() == ObjKind; }

  std::string sourceFile() const;
  // Parses line table information for diagnostics. compileUnit should be used
  // for other purposes.
  lld::DWARFCache *getDwarf();

  // Null until assigned by code outside this header.
  llvm::DWARFUnit *compileUnit = nullptr;
  // NOTE(review): presumably the object returned by getDwarf(), created once
  // under `initDwarf` -- confirm in the definition of getDwarf().
  std::unique_ptr<lld::DWARFCache> dwarfCache;
  // Null until assigned by code outside this header.
  // NOTE(review): presumably the address-significance section -- confirm.
  Section *addrSigSection = nullptr;
  const uint32_t modTime;
  std::vector<ConcatInputSection *> debugSections;
  // Call graph profile edges; see CallGraphEntry.
  std::vector<CallGraphEntry> callGraph;
  // FDE records keyed by ConcatInputSection pointer.
  llvm::DenseMap<ConcatInputSection *, FDE> fdes;
  std::vector<OptimizationHint> optimizationHints;

private:
  // NOTE(review): presumably guards one-time initialization of `dwarfCache`
  // -- confirm in getDwarf().
  llvm::once_flag initDwarf;
  template <class LP> void parseLazy();
  template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
  // LP supplies the section-header type (LP::section) and the symbol-table
  // entry type (LP::nlist) used by this parser.
  template <class LP>
  void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
                    ArrayRef<typename LP::nlist> nList, const char *strtab,
                    bool subsectionsViaSymbols);
  template <class NList>
  Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
  template <class SectionHeader>
  void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                        const SectionHeader &, Section &);
  void parseDebugInfo();
  void parseOptimizationHints(ArrayRef<uint8_t> data);
  void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
  void registerCompactUnwind(Section &compactUnwindSection);
  void registerEhFrames(Section &ehFrameSection);
};
197e8d8bef9SDimitry Andric 
// command-line -sectcreate file
class OpaqueFile final : public InputFile {
public:
  // Defined outside this header. Takes the file contents together with the
  // segment and section names given for them.
  OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
  // LLVM-style RTTI hook: true when `f` reports OpaqueKind from kind().
  static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
};
2045ffd83dbSDimitry Andric 
// .dylib or .tbd file
class DylibFile final : public InputFile {
public:
  // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
  // symbols in those sub-libraries will be available under the umbrella
  // library's namespace. Those sub-libraries can also have their own
  // re-exports. When loading a re-exported dylib, `umbrella` should be set to
  // the root dylib to ensure symbols in the child library are correctly bound
  // to the root. On the other hand, if a dylib is being directly loaded
  // (through an -lfoo flag), then `umbrella` should be a nullptr.
  //
  // Two construction paths exist: from a memory buffer, and from a TextAPI
  // InterfaceFile. Both are defined outside this header.
  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
                     bool isBundleLoader, bool explicitlyLinked);
  explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
                     DylibFile *umbrella, bool isBundleLoader,
                     bool explicitlyLinked);

  void parseLoadCommands(MemoryBufferRef mb);
  void parseReexports(const llvm::MachO::InterfaceFile &interface);
  // True once numReferencedSymbols is non-zero.
  bool isReferenced() const { return numReferencedSymbols > 0; }

  // LLVM-style RTTI hook: true when `f` reports DylibKind from kind().
  static bool classof(const InputFile *f) { return f->kind() == DylibKind; }

  StringRef installName;
  DylibFile *exportingFile = nullptr;
  // No in-class initializer: the value must come from the constructors, which
  // are not visible here. The same applies to `refState` and `isBundleLoader`.
  DylibFile *umbrella;
  SmallVector<StringRef, 2> rpaths;
  uint32_t compatibilityVersion = 0;
  uint32_t currentVersion = 0;
  int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
  // Counter behind isReferenced(); updated by code outside this header.
  unsigned numReferencedSymbols = 0;
  RefState refState;
  bool reexport = false;
  bool forceNeeded = false;
  bool forceWeakImport = false;
  bool deadStrippable = false;
  bool explicitlyLinked = false;
  // An executable can be used as a bundle loader that will load the output
  // file being linked, and that contains symbols referenced, but not
  // implemented in the bundle. When used like this, it is very similar
  // to a dylib, so we've used the same class to represent it.
  bool isBundleLoader;

private:
  // NOTE(review): the names suggest these handle special "$ld$..." directive
  // symbols (previous / install_name / hide) -- confirm in the definitions.
  bool handleLDSymbol(StringRef originalName);
  void handleLDPreviousSymbol(StringRef name, StringRef originalName);
  void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
  void handleLDHideSymbol(StringRef name, StringRef originalName);
  void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
  void parseExportedSymbols(uint32_t offset, uint32_t size);

  // NOTE(review): presumably filled in by handleLDHideSymbol() -- confirm.
  llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
};
2575ffd83dbSDimitry Andric 
2585ffd83dbSDimitry Andric // .a file
259fe6060f1SDimitry Andric class ArchiveFile final : public InputFile {
2605ffd83dbSDimitry Andric public:
2615ffd83dbSDimitry Andric   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
262349cc55cSDimitry Andric   void addLazySymbols();
263349cc55cSDimitry Andric   void fetch(const llvm::object::Archive::Symbol &);
264349cc55cSDimitry Andric   // LLD normally doesn't use Error for error-handling, but the underlying
265349cc55cSDimitry Andric   // Archive library does, so this is the cleanest way to wrap it.
266349cc55cSDimitry Andric   Error fetch(const llvm::object::Archive::Child &, StringRef reason);
267349cc55cSDimitry Andric   const llvm::object::Archive &getArchive() const { return *file; };
2685ffd83dbSDimitry Andric   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
2695ffd83dbSDimitry Andric 
2705ffd83dbSDimitry Andric private:
2715ffd83dbSDimitry Andric   std::unique_ptr<llvm::object::Archive> file;
2725ffd83dbSDimitry Andric   // Keep track of children fetched from the archive by tracking
2735ffd83dbSDimitry Andric   // which address offsets have been fetched already.
2745ffd83dbSDimitry Andric   llvm::DenseSet<uint64_t> seen;
2755ffd83dbSDimitry Andric };
2765ffd83dbSDimitry Andric 
// Input file whose kind() is BitcodeKind; wraps an llvm::lto::InputFile.
class BitcodeFile final : public InputFile {
public:
  // Defined outside this header. `lazy` defaults to false.
  explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                       uint64_t offsetInArchive, bool lazy = false);
  // LLVM-style RTTI hook: true when `f` reports BitcodeKind from kind().
  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
  void parse();

  // The owned LTO-level representation of this file.
  std::unique_ptr<llvm::lto::InputFile> obj;

private:
  void parseLazy();
};
289e8d8bef9SDimitry Andric 
// Both globals are defined outside this header.
// NOTE(review): presumably the set of all input files taking part in the
// link -- confirm against the definition.
extern llvm::SetVector<InputFile *> inputFiles;
// NOTE(review): presumably caches buffers already produced by readFile(),
// keyed by path -- confirm against the definition.
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;

// Returns a buffer for `path`, or an empty Optional. The conditions under
// which it comes back empty are set by the out-of-line definition.
llvm::Optional<MemoryBufferRef> readFile(StringRef path);

// NOTE(review): presumably turns a lazy `file` into a fully loaded one, with
// `reason` recording why -- confirm against the definition.
void extract(InputFile &file, StringRef reason);
29604eeddc0SDimitry Andric 
297fe6060f1SDimitry Andric namespace detail {
298fe6060f1SDimitry Andric 
299fe6060f1SDimitry Andric template <class CommandType, class... Types>
300fe6060f1SDimitry Andric std::vector<const CommandType *>
301fe6060f1SDimitry Andric findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
302fe6060f1SDimitry Andric   std::vector<const CommandType *> cmds;
303fe6060f1SDimitry Andric   std::initializer_list<uint32_t> typesList{types...};
304fe6060f1SDimitry Andric   const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
305fe6060f1SDimitry Andric   const uint8_t *p =
306fe6060f1SDimitry Andric       reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
307fe6060f1SDimitry Andric   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
308fe6060f1SDimitry Andric     auto *cmd = reinterpret_cast<const CommandType *>(p);
309fe6060f1SDimitry Andric     if (llvm::is_contained(typesList, cmd->cmd)) {
310fe6060f1SDimitry Andric       cmds.push_back(cmd);
311fe6060f1SDimitry Andric       if (cmds.size() == maxCommands)
312fe6060f1SDimitry Andric         return cmds;
313fe6060f1SDimitry Andric     }
314fe6060f1SDimitry Andric     p += cmd->cmdsize;
315fe6060f1SDimitry Andric   }
316fe6060f1SDimitry Andric   return cmds;
317fe6060f1SDimitry Andric }
318fe6060f1SDimitry Andric 
319fe6060f1SDimitry Andric } // namespace detail
320fe6060f1SDimitry Andric 
321fe6060f1SDimitry Andric // anyHdr should be a pointer to either mach_header or mach_header_64
322fe6060f1SDimitry Andric template <class CommandType = llvm::MachO::load_command, class... Types>
323fe6060f1SDimitry Andric const CommandType *findCommand(const void *anyHdr, Types... types) {
324fe6060f1SDimitry Andric   std::vector<const CommandType *> cmds =
325fe6060f1SDimitry Andric       detail::findCommands<CommandType>(anyHdr, 1, types...);
326fe6060f1SDimitry Andric   return cmds.size() ? cmds[0] : nullptr;
327fe6060f1SDimitry Andric }
328fe6060f1SDimitry Andric 
329fe6060f1SDimitry Andric template <class CommandType = llvm::MachO::load_command, class... Types>
330fe6060f1SDimitry Andric std::vector<const CommandType *> findCommands(const void *anyHdr,
331fe6060f1SDimitry Andric                                               Types... types) {
332fe6060f1SDimitry Andric   return detail::findCommands<CommandType>(anyHdr, 0, types...);
333fe6060f1SDimitry Andric }
334e8d8bef9SDimitry Andric 
3355ffd83dbSDimitry Andric } // namespace macho
3365ffd83dbSDimitry Andric 
// String conversions for Mach-O inputs and sections, declared in namespace
// lld and defined elsewhere.
// NOTE(review): presumably used when formatting diagnostics; the exact output
// format is determined by the definitions.
std::string toString(const macho::InputFile *file);
std::string toString(const macho::Section &);
3395ffd83dbSDimitry Andric } // namespace lld
3405ffd83dbSDimitry Andric 
3415ffd83dbSDimitry Andric #endif
342