xref: /freebsd/contrib/llvm-project/lld/MachO/InputFiles.h (revision d30a1689f5b37e78ea189232a8b94a7011dc0dc8)
1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
11 
12 #include "MachOStructs.h"
13 #include "Target.h"
14 
15 #include "lld/Common/LLVM.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/ADT/CachedHashString.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
22 #include "llvm/Object/Archive.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/TextAPI/TextAPIReader.h"
25 
26 #include <vector>
27 
28 namespace llvm {
29 namespace lto {
30 class InputFile;
31 } // namespace lto
32 namespace MachO {
33 class InterfaceFile;
34 } // namespace MachO
35 class TarWriter;
36 } // namespace llvm
37 
38 namespace lld {
39 namespace macho {
40 
41 struct PlatformInfo;
42 class ConcatInputSection;
43 class Symbol;
44 class Defined;
45 struct Reloc;
46 enum class RefState : uint8_t;
47 
48 // If --reproduce option is given, all input files are written
49 // to this tar archive.
50 extern std::unique_ptr<llvm::TarWriter> tar;
51 
52 // If .subsections_via_symbols is set, each InputSection will be split along
53 // symbol boundaries. The field offset represents the offset of the subsection
54 // from the start of the original pre-split InputSection.
55 struct Subsection {
56   uint64_t offset = 0;
57   InputSection *isec = nullptr;
58 };
59 
60 using Subsections = std::vector<Subsection>;
61 
62 struct Section {
63   uint64_t address = 0;
64   Subsections subsections;
65   Section(uint64_t addr) : address(addr){};
66 };
67 
// Represents a call graph profile edge, directed from the caller symbol to
// the callee symbol.
//
// All fields are zero-initialized by default, matching the other plain
// structs in this header, so a default-constructed entry never carries
// indeterminate values. The type remains an aggregate, so
// `CallGraphEntry{from, to, count}` keeps working.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex = 0;
  // The index of the callee in the symbol table.
  uint32_t toIndex = 0;
  // Number of calls from caller to callee in the profile.
  uint64_t count = 0;
};
77 
78 class InputFile {
79 public:
80   enum Kind {
81     ObjKind,
82     OpaqueKind,
83     DylibKind,
84     ArchiveKind,
85     BitcodeKind,
86   };
87 
88   virtual ~InputFile() = default;
89   Kind kind() const { return fileKind; }
90   StringRef getName() const { return name; }
91   static void resetIdCount() { idCount = 0; }
92 
93   MemoryBufferRef mb;
94 
95   std::vector<Symbol *> symbols;
96   std::vector<Section> sections;
97 
98   // If not empty, this stores the name of the archive containing this file.
99   // We use this string for creating error messages.
100   std::string archiveName;
101 
102   // Provides an easy way to sort InputFiles deterministically.
103   const int id;
104 
105   // True if this is a lazy ObjFile or BitcodeFile.
106   bool lazy = false;
107 
108 protected:
109   InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
110       : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
111         name(mb.getBufferIdentifier()) {}
112 
113   InputFile(Kind, const llvm::MachO::InterfaceFile &);
114 
115 private:
116   const Kind fileKind;
117   const StringRef name;
118 
119   static int idCount;
120 };
121 
122 // .o file
123 class ObjFile final : public InputFile {
124 public:
125   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
126           bool lazy = false);
127   ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
128   template <class LP> void parse();
129 
130   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
131 
132   llvm::DWARFUnit *compileUnit = nullptr;
133   const uint32_t modTime;
134   std::vector<ConcatInputSection *> debugSections;
135   std::vector<CallGraphEntry> callGraph;
136 
137 private:
138   Section *compactUnwindSection = nullptr;
139 
140   template <class LP> void parseLazy();
141   template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
142   template <class LP>
143   void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
144                     ArrayRef<typename LP::nlist> nList, const char *strtab,
145                     bool subsectionsViaSymbols);
146   template <class NList>
147   Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
148   template <class SectionHeader>
149   void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
150                         const SectionHeader &, Subsections &);
151   void parseDebugInfo();
152   void registerCompactUnwind();
153 };
154 
155 // command-line -sectcreate file
156 class OpaqueFile final : public InputFile {
157 public:
158   OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
159   static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
160 };
161 
162 // .dylib or .tbd file
163 class DylibFile final : public InputFile {
164 public:
165   // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
166   // symbols in those sub-libraries will be available under the umbrella
167   // library's namespace. Those sub-libraries can also have their own
168   // re-exports. When loading a re-exported dylib, `umbrella` should be set to
169   // the root dylib to ensure symbols in the child library are correctly bound
170   // to the root. On the other hand, if a dylib is being directly loaded
171   // (through an -lfoo flag), then `umbrella` should be a nullptr.
172   explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
173                      bool isBundleLoader = false);
174   explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
175                      DylibFile *umbrella = nullptr,
176                      bool isBundleLoader = false);
177 
178   void parseLoadCommands(MemoryBufferRef mb);
179   void parseReexports(const llvm::MachO::InterfaceFile &interface);
180   bool isReferenced() const { return numReferencedSymbols > 0; }
181 
182   static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
183 
184   StringRef installName;
185   DylibFile *exportingFile = nullptr;
186   DylibFile *umbrella;
187   SmallVector<StringRef, 2> rpaths;
188   uint32_t compatibilityVersion = 0;
189   uint32_t currentVersion = 0;
190   int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
191   unsigned numReferencedSymbols = 0;
192   RefState refState;
193   bool reexport = false;
194   bool forceNeeded = false;
195   bool forceWeakImport = false;
196   bool deadStrippable = false;
197   bool explicitlyLinked = false;
198   // An executable can be used as a bundle loader that will load the output
199   // file being linked, and that contains symbols referenced, but not
200   // implemented in the bundle. When used like this, it is very similar
201   // to a dylib, so we've used the same class to represent it.
202   bool isBundleLoader;
203 
204 private:
205   bool handleLDSymbol(StringRef originalName);
206   void handleLDPreviousSymbol(StringRef name, StringRef originalName);
207   void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
208   void handleLDHideSymbol(StringRef name, StringRef originalName);
209   void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
210 
211   llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
212 };
213 
214 // .a file
215 class ArchiveFile final : public InputFile {
216 public:
217   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
218   void addLazySymbols();
219   void fetch(const llvm::object::Archive::Symbol &);
220   // LLD normally doesn't use Error for error-handling, but the underlying
221   // Archive library does, so this is the cleanest way to wrap it.
222   Error fetch(const llvm::object::Archive::Child &, StringRef reason);
223   const llvm::object::Archive &getArchive() const { return *file; };
224   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
225 
226 private:
227   std::unique_ptr<llvm::object::Archive> file;
228   // Keep track of children fetched from the archive by tracking
229   // which address offsets have been fetched already.
230   llvm::DenseSet<uint64_t> seen;
231 };
232 
233 class BitcodeFile final : public InputFile {
234 public:
235   explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
236                        uint64_t offsetInArchive, bool lazy = false);
237   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
238   void parse();
239 
240   std::unique_ptr<llvm::lto::InputFile> obj;
241 
242 private:
243   void parseLazy();
244 };
245 
246 extern llvm::SetVector<InputFile *> inputFiles;
247 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
248 
249 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
250 
251 void extract(InputFile &file, StringRef reason);
252 
253 namespace detail {
254 
255 template <class CommandType, class... Types>
256 std::vector<const CommandType *>
257 findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
258   std::vector<const CommandType *> cmds;
259   std::initializer_list<uint32_t> typesList{types...};
260   const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
261   const uint8_t *p =
262       reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
263   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
264     auto *cmd = reinterpret_cast<const CommandType *>(p);
265     if (llvm::is_contained(typesList, cmd->cmd)) {
266       cmds.push_back(cmd);
267       if (cmds.size() == maxCommands)
268         return cmds;
269     }
270     p += cmd->cmdsize;
271   }
272   return cmds;
273 }
274 
275 } // namespace detail
276 
277 // anyHdr should be a pointer to either mach_header or mach_header_64
278 template <class CommandType = llvm::MachO::load_command, class... Types>
279 const CommandType *findCommand(const void *anyHdr, Types... types) {
280   std::vector<const CommandType *> cmds =
281       detail::findCommands<CommandType>(anyHdr, 1, types...);
282   return cmds.size() ? cmds[0] : nullptr;
283 }
284 
285 template <class CommandType = llvm::MachO::load_command, class... Types>
286 std::vector<const CommandType *> findCommands(const void *anyHdr,
287                                               Types... types) {
288   return detail::findCommands<CommandType>(anyHdr, 0, types...);
289 }
290 
291 } // namespace macho
292 
293 std::string toString(const macho::InputFile *file);
294 } // namespace lld
295 
296 #endif
297