1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_FILES_H 10 #define LLD_MACHO_INPUT_FILES_H 11 12 #include "MachOStructs.h" 13 #include "Target.h" 14 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/CachedHashString.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/BinaryFormat/MachO.h" 21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 22 #include "llvm/Object/Archive.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/TextAPI/TextAPIReader.h" 25 26 #include <vector> 27 28 namespace llvm { 29 namespace lto { 30 class InputFile; 31 } // namespace lto 32 namespace MachO { 33 class InterfaceFile; 34 } // namespace MachO 35 class TarWriter; 36 } // namespace llvm 37 38 namespace lld { 39 namespace macho { 40 41 struct PlatformInfo; 42 class ConcatInputSection; 43 class Symbol; 44 class Defined; 45 struct Reloc; 46 enum class RefState : uint8_t; 47 48 // If --reproduce option is given, all input files are written 49 // to this tar archive. 50 extern std::unique_ptr<llvm::TarWriter> tar; 51 52 // If .subsections_via_symbols is set, each InputSection will be split along 53 // symbol boundaries. The field offset represents the offset of the subsection 54 // from the start of the original pre-split InputSection. 55 struct Subsection { 56 uint64_t offset = 0; 57 InputSection *isec = nullptr; 58 }; 59 60 using Subsections = std::vector<Subsection>; 61 62 struct Section { 63 uint64_t address = 0; 64 Subsections subsections; 65 Section(uint64_t addr) : address(addr){}; 66 }; 67 68 // Represents a call graph profile edge. 69 struct CallGraphEntry { 70 // The index of the caller in the symbol table. 71 uint32_t fromIndex; 72 // The index of the callee in the symbol table. 73 uint32_t toIndex; 74 // Number of calls from callee to caller in the profile. 75 uint64_t count; 76 }; 77 78 class InputFile { 79 public: 80 enum Kind { 81 ObjKind, 82 OpaqueKind, 83 DylibKind, 84 ArchiveKind, 85 BitcodeKind, 86 }; 87 88 virtual ~InputFile() = default; 89 Kind kind() const { return fileKind; } 90 StringRef getName() const { return name; } 91 static void resetIdCount() { idCount = 0; } 92 93 MemoryBufferRef mb; 94 95 std::vector<Symbol *> symbols; 96 std::vector<Section> sections; 97 98 // If not empty, this stores the name of the archive containing this file. 99 // We use this string for creating error messages. 100 std::string archiveName; 101 102 // Provides an easy way to sort InputFiles deterministically. 103 const int id; 104 105 // True if this is a lazy ObjFile or BitcodeFile. 106 bool lazy = false; 107 108 protected: 109 InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false) 110 : mb(mb), id(idCount++), lazy(lazy), fileKind(kind), 111 name(mb.getBufferIdentifier()) {} 112 113 InputFile(Kind, const llvm::MachO::InterfaceFile &); 114 115 private: 116 const Kind fileKind; 117 const StringRef name; 118 119 static int idCount; 120 }; 121 122 // .o file 123 class ObjFile final : public InputFile { 124 public: 125 ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, 126 bool lazy = false); 127 ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const; 128 template <class LP> void parse(); 129 130 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 131 132 llvm::DWARFUnit *compileUnit = nullptr; 133 const uint32_t modTime; 134 std::vector<ConcatInputSection *> debugSections; 135 std::vector<CallGraphEntry> callGraph; 136 137 private: 138 Section *compactUnwindSection = nullptr; 139 140 template <class LP> void parseLazy(); 141 template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>); 142 template <class LP> 143 void parseSymbols(ArrayRef<typename LP::section> sectionHeaders, 144 ArrayRef<typename LP::nlist> nList, const char *strtab, 145 bool subsectionsViaSymbols); 146 template <class NList> 147 Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); 148 template <class SectionHeader> 149 void parseRelocations(ArrayRef<SectionHeader> sectionHeaders, 150 const SectionHeader &, Subsections &); 151 void parseDebugInfo(); 152 void registerCompactUnwind(); 153 }; 154 155 // command-line -sectcreate file 156 class OpaqueFile final : public InputFile { 157 public: 158 OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); 159 static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } 160 }; 161 162 // .dylib or .tbd file 163 class DylibFile final : public InputFile { 164 public: 165 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the 166 // symbols in those sub-libraries will be available under the umbrella 167 // library's namespace. Those sub-libraries can also have their own 168 // re-exports. When loading a re-exported dylib, `umbrella` should be set to 169 // the root dylib to ensure symbols in the child library are correctly bound 170 // to the root. On the other hand, if a dylib is being directly loaded 171 // (through an -lfoo flag), then `umbrella` should be a nullptr. 172 explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, 173 bool isBundleLoader = false); 174 explicit DylibFile(const llvm::MachO::InterfaceFile &interface, 175 DylibFile *umbrella = nullptr, 176 bool isBundleLoader = false); 177 178 void parseLoadCommands(MemoryBufferRef mb); 179 void parseReexports(const llvm::MachO::InterfaceFile &interface); 180 bool isReferenced() const { return numReferencedSymbols > 0; } 181 182 static bool classof(const InputFile *f) { return f->kind() == DylibKind; } 183 184 StringRef installName; 185 DylibFile *exportingFile = nullptr; 186 DylibFile *umbrella; 187 SmallVector<StringRef, 2> rpaths; 188 uint32_t compatibilityVersion = 0; 189 uint32_t currentVersion = 0; 190 int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel 191 unsigned numReferencedSymbols = 0; 192 RefState refState; 193 bool reexport = false; 194 bool forceNeeded = false; 195 bool forceWeakImport = false; 196 bool deadStrippable = false; 197 bool explicitlyLinked = false; 198 // An executable can be used as a bundle loader that will load the output 199 // file being linked, and that contains symbols referenced, but not 200 // implemented in the bundle. When used like this, it is very similar 201 // to a dylib, so we've used the same class to represent it. 202 bool isBundleLoader; 203 204 private: 205 bool handleLDSymbol(StringRef originalName); 206 void handleLDPreviousSymbol(StringRef name, StringRef originalName); 207 void handleLDInstallNameSymbol(StringRef name, StringRef originalName); 208 void handleLDHideSymbol(StringRef name, StringRef originalName); 209 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; 210 211 llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; 212 }; 213 214 // .a file 215 class ArchiveFile final : public InputFile { 216 public: 217 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file); 218 void addLazySymbols(); 219 void fetch(const llvm::object::Archive::Symbol &); 220 // LLD normally doesn't use Error for error-handling, but the underlying 221 // Archive library does, so this is the cleanest way to wrap it. 222 Error fetch(const llvm::object::Archive::Child &, StringRef reason); 223 const llvm::object::Archive &getArchive() const { return *file; }; 224 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 225 226 private: 227 std::unique_ptr<llvm::object::Archive> file; 228 // Keep track of children fetched from the archive by tracking 229 // which address offsets have been fetched already. 230 llvm::DenseSet<uint64_t> seen; 231 }; 232 233 class BitcodeFile final : public InputFile { 234 public: 235 explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 236 uint64_t offsetInArchive, bool lazy = false); 237 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 238 void parse(); 239 240 std::unique_ptr<llvm::lto::InputFile> obj; 241 242 private: 243 void parseLazy(); 244 }; 245 246 extern llvm::SetVector<InputFile *> inputFiles; 247 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; 248 249 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 250 251 void extract(InputFile &file, StringRef reason); 252 253 namespace detail { 254 255 template <class CommandType, class... Types> 256 std::vector<const CommandType *> 257 findCommands(const void *anyHdr, size_t maxCommands, Types... types) { 258 std::vector<const CommandType *> cmds; 259 std::initializer_list<uint32_t> typesList{types...}; 260 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); 261 const uint8_t *p = 262 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; 263 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { 264 auto *cmd = reinterpret_cast<const CommandType *>(p); 265 if (llvm::is_contained(typesList, cmd->cmd)) { 266 cmds.push_back(cmd); 267 if (cmds.size() == maxCommands) 268 return cmds; 269 } 270 p += cmd->cmdsize; 271 } 272 return cmds; 273 } 274 275 } // namespace detail 276 277 // anyHdr should be a pointer to either mach_header or mach_header_64 278 template <class CommandType = llvm::MachO::load_command, class... Types> 279 const CommandType *findCommand(const void *anyHdr, Types... types) { 280 std::vector<const CommandType *> cmds = 281 detail::findCommands<CommandType>(anyHdr, 1, types...); 282 return cmds.size() ? cmds[0] : nullptr; 283 } 284 285 template <class CommandType = llvm::MachO::load_command, class... Types> 286 std::vector<const CommandType *> findCommands(const void *anyHdr, 287 Types... types) { 288 return detail::findCommands<CommandType>(anyHdr, 0, types...); 289 } 290 291 } // namespace macho 292 293 std::string toString(const macho::InputFile *file); 294 } // namespace lld 295 296 #endif 297