1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_FILES_H 10 #define LLD_MACHO_INPUT_FILES_H 11 12 #include "MachOStructs.h" 13 #include "Target.h" 14 15 #include "lld/Common/DWARF.h" 16 #include "lld/Common/LLVM.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/ADT/CachedHashString.h" 19 #include "llvm/ADT/DenseSet.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/BinaryFormat/MachO.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/Object/Archive.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/Threading.h" 26 #include "llvm/TextAPI/TextAPIReader.h" 27 28 #include <vector> 29 30 namespace llvm { 31 namespace lto { 32 class InputFile; 33 } // namespace lto 34 namespace MachO { 35 class InterfaceFile; 36 } // namespace MachO 37 class TarWriter; 38 } // namespace llvm 39 40 namespace lld { 41 namespace macho { 42 43 struct PlatformInfo; 44 class ConcatInputSection; 45 class Symbol; 46 class Defined; 47 class AliasSymbol; 48 struct Reloc; 49 enum class RefState : uint8_t; 50 51 // If --reproduce option is given, all input files are written 52 // to this tar archive. 53 extern std::unique_ptr<llvm::TarWriter> tar; 54 55 // If .subsections_via_symbols is set, each InputSection will be split along 56 // symbol boundaries. The field offset represents the offset of the subsection 57 // from the start of the original pre-split InputSection. 58 struct Subsection { 59 uint64_t offset = 0; 60 InputSection *isec = nullptr; 61 }; 62 63 using Subsections = std::vector<Subsection>; 64 class InputFile; 65 66 class Section { 67 public: 68 InputFile *file; 69 StringRef segname; 70 StringRef name; 71 uint32_t flags; 72 uint64_t addr; 73 Subsections subsections; 74 75 Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags, 76 uint64_t addr) 77 : file(file), segname(segname), name(name), flags(flags), addr(addr) {} 78 // Ensure pointers to Sections are never invalidated. 79 Section(const Section &) = delete; 80 Section &operator=(const Section &) = delete; 81 Section(Section &&) = delete; 82 Section &operator=(Section &&) = delete; 83 84 private: 85 // Whether we have already split this section into individual subsections. 86 // For sections that cannot be split (e.g. literal sections), this is always 87 // false. 88 bool doneSplitting = false; 89 friend class ObjFile; 90 }; 91 92 // Represents a call graph profile edge. 93 struct CallGraphEntry { 94 // The index of the caller in the symbol table. 95 uint32_t fromIndex; 96 // The index of the callee in the symbol table. 97 uint32_t toIndex; 98 // Number of calls from callee to caller in the profile. 99 uint64_t count; 100 101 CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count) 102 : fromIndex(fromIndex), toIndex(toIndex), count(count) {} 103 }; 104 105 class InputFile { 106 public: 107 enum Kind { 108 ObjKind, 109 OpaqueKind, 110 DylibKind, 111 ArchiveKind, 112 BitcodeKind, 113 }; 114 115 virtual ~InputFile() = default; 116 Kind kind() const { return fileKind; } 117 StringRef getName() const { return name; } 118 static void resetIdCount() { idCount = 0; } 119 120 MemoryBufferRef mb; 121 122 std::vector<Symbol *> symbols; 123 std::vector<Section *> sections; 124 ArrayRef<uint8_t> objCImageInfo; 125 126 // If not empty, this stores the name of the archive containing this file. 127 // We use this string for creating error messages. 128 std::string archiveName; 129 130 // Provides an easy way to sort InputFiles deterministically. 131 const int id; 132 133 // True if this is a lazy ObjFile or BitcodeFile. 134 bool lazy = false; 135 136 protected: 137 InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false) 138 : mb(mb), id(idCount++), lazy(lazy), fileKind(kind), 139 name(mb.getBufferIdentifier()) {} 140 141 InputFile(Kind, const llvm::MachO::InterfaceFile &); 142 143 // If true, this input's arch is compatible with target. 144 bool compatArch = true; 145 146 private: 147 const Kind fileKind; 148 const StringRef name; 149 150 static int idCount; 151 }; 152 153 struct FDE { 154 uint32_t funcLength; 155 Symbol *personality; 156 InputSection *lsda; 157 }; 158 159 // .o file 160 class ObjFile final : public InputFile { 161 public: 162 ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, 163 bool lazy = false, bool forceHidden = false, bool compatArch = true, 164 bool builtFromBitcode = false); 165 ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const; 166 ArrayRef<uint8_t> getOptimizationHints() const; 167 template <class LP> void parse(); 168 template <class LP> 169 void parseLinkerOptions(llvm::SmallVectorImpl<StringRef> &LinkerOptions); 170 171 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 172 173 std::string sourceFile() const; 174 // Parses line table information for diagnostics. compileUnit should be used 175 // for other purposes. 176 lld::DWARFCache *getDwarf(); 177 178 llvm::DWARFUnit *compileUnit = nullptr; 179 std::unique_ptr<lld::DWARFCache> dwarfCache; 180 Section *addrSigSection = nullptr; 181 const uint32_t modTime; 182 bool forceHidden; 183 bool builtFromBitcode; 184 std::vector<ConcatInputSection *> debugSections; 185 std::vector<CallGraphEntry> callGraph; 186 llvm::DenseMap<ConcatInputSection *, FDE> fdes; 187 std::vector<AliasSymbol *> aliases; 188 189 private: 190 llvm::once_flag initDwarf; 191 template <class LP> void parseLazy(); 192 template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>); 193 template <class LP> 194 void parseSymbols(ArrayRef<typename LP::section> sectionHeaders, 195 ArrayRef<typename LP::nlist> nList, const char *strtab, 196 bool subsectionsViaSymbols); 197 template <class NList> 198 Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab); 199 template <class SectionHeader> 200 void parseRelocations(ArrayRef<SectionHeader> sectionHeaders, 201 const SectionHeader &, Section &); 202 void parseDebugInfo(); 203 void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection); 204 void registerCompactUnwind(Section &compactUnwindSection); 205 void registerEhFrames(Section &ehFrameSection); 206 }; 207 208 // command-line -sectcreate file 209 class OpaqueFile final : public InputFile { 210 public: 211 OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); 212 static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } 213 }; 214 215 // .dylib or .tbd file 216 class DylibFile final : public InputFile { 217 public: 218 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the 219 // symbols in those sub-libraries will be available under the umbrella 220 // library's namespace. Those sub-libraries can also have their own 221 // re-exports. When loading a re-exported dylib, `umbrella` should be set to 222 // the root dylib to ensure symbols in the child library are correctly bound 223 // to the root. On the other hand, if a dylib is being directly loaded 224 // (through an -lfoo flag), then `umbrella` should be a nullptr. 225 explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, 226 bool isBundleLoader, bool explicitlyLinked); 227 explicit DylibFile(const llvm::MachO::InterfaceFile &interface, 228 DylibFile *umbrella, bool isBundleLoader, 229 bool explicitlyLinked); 230 explicit DylibFile(DylibFile *umbrella); 231 232 void parseLoadCommands(MemoryBufferRef mb); 233 void parseReexports(const llvm::MachO::InterfaceFile &interface); 234 bool isReferenced() const { return numReferencedSymbols > 0; } 235 bool isExplicitlyLinked() const; 236 void setExplicitlyLinked() { explicitlyLinked = true; } 237 238 static bool classof(const InputFile *f) { return f->kind() == DylibKind; } 239 240 StringRef installName; 241 DylibFile *exportingFile = nullptr; 242 DylibFile *umbrella; 243 SmallVector<StringRef, 2> rpaths; 244 uint32_t compatibilityVersion = 0; 245 uint32_t currentVersion = 0; 246 int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel 247 unsigned numReferencedSymbols = 0; 248 RefState refState; 249 bool reexport = false; 250 bool forceNeeded = false; 251 bool forceWeakImport = false; 252 bool deadStrippable = false; 253 254 private: 255 bool explicitlyLinked = false; // Access via isExplicitlyLinked(). 256 257 public: 258 // An executable can be used as a bundle loader that will load the output 259 // file being linked, and that contains symbols referenced, but not 260 // implemented in the bundle. When used like this, it is very similar 261 // to a dylib, so we've used the same class to represent it. 262 bool isBundleLoader; 263 264 // Synthetic Dylib objects created by $ld$previous symbols in this dylib. 265 // Usually empty. These synthetic dylibs won't have synthetic dylibs 266 // themselves. 267 SmallVector<DylibFile *, 2> extraDylibs; 268 269 private: 270 DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion, 271 uint32_t compatVersion); 272 273 bool handleLDSymbol(StringRef originalName); 274 void handleLDPreviousSymbol(StringRef name, StringRef originalName); 275 void handleLDInstallNameSymbol(StringRef name, StringRef originalName); 276 void handleLDHideSymbol(StringRef name, StringRef originalName); 277 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; 278 void parseExportedSymbols(uint32_t offset, uint32_t size); 279 void loadReexport(StringRef path, DylibFile *umbrella, 280 const llvm::MachO::InterfaceFile *currentTopLevelTapi); 281 282 llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; 283 }; 284 285 // .a file 286 class ArchiveFile final : public InputFile { 287 public: 288 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file, 289 bool forceHidden); 290 void addLazySymbols(); 291 void fetch(const llvm::object::Archive::Symbol &); 292 // LLD normally doesn't use Error for error-handling, but the underlying 293 // Archive library does, so this is the cleanest way to wrap it. 294 Error fetch(const llvm::object::Archive::Child &, StringRef reason); 295 const llvm::object::Archive &getArchive() const { return *file; }; 296 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 297 298 private: 299 std::unique_ptr<llvm::object::Archive> file; 300 // Keep track of children fetched from the archive by tracking 301 // which address offsets have been fetched already. 302 llvm::DenseSet<uint64_t> seen; 303 // Load all symbols with hidden visibility (-load_hidden). 304 bool forceHidden; 305 }; 306 307 class BitcodeFile final : public InputFile { 308 public: 309 explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 310 uint64_t offsetInArchive, bool lazy = false, 311 bool forceHidden = false, bool compatArch = true); 312 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 313 void parse(); 314 315 std::unique_ptr<llvm::lto::InputFile> obj; 316 bool forceHidden; 317 318 private: 319 void parseLazy(); 320 }; 321 322 extern llvm::SetVector<InputFile *> inputFiles; 323 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; 324 extern llvm::SmallVector<StringRef> unprocessedLCLinkerOptions; 325 326 std::optional<MemoryBufferRef> readFile(StringRef path); 327 328 void extract(InputFile &file, StringRef reason); 329 330 namespace detail { 331 332 template <class CommandType, class... Types> 333 std::vector<const CommandType *> 334 findCommands(const void *anyHdr, size_t maxCommands, Types... types) { 335 std::vector<const CommandType *> cmds; 336 std::initializer_list<uint32_t> typesList{types...}; 337 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); 338 const uint8_t *p = 339 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; 340 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { 341 auto *cmd = reinterpret_cast<const CommandType *>(p); 342 if (llvm::is_contained(typesList, cmd->cmd)) { 343 cmds.push_back(cmd); 344 if (cmds.size() == maxCommands) 345 return cmds; 346 } 347 p += cmd->cmdsize; 348 } 349 return cmds; 350 } 351 352 } // namespace detail 353 354 // anyHdr should be a pointer to either mach_header or mach_header_64 355 template <class CommandType = llvm::MachO::load_command, class... Types> 356 const CommandType *findCommand(const void *anyHdr, Types... types) { 357 std::vector<const CommandType *> cmds = 358 detail::findCommands<CommandType>(anyHdr, 1, types...); 359 return cmds.size() ? cmds[0] : nullptr; 360 } 361 362 template <class CommandType = llvm::MachO::load_command, class... Types> 363 std::vector<const CommandType *> findCommands(const void *anyHdr, 364 Types... types) { 365 return detail::findCommands<CommandType>(anyHdr, 0, types...); 366 } 367 368 std::string replaceThinLTOSuffix(StringRef path); 369 } // namespace macho 370 371 std::string toString(const macho::InputFile *file); 372 std::string toString(const macho::Section &); 373 } // namespace lld 374 375 #endif 376