15ffd83dbSDimitry Andric //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #ifndef LLD_MACHO_INPUT_FILES_H 105ffd83dbSDimitry Andric #define LLD_MACHO_INPUT_FILES_H 115ffd83dbSDimitry Andric 125ffd83dbSDimitry Andric #include "MachOStructs.h" 13fe6060f1SDimitry Andric #include "Target.h" 145ffd83dbSDimitry Andric 1581ad6265SDimitry Andric #include "lld/Common/DWARF.h" 165ffd83dbSDimitry Andric #include "lld/Common/LLVM.h" 17e8d8bef9SDimitry Andric #include "lld/Common/Memory.h" 18349cc55cSDimitry Andric #include "llvm/ADT/CachedHashString.h" 195ffd83dbSDimitry Andric #include "llvm/ADT/DenseSet.h" 20e8d8bef9SDimitry Andric #include "llvm/ADT/SetVector.h" 215ffd83dbSDimitry Andric #include "llvm/BinaryFormat/MachO.h" 22e8d8bef9SDimitry Andric #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 235ffd83dbSDimitry Andric #include "llvm/Object/Archive.h" 245ffd83dbSDimitry Andric #include "llvm/Support/MemoryBuffer.h" 2581ad6265SDimitry Andric #include "llvm/Support/Threading.h" 26fe6060f1SDimitry Andric #include "llvm/TextAPI/TextAPIReader.h" 275ffd83dbSDimitry Andric 285ffd83dbSDimitry Andric #include <vector> 295ffd83dbSDimitry Andric 30e8d8bef9SDimitry Andric namespace llvm { 31e8d8bef9SDimitry Andric namespace lto { 32e8d8bef9SDimitry Andric class InputFile; 33e8d8bef9SDimitry Andric } // namespace lto 34fe6060f1SDimitry Andric namespace MachO { 35fe6060f1SDimitry Andric class InterfaceFile; 36fe6060f1SDimitry Andric } // namespace MachO 37e8d8bef9SDimitry Andric class TarWriter; 38e8d8bef9SDimitry Andric } // namespace llvm 39e8d8bef9SDimitry Andric 405ffd83dbSDimitry Andric namespace lld { 415ffd83dbSDimitry Andric namespace macho { 425ffd83dbSDimitry Andric 43fe6060f1SDimitry Andric struct PlatformInfo; 44fe6060f1SDimitry Andric class ConcatInputSection; 455ffd83dbSDimitry Andric class Symbol; 46349cc55cSDimitry Andric class Defined; 475ffd83dbSDimitry Andric struct Reloc; 48e8d8bef9SDimitry Andric enum class RefState : uint8_t; 49e8d8bef9SDimitry Andric 50e8d8bef9SDimitry Andric // If --reproduce option is given, all input files are written 51e8d8bef9SDimitry Andric // to this tar archive. 52e8d8bef9SDimitry Andric extern std::unique_ptr<llvm::TarWriter> tar; 535ffd83dbSDimitry Andric 545ffd83dbSDimitry Andric // If .subsections_via_symbols is set, each InputSection will be split along 55fe6060f1SDimitry Andric // symbol boundaries. The field offset represents the offset of the subsection 56fe6060f1SDimitry Andric // from the start of the original pre-split InputSection. 57349cc55cSDimitry Andric struct Subsection { 58349cc55cSDimitry Andric uint64_t offset = 0; 59349cc55cSDimitry Andric InputSection *isec = nullptr; 60fe6060f1SDimitry Andric }; 61349cc55cSDimitry Andric 62349cc55cSDimitry Andric using Subsections = std::vector<Subsection>; 6381ad6265SDimitry Andric class InputFile; 64349cc55cSDimitry Andric 6581ad6265SDimitry Andric class Section { 6681ad6265SDimitry Andric public: 6781ad6265SDimitry Andric InputFile *file; 6881ad6265SDimitry Andric StringRef segname; 6981ad6265SDimitry Andric StringRef name; 7081ad6265SDimitry Andric uint32_t flags; 7181ad6265SDimitry Andric uint64_t addr; 72349cc55cSDimitry Andric Subsections subsections; 7381ad6265SDimitry Andric 7481ad6265SDimitry Andric Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags, 7581ad6265SDimitry Andric uint64_t addr) 7681ad6265SDimitry Andric : file(file), segname(segname), name(name), flags(flags), addr(addr) {} 7781ad6265SDimitry Andric // Ensure pointers to Sections are never invalidated. 7881ad6265SDimitry Andric Section(const Section &) = delete; 7981ad6265SDimitry Andric Section &operator=(const Section &) = delete; 8081ad6265SDimitry Andric Section(Section &&) = delete; 8181ad6265SDimitry Andric Section &operator=(Section &&) = delete; 8281ad6265SDimitry Andric 8381ad6265SDimitry Andric private: 8481ad6265SDimitry Andric // Whether we have already split this section into individual subsections. 8581ad6265SDimitry Andric // For sections that cannot be split (e.g. literal sections), this is always 8681ad6265SDimitry Andric // false. 8781ad6265SDimitry Andric bool doneSplitting = false; 8881ad6265SDimitry Andric friend class ObjFile; 89349cc55cSDimitry Andric }; 905ffd83dbSDimitry Andric 9104eeddc0SDimitry Andric // Represents a call graph profile edge. 9204eeddc0SDimitry Andric struct CallGraphEntry { 9304eeddc0SDimitry Andric // The index of the caller in the symbol table. 9404eeddc0SDimitry Andric uint32_t fromIndex; 9504eeddc0SDimitry Andric // The index of the callee in the symbol table. 9604eeddc0SDimitry Andric uint32_t toIndex; 9704eeddc0SDimitry Andric // Number of calls from callee to caller in the profile. 9804eeddc0SDimitry Andric uint64_t count; 9981ad6265SDimitry Andric 10081ad6265SDimitry Andric CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count) 10181ad6265SDimitry Andric : fromIndex(fromIndex), toIndex(toIndex), count(count) {} 10204eeddc0SDimitry Andric }; 10304eeddc0SDimitry Andric 1045ffd83dbSDimitry Andric class InputFile { 1055ffd83dbSDimitry Andric public: 1065ffd83dbSDimitry Andric enum Kind { 1075ffd83dbSDimitry Andric ObjKind, 108e8d8bef9SDimitry Andric OpaqueKind, 1095ffd83dbSDimitry Andric DylibKind, 1105ffd83dbSDimitry Andric ArchiveKind, 111e8d8bef9SDimitry Andric BitcodeKind, 1125ffd83dbSDimitry Andric }; 1135ffd83dbSDimitry Andric 1145ffd83dbSDimitry Andric virtual ~InputFile() = default; 1155ffd83dbSDimitry Andric Kind kind() const { return fileKind; } 116e8d8bef9SDimitry Andric StringRef getName() const { return name; } 117349cc55cSDimitry Andric static void resetIdCount() { idCount = 0; } 1185ffd83dbSDimitry Andric 1195ffd83dbSDimitry Andric MemoryBufferRef mb; 120e8d8bef9SDimitry Andric 1215ffd83dbSDimitry Andric std::vector<Symbol *> symbols; 12281ad6265SDimitry Andric std::vector<Section *> sections; 123fcaf7f86SDimitry Andric ArrayRef<uint8_t> objCImageInfo; 124e8d8bef9SDimitry Andric 125e8d8bef9SDimitry Andric // If not empty, this stores the name of the archive containing this file. 126e8d8bef9SDimitry Andric // We use this string for creating error messages. 127e8d8bef9SDimitry Andric std::string archiveName; 1285ffd83dbSDimitry Andric 12904eeddc0SDimitry Andric // Provides an easy way to sort InputFiles deterministically. 13004eeddc0SDimitry Andric const int id; 13104eeddc0SDimitry Andric 13204eeddc0SDimitry Andric // True if this is a lazy ObjFile or BitcodeFile. 13304eeddc0SDimitry Andric bool lazy = false; 13404eeddc0SDimitry Andric 1355ffd83dbSDimitry Andric protected: 13604eeddc0SDimitry Andric InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false) 13704eeddc0SDimitry Andric : mb(mb), id(idCount++), lazy(lazy), fileKind(kind), 13804eeddc0SDimitry Andric name(mb.getBufferIdentifier()) {} 1395ffd83dbSDimitry Andric 140fe6060f1SDimitry Andric InputFile(Kind, const llvm::MachO::InterfaceFile &); 1415ffd83dbSDimitry Andric 1425ffd83dbSDimitry Andric private: 1435ffd83dbSDimitry Andric const Kind fileKind; 144e8d8bef9SDimitry Andric const StringRef name; 145e8d8bef9SDimitry Andric 146e8d8bef9SDimitry Andric static int idCount; 1475ffd83dbSDimitry Andric }; 1485ffd83dbSDimitry Andric 14981ad6265SDimitry Andric struct FDE { 15081ad6265SDimitry Andric uint32_t funcLength; 15181ad6265SDimitry Andric Symbol *personality; 15281ad6265SDimitry Andric InputSection *lsda; 15381ad6265SDimitry Andric }; 15481ad6265SDimitry Andric 1555ffd83dbSDimitry Andric // .o file 156fe6060f1SDimitry Andric class ObjFile final : public InputFile { 1575ffd83dbSDimitry Andric public: 15804eeddc0SDimitry Andric ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, 159*972a253aSDimitry Andric bool lazy = false, bool forceHidden = false); 1600eae32dcSDimitry Andric ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const; 16104eeddc0SDimitry Andric template <class LP> void parse(); 1620eae32dcSDimitry Andric 1635ffd83dbSDimitry Andric static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 164e8d8bef9SDimitry Andric 16581ad6265SDimitry Andric std::string sourceFile() const; 16681ad6265SDimitry Andric // Parses line table information for diagnostics. compileUnit should be used 16781ad6265SDimitry Andric // for other purposes. 16881ad6265SDimitry Andric lld::DWARFCache *getDwarf(); 16981ad6265SDimitry Andric 170e8d8bef9SDimitry Andric llvm::DWARFUnit *compileUnit = nullptr; 17181ad6265SDimitry Andric std::unique_ptr<lld::DWARFCache> dwarfCache; 17281ad6265SDimitry Andric Section *addrSigSection = nullptr; 173e8d8bef9SDimitry Andric const uint32_t modTime; 174*972a253aSDimitry Andric bool forceHidden; 175fe6060f1SDimitry Andric std::vector<ConcatInputSection *> debugSections; 17604eeddc0SDimitry Andric std::vector<CallGraphEntry> callGraph; 17781ad6265SDimitry Andric llvm::DenseMap<ConcatInputSection *, FDE> fdes; 17881ad6265SDimitry Andric std::vector<OptimizationHint> optimizationHints; 179e8d8bef9SDimitry Andric 180e8d8bef9SDimitry Andric private: 18181ad6265SDimitry Andric llvm::once_flag initDwarf; 18204eeddc0SDimitry Andric template <class LP> void parseLazy(); 183349cc55cSDimitry Andric template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>); 184fe6060f1SDimitry Andric template <class LP> 185fe6060f1SDimitry Andric void parseSymbols(ArrayRef<typename LP::section> sectionHeaders, 186fe6060f1SDimitry Andric ArrayRef<typename LP::nlist> nList, const char *strtab, 187e8d8bef9SDimitry Andric bool subsectionsViaSymbols); 188fe6060f1SDimitry Andric template <class NList> 189fe6060f1SDimitry Andric Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); 190349cc55cSDimitry Andric template <class SectionHeader> 191349cc55cSDimitry Andric void parseRelocations(ArrayRef<SectionHeader> sectionHeaders, 19281ad6265SDimitry Andric const SectionHeader &, Section &); 193e8d8bef9SDimitry Andric void parseDebugInfo(); 19481ad6265SDimitry Andric void parseOptimizationHints(ArrayRef<uint8_t> data); 19581ad6265SDimitry Andric void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection); 19681ad6265SDimitry Andric void registerCompactUnwind(Section &compactUnwindSection); 19781ad6265SDimitry Andric void registerEhFrames(Section &ehFrameSection); 198e8d8bef9SDimitry Andric }; 199e8d8bef9SDimitry Andric 200e8d8bef9SDimitry Andric // command-line -sectcreate file 201fe6060f1SDimitry Andric class OpaqueFile final : public InputFile { 202e8d8bef9SDimitry Andric public: 203e8d8bef9SDimitry Andric OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); 204e8d8bef9SDimitry Andric static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } 2055ffd83dbSDimitry Andric }; 2065ffd83dbSDimitry Andric 207fe6060f1SDimitry Andric // .dylib or .tbd file 208fe6060f1SDimitry Andric class DylibFile final : public InputFile { 2095ffd83dbSDimitry Andric public: 2105ffd83dbSDimitry Andric // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the 2115ffd83dbSDimitry Andric // symbols in those sub-libraries will be available under the umbrella 2125ffd83dbSDimitry Andric // library's namespace. Those sub-libraries can also have their own 2135ffd83dbSDimitry Andric // re-exports. When loading a re-exported dylib, `umbrella` should be set to 2145ffd83dbSDimitry Andric // the root dylib to ensure symbols in the child library are correctly bound 2155ffd83dbSDimitry Andric // to the root. On the other hand, if a dylib is being directly loaded 2165ffd83dbSDimitry Andric // (through an -lfoo flag), then `umbrella` should be a nullptr. 217fe6060f1SDimitry Andric explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, 21881ad6265SDimitry Andric bool isBundleLoader, bool explicitlyLinked); 219e8d8bef9SDimitry Andric explicit DylibFile(const llvm::MachO::InterfaceFile &interface, 22081ad6265SDimitry Andric DylibFile *umbrella, bool isBundleLoader, 22181ad6265SDimitry Andric bool explicitlyLinked); 222fe6060f1SDimitry Andric 223fe6060f1SDimitry Andric void parseLoadCommands(MemoryBufferRef mb); 224fe6060f1SDimitry Andric void parseReexports(const llvm::MachO::InterfaceFile &interface); 2251fd87a68SDimitry Andric bool isReferenced() const { return numReferencedSymbols > 0; } 226e8d8bef9SDimitry Andric 2275ffd83dbSDimitry Andric static bool classof(const InputFile *f) { return f->kind() == DylibKind; } 2285ffd83dbSDimitry Andric 229fe6060f1SDimitry Andric StringRef installName; 230fe6060f1SDimitry Andric DylibFile *exportingFile = nullptr; 231fe6060f1SDimitry Andric DylibFile *umbrella; 232fe6060f1SDimitry Andric SmallVector<StringRef, 2> rpaths; 233e8d8bef9SDimitry Andric uint32_t compatibilityVersion = 0; 234e8d8bef9SDimitry Andric uint32_t currentVersion = 0; 235fe6060f1SDimitry Andric int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel 2361fd87a68SDimitry Andric unsigned numReferencedSymbols = 0; 237e8d8bef9SDimitry Andric RefState refState; 2385ffd83dbSDimitry Andric bool reexport = false; 239fe6060f1SDimitry Andric bool forceNeeded = false; 240e8d8bef9SDimitry Andric bool forceWeakImport = false; 241fe6060f1SDimitry Andric bool deadStrippable = false; 242fe6060f1SDimitry Andric bool explicitlyLinked = false; 243fe6060f1SDimitry Andric // An executable can be used as a bundle loader that will load the output 244fe6060f1SDimitry Andric // file being linked, and that contains symbols referenced, but not 245fe6060f1SDimitry Andric // implemented in the bundle. When used like this, it is very similar 2461fd87a68SDimitry Andric // to a dylib, so we've used the same class to represent it. 247fe6060f1SDimitry Andric bool isBundleLoader; 248fe6060f1SDimitry Andric 249fe6060f1SDimitry Andric private: 250fe6060f1SDimitry Andric bool handleLDSymbol(StringRef originalName); 251fe6060f1SDimitry Andric void handleLDPreviousSymbol(StringRef name, StringRef originalName); 252fe6060f1SDimitry Andric void handleLDInstallNameSymbol(StringRef name, StringRef originalName); 2530eae32dcSDimitry Andric void handleLDHideSymbol(StringRef name, StringRef originalName); 254fe6060f1SDimitry Andric void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; 255753f127fSDimitry Andric void parseExportedSymbols(uint32_t offset, uint32_t size); 2560eae32dcSDimitry Andric 2570eae32dcSDimitry Andric llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; 2585ffd83dbSDimitry Andric }; 2595ffd83dbSDimitry Andric 2605ffd83dbSDimitry Andric // .a file 261fe6060f1SDimitry Andric class ArchiveFile final : public InputFile { 2625ffd83dbSDimitry Andric public: 263*972a253aSDimitry Andric explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file, 264*972a253aSDimitry Andric bool forceHidden); 265349cc55cSDimitry Andric void addLazySymbols(); 266349cc55cSDimitry Andric void fetch(const llvm::object::Archive::Symbol &); 267349cc55cSDimitry Andric // LLD normally doesn't use Error for error-handling, but the underlying 268349cc55cSDimitry Andric // Archive library does, so this is the cleanest way to wrap it. 269349cc55cSDimitry Andric Error fetch(const llvm::object::Archive::Child &, StringRef reason); 270349cc55cSDimitry Andric const llvm::object::Archive &getArchive() const { return *file; }; 2715ffd83dbSDimitry Andric static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 2725ffd83dbSDimitry Andric 2735ffd83dbSDimitry Andric private: 2745ffd83dbSDimitry Andric std::unique_ptr<llvm::object::Archive> file; 2755ffd83dbSDimitry Andric // Keep track of children fetched from the archive by tracking 2765ffd83dbSDimitry Andric // which address offsets have been fetched already. 2775ffd83dbSDimitry Andric llvm::DenseSet<uint64_t> seen; 278*972a253aSDimitry Andric // Load all symbols with hidden visibility (-load_hidden). 279*972a253aSDimitry Andric bool forceHidden; 2805ffd83dbSDimitry Andric }; 2815ffd83dbSDimitry Andric 282fe6060f1SDimitry Andric class BitcodeFile final : public InputFile { 283e8d8bef9SDimitry Andric public: 284fe6060f1SDimitry Andric explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 285*972a253aSDimitry Andric uint64_t offsetInArchive, bool lazy = false, 286*972a253aSDimitry Andric bool forceHidden = false); 287e8d8bef9SDimitry Andric static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 28804eeddc0SDimitry Andric void parse(); 289e8d8bef9SDimitry Andric 290e8d8bef9SDimitry Andric std::unique_ptr<llvm::lto::InputFile> obj; 291*972a253aSDimitry Andric bool forceHidden; 29204eeddc0SDimitry Andric 29304eeddc0SDimitry Andric private: 29404eeddc0SDimitry Andric void parseLazy(); 295e8d8bef9SDimitry Andric }; 296e8d8bef9SDimitry Andric 297e8d8bef9SDimitry Andric extern llvm::SetVector<InputFile *> inputFiles; 298349cc55cSDimitry Andric extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; 2995ffd83dbSDimitry Andric 3005ffd83dbSDimitry Andric llvm::Optional<MemoryBufferRef> readFile(StringRef path); 3015ffd83dbSDimitry Andric 30204eeddc0SDimitry Andric void extract(InputFile &file, StringRef reason); 30304eeddc0SDimitry Andric 304fe6060f1SDimitry Andric namespace detail { 305fe6060f1SDimitry Andric 306fe6060f1SDimitry Andric template <class CommandType, class... Types> 307fe6060f1SDimitry Andric std::vector<const CommandType *> 308fe6060f1SDimitry Andric findCommands(const void *anyHdr, size_t maxCommands, Types... types) { 309fe6060f1SDimitry Andric std::vector<const CommandType *> cmds; 310fe6060f1SDimitry Andric std::initializer_list<uint32_t> typesList{types...}; 311fe6060f1SDimitry Andric const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); 312fe6060f1SDimitry Andric const uint8_t *p = 313fe6060f1SDimitry Andric reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; 314fe6060f1SDimitry Andric for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { 315fe6060f1SDimitry Andric auto *cmd = reinterpret_cast<const CommandType *>(p); 316fe6060f1SDimitry Andric if (llvm::is_contained(typesList, cmd->cmd)) { 317fe6060f1SDimitry Andric cmds.push_back(cmd); 318fe6060f1SDimitry Andric if (cmds.size() == maxCommands) 319fe6060f1SDimitry Andric return cmds; 320fe6060f1SDimitry Andric } 321fe6060f1SDimitry Andric p += cmd->cmdsize; 322fe6060f1SDimitry Andric } 323fe6060f1SDimitry Andric return cmds; 324fe6060f1SDimitry Andric } 325fe6060f1SDimitry Andric 326fe6060f1SDimitry Andric } // namespace detail 327fe6060f1SDimitry Andric 328fe6060f1SDimitry Andric // anyHdr should be a pointer to either mach_header or mach_header_64 329fe6060f1SDimitry Andric template <class CommandType = llvm::MachO::load_command, class... Types> 330fe6060f1SDimitry Andric const CommandType *findCommand(const void *anyHdr, Types... types) { 331fe6060f1SDimitry Andric std::vector<const CommandType *> cmds = 332fe6060f1SDimitry Andric detail::findCommands<CommandType>(anyHdr, 1, types...); 333fe6060f1SDimitry Andric return cmds.size() ? cmds[0] : nullptr; 334fe6060f1SDimitry Andric } 335fe6060f1SDimitry Andric 336fe6060f1SDimitry Andric template <class CommandType = llvm::MachO::load_command, class... Types> 337fe6060f1SDimitry Andric std::vector<const CommandType *> findCommands(const void *anyHdr, 338fe6060f1SDimitry Andric Types... types) { 339fe6060f1SDimitry Andric return detail::findCommands<CommandType>(anyHdr, 0, types...); 340fe6060f1SDimitry Andric } 341e8d8bef9SDimitry Andric 3425ffd83dbSDimitry Andric } // namespace macho 3435ffd83dbSDimitry Andric 3445ffd83dbSDimitry Andric std::string toString(const macho::InputFile *file); 34581ad6265SDimitry Andric std::string toString(const macho::Section &); 3465ffd83dbSDimitry Andric } // namespace lld 3475ffd83dbSDimitry Andric 3485ffd83dbSDimitry Andric #endif 349