1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_FILES_H 10 #define LLD_MACHO_INPUT_FILES_H 11 12 #include "MachOStructs.h" 13 #include "Target.h" 14 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SetVector.h" 19 #include "llvm/BinaryFormat/MachO.h" 20 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 21 #include "llvm/Object/Archive.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/TextAPI/TextAPIReader.h" 24 25 #include <vector> 26 27 namespace llvm { 28 namespace lto { 29 class InputFile; 30 } // namespace lto 31 namespace MachO { 32 class InterfaceFile; 33 } // namespace MachO 34 class TarWriter; 35 } // namespace llvm 36 37 namespace lld { 38 namespace macho { 39 40 struct PlatformInfo; 41 class ConcatInputSection; 42 class Symbol; 43 struct Reloc; 44 enum class RefState : uint8_t; 45 46 // If --reproduce option is given, all input files are written 47 // to this tar archive. 48 extern std::unique_ptr<llvm::TarWriter> tar; 49 50 // If .subsections_via_symbols is set, each InputSection will be split along 51 // symbol boundaries. The field offset represents the offset of the subsection 52 // from the start of the original pre-split InputSection. 53 struct SubsectionEntry { 54 uint64_t offset; 55 InputSection *isec; 56 }; 57 using SubsectionMap = std::vector<SubsectionEntry>; 58 59 class InputFile { 60 public: 61 enum Kind { 62 ObjKind, 63 OpaqueKind, 64 DylibKind, 65 ArchiveKind, 66 BitcodeKind, 67 }; 68 69 virtual ~InputFile() = default; 70 Kind kind() const { return fileKind; } 71 StringRef getName() const { return name; } 72 73 MemoryBufferRef mb; 74 75 std::vector<Symbol *> symbols; 76 std::vector<SubsectionMap> subsections; 77 // Provides an easy way to sort InputFiles deterministically. 78 const int id; 79 80 // If not empty, this stores the name of the archive containing this file. 81 // We use this string for creating error messages. 82 std::string archiveName; 83 84 protected: 85 InputFile(Kind kind, MemoryBufferRef mb) 86 : mb(mb), id(idCount++), fileKind(kind), name(mb.getBufferIdentifier()) {} 87 88 InputFile(Kind, const llvm::MachO::InterfaceFile &); 89 90 private: 91 const Kind fileKind; 92 const StringRef name; 93 94 static int idCount; 95 }; 96 97 // .o file 98 class ObjFile final : public InputFile { 99 public: 100 ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName); 101 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 102 103 llvm::DWARFUnit *compileUnit = nullptr; 104 const uint32_t modTime; 105 std::vector<ConcatInputSection *> debugSections; 106 ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries; 107 108 private: 109 template <class LP> void parse(); 110 template <class Section> void parseSections(ArrayRef<Section>); 111 template <class LP> 112 void parseSymbols(ArrayRef<typename LP::section> sectionHeaders, 113 ArrayRef<typename LP::nlist> nList, const char *strtab, 114 bool subsectionsViaSymbols); 115 template <class NList> 116 Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); 117 template <class Section> 118 void parseRelocations(ArrayRef<Section> sectionHeaders, const Section &, 119 SubsectionMap &); 120 void parseDebugInfo(); 121 void parseDataInCode(); 122 }; 123 124 // command-line -sectcreate file 125 class OpaqueFile final : public InputFile { 126 public: 127 OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); 128 static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } 129 }; 130 131 // .dylib or .tbd file 132 class DylibFile final : public InputFile { 133 public: 134 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the 135 // symbols in those sub-libraries will be available under the umbrella 136 // library's namespace. Those sub-libraries can also have their own 137 // re-exports. When loading a re-exported dylib, `umbrella` should be set to 138 // the root dylib to ensure symbols in the child library are correctly bound 139 // to the root. On the other hand, if a dylib is being directly loaded 140 // (through an -lfoo flag), then `umbrella` should be a nullptr. 141 explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, 142 bool isBundleLoader = false); 143 explicit DylibFile(const llvm::MachO::InterfaceFile &interface, 144 DylibFile *umbrella = nullptr, 145 bool isBundleLoader = false); 146 147 void parseLoadCommands(MemoryBufferRef mb); 148 void parseReexports(const llvm::MachO::InterfaceFile &interface); 149 150 static bool classof(const InputFile *f) { return f->kind() == DylibKind; } 151 152 StringRef installName; 153 DylibFile *exportingFile = nullptr; 154 DylibFile *umbrella; 155 SmallVector<StringRef, 2> rpaths; 156 uint32_t compatibilityVersion = 0; 157 uint32_t currentVersion = 0; 158 int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel 159 RefState refState; 160 bool reexport = false; 161 bool forceNeeded = false; 162 bool forceWeakImport = false; 163 bool deadStrippable = false; 164 bool explicitlyLinked = false; 165 166 unsigned numReferencedSymbols = 0; 167 168 bool isReferenced() const { return numReferencedSymbols > 0; } 169 170 // An executable can be used as a bundle loader that will load the output 171 // file being linked, and that contains symbols referenced, but not 172 // implemented in the bundle. When used like this, it is very similar 173 // to a Dylib, so we re-used the same class to represent it. 174 bool isBundleLoader; 175 176 private: 177 bool handleLDSymbol(StringRef originalName); 178 void handleLDPreviousSymbol(StringRef name, StringRef originalName); 179 void handleLDInstallNameSymbol(StringRef name, StringRef originalName); 180 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; 181 }; 182 183 // .a file 184 class ArchiveFile final : public InputFile { 185 public: 186 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file); 187 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 188 void fetch(const llvm::object::Archive::Symbol &sym); 189 190 private: 191 std::unique_ptr<llvm::object::Archive> file; 192 // Keep track of children fetched from the archive by tracking 193 // which address offsets have been fetched already. 194 llvm::DenseSet<uint64_t> seen; 195 }; 196 197 class BitcodeFile final : public InputFile { 198 public: 199 explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 200 uint64_t offsetInArchive); 201 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 202 203 std::unique_ptr<llvm::lto::InputFile> obj; 204 }; 205 206 extern llvm::SetVector<InputFile *> inputFiles; 207 208 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 209 210 namespace detail { 211 212 template <class CommandType, class... Types> 213 std::vector<const CommandType *> 214 findCommands(const void *anyHdr, size_t maxCommands, Types... types) { 215 std::vector<const CommandType *> cmds; 216 std::initializer_list<uint32_t> typesList{types...}; 217 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); 218 const uint8_t *p = 219 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; 220 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { 221 auto *cmd = reinterpret_cast<const CommandType *>(p); 222 if (llvm::is_contained(typesList, cmd->cmd)) { 223 cmds.push_back(cmd); 224 if (cmds.size() == maxCommands) 225 return cmds; 226 } 227 p += cmd->cmdsize; 228 } 229 return cmds; 230 } 231 232 } // namespace detail 233 234 // anyHdr should be a pointer to either mach_header or mach_header_64 235 template <class CommandType = llvm::MachO::load_command, class... Types> 236 const CommandType *findCommand(const void *anyHdr, Types... types) { 237 std::vector<const CommandType *> cmds = 238 detail::findCommands<CommandType>(anyHdr, 1, types...); 239 return cmds.size() ? cmds[0] : nullptr; 240 } 241 242 template <class CommandType = llvm::MachO::load_command, class... Types> 243 std::vector<const CommandType *> findCommands(const void *anyHdr, 244 Types... types) { 245 return detail::findCommands<CommandType>(anyHdr, 0, types...); 246 } 247 248 } // namespace macho 249 250 std::string toString(const macho::InputFile *file); 251 } // namespace lld 252 253 #endif 254