xref: /freebsd/contrib/llvm-project/lld/MachO/InputFiles.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
15ffd83dbSDimitry Andric //===- InputFiles.cpp -----------------------------------------------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric // This file contains functions to parse Mach-O object files. In this comment,
105ffd83dbSDimitry Andric // we describe the Mach-O file structure and how we parse it.
115ffd83dbSDimitry Andric //
125ffd83dbSDimitry Andric // Mach-O is not very different from ELF or COFF. The notion of symbols,
135ffd83dbSDimitry Andric // sections and relocations exists in Mach-O as it does in ELF and COFF.
145ffd83dbSDimitry Andric //
155ffd83dbSDimitry Andric // Perhaps the notion that is new to those who know ELF/COFF is "subsections".
165ffd83dbSDimitry Andric // In ELF/COFF, sections are an atomic unit of data copied from input files to
175ffd83dbSDimitry Andric // output files. When we merge or garbage-collect sections, we treat each
185ffd83dbSDimitry Andric // section as an atomic unit. In Mach-O, that's not the case. Sections can
195ffd83dbSDimitry Andric // consist of multiple subsections, and subsections are a unit of merging and
205ffd83dbSDimitry Andric // garbage-collecting. Therefore, Mach-O's subsections are more similar to
215ffd83dbSDimitry Andric // ELF/COFF's sections than Mach-O's sections are.
225ffd83dbSDimitry Andric //
235ffd83dbSDimitry Andric // A section can have multiple symbols. A symbol that does not have the
245ffd83dbSDimitry Andric // N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
255ffd83dbSDimitry Andric // definition, a symbol is always present at the beginning of each subsection. A
265ffd83dbSDimitry Andric // symbol with N_ALT_ENTRY attribute does not start a new subsection and can
275ffd83dbSDimitry Andric // point to a middle of a subsection.
285ffd83dbSDimitry Andric //
295ffd83dbSDimitry Andric // The notion of subsections also affects how relocations are represented in
305ffd83dbSDimitry Andric // Mach-O. All references within a section need to be explicitly represented as
315ffd83dbSDimitry Andric // relocations if they refer to different subsections, because we obviously need
325ffd83dbSDimitry Andric // to fix up addresses if subsections are laid out in an output file differently
335ffd83dbSDimitry Andric // than they were in object files. To represent that, Mach-O relocations can
345ffd83dbSDimitry Andric // refer to an unnamed location via its address. Scattered relocations (those
355ffd83dbSDimitry Andric // with the R_SCATTERED bit set) always refer to unnamed locations.
365ffd83dbSDimitry Andric // Non-scattered relocations refer to an unnamed location if r_extern is not set
375ffd83dbSDimitry Andric // and r_symbolnum is zero.
385ffd83dbSDimitry Andric //
// Apart from the differences described above, your knowledge of ELF and COFF
// should carry over to Mach-O.
415ffd83dbSDimitry Andric //
425ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
435ffd83dbSDimitry Andric 
445ffd83dbSDimitry Andric #include "InputFiles.h"
455ffd83dbSDimitry Andric #include "Config.h"
46*e8d8bef9SDimitry Andric #include "Driver.h"
47*e8d8bef9SDimitry Andric #include "Dwarf.h"
485ffd83dbSDimitry Andric #include "ExportTrie.h"
495ffd83dbSDimitry Andric #include "InputSection.h"
505ffd83dbSDimitry Andric #include "MachOStructs.h"
51*e8d8bef9SDimitry Andric #include "ObjC.h"
525ffd83dbSDimitry Andric #include "OutputSection.h"
53*e8d8bef9SDimitry Andric #include "OutputSegment.h"
545ffd83dbSDimitry Andric #include "SymbolTable.h"
555ffd83dbSDimitry Andric #include "Symbols.h"
565ffd83dbSDimitry Andric #include "Target.h"
575ffd83dbSDimitry Andric 
58*e8d8bef9SDimitry Andric #include "lld/Common/DWARF.h"
595ffd83dbSDimitry Andric #include "lld/Common/ErrorHandler.h"
605ffd83dbSDimitry Andric #include "lld/Common/Memory.h"
61*e8d8bef9SDimitry Andric #include "lld/Common/Reproduce.h"
62*e8d8bef9SDimitry Andric #include "llvm/ADT/iterator.h"
635ffd83dbSDimitry Andric #include "llvm/BinaryFormat/MachO.h"
64*e8d8bef9SDimitry Andric #include "llvm/LTO/LTO.h"
655ffd83dbSDimitry Andric #include "llvm/Support/Endian.h"
665ffd83dbSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
675ffd83dbSDimitry Andric #include "llvm/Support/Path.h"
68*e8d8bef9SDimitry Andric #include "llvm/Support/TarWriter.h"
695ffd83dbSDimitry Andric 
705ffd83dbSDimitry Andric using namespace llvm;
715ffd83dbSDimitry Andric using namespace llvm::MachO;
725ffd83dbSDimitry Andric using namespace llvm::support::endian;
735ffd83dbSDimitry Andric using namespace llvm::sys;
745ffd83dbSDimitry Andric using namespace lld;
755ffd83dbSDimitry Andric using namespace lld::macho;
765ffd83dbSDimitry Andric 
77*e8d8bef9SDimitry Andric // Returns "<internal>", "foo.a(bar.o)", or "baz.o".
78*e8d8bef9SDimitry Andric std::string lld::toString(const InputFile *f) {
79*e8d8bef9SDimitry Andric   if (!f)
80*e8d8bef9SDimitry Andric     return "<internal>";
81*e8d8bef9SDimitry Andric   if (f->archiveName.empty())
82*e8d8bef9SDimitry Andric     return std::string(f->getName());
83*e8d8bef9SDimitry Andric   return (path::filename(f->archiveName) + "(" + path::filename(f->getName()) +
84*e8d8bef9SDimitry Andric           ")")
85*e8d8bef9SDimitry Andric       .str();
86*e8d8bef9SDimitry Andric }
87*e8d8bef9SDimitry Andric 
88*e8d8bef9SDimitry Andric SetVector<InputFile *> macho::inputFiles;
89*e8d8bef9SDimitry Andric std::unique_ptr<TarWriter> macho::tar;
90*e8d8bef9SDimitry Andric int InputFile::idCount = 0;
915ffd83dbSDimitry Andric 
925ffd83dbSDimitry Andric // Open a given file path and return it as a memory-mapped file.
935ffd83dbSDimitry Andric Optional<MemoryBufferRef> macho::readFile(StringRef path) {
945ffd83dbSDimitry Andric   // Open a file.
955ffd83dbSDimitry Andric   auto mbOrErr = MemoryBuffer::getFile(path);
965ffd83dbSDimitry Andric   if (auto ec = mbOrErr.getError()) {
975ffd83dbSDimitry Andric     error("cannot open " + path + ": " + ec.message());
985ffd83dbSDimitry Andric     return None;
995ffd83dbSDimitry Andric   }
1005ffd83dbSDimitry Andric 
1015ffd83dbSDimitry Andric   std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
1025ffd83dbSDimitry Andric   MemoryBufferRef mbref = mb->getMemBufferRef();
1035ffd83dbSDimitry Andric   make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership
1045ffd83dbSDimitry Andric 
1055ffd83dbSDimitry Andric   // If this is a regular non-fat file, return it.
1065ffd83dbSDimitry Andric   const char *buf = mbref.getBufferStart();
1075ffd83dbSDimitry Andric   auto *hdr = reinterpret_cast<const MachO::fat_header *>(buf);
108*e8d8bef9SDimitry Andric   if (read32be(&hdr->magic) != MachO::FAT_MAGIC) {
109*e8d8bef9SDimitry Andric     if (tar)
110*e8d8bef9SDimitry Andric       tar->append(relativeToRoot(path), mbref.getBuffer());
1115ffd83dbSDimitry Andric     return mbref;
112*e8d8bef9SDimitry Andric   }
1135ffd83dbSDimitry Andric 
1145ffd83dbSDimitry Andric   // Object files and archive files may be fat files, which contains
1155ffd83dbSDimitry Andric   // multiple real files for different CPU ISAs. Here, we search for a
1165ffd83dbSDimitry Andric   // file that matches with the current link target and returns it as
1175ffd83dbSDimitry Andric   // a MemoryBufferRef.
1185ffd83dbSDimitry Andric   auto *arch = reinterpret_cast<const MachO::fat_arch *>(buf + sizeof(*hdr));
1195ffd83dbSDimitry Andric 
1205ffd83dbSDimitry Andric   for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
1215ffd83dbSDimitry Andric     if (reinterpret_cast<const char *>(arch + i + 1) >
1225ffd83dbSDimitry Andric         buf + mbref.getBufferSize()) {
1235ffd83dbSDimitry Andric       error(path + ": fat_arch struct extends beyond end of file");
1245ffd83dbSDimitry Andric       return None;
1255ffd83dbSDimitry Andric     }
1265ffd83dbSDimitry Andric 
1275ffd83dbSDimitry Andric     if (read32be(&arch[i].cputype) != target->cpuType ||
1285ffd83dbSDimitry Andric         read32be(&arch[i].cpusubtype) != target->cpuSubtype)
1295ffd83dbSDimitry Andric       continue;
1305ffd83dbSDimitry Andric 
1315ffd83dbSDimitry Andric     uint32_t offset = read32be(&arch[i].offset);
1325ffd83dbSDimitry Andric     uint32_t size = read32be(&arch[i].size);
1335ffd83dbSDimitry Andric     if (offset + size > mbref.getBufferSize())
1345ffd83dbSDimitry Andric       error(path + ": slice extends beyond end of file");
135*e8d8bef9SDimitry Andric     if (tar)
136*e8d8bef9SDimitry Andric       tar->append(relativeToRoot(path), mbref.getBuffer());
1375ffd83dbSDimitry Andric     return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc));
1385ffd83dbSDimitry Andric   }
1395ffd83dbSDimitry Andric 
1405ffd83dbSDimitry Andric   error("unable to find matching architecture in " + path);
1415ffd83dbSDimitry Andric   return None;
1425ffd83dbSDimitry Andric }
1435ffd83dbSDimitry Andric 
144*e8d8bef9SDimitry Andric const load_command *macho::findCommand(const mach_header_64 *hdr,
1455ffd83dbSDimitry Andric                                        uint32_t type) {
1465ffd83dbSDimitry Andric   const uint8_t *p =
1475ffd83dbSDimitry Andric       reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
1485ffd83dbSDimitry Andric 
1495ffd83dbSDimitry Andric   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
1505ffd83dbSDimitry Andric     auto *cmd = reinterpret_cast<const load_command *>(p);
1515ffd83dbSDimitry Andric     if (cmd->cmd == type)
1525ffd83dbSDimitry Andric       return cmd;
1535ffd83dbSDimitry Andric     p += cmd->cmdsize;
1545ffd83dbSDimitry Andric   }
1555ffd83dbSDimitry Andric   return nullptr;
1565ffd83dbSDimitry Andric }
1575ffd83dbSDimitry Andric 
158*e8d8bef9SDimitry Andric void ObjFile::parseSections(ArrayRef<section_64> sections) {
1595ffd83dbSDimitry Andric   subsections.reserve(sections.size());
1605ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
1615ffd83dbSDimitry Andric 
1625ffd83dbSDimitry Andric   for (const section_64 &sec : sections) {
1635ffd83dbSDimitry Andric     InputSection *isec = make<InputSection>();
1645ffd83dbSDimitry Andric     isec->file = this;
165*e8d8bef9SDimitry Andric     isec->name =
166*e8d8bef9SDimitry Andric         StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
167*e8d8bef9SDimitry Andric     isec->segname =
168*e8d8bef9SDimitry Andric         StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
1695ffd83dbSDimitry Andric     isec->data = {isZeroFill(sec.flags) ? nullptr : buf + sec.offset,
1705ffd83dbSDimitry Andric                   static_cast<size_t>(sec.size)};
1715ffd83dbSDimitry Andric     if (sec.align >= 32)
1725ffd83dbSDimitry Andric       error("alignment " + std::to_string(sec.align) + " of section " +
1735ffd83dbSDimitry Andric             isec->name + " is too large");
1745ffd83dbSDimitry Andric     else
1755ffd83dbSDimitry Andric       isec->align = 1 << sec.align;
1765ffd83dbSDimitry Andric     isec->flags = sec.flags;
177*e8d8bef9SDimitry Andric 
178*e8d8bef9SDimitry Andric     if (!(isDebugSection(isec->flags) &&
179*e8d8bef9SDimitry Andric           isec->segname == segment_names::dwarf)) {
1805ffd83dbSDimitry Andric       subsections.push_back({{0, isec}});
181*e8d8bef9SDimitry Andric     } else {
182*e8d8bef9SDimitry Andric       // Instead of emitting DWARF sections, we emit STABS symbols to the
183*e8d8bef9SDimitry Andric       // object files that contain them. We filter them out early to avoid
184*e8d8bef9SDimitry Andric       // parsing their relocations unnecessarily. But we must still push an
185*e8d8bef9SDimitry Andric       // empty map to ensure the indices line up for the remaining sections.
186*e8d8bef9SDimitry Andric       subsections.push_back({});
187*e8d8bef9SDimitry Andric       debugSections.push_back(isec);
188*e8d8bef9SDimitry Andric     }
1895ffd83dbSDimitry Andric   }
1905ffd83dbSDimitry Andric }
1915ffd83dbSDimitry Andric 
1925ffd83dbSDimitry Andric // Find the subsection corresponding to the greatest section offset that is <=
1935ffd83dbSDimitry Andric // that of the given offset.
1945ffd83dbSDimitry Andric //
1955ffd83dbSDimitry Andric // offset: an offset relative to the start of the original InputSection (before
1965ffd83dbSDimitry Andric // any subsection splitting has occurred). It will be updated to represent the
1975ffd83dbSDimitry Andric // same location as an offset relative to the start of the containing
1985ffd83dbSDimitry Andric // subsection.
1995ffd83dbSDimitry Andric static InputSection *findContainingSubsection(SubsectionMap &map,
2005ffd83dbSDimitry Andric                                               uint32_t *offset) {
2015ffd83dbSDimitry Andric   auto it = std::prev(map.upper_bound(*offset));
2025ffd83dbSDimitry Andric   *offset -= it->first;
2035ffd83dbSDimitry Andric   return it->second;
2045ffd83dbSDimitry Andric }
2055ffd83dbSDimitry Andric 
206*e8d8bef9SDimitry Andric void ObjFile::parseRelocations(const section_64 &sec,
2075ffd83dbSDimitry Andric                                SubsectionMap &subsecMap) {
2085ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
209*e8d8bef9SDimitry Andric   ArrayRef<relocation_info> relInfos(
210*e8d8bef9SDimitry Andric       reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
2115ffd83dbSDimitry Andric 
212*e8d8bef9SDimitry Andric   for (size_t i = 0; i < relInfos.size(); i++) {
213*e8d8bef9SDimitry Andric     // Paired relocations serve as Mach-O's method for attaching a
214*e8d8bef9SDimitry Andric     // supplemental datum to a primary relocation record. ELF does not
215*e8d8bef9SDimitry Andric     // need them because the *_RELOC_RELA records contain the extra
216*e8d8bef9SDimitry Andric     // addend field, vs. *_RELOC_REL which omit the addend.
217*e8d8bef9SDimitry Andric     //
218*e8d8bef9SDimitry Andric     // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
219*e8d8bef9SDimitry Andric     // and the paired *_RELOC_UNSIGNED record holds the minuend. The
220*e8d8bef9SDimitry Andric     // datum for each is a symbolic address. The result is the runtime
221*e8d8bef9SDimitry Andric     // offset between two addresses.
222*e8d8bef9SDimitry Andric     //
223*e8d8bef9SDimitry Andric     // The ARM64_RELOC_ADDEND record holds the addend, and the paired
224*e8d8bef9SDimitry Andric     // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
225*e8d8bef9SDimitry Andric     // base symbolic address.
226*e8d8bef9SDimitry Andric     //
227*e8d8bef9SDimitry Andric     // Note: X86 does not use *_RELOC_ADDEND because it can embed an
228*e8d8bef9SDimitry Andric     // addend into the instruction stream. On X86, a relocatable address
229*e8d8bef9SDimitry Andric     // field always occupies an entire contiguous sequence of byte(s),
230*e8d8bef9SDimitry Andric     // so there is no need to merge opcode bits with address
231*e8d8bef9SDimitry Andric     // bits. Therefore, it's easy and convenient to store addends in the
232*e8d8bef9SDimitry Andric     // instruction-stream bytes that would otherwise contain zeroes. By
233*e8d8bef9SDimitry Andric     // contrast, RISC ISAs such as ARM64 mix opcode bits with with
234*e8d8bef9SDimitry Andric     // address bits so that bitwise arithmetic is necessary to extract
235*e8d8bef9SDimitry Andric     // and insert them. Storing addends in the instruction stream is
236*e8d8bef9SDimitry Andric     // possible, but inconvenient and more costly at link time.
237*e8d8bef9SDimitry Andric 
238*e8d8bef9SDimitry Andric     relocation_info pairedInfo = relInfos[i];
239*e8d8bef9SDimitry Andric     relocation_info relInfo =
240*e8d8bef9SDimitry Andric         target->isPairedReloc(pairedInfo) ? relInfos[++i] : pairedInfo;
241*e8d8bef9SDimitry Andric     assert(i < relInfos.size());
242*e8d8bef9SDimitry Andric     if (relInfo.r_address & R_SCATTERED)
2435ffd83dbSDimitry Andric       fatal("TODO: Scattered relocations not supported");
2445ffd83dbSDimitry Andric 
2455ffd83dbSDimitry Andric     Reloc r;
246*e8d8bef9SDimitry Andric     r.type = relInfo.r_type;
247*e8d8bef9SDimitry Andric     r.pcrel = relInfo.r_pcrel;
248*e8d8bef9SDimitry Andric     r.length = relInfo.r_length;
249*e8d8bef9SDimitry Andric     r.offset = relInfo.r_address;
250*e8d8bef9SDimitry Andric     // For unpaired relocs, pairdInfo (just a copy of relInfo) is ignored
251*e8d8bef9SDimitry Andric     uint64_t rawAddend = target->getAddend(mb, sec, relInfo, pairedInfo);
252*e8d8bef9SDimitry Andric     if (relInfo.r_extern) {
253*e8d8bef9SDimitry Andric       r.referent = symbols[relInfo.r_symbolnum];
2545ffd83dbSDimitry Andric       r.addend = rawAddend;
2555ffd83dbSDimitry Andric     } else {
256*e8d8bef9SDimitry Andric       SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
257*e8d8bef9SDimitry Andric       const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
258*e8d8bef9SDimitry Andric       uint32_t referentOffset;
259*e8d8bef9SDimitry Andric       if (relInfo.r_pcrel) {
2605ffd83dbSDimitry Andric         // The implicit addend for pcrel section relocations is the pcrel offset
2615ffd83dbSDimitry Andric         // in terms of the addresses in the input file. Here we adjust it so
262*e8d8bef9SDimitry Andric         // that it describes the offset from the start of the referent section.
2635ffd83dbSDimitry Andric         // TODO: The offset of 4 is probably not right for ARM64, nor for
2645ffd83dbSDimitry Andric         //       relocations with r_length != 2.
265*e8d8bef9SDimitry Andric         referentOffset =
266*e8d8bef9SDimitry Andric             sec.addr + relInfo.r_address + 4 + rawAddend - referentSec.addr;
2675ffd83dbSDimitry Andric       } else {
2685ffd83dbSDimitry Andric         // The addend for a non-pcrel relocation is its absolute address.
269*e8d8bef9SDimitry Andric         referentOffset = rawAddend - referentSec.addr;
2705ffd83dbSDimitry Andric       }
271*e8d8bef9SDimitry Andric       r.referent = findContainingSubsection(referentSubsecMap, &referentOffset);
272*e8d8bef9SDimitry Andric       r.addend = referentOffset;
2735ffd83dbSDimitry Andric     }
2745ffd83dbSDimitry Andric 
2755ffd83dbSDimitry Andric     InputSection *subsec = findContainingSubsection(subsecMap, &r.offset);
2765ffd83dbSDimitry Andric     subsec->relocs.push_back(r);
2775ffd83dbSDimitry Andric   }
2785ffd83dbSDimitry Andric }
2795ffd83dbSDimitry Andric 
280*e8d8bef9SDimitry Andric static macho::Symbol *createDefined(const structs::nlist_64 &sym,
281*e8d8bef9SDimitry Andric                                     StringRef name, InputSection *isec,
282*e8d8bef9SDimitry Andric                                     uint32_t value) {
283*e8d8bef9SDimitry Andric   // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
284*e8d8bef9SDimitry Andric   // N_EXT: Global symbols
285*e8d8bef9SDimitry Andric   // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped
286*e8d8bef9SDimitry Andric   // N_PEXT: Does not occur in input files in practice,
287*e8d8bef9SDimitry Andric   //         a private extern must be external.
288*e8d8bef9SDimitry Andric   // 0: Translation-unit scoped. These are not in the symbol table.
289*e8d8bef9SDimitry Andric 
290*e8d8bef9SDimitry Andric   if (sym.n_type & (N_EXT | N_PEXT)) {
291*e8d8bef9SDimitry Andric     assert((sym.n_type & N_EXT) && "invalid input");
292*e8d8bef9SDimitry Andric     return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF,
293*e8d8bef9SDimitry Andric                               sym.n_type & N_PEXT);
294*e8d8bef9SDimitry Andric   }
295*e8d8bef9SDimitry Andric   return make<Defined>(name, isec, value, sym.n_desc & N_WEAK_DEF,
296*e8d8bef9SDimitry Andric                        /*isExternal=*/false, /*isPrivateExtern=*/false);
297*e8d8bef9SDimitry Andric }
298*e8d8bef9SDimitry Andric 
299*e8d8bef9SDimitry Andric // Absolute symbols are defined symbols that do not have an associated
300*e8d8bef9SDimitry Andric // InputSection. They cannot be weak.
301*e8d8bef9SDimitry Andric static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
302*e8d8bef9SDimitry Andric                                      StringRef name) {
303*e8d8bef9SDimitry Andric   if (sym.n_type & (N_EXT | N_PEXT)) {
304*e8d8bef9SDimitry Andric     assert((sym.n_type & N_EXT) && "invalid input");
305*e8d8bef9SDimitry Andric     return symtab->addDefined(name, nullptr, sym.n_value, /*isWeakDef=*/false,
306*e8d8bef9SDimitry Andric                               sym.n_type & N_PEXT);
307*e8d8bef9SDimitry Andric   }
308*e8d8bef9SDimitry Andric   return make<Defined>(name, nullptr, sym.n_value, /*isWeakDef=*/false,
309*e8d8bef9SDimitry Andric                        /*isExternal=*/false, /*isPrivateExtern=*/false);
310*e8d8bef9SDimitry Andric }
311*e8d8bef9SDimitry Andric 
312*e8d8bef9SDimitry Andric macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
313*e8d8bef9SDimitry Andric                                               StringRef name) {
314*e8d8bef9SDimitry Andric   uint8_t type = sym.n_type & N_TYPE;
315*e8d8bef9SDimitry Andric   switch (type) {
316*e8d8bef9SDimitry Andric   case N_UNDF:
317*e8d8bef9SDimitry Andric     return sym.n_value == 0
318*e8d8bef9SDimitry Andric                ? symtab->addUndefined(name, sym.n_desc & N_WEAK_REF)
319*e8d8bef9SDimitry Andric                : symtab->addCommon(name, this, sym.n_value,
320*e8d8bef9SDimitry Andric                                    1 << GET_COMM_ALIGN(sym.n_desc),
321*e8d8bef9SDimitry Andric                                    sym.n_type & N_PEXT);
322*e8d8bef9SDimitry Andric   case N_ABS:
323*e8d8bef9SDimitry Andric     return createAbsolute(sym, name);
324*e8d8bef9SDimitry Andric   case N_PBUD:
325*e8d8bef9SDimitry Andric   case N_INDR:
326*e8d8bef9SDimitry Andric     error("TODO: support symbols of type " + std::to_string(type));
327*e8d8bef9SDimitry Andric     return nullptr;
328*e8d8bef9SDimitry Andric   case N_SECT:
329*e8d8bef9SDimitry Andric     llvm_unreachable(
330*e8d8bef9SDimitry Andric         "N_SECT symbols should not be passed to parseNonSectionSymbol");
331*e8d8bef9SDimitry Andric   default:
332*e8d8bef9SDimitry Andric     llvm_unreachable("invalid symbol type");
333*e8d8bef9SDimitry Andric   }
334*e8d8bef9SDimitry Andric }
335*e8d8bef9SDimitry Andric 
336*e8d8bef9SDimitry Andric void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
3375ffd83dbSDimitry Andric                            const char *strtab, bool subsectionsViaSymbols) {
3385ffd83dbSDimitry Andric   // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
3395ffd83dbSDimitry Andric   // out-of-sequence.
3405ffd83dbSDimitry Andric   symbols.resize(nList.size());
3415ffd83dbSDimitry Andric   std::vector<size_t> altEntrySymIdxs;
3425ffd83dbSDimitry Andric 
3435ffd83dbSDimitry Andric   for (size_t i = 0, n = nList.size(); i < n; ++i) {
3445ffd83dbSDimitry Andric     const structs::nlist_64 &sym = nList[i];
3455ffd83dbSDimitry Andric     StringRef name = strtab + sym.n_strx;
346*e8d8bef9SDimitry Andric 
347*e8d8bef9SDimitry Andric     if ((sym.n_type & N_TYPE) != N_SECT) {
348*e8d8bef9SDimitry Andric       symbols[i] = parseNonSectionSymbol(sym, name);
3495ffd83dbSDimitry Andric       continue;
3505ffd83dbSDimitry Andric     }
3515ffd83dbSDimitry Andric 
3525ffd83dbSDimitry Andric     const section_64 &sec = sectionHeaders[sym.n_sect - 1];
3535ffd83dbSDimitry Andric     SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
354*e8d8bef9SDimitry Andric     assert(!subsecMap.empty());
3555ffd83dbSDimitry Andric     uint64_t offset = sym.n_value - sec.addr;
3565ffd83dbSDimitry Andric 
3575ffd83dbSDimitry Andric     // If the input file does not use subsections-via-symbols, all symbols can
3585ffd83dbSDimitry Andric     // use the same subsection. Otherwise, we must split the sections along
3595ffd83dbSDimitry Andric     // symbol boundaries.
3605ffd83dbSDimitry Andric     if (!subsectionsViaSymbols) {
361*e8d8bef9SDimitry Andric       symbols[i] = createDefined(sym, name, subsecMap[0], offset);
3625ffd83dbSDimitry Andric       continue;
3635ffd83dbSDimitry Andric     }
3645ffd83dbSDimitry Andric 
3655ffd83dbSDimitry Andric     // nList entries aren't necessarily arranged in address order. Therefore,
3665ffd83dbSDimitry Andric     // we can't create alt-entry symbols at this point because a later symbol
3675ffd83dbSDimitry Andric     // may split its section, which may affect which subsection the alt-entry
3685ffd83dbSDimitry Andric     // symbol is assigned to. So we need to handle them in a second pass below.
3695ffd83dbSDimitry Andric     if (sym.n_desc & N_ALT_ENTRY) {
3705ffd83dbSDimitry Andric       altEntrySymIdxs.push_back(i);
3715ffd83dbSDimitry Andric       continue;
3725ffd83dbSDimitry Andric     }
3735ffd83dbSDimitry Andric 
3745ffd83dbSDimitry Andric     // Find the subsection corresponding to the greatest section offset that is
3755ffd83dbSDimitry Andric     // <= that of the current symbol. The subsection that we find either needs
3765ffd83dbSDimitry Andric     // to be used directly or split in two.
3775ffd83dbSDimitry Andric     uint32_t firstSize = offset;
3785ffd83dbSDimitry Andric     InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize);
3795ffd83dbSDimitry Andric 
3805ffd83dbSDimitry Andric     if (firstSize == 0) {
3815ffd83dbSDimitry Andric       // Alias of an existing symbol, or the first symbol in the section. These
3825ffd83dbSDimitry Andric       // are handled by reusing the existing section.
383*e8d8bef9SDimitry Andric       symbols[i] = createDefined(sym, name, firstIsec, 0);
3845ffd83dbSDimitry Andric       continue;
3855ffd83dbSDimitry Andric     }
3865ffd83dbSDimitry Andric 
3875ffd83dbSDimitry Andric     // We saw a symbol definition at a new offset. Split the section into two
3885ffd83dbSDimitry Andric     // subsections. The new symbol uses the second subsection.
3895ffd83dbSDimitry Andric     auto *secondIsec = make<InputSection>(*firstIsec);
3905ffd83dbSDimitry Andric     secondIsec->data = firstIsec->data.slice(firstSize);
3915ffd83dbSDimitry Andric     firstIsec->data = firstIsec->data.slice(0, firstSize);
3925ffd83dbSDimitry Andric     // TODO: ld64 appears to preserve the original alignment as well as each
3935ffd83dbSDimitry Andric     // subsection's offset from the last aligned address. We should consider
3945ffd83dbSDimitry Andric     // emulating that behavior.
3955ffd83dbSDimitry Andric     secondIsec->align = MinAlign(firstIsec->align, offset);
3965ffd83dbSDimitry Andric 
3975ffd83dbSDimitry Andric     subsecMap[offset] = secondIsec;
3985ffd83dbSDimitry Andric     // By construction, the symbol will be at offset zero in the new section.
399*e8d8bef9SDimitry Andric     symbols[i] = createDefined(sym, name, secondIsec, 0);
4005ffd83dbSDimitry Andric   }
4015ffd83dbSDimitry Andric 
4025ffd83dbSDimitry Andric   for (size_t idx : altEntrySymIdxs) {
4035ffd83dbSDimitry Andric     const structs::nlist_64 &sym = nList[idx];
404*e8d8bef9SDimitry Andric     StringRef name = strtab + sym.n_strx;
4055ffd83dbSDimitry Andric     SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
4065ffd83dbSDimitry Andric     uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
4075ffd83dbSDimitry Andric     InputSection *subsec = findContainingSubsection(subsecMap, &off);
408*e8d8bef9SDimitry Andric     symbols[idx] = createDefined(sym, name, subsec, off);
4095ffd83dbSDimitry Andric   }
4105ffd83dbSDimitry Andric }
4115ffd83dbSDimitry Andric 
412*e8d8bef9SDimitry Andric OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
413*e8d8bef9SDimitry Andric                        StringRef sectName)
414*e8d8bef9SDimitry Andric     : InputFile(OpaqueKind, mb) {
415*e8d8bef9SDimitry Andric   InputSection *isec = make<InputSection>();
416*e8d8bef9SDimitry Andric   isec->file = this;
417*e8d8bef9SDimitry Andric   isec->name = sectName.take_front(16);
418*e8d8bef9SDimitry Andric   isec->segname = segName.take_front(16);
419*e8d8bef9SDimitry Andric   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
420*e8d8bef9SDimitry Andric   isec->data = {buf, mb.getBufferSize()};
421*e8d8bef9SDimitry Andric   subsections.push_back({{0, isec}});
422*e8d8bef9SDimitry Andric }
423*e8d8bef9SDimitry Andric 
424*e8d8bef9SDimitry Andric ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
425*e8d8bef9SDimitry Andric     : InputFile(ObjKind, mb), modTime(modTime) {
426*e8d8bef9SDimitry Andric   this->archiveName = std::string(archiveName);
427*e8d8bef9SDimitry Andric 
4285ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
4295ffd83dbSDimitry Andric   auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
4305ffd83dbSDimitry Andric 
431*e8d8bef9SDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_LINKER_OPTION)) {
432*e8d8bef9SDimitry Andric     auto *c = reinterpret_cast<const linker_option_command *>(cmd);
433*e8d8bef9SDimitry Andric     StringRef data{reinterpret_cast<const char *>(c + 1),
434*e8d8bef9SDimitry Andric                    c->cmdsize - sizeof(linker_option_command)};
435*e8d8bef9SDimitry Andric     parseLCLinkerOption(this, c->count, data);
436*e8d8bef9SDimitry Andric   }
437*e8d8bef9SDimitry Andric 
4385ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
4395ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
4405ffd83dbSDimitry Andric     sectionHeaders = ArrayRef<section_64>{
4415ffd83dbSDimitry Andric         reinterpret_cast<const section_64 *>(c + 1), c->nsects};
4425ffd83dbSDimitry Andric     parseSections(sectionHeaders);
4435ffd83dbSDimitry Andric   }
4445ffd83dbSDimitry Andric 
4455ffd83dbSDimitry Andric   // TODO: Error on missing LC_SYMTAB?
4465ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
4475ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const symtab_command *>(cmd);
4485ffd83dbSDimitry Andric     ArrayRef<structs::nlist_64> nList(
4495ffd83dbSDimitry Andric         reinterpret_cast<const structs::nlist_64 *>(buf + c->symoff), c->nsyms);
4505ffd83dbSDimitry Andric     const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
4515ffd83dbSDimitry Andric     bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
4525ffd83dbSDimitry Andric     parseSymbols(nList, strtab, subsectionsViaSymbols);
4535ffd83dbSDimitry Andric   }
4545ffd83dbSDimitry Andric 
4555ffd83dbSDimitry Andric   // The relocations may refer to the symbols, so we parse them after we have
4565ffd83dbSDimitry Andric   // parsed all the symbols.
4575ffd83dbSDimitry Andric   for (size_t i = 0, n = subsections.size(); i < n; ++i)
458*e8d8bef9SDimitry Andric     if (!subsections[i].empty())
4595ffd83dbSDimitry Andric       parseRelocations(sectionHeaders[i], subsections[i]);
460*e8d8bef9SDimitry Andric 
461*e8d8bef9SDimitry Andric   parseDebugInfo();
462*e8d8bef9SDimitry Andric }
463*e8d8bef9SDimitry Andric 
464*e8d8bef9SDimitry Andric void ObjFile::parseDebugInfo() {
465*e8d8bef9SDimitry Andric   std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
466*e8d8bef9SDimitry Andric   if (!dObj)
467*e8d8bef9SDimitry Andric     return;
468*e8d8bef9SDimitry Andric 
469*e8d8bef9SDimitry Andric   auto *ctx = make<DWARFContext>(
470*e8d8bef9SDimitry Andric       std::move(dObj), "",
471*e8d8bef9SDimitry Andric       [&](Error err) {
472*e8d8bef9SDimitry Andric         warn(toString(this) + ": " + toString(std::move(err)));
473*e8d8bef9SDimitry Andric       },
474*e8d8bef9SDimitry Andric       [&](Error warning) {
475*e8d8bef9SDimitry Andric         warn(toString(this) + ": " + toString(std::move(warning)));
476*e8d8bef9SDimitry Andric       });
477*e8d8bef9SDimitry Andric 
478*e8d8bef9SDimitry Andric   // TODO: Since object files can contain a lot of DWARF info, we should verify
479*e8d8bef9SDimitry Andric   // that we are parsing just the info we need
480*e8d8bef9SDimitry Andric   const DWARFContext::compile_unit_range &units = ctx->compile_units();
481*e8d8bef9SDimitry Andric   auto it = units.begin();
482*e8d8bef9SDimitry Andric   compileUnit = it->get();
483*e8d8bef9SDimitry Andric   assert(std::next(it) == units.end());
484*e8d8bef9SDimitry Andric }
485*e8d8bef9SDimitry Andric 
486*e8d8bef9SDimitry Andric // The path can point to either a dylib or a .tbd file.
487*e8d8bef9SDimitry Andric static Optional<DylibFile *> loadDylib(StringRef path, DylibFile *umbrella) {
488*e8d8bef9SDimitry Andric   Optional<MemoryBufferRef> mbref = readFile(path);
489*e8d8bef9SDimitry Andric   if (!mbref) {
490*e8d8bef9SDimitry Andric     error("could not read dylib file at " + path);
491*e8d8bef9SDimitry Andric     return {};
492*e8d8bef9SDimitry Andric   }
493*e8d8bef9SDimitry Andric   return loadDylib(*mbref, umbrella);
494*e8d8bef9SDimitry Andric }
495*e8d8bef9SDimitry Andric 
496*e8d8bef9SDimitry Andric // TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
497*e8d8bef9SDimitry Andric // the first document storing child pointers to the rest of them. When we are
498*e8d8bef9SDimitry Andric // processing a given TBD file, we store that top-level document here. When
499*e8d8bef9SDimitry Andric // processing re-exports, we search its children for potentially matching
500*e8d8bef9SDimitry Andric // documents in the same TBD file. Note that the children themselves don't
501*e8d8bef9SDimitry Andric // point to further documents, i.e. this is a two-level tree.
502*e8d8bef9SDimitry Andric //
503*e8d8bef9SDimitry Andric // ld64 allows a TAPI re-export to reference documents nested within other TBD
504*e8d8bef9SDimitry Andric // files, but that seems like a strange design, so this is an intentional
505*e8d8bef9SDimitry Andric // deviation.
//
// Lifecycle: the DylibFile(const InterfaceFile &, DylibFile *) constructor
// sets this pointer (only if it is currently null) right before loading the
// document's re-exports and resets it to nullptr afterwards;
// loadReexportHelper() reads it to search the top-level document's children.
506*e8d8bef9SDimitry Andric const InterfaceFile *currentTopLevelTapi = nullptr;
507*e8d8bef9SDimitry Andric 
508*e8d8bef9SDimitry Andric // Re-exports can either refer to on-disk files, or to documents within .tbd
509*e8d8bef9SDimitry Andric // files.
510*e8d8bef9SDimitry Andric static Optional<DylibFile *> loadReexportHelper(StringRef path,
511*e8d8bef9SDimitry Andric                                                 DylibFile *umbrella) {
512*e8d8bef9SDimitry Andric   if (path::is_absolute(path, path::Style::posix))
513*e8d8bef9SDimitry Andric     for (StringRef root : config->systemLibraryRoots)
514*e8d8bef9SDimitry Andric       if (Optional<std::string> dylibPath =
515*e8d8bef9SDimitry Andric               resolveDylibPath((root + path).str()))
516*e8d8bef9SDimitry Andric         return loadDylib(*dylibPath, umbrella);
517*e8d8bef9SDimitry Andric 
518*e8d8bef9SDimitry Andric   // TODO: Expand @loader_path, @executable_path etc
519*e8d8bef9SDimitry Andric 
520*e8d8bef9SDimitry Andric   if (currentTopLevelTapi) {
521*e8d8bef9SDimitry Andric     for (InterfaceFile &child :
522*e8d8bef9SDimitry Andric          make_pointee_range(currentTopLevelTapi->documents())) {
523*e8d8bef9SDimitry Andric       if (path == child.getInstallName())
524*e8d8bef9SDimitry Andric         return make<DylibFile>(child, umbrella);
525*e8d8bef9SDimitry Andric       assert(child.documents().empty());
526*e8d8bef9SDimitry Andric     }
527*e8d8bef9SDimitry Andric   }
528*e8d8bef9SDimitry Andric 
529*e8d8bef9SDimitry Andric   if (Optional<std::string> dylibPath = resolveDylibPath(path))
530*e8d8bef9SDimitry Andric     return loadDylib(*dylibPath, umbrella);
531*e8d8bef9SDimitry Andric 
532*e8d8bef9SDimitry Andric   error("unable to locate re-export with install name " + path);
533*e8d8bef9SDimitry Andric   return {};
534*e8d8bef9SDimitry Andric }
535*e8d8bef9SDimitry Andric 
536*e8d8bef9SDimitry Andric // If a re-exported dylib is public (lives in /usr/lib or
537*e8d8bef9SDimitry Andric // /System/Library/Frameworks), then it is considered implicitly linked: we
538*e8d8bef9SDimitry Andric // should bind to its symbols directly instead of via the re-exporting umbrella
539*e8d8bef9SDimitry Andric // library.
540*e8d8bef9SDimitry Andric static bool isImplicitlyLinked(StringRef path) {
541*e8d8bef9SDimitry Andric   if (!config->implicitDylibs)
542*e8d8bef9SDimitry Andric     return false;
543*e8d8bef9SDimitry Andric 
544*e8d8bef9SDimitry Andric   if (path::parent_path(path) == "/usr/lib")
545*e8d8bef9SDimitry Andric     return true;
546*e8d8bef9SDimitry Andric 
547*e8d8bef9SDimitry Andric   // Match /System/Library/Frameworks/$FOO.framework/**/$FOO
548*e8d8bef9SDimitry Andric   if (path.consume_front("/System/Library/Frameworks/")) {
549*e8d8bef9SDimitry Andric     StringRef frameworkName = path.take_until([](char c) { return c == '.'; });
550*e8d8bef9SDimitry Andric     return path::filename(path) == frameworkName;
551*e8d8bef9SDimitry Andric   }
552*e8d8bef9SDimitry Andric 
553*e8d8bef9SDimitry Andric   return false;
554*e8d8bef9SDimitry Andric }
555*e8d8bef9SDimitry Andric 
556*e8d8bef9SDimitry Andric void loadReexport(StringRef path, DylibFile *umbrella) {
557*e8d8bef9SDimitry Andric   Optional<DylibFile *> reexport = loadReexportHelper(path, umbrella);
558*e8d8bef9SDimitry Andric   if (reexport && isImplicitlyLinked(path))
559*e8d8bef9SDimitry Andric     inputFiles.insert(*reexport);
5605ffd83dbSDimitry Andric }
5615ffd83dbSDimitry Andric 
5625ffd83dbSDimitry Andric DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
563*e8d8bef9SDimitry Andric     : InputFile(DylibKind, mb), refState(RefState::Unreferenced) {
5645ffd83dbSDimitry Andric   if (umbrella == nullptr)
5655ffd83dbSDimitry Andric     umbrella = this;
5665ffd83dbSDimitry Andric 
5675ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
5685ffd83dbSDimitry Andric   auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
5695ffd83dbSDimitry Andric 
5705ffd83dbSDimitry Andric   // Initialize dylibName.
5715ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
5725ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const dylib_command *>(cmd);
573*e8d8bef9SDimitry Andric     currentVersion = read32le(&c->dylib.current_version);
574*e8d8bef9SDimitry Andric     compatibilityVersion = read32le(&c->dylib.compatibility_version);
5755ffd83dbSDimitry Andric     dylibName = reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
5765ffd83dbSDimitry Andric   } else {
577*e8d8bef9SDimitry Andric     error("dylib " + toString(this) + " missing LC_ID_DYLIB load command");
5785ffd83dbSDimitry Andric     return;
5795ffd83dbSDimitry Andric   }
5805ffd83dbSDimitry Andric 
5815ffd83dbSDimitry Andric   // Initialize symbols.
582*e8d8bef9SDimitry Andric   DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella;
5835ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) {
5845ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const dyld_info_command *>(cmd);
5855ffd83dbSDimitry Andric     parseTrie(buf + c->export_off, c->export_size,
5865ffd83dbSDimitry Andric               [&](const Twine &name, uint64_t flags) {
587*e8d8bef9SDimitry Andric                 bool isWeakDef = flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
588*e8d8bef9SDimitry Andric                 bool isTlv = flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
589*e8d8bef9SDimitry Andric                 symbols.push_back(symtab->addDylib(
590*e8d8bef9SDimitry Andric                     saver.save(name), exportingFile, isWeakDef, isTlv));
5915ffd83dbSDimitry Andric               });
5925ffd83dbSDimitry Andric   } else {
593*e8d8bef9SDimitry Andric     error("LC_DYLD_INFO_ONLY not found in " + toString(this));
5945ffd83dbSDimitry Andric     return;
5955ffd83dbSDimitry Andric   }
5965ffd83dbSDimitry Andric 
5975ffd83dbSDimitry Andric   if (hdr->flags & MH_NO_REEXPORTED_DYLIBS)
5985ffd83dbSDimitry Andric     return;
5995ffd83dbSDimitry Andric 
6005ffd83dbSDimitry Andric   const uint8_t *p =
6015ffd83dbSDimitry Andric       reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
6025ffd83dbSDimitry Andric   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
6035ffd83dbSDimitry Andric     auto *cmd = reinterpret_cast<const load_command *>(p);
6045ffd83dbSDimitry Andric     p += cmd->cmdsize;
6055ffd83dbSDimitry Andric     if (cmd->cmd != LC_REEXPORT_DYLIB)
6065ffd83dbSDimitry Andric       continue;
6075ffd83dbSDimitry Andric 
6085ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const dylib_command *>(cmd);
6095ffd83dbSDimitry Andric     StringRef reexportPath =
6105ffd83dbSDimitry Andric         reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
611*e8d8bef9SDimitry Andric     loadReexport(reexportPath, umbrella);
6125ffd83dbSDimitry Andric   }
6135ffd83dbSDimitry Andric }
6145ffd83dbSDimitry Andric 
615*e8d8bef9SDimitry Andric DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
616*e8d8bef9SDimitry Andric     : InputFile(DylibKind, interface), refState(RefState::Unreferenced) {
6175ffd83dbSDimitry Andric   if (umbrella == nullptr)
6185ffd83dbSDimitry Andric     umbrella = this;
6195ffd83dbSDimitry Andric 
620*e8d8bef9SDimitry Andric   dylibName = saver.save(interface.getInstallName());
621*e8d8bef9SDimitry Andric   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
622*e8d8bef9SDimitry Andric   currentVersion = interface.getCurrentVersion().rawValue();
623*e8d8bef9SDimitry Andric   DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella;
624*e8d8bef9SDimitry Andric   auto addSymbol = [&](const Twine &name) -> void {
625*e8d8bef9SDimitry Andric     symbols.push_back(symtab->addDylib(saver.save(name), exportingFile,
626*e8d8bef9SDimitry Andric                                        /*isWeakDef=*/false,
627*e8d8bef9SDimitry Andric                                        /*isTlv=*/false));
628*e8d8bef9SDimitry Andric   };
6295ffd83dbSDimitry Andric   // TODO(compnerd) filter out symbols based on the target platform
630*e8d8bef9SDimitry Andric   // TODO: handle weak defs, thread locals
631*e8d8bef9SDimitry Andric   for (const auto symbol : interface.symbols()) {
632*e8d8bef9SDimitry Andric     if (!symbol->getArchitectures().has(config->arch))
633*e8d8bef9SDimitry Andric       continue;
634*e8d8bef9SDimitry Andric 
635*e8d8bef9SDimitry Andric     switch (symbol->getKind()) {
636*e8d8bef9SDimitry Andric     case SymbolKind::GlobalSymbol:
637*e8d8bef9SDimitry Andric       addSymbol(symbol->getName());
638*e8d8bef9SDimitry Andric       break;
639*e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCClass:
640*e8d8bef9SDimitry Andric       // XXX ld64 only creates these symbols when -ObjC is passed in. We may
641*e8d8bef9SDimitry Andric       // want to emulate that.
642*e8d8bef9SDimitry Andric       addSymbol(objc::klass + symbol->getName());
643*e8d8bef9SDimitry Andric       addSymbol(objc::metaclass + symbol->getName());
644*e8d8bef9SDimitry Andric       break;
645*e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCClassEHType:
646*e8d8bef9SDimitry Andric       addSymbol(objc::ehtype + symbol->getName());
647*e8d8bef9SDimitry Andric       break;
648*e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCInstanceVariable:
649*e8d8bef9SDimitry Andric       addSymbol(objc::ivar + symbol->getName());
650*e8d8bef9SDimitry Andric       break;
651*e8d8bef9SDimitry Andric     }
6525ffd83dbSDimitry Andric   }
6535ffd83dbSDimitry Andric 
654*e8d8bef9SDimitry Andric   bool isTopLevelTapi = false;
655*e8d8bef9SDimitry Andric   if (currentTopLevelTapi == nullptr) {
656*e8d8bef9SDimitry Andric     currentTopLevelTapi = &interface;
657*e8d8bef9SDimitry Andric     isTopLevelTapi = true;
658*e8d8bef9SDimitry Andric   }
659*e8d8bef9SDimitry Andric 
660*e8d8bef9SDimitry Andric   for (InterfaceFileRef intfRef : interface.reexportedLibraries())
661*e8d8bef9SDimitry Andric     loadReexport(intfRef.getInstallName(), umbrella);
662*e8d8bef9SDimitry Andric 
663*e8d8bef9SDimitry Andric   if (isTopLevelTapi)
664*e8d8bef9SDimitry Andric     currentTopLevelTapi = nullptr;
665*e8d8bef9SDimitry Andric }
666*e8d8bef9SDimitry Andric 
667*e8d8bef9SDimitry Andric ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f)
6685ffd83dbSDimitry Andric     : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
6695ffd83dbSDimitry Andric   for (const object::Archive::Symbol &sym : file->symbols())
6705ffd83dbSDimitry Andric     symtab->addLazy(sym.getName(), this, sym);
6715ffd83dbSDimitry Andric }
6725ffd83dbSDimitry Andric 
6735ffd83dbSDimitry Andric void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
6745ffd83dbSDimitry Andric   object::Archive::Child c =
6755ffd83dbSDimitry Andric       CHECK(sym.getMember(), toString(this) +
6765ffd83dbSDimitry Andric                                  ": could not get the member for symbol " +
677*e8d8bef9SDimitry Andric                                  toMachOString(sym));
6785ffd83dbSDimitry Andric 
6795ffd83dbSDimitry Andric   if (!seen.insert(c.getChildOffset()).second)
6805ffd83dbSDimitry Andric     return;
6815ffd83dbSDimitry Andric 
6825ffd83dbSDimitry Andric   MemoryBufferRef mb =
6835ffd83dbSDimitry Andric       CHECK(c.getMemoryBufferRef(),
6845ffd83dbSDimitry Andric             toString(this) +
6855ffd83dbSDimitry Andric                 ": could not get the buffer for the member defining symbol " +
686*e8d8bef9SDimitry Andric                 toMachOString(sym));
687*e8d8bef9SDimitry Andric 
688*e8d8bef9SDimitry Andric   if (tar && c.getParent()->isThin())
689*e8d8bef9SDimitry Andric     tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer());
690*e8d8bef9SDimitry Andric 
691*e8d8bef9SDimitry Andric   uint32_t modTime = toTimeT(
692*e8d8bef9SDimitry Andric       CHECK(c.getLastModified(), toString(this) +
693*e8d8bef9SDimitry Andric                                      ": could not get the modification time "
694*e8d8bef9SDimitry Andric                                      "for the member defining symbol " +
695*e8d8bef9SDimitry Andric                                      toMachOString(sym)));
696*e8d8bef9SDimitry Andric 
697*e8d8bef9SDimitry Andric   // `sym` is owned by a LazySym, which will be replace<>() by make<ObjFile>
698*e8d8bef9SDimitry Andric   // and become invalid after that call. Copy it to the stack so we can refer
699*e8d8bef9SDimitry Andric   // to it later.
700*e8d8bef9SDimitry Andric   const object::Archive::Symbol sym_copy = sym;
701*e8d8bef9SDimitry Andric 
702*e8d8bef9SDimitry Andric   InputFile *file;
703*e8d8bef9SDimitry Andric   switch (identify_magic(mb.getBuffer())) {
704*e8d8bef9SDimitry Andric   case file_magic::macho_object:
705*e8d8bef9SDimitry Andric     file = make<ObjFile>(mb, modTime, getName());
706*e8d8bef9SDimitry Andric     break;
707*e8d8bef9SDimitry Andric   case file_magic::bitcode:
708*e8d8bef9SDimitry Andric     file = make<BitcodeFile>(mb);
709*e8d8bef9SDimitry Andric     break;
710*e8d8bef9SDimitry Andric   default:
711*e8d8bef9SDimitry Andric     StringRef bufname =
712*e8d8bef9SDimitry Andric         CHECK(c.getName(), toString(this) + ": could not get buffer name");
713*e8d8bef9SDimitry Andric     error(toString(this) + ": archive member " + bufname +
714*e8d8bef9SDimitry Andric           " has unhandled file type");
715*e8d8bef9SDimitry Andric     return;
716*e8d8bef9SDimitry Andric   }
717*e8d8bef9SDimitry Andric   inputFiles.insert(file);
718*e8d8bef9SDimitry Andric 
719*e8d8bef9SDimitry Andric   // ld64 doesn't demangle sym here even with -demangle. Match that, so
720*e8d8bef9SDimitry Andric   // intentionally no call to toMachOString() here.
721*e8d8bef9SDimitry Andric   printArchiveMemberLoad(sym_copy.getName(), file);
7225ffd83dbSDimitry Andric }
7235ffd83dbSDimitry Andric 
724*e8d8bef9SDimitry Andric BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
725*e8d8bef9SDimitry Andric     : InputFile(BitcodeKind, mbref) {
726*e8d8bef9SDimitry Andric   obj = check(lto::InputFile::create(mbref));
7275ffd83dbSDimitry Andric }
728