xref: /freebsd/contrib/llvm-project/lld/MachO/InputFiles.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
15ffd83dbSDimitry Andric //===- InputFiles.cpp -----------------------------------------------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric // This file contains functions to parse Mach-O object files. In this comment,
105ffd83dbSDimitry Andric // we describe the Mach-O file structure and how we parse it.
115ffd83dbSDimitry Andric //
125ffd83dbSDimitry Andric // Mach-O is not very different from ELF or COFF. The notion of symbols,
135ffd83dbSDimitry Andric // sections and relocations exists in Mach-O as it does in ELF and COFF.
145ffd83dbSDimitry Andric //
155ffd83dbSDimitry Andric // Perhaps the notion that is new to those who know ELF/COFF is "subsections".
165ffd83dbSDimitry Andric // In ELF/COFF, sections are an atomic unit of data copied from input files to
175ffd83dbSDimitry Andric // output files. When we merge or garbage-collect sections, we treat each
185ffd83dbSDimitry Andric // section as an atomic unit. In Mach-O, that's not the case. Sections can
195ffd83dbSDimitry Andric // consist of multiple subsections, and subsections are a unit of merging and
205ffd83dbSDimitry Andric // garbage-collecting. Therefore, Mach-O's subsections are more similar to
215ffd83dbSDimitry Andric // ELF/COFF's sections than Mach-O's sections are.
225ffd83dbSDimitry Andric //
235ffd83dbSDimitry Andric // A section can have multiple symbols. A symbol that does not have the
245ffd83dbSDimitry Andric // N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
255ffd83dbSDimitry Andric // definition, a symbol is always present at the beginning of each subsection. A
265ffd83dbSDimitry Andric // symbol with N_ALT_ENTRY attribute does not start a new subsection and can
275ffd83dbSDimitry Andric // point to a middle of a subsection.
285ffd83dbSDimitry Andric //
295ffd83dbSDimitry Andric // The notion of subsections also affects how relocations are represented in
305ffd83dbSDimitry Andric // Mach-O. All references within a section need to be explicitly represented as
315ffd83dbSDimitry Andric // relocations if they refer to different subsections, because we obviously need
325ffd83dbSDimitry Andric // to fix up addresses if subsections are laid out in an output file differently
335ffd83dbSDimitry Andric // than they were in object files. To represent that, Mach-O relocations can
345ffd83dbSDimitry Andric // refer to an unnamed location via its address. Scattered relocations (those
355ffd83dbSDimitry Andric // with the R_SCATTERED bit set) always refer to unnamed locations.
365ffd83dbSDimitry Andric // Non-scattered relocations refer to an unnamed location if r_extern is not set
375ffd83dbSDimitry Andric // and r_symbolnum is zero.
385ffd83dbSDimitry Andric //
// Apart from the above differences, your knowledge of ELF and COFF should
// carry over to Mach-O.
415ffd83dbSDimitry Andric //
425ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
435ffd83dbSDimitry Andric 
445ffd83dbSDimitry Andric #include "InputFiles.h"
455ffd83dbSDimitry Andric #include "Config.h"
46e8d8bef9SDimitry Andric #include "Driver.h"
47e8d8bef9SDimitry Andric #include "Dwarf.h"
4881ad6265SDimitry Andric #include "EhFrame.h"
495ffd83dbSDimitry Andric #include "ExportTrie.h"
505ffd83dbSDimitry Andric #include "InputSection.h"
515ffd83dbSDimitry Andric #include "MachOStructs.h"
52e8d8bef9SDimitry Andric #include "ObjC.h"
535ffd83dbSDimitry Andric #include "OutputSection.h"
54e8d8bef9SDimitry Andric #include "OutputSegment.h"
555ffd83dbSDimitry Andric #include "SymbolTable.h"
565ffd83dbSDimitry Andric #include "Symbols.h"
57fe6060f1SDimitry Andric #include "SyntheticSections.h"
585ffd83dbSDimitry Andric #include "Target.h"
595ffd83dbSDimitry Andric 
6004eeddc0SDimitry Andric #include "lld/Common/CommonLinkerContext.h"
61e8d8bef9SDimitry Andric #include "lld/Common/DWARF.h"
62e8d8bef9SDimitry Andric #include "lld/Common/Reproduce.h"
63e8d8bef9SDimitry Andric #include "llvm/ADT/iterator.h"
645ffd83dbSDimitry Andric #include "llvm/BinaryFormat/MachO.h"
65e8d8bef9SDimitry Andric #include "llvm/LTO/LTO.h"
6604eeddc0SDimitry Andric #include "llvm/Support/BinaryStreamReader.h"
675ffd83dbSDimitry Andric #include "llvm/Support/Endian.h"
6881ad6265SDimitry Andric #include "llvm/Support/LEB128.h"
695ffd83dbSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
705ffd83dbSDimitry Andric #include "llvm/Support/Path.h"
71e8d8bef9SDimitry Andric #include "llvm/Support/TarWriter.h"
7204eeddc0SDimitry Andric #include "llvm/Support/TimeProfiler.h"
73fe6060f1SDimitry Andric #include "llvm/TextAPI/Architecture.h"
74fe6060f1SDimitry Andric #include "llvm/TextAPI/InterfaceFile.h"
755ffd83dbSDimitry Andric 
76bdd1243dSDimitry Andric #include <optional>
77349cc55cSDimitry Andric #include <type_traits>
78349cc55cSDimitry Andric 
795ffd83dbSDimitry Andric using namespace llvm;
805ffd83dbSDimitry Andric using namespace llvm::MachO;
815ffd83dbSDimitry Andric using namespace llvm::support::endian;
825ffd83dbSDimitry Andric using namespace llvm::sys;
835ffd83dbSDimitry Andric using namespace lld;
845ffd83dbSDimitry Andric using namespace lld::macho;
855ffd83dbSDimitry Andric 
86e8d8bef9SDimitry Andric // Returns "<internal>", "foo.a(bar.o)", or "baz.o".
87e8d8bef9SDimitry Andric std::string lld::toString(const InputFile *f) {
88e8d8bef9SDimitry Andric   if (!f)
89e8d8bef9SDimitry Andric     return "<internal>";
90fe6060f1SDimitry Andric 
91fe6060f1SDimitry Andric   // Multiple dylibs can be defined in one .tbd file.
92*06c3fb27SDimitry Andric   if (const auto *dylibFile = dyn_cast<DylibFile>(f))
93*06c3fb27SDimitry Andric     if (f->getName().ends_with(".tbd"))
94fe6060f1SDimitry Andric       return (f->getName() + "(" + dylibFile->installName + ")").str();
95fe6060f1SDimitry Andric 
96e8d8bef9SDimitry Andric   if (f->archiveName.empty())
97e8d8bef9SDimitry Andric     return std::string(f->getName());
98fe6060f1SDimitry Andric   return (f->archiveName + "(" + path::filename(f->getName()) + ")").str();
99e8d8bef9SDimitry Andric }
100e8d8bef9SDimitry Andric 
10181ad6265SDimitry Andric std::string lld::toString(const Section &sec) {
10281ad6265SDimitry Andric   return (toString(sec.file) + ":(" + sec.name + ")").str();
10381ad6265SDimitry Andric }
10481ad6265SDimitry Andric 
105e8d8bef9SDimitry Andric SetVector<InputFile *> macho::inputFiles;
106e8d8bef9SDimitry Andric std::unique_ptr<TarWriter> macho::tar;
107e8d8bef9SDimitry Andric int InputFile::idCount = 0;
1085ffd83dbSDimitry Andric 
109fe6060f1SDimitry Andric static VersionTuple decodeVersion(uint32_t version) {
110fe6060f1SDimitry Andric   unsigned major = version >> 16;
111fe6060f1SDimitry Andric   unsigned minor = (version >> 8) & 0xffu;
112fe6060f1SDimitry Andric   unsigned subMinor = version & 0xffu;
113fe6060f1SDimitry Andric   return VersionTuple(major, minor, subMinor);
114fe6060f1SDimitry Andric }
115fe6060f1SDimitry Andric 
116fe6060f1SDimitry Andric static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
117fe6060f1SDimitry Andric   if (!isa<ObjFile>(input) && !isa<DylibFile>(input))
118fe6060f1SDimitry Andric     return {};
119fe6060f1SDimitry Andric 
120fe6060f1SDimitry Andric   const char *hdr = input->mb.getBufferStart();
121fe6060f1SDimitry Andric 
12281ad6265SDimitry Andric   // "Zippered" object files can have multiple LC_BUILD_VERSION load commands.
123fe6060f1SDimitry Andric   std::vector<PlatformInfo> platformInfos;
124fe6060f1SDimitry Andric   for (auto *cmd : findCommands<build_version_command>(hdr, LC_BUILD_VERSION)) {
125fe6060f1SDimitry Andric     PlatformInfo info;
12604eeddc0SDimitry Andric     info.target.Platform = static_cast<PlatformType>(cmd->platform);
127*06c3fb27SDimitry Andric     info.target.MinDeployment = decodeVersion(cmd->minos);
128fe6060f1SDimitry Andric     platformInfos.emplace_back(std::move(info));
129fe6060f1SDimitry Andric   }
130fe6060f1SDimitry Andric   for (auto *cmd : findCommands<version_min_command>(
131fe6060f1SDimitry Andric            hdr, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
132fe6060f1SDimitry Andric            LC_VERSION_MIN_TVOS, LC_VERSION_MIN_WATCHOS)) {
133fe6060f1SDimitry Andric     PlatformInfo info;
134fe6060f1SDimitry Andric     switch (cmd->cmd) {
135fe6060f1SDimitry Andric     case LC_VERSION_MIN_MACOSX:
13604eeddc0SDimitry Andric       info.target.Platform = PLATFORM_MACOS;
137fe6060f1SDimitry Andric       break;
138fe6060f1SDimitry Andric     case LC_VERSION_MIN_IPHONEOS:
13904eeddc0SDimitry Andric       info.target.Platform = PLATFORM_IOS;
140fe6060f1SDimitry Andric       break;
141fe6060f1SDimitry Andric     case LC_VERSION_MIN_TVOS:
14204eeddc0SDimitry Andric       info.target.Platform = PLATFORM_TVOS;
143fe6060f1SDimitry Andric       break;
144fe6060f1SDimitry Andric     case LC_VERSION_MIN_WATCHOS:
14504eeddc0SDimitry Andric       info.target.Platform = PLATFORM_WATCHOS;
146fe6060f1SDimitry Andric       break;
147fe6060f1SDimitry Andric     }
148*06c3fb27SDimitry Andric     info.target.MinDeployment = decodeVersion(cmd->version);
149fe6060f1SDimitry Andric     platformInfos.emplace_back(std::move(info));
150fe6060f1SDimitry Andric   }
151fe6060f1SDimitry Andric 
152fe6060f1SDimitry Andric   return platformInfos;
153fe6060f1SDimitry Andric }
154fe6060f1SDimitry Andric 
155fe6060f1SDimitry Andric static bool checkCompatibility(const InputFile *input) {
156fe6060f1SDimitry Andric   std::vector<PlatformInfo> platformInfos = getPlatformInfos(input);
157fe6060f1SDimitry Andric   if (platformInfos.empty())
158fe6060f1SDimitry Andric     return true;
159fe6060f1SDimitry Andric 
160fe6060f1SDimitry Andric   auto it = find_if(platformInfos, [&](const PlatformInfo &info) {
161fe6060f1SDimitry Andric     return removeSimulator(info.target.Platform) ==
162fe6060f1SDimitry Andric            removeSimulator(config->platform());
163fe6060f1SDimitry Andric   });
164fe6060f1SDimitry Andric   if (it == platformInfos.end()) {
165fe6060f1SDimitry Andric     std::string platformNames;
166fe6060f1SDimitry Andric     raw_string_ostream os(platformNames);
167fe6060f1SDimitry Andric     interleave(
168fe6060f1SDimitry Andric         platformInfos, os,
169fe6060f1SDimitry Andric         [&](const PlatformInfo &info) {
170fe6060f1SDimitry Andric           os << getPlatformName(info.target.Platform);
171fe6060f1SDimitry Andric         },
172fe6060f1SDimitry Andric         "/");
173fe6060f1SDimitry Andric     error(toString(input) + " has platform " + platformNames +
174fe6060f1SDimitry Andric           Twine(", which is different from target platform ") +
175fe6060f1SDimitry Andric           getPlatformName(config->platform()));
176fe6060f1SDimitry Andric     return false;
177fe6060f1SDimitry Andric   }
178fe6060f1SDimitry Andric 
179*06c3fb27SDimitry Andric   if (it->target.MinDeployment > config->platformInfo.target.MinDeployment)
180*06c3fb27SDimitry Andric     warn(toString(input) + " has version " +
181*06c3fb27SDimitry Andric          it->target.MinDeployment.getAsString() +
182fe6060f1SDimitry Andric          ", which is newer than target minimum of " +
183*06c3fb27SDimitry Andric          config->platformInfo.target.MinDeployment.getAsString());
184fe6060f1SDimitry Andric 
185fe6060f1SDimitry Andric   return true;
186fe6060f1SDimitry Andric }
187fe6060f1SDimitry Andric 
188349cc55cSDimitry Andric // This cache mostly exists to store system libraries (and .tbds) as they're
189349cc55cSDimitry Andric // loaded, rather than the input archives, which are already cached at a higher
190349cc55cSDimitry Andric // level, and other files like the filelist that are only read once.
191349cc55cSDimitry Andric // Theoretically this caching could be more efficient by hoisting it, but that
192349cc55cSDimitry Andric // would require altering many callers to track the state.
193349cc55cSDimitry Andric DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
1945ffd83dbSDimitry Andric // Open a given file path and return it as a memory-mapped file.
195bdd1243dSDimitry Andric std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
196349cc55cSDimitry Andric   CachedHashStringRef key(path);
197349cc55cSDimitry Andric   auto entry = cachedReads.find(key);
198349cc55cSDimitry Andric   if (entry != cachedReads.end())
199349cc55cSDimitry Andric     return entry->second;
200349cc55cSDimitry Andric 
201fe6060f1SDimitry Andric   ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
202fe6060f1SDimitry Andric   if (std::error_code ec = mbOrErr.getError()) {
2035ffd83dbSDimitry Andric     error("cannot open " + path + ": " + ec.message());
204bdd1243dSDimitry Andric     return std::nullopt;
2055ffd83dbSDimitry Andric   }
2065ffd83dbSDimitry Andric 
2075ffd83dbSDimitry Andric   std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
2085ffd83dbSDimitry Andric   MemoryBufferRef mbref = mb->getMemBufferRef();
2095ffd83dbSDimitry Andric   make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership
2105ffd83dbSDimitry Andric 
2115ffd83dbSDimitry Andric   // If this is a regular non-fat file, return it.
2125ffd83dbSDimitry Andric   const char *buf = mbref.getBufferStart();
213fe6060f1SDimitry Andric   const auto *hdr = reinterpret_cast<const fat_header *>(buf);
214fe6060f1SDimitry Andric   if (mbref.getBufferSize() < sizeof(uint32_t) ||
215fe6060f1SDimitry Andric       read32be(&hdr->magic) != FAT_MAGIC) {
216e8d8bef9SDimitry Andric     if (tar)
217e8d8bef9SDimitry Andric       tar->append(relativeToRoot(path), mbref.getBuffer());
218349cc55cSDimitry Andric     return cachedReads[key] = mbref;
219e8d8bef9SDimitry Andric   }
2205ffd83dbSDimitry Andric 
22104eeddc0SDimitry Andric   llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
22204eeddc0SDimitry Andric 
223fe6060f1SDimitry Andric   // Object files and archive files may be fat files, which contain multiple
224fe6060f1SDimitry Andric   // real files for different CPU ISAs. Here, we search for a file that matches
225fe6060f1SDimitry Andric   // with the current link target and returns it as a MemoryBufferRef.
226fe6060f1SDimitry Andric   const auto *arch = reinterpret_cast<const fat_arch *>(buf + sizeof(*hdr));
227bdd1243dSDimitry Andric   auto getArchName = [](uint32_t cpuType, uint32_t cpuSubtype) {
228bdd1243dSDimitry Andric     return getArchitectureName(getArchitectureFromCpuType(cpuType, cpuSubtype));
229bdd1243dSDimitry Andric   };
2305ffd83dbSDimitry Andric 
231bdd1243dSDimitry Andric   std::vector<StringRef> archs;
2325ffd83dbSDimitry Andric   for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
2335ffd83dbSDimitry Andric     if (reinterpret_cast<const char *>(arch + i + 1) >
2345ffd83dbSDimitry Andric         buf + mbref.getBufferSize()) {
2355ffd83dbSDimitry Andric       error(path + ": fat_arch struct extends beyond end of file");
236bdd1243dSDimitry Andric       return std::nullopt;
2375ffd83dbSDimitry Andric     }
2385ffd83dbSDimitry Andric 
239bdd1243dSDimitry Andric     uint32_t cpuType = read32be(&arch[i].cputype);
240bdd1243dSDimitry Andric     uint32_t cpuSubtype =
241bdd1243dSDimitry Andric         read32be(&arch[i].cpusubtype) & ~MachO::CPU_SUBTYPE_MASK;
242bdd1243dSDimitry Andric 
243bdd1243dSDimitry Andric     // FIXME: LD64 has a more complex fallback logic here.
244bdd1243dSDimitry Andric     // Consider implementing that as well?
245bdd1243dSDimitry Andric     if (cpuType != static_cast<uint32_t>(target->cpuType) ||
246bdd1243dSDimitry Andric         cpuSubtype != target->cpuSubtype) {
247bdd1243dSDimitry Andric       archs.emplace_back(getArchName(cpuType, cpuSubtype));
2485ffd83dbSDimitry Andric       continue;
249bdd1243dSDimitry Andric     }
2505ffd83dbSDimitry Andric 
2515ffd83dbSDimitry Andric     uint32_t offset = read32be(&arch[i].offset);
2525ffd83dbSDimitry Andric     uint32_t size = read32be(&arch[i].size);
2535ffd83dbSDimitry Andric     if (offset + size > mbref.getBufferSize())
2545ffd83dbSDimitry Andric       error(path + ": slice extends beyond end of file");
255e8d8bef9SDimitry Andric     if (tar)
256e8d8bef9SDimitry Andric       tar->append(relativeToRoot(path), mbref.getBuffer());
257349cc55cSDimitry Andric     return cachedReads[key] = MemoryBufferRef(StringRef(buf + offset, size),
258349cc55cSDimitry Andric                                               path.copy(bAlloc));
2595ffd83dbSDimitry Andric   }
2605ffd83dbSDimitry Andric 
261bdd1243dSDimitry Andric   auto targetArchName = getArchName(target->cpuType, target->cpuSubtype);
262bdd1243dSDimitry Andric   warn(path + ": ignoring file because it is universal (" + join(archs, ",") +
263bdd1243dSDimitry Andric        ") but does not contain the " + targetArchName + " architecture");
264bdd1243dSDimitry Andric   return std::nullopt;
2655ffd83dbSDimitry Andric }
2665ffd83dbSDimitry Andric 
267fe6060f1SDimitry Andric InputFile::InputFile(Kind kind, const InterfaceFile &interface)
26804eeddc0SDimitry Andric     : id(idCount++), fileKind(kind), name(saver().save(interface.getPath())) {}
2695ffd83dbSDimitry Andric 
270349cc55cSDimitry Andric // Some sections comprise of fixed-size records, so instead of splitting them at
271349cc55cSDimitry Andric // symbol boundaries, we split them based on size. Records are distinct from
272349cc55cSDimitry Andric // literals in that they may contain references to other sections, instead of
273349cc55cSDimitry Andric // being leaf nodes in the InputSection graph.
274349cc55cSDimitry Andric //
275349cc55cSDimitry Andric // Note that "record" is a term I came up with. In contrast, "literal" is a term
276349cc55cSDimitry Andric // used by the Mach-O format.
277bdd1243dSDimitry Andric static std::optional<size_t> getRecordSize(StringRef segname, StringRef name) {
27881ad6265SDimitry Andric   if (name == section_names::compactUnwind) {
279349cc55cSDimitry Andric     if (segname == segment_names::ld)
280349cc55cSDimitry Andric       return target->wordSize == 8 ? 32 : 20;
281349cc55cSDimitry Andric   }
282bdd1243dSDimitry Andric   if (!config->dedupStrings)
283349cc55cSDimitry Andric     return {};
28481ad6265SDimitry Andric 
28581ad6265SDimitry Andric   if (name == section_names::cfString && segname == segment_names::data)
28681ad6265SDimitry Andric     return target->wordSize == 8 ? 32 : 16;
287fcaf7f86SDimitry Andric 
288fcaf7f86SDimitry Andric   if (config->icfLevel == ICFLevel::none)
289fcaf7f86SDimitry Andric     return {};
290fcaf7f86SDimitry Andric 
29181ad6265SDimitry Andric   if (name == section_names::objcClassRefs && segname == segment_names::data)
29281ad6265SDimitry Andric     return target->wordSize;
293bdd1243dSDimitry Andric 
294bdd1243dSDimitry Andric   if (name == section_names::objcSelrefs && segname == segment_names::data)
295bdd1243dSDimitry Andric     return target->wordSize;
29681ad6265SDimitry Andric   return {};
29781ad6265SDimitry Andric }
29881ad6265SDimitry Andric 
29981ad6265SDimitry Andric static Error parseCallGraph(ArrayRef<uint8_t> data,
30081ad6265SDimitry Andric                             std::vector<CallGraphEntry> &callGraph) {
30181ad6265SDimitry Andric   TimeTraceScope timeScope("Parsing call graph section");
30281ad6265SDimitry Andric   BinaryStreamReader reader(data, support::little);
30381ad6265SDimitry Andric   while (!reader.empty()) {
30481ad6265SDimitry Andric     uint32_t fromIndex, toIndex;
30581ad6265SDimitry Andric     uint64_t count;
30681ad6265SDimitry Andric     if (Error err = reader.readInteger(fromIndex))
30781ad6265SDimitry Andric       return err;
30881ad6265SDimitry Andric     if (Error err = reader.readInteger(toIndex))
30981ad6265SDimitry Andric       return err;
31081ad6265SDimitry Andric     if (Error err = reader.readInteger(count))
31181ad6265SDimitry Andric       return err;
31281ad6265SDimitry Andric     callGraph.emplace_back(fromIndex, toIndex, count);
31381ad6265SDimitry Andric   }
31481ad6265SDimitry Andric   return Error::success();
315349cc55cSDimitry Andric }
316349cc55cSDimitry Andric 
317349cc55cSDimitry Andric // Parse the sequence of sections within a single LC_SEGMENT(_64).
318349cc55cSDimitry Andric // Split each section into subsections.
319349cc55cSDimitry Andric template <class SectionHeader>
320349cc55cSDimitry Andric void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
321349cc55cSDimitry Andric   sections.reserve(sectionHeaders.size());
3225ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
3235ffd83dbSDimitry Andric 
324349cc55cSDimitry Andric   for (const SectionHeader &sec : sectionHeaders) {
325fe6060f1SDimitry Andric     StringRef name =
326e8d8bef9SDimitry Andric         StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
327fe6060f1SDimitry Andric     StringRef segname =
328e8d8bef9SDimitry Andric         StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
32981ad6265SDimitry Andric     sections.push_back(make<Section>(this, segname, name, sec.flags, sec.addr));
330fe6060f1SDimitry Andric     if (sec.align >= 32) {
331fe6060f1SDimitry Andric       error("alignment " + std::to_string(sec.align) + " of section " + name +
332fe6060f1SDimitry Andric             " is too large");
333fe6060f1SDimitry Andric       continue;
334fe6060f1SDimitry Andric     }
33581ad6265SDimitry Andric     Section &section = *sections.back();
336fe6060f1SDimitry Andric     uint32_t align = 1 << sec.align;
33781ad6265SDimitry Andric     ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
33881ad6265SDimitry Andric                                                     : buf + sec.offset,
33981ad6265SDimitry Andric                               static_cast<size_t>(sec.size)};
340e8d8bef9SDimitry Andric 
341bdd1243dSDimitry Andric     auto splitRecords = [&](size_t recordSize) -> void {
342349cc55cSDimitry Andric       if (data.empty())
343349cc55cSDimitry Andric         return;
34481ad6265SDimitry Andric       Subsections &subsections = section.subsections;
345349cc55cSDimitry Andric       subsections.reserve(data.size() / recordSize);
34681ad6265SDimitry Andric       for (uint64_t off = 0; off < data.size(); off += recordSize) {
347349cc55cSDimitry Andric         auto *isec = make<ConcatInputSection>(
348bdd1243dSDimitry Andric             section, data.slice(off, std::min(data.size(), recordSize)), align);
34981ad6265SDimitry Andric         subsections.push_back({off, isec});
350349cc55cSDimitry Andric       }
35181ad6265SDimitry Andric       section.doneSplitting = true;
352349cc55cSDimitry Andric     };
353349cc55cSDimitry Andric 
354fe6060f1SDimitry Andric     if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
355bdd1243dSDimitry Andric       if (sec.nreloc)
356bdd1243dSDimitry Andric         fatal(toString(this) + ": " + sec.segname + "," + sec.sectname +
357bdd1243dSDimitry Andric               " contains relocations, which is unsupported");
358bdd1243dSDimitry Andric       bool dedupLiterals =
359bdd1243dSDimitry Andric           name == section_names::objcMethname || config->dedupStrings;
360bdd1243dSDimitry Andric       InputSection *isec =
361bdd1243dSDimitry Andric           make<CStringInputSection>(section, data, align, dedupLiterals);
362fe6060f1SDimitry Andric       // FIXME: parallelize this?
363fe6060f1SDimitry Andric       cast<CStringInputSection>(isec)->splitIntoPieces();
364bdd1243dSDimitry Andric       section.subsections.push_back({0, isec});
365bdd1243dSDimitry Andric     } else if (isWordLiteralSection(sec.flags)) {
366bdd1243dSDimitry Andric       if (sec.nreloc)
367bdd1243dSDimitry Andric         fatal(toString(this) + ": " + sec.segname + "," + sec.sectname +
368bdd1243dSDimitry Andric               " contains relocations, which is unsupported");
369bdd1243dSDimitry Andric       InputSection *isec = make<WordLiteralInputSection>(section, data, align);
37081ad6265SDimitry Andric       section.subsections.push_back({0, isec});
371349cc55cSDimitry Andric     } else if (auto recordSize = getRecordSize(segname, name)) {
372349cc55cSDimitry Andric       splitRecords(*recordSize);
373753f127fSDimitry Andric     } else if (name == section_names::ehFrame &&
37481ad6265SDimitry Andric                segname == segment_names::text) {
37581ad6265SDimitry Andric       splitEhFrames(data, *sections.back());
376349cc55cSDimitry Andric     } else if (segname == segment_names::llvm) {
37781ad6265SDimitry Andric       if (config->callGraphProfileSort && name == section_names::cgProfile)
37881ad6265SDimitry Andric         checkError(parseCallGraph(data, callGraph));
379349cc55cSDimitry Andric       // ld64 does not appear to emit contents from sections within the __LLVM
380349cc55cSDimitry Andric       // segment. Symbols within those sections point to bitcode metadata
381349cc55cSDimitry Andric       // instead of actual symbols. Global symbols within those sections could
38281ad6265SDimitry Andric       // have the same name without causing duplicate symbol errors. To avoid
38381ad6265SDimitry Andric       // spurious duplicate symbol errors, we do not parse these sections.
384349cc55cSDimitry Andric       // TODO: Evaluate whether the bitcode metadata is needed.
385fcaf7f86SDimitry Andric     } else if (name == section_names::objCImageInfo &&
386fcaf7f86SDimitry Andric                segname == segment_names::data) {
387fcaf7f86SDimitry Andric       objCImageInfo = data;
388fe6060f1SDimitry Andric     } else {
38981ad6265SDimitry Andric       if (name == section_names::addrSig)
39081ad6265SDimitry Andric         addrSigSection = sections.back();
39181ad6265SDimitry Andric 
39281ad6265SDimitry Andric       auto *isec = make<ConcatInputSection>(section, data, align);
393349cc55cSDimitry Andric       if (isDebugSection(isec->getFlags()) &&
394349cc55cSDimitry Andric           isec->getSegName() == segment_names::dwarf) {
395e8d8bef9SDimitry Andric         // Instead of emitting DWARF sections, we emit STABS symbols to the
396e8d8bef9SDimitry Andric         // object files that contain them. We filter them out early to avoid
39781ad6265SDimitry Andric         // parsing their relocations unnecessarily.
398e8d8bef9SDimitry Andric         debugSections.push_back(isec);
399349cc55cSDimitry Andric       } else {
40081ad6265SDimitry Andric         section.subsections.push_back({0, isec});
401e8d8bef9SDimitry Andric       }
4025ffd83dbSDimitry Andric     }
4035ffd83dbSDimitry Andric   }
404fe6060f1SDimitry Andric }
4055ffd83dbSDimitry Andric 
40681ad6265SDimitry Andric void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
40761cfbce3SDimitry Andric   EhReader reader(this, data, /*dataOff=*/0);
40881ad6265SDimitry Andric   size_t off = 0;
40981ad6265SDimitry Andric   while (off < reader.size()) {
41081ad6265SDimitry Andric     uint64_t frameOff = off;
41181ad6265SDimitry Andric     uint64_t length = reader.readLength(&off);
41281ad6265SDimitry Andric     if (length == 0)
41381ad6265SDimitry Andric       break;
41481ad6265SDimitry Andric     uint64_t fullLength = length + (off - frameOff);
41581ad6265SDimitry Andric     off += length;
41681ad6265SDimitry Andric     // We hard-code an alignment of 1 here because we don't actually want our
41781ad6265SDimitry Andric     // EH frames to be aligned to the section alignment. EH frame decoders don't
41881ad6265SDimitry Andric     // expect this alignment. Moreover, each EH frame must start where the
41981ad6265SDimitry Andric     // previous one ends, and where it ends is indicated by the length field.
42081ad6265SDimitry Andric     // Unless we update the length field (troublesome), we should keep the
42181ad6265SDimitry Andric     // alignment to 1.
42281ad6265SDimitry Andric     // Note that we still want to preserve the alignment of the overall section,
42381ad6265SDimitry Andric     // just not of the individual EH frames.
42481ad6265SDimitry Andric     ehFrameSection.subsections.push_back(
42581ad6265SDimitry Andric         {frameOff, make<ConcatInputSection>(ehFrameSection,
42681ad6265SDimitry Andric                                             data.slice(frameOff, fullLength),
42781ad6265SDimitry Andric                                             /*align=*/1)});
42881ad6265SDimitry Andric   }
42981ad6265SDimitry Andric   ehFrameSection.doneSplitting = true;
43081ad6265SDimitry Andric }
43181ad6265SDimitry Andric 
43281ad6265SDimitry Andric template <class T>
43381ad6265SDimitry Andric static Section *findContainingSection(const std::vector<Section *> &sections,
43481ad6265SDimitry Andric                                       T *offset) {
43581ad6265SDimitry Andric   static_assert(std::is_same<uint64_t, T>::value ||
43681ad6265SDimitry Andric                     std::is_same<uint32_t, T>::value,
43781ad6265SDimitry Andric                 "unexpected type for offset");
43881ad6265SDimitry Andric   auto it = std::prev(llvm::upper_bound(
43981ad6265SDimitry Andric       sections, *offset,
44081ad6265SDimitry Andric       [](uint64_t value, const Section *sec) { return value < sec->addr; }));
44181ad6265SDimitry Andric   *offset -= (*it)->addr;
44281ad6265SDimitry Andric   return *it;
44381ad6265SDimitry Andric }
44481ad6265SDimitry Andric 
4455ffd83dbSDimitry Andric // Find the subsection corresponding to the greatest section offset that is <=
4465ffd83dbSDimitry Andric // that of the given offset.
4475ffd83dbSDimitry Andric //
4485ffd83dbSDimitry Andric // offset: an offset relative to the start of the original InputSection (before
4495ffd83dbSDimitry Andric // any subsection splitting has occurred). It will be updated to represent the
4505ffd83dbSDimitry Andric // same location as an offset relative to the start of the containing
4515ffd83dbSDimitry Andric // subsection.
452349cc55cSDimitry Andric template <class T>
45381ad6265SDimitry Andric static InputSection *findContainingSubsection(const Section &section,
454349cc55cSDimitry Andric                                               T *offset) {
455349cc55cSDimitry Andric   static_assert(std::is_same<uint64_t, T>::value ||
456349cc55cSDimitry Andric                     std::is_same<uint32_t, T>::value,
457349cc55cSDimitry Andric                 "unexpected type for offset");
458fe6060f1SDimitry Andric   auto it = std::prev(llvm::upper_bound(
45981ad6265SDimitry Andric       section.subsections, *offset,
460349cc55cSDimitry Andric       [](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
461fe6060f1SDimitry Andric   *offset -= it->offset;
462fe6060f1SDimitry Andric   return it->isec;
4635ffd83dbSDimitry Andric }
4645ffd83dbSDimitry Andric 
46581ad6265SDimitry Andric // Find a symbol at offset `off` within `isec`.
46681ad6265SDimitry Andric static Defined *findSymbolAtOffset(const ConcatInputSection *isec,
46781ad6265SDimitry Andric                                    uint64_t off) {
46881ad6265SDimitry Andric   auto it = llvm::lower_bound(isec->symbols, off, [](Defined *d, uint64_t off) {
46981ad6265SDimitry Andric     return d->value < off;
47081ad6265SDimitry Andric   });
47181ad6265SDimitry Andric   // The offset should point at the exact address of a symbol (with no addend.)
47281ad6265SDimitry Andric   if (it == isec->symbols.end() || (*it)->value != off) {
47381ad6265SDimitry Andric     assert(isec->wasCoalesced);
47481ad6265SDimitry Andric     return nullptr;
47581ad6265SDimitry Andric   }
47681ad6265SDimitry Andric   return *it;
47781ad6265SDimitry Andric }
47881ad6265SDimitry Andric 
479349cc55cSDimitry Andric template <class SectionHeader>
480349cc55cSDimitry Andric static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
481fe6060f1SDimitry Andric                                    relocation_info rel) {
482fe6060f1SDimitry Andric   const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
483fe6060f1SDimitry Andric   bool valid = true;
484fe6060f1SDimitry Andric   auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) {
485fe6060f1SDimitry Andric     valid = false;
486fe6060f1SDimitry Andric     return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
487fe6060f1SDimitry Andric             std::to_string(rel.r_address) + " of " + sec.segname + "," +
488fe6060f1SDimitry Andric             sec.sectname + " in " + toString(file))
489fe6060f1SDimitry Andric         .str();
490fe6060f1SDimitry Andric   };
491fe6060f1SDimitry Andric 
492fe6060f1SDimitry Andric   if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern)
493fe6060f1SDimitry Andric     error(message("must be extern"));
494fe6060f1SDimitry Andric   if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel)
495fe6060f1SDimitry Andric     error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") +
496fe6060f1SDimitry Andric                   "be PC-relative"));
497fe6060f1SDimitry Andric   if (isThreadLocalVariables(sec.flags) &&
498fe6060f1SDimitry Andric       !relocAttrs.hasAttr(RelocAttrBits::UNSIGNED))
499fe6060f1SDimitry Andric     error(message("not allowed in thread-local section, must be UNSIGNED"));
500fe6060f1SDimitry Andric   if (rel.r_length < 2 || rel.r_length > 3 ||
501fe6060f1SDimitry Andric       !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {
502fe6060f1SDimitry Andric     static SmallVector<StringRef, 4> widths{"0", "4", "8", "4 or 8"};
503fe6060f1SDimitry Andric     error(message("has width " + std::to_string(1 << rel.r_length) +
504fe6060f1SDimitry Andric                   " bytes, but must be " +
505fe6060f1SDimitry Andric                   widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] +
506fe6060f1SDimitry Andric                   " bytes"));
507fe6060f1SDimitry Andric   }
508fe6060f1SDimitry Andric   return valid;
509fe6060f1SDimitry Andric }
510fe6060f1SDimitry Andric 
511349cc55cSDimitry Andric template <class SectionHeader>
512349cc55cSDimitry Andric void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
51381ad6265SDimitry Andric                                const SectionHeader &sec, Section &section) {
5145ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
515e8d8bef9SDimitry Andric   ArrayRef<relocation_info> relInfos(
516e8d8bef9SDimitry Andric       reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
5175ffd83dbSDimitry Andric 
51881ad6265SDimitry Andric   Subsections &subsections = section.subsections;
519349cc55cSDimitry Andric   auto subsecIt = subsections.rbegin();
520e8d8bef9SDimitry Andric   for (size_t i = 0; i < relInfos.size(); i++) {
521e8d8bef9SDimitry Andric     // Paired relocations serve as Mach-O's method for attaching a
522e8d8bef9SDimitry Andric     // supplemental datum to a primary relocation record. ELF does not
523e8d8bef9SDimitry Andric     // need them because the *_RELOC_RELA records contain the extra
524e8d8bef9SDimitry Andric     // addend field, vs. *_RELOC_REL which omit the addend.
525e8d8bef9SDimitry Andric     //
526e8d8bef9SDimitry Andric     // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
527e8d8bef9SDimitry Andric     // and the paired *_RELOC_UNSIGNED record holds the minuend. The
528fe6060f1SDimitry Andric     // datum for each is a symbolic address. The result is the offset
529fe6060f1SDimitry Andric     // between two addresses.
530e8d8bef9SDimitry Andric     //
531e8d8bef9SDimitry Andric     // The ARM64_RELOC_ADDEND record holds the addend, and the paired
532e8d8bef9SDimitry Andric     // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
533e8d8bef9SDimitry Andric     // base symbolic address.
534e8d8bef9SDimitry Andric     //
535bdd1243dSDimitry Andric     // Note: X86 does not use *_RELOC_ADDEND because it can embed an addend into
536bdd1243dSDimitry Andric     // the instruction stream. On X86, a relocatable address field always
537bdd1243dSDimitry Andric     // occupies an entire contiguous sequence of byte(s), so there is no need to
538bdd1243dSDimitry Andric     // merge opcode bits with address bits. Therefore, it's easy and convenient
539bdd1243dSDimitry Andric     // to store addends in the instruction-stream bytes that would otherwise
540bdd1243dSDimitry Andric     // contain zeroes. By contrast, RISC ISAs such as ARM64 mix opcode bits with
541bdd1243dSDimitry Andric     // address bits so that bitwise arithmetic is necessary to extract and
542bdd1243dSDimitry Andric     // insert them. Storing addends in the instruction stream is possible, but
543bdd1243dSDimitry Andric     // inconvenient and more costly at link time.
544e8d8bef9SDimitry Andric 
545fe6060f1SDimitry Andric     relocation_info relInfo = relInfos[i];
546349cc55cSDimitry Andric     bool isSubtrahend =
547349cc55cSDimitry Andric         target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
548349cc55cSDimitry Andric     int64_t pairedAddend = 0;
549fe6060f1SDimitry Andric     if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
550fe6060f1SDimitry Andric       pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
551fe6060f1SDimitry Andric       relInfo = relInfos[++i];
552fe6060f1SDimitry Andric     }
553e8d8bef9SDimitry Andric     assert(i < relInfos.size());
554fe6060f1SDimitry Andric     if (!validateRelocationInfo(this, sec, relInfo))
555fe6060f1SDimitry Andric       continue;
556e8d8bef9SDimitry Andric     if (relInfo.r_address & R_SCATTERED)
5575ffd83dbSDimitry Andric       fatal("TODO: Scattered relocations not supported");
5585ffd83dbSDimitry Andric 
559fe6060f1SDimitry Andric     int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
560fe6060f1SDimitry Andric     assert(!(embeddedAddend && pairedAddend));
561fe6060f1SDimitry Andric     int64_t totalAddend = pairedAddend + embeddedAddend;
5625ffd83dbSDimitry Andric     Reloc r;
563e8d8bef9SDimitry Andric     r.type = relInfo.r_type;
564e8d8bef9SDimitry Andric     r.pcrel = relInfo.r_pcrel;
565e8d8bef9SDimitry Andric     r.length = relInfo.r_length;
566e8d8bef9SDimitry Andric     r.offset = relInfo.r_address;
567e8d8bef9SDimitry Andric     if (relInfo.r_extern) {
568e8d8bef9SDimitry Andric       r.referent = symbols[relInfo.r_symbolnum];
569fe6060f1SDimitry Andric       r.addend = isSubtrahend ? 0 : totalAddend;
5705ffd83dbSDimitry Andric     } else {
571fe6060f1SDimitry Andric       assert(!isSubtrahend);
572349cc55cSDimitry Andric       const SectionHeader &referentSecHead =
573349cc55cSDimitry Andric           sectionHeaders[relInfo.r_symbolnum - 1];
574fe6060f1SDimitry Andric       uint64_t referentOffset;
575e8d8bef9SDimitry Andric       if (relInfo.r_pcrel) {
5765ffd83dbSDimitry Andric         // The implicit addend for pcrel section relocations is the pcrel offset
5775ffd83dbSDimitry Andric         // in terms of the addresses in the input file. Here we adjust it so
578e8d8bef9SDimitry Andric         // that it describes the offset from the start of the referent section.
579fe6060f1SDimitry Andric         // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
580fe6060f1SDimitry Andric         // have pcrel section relocations. We may want to factor this out into
581fe6060f1SDimitry Andric         // the arch-specific .cpp file.
582fe6060f1SDimitry Andric         assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
583349cc55cSDimitry Andric         referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend -
584349cc55cSDimitry Andric                          referentSecHead.addr;
5855ffd83dbSDimitry Andric       } else {
5865ffd83dbSDimitry Andric         // The addend for a non-pcrel relocation is its absolute address.
587349cc55cSDimitry Andric         referentOffset = totalAddend - referentSecHead.addr;
5885ffd83dbSDimitry Andric       }
58981ad6265SDimitry Andric       r.referent = findContainingSubsection(*sections[relInfo.r_symbolnum - 1],
59081ad6265SDimitry Andric                                             &referentOffset);
591e8d8bef9SDimitry Andric       r.addend = referentOffset;
5925ffd83dbSDimitry Andric     }
5935ffd83dbSDimitry Andric 
594fe6060f1SDimitry Andric     // Find the subsection that this relocation belongs to.
595fe6060f1SDimitry Andric     // Though not required by the Mach-O format, clang and gcc seem to emit
596fe6060f1SDimitry Andric     // relocations in order, so let's take advantage of it. However, ld64 emits
597fe6060f1SDimitry Andric     // unsorted relocations (in `-r` mode), so we have a fallback for that
598fe6060f1SDimitry Andric     // uncommon case.
599fe6060f1SDimitry Andric     InputSection *subsec;
600349cc55cSDimitry Andric     while (subsecIt != subsections.rend() && subsecIt->offset > r.offset)
601fe6060f1SDimitry Andric       ++subsecIt;
602349cc55cSDimitry Andric     if (subsecIt == subsections.rend() ||
603fe6060f1SDimitry Andric         subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
60481ad6265SDimitry Andric       subsec = findContainingSubsection(section, &r.offset);
605fe6060f1SDimitry Andric       // Now that we know the relocs are unsorted, avoid trying the 'fast path'
606fe6060f1SDimitry Andric       // for the other relocations.
607349cc55cSDimitry Andric       subsecIt = subsections.rend();
608fe6060f1SDimitry Andric     } else {
609fe6060f1SDimitry Andric       subsec = subsecIt->isec;
610fe6060f1SDimitry Andric       r.offset -= subsecIt->offset;
611fe6060f1SDimitry Andric     }
6125ffd83dbSDimitry Andric     subsec->relocs.push_back(r);
613fe6060f1SDimitry Andric 
614fe6060f1SDimitry Andric     if (isSubtrahend) {
615fe6060f1SDimitry Andric       relocation_info minuendInfo = relInfos[++i];
616fe6060f1SDimitry Andric       // SUBTRACTOR relocations should always be followed by an UNSIGNED one
617fe6060f1SDimitry Andric       // attached to the same address.
618fe6060f1SDimitry Andric       assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
619fe6060f1SDimitry Andric              relInfo.r_address == minuendInfo.r_address);
620fe6060f1SDimitry Andric       Reloc p;
621fe6060f1SDimitry Andric       p.type = minuendInfo.r_type;
622fe6060f1SDimitry Andric       if (minuendInfo.r_extern) {
623fe6060f1SDimitry Andric         p.referent = symbols[minuendInfo.r_symbolnum];
624fe6060f1SDimitry Andric         p.addend = totalAddend;
625fe6060f1SDimitry Andric       } else {
626fe6060f1SDimitry Andric         uint64_t referentOffset =
627fe6060f1SDimitry Andric             totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr;
62881ad6265SDimitry Andric         p.referent = findContainingSubsection(
62981ad6265SDimitry Andric             *sections[minuendInfo.r_symbolnum - 1], &referentOffset);
630fe6060f1SDimitry Andric         p.addend = referentOffset;
631fe6060f1SDimitry Andric       }
632fe6060f1SDimitry Andric       subsec->relocs.push_back(p);
633fe6060f1SDimitry Andric     }
6345ffd83dbSDimitry Andric   }
6355ffd83dbSDimitry Andric }
6365ffd83dbSDimitry Andric 
637fe6060f1SDimitry Andric template <class NList>
638fe6060f1SDimitry Andric static macho::Symbol *createDefined(const NList &sym, StringRef name,
639fe6060f1SDimitry Andric                                     InputSection *isec, uint64_t value,
640972a253aSDimitry Andric                                     uint64_t size, bool forceHidden) {
641e8d8bef9SDimitry Andric   // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
642fe6060f1SDimitry Andric   // N_EXT: Global symbols. These go in the symbol table during the link,
643fe6060f1SDimitry Andric   //        and also in the export table of the output so that the dynamic
644fe6060f1SDimitry Andric   //        linker sees them.
645fe6060f1SDimitry Andric   // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped. These go in the
646fe6060f1SDimitry Andric   //                 symbol table during the link so that duplicates are
647fe6060f1SDimitry Andric   //                 either reported (for non-weak symbols) or merged
648fe6060f1SDimitry Andric   //                 (for weak symbols), but they do not go in the export
649fe6060f1SDimitry Andric   //                 table of the output.
650fe6060f1SDimitry Andric   // N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
651fe6060f1SDimitry Andric   //         object files) may produce them. LLD does not yet support -r.
652fe6060f1SDimitry Andric   //         These are translation-unit scoped, identical to the `0` case.
653fe6060f1SDimitry Andric   // 0: Translation-unit scoped. These are not in the symbol table during
654fe6060f1SDimitry Andric   //    link, and not in the export table of the output either.
655fe6060f1SDimitry Andric   bool isWeakDefCanBeHidden =
656fe6060f1SDimitry Andric       (sym.n_desc & (N_WEAK_DEF | N_WEAK_REF)) == (N_WEAK_DEF | N_WEAK_REF);
657e8d8bef9SDimitry Andric 
658*06c3fb27SDimitry Andric   assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
659*06c3fb27SDimitry Andric 
660fe6060f1SDimitry Andric   if (sym.n_type & N_EXT) {
661972a253aSDimitry Andric     // -load_hidden makes us treat global symbols as linkage unit scoped.
662972a253aSDimitry Andric     // Duplicates are reported but the symbol does not go in the export trie.
663972a253aSDimitry Andric     bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
664972a253aSDimitry Andric 
665fe6060f1SDimitry Andric     // lld's behavior for merging symbols is slightly different from ld64:
666fe6060f1SDimitry Andric     // ld64 picks the winning symbol based on several criteria (see
667fe6060f1SDimitry Andric     // pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
668fe6060f1SDimitry Andric     // just merges metadata and keeps the contents of the first symbol
669fe6060f1SDimitry Andric     // with that name (see SymbolTable::addDefined). For:
670fe6060f1SDimitry Andric     // * inline function F in a TU built with -fvisibility-inlines-hidden
671fe6060f1SDimitry Andric     // * and inline function F in another TU built without that flag
672fe6060f1SDimitry Andric     // ld64 will pick the one from the file built without
673fe6060f1SDimitry Andric     // -fvisibility-inlines-hidden.
674fe6060f1SDimitry Andric     // lld will instead pick the one listed first on the link command line and
675fe6060f1SDimitry Andric     // give it visibility as if the function was built without
676fe6060f1SDimitry Andric     // -fvisibility-inlines-hidden.
677fe6060f1SDimitry Andric     // If both functions have the same contents, this will have the same
678fe6060f1SDimitry Andric     // behavior. If not, it won't, but the input had an ODR violation in
679fe6060f1SDimitry Andric     // that case.
680fe6060f1SDimitry Andric     //
681fe6060f1SDimitry Andric     // Similarly, merging a symbol
682fe6060f1SDimitry Andric     // that's isPrivateExtern and not isWeakDefCanBeHidden with one
683fe6060f1SDimitry Andric     // that's not isPrivateExtern but isWeakDefCanBeHidden technically
684fe6060f1SDimitry Andric     // should produce one
685fe6060f1SDimitry Andric     // that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
686fe6060f1SDimitry Andric     // with ld64's semantics, because it means the non-private-extern
687fe6060f1SDimitry Andric     // definition will continue to take priority if more private extern
688fe6060f1SDimitry Andric     // definitions are encountered. With lld's semantics there's no observable
689349cc55cSDimitry Andric     // difference between a symbol that's isWeakDefCanBeHidden(autohide) or one
690349cc55cSDimitry Andric     // that's privateExtern -- neither makes it into the dynamic symbol table,
691349cc55cSDimitry Andric     // unless the autohide symbol is explicitly exported.
692349cc55cSDimitry Andric     // But if a symbol is both privateExtern and autohide then it can't
693349cc55cSDimitry Andric     // be exported.
694349cc55cSDimitry Andric     // So we nullify the autohide flag when privateExtern is present
695349cc55cSDimitry Andric     // and promote the symbol to privateExtern when it is not already.
696349cc55cSDimitry Andric     if (isWeakDefCanBeHidden && isPrivateExtern)
697349cc55cSDimitry Andric       isWeakDefCanBeHidden = false;
698349cc55cSDimitry Andric     else if (isWeakDefCanBeHidden)
699fe6060f1SDimitry Andric       isPrivateExtern = true;
700fe6060f1SDimitry Andric     return symtab->addDefined(
701fe6060f1SDimitry Andric         name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
702*06c3fb27SDimitry Andric         isPrivateExtern, sym.n_desc & REFERENCED_DYNAMICALLY,
703*06c3fb27SDimitry Andric         sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden);
704e8d8bef9SDimitry Andric   }
705bdd1243dSDimitry Andric   bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
706fe6060f1SDimitry Andric   return make<Defined>(
707fe6060f1SDimitry Andric       name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
70881ad6265SDimitry Andric       /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
709*06c3fb27SDimitry Andric       sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
710e8d8bef9SDimitry Andric }
711e8d8bef9SDimitry Andric 
712e8d8bef9SDimitry Andric // Absolute symbols are defined symbols that do not have an associated
713e8d8bef9SDimitry Andric // InputSection. They cannot be weak.
714fe6060f1SDimitry Andric template <class NList>
715fe6060f1SDimitry Andric static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
716972a253aSDimitry Andric                                      StringRef name, bool forceHidden) {
717*06c3fb27SDimitry Andric   assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
718*06c3fb27SDimitry Andric 
719fe6060f1SDimitry Andric   if (sym.n_type & N_EXT) {
720972a253aSDimitry Andric     bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
721*06c3fb27SDimitry Andric     return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0,
722*06c3fb27SDimitry Andric                               /*isWeakDef=*/false, isPrivateExtern,
723*06c3fb27SDimitry Andric                               /*isReferencedDynamically=*/false,
724*06c3fb27SDimitry Andric                               sym.n_desc & N_NO_DEAD_STRIP,
725349cc55cSDimitry Andric                               /*isWeakDefCanBeHidden=*/false);
726e8d8bef9SDimitry Andric   }
727fe6060f1SDimitry Andric   return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
728fe6060f1SDimitry Andric                        /*isWeakDef=*/false,
729fe6060f1SDimitry Andric                        /*isExternal=*/false, /*isPrivateExtern=*/false,
730*06c3fb27SDimitry Andric                        /*includeInSymtab=*/true,
731fe6060f1SDimitry Andric                        /*isReferencedDynamically=*/false,
732fe6060f1SDimitry Andric                        sym.n_desc & N_NO_DEAD_STRIP);
733e8d8bef9SDimitry Andric }
734e8d8bef9SDimitry Andric 
735fe6060f1SDimitry Andric template <class NList>
736fe6060f1SDimitry Andric macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
737bdd1243dSDimitry Andric                                               const char *strtab) {
738bdd1243dSDimitry Andric   StringRef name = StringRef(strtab + sym.n_strx);
739e8d8bef9SDimitry Andric   uint8_t type = sym.n_type & N_TYPE;
740972a253aSDimitry Andric   bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
741e8d8bef9SDimitry Andric   switch (type) {
742e8d8bef9SDimitry Andric   case N_UNDF:
743e8d8bef9SDimitry Andric     return sym.n_value == 0
744fe6060f1SDimitry Andric                ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF)
745e8d8bef9SDimitry Andric                : symtab->addCommon(name, this, sym.n_value,
746e8d8bef9SDimitry Andric                                    1 << GET_COMM_ALIGN(sym.n_desc),
747972a253aSDimitry Andric                                    isPrivateExtern);
748e8d8bef9SDimitry Andric   case N_ABS:
749972a253aSDimitry Andric     return createAbsolute(sym, this, name, forceHidden);
750bdd1243dSDimitry Andric   case N_INDR: {
751bdd1243dSDimitry Andric     // Not much point in making local aliases -- relocs in the current file can
752bdd1243dSDimitry Andric     // just refer to the actual symbol itself. ld64 ignores these symbols too.
753bdd1243dSDimitry Andric     if (!(sym.n_type & N_EXT))
754bdd1243dSDimitry Andric       return nullptr;
755bdd1243dSDimitry Andric     StringRef aliasedName = StringRef(strtab + sym.n_value);
756bdd1243dSDimitry Andric     // isPrivateExtern is the only symbol flag that has an impact on the final
757bdd1243dSDimitry Andric     // aliased symbol.
758*06c3fb27SDimitry Andric     auto *alias = make<AliasSymbol>(this, name, aliasedName, isPrivateExtern);
759bdd1243dSDimitry Andric     aliases.push_back(alias);
760bdd1243dSDimitry Andric     return alias;
761bdd1243dSDimitry Andric   }
762e8d8bef9SDimitry Andric   case N_PBUD:
763bdd1243dSDimitry Andric     error("TODO: support symbols of type N_PBUD");
764e8d8bef9SDimitry Andric     return nullptr;
765e8d8bef9SDimitry Andric   case N_SECT:
766e8d8bef9SDimitry Andric     llvm_unreachable(
767e8d8bef9SDimitry Andric         "N_SECT symbols should not be passed to parseNonSectionSymbol");
768e8d8bef9SDimitry Andric   default:
769e8d8bef9SDimitry Andric     llvm_unreachable("invalid symbol type");
770e8d8bef9SDimitry Andric   }
771e8d8bef9SDimitry Andric }
772e8d8bef9SDimitry Andric 
773349cc55cSDimitry Andric template <class NList> static bool isUndef(const NList &sym) {
774fe6060f1SDimitry Andric   return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0;
775fe6060f1SDimitry Andric }
776fe6060f1SDimitry Andric 
777fe6060f1SDimitry Andric template <class LP>
778fe6060f1SDimitry Andric void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
779fe6060f1SDimitry Andric                            ArrayRef<typename LP::nlist> nList,
7805ffd83dbSDimitry Andric                            const char *strtab, bool subsectionsViaSymbols) {
781fe6060f1SDimitry Andric   using NList = typename LP::nlist;
782fe6060f1SDimitry Andric 
783fe6060f1SDimitry Andric   // Groups indices of the symbols by the sections that contain them.
784349cc55cSDimitry Andric   std::vector<std::vector<uint32_t>> symbolsBySection(sections.size());
7855ffd83dbSDimitry Andric   symbols.resize(nList.size());
786fe6060f1SDimitry Andric   SmallVector<unsigned, 32> undefineds;
787fe6060f1SDimitry Andric   for (uint32_t i = 0; i < nList.size(); ++i) {
788fe6060f1SDimitry Andric     const NList &sym = nList[i];
7895ffd83dbSDimitry Andric 
790fe6060f1SDimitry Andric     // Ignore debug symbols for now.
791fe6060f1SDimitry Andric     // FIXME: may need special handling.
792fe6060f1SDimitry Andric     if (sym.n_type & N_STAB)
793fe6060f1SDimitry Andric       continue;
794fe6060f1SDimitry Andric 
795fe6060f1SDimitry Andric     if ((sym.n_type & N_TYPE) == N_SECT) {
79681ad6265SDimitry Andric       Subsections &subsections = sections[sym.n_sect - 1]->subsections;
797fe6060f1SDimitry Andric       // parseSections() may have chosen not to parse this section.
798349cc55cSDimitry Andric       if (subsections.empty())
799fe6060f1SDimitry Andric         continue;
800fe6060f1SDimitry Andric       symbolsBySection[sym.n_sect - 1].push_back(i);
801fe6060f1SDimitry Andric     } else if (isUndef(sym)) {
802fe6060f1SDimitry Andric       undefineds.push_back(i);
803fe6060f1SDimitry Andric     } else {
804bdd1243dSDimitry Andric       symbols[i] = parseNonSectionSymbol(sym, strtab);
805fe6060f1SDimitry Andric     }
806fe6060f1SDimitry Andric   }
8075ffd83dbSDimitry Andric 
808349cc55cSDimitry Andric   for (size_t i = 0; i < sections.size(); ++i) {
80981ad6265SDimitry Andric     Subsections &subsections = sections[i]->subsections;
810349cc55cSDimitry Andric     if (subsections.empty())
811fe6060f1SDimitry Andric       continue;
812fe6060f1SDimitry Andric     std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
813fe6060f1SDimitry Andric     uint64_t sectionAddr = sectionHeaders[i].addr;
814fe6060f1SDimitry Andric     uint32_t sectionAlign = 1u << sectionHeaders[i].align;
815fe6060f1SDimitry Andric 
81681ad6265SDimitry Andric     // Some sections have already been split into subsections during
817fe6060f1SDimitry Andric     // parseSections(), so we simply need to match Symbols to the corresponding
818fe6060f1SDimitry Andric     // subsection here.
81981ad6265SDimitry Andric     if (sections[i]->doneSplitting) {
820fe6060f1SDimitry Andric       for (size_t j = 0; j < symbolIndices.size(); ++j) {
821bdd1243dSDimitry Andric         const uint32_t symIndex = symbolIndices[j];
822fe6060f1SDimitry Andric         const NList &sym = nList[symIndex];
823fe6060f1SDimitry Andric         StringRef name = strtab + sym.n_strx;
824fe6060f1SDimitry Andric         uint64_t symbolOffset = sym.n_value - sectionAddr;
825349cc55cSDimitry Andric         InputSection *isec =
82681ad6265SDimitry Andric             findContainingSubsection(*sections[i], &symbolOffset);
827fe6060f1SDimitry Andric         if (symbolOffset != 0) {
82881ad6265SDimitry Andric           error(toString(*sections[i]) + ":  symbol " + name +
829fe6060f1SDimitry Andric                 " at misaligned offset");
830fe6060f1SDimitry Andric           continue;
831fe6060f1SDimitry Andric         }
832972a253aSDimitry Andric         symbols[symIndex] =
833972a253aSDimitry Andric             createDefined(sym, name, isec, 0, isec->getSize(), forceHidden);
834fe6060f1SDimitry Andric       }
8355ffd83dbSDimitry Andric       continue;
8365ffd83dbSDimitry Andric     }
83781ad6265SDimitry Andric     sections[i]->doneSplitting = true;
8385ffd83dbSDimitry Andric 
839bdd1243dSDimitry Andric     auto getSymName = [strtab](const NList& sym) -> StringRef {
840bdd1243dSDimitry Andric       return StringRef(strtab + sym.n_strx);
841bdd1243dSDimitry Andric     };
842bdd1243dSDimitry Andric 
843fe6060f1SDimitry Andric     // Calculate symbol sizes and create subsections by splitting the sections
844fe6060f1SDimitry Andric     // along symbol boundaries.
845349cc55cSDimitry Andric     // We populate subsections by repeatedly splitting the last (highest
846349cc55cSDimitry Andric     // address) subsection.
847fe6060f1SDimitry Andric     llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
848*06c3fb27SDimitry Andric       // Put extern weak symbols after other symbols at the same address so
849*06c3fb27SDimitry Andric       // that weak symbol coalescing works correctly. See
850*06c3fb27SDimitry Andric       // SymbolTable::addDefined() for details.
851*06c3fb27SDimitry Andric       if (nList[lhs].n_value == nList[rhs].n_value &&
852*06c3fb27SDimitry Andric           nList[lhs].n_type & N_EXT && nList[rhs].n_type & N_EXT)
853*06c3fb27SDimitry Andric         return !(nList[lhs].n_desc & N_WEAK_DEF) && (nList[rhs].n_desc & N_WEAK_DEF);
854fe6060f1SDimitry Andric       return nList[lhs].n_value < nList[rhs].n_value;
855fe6060f1SDimitry Andric     });
856fe6060f1SDimitry Andric     for (size_t j = 0; j < symbolIndices.size(); ++j) {
857bdd1243dSDimitry Andric       const uint32_t symIndex = symbolIndices[j];
858fe6060f1SDimitry Andric       const NList &sym = nList[symIndex];
859bdd1243dSDimitry Andric       StringRef name = getSymName(sym);
860349cc55cSDimitry Andric       Subsection &subsec = subsections.back();
861349cc55cSDimitry Andric       InputSection *isec = subsec.isec;
862fe6060f1SDimitry Andric 
863349cc55cSDimitry Andric       uint64_t subsecAddr = sectionAddr + subsec.offset;
864fe6060f1SDimitry Andric       size_t symbolOffset = sym.n_value - subsecAddr;
865fe6060f1SDimitry Andric       uint64_t symbolSize =
866fe6060f1SDimitry Andric           j + 1 < symbolIndices.size()
867fe6060f1SDimitry Andric               ? nList[symbolIndices[j + 1]].n_value - sym.n_value
868fe6060f1SDimitry Andric               : isec->data.size() - symbolOffset;
869fe6060f1SDimitry Andric       // There are 4 cases where we do not need to create a new subsection:
870fe6060f1SDimitry Andric       //   1. If the input file does not use subsections-via-symbols.
871fe6060f1SDimitry Andric       //   2. Multiple symbols at the same address only induce one subsection.
872fe6060f1SDimitry Andric       //      (The symbolOffset == 0 check covers both this case as well as
873fe6060f1SDimitry Andric       //      the first loop iteration.)
874fe6060f1SDimitry Andric       //   3. Alternative entry points do not induce new subsections.
875fe6060f1SDimitry Andric       //   4. If we have a literal section (e.g. __cstring and __literal4).
876fe6060f1SDimitry Andric       if (!subsectionsViaSymbols || symbolOffset == 0 ||
877fe6060f1SDimitry Andric           sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
878bdd1243dSDimitry Andric         isec->hasAltEntry = symbolOffset != 0;
879972a253aSDimitry Andric         symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
880972a253aSDimitry Andric                                           symbolSize, forceHidden);
8815ffd83dbSDimitry Andric         continue;
8825ffd83dbSDimitry Andric       }
883fe6060f1SDimitry Andric       auto *concatIsec = cast<ConcatInputSection>(isec);
8845ffd83dbSDimitry Andric 
885fe6060f1SDimitry Andric       auto *nextIsec = make<ConcatInputSection>(*concatIsec);
886fe6060f1SDimitry Andric       nextIsec->wasCoalesced = false;
887fe6060f1SDimitry Andric       if (isZeroFill(isec->getFlags())) {
888fe6060f1SDimitry Andric         // Zero-fill sections have NULL data.data() non-zero data.size()
889fe6060f1SDimitry Andric         nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
890fe6060f1SDimitry Andric         isec->data = {nullptr, symbolOffset};
891fe6060f1SDimitry Andric       } else {
892fe6060f1SDimitry Andric         nextIsec->data = isec->data.slice(symbolOffset);
893fe6060f1SDimitry Andric         isec->data = isec->data.slice(0, symbolOffset);
8945ffd83dbSDimitry Andric       }
8955ffd83dbSDimitry Andric 
896fe6060f1SDimitry Andric       // By construction, the symbol will be at offset zero in the new
897fe6060f1SDimitry Andric       // subsection.
898972a253aSDimitry Andric       symbols[symIndex] = createDefined(sym, name, nextIsec, /*value=*/0,
899972a253aSDimitry Andric                                         symbolSize, forceHidden);
9005ffd83dbSDimitry Andric       // TODO: ld64 appears to preserve the original alignment as well as each
9015ffd83dbSDimitry Andric       // subsection's offset from the last aligned address. We should consider
9025ffd83dbSDimitry Andric       // emulating that behavior.
903fe6060f1SDimitry Andric       nextIsec->align = MinAlign(sectionAlign, sym.n_value);
904349cc55cSDimitry Andric       subsections.push_back({sym.n_value - sectionAddr, nextIsec});
905fe6060f1SDimitry Andric     }
9065ffd83dbSDimitry Andric   }
9075ffd83dbSDimitry Andric 
908fe6060f1SDimitry Andric   // Undefined symbols can trigger recursive fetch from Archives due to
909fe6060f1SDimitry Andric   // LazySymbols. Process defined symbols first so that the relative order
910fe6060f1SDimitry Andric   // between a defined symbol and an undefined symbol does not change the
911fe6060f1SDimitry Andric   // symbol resolution behavior. In addition, a set of interconnected symbols
912fe6060f1SDimitry Andric   // will all be resolved to the same file, instead of being resolved to
913fe6060f1SDimitry Andric   // different files.
914bdd1243dSDimitry Andric   for (unsigned i : undefineds)
915bdd1243dSDimitry Andric     symbols[i] = parseNonSectionSymbol(nList[i], strtab);
9165ffd83dbSDimitry Andric }
9175ffd83dbSDimitry Andric 
918e8d8bef9SDimitry Andric OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
919e8d8bef9SDimitry Andric                        StringRef sectName)
920e8d8bef9SDimitry Andric     : InputFile(OpaqueKind, mb) {
921e8d8bef9SDimitry Andric   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
922fe6060f1SDimitry Andric   ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
92381ad6265SDimitry Andric   sections.push_back(make<Section>(/*file=*/this, segName.take_front(16),
92481ad6265SDimitry Andric                                    sectName.take_front(16),
92581ad6265SDimitry Andric                                    /*flags=*/0, /*addr=*/0));
92681ad6265SDimitry Andric   Section &section = *sections.back();
92781ad6265SDimitry Andric   ConcatInputSection *isec = make<ConcatInputSection>(section, data);
928fe6060f1SDimitry Andric   isec->live = true;
92981ad6265SDimitry Andric   section.subsections.push_back({0, isec});
930e8d8bef9SDimitry Andric }
931e8d8bef9SDimitry Andric 
93204eeddc0SDimitry Andric ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
933972a253aSDimitry Andric                  bool lazy, bool forceHidden)
934972a253aSDimitry Andric     : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden) {
935e8d8bef9SDimitry Andric   this->archiveName = std::string(archiveName);
93604eeddc0SDimitry Andric   if (lazy) {
93704eeddc0SDimitry Andric     if (target->wordSize == 8)
93804eeddc0SDimitry Andric       parseLazy<LP64>();
93904eeddc0SDimitry Andric     else
94004eeddc0SDimitry Andric       parseLazy<ILP32>();
94104eeddc0SDimitry Andric   } else {
942fe6060f1SDimitry Andric     if (target->wordSize == 8)
943fe6060f1SDimitry Andric       parse<LP64>();
944fe6060f1SDimitry Andric     else
945fe6060f1SDimitry Andric       parse<ILP32>();
946e8d8bef9SDimitry Andric   }
94704eeddc0SDimitry Andric }
948e8d8bef9SDimitry Andric 
949fe6060f1SDimitry Andric template <class LP> void ObjFile::parse() {
950fe6060f1SDimitry Andric   using Header = typename LP::mach_header;
951fe6060f1SDimitry Andric   using SegmentCommand = typename LP::segment_command;
952349cc55cSDimitry Andric   using SectionHeader = typename LP::section;
953fe6060f1SDimitry Andric   using NList = typename LP::nlist;
954fe6060f1SDimitry Andric 
955fe6060f1SDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
956fe6060f1SDimitry Andric   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
957fe6060f1SDimitry Andric 
958bdd1243dSDimitry Andric   uint32_t cpuType;
959bdd1243dSDimitry Andric   std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch());
960bdd1243dSDimitry Andric   if (hdr->cputype != cpuType) {
961bdd1243dSDimitry Andric     Architecture arch =
962bdd1243dSDimitry Andric         getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
963349cc55cSDimitry Andric     auto msg = config->errorForArchMismatch
964349cc55cSDimitry Andric                    ? static_cast<void (*)(const Twine &)>(error)
965349cc55cSDimitry Andric                    : warn;
966349cc55cSDimitry Andric     msg(toString(this) + " has architecture " + getArchitectureName(arch) +
967fe6060f1SDimitry Andric         " which is incompatible with target architecture " +
968fe6060f1SDimitry Andric         getArchitectureName(config->arch()));
969fe6060f1SDimitry Andric     return;
970fe6060f1SDimitry Andric   }
971fe6060f1SDimitry Andric 
972fe6060f1SDimitry Andric   if (!checkCompatibility(this))
973fe6060f1SDimitry Andric     return;
974fe6060f1SDimitry Andric 
975fe6060f1SDimitry Andric   for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
976fe6060f1SDimitry Andric     StringRef data{reinterpret_cast<const char *>(cmd + 1),
977fe6060f1SDimitry Andric                    cmd->cmdsize - sizeof(linker_option_command)};
978fe6060f1SDimitry Andric     parseLCLinkerOption(this, cmd->count, data);
979fe6060f1SDimitry Andric   }
980fe6060f1SDimitry Andric 
981349cc55cSDimitry Andric   ArrayRef<SectionHeader> sectionHeaders;
982fe6060f1SDimitry Andric   if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
983fe6060f1SDimitry Andric     auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
984349cc55cSDimitry Andric     sectionHeaders = ArrayRef<SectionHeader>{
985349cc55cSDimitry Andric         reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
9865ffd83dbSDimitry Andric     parseSections(sectionHeaders);
9875ffd83dbSDimitry Andric   }
9885ffd83dbSDimitry Andric 
9895ffd83dbSDimitry Andric   // TODO: Error on missing LC_SYMTAB?
9905ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
9915ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const symtab_command *>(cmd);
992fe6060f1SDimitry Andric     ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
993fe6060f1SDimitry Andric                           c->nsyms);
9945ffd83dbSDimitry Andric     const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
9955ffd83dbSDimitry Andric     bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
996fe6060f1SDimitry Andric     parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
9975ffd83dbSDimitry Andric   }
9985ffd83dbSDimitry Andric 
9995ffd83dbSDimitry Andric   // The relocations may refer to the symbols, so we parse them after we have
10005ffd83dbSDimitry Andric   // parsed all the symbols.
1001349cc55cSDimitry Andric   for (size_t i = 0, n = sections.size(); i < n; ++i)
100281ad6265SDimitry Andric     if (!sections[i]->subsections.empty())
100381ad6265SDimitry Andric       parseRelocations(sectionHeaders, sectionHeaders[i], *sections[i]);
100481ad6265SDimitry Andric 
1005e8d8bef9SDimitry Andric   parseDebugInfo();
100681ad6265SDimitry Andric 
100781ad6265SDimitry Andric   Section *ehFrameSection = nullptr;
100881ad6265SDimitry Andric   Section *compactUnwindSection = nullptr;
100981ad6265SDimitry Andric   for (Section *sec : sections) {
101081ad6265SDimitry Andric     Section **s = StringSwitch<Section **>(sec->name)
101181ad6265SDimitry Andric                       .Case(section_names::compactUnwind, &compactUnwindSection)
101281ad6265SDimitry Andric                       .Case(section_names::ehFrame, &ehFrameSection)
101381ad6265SDimitry Andric                       .Default(nullptr);
101481ad6265SDimitry Andric     if (s)
101581ad6265SDimitry Andric       *s = sec;
101681ad6265SDimitry Andric   }
1017349cc55cSDimitry Andric   if (compactUnwindSection)
101881ad6265SDimitry Andric     registerCompactUnwind(*compactUnwindSection);
1019753f127fSDimitry Andric   if (ehFrameSection)
102081ad6265SDimitry Andric     registerEhFrames(*ehFrameSection);
1021e8d8bef9SDimitry Andric }
1022e8d8bef9SDimitry Andric 
102304eeddc0SDimitry Andric template <class LP> void ObjFile::parseLazy() {
102404eeddc0SDimitry Andric   using Header = typename LP::mach_header;
102504eeddc0SDimitry Andric   using NList = typename LP::nlist;
102604eeddc0SDimitry Andric 
102704eeddc0SDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
102804eeddc0SDimitry Andric   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
102904eeddc0SDimitry Andric   const load_command *cmd = findCommand(hdr, LC_SYMTAB);
103004eeddc0SDimitry Andric   if (!cmd)
103104eeddc0SDimitry Andric     return;
103204eeddc0SDimitry Andric   auto *c = reinterpret_cast<const symtab_command *>(cmd);
103304eeddc0SDimitry Andric   ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
103404eeddc0SDimitry Andric                         c->nsyms);
103504eeddc0SDimitry Andric   const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
103604eeddc0SDimitry Andric   symbols.resize(nList.size());
1037bdd1243dSDimitry Andric   for (const auto &[i, sym] : llvm::enumerate(nList)) {
103804eeddc0SDimitry Andric     if ((sym.n_type & N_EXT) && !isUndef(sym)) {
103904eeddc0SDimitry Andric       // TODO: Bound checking
104004eeddc0SDimitry Andric       StringRef name = strtab + sym.n_strx;
1041bdd1243dSDimitry Andric       symbols[i] = symtab->addLazyObject(name, *this);
104204eeddc0SDimitry Andric       if (!lazy)
104304eeddc0SDimitry Andric         break;
104404eeddc0SDimitry Andric     }
104504eeddc0SDimitry Andric   }
104604eeddc0SDimitry Andric }
104704eeddc0SDimitry Andric 
1048e8d8bef9SDimitry Andric void ObjFile::parseDebugInfo() {
1049e8d8bef9SDimitry Andric   std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
1050e8d8bef9SDimitry Andric   if (!dObj)
1051e8d8bef9SDimitry Andric     return;
1052e8d8bef9SDimitry Andric 
105381ad6265SDimitry Andric   // We do not re-use the context from getDwarf() here as that function
105481ad6265SDimitry Andric   // constructs an expensive DWARFCache object.
1055e8d8bef9SDimitry Andric   auto *ctx = make<DWARFContext>(
1056e8d8bef9SDimitry Andric       std::move(dObj), "",
1057e8d8bef9SDimitry Andric       [&](Error err) {
1058e8d8bef9SDimitry Andric         warn(toString(this) + ": " + toString(std::move(err)));
1059e8d8bef9SDimitry Andric       },
1060e8d8bef9SDimitry Andric       [&](Error warning) {
1061e8d8bef9SDimitry Andric         warn(toString(this) + ": " + toString(std::move(warning)));
1062e8d8bef9SDimitry Andric       });
1063e8d8bef9SDimitry Andric 
1064e8d8bef9SDimitry Andric   // TODO: Since object files can contain a lot of DWARF info, we should verify
1065e8d8bef9SDimitry Andric   // that we are parsing just the info we need
1066e8d8bef9SDimitry Andric   const DWARFContext::compile_unit_range &units = ctx->compile_units();
1067fe6060f1SDimitry Andric   // FIXME: There can be more than one compile unit per object file. See
1068fe6060f1SDimitry Andric   // PR48637.
1069e8d8bef9SDimitry Andric   auto it = units.begin();
107081ad6265SDimitry Andric   compileUnit = it != units.end() ? it->get() : nullptr;
1071fe6060f1SDimitry Andric }
1072fe6060f1SDimitry Andric 
10730eae32dcSDimitry Andric ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
1074fe6060f1SDimitry Andric   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
1075fe6060f1SDimitry Andric   const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
1076fe6060f1SDimitry Andric   if (!cmd)
10770eae32dcSDimitry Andric     return {};
1078fe6060f1SDimitry Andric   const auto *c = reinterpret_cast<const linkedit_data_command *>(cmd);
10790eae32dcSDimitry Andric   return {reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
1080fe6060f1SDimitry Andric           c->datasize / sizeof(data_in_code_entry)};
1081e8d8bef9SDimitry Andric }
1082e8d8bef9SDimitry Andric 
1083bdd1243dSDimitry Andric ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
1084bdd1243dSDimitry Andric   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
1085bdd1243dSDimitry Andric   if (auto *cmd =
1086bdd1243dSDimitry Andric           findCommand<linkedit_data_command>(buf, LC_LINKER_OPTIMIZATION_HINT))
1087bdd1243dSDimitry Andric     return {buf + cmd->dataoff, cmd->datasize};
1088bdd1243dSDimitry Andric   return {};
1089bdd1243dSDimitry Andric }
1090bdd1243dSDimitry Andric 
1091349cc55cSDimitry Andric // Create pointers from symbols to their associated compact unwind entries.
109281ad6265SDimitry Andric void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
109381ad6265SDimitry Andric   for (const Subsection &subsection : compactUnwindSection.subsections) {
1094349cc55cSDimitry Andric     ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec);
1095fcaf7f86SDimitry Andric     // Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed
1096fcaf7f86SDimitry Andric     // their addends in its data. Thus if ICF operated naively and compared the
1097fcaf7f86SDimitry Andric     // entire contents of each CUE, entries with identical unwind info but e.g.
1098fcaf7f86SDimitry Andric     // belonging to different functions would never be considered equivalent. To
1099fcaf7f86SDimitry Andric     // work around this problem, we remove some parts of the data containing the
1100fcaf7f86SDimitry Andric     // embedded addends. In particular, we remove the function address and LSDA
1101fcaf7f86SDimitry Andric     // pointers.  Since these locations are at the start and end of the entry,
1102fcaf7f86SDimitry Andric     // we can do this using a simple, efficient slice rather than performing a
1103fcaf7f86SDimitry Andric     // copy.  We are not losing any information here because the embedded
1104fcaf7f86SDimitry Andric     // addends have already been parsed in the corresponding Reloc structs.
1105fcaf7f86SDimitry Andric     //
1106fcaf7f86SDimitry Andric     // Removing these pointers would not be safe if they were pointers to
1107fcaf7f86SDimitry Andric     // absolute symbols. In that case, there would be no corresponding
1108fcaf7f86SDimitry Andric     // relocation. However, (AFAIK) MC cannot emit references to absolute
1109fcaf7f86SDimitry Andric     // symbols for either the function address or the LSDA. However, it *can* do
1110fcaf7f86SDimitry Andric     // so for the personality pointer, so we are not slicing that field away.
1111fcaf7f86SDimitry Andric     //
1112fcaf7f86SDimitry Andric     // Note that we do not adjust the offsets of the corresponding relocations;
1113fcaf7f86SDimitry Andric     // instead, we rely on `relocateCompactUnwind()` to correctly handle these
1114fcaf7f86SDimitry Andric     // truncated input sections.
1115fcaf7f86SDimitry Andric     isec->data = isec->data.slice(target->wordSize, 8 + target->wordSize);
111681ad6265SDimitry Andric     uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t));
111781ad6265SDimitry Andric     // llvm-mc omits CU entries for functions that need DWARF encoding, but
111881ad6265SDimitry Andric     // `ld -r` doesn't. We can ignore them because we will re-synthesize these
111981ad6265SDimitry Andric     // CU entries from the DWARF info during the output phase.
1120bdd1243dSDimitry Andric     if ((encoding & static_cast<uint32_t>(UNWIND_MODE_MASK)) ==
1121bdd1243dSDimitry Andric         target->modeDwarfEncoding)
112281ad6265SDimitry Andric       continue;
1123349cc55cSDimitry Andric 
1124349cc55cSDimitry Andric     ConcatInputSection *referentIsec;
1125349cc55cSDimitry Andric     for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
1126349cc55cSDimitry Andric       Reloc &r = *it;
1127349cc55cSDimitry Andric       // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
1128349cc55cSDimitry Andric       if (r.offset != 0) {
1129349cc55cSDimitry Andric         ++it;
1130349cc55cSDimitry Andric         continue;
1131349cc55cSDimitry Andric       }
1132349cc55cSDimitry Andric       uint64_t add = r.addend;
1133349cc55cSDimitry Andric       if (auto *sym = cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>())) {
1134349cc55cSDimitry Andric         // Check whether the symbol defined in this file is the prevailing one.
1135349cc55cSDimitry Andric         // Skip if it is e.g. a weak def that didn't prevail.
1136349cc55cSDimitry Andric         if (sym->getFile() != this) {
1137349cc55cSDimitry Andric           ++it;
1138349cc55cSDimitry Andric           continue;
1139349cc55cSDimitry Andric         }
1140349cc55cSDimitry Andric         add += sym->value;
1141349cc55cSDimitry Andric         referentIsec = cast<ConcatInputSection>(sym->isec);
1142349cc55cSDimitry Andric       } else {
1143349cc55cSDimitry Andric         referentIsec =
1144349cc55cSDimitry Andric             cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
1145349cc55cSDimitry Andric       }
114681ad6265SDimitry Andric       // Unwind info lives in __DATA, and finalization of __TEXT will occur
114781ad6265SDimitry Andric       // before finalization of __DATA. Moreover, the finalization of unwind
114881ad6265SDimitry Andric       // info depends on the exact addresses that it references. So it is safe
114981ad6265SDimitry Andric       // for compact unwind to reference addresses in __TEXT, but not addresses
115081ad6265SDimitry Andric       // in any other segment.
1151349cc55cSDimitry Andric       if (referentIsec->getSegName() != segment_names::text)
115281ad6265SDimitry Andric         error(isec->getLocation(r.offset) + " references section " +
115381ad6265SDimitry Andric               referentIsec->getName() + " which is not in segment __TEXT");
1154349cc55cSDimitry Andric       // The functionAddress relocations are typically section relocations.
1155349cc55cSDimitry Andric       // However, unwind info operates on a per-symbol basis, so we search for
1156349cc55cSDimitry Andric       // the function symbol here.
115781ad6265SDimitry Andric       Defined *d = findSymbolAtOffset(referentIsec, add);
115881ad6265SDimitry Andric       if (!d) {
1159349cc55cSDimitry Andric         ++it;
1160349cc55cSDimitry Andric         continue;
1161349cc55cSDimitry Andric       }
116281ad6265SDimitry Andric       d->unwindEntry = isec;
1163fcaf7f86SDimitry Andric       // Now that the symbol points to the unwind entry, we can remove the reloc
1164fcaf7f86SDimitry Andric       // that points from the unwind entry back to the symbol.
1165fcaf7f86SDimitry Andric       //
1166fcaf7f86SDimitry Andric       // First, the symbol keeps the unwind entry alive (and not vice versa), so
1167fcaf7f86SDimitry Andric       // this keeps dead-stripping simple.
1168fcaf7f86SDimitry Andric       //
1169fcaf7f86SDimitry Andric       // Moreover, it reduces the work that ICF needs to do to figure out if
1170fcaf7f86SDimitry Andric       // functions with unwind info are foldable.
1171fcaf7f86SDimitry Andric       //
1172fcaf7f86SDimitry Andric       // However, this does make it possible for ICF to fold CUEs that point to
1173fcaf7f86SDimitry Andric       // distinct functions (if the CUEs are otherwise identical).
1174fcaf7f86SDimitry Andric       // UnwindInfoSection takes care of this by re-duplicating the CUEs so that
1175fcaf7f86SDimitry Andric       // each one can hold a distinct functionAddress value.
1176fcaf7f86SDimitry Andric       //
1177fcaf7f86SDimitry Andric       // Given that clang emits relocations in reverse order of address, this
1178fcaf7f86SDimitry Andric       // relocation should be at the end of the vector for most of our input
1179fcaf7f86SDimitry Andric       // object files, so this erase() is typically an O(1) operation.
1180349cc55cSDimitry Andric       it = isec->relocs.erase(it);
1181349cc55cSDimitry Andric     }
1182349cc55cSDimitry Andric   }
1183349cc55cSDimitry Andric }
1184349cc55cSDimitry Andric 
118581ad6265SDimitry Andric struct CIE {
118681ad6265SDimitry Andric   macho::Symbol *personalitySymbol = nullptr;
118781ad6265SDimitry Andric   bool fdesHaveAug = false;
118861cfbce3SDimitry Andric   uint8_t lsdaPtrSize = 0; // 0 => no LSDA
118961cfbce3SDimitry Andric   uint8_t funcPtrSize = 0;
119081ad6265SDimitry Andric };
119181ad6265SDimitry Andric 
119261cfbce3SDimitry Andric static uint8_t pointerEncodingToSize(uint8_t enc) {
119361cfbce3SDimitry Andric   switch (enc & 0xf) {
119461cfbce3SDimitry Andric   case dwarf::DW_EH_PE_absptr:
119561cfbce3SDimitry Andric     return target->wordSize;
119661cfbce3SDimitry Andric   case dwarf::DW_EH_PE_sdata4:
119761cfbce3SDimitry Andric     return 4;
119861cfbce3SDimitry Andric   case dwarf::DW_EH_PE_sdata8:
119961cfbce3SDimitry Andric     // ld64 doesn't actually support sdata8, but this seems simple enough...
120061cfbce3SDimitry Andric     return 8;
120161cfbce3SDimitry Andric   default:
120261cfbce3SDimitry Andric     return 0;
120361cfbce3SDimitry Andric   };
120461cfbce3SDimitry Andric }
120561cfbce3SDimitry Andric 
120681ad6265SDimitry Andric static CIE parseCIE(const InputSection *isec, const EhReader &reader,
120781ad6265SDimitry Andric                     size_t off) {
120881ad6265SDimitry Andric   // Handling the full generality of possible DWARF encodings would be a major
120981ad6265SDimitry Andric   // pain. We instead take advantage of our knowledge of how llvm-mc encodes
121081ad6265SDimitry Andric   // DWARF and handle just that.
121181ad6265SDimitry Andric   constexpr uint8_t expectedPersonalityEnc =
121281ad6265SDimitry Andric       dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
121381ad6265SDimitry Andric 
121481ad6265SDimitry Andric   CIE cie;
121581ad6265SDimitry Andric   uint8_t version = reader.readByte(&off);
121681ad6265SDimitry Andric   if (version != 1 && version != 3)
121781ad6265SDimitry Andric     fatal("Expected CIE version of 1 or 3, got " + Twine(version));
121881ad6265SDimitry Andric   StringRef aug = reader.readString(&off);
121981ad6265SDimitry Andric   reader.skipLeb128(&off); // skip code alignment
122081ad6265SDimitry Andric   reader.skipLeb128(&off); // skip data alignment
122181ad6265SDimitry Andric   reader.skipLeb128(&off); // skip return address register
122281ad6265SDimitry Andric   reader.skipLeb128(&off); // skip aug data length
122381ad6265SDimitry Andric   uint64_t personalityAddrOff = 0;
122481ad6265SDimitry Andric   for (char c : aug) {
122581ad6265SDimitry Andric     switch (c) {
122681ad6265SDimitry Andric     case 'z':
122781ad6265SDimitry Andric       cie.fdesHaveAug = true;
122881ad6265SDimitry Andric       break;
122981ad6265SDimitry Andric     case 'P': {
123081ad6265SDimitry Andric       uint8_t personalityEnc = reader.readByte(&off);
123181ad6265SDimitry Andric       if (personalityEnc != expectedPersonalityEnc)
123281ad6265SDimitry Andric         reader.failOn(off, "unexpected personality encoding 0x" +
123381ad6265SDimitry Andric                                Twine::utohexstr(personalityEnc));
123481ad6265SDimitry Andric       personalityAddrOff = off;
123581ad6265SDimitry Andric       off += 4;
123681ad6265SDimitry Andric       break;
123781ad6265SDimitry Andric     }
123881ad6265SDimitry Andric     case 'L': {
123981ad6265SDimitry Andric       uint8_t lsdaEnc = reader.readByte(&off);
124061cfbce3SDimitry Andric       cie.lsdaPtrSize = pointerEncodingToSize(lsdaEnc);
124161cfbce3SDimitry Andric       if (cie.lsdaPtrSize == 0)
124281ad6265SDimitry Andric         reader.failOn(off, "unexpected LSDA encoding 0x" +
124381ad6265SDimitry Andric                                Twine::utohexstr(lsdaEnc));
124481ad6265SDimitry Andric       break;
124581ad6265SDimitry Andric     }
124681ad6265SDimitry Andric     case 'R': {
124781ad6265SDimitry Andric       uint8_t pointerEnc = reader.readByte(&off);
124861cfbce3SDimitry Andric       cie.funcPtrSize = pointerEncodingToSize(pointerEnc);
124961cfbce3SDimitry Andric       if (cie.funcPtrSize == 0 || !(pointerEnc & dwarf::DW_EH_PE_pcrel))
125081ad6265SDimitry Andric         reader.failOn(off, "unexpected pointer encoding 0x" +
125181ad6265SDimitry Andric                                Twine::utohexstr(pointerEnc));
125281ad6265SDimitry Andric       break;
125381ad6265SDimitry Andric     }
125481ad6265SDimitry Andric     default:
125581ad6265SDimitry Andric       break;
125681ad6265SDimitry Andric     }
125781ad6265SDimitry Andric   }
125881ad6265SDimitry Andric   if (personalityAddrOff != 0) {
1259*06c3fb27SDimitry Andric     const auto *personalityReloc = isec->getRelocAt(personalityAddrOff);
1260*06c3fb27SDimitry Andric     if (!personalityReloc)
126181ad6265SDimitry Andric       reader.failOn(off, "Failed to locate relocation for personality symbol");
1262*06c3fb27SDimitry Andric     cie.personalitySymbol = personalityReloc->referent.get<macho::Symbol *>();
126381ad6265SDimitry Andric   }
126481ad6265SDimitry Andric   return cie;
126581ad6265SDimitry Andric }
126681ad6265SDimitry Andric 
126781ad6265SDimitry Andric // EH frame target addresses may be encoded as pcrel offsets. However, instead
126881ad6265SDimitry Andric // of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
126981ad6265SDimitry Andric // This function recovers the target address from the subtractors, essentially
127081ad6265SDimitry Andric // performing the inverse operation of EhRelocator.
127181ad6265SDimitry Andric //
127281ad6265SDimitry Andric // Concretely, we expect our relocations to write the value of `PC -
127381ad6265SDimitry Andric // target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that
127481ad6265SDimitry Andric // points to a symbol plus an addend.
127581ad6265SDimitry Andric //
127681ad6265SDimitry Andric // It is important that the minuend relocation point to a symbol within the
127781ad6265SDimitry Andric // same section as the fixup value, since sections may get moved around.
127881ad6265SDimitry Andric //
127981ad6265SDimitry Andric // For example, for arm64, llvm-mc emits relocations for the target function
128081ad6265SDimitry Andric // address like so:
128181ad6265SDimitry Andric //
128281ad6265SDimitry Andric //   ltmp:
128381ad6265SDimitry Andric //     <CIE start>
128481ad6265SDimitry Andric //     ...
128581ad6265SDimitry Andric //     <CIE end>
128681ad6265SDimitry Andric //     ... multiple FDEs ...
128781ad6265SDimitry Andric //     <FDE start>
128881ad6265SDimitry Andric //     <target function address - (ltmp + pcrel offset)>
128981ad6265SDimitry Andric //     ...
129081ad6265SDimitry Andric //
129181ad6265SDimitry Andric // If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start`
129281ad6265SDimitry Andric // will move to an earlier address, and `ltmp + pcrel offset` will no longer
129381ad6265SDimitry Andric // reflect an accurate pcrel value. To avoid this problem, we "canonicalize"
129481ad6265SDimitry Andric // our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating
129581ad6265SDimitry Andric // the reloc to be `target function address - (EH_Frame + new pcrel offset)`.
129681ad6265SDimitry Andric //
129781ad6265SDimitry Andric // If `Invert` is set, then we instead expect `target_addr - PC` to be written
129881ad6265SDimitry Andric // to `PC`.
129981ad6265SDimitry Andric template <bool Invert = false>
130081ad6265SDimitry Andric Defined *
130181ad6265SDimitry Andric targetSymFromCanonicalSubtractor(const InputSection *isec,
130281ad6265SDimitry Andric                                  std::vector<macho::Reloc>::iterator relocIt) {
130381ad6265SDimitry Andric   macho::Reloc &subtrahend = *relocIt;
130481ad6265SDimitry Andric   macho::Reloc &minuend = *std::next(relocIt);
130581ad6265SDimitry Andric   assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
130681ad6265SDimitry Andric   assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
130781ad6265SDimitry Andric   // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero
130881ad6265SDimitry Andric   // addend.
130981ad6265SDimitry Andric   auto *pcSym = cast<Defined>(subtrahend.referent.get<macho::Symbol *>());
131081ad6265SDimitry Andric   Defined *target =
131181ad6265SDimitry Andric       cast_or_null<Defined>(minuend.referent.dyn_cast<macho::Symbol *>());
131281ad6265SDimitry Andric   if (!pcSym) {
131381ad6265SDimitry Andric     auto *targetIsec =
131481ad6265SDimitry Andric         cast<ConcatInputSection>(minuend.referent.get<InputSection *>());
131581ad6265SDimitry Andric     target = findSymbolAtOffset(targetIsec, minuend.addend);
131681ad6265SDimitry Andric   }
131781ad6265SDimitry Andric   if (Invert)
131881ad6265SDimitry Andric     std::swap(pcSym, target);
131981ad6265SDimitry Andric   if (pcSym->isec == isec) {
132081ad6265SDimitry Andric     if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
132181ad6265SDimitry Andric       fatal("invalid FDE relocation in __eh_frame");
132281ad6265SDimitry Andric   } else {
132381ad6265SDimitry Andric     // Ensure the pcReloc points to a symbol within the current EH frame.
132481ad6265SDimitry Andric     // HACK: we should really verify that the original relocation's semantics
132581ad6265SDimitry Andric     // are preserved. In particular, we should have
132681ad6265SDimitry Andric     // `oldSym->value + oldOffset == newSym + newOffset`. However, we don't
132781ad6265SDimitry Andric     // have an easy way to access the offsets from this point in the code; some
132881ad6265SDimitry Andric     // refactoring is needed for that.
132981ad6265SDimitry Andric     macho::Reloc &pcReloc = Invert ? minuend : subtrahend;
133081ad6265SDimitry Andric     pcReloc.referent = isec->symbols[0];
133181ad6265SDimitry Andric     assert(isec->symbols[0]->value == 0);
133281ad6265SDimitry Andric     minuend.addend = pcReloc.offset * (Invert ? 1LL : -1LL);
133381ad6265SDimitry Andric   }
133481ad6265SDimitry Andric   return target;
133581ad6265SDimitry Andric }
133681ad6265SDimitry Andric 
133781ad6265SDimitry Andric Defined *findSymbolAtAddress(const std::vector<Section *> &sections,
133881ad6265SDimitry Andric                              uint64_t addr) {
133981ad6265SDimitry Andric   Section *sec = findContainingSection(sections, &addr);
134081ad6265SDimitry Andric   auto *isec = cast<ConcatInputSection>(findContainingSubsection(*sec, &addr));
134181ad6265SDimitry Andric   return findSymbolAtOffset(isec, addr);
134281ad6265SDimitry Andric }
134381ad6265SDimitry Andric 
134481ad6265SDimitry Andric // For symbols that don't have compact unwind info, associate them with the more
134581ad6265SDimitry Andric // general-purpose (and verbose) DWARF unwind info found in __eh_frame.
134681ad6265SDimitry Andric //
134781ad6265SDimitry Andric // This requires us to parse the contents of __eh_frame. See EhFrame.h for a
134881ad6265SDimitry Andric // description of its format.
134981ad6265SDimitry Andric //
135081ad6265SDimitry Andric // While parsing, we also look for what MC calls "abs-ified" relocations -- they
135181ad6265SDimitry Andric // are relocations which are implicitly encoded as offsets in the section data.
135281ad6265SDimitry Andric // We convert them into explicit Reloc structs so that the EH frames can be
135381ad6265SDimitry Andric // handled just like a regular ConcatInputSection later in our output phase.
135481ad6265SDimitry Andric //
135581ad6265SDimitry Andric // We also need to handle the case where our input object file has explicit
135681ad6265SDimitry Andric // relocations. This is the case when e.g. it's the output of `ld -r`. We only
135781ad6265SDimitry Andric // look for the "abs-ified" relocation if an explicit relocation is absent.
135881ad6265SDimitry Andric void ObjFile::registerEhFrames(Section &ehFrameSection) {
  // CIEs parsed so far, keyed by the input section that holds each one. FDEs
  // visited later in the loop look up their CIE here.
135981ad6265SDimitry Andric   DenseMap<const InputSection *, CIE> cieMap;
136081ad6265SDimitry Andric   for (const Subsection &subsec : ehFrameSection.subsections) {
136181ad6265SDimitry Andric     auto *isec = cast<ConcatInputSection>(subsec.isec);
136281ad6265SDimitry Andric     uint64_t isecOff = subsec.offset;
136381ad6265SDimitry Andric 
136481ad6265SDimitry Andric     // Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
136581ad6265SDimitry Andric     // that all EH frames have an associated symbol so that we can generate
136681ad6265SDimitry Andric     // subtractor relocs that reference them.
136781ad6265SDimitry Andric     if (isec->symbols.size() == 0)
1368bdd1243dSDimitry Andric       make<Defined>("EH_Frame", isec->getFile(), isec, /*value=*/0,
1369bdd1243dSDimitry Andric                     isec->getSize(), /*isWeakDef=*/false, /*isExternal=*/false,
1370bdd1243dSDimitry Andric                     /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
1371*06c3fb27SDimitry Andric                     /*isReferencedDynamically=*/false,
1372bdd1243dSDimitry Andric                     /*noDeadStrip=*/false);
137381ad6265SDimitry Andric     else if (isec->symbols[0]->value != 0)
137481ad6265SDimitry Andric       fatal("found symbol at unexpected offset in __eh_frame");
137581ad6265SDimitry Andric 
137661cfbce3SDimitry Andric     EhReader reader(this, isec->data, subsec.offset);
137781ad6265SDimitry Andric     size_t dataOff = 0; // Offset from the start of the EH frame.
137881ad6265SDimitry Andric     reader.skipValidLength(&dataOff); // readLength() already validated this.
137981ad6265SDimitry Andric     // cieOffOff is the offset from the start of the EH frame to the cieOff
138081ad6265SDimitry Andric     // value, which is itself an offset from the current PC to a CIE.
138181ad6265SDimitry Andric     const size_t cieOffOff = dataOff;
138281ad6265SDimitry Andric 
    // Relocations synthesized for this frame are queued on ehRelocator and
    // only take effect at the commit() call at the bottom of the loop body;
    // the `continue` paths below skip that call.
138381ad6265SDimitry Andric     EhRelocator ehRelocator(isec);
138481ad6265SDimitry Andric     auto cieOffRelocIt = llvm::find_if(
138581ad6265SDimitry Andric         isec->relocs, [=](const Reloc &r) { return r.offset == cieOffOff; });
138681ad6265SDimitry Andric     InputSection *cieIsec = nullptr;
138781ad6265SDimitry Andric     if (cieOffRelocIt != isec->relocs.end()) {
138881ad6265SDimitry Andric       // We already have an explicit relocation for the CIE offset.
138981ad6265SDimitry Andric       cieIsec =
139081ad6265SDimitry Andric           targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
139181ad6265SDimitry Andric               ->isec;
139281ad6265SDimitry Andric       dataOff += sizeof(uint32_t);
139381ad6265SDimitry Andric     } else {
139481ad6265SDimitry Andric       // If we haven't found a relocation, then the CIE offset is most likely
139581ad6265SDimitry Andric       // embedded in the section data (AKA an "abs-ified" reloc.). Parse that
139681ad6265SDimitry Andric       // and generate a Reloc struct.
139781ad6265SDimitry Andric       uint32_t cieMinuend = reader.readU32(&dataOff);
      // A zero CIE offset means this record points at itself, which the code
      // below treats as "this record is a CIE".
1398bdd1243dSDimitry Andric       if (cieMinuend == 0) {
139981ad6265SDimitry Andric         cieIsec = isec;
1400bdd1243dSDimitry Andric       } else {
140181ad6265SDimitry Andric         uint32_t cieOff = isecOff + dataOff - cieMinuend;
140281ad6265SDimitry Andric         cieIsec = findContainingSubsection(ehFrameSection, &cieOff);
140381ad6265SDimitry Andric         if (cieIsec == nullptr)
140481ad6265SDimitry Andric           fatal("failed to find CIE");
140581ad6265SDimitry Andric       }
140681ad6265SDimitry Andric       if (cieIsec != isec)
140781ad6265SDimitry Andric         ehRelocator.makeNegativePcRel(cieOffOff, cieIsec->symbols[0],
140881ad6265SDimitry Andric                                       /*length=*/2);
140981ad6265SDimitry Andric     }
    // The record is its own CIE: parse it, remember it for the FDEs that
    // follow, and move on to the next subsection.
141081ad6265SDimitry Andric     if (cieIsec == isec) {
141181ad6265SDimitry Andric       cieMap[cieIsec] = parseCIE(isec, reader, dataOff);
141281ad6265SDimitry Andric       continue;
141381ad6265SDimitry Andric     }
141481ad6265SDimitry Andric 
    // From here on the record is an FDE whose CIE must already be in cieMap.
141581ad6265SDimitry Andric     assert(cieMap.count(cieIsec));
141681ad6265SDimitry Andric     const CIE &cie = cieMap[cieIsec];
141761cfbce3SDimitry Andric     // Offset of the function address within the EH frame.
141861cfbce3SDimitry Andric     const size_t funcAddrOff = dataOff;
141961cfbce3SDimitry Andric     uint64_t funcAddr = reader.readPointer(&dataOff, cie.funcPtrSize) +
142061cfbce3SDimitry Andric                         ehFrameSection.addr + isecOff + funcAddrOff;
    // NOTE(review): the value read with cie.funcPtrSize is stored in 32 bits;
    // presumably function lengths never exceed that -- confirm.
142161cfbce3SDimitry Andric     uint32_t funcLength = reader.readPointer(&dataOff, cie.funcPtrSize);
142261cfbce3SDimitry Andric     size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
1423bdd1243dSDimitry Andric     std::optional<uint64_t> lsdaAddrOpt;
142481ad6265SDimitry Andric     if (cie.fdesHaveAug) {
142581ad6265SDimitry Andric       reader.skipLeb128(&dataOff);
142681ad6265SDimitry Andric       lsdaAddrOff = dataOff;
142781ad6265SDimitry Andric       if (cie.lsdaPtrSize != 0) {
142861cfbce3SDimitry Andric         uint64_t lsdaOff = reader.readPointer(&dataOff, cie.lsdaPtrSize);
142981ad6265SDimitry Andric         if (lsdaOff != 0) // FIXME possible to test this?
143081ad6265SDimitry Andric           lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
143181ad6265SDimitry Andric       }
143281ad6265SDimitry Andric     }
143381ad6265SDimitry Andric 
    // Look for explicit relocations that cover the function-address and LSDA
    // fields. NOTE(review): after a match, `it++` plus the loop's own `++it`
    // advances two entries, so this relies on a minuend reloc always following
    // the matched subtrahend; a lone trailing reloc would step past end() --
    // confirm the input is validated elsewhere.
143481ad6265SDimitry Andric     auto funcAddrRelocIt = isec->relocs.end();
143581ad6265SDimitry Andric     auto lsdaAddrRelocIt = isec->relocs.end();
143681ad6265SDimitry Andric     for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
143781ad6265SDimitry Andric       if (it->offset == funcAddrOff)
143881ad6265SDimitry Andric         funcAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
143981ad6265SDimitry Andric       else if (lsdaAddrOpt && it->offset == lsdaAddrOff)
144081ad6265SDimitry Andric         lsdaAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
144181ad6265SDimitry Andric     }
144281ad6265SDimitry Andric 
144381ad6265SDimitry Andric     Defined *funcSym;
144481ad6265SDimitry Andric     if (funcAddrRelocIt != isec->relocs.end()) {
144581ad6265SDimitry Andric       funcSym = targetSymFromCanonicalSubtractor(isec, funcAddrRelocIt);
1446fcaf7f86SDimitry Andric       // Canonicalize the symbol. If there are multiple symbols at the same
1447fcaf7f86SDimitry Andric       // address, we want both `registerEhFrame` and `registerCompactUnwind`
1448fcaf7f86SDimitry Andric       // to register the unwind entry under same symbol.
1449fcaf7f86SDimitry Andric       // This is not particularly efficient, but we should run into this case
1450fcaf7f86SDimitry Andric       // infrequently (only when handling the output of `ld -r`).
1451fcaf7f86SDimitry Andric       if (funcSym->isec)
1452fcaf7f86SDimitry Andric         funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec),
1453fcaf7f86SDimitry Andric                                      funcSym->value);
145481ad6265SDimitry Andric     } else {
      // No explicit reloc: resolve the address decoded from the section data
      // and queue a pc-relative reloc for it. funcSym may be null here; that
      // case is rejected right below, before commit() is reached.
145581ad6265SDimitry Andric       funcSym = findSymbolAtAddress(sections, funcAddr);
145681ad6265SDimitry Andric       ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
145781ad6265SDimitry Andric     }
145881ad6265SDimitry Andric     // The symbol has been coalesced, or already has a compact unwind entry.
145981ad6265SDimitry Andric     if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
146081ad6265SDimitry Andric       // We must prune unused FDEs for correctness, so we cannot rely on
146181ad6265SDimitry Andric       // -dead_strip being enabled.
146281ad6265SDimitry Andric       isec->live = false;
146381ad6265SDimitry Andric       continue;
146481ad6265SDimitry Andric     }
146581ad6265SDimitry Andric 
146681ad6265SDimitry Andric     InputSection *lsdaIsec = nullptr;
146781ad6265SDimitry Andric     if (lsdaAddrRelocIt != isec->relocs.end()) {
146881ad6265SDimitry Andric       lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
146981ad6265SDimitry Andric     } else if (lsdaAddrOpt) {
147081ad6265SDimitry Andric       uint64_t lsdaAddr = *lsdaAddrOpt;
      // NOTE(review): `sec` is dereferenced without a null check; presumably
      // the LSDA address always falls inside a known section -- confirm.
147181ad6265SDimitry Andric       Section *sec = findContainingSection(sections, &lsdaAddr);
147281ad6265SDimitry Andric       lsdaIsec =
147381ad6265SDimitry Andric           cast<ConcatInputSection>(findContainingSubsection(*sec, &lsdaAddr));
147481ad6265SDimitry Andric       ehRelocator.makePcRel(lsdaAddrOff, lsdaIsec, target->p2WordSize);
147581ad6265SDimitry Andric     }
147681ad6265SDimitry Andric 
    // Record the FDE, make it the unwind entry of its function symbol, and
    // commit the relocations queued above.
147781ad6265SDimitry Andric     fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
147881ad6265SDimitry Andric     funcSym->unwindEntry = isec;
147981ad6265SDimitry Andric     ehRelocator.commit();
148081ad6265SDimitry Andric   }
14816246ae0bSDimitry Andric 
14826246ae0bSDimitry Andric   // __eh_frame is marked as S_ATTR_LIVE_SUPPORT in input files, because FDEs
14836246ae0bSDimitry Andric   // are normally required to be kept alive if they reference a live symbol.
14846246ae0bSDimitry Andric   // However, we've explicitly created a dependency from a symbol to its FDE, so
14856246ae0bSDimitry Andric   // dead-stripping will just work as usual, and S_ATTR_LIVE_SUPPORT will only
14866246ae0bSDimitry Andric   // serve to incorrectly prevent us from dead-stripping duplicate FDEs for a
14876246ae0bSDimitry Andric   // live symbol (e.g. if there were multiple weak copies). Remove this flag to
14886246ae0bSDimitry Andric   // let dead-stripping proceed correctly.
14896246ae0bSDimitry Andric   ehFrameSection.flags &= ~S_ATTR_LIVE_SUPPORT;
149081ad6265SDimitry Andric }
149181ad6265SDimitry Andric 
149281ad6265SDimitry Andric std::string ObjFile::sourceFile() const {
149381ad6265SDimitry Andric   SmallString<261> dir(compileUnit->getCompilationDir());
149481ad6265SDimitry Andric   StringRef sep = sys::path::get_separator();
149581ad6265SDimitry Andric   // We don't use `path::append` here because we want an empty `dir` to result
149681ad6265SDimitry Andric   // in an absolute path. `append` would give us a relative path for that case.
149781ad6265SDimitry Andric   if (!dir.endswith(sep))
149881ad6265SDimitry Andric     dir += sep;
149981ad6265SDimitry Andric   return (dir + compileUnit->getUnitDIE().getShortName()).str();
150081ad6265SDimitry Andric }
150181ad6265SDimitry Andric 
150281ad6265SDimitry Andric lld::DWARFCache *ObjFile::getDwarf() {
150381ad6265SDimitry Andric   llvm::call_once(initDwarf, [this]() {
150481ad6265SDimitry Andric     auto dwObj = DwarfObject::create(this);
150581ad6265SDimitry Andric     if (!dwObj)
150681ad6265SDimitry Andric       return;
150781ad6265SDimitry Andric     dwarfCache = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
150881ad6265SDimitry Andric         std::move(dwObj), "",
150981ad6265SDimitry Andric         [&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
151081ad6265SDimitry Andric         [&](Error warning) {
151181ad6265SDimitry Andric           warn(getName() + ": " + toString(std::move(warning)));
151281ad6265SDimitry Andric         }));
151381ad6265SDimitry Andric   });
151481ad6265SDimitry Andric 
151581ad6265SDimitry Andric   return dwarfCache.get();
151681ad6265SDimitry Andric }
1517e8d8bef9SDimitry Andric // The path can point to either a dylib or a .tbd file.
1518fe6060f1SDimitry Andric static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
1519bdd1243dSDimitry Andric   std::optional<MemoryBufferRef> mbref = readFile(path);
1520e8d8bef9SDimitry Andric   if (!mbref) {
1521e8d8bef9SDimitry Andric     error("could not read dylib file at " + path);
1522fe6060f1SDimitry Andric     return nullptr;
1523e8d8bef9SDimitry Andric   }
1524e8d8bef9SDimitry Andric   return loadDylib(*mbref, umbrella);
1525e8d8bef9SDimitry Andric }
1526e8d8bef9SDimitry Andric 
1527e8d8bef9SDimitry Andric // TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
1528e8d8bef9SDimitry Andric // the first document storing child pointers to the rest of them. When we are
1529fe6060f1SDimitry Andric // processing a given TBD file, we store that top-level document in
1530fe6060f1SDimitry Andric // currentTopLevelTapi. When processing re-exports, we search its children for
1531fe6060f1SDimitry Andric // potentially matching documents in the same TBD file. Note that the children
1532fe6060f1SDimitry Andric // themselves don't point to further documents, i.e. this is a two-level tree.
1533e8d8bef9SDimitry Andric //
1534e8d8bef9SDimitry Andric // Re-exports can either refer to on-disk files, or to documents within .tbd
1535e8d8bef9SDimitry Andric // files.
// Returns the DylibFile loaded for install name `path`, or nullptr when none
// of the lookup strategies below resolves to a file or TAPI document.
1536fe6060f1SDimitry Andric static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
1537fe6060f1SDimitry Andric                             const InterfaceFile *currentTopLevelTapi) {
1538fe6060f1SDimitry Andric   // Search order:
1539fe6060f1SDimitry Andric   // 1. Install name basename in -F / -L directories.
1540fe6060f1SDimitry Andric   {
    // frameworkName becomes "<stem>.framework/<stem>"; an install name ending
    // in that suffix is treated as a framework and searched via -F paths,
    // anything else via -L paths with the extensions listed below.
1541fe6060f1SDimitry Andric     StringRef stem = path::stem(path);
1542fe6060f1SDimitry Andric     SmallString<128> frameworkName;
1543fe6060f1SDimitry Andric     path::append(frameworkName, path::Style::posix, stem + ".framework", stem);
1544*06c3fb27SDimitry Andric     bool isFramework = path.ends_with(frameworkName);
1545fe6060f1SDimitry Andric     if (isFramework) {
1546fe6060f1SDimitry Andric       for (StringRef dir : config->frameworkSearchPaths) {
1547fe6060f1SDimitry Andric         SmallString<128> candidate = dir;
1548fe6060f1SDimitry Andric         path::append(candidate, frameworkName);
1549bdd1243dSDimitry Andric         if (std::optional<StringRef> dylibPath =
1550bdd1243dSDimitry Andric                 resolveDylibPath(candidate.str()))
1551fe6060f1SDimitry Andric           return loadDylib(*dylibPath, umbrella);
1552fe6060f1SDimitry Andric       }
1553bdd1243dSDimitry Andric     } else if (std::optional<StringRef> dylibPath = findPathCombination(
1554*06c3fb27SDimitry Andric                    stem, config->librarySearchPaths, {".tbd", ".dylib", ".so"}))
1555fe6060f1SDimitry Andric       return loadDylib(*dylibPath, umbrella);
1556fe6060f1SDimitry Andric   }
1557fe6060f1SDimitry Andric 
1558fe6060f1SDimitry Andric   // 2. As absolute path.
1559e8d8bef9SDimitry Andric   if (path::is_absolute(path, path::Style::posix))
1560e8d8bef9SDimitry Andric     for (StringRef root : config->systemLibraryRoots)
1561bdd1243dSDimitry Andric       if (std::optional<StringRef> dylibPath =
1562bdd1243dSDimitry Andric               resolveDylibPath((root + path).str()))
1563e8d8bef9SDimitry Andric         return loadDylib(*dylibPath, umbrella);
1564e8d8bef9SDimitry Andric 
1565fe6060f1SDimitry Andric   // 3. As relative path.
1566e8d8bef9SDimitry Andric 
1567fe6060f1SDimitry Andric   // TODO: Handle -dylib_file
1568fe6060f1SDimitry Andric 
1569fe6060f1SDimitry Andric   // Replace @executable_path, @loader_path, @rpath prefixes in install name.
  // newPath owns the rewritten string; after `path = newPath`, `path` is a
  // view into newPath's storage, so newPath must outlive every later use of
  // `path` in this function.
1570fe6060f1SDimitry Andric   SmallString<128> newPath;
1571fe6060f1SDimitry Andric   if (config->outputType == MH_EXECUTE &&
1572fe6060f1SDimitry Andric       path.consume_front("@executable_path/")) {
1573fe6060f1SDimitry Andric     // ld64 allows overriding this with the undocumented flag -executable_path.
1574fe6060f1SDimitry Andric     // lld doesn't currently implement that flag.
1575fe6060f1SDimitry Andric     // FIXME: Consider using finalOutput instead of outputFile.
1576fe6060f1SDimitry Andric     path::append(newPath, path::parent_path(config->outputFile), path);
1577fe6060f1SDimitry Andric     path = newPath;
1578fe6060f1SDimitry Andric   } else if (path.consume_front("@loader_path/")) {
    // NOTE(review): the result of fs::real_path is ignored here and in the
    // @rpath branch below; on failure newPath is presumably left empty and the
    // lookup degrades to a relative path -- confirm that is intended.
1579fe6060f1SDimitry Andric     fs::real_path(umbrella->getName(), newPath);
1580fe6060f1SDimitry Andric     path::remove_filename(newPath);
1581fe6060f1SDimitry Andric     path::append(newPath, path);
1582fe6060f1SDimitry Andric     path = newPath;
1583*06c3fb27SDimitry Andric   } else if (path.starts_with("@rpath/")) {
    // Try each rpath recorded on the umbrella in turn. `path` itself is left
    // unchanged in this branch; only the per-iteration copy `rpath` and
    // newPath are rewritten.
1584fe6060f1SDimitry Andric     for (StringRef rpath : umbrella->rpaths) {
1585fe6060f1SDimitry Andric       newPath.clear();
1586fe6060f1SDimitry Andric       if (rpath.consume_front("@loader_path/")) {
1587fe6060f1SDimitry Andric         fs::real_path(umbrella->getName(), newPath);
1588fe6060f1SDimitry Andric         path::remove_filename(newPath);
1589fe6060f1SDimitry Andric       }
1590fe6060f1SDimitry Andric       path::append(newPath, rpath, path.drop_front(strlen("@rpath/")));
1591bdd1243dSDimitry Andric       if (std::optional<StringRef> dylibPath = resolveDylibPath(newPath.str()))
1592fe6060f1SDimitry Andric         return loadDylib(*dylibPath, umbrella);
1593fe6060f1SDimitry Andric     }
1594fe6060f1SDimitry Andric   }
1595fe6060f1SDimitry Andric 
1596fe6060f1SDimitry Andric   // FIXME: Should this be further up?
  // Match the (possibly rewritten) path against the install names of the
  // sibling documents of the TBD file currently being processed.
1597e8d8bef9SDimitry Andric   if (currentTopLevelTapi) {
1598e8d8bef9SDimitry Andric     for (InterfaceFile &child :
1599e8d8bef9SDimitry Andric          make_pointee_range(currentTopLevelTapi->documents())) {
1600e8d8bef9SDimitry Andric       assert(child.documents().empty());
1601fe6060f1SDimitry Andric       if (path == child.getInstallName()) {
1602*06c3fb27SDimitry Andric         auto *file = make<DylibFile>(child, umbrella, /*isBundleLoader=*/false,
160381ad6265SDimitry Andric                                      /*explicitlyLinked=*/false);
1604fe6060f1SDimitry Andric         file->parseReexports(child);
1605fe6060f1SDimitry Andric         return file;
1606fe6060f1SDimitry Andric       }
1607e8d8bef9SDimitry Andric     }
1608e8d8bef9SDimitry Andric   }
1609e8d8bef9SDimitry Andric 
  // Last resort: try the (possibly rewritten) path as it stands.
1610bdd1243dSDimitry Andric   if (std::optional<StringRef> dylibPath = resolveDylibPath(path))
1611e8d8bef9SDimitry Andric     return loadDylib(*dylibPath, umbrella);
1612e8d8bef9SDimitry Andric 
1613fe6060f1SDimitry Andric   return nullptr;
1614e8d8bef9SDimitry Andric }
1615e8d8bef9SDimitry Andric 
1616e8d8bef9SDimitry Andric // If a re-exported dylib is public (lives in /usr/lib or
1617e8d8bef9SDimitry Andric // /System/Library/Frameworks), then it is considered implicitly linked: we
1618e8d8bef9SDimitry Andric // should bind to its symbols directly instead of via the re-exporting umbrella
1619e8d8bef9SDimitry Andric // library.
1620e8d8bef9SDimitry Andric static bool isImplicitlyLinked(StringRef path) {
1621e8d8bef9SDimitry Andric   if (!config->implicitDylibs)
1622e8d8bef9SDimitry Andric     return false;
1623e8d8bef9SDimitry Andric 
1624e8d8bef9SDimitry Andric   if (path::parent_path(path) == "/usr/lib")
1625e8d8bef9SDimitry Andric     return true;
1626e8d8bef9SDimitry Andric 
1627e8d8bef9SDimitry Andric   // Match /System/Library/Frameworks/$FOO.framework/**/$FOO
1628e8d8bef9SDimitry Andric   if (path.consume_front("/System/Library/Frameworks/")) {
1629e8d8bef9SDimitry Andric     StringRef frameworkName = path.take_until([](char c) { return c == '.'; });
1630e8d8bef9SDimitry Andric     return path::filename(path) == frameworkName;
1631e8d8bef9SDimitry Andric   }
1632e8d8bef9SDimitry Andric 
1633e8d8bef9SDimitry Andric   return false;
1634e8d8bef9SDimitry Andric }
1635e8d8bef9SDimitry Andric 
1636bdd1243dSDimitry Andric void DylibFile::loadReexport(StringRef path, DylibFile *umbrella,
1637fe6060f1SDimitry Andric                          const InterfaceFile *currentTopLevelTapi) {
1638fe6060f1SDimitry Andric   DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
1639fe6060f1SDimitry Andric   if (!reexport)
1640bdd1243dSDimitry Andric     error(toString(this) + ": unable to locate re-export with install name " +
1641bdd1243dSDimitry Andric           path);
16425ffd83dbSDimitry Andric }
16435ffd83dbSDimitry Andric 
// Builds a DylibFile from the Mach-O image in `mb`: records the umbrella,
// reads versions and install name from LC_ID_DYLIB, registers the file in
// inputFiles, collects LC_RPATH entries, and finally adds the symbols found in
// the export trie. Each error path returns early and leaves the remaining
// steps undone.
1644fe6060f1SDimitry Andric DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
164581ad6265SDimitry Andric                      bool isBundleLoader, bool explicitlyLinked)
1646fe6060f1SDimitry Andric     : InputFile(DylibKind, mb), refState(RefState::Unreferenced),
164781ad6265SDimitry Andric       explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1648fe6060f1SDimitry Andric   assert(!isBundleLoader || !umbrella);
  // A dylib loaded without an umbrella acts as its own umbrella.
16495ffd83dbSDimitry Andric   if (umbrella == nullptr)
16505ffd83dbSDimitry Andric     umbrella = this;
1651fe6060f1SDimitry Andric   this->umbrella = umbrella;
16525ffd83dbSDimitry Andric 
1653fe6060f1SDimitry Andric   auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
16545ffd83dbSDimitry Andric 
1655fe6060f1SDimitry Andric   // Initialize installName.
16565ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
16575ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const dylib_command *>(cmd);
1658e8d8bef9SDimitry Andric     currentVersion = read32le(&c->dylib.current_version);
1659e8d8bef9SDimitry Andric     compatibilityVersion = read32le(&c->dylib.compatibility_version);
    // The name is stored at an offset measured from the start of the command.
1660fe6060f1SDimitry Andric     installName =
1661fe6060f1SDimitry Andric         reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
1662fe6060f1SDimitry Andric   } else if (!isBundleLoader) {
1663fe6060f1SDimitry Andric     // macho_executable and macho_bundle don't have LC_ID_DYLIB,
1664fe6060f1SDimitry Andric     // so it's OK.
    // (The remark above explains why a bundle loader is exempt; every other
    // input reaching this branch is rejected.)
1665bdd1243dSDimitry Andric     error(toString(this) + ": dylib missing LC_ID_DYLIB load command");
16665ffd83dbSDimitry Andric     return;
16675ffd83dbSDimitry Andric   }
16685ffd83dbSDimitry Andric 
1669fe6060f1SDimitry Andric   if (config->printEachFile)
1670fe6060f1SDimitry Andric     message(toString(this));
1671fe6060f1SDimitry Andric   inputFiles.insert(this);
1672fe6060f1SDimitry Andric 
1673fe6060f1SDimitry Andric   deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
1674fe6060f1SDimitry Andric 
1675fe6060f1SDimitry Andric   if (!checkCompatibility(this))
1676fe6060f1SDimitry Andric     return;
1677fe6060f1SDimitry Andric 
1678fe6060f1SDimitry Andric   checkAppExtensionSafety(hdr->flags & MH_APP_EXTENSION_SAFE);
1679fe6060f1SDimitry Andric 
  // Remember every LC_RPATH entry; findDylib() consults the umbrella's rpaths
  // when expanding "@rpath/" install names.
1680fe6060f1SDimitry Andric   for (auto *cmd : findCommands<rpath_command>(hdr, LC_RPATH)) {
1681fe6060f1SDimitry Andric     StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
1682fe6060f1SDimitry Andric     rpaths.push_back(rpath);
1683fe6060f1SDimitry Andric   }
1684fe6060f1SDimitry Andric 
16855ffd83dbSDimitry Andric   // Initialize symbols.
  // Symbols are attributed to this file when it is implicitly linked, and to
  // the umbrella otherwise.
1686fe6060f1SDimitry Andric   exportingFile = isImplicitlyLinked(installName) ? this : this->umbrella;
1687753f127fSDimitry Andric 
  // The export trie can be described by either of two load commands; exactly
  // one of them is expected to be present.
1688753f127fSDimitry Andric   const auto *dyldInfo = findCommand<dyld_info_command>(hdr, LC_DYLD_INFO_ONLY);
1689753f127fSDimitry Andric   const auto *exportsTrie =
1690753f127fSDimitry Andric       findCommand<linkedit_data_command>(hdr, LC_DYLD_EXPORTS_TRIE);
1691753f127fSDimitry Andric   if (dyldInfo && exportsTrie) {
1692753f127fSDimitry Andric     // It's unclear what should happen in this case. Maybe we should only error
1693753f127fSDimitry Andric     // out if the two load commands refer to different data?
1694bdd1243dSDimitry Andric     error(toString(this) +
1695bdd1243dSDimitry Andric           ": dylib has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE");
1696753f127fSDimitry Andric     return;
1697*06c3fb27SDimitry Andric   }
1698*06c3fb27SDimitry Andric 
1699*06c3fb27SDimitry Andric   if (dyldInfo) {
1700753f127fSDimitry Andric     parseExportedSymbols(dyldInfo->export_off, dyldInfo->export_size);
1701753f127fSDimitry Andric   } else if (exportsTrie) {
1702753f127fSDimitry Andric     parseExportedSymbols(exportsTrie->dataoff, exportsTrie->datasize);
1703753f127fSDimitry Andric   } else {
1704753f127fSDimitry Andric     error("No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " +
1705753f127fSDimitry Andric           toString(this));
1706753f127fSDimitry Andric   }
1707753f127fSDimitry Andric }
1708753f127fSDimitry Andric 
1709753f127fSDimitry Andric void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) {
17100eae32dcSDimitry Andric   struct TrieEntry {
17110eae32dcSDimitry Andric     StringRef name;
17120eae32dcSDimitry Andric     uint64_t flags;
17130eae32dcSDimitry Andric   };
17140eae32dcSDimitry Andric 
1715753f127fSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
17160eae32dcSDimitry Andric   std::vector<TrieEntry> entries;
17170eae32dcSDimitry Andric   // Find all the $ld$* symbols to process first.
1718753f127fSDimitry Andric   parseTrie(buf + offset, size, [&](const Twine &name, uint64_t flags) {
171904eeddc0SDimitry Andric     StringRef savedName = saver().save(name);
1720fe6060f1SDimitry Andric     if (handleLDSymbol(savedName))
1721fe6060f1SDimitry Andric       return;
17220eae32dcSDimitry Andric     entries.push_back({savedName, flags});
17235ffd83dbSDimitry Andric   });
17240eae32dcSDimitry Andric 
17250eae32dcSDimitry Andric   // Process the "normal" symbols.
17260eae32dcSDimitry Andric   for (TrieEntry &entry : entries) {
1727753f127fSDimitry Andric     if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(entry.name)))
17280eae32dcSDimitry Andric       continue;
17290eae32dcSDimitry Andric 
17300eae32dcSDimitry Andric     bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
17310eae32dcSDimitry Andric     bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
17320eae32dcSDimitry Andric 
17330eae32dcSDimitry Andric     symbols.push_back(
17340eae32dcSDimitry Andric         symtab->addDylib(entry.name, exportingFile, isWeakDef, isTlv));
17350eae32dcSDimitry Andric   }
1736fe6060f1SDimitry Andric }
17375ffd83dbSDimitry Andric 
1738fe6060f1SDimitry Andric void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
1739fe6060f1SDimitry Andric   auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
1740fe6060f1SDimitry Andric   const uint8_t *p = reinterpret_cast<const uint8_t *>(mb.getBufferStart()) +
1741fe6060f1SDimitry Andric                      target->headerSize;
17425ffd83dbSDimitry Andric   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
17435ffd83dbSDimitry Andric     auto *cmd = reinterpret_cast<const load_command *>(p);
17445ffd83dbSDimitry Andric     p += cmd->cmdsize;
17455ffd83dbSDimitry Andric 
1746fe6060f1SDimitry Andric     if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) &&
1747fe6060f1SDimitry Andric         cmd->cmd == LC_REEXPORT_DYLIB) {
1748fe6060f1SDimitry Andric       const auto *c = reinterpret_cast<const dylib_command *>(cmd);
17495ffd83dbSDimitry Andric       StringRef reexportPath =
17505ffd83dbSDimitry Andric           reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
1751fe6060f1SDimitry Andric       loadReexport(reexportPath, exportingFile, nullptr);
1752fe6060f1SDimitry Andric     }
1753fe6060f1SDimitry Andric 
1754fe6060f1SDimitry Andric     // FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
1755fe6060f1SDimitry Andric     // LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
1756fe6060f1SDimitry Andric     // MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
1757fe6060f1SDimitry Andric     if (config->namespaceKind == NamespaceKind::flat &&
1758fe6060f1SDimitry Andric         cmd->cmd == LC_LOAD_DYLIB) {
1759fe6060f1SDimitry Andric       const auto *c = reinterpret_cast<const dylib_command *>(cmd);
1760fe6060f1SDimitry Andric       StringRef dylibPath =
1761fe6060f1SDimitry Andric           reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
1762fe6060f1SDimitry Andric       DylibFile *dylib = findDylib(dylibPath, umbrella, nullptr);
1763fe6060f1SDimitry Andric       if (!dylib)
1764fe6060f1SDimitry Andric         error(Twine("unable to locate library '") + dylibPath +
1765fe6060f1SDimitry Andric               "' loaded from '" + toString(this) + "' for -flat_namespace");
1766fe6060f1SDimitry Andric     }
17675ffd83dbSDimitry Andric   }
17685ffd83dbSDimitry Andric }
17695ffd83dbSDimitry Andric 
177081ad6265SDimitry Andric // Some versions of Xcode ship with .tbd files that don't have the right
1771fe6060f1SDimitry Andric // platform settings.
177281ad6265SDimitry Andric constexpr std::array<StringRef, 3> skipPlatformChecks{
1773fe6060f1SDimitry Andric     "/usr/lib/system/libsystem_kernel.dylib",
1774fe6060f1SDimitry Andric     "/usr/lib/system/libsystem_platform.dylib",
1775fe6060f1SDimitry Andric     "/usr/lib/system/libsystem_pthread.dylib"};
1776fe6060f1SDimitry Andric 
177781ad6265SDimitry Andric static bool skipPlatformCheckForCatalyst(const InterfaceFile &interface,
177881ad6265SDimitry Andric                                          bool explicitlyLinked) {
177981ad6265SDimitry Andric   // Catalyst outputs can link against implicitly linked macOS-only libraries.
178081ad6265SDimitry Andric   if (config->platform() != PLATFORM_MACCATALYST || explicitlyLinked)
178181ad6265SDimitry Andric     return false;
178281ad6265SDimitry Andric   return is_contained(interface.targets(),
178381ad6265SDimitry Andric                       MachO::Target(config->arch(), PLATFORM_MACOS));
178481ad6265SDimitry Andric }
178581ad6265SDimitry Andric 
1786bdd1243dSDimitry Andric static bool isArchABICompatible(ArchitectureSet archSet,
1787bdd1243dSDimitry Andric                                 Architecture targetArch) {
1788bdd1243dSDimitry Andric   uint32_t cpuType;
1789bdd1243dSDimitry Andric   uint32_t targetCpuType;
1790bdd1243dSDimitry Andric   std::tie(targetCpuType, std::ignore) = getCPUTypeFromArchitecture(targetArch);
1791bdd1243dSDimitry Andric 
1792bdd1243dSDimitry Andric   return llvm::any_of(archSet, [&](const auto &p) {
1793bdd1243dSDimitry Andric     std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(p);
1794bdd1243dSDimitry Andric     return cpuType == targetCpuType;
1795bdd1243dSDimitry Andric   });
1796bdd1243dSDimitry Andric }
1797bdd1243dSDimitry Andric 
1798bdd1243dSDimitry Andric static bool isTargetPlatformArchCompatible(
1799bdd1243dSDimitry Andric     InterfaceFile::const_target_range interfaceTargets, Target target) {
1800bdd1243dSDimitry Andric   if (is_contained(interfaceTargets, target))
1801bdd1243dSDimitry Andric     return true;
1802bdd1243dSDimitry Andric 
1803bdd1243dSDimitry Andric   if (config->forceExactCpuSubtypeMatch)
1804bdd1243dSDimitry Andric     return false;
1805bdd1243dSDimitry Andric 
1806bdd1243dSDimitry Andric   ArchitectureSet archSet;
1807bdd1243dSDimitry Andric   for (const auto &p : interfaceTargets)
1808bdd1243dSDimitry Andric     if (p.Platform == target.Platform)
1809bdd1243dSDimitry Andric       archSet.set(p.Arch);
1810bdd1243dSDimitry Andric   if (archSet.empty())
1811bdd1243dSDimitry Andric     return false;
1812bdd1243dSDimitry Andric 
1813bdd1243dSDimitry Andric   return isArchABICompatible(archSet, target.Arch);
1814bdd1243dSDimitry Andric }
1815bdd1243dSDimitry Andric 
1816fe6060f1SDimitry Andric DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
181781ad6265SDimitry Andric                      bool isBundleLoader, bool explicitlyLinked)
1818fe6060f1SDimitry Andric     : InputFile(DylibKind, interface), refState(RefState::Unreferenced),
181981ad6265SDimitry Andric       explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1820fe6060f1SDimitry Andric   // FIXME: Add test for the missing TBD code path.
1821fe6060f1SDimitry Andric 
18225ffd83dbSDimitry Andric   if (umbrella == nullptr)
18235ffd83dbSDimitry Andric     umbrella = this;
1824fe6060f1SDimitry Andric   this->umbrella = umbrella;
18255ffd83dbSDimitry Andric 
182604eeddc0SDimitry Andric   installName = saver().save(interface.getInstallName());
1827e8d8bef9SDimitry Andric   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
1828e8d8bef9SDimitry Andric   currentVersion = interface.getCurrentVersion().rawValue();
1829fe6060f1SDimitry Andric 
1830fe6060f1SDimitry Andric   if (config->printEachFile)
1831fe6060f1SDimitry Andric     message(toString(this));
1832fe6060f1SDimitry Andric   inputFiles.insert(this);
1833fe6060f1SDimitry Andric 
1834fe6060f1SDimitry Andric   if (!is_contained(skipPlatformChecks, installName) &&
1835bdd1243dSDimitry Andric       !isTargetPlatformArchCompatible(interface.targets(),
1836bdd1243dSDimitry Andric                                       config->platformInfo.target) &&
183781ad6265SDimitry Andric       !skipPlatformCheckForCatalyst(interface, explicitlyLinked)) {
1838fe6060f1SDimitry Andric     error(toString(this) + " is incompatible with " +
1839fe6060f1SDimitry Andric           std::string(config->platformInfo.target));
1840fe6060f1SDimitry Andric     return;
1841fe6060f1SDimitry Andric   }
1842fe6060f1SDimitry Andric 
1843fe6060f1SDimitry Andric   checkAppExtensionSafety(interface.isApplicationExtensionSafe());
1844fe6060f1SDimitry Andric 
1845fe6060f1SDimitry Andric   exportingFile = isImplicitlyLinked(installName) ? this : umbrella;
1846bdd1243dSDimitry Andric   auto addSymbol = [&](const llvm::MachO::Symbol &symbol,
1847bdd1243dSDimitry Andric                        const Twine &name) -> void {
184804eeddc0SDimitry Andric     StringRef savedName = saver().save(name);
18490eae32dcSDimitry Andric     if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(savedName)))
18500eae32dcSDimitry Andric       return;
18510eae32dcSDimitry Andric 
18520eae32dcSDimitry Andric     symbols.push_back(symtab->addDylib(savedName, exportingFile,
1853bdd1243dSDimitry Andric                                        symbol.isWeakDefined(),
1854bdd1243dSDimitry Andric                                        symbol.isThreadLocalValue()));
1855e8d8bef9SDimitry Andric   };
18560eae32dcSDimitry Andric 
18570eae32dcSDimitry Andric   std::vector<const llvm::MachO::Symbol *> normalSymbols;
18580eae32dcSDimitry Andric   normalSymbols.reserve(interface.symbolsCount());
1859fe6060f1SDimitry Andric   for (const auto *symbol : interface.symbols()) {
1860bdd1243dSDimitry Andric     if (!isArchABICompatible(symbol->getArchitectures(), config->arch()))
1861fe6060f1SDimitry Andric       continue;
1862fe6060f1SDimitry Andric     if (handleLDSymbol(symbol->getName()))
1863e8d8bef9SDimitry Andric       continue;
1864e8d8bef9SDimitry Andric 
1865e8d8bef9SDimitry Andric     switch (symbol->getKind()) {
1866bdd1243dSDimitry Andric     case SymbolKind::GlobalSymbol:
1867bdd1243dSDimitry Andric     case SymbolKind::ObjectiveCClass:
1868bdd1243dSDimitry Andric     case SymbolKind::ObjectiveCClassEHType:
1869bdd1243dSDimitry Andric     case SymbolKind::ObjectiveCInstanceVariable:
18700eae32dcSDimitry Andric       normalSymbols.push_back(symbol);
18710eae32dcSDimitry Andric     }
18720eae32dcSDimitry Andric   }
18730eae32dcSDimitry Andric 
18740eae32dcSDimitry Andric   // TODO(compnerd) filter out symbols based on the target platform
18750eae32dcSDimitry Andric   for (const auto *symbol : normalSymbols) {
18760eae32dcSDimitry Andric     switch (symbol->getKind()) {
1877e8d8bef9SDimitry Andric     case SymbolKind::GlobalSymbol:
1878bdd1243dSDimitry Andric       addSymbol(*symbol, symbol->getName());
1879e8d8bef9SDimitry Andric       break;
1880e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCClass:
1881e8d8bef9SDimitry Andric       // XXX ld64 only creates these symbols when -ObjC is passed in. We may
1882e8d8bef9SDimitry Andric       // want to emulate that.
1883bdd1243dSDimitry Andric       addSymbol(*symbol, objc::klass + symbol->getName());
1884bdd1243dSDimitry Andric       addSymbol(*symbol, objc::metaclass + symbol->getName());
1885e8d8bef9SDimitry Andric       break;
1886e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCClassEHType:
1887bdd1243dSDimitry Andric       addSymbol(*symbol, objc::ehtype + symbol->getName());
1888e8d8bef9SDimitry Andric       break;
1889e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCInstanceVariable:
1890bdd1243dSDimitry Andric       addSymbol(*symbol, objc::ivar + symbol->getName());
1891e8d8bef9SDimitry Andric       break;
1892e8d8bef9SDimitry Andric     }
18935ffd83dbSDimitry Andric   }
1894e8d8bef9SDimitry Andric }
1895e8d8bef9SDimitry Andric 
189661cfbce3SDimitry Andric DylibFile::DylibFile(DylibFile *umbrella)
189761cfbce3SDimitry Andric     : InputFile(DylibKind, MemoryBufferRef{}), refState(RefState::Unreferenced),
189861cfbce3SDimitry Andric       explicitlyLinked(false), isBundleLoader(false) {
189961cfbce3SDimitry Andric   if (umbrella == nullptr)
190061cfbce3SDimitry Andric     umbrella = this;
190161cfbce3SDimitry Andric   this->umbrella = umbrella;
190261cfbce3SDimitry Andric }
190361cfbce3SDimitry Andric 
1904fe6060f1SDimitry Andric void DylibFile::parseReexports(const InterfaceFile &interface) {
1905fe6060f1SDimitry Andric   const InterfaceFile *topLevel =
1906fe6060f1SDimitry Andric       interface.getParent() == nullptr ? &interface : interface.getParent();
1907349cc55cSDimitry Andric   for (const InterfaceFileRef &intfRef : interface.reexportedLibraries()) {
1908fe6060f1SDimitry Andric     InterfaceFile::const_target_range targets = intfRef.targets();
1909fe6060f1SDimitry Andric     if (is_contained(skipPlatformChecks, intfRef.getInstallName()) ||
1910bdd1243dSDimitry Andric         isTargetPlatformArchCompatible(targets, config->platformInfo.target))
1911fe6060f1SDimitry Andric       loadReexport(intfRef.getInstallName(), exportingFile, topLevel);
1912fe6060f1SDimitry Andric   }
1913fe6060f1SDimitry Andric }
1914e8d8bef9SDimitry Andric 
191561cfbce3SDimitry Andric bool DylibFile::isExplicitlyLinked() const {
191661cfbce3SDimitry Andric   if (!explicitlyLinked)
191761cfbce3SDimitry Andric     return false;
191861cfbce3SDimitry Andric 
191961cfbce3SDimitry Andric   // If this dylib was explicitly linked, but at least one of the symbols
192061cfbce3SDimitry Andric   // of the synthetic dylibs it created via $ld$previous symbols is
192161cfbce3SDimitry Andric   // referenced, then that synthetic dylib fulfils the explicit linkedness
192261cfbce3SDimitry Andric   // and we can deadstrip this dylib if it's unreferenced.
192361cfbce3SDimitry Andric   for (const auto *dylib : extraDylibs)
192461cfbce3SDimitry Andric     if (dylib->isReferenced())
192561cfbce3SDimitry Andric       return false;
192661cfbce3SDimitry Andric 
192761cfbce3SDimitry Andric   return true;
192861cfbce3SDimitry Andric }
192961cfbce3SDimitry Andric 
193061cfbce3SDimitry Andric DylibFile *DylibFile::getSyntheticDylib(StringRef installName,
193161cfbce3SDimitry Andric                                         uint32_t currentVersion,
193261cfbce3SDimitry Andric                                         uint32_t compatVersion) {
193361cfbce3SDimitry Andric   for (DylibFile *dylib : extraDylibs)
193461cfbce3SDimitry Andric     if (dylib->installName == installName) {
193561cfbce3SDimitry Andric       // FIXME: Check what to do if different $ld$previous symbols
193661cfbce3SDimitry Andric       // request the same dylib, but with different versions.
193761cfbce3SDimitry Andric       return dylib;
193861cfbce3SDimitry Andric     }
193961cfbce3SDimitry Andric 
194061cfbce3SDimitry Andric   auto *dylib = make<DylibFile>(umbrella == this ? nullptr : umbrella);
194161cfbce3SDimitry Andric   dylib->installName = saver().save(installName);
194261cfbce3SDimitry Andric   dylib->currentVersion = currentVersion;
194361cfbce3SDimitry Andric   dylib->compatibilityVersion = compatVersion;
194461cfbce3SDimitry Andric   extraDylibs.push_back(dylib);
194561cfbce3SDimitry Andric   return dylib;
194661cfbce3SDimitry Andric }
194761cfbce3SDimitry Andric 
1948fe6060f1SDimitry Andric // $ld$ symbols modify the properties/behavior of the library (e.g. its install
1949fe6060f1SDimitry Andric // name, compatibility version or hide/add symbols) for specific target
1950fe6060f1SDimitry Andric // versions.
1951fe6060f1SDimitry Andric bool DylibFile::handleLDSymbol(StringRef originalName) {
1952*06c3fb27SDimitry Andric   if (!originalName.starts_with("$ld$"))
1953fe6060f1SDimitry Andric     return false;
1954fe6060f1SDimitry Andric 
1955fe6060f1SDimitry Andric   StringRef action;
1956fe6060f1SDimitry Andric   StringRef name;
1957fe6060f1SDimitry Andric   std::tie(action, name) = originalName.drop_front(strlen("$ld$")).split('$');
1958fe6060f1SDimitry Andric   if (action == "previous")
1959fe6060f1SDimitry Andric     handleLDPreviousSymbol(name, originalName);
1960fe6060f1SDimitry Andric   else if (action == "install_name")
1961fe6060f1SDimitry Andric     handleLDInstallNameSymbol(name, originalName);
19620eae32dcSDimitry Andric   else if (action == "hide")
19630eae32dcSDimitry Andric     handleLDHideSymbol(name, originalName);
1964fe6060f1SDimitry Andric   return true;
1965fe6060f1SDimitry Andric }
1966fe6060f1SDimitry Andric 
1967fe6060f1SDimitry Andric void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
1968fe6060f1SDimitry Andric   // originalName: $ld$ previous $ <installname> $ <compatversion> $
1969fe6060f1SDimitry Andric   // <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
1970fe6060f1SDimitry Andric   StringRef installName;
1971fe6060f1SDimitry Andric   StringRef compatVersion;
1972fe6060f1SDimitry Andric   StringRef platformStr;
1973fe6060f1SDimitry Andric   StringRef startVersion;
1974fe6060f1SDimitry Andric   StringRef endVersion;
1975fe6060f1SDimitry Andric   StringRef symbolName;
1976fe6060f1SDimitry Andric   StringRef rest;
1977fe6060f1SDimitry Andric 
1978fe6060f1SDimitry Andric   std::tie(installName, name) = name.split('$');
1979fe6060f1SDimitry Andric   std::tie(compatVersion, name) = name.split('$');
1980fe6060f1SDimitry Andric   std::tie(platformStr, name) = name.split('$');
1981fe6060f1SDimitry Andric   std::tie(startVersion, name) = name.split('$');
1982fe6060f1SDimitry Andric   std::tie(endVersion, name) = name.split('$');
198361cfbce3SDimitry Andric   std::tie(symbolName, rest) = name.rsplit('$');
198461cfbce3SDimitry Andric 
198561cfbce3SDimitry Andric   // FIXME: Does this do the right thing for zippered files?
1986fe6060f1SDimitry Andric   unsigned platform;
1987fe6060f1SDimitry Andric   if (platformStr.getAsInteger(10, platform) ||
1988fe6060f1SDimitry Andric       platform != static_cast<unsigned>(config->platform()))
1989fe6060f1SDimitry Andric     return;
1990fe6060f1SDimitry Andric 
1991fe6060f1SDimitry Andric   VersionTuple start;
1992fe6060f1SDimitry Andric   if (start.tryParse(startVersion)) {
1993bdd1243dSDimitry Andric     warn(toString(this) + ": failed to parse start version, symbol '" +
1994bdd1243dSDimitry Andric          originalName + "' ignored");
1995fe6060f1SDimitry Andric     return;
1996fe6060f1SDimitry Andric   }
1997fe6060f1SDimitry Andric   VersionTuple end;
1998fe6060f1SDimitry Andric   if (end.tryParse(endVersion)) {
1999bdd1243dSDimitry Andric     warn(toString(this) + ": failed to parse end version, symbol '" +
2000bdd1243dSDimitry Andric          originalName + "' ignored");
2001fe6060f1SDimitry Andric     return;
2002fe6060f1SDimitry Andric   }
2003*06c3fb27SDimitry Andric   if (config->platformInfo.target.MinDeployment < start ||
2004*06c3fb27SDimitry Andric       config->platformInfo.target.MinDeployment >= end)
2005fe6060f1SDimitry Andric     return;
2006fe6060f1SDimitry Andric 
200761cfbce3SDimitry Andric   // Initialized to compatibilityVersion for the symbolName branch below.
200861cfbce3SDimitry Andric   uint32_t newCompatibilityVersion = compatibilityVersion;
200961cfbce3SDimitry Andric   uint32_t newCurrentVersionForSymbol = currentVersion;
2010fe6060f1SDimitry Andric   if (!compatVersion.empty()) {
2011fe6060f1SDimitry Andric     VersionTuple cVersion;
2012fe6060f1SDimitry Andric     if (cVersion.tryParse(compatVersion)) {
2013bdd1243dSDimitry Andric       warn(toString(this) +
2014bdd1243dSDimitry Andric            ": failed to parse compatibility version, symbol '" + originalName +
2015fe6060f1SDimitry Andric            "' ignored");
2016fe6060f1SDimitry Andric       return;
2017fe6060f1SDimitry Andric     }
201861cfbce3SDimitry Andric     newCompatibilityVersion = encodeVersion(cVersion);
201961cfbce3SDimitry Andric     newCurrentVersionForSymbol = newCompatibilityVersion;
2020fe6060f1SDimitry Andric   }
202161cfbce3SDimitry Andric 
202261cfbce3SDimitry Andric   if (!symbolName.empty()) {
202361cfbce3SDimitry Andric     // A $ld$previous$ symbol with symbol name adds a symbol with that name to
202461cfbce3SDimitry Andric     // a dylib with given name and version.
202561cfbce3SDimitry Andric     auto *dylib = getSyntheticDylib(installName, newCurrentVersionForSymbol,
202661cfbce3SDimitry Andric                                     newCompatibilityVersion);
202761cfbce3SDimitry Andric 
2028bdd1243dSDimitry Andric     // The tbd file usually contains the $ld$previous symbol for an old version,
2029bdd1243dSDimitry Andric     // and then the symbol itself later, for newer deployment targets, like so:
2030bdd1243dSDimitry Andric     //    symbols: [
2031bdd1243dSDimitry Andric     //      '$ld$previous$/Another$$1$3.0$14.0$_zzz$',
2032bdd1243dSDimitry Andric     //      _zzz,
2033bdd1243dSDimitry Andric     //    ]
2034bdd1243dSDimitry Andric     // Since the symbols are sorted, adding them to the symtab in the given
2035bdd1243dSDimitry Andric     // order means the $ld$previous version of _zzz will prevail, as desired.
203661cfbce3SDimitry Andric     dylib->symbols.push_back(symtab->addDylib(
203761cfbce3SDimitry Andric         saver().save(symbolName), dylib, /*isWeakDef=*/false, /*isTlv=*/false));
203861cfbce3SDimitry Andric     return;
203961cfbce3SDimitry Andric   }
204061cfbce3SDimitry Andric 
204161cfbce3SDimitry Andric   // A $ld$previous$ symbol without symbol name modifies the dylib it's in.
204261cfbce3SDimitry Andric   this->installName = saver().save(installName);
204361cfbce3SDimitry Andric   this->compatibilityVersion = newCompatibilityVersion;
2044fe6060f1SDimitry Andric }
2045fe6060f1SDimitry Andric 
2046fe6060f1SDimitry Andric void DylibFile::handleLDInstallNameSymbol(StringRef name,
2047fe6060f1SDimitry Andric                                           StringRef originalName) {
2048fe6060f1SDimitry Andric   // originalName: $ld$ install_name $ os<version> $ install_name
2049fe6060f1SDimitry Andric   StringRef condition, installName;
2050fe6060f1SDimitry Andric   std::tie(condition, installName) = name.split('$');
2051fe6060f1SDimitry Andric   VersionTuple version;
2052fe6060f1SDimitry Andric   if (!condition.consume_front("os") || version.tryParse(condition))
2053bdd1243dSDimitry Andric     warn(toString(this) + ": failed to parse os version, symbol '" +
2054bdd1243dSDimitry Andric          originalName + "' ignored");
2055*06c3fb27SDimitry Andric   else if (version == config->platformInfo.target.MinDeployment)
205604eeddc0SDimitry Andric     this->installName = saver().save(installName);
2057fe6060f1SDimitry Andric }
2058fe6060f1SDimitry Andric 
20590eae32dcSDimitry Andric void DylibFile::handleLDHideSymbol(StringRef name, StringRef originalName) {
20600eae32dcSDimitry Andric   StringRef symbolName;
20610eae32dcSDimitry Andric   bool shouldHide = true;
2062*06c3fb27SDimitry Andric   if (name.starts_with("os")) {
20630eae32dcSDimitry Andric     // If it's hidden based on versions.
20640eae32dcSDimitry Andric     name = name.drop_front(2);
20650eae32dcSDimitry Andric     StringRef minVersion;
20660eae32dcSDimitry Andric     std::tie(minVersion, symbolName) = name.split('$');
20670eae32dcSDimitry Andric     VersionTuple versionTup;
20680eae32dcSDimitry Andric     if (versionTup.tryParse(minVersion)) {
2069bdd1243dSDimitry Andric       warn(toString(this) + ": failed to parse hidden version, symbol `" + originalName +
20700eae32dcSDimitry Andric            "` ignored.");
20710eae32dcSDimitry Andric       return;
20720eae32dcSDimitry Andric     }
2073*06c3fb27SDimitry Andric     shouldHide = versionTup == config->platformInfo.target.MinDeployment;
20740eae32dcSDimitry Andric   } else {
20750eae32dcSDimitry Andric     symbolName = name;
20760eae32dcSDimitry Andric   }
20770eae32dcSDimitry Andric 
20780eae32dcSDimitry Andric   if (shouldHide)
20790eae32dcSDimitry Andric     exportingFile->hiddenSymbols.insert(CachedHashStringRef(symbolName));
20800eae32dcSDimitry Andric }
20810eae32dcSDimitry Andric 
2082fe6060f1SDimitry Andric void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
2083fe6060f1SDimitry Andric   if (config->applicationExtension && !dylibIsAppExtensionSafe)
2084fe6060f1SDimitry Andric     warn("using '-application_extension' with unsafe dylib: " + toString(this));
2085e8d8bef9SDimitry Andric }
2086e8d8bef9SDimitry Andric 
2087972a253aSDimitry Andric ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
2088972a253aSDimitry Andric     : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)),
2089972a253aSDimitry Andric       forceHidden(forceHidden) {}
2090349cc55cSDimitry Andric 
2091349cc55cSDimitry Andric void ArchiveFile::addLazySymbols() {
20925ffd83dbSDimitry Andric   for (const object::Archive::Symbol &sym : file->symbols())
209304eeddc0SDimitry Andric     symtab->addLazyArchive(sym.getName(), this, sym);
20945ffd83dbSDimitry Andric }
20955ffd83dbSDimitry Andric 
2096972a253aSDimitry Andric static Expected<InputFile *>
2097972a253aSDimitry Andric loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
2098972a253aSDimitry Andric                   uint64_t offsetInArchive, bool forceHidden) {
2099349cc55cSDimitry Andric   if (config->zeroModTime)
2100349cc55cSDimitry Andric     modTime = 0;
2101349cc55cSDimitry Andric 
2102349cc55cSDimitry Andric   switch (identify_magic(mb.getBuffer())) {
2103349cc55cSDimitry Andric   case file_magic::macho_object:
2104972a253aSDimitry Andric     return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden);
2105349cc55cSDimitry Andric   case file_magic::bitcode:
2106972a253aSDimitry Andric     return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
2107972a253aSDimitry Andric                              forceHidden);
2108349cc55cSDimitry Andric   default:
2109349cc55cSDimitry Andric     return createStringError(inconvertibleErrorCode(),
2110349cc55cSDimitry Andric                              mb.getBufferIdentifier() +
2111349cc55cSDimitry Andric                                  " has unhandled file type");
2112349cc55cSDimitry Andric   }
2113349cc55cSDimitry Andric }
2114349cc55cSDimitry Andric 
2115349cc55cSDimitry Andric Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
2116349cc55cSDimitry Andric   if (!seen.insert(c.getChildOffset()).second)
2117349cc55cSDimitry Andric     return Error::success();
2118349cc55cSDimitry Andric 
2119349cc55cSDimitry Andric   Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
2120349cc55cSDimitry Andric   if (!mb)
2121349cc55cSDimitry Andric     return mb.takeError();
2122349cc55cSDimitry Andric 
2123349cc55cSDimitry Andric   // Thin archives refer to .o files, so --reproduce needs the .o files too.
2124349cc55cSDimitry Andric   if (tar && c.getParent()->isThin())
2125349cc55cSDimitry Andric     tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb->getBuffer());
2126349cc55cSDimitry Andric 
2127349cc55cSDimitry Andric   Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
2128349cc55cSDimitry Andric   if (!modTime)
2129349cc55cSDimitry Andric     return modTime.takeError();
2130349cc55cSDimitry Andric 
2131972a253aSDimitry Andric   Expected<InputFile *> file = loadArchiveMember(
2132972a253aSDimitry Andric       *mb, toTimeT(*modTime), getName(), c.getChildOffset(), forceHidden);
2133349cc55cSDimitry Andric 
2134349cc55cSDimitry Andric   if (!file)
2135349cc55cSDimitry Andric     return file.takeError();
2136349cc55cSDimitry Andric 
2137349cc55cSDimitry Andric   inputFiles.insert(*file);
2138349cc55cSDimitry Andric   printArchiveMemberLoad(reason, *file);
2139349cc55cSDimitry Andric   return Error::success();
2140349cc55cSDimitry Andric }
2141349cc55cSDimitry Andric 
21425ffd83dbSDimitry Andric void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
21435ffd83dbSDimitry Andric   object::Archive::Child c =
21445ffd83dbSDimitry Andric       CHECK(sym.getMember(), toString(this) +
2145349cc55cSDimitry Andric                                  ": could not get the member defining symbol " +
2146e8d8bef9SDimitry Andric                                  toMachOString(sym));
21475ffd83dbSDimitry Andric 
2148fe6060f1SDimitry Andric   // `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
2149e8d8bef9SDimitry Andric   // and become invalid after that call. Copy it to the stack so we can refer
2150e8d8bef9SDimitry Andric   // to it later.
2151fe6060f1SDimitry Andric   const object::Archive::Symbol symCopy = sym;
2152e8d8bef9SDimitry Andric 
2153fe6060f1SDimitry Andric   // ld64 doesn't demangle sym here even with -demangle.
2154fe6060f1SDimitry Andric   // Match that: intentionally don't call toMachOString().
2155349cc55cSDimitry Andric   if (Error e = fetch(c, symCopy.getName()))
2156349cc55cSDimitry Andric     error(toString(this) + ": could not get the member defining symbol " +
2157349cc55cSDimitry Andric           toMachOString(symCopy) + ": " + toString(std::move(e)));
21585ffd83dbSDimitry Andric }
21595ffd83dbSDimitry Andric 
2160fe6060f1SDimitry Andric static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
2161fe6060f1SDimitry Andric                                           BitcodeFile &file) {
216204eeddc0SDimitry Andric   StringRef name = saver().save(objSym.getName());
2163fe6060f1SDimitry Andric 
2164fe6060f1SDimitry Andric   if (objSym.isUndefined())
21650eae32dcSDimitry Andric     return symtab->addUndefined(name, &file, /*isWeakRef=*/objSym.isWeak());
2166fe6060f1SDimitry Andric 
2167fe6060f1SDimitry Andric   // TODO: Write a test demonstrating why computing isPrivateExtern before
2168fe6060f1SDimitry Andric   // LTO compilation is important.
2169fe6060f1SDimitry Andric   bool isPrivateExtern = false;
2170fe6060f1SDimitry Andric   switch (objSym.getVisibility()) {
2171fe6060f1SDimitry Andric   case GlobalValue::HiddenVisibility:
2172fe6060f1SDimitry Andric     isPrivateExtern = true;
2173fe6060f1SDimitry Andric     break;
2174fe6060f1SDimitry Andric   case GlobalValue::ProtectedVisibility:
2175fe6060f1SDimitry Andric     error(name + " has protected visibility, which is not supported by Mach-O");
2176fe6060f1SDimitry Andric     break;
2177fe6060f1SDimitry Andric   case GlobalValue::DefaultVisibility:
2178fe6060f1SDimitry Andric     break;
2179fe6060f1SDimitry Andric   }
2180972a253aSDimitry Andric   isPrivateExtern = isPrivateExtern || objSym.canBeOmittedFromSymbolTable() ||
2181972a253aSDimitry Andric                     file.forceHidden;
2182fe6060f1SDimitry Andric 
2183349cc55cSDimitry Andric   if (objSym.isCommon())
2184349cc55cSDimitry Andric     return symtab->addCommon(name, &file, objSym.getCommonSize(),
2185349cc55cSDimitry Andric                              objSym.getCommonAlignment(), isPrivateExtern);
2186349cc55cSDimitry Andric 
2187fe6060f1SDimitry Andric   return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0,
2188fe6060f1SDimitry Andric                             /*size=*/0, objSym.isWeak(), isPrivateExtern,
2189fe6060f1SDimitry Andric                             /*isReferencedDynamically=*/false,
2190349cc55cSDimitry Andric                             /*noDeadStrip=*/false,
2191349cc55cSDimitry Andric                             /*isWeakDefCanBeHidden=*/false);
2192fe6060f1SDimitry Andric }
2193fe6060f1SDimitry Andric 
2194fe6060f1SDimitry Andric BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
2195972a253aSDimitry Andric                          uint64_t offsetInArchive, bool lazy, bool forceHidden)
2196972a253aSDimitry Andric     : InputFile(BitcodeKind, mb, lazy), forceHidden(forceHidden) {
21970eae32dcSDimitry Andric   this->archiveName = std::string(archiveName);
2198fe6060f1SDimitry Andric   std::string path = mb.getBufferIdentifier().str();
2199bdd1243dSDimitry Andric   if (config->thinLTOIndexOnly)
2200bdd1243dSDimitry Andric     path = replaceThinLTOSuffix(mb.getBufferIdentifier());
2201bdd1243dSDimitry Andric 
2202fe6060f1SDimitry Andric   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
2203fe6060f1SDimitry Andric   // name. If two members with the same name are provided, this causes a
2204fe6060f1SDimitry Andric   // collision and ThinLTO can't proceed.
2205fe6060f1SDimitry Andric   // So, we append the archive name to disambiguate two members with the same
2206fe6060f1SDimitry Andric   // name from multiple different archives, and offset within the archive to
2207fe6060f1SDimitry Andric   // disambiguate two members of the same name from a single archive.
220804eeddc0SDimitry Andric   MemoryBufferRef mbref(mb.getBuffer(),
220904eeddc0SDimitry Andric                         saver().save(archiveName.empty()
221004eeddc0SDimitry Andric                                          ? path
2211*06c3fb27SDimitry Andric                                          : archiveName + "(" +
2212*06c3fb27SDimitry Andric                                                sys::path::filename(path) + ")" +
2213fe6060f1SDimitry Andric                                                utostr(offsetInArchive)));
2214e8d8bef9SDimitry Andric   obj = check(lto::InputFile::create(mbref));
221504eeddc0SDimitry Andric   if (lazy)
221604eeddc0SDimitry Andric     parseLazy();
221704eeddc0SDimitry Andric   else
221804eeddc0SDimitry Andric     parse();
221904eeddc0SDimitry Andric }
2220fe6060f1SDimitry Andric 
222104eeddc0SDimitry Andric void BitcodeFile::parse() {
2222fe6060f1SDimitry Andric   // Convert LTO Symbols to LLD Symbols in order to perform resolution. The
2223fe6060f1SDimitry Andric   // "winning" symbol will then be marked as Prevailing at LTO compilation
2224fe6060f1SDimitry Andric   // time.
222504eeddc0SDimitry Andric   symbols.clear();
2226fe6060f1SDimitry Andric   for (const lto::InputFile::Symbol &objSym : obj->symbols())
2227fe6060f1SDimitry Andric     symbols.push_back(createBitcodeSymbol(objSym, *this));
22285ffd83dbSDimitry Andric }
2229fe6060f1SDimitry Andric 
223004eeddc0SDimitry Andric void BitcodeFile::parseLazy() {
223104eeddc0SDimitry Andric   symbols.resize(obj->symbols().size());
2232bdd1243dSDimitry Andric   for (const auto &[i, objSym] : llvm::enumerate(obj->symbols())) {
223304eeddc0SDimitry Andric     if (!objSym.isUndefined()) {
2234bdd1243dSDimitry Andric       symbols[i] = symtab->addLazyObject(saver().save(objSym.getName()), *this);
223504eeddc0SDimitry Andric       if (!lazy)
223604eeddc0SDimitry Andric         break;
223704eeddc0SDimitry Andric     }
223804eeddc0SDimitry Andric   }
223904eeddc0SDimitry Andric }
224004eeddc0SDimitry Andric 
2241bdd1243dSDimitry Andric std::string macho::replaceThinLTOSuffix(StringRef path) {
2242bdd1243dSDimitry Andric   auto [suffix, repl] = config->thinLTOObjectSuffixReplace;
2243bdd1243dSDimitry Andric   if (path.consume_back(suffix))
2244bdd1243dSDimitry Andric     return (path + repl).str();
2245bdd1243dSDimitry Andric   return std::string(path);
2246bdd1243dSDimitry Andric }
2247bdd1243dSDimitry Andric 
224804eeddc0SDimitry Andric void macho::extract(InputFile &file, StringRef reason) {
2249bdd1243dSDimitry Andric   if (!file.lazy)
2250bdd1243dSDimitry Andric     return;
225104eeddc0SDimitry Andric   file.lazy = false;
2252bdd1243dSDimitry Andric 
225304eeddc0SDimitry Andric   printArchiveMemberLoad(reason, &file);
225404eeddc0SDimitry Andric   if (auto *bitcode = dyn_cast<BitcodeFile>(&file)) {
225504eeddc0SDimitry Andric     bitcode->parse();
225604eeddc0SDimitry Andric   } else {
225704eeddc0SDimitry Andric     auto &f = cast<ObjFile>(file);
225804eeddc0SDimitry Andric     if (target->wordSize == 8)
225904eeddc0SDimitry Andric       f.parse<LP64>();
226004eeddc0SDimitry Andric     else
226104eeddc0SDimitry Andric       f.parse<ILP32>();
226204eeddc0SDimitry Andric   }
226304eeddc0SDimitry Andric }
226404eeddc0SDimitry Andric 
// Explicit instantiation definition of ObjFile::parse with LP64 as the
// template argument.
2265fe6060f1SDimitry Andric template void ObjFile::parse<LP64>();
2266