xref: /freebsd/contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// Implements the TAPI Reader for Mach-O dynamic libraries.
10 ///
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/TextAPI/DylibReader.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
16 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/MachOUniversal.h"
19 #include "llvm/Support/Endian.h"
20 #include "llvm/TargetParser/Triple.h"
21 #include "llvm/TextAPI/InterfaceFile.h"
22 #include "llvm/TextAPI/RecordsSlice.h"
23 #include "llvm/TextAPI/TextAPIError.h"
24 #include <iomanip>
25 #include <set>
26 #include <sstream>
27 #include <string>
28 #include <tuple>
29 
30 using namespace llvm;
31 using namespace llvm::object;
32 using namespace llvm::MachO;
33 using namespace llvm::MachO::DylibReader;
34 
35 using TripleVec = std::vector<Triple>;
emplace(TripleVec & Container,Triple && T)36 static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {
37   auto I = partition_point(Container, [=](const Triple &CT) {
38     return std::forward_as_tuple(CT.getArch(), CT.getOS(),
39                                  CT.getEnvironment()) <
40            std::forward_as_tuple(T.getArch(), T.getOS(), T.getEnvironment());
41   });
42 
43   if (I != Container.end() && *I == T)
44     return I;
45   return Container.emplace(I, T);
46 }
47 
constructTriples(MachOObjectFile * Obj,const Architecture ArchT)48 static TripleVec constructTriples(MachOObjectFile *Obj,
49                                   const Architecture ArchT) {
50   auto getOSVersionStr = [](uint32_t V) {
51     PackedVersion OSVersion(V);
52     std::string Vers;
53     raw_string_ostream VStream(Vers);
54     VStream << OSVersion;
55     return VStream.str();
56   };
57   auto getOSVersion = [&](const MachOObjectFile::LoadCommandInfo &cmd) {
58     auto Vers = Obj->getVersionMinLoadCommand(cmd);
59     return getOSVersionStr(Vers.version);
60   };
61 
62   TripleVec Triples;
63   bool IsIntel = ArchitectureSet(ArchT).hasX86();
64   auto Arch = getArchitectureName(ArchT);
65 
66   for (const auto &cmd : Obj->load_commands()) {
67     std::string OSVersion;
68     switch (cmd.C.cmd) {
69     case MachO::LC_VERSION_MIN_MACOSX:
70       OSVersion = getOSVersion(cmd);
71       emplace(Triples, {Arch, "apple", "macos" + OSVersion});
72       break;
73     case MachO::LC_VERSION_MIN_IPHONEOS:
74       OSVersion = getOSVersion(cmd);
75       if (IsIntel)
76         emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"});
77       else
78         emplace(Triples, {Arch, "apple", "ios" + OSVersion});
79       break;
80     case MachO::LC_VERSION_MIN_TVOS:
81       OSVersion = getOSVersion(cmd);
82       if (IsIntel)
83         emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"});
84       else
85         emplace(Triples, {Arch, "apple", "tvos" + OSVersion});
86       break;
87     case MachO::LC_VERSION_MIN_WATCHOS:
88       OSVersion = getOSVersion(cmd);
89       if (IsIntel)
90         emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"});
91       else
92         emplace(Triples, {Arch, "apple", "watchos" + OSVersion});
93       break;
94     case MachO::LC_BUILD_VERSION: {
95       OSVersion = getOSVersionStr(Obj->getBuildVersionLoadCommand(cmd).minos);
96       switch (Obj->getBuildVersionLoadCommand(cmd).platform) {
97       case MachO::PLATFORM_MACOS:
98         emplace(Triples, {Arch, "apple", "macos" + OSVersion});
99         break;
100       case MachO::PLATFORM_IOS:
101         emplace(Triples, {Arch, "apple", "ios" + OSVersion});
102         break;
103       case MachO::PLATFORM_TVOS:
104         emplace(Triples, {Arch, "apple", "tvos" + OSVersion});
105         break;
106       case MachO::PLATFORM_WATCHOS:
107         emplace(Triples, {Arch, "apple", "watchos" + OSVersion});
108         break;
109       case MachO::PLATFORM_BRIDGEOS:
110         emplace(Triples, {Arch, "apple", "bridgeos" + OSVersion});
111         break;
112       case MachO::PLATFORM_MACCATALYST:
113         emplace(Triples, {Arch, "apple", "ios" + OSVersion, "macabi"});
114         break;
115       case MachO::PLATFORM_IOSSIMULATOR:
116         emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"});
117         break;
118       case MachO::PLATFORM_TVOSSIMULATOR:
119         emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"});
120         break;
121       case MachO::PLATFORM_WATCHOSSIMULATOR:
122         emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"});
123         break;
124       case MachO::PLATFORM_DRIVERKIT:
125         emplace(Triples, {Arch, "apple", "driverkit" + OSVersion});
126         break;
127       default:
128         break; // Skip any others.
129       }
130       break;
131     }
132     default:
133       break;
134     }
135   }
136 
137   // Record unknown platform for older binaries that don't enforce platform
138   // load commands.
139   if (Triples.empty())
140     emplace(Triples, {Arch, "apple", "unknown"});
141 
142   return Triples;
143 }
144 
readMachOHeader(MachOObjectFile * Obj,RecordsSlice & Slice)145 static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) {
146   auto H = Obj->getHeader();
147   auto &BA = Slice.getBinaryAttrs();
148 
149   switch (H.filetype) {
150   default:
151     llvm_unreachable("unsupported binary type");
152   case MachO::MH_DYLIB:
153     BA.File = FileType::MachO_DynamicLibrary;
154     break;
155   case MachO::MH_DYLIB_STUB:
156     BA.File = FileType::MachO_DynamicLibrary_Stub;
157     break;
158   case MachO::MH_BUNDLE:
159     BA.File = FileType::MachO_Bundle;
160     break;
161   }
162 
163   if (H.flags & MachO::MH_TWOLEVEL)
164     BA.TwoLevelNamespace = true;
165   if (H.flags & MachO::MH_APP_EXTENSION_SAFE)
166     BA.AppExtensionSafe = true;
167 
168   for (const auto &LCI : Obj->load_commands()) {
169     switch (LCI.C.cmd) {
170     case MachO::LC_ID_DYLIB: {
171       auto DLLC = Obj->getDylibIDLoadCommand(LCI);
172       BA.InstallName = Slice.copyString(LCI.Ptr + DLLC.dylib.name);
173       BA.CurrentVersion = DLLC.dylib.current_version;
174       BA.CompatVersion = DLLC.dylib.compatibility_version;
175       break;
176     }
177     case MachO::LC_REEXPORT_DYLIB: {
178       auto DLLC = Obj->getDylibIDLoadCommand(LCI);
179       BA.RexportedLibraries.emplace_back(
180           Slice.copyString(LCI.Ptr + DLLC.dylib.name));
181       break;
182     }
183     case MachO::LC_SUB_FRAMEWORK: {
184       auto SFC = Obj->getSubFrameworkCommand(LCI);
185       BA.ParentUmbrella = Slice.copyString(LCI.Ptr + SFC.umbrella);
186       break;
187     }
188     case MachO::LC_SUB_CLIENT: {
189       auto SCLC = Obj->getSubClientCommand(LCI);
190       BA.AllowableClients.emplace_back(Slice.copyString(LCI.Ptr + SCLC.client));
191       break;
192     }
193     case MachO::LC_UUID: {
194       auto UUIDLC = Obj->getUuidCommand(LCI);
195       std::stringstream Stream;
196       for (unsigned I = 0; I < 16; ++I) {
197         if (I == 4 || I == 6 || I == 8 || I == 10)
198           Stream << '-';
199         Stream << std::setfill('0') << std::setw(2) << std::uppercase
200                << std::hex << static_cast<int>(UUIDLC.uuid[I]);
201       }
202       BA.UUID = Slice.copyString(Stream.str());
203       break;
204     }
205     case MachO::LC_RPATH: {
206       auto RPLC = Obj->getRpathCommand(LCI);
207       BA.RPaths.emplace_back(Slice.copyString(LCI.Ptr + RPLC.path));
208       break;
209     }
210     case MachO::LC_SEGMENT_SPLIT_INFO: {
211       auto SSILC = Obj->getLinkeditDataLoadCommand(LCI);
212       if (SSILC.datasize == 0)
213         BA.OSLibNotForSharedCache = true;
214       break;
215     }
216     default:
217       break;
218     }
219   }
220 
221   for (auto &Sect : Obj->sections()) {
222     auto SectName = Sect.getName();
223     if (!SectName)
224       return SectName.takeError();
225     if (*SectName != "__objc_imageinfo" && *SectName != "__image_info")
226       continue;
227 
228     auto Content = Sect.getContents();
229     if (!Content)
230       return Content.takeError();
231 
232     if ((Content->size() >= 8) && (Content->front() == 0)) {
233       uint32_t Flags;
234       if (Obj->isLittleEndian()) {
235         auto *p =
236             reinterpret_cast<const support::ulittle32_t *>(Content->data() + 4);
237         Flags = *p;
238       } else {
239         auto *p =
240             reinterpret_cast<const support::ubig32_t *>(Content->data() + 4);
241         Flags = *p;
242       }
243       BA.SwiftABI = (Flags >> 8) & 0xFF;
244     }
245   }
246   return Error::success();
247 }
248 
readSymbols(MachOObjectFile * Obj,RecordsSlice & Slice,const ParseOption & Opt)249 static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice,
250                          const ParseOption &Opt) {
251 
252   auto parseExport = [](const auto ExportFlags,
253                         auto Addr) -> std::tuple<SymbolFlags, RecordLinkage> {
254     SymbolFlags Flags = SymbolFlags::None;
255     switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) {
256     case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
257       if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION)
258         Flags |= SymbolFlags::WeakDefined;
259       break;
260     case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
261       Flags |= SymbolFlags::ThreadLocalValue;
262       break;
263     }
264 
265     RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
266                                 ? RecordLinkage::Rexported
267                                 : RecordLinkage::Exported;
268     return {Flags, Linkage};
269   };
270 
271   Error Err = Error::success();
272 
273   StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports;
274   // Collect symbols from export trie first. Sometimes, there are more exports
275   // in the trie than in n-list due to stripping. This is common for swift
276   // mangled symbols.
277   for (auto &Sym : Obj->exports(Err)) {
278     auto [Flags, Linkage] = parseExport(Sym.flags(), Sym.address());
279     Slice.addRecord(Sym.name(), Flags, GlobalRecord::Kind::Unknown, Linkage);
280     Exports[Sym.name()] = {Flags, Linkage};
281   }
282 
283   for (const auto &Sym : Obj->symbols()) {
284     auto FlagsOrErr = Sym.getFlags();
285     if (!FlagsOrErr)
286       return FlagsOrErr.takeError();
287     auto Flags = *FlagsOrErr;
288 
289     auto NameOrErr = Sym.getName();
290     if (!NameOrErr)
291       return NameOrErr.takeError();
292     auto Name = *NameOrErr;
293 
294     RecordLinkage Linkage = RecordLinkage::Unknown;
295     SymbolFlags RecordFlags = SymbolFlags::None;
296 
297     if (Flags & SymbolRef::SF_Undefined) {
298       if (Opt.Undefineds)
299         Linkage = RecordLinkage::Undefined;
300       else
301         continue;
302       if (Flags & SymbolRef::SF_Weak)
303         RecordFlags |= SymbolFlags::WeakReferenced;
304     } else if (Flags & SymbolRef::SF_Exported) {
305       auto Exp = Exports.find(Name);
306       // This should never be possible when binaries are produced with Apple
307       // linkers. However it is possible to craft dylibs where the export trie
308       // is either malformed or has conflicting symbols compared to n_list.
309       if (Exp != Exports.end())
310         std::tie(RecordFlags, Linkage) = Exp->second;
311       else
312         Linkage = RecordLinkage::Exported;
313     } else if (Flags & SymbolRef::SF_Hidden) {
314       Linkage = RecordLinkage::Internal;
315     } else
316       continue;
317 
318     auto TypeOrErr = Sym.getType();
319     if (!TypeOrErr)
320       return TypeOrErr.takeError();
321     auto Type = *TypeOrErr;
322 
323     GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function)
324                                 ? GlobalRecord::Kind::Function
325                                 : GlobalRecord::Kind::Variable;
326 
327     if (GV == GlobalRecord::Kind::Function)
328       RecordFlags |= SymbolFlags::Text;
329     else
330       RecordFlags |= SymbolFlags::Data;
331 
332     Slice.addRecord(Name, RecordFlags, GV, Linkage);
333   }
334   return Err;
335 }
336 
load(MachOObjectFile * Obj,RecordsSlice & Slice,const ParseOption & Opt,const Architecture Arch)337 static Error load(MachOObjectFile *Obj, RecordsSlice &Slice,
338                   const ParseOption &Opt, const Architecture Arch) {
339   if (Arch == AK_unknown)
340     return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);
341 
342   if (Opt.MachOHeader)
343     if (auto Err = readMachOHeader(Obj, Slice))
344       return Err;
345 
346   if (Opt.SymbolTable)
347     if (auto Err = readSymbols(Obj, Slice, Opt))
348       return Err;
349 
350   return Error::success();
351 }
352 
readFile(MemoryBufferRef Buffer,const ParseOption & Opt)353 Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer,
354                                         const ParseOption &Opt) {
355   Records Results;
356 
357   auto BinOrErr = createBinary(Buffer);
358   if (!BinOrErr)
359     return BinOrErr.takeError();
360 
361   Binary &Bin = *BinOrErr.get();
362   if (auto *Obj = dyn_cast<MachOObjectFile>(&Bin)) {
363     const auto Arch = getArchitectureFromCpuType(Obj->getHeader().cputype,
364                                                  Obj->getHeader().cpusubtype);
365     if (!Opt.Archs.has(Arch))
366       return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture);
367 
368     auto Triples = constructTriples(Obj, Arch);
369     for (const auto &T : Triples) {
370       if (mapToPlatformType(T) == PLATFORM_UNKNOWN)
371         return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);
372       Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
373       if (auto Err = load(Obj, *Results.back(), Opt, Arch))
374         return std::move(Err);
375       Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
376     }
377     return Results;
378   }
379 
380   // Only expect MachO universal binaries at this point.
381   assert(isa<MachOUniversalBinary>(&Bin) &&
382          "Expected a MachO universal binary.");
383   auto *UB = cast<MachOUniversalBinary>(&Bin);
384 
385   for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) {
386     // Skip architecture if not requested.
387     auto Arch =
388         getArchitectureFromCpuType(OI->getCPUType(), OI->getCPUSubType());
389     if (!Opt.Archs.has(Arch))
390       continue;
391 
392     // Skip unknown architectures.
393     if (Arch == AK_unknown)
394       continue;
395 
396     // This can fail if the object is an archive.
397     auto ObjOrErr = OI->getAsObjectFile();
398 
399     // Skip the archive and consume the error.
400     if (!ObjOrErr) {
401       consumeError(ObjOrErr.takeError());
402       continue;
403     }
404 
405     auto &Obj = *ObjOrErr.get();
406     switch (Obj.getHeader().filetype) {
407     default:
408       break;
409     case MachO::MH_BUNDLE:
410     case MachO::MH_DYLIB:
411     case MachO::MH_DYLIB_STUB:
412       for (const auto &T : constructTriples(&Obj, Arch)) {
413         Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
414         if (auto Err = load(&Obj, *Results.back(), Opt, Arch))
415           return std::move(Err);
416         Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
417       }
418       break;
419     }
420   }
421 
422   if (Results.empty())
423     return make_error<TextAPIError>(TextAPIErrorCode::EmptyResults);
424   return Results;
425 }
426 
427 Expected<std::unique_ptr<InterfaceFile>>
get(MemoryBufferRef Buffer)428 DylibReader::get(MemoryBufferRef Buffer) {
429   ParseOption Options;
430   auto SlicesOrErr = readFile(Buffer, Options);
431   if (!SlicesOrErr)
432     return SlicesOrErr.takeError();
433 
434   return convertToInterfaceFile(*SlicesOrErr);
435 }
436 
DWARFErrorHandler(Error Err)437 static void DWARFErrorHandler(Error Err) { /**/ }
438 
439 static SymbolToSourceLocMap
accumulateLocs(MachOObjectFile & Obj,const std::unique_ptr<DWARFContext> & DiCtx)440 accumulateLocs(MachOObjectFile &Obj,
441                const std::unique_ptr<DWARFContext> &DiCtx) {
442   SymbolToSourceLocMap LocMap;
443   for (const auto &Symbol : Obj.symbols()) {
444     Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
445     if (!FlagsOrErr) {
446       consumeError(FlagsOrErr.takeError());
447       continue;
448     }
449 
450     if (!(*FlagsOrErr & SymbolRef::SF_Exported))
451       continue;
452 
453     Expected<uint64_t> AddressOrErr = Symbol.getAddress();
454     if (!AddressOrErr) {
455       consumeError(AddressOrErr.takeError());
456       continue;
457     }
458     const uint64_t Address = *AddressOrErr;
459 
460     auto TypeOrErr = Symbol.getType();
461     if (!TypeOrErr) {
462       consumeError(TypeOrErr.takeError());
463       continue;
464     }
465     const bool IsCode = (*TypeOrErr & SymbolRef::ST_Function);
466 
467     auto *DWARFCU = IsCode ? DiCtx->getCompileUnitForCodeAddress(Address)
468                            : DiCtx->getCompileUnitForDataAddress(Address);
469     if (!DWARFCU)
470       continue;
471 
472     const DWARFDie &DIE = IsCode ? DWARFCU->getSubroutineForAddress(Address)
473                                  : DWARFCU->getVariableForAddress(Address);
474     const std::string File = DIE.getDeclFile(
475         llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
476     const uint64_t Line = DIE.getDeclLine();
477 
478     auto NameOrErr = Symbol.getName();
479     if (!NameOrErr) {
480       consumeError(NameOrErr.takeError());
481       continue;
482     }
483     auto Name = *NameOrErr;
484     auto Sym = parseSymbol(Name);
485 
486     if (!File.empty() && Line != 0)
487       LocMap.insert({Sym.Name, RecordLoc(File, Line)});
488   }
489 
490   return LocMap;
491 }
492 
493 SymbolToSourceLocMap
accumulateSourceLocFromDSYM(const StringRef DSYM,const Target & T)494 DylibReader::accumulateSourceLocFromDSYM(const StringRef DSYM,
495                                          const Target &T) {
496   // Find sidecar file.
497   auto DSYMsOrErr = MachOObjectFile::findDsymObjectMembers(DSYM);
498   if (!DSYMsOrErr) {
499     consumeError(DSYMsOrErr.takeError());
500     return SymbolToSourceLocMap();
501   }
502   if (DSYMsOrErr->empty())
503     return SymbolToSourceLocMap();
504 
505   const StringRef Path = DSYMsOrErr->front();
506   auto BufOrErr = MemoryBuffer::getFile(Path);
507   if (auto Err = BufOrErr.getError())
508     return SymbolToSourceLocMap();
509 
510   auto BinOrErr = createBinary(*BufOrErr.get());
511   if (!BinOrErr) {
512     consumeError(BinOrErr.takeError());
513     return SymbolToSourceLocMap();
514   }
515   // Handle single arch.
516   if (auto *Single = dyn_cast<MachOObjectFile>(BinOrErr->get())) {
517     auto DiCtx = DWARFContext::create(
518         *Single, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
519         DWARFErrorHandler, DWARFErrorHandler);
520 
521     return accumulateLocs(*Single, DiCtx);
522   }
523   // Handle universal companion file.
524   if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
525     auto ObjForArch = Fat->getObjectForArch(getArchitectureName(T.Arch));
526     if (!ObjForArch) {
527       consumeError(ObjForArch.takeError());
528       return SymbolToSourceLocMap();
529     }
530     auto MachOOrErr = ObjForArch->getAsObjectFile();
531     if (!MachOOrErr) {
532       consumeError(MachOOrErr.takeError());
533       return SymbolToSourceLocMap();
534     }
535     auto &Obj = **MachOOrErr;
536     auto DiCtx = DWARFContext::create(
537         Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
538         DWARFErrorHandler, DWARFErrorHandler);
539 
540     return accumulateLocs(Obj, DiCtx);
541   }
542   return SymbolToSourceLocMap();
543 }
544