1 //===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// Implements the TAPI Reader for Mach-O dynamic libraries. 10 /// 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/TextAPI/DylibReader.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 16 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/MachOUniversal.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/TargetParser/Triple.h" 21 #include "llvm/TextAPI/InterfaceFile.h" 22 #include "llvm/TextAPI/RecordsSlice.h" 23 #include "llvm/TextAPI/TextAPIError.h" 24 #include <iomanip> 25 #include <set> 26 #include <sstream> 27 #include <string> 28 #include <tuple> 29 30 using namespace llvm; 31 using namespace llvm::object; 32 using namespace llvm::MachO; 33 using namespace llvm::MachO::DylibReader; 34 35 using TripleVec = std::vector<Triple>; 36 static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) { 37 auto I = partition_point(Container, [=](const Triple &CT) { 38 return std::forward_as_tuple(CT.getArch(), CT.getOS(), 39 CT.getEnvironment()) < 40 std::forward_as_tuple(T.getArch(), T.getOS(), T.getEnvironment()); 41 }); 42 43 if (I != Container.end() && *I == T) 44 return I; 45 return Container.emplace(I, T); 46 } 47 48 static TripleVec constructTriples(MachOObjectFile *Obj, 49 const Architecture ArchT) { 50 auto getOSVersionStr = [](uint32_t V) { 51 PackedVersion OSVersion(V); 52 std::string Vers; 53 raw_string_ostream VStream(Vers); 54 VStream << OSVersion; 55 return VStream.str(); 56 }; 57 auto getOSVersion = [&](const MachOObjectFile::LoadCommandInfo &cmd) { 58 auto Vers = Obj->getVersionMinLoadCommand(cmd); 59 return getOSVersionStr(Vers.version); 60 }; 61 62 TripleVec Triples; 63 bool IsIntel = ArchitectureSet(ArchT).hasX86(); 64 auto Arch = getArchitectureName(ArchT); 65 66 for (const auto &cmd : Obj->load_commands()) { 67 std::string OSVersion; 68 switch (cmd.C.cmd) { 69 case MachO::LC_VERSION_MIN_MACOSX: 70 OSVersion = getOSVersion(cmd); 71 emplace(Triples, {Arch, "apple", "macos" + OSVersion}); 72 break; 73 case MachO::LC_VERSION_MIN_IPHONEOS: 74 OSVersion = getOSVersion(cmd); 75 if (IsIntel) 76 emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"}); 77 else 78 emplace(Triples, {Arch, "apple", "ios" + OSVersion}); 79 break; 80 case MachO::LC_VERSION_MIN_TVOS: 81 OSVersion = getOSVersion(cmd); 82 if (IsIntel) 83 emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"}); 84 else 85 emplace(Triples, {Arch, "apple", "tvos" + OSVersion}); 86 break; 87 case MachO::LC_VERSION_MIN_WATCHOS: 88 OSVersion = getOSVersion(cmd); 89 if (IsIntel) 90 emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"}); 91 else 92 emplace(Triples, {Arch, "apple", "watchos" + OSVersion}); 93 break; 94 case MachO::LC_BUILD_VERSION: { 95 OSVersion = getOSVersionStr(Obj->getBuildVersionLoadCommand(cmd).minos); 96 switch (Obj->getBuildVersionLoadCommand(cmd).platform) { 97 case MachO::PLATFORM_MACOS: 98 emplace(Triples, {Arch, "apple", "macos" + OSVersion}); 99 break; 100 case MachO::PLATFORM_IOS: 101 emplace(Triples, {Arch, "apple", "ios" + OSVersion}); 102 break; 103 case MachO::PLATFORM_TVOS: 104 emplace(Triples, {Arch, "apple", "tvos" + OSVersion}); 105 break; 106 case MachO::PLATFORM_WATCHOS: 107 emplace(Triples, {Arch, "apple", "watchos" + OSVersion}); 108 break; 109 case MachO::PLATFORM_BRIDGEOS: 110 emplace(Triples, {Arch, "apple", "bridgeos" + OSVersion}); 111 break; 112 case MachO::PLATFORM_MACCATALYST: 113 emplace(Triples, {Arch, "apple", "ios" + OSVersion, "macabi"}); 114 break; 115 case MachO::PLATFORM_IOSSIMULATOR: 116 emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"}); 117 break; 118 case MachO::PLATFORM_TVOSSIMULATOR: 119 emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"}); 120 break; 121 case MachO::PLATFORM_WATCHOSSIMULATOR: 122 emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"}); 123 break; 124 case MachO::PLATFORM_DRIVERKIT: 125 emplace(Triples, {Arch, "apple", "driverkit" + OSVersion}); 126 break; 127 default: 128 break; // Skip any others. 129 } 130 break; 131 } 132 default: 133 break; 134 } 135 } 136 137 // Record unknown platform for older binaries that don't enforce platform 138 // load commands. 139 if (Triples.empty()) 140 emplace(Triples, {Arch, "apple", "unknown"}); 141 142 return Triples; 143 } 144 145 static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) { 146 auto H = Obj->getHeader(); 147 auto &BA = Slice.getBinaryAttrs(); 148 149 switch (H.filetype) { 150 default: 151 llvm_unreachable("unsupported binary type"); 152 case MachO::MH_DYLIB: 153 BA.File = FileType::MachO_DynamicLibrary; 154 break; 155 case MachO::MH_DYLIB_STUB: 156 BA.File = FileType::MachO_DynamicLibrary_Stub; 157 break; 158 case MachO::MH_BUNDLE: 159 BA.File = FileType::MachO_Bundle; 160 break; 161 } 162 163 if (H.flags & MachO::MH_TWOLEVEL) 164 BA.TwoLevelNamespace = true; 165 if (H.flags & MachO::MH_APP_EXTENSION_SAFE) 166 BA.AppExtensionSafe = true; 167 168 for (const auto &LCI : Obj->load_commands()) { 169 switch (LCI.C.cmd) { 170 case MachO::LC_ID_DYLIB: { 171 auto DLLC = Obj->getDylibIDLoadCommand(LCI); 172 BA.InstallName = Slice.copyString(LCI.Ptr + DLLC.dylib.name); 173 BA.CurrentVersion = DLLC.dylib.current_version; 174 BA.CompatVersion = DLLC.dylib.compatibility_version; 175 break; 176 } 177 case MachO::LC_REEXPORT_DYLIB: { 178 auto DLLC = Obj->getDylibIDLoadCommand(LCI); 179 BA.RexportedLibraries.emplace_back( 180 Slice.copyString(LCI.Ptr + DLLC.dylib.name)); 181 break; 182 } 183 case MachO::LC_SUB_FRAMEWORK: { 184 auto SFC = Obj->getSubFrameworkCommand(LCI); 185 BA.ParentUmbrella = Slice.copyString(LCI.Ptr + SFC.umbrella); 186 break; 187 } 188 case MachO::LC_SUB_CLIENT: { 189 auto SCLC = Obj->getSubClientCommand(LCI); 190 BA.AllowableClients.emplace_back(Slice.copyString(LCI.Ptr + SCLC.client)); 191 break; 192 } 193 case MachO::LC_UUID: { 194 auto UUIDLC = Obj->getUuidCommand(LCI); 195 std::stringstream Stream; 196 for (unsigned I = 0; I < 16; ++I) { 197 if (I == 4 || I == 6 || I == 8 || I == 10) 198 Stream << '-'; 199 Stream << std::setfill('0') << std::setw(2) << std::uppercase 200 << std::hex << static_cast<int>(UUIDLC.uuid[I]); 201 } 202 BA.UUID = Slice.copyString(Stream.str()); 203 break; 204 } 205 case MachO::LC_RPATH: { 206 auto RPLC = Obj->getRpathCommand(LCI); 207 BA.RPaths.emplace_back(Slice.copyString(LCI.Ptr + RPLC.path)); 208 break; 209 } 210 case MachO::LC_SEGMENT_SPLIT_INFO: { 211 auto SSILC = Obj->getLinkeditDataLoadCommand(LCI); 212 if (SSILC.datasize == 0) 213 BA.OSLibNotForSharedCache = true; 214 break; 215 } 216 default: 217 break; 218 } 219 } 220 221 for (auto &Sect : Obj->sections()) { 222 auto SectName = Sect.getName(); 223 if (!SectName) 224 return SectName.takeError(); 225 if (*SectName != "__objc_imageinfo" && *SectName != "__image_info") 226 continue; 227 228 auto Content = Sect.getContents(); 229 if (!Content) 230 return Content.takeError(); 231 232 if ((Content->size() >= 8) && (Content->front() == 0)) { 233 uint32_t Flags; 234 if (Obj->isLittleEndian()) { 235 auto *p = 236 reinterpret_cast<const support::ulittle32_t *>(Content->data() + 4); 237 Flags = *p; 238 } else { 239 auto *p = 240 reinterpret_cast<const support::ubig32_t *>(Content->data() + 4); 241 Flags = *p; 242 } 243 BA.SwiftABI = (Flags >> 8) & 0xFF; 244 } 245 } 246 return Error::success(); 247 } 248 249 static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice, 250 const ParseOption &Opt) { 251 252 auto parseExport = [](const auto ExportFlags, 253 auto Addr) -> std::tuple<SymbolFlags, RecordLinkage> { 254 SymbolFlags Flags = SymbolFlags::None; 255 switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) { 256 case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR: 257 if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) 258 Flags |= SymbolFlags::WeakDefined; 259 break; 260 case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: 261 Flags |= SymbolFlags::ThreadLocalValue; 262 break; 263 } 264 265 RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) 266 ? RecordLinkage::Rexported 267 : RecordLinkage::Exported; 268 return {Flags, Linkage}; 269 }; 270 271 Error Err = Error::success(); 272 273 StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports; 274 // Collect symbols from export trie first. Sometimes, there are more exports 275 // in the trie than in n-list due to stripping. This is common for swift 276 // mangled symbols. 277 for (auto &Sym : Obj->exports(Err)) { 278 auto [Flags, Linkage] = parseExport(Sym.flags(), Sym.address()); 279 Slice.addRecord(Sym.name(), Flags, GlobalRecord::Kind::Unknown, Linkage); 280 Exports[Sym.name()] = {Flags, Linkage}; 281 } 282 283 for (const auto &Sym : Obj->symbols()) { 284 auto FlagsOrErr = Sym.getFlags(); 285 if (!FlagsOrErr) 286 return FlagsOrErr.takeError(); 287 auto Flags = *FlagsOrErr; 288 289 auto NameOrErr = Sym.getName(); 290 if (!NameOrErr) 291 return NameOrErr.takeError(); 292 auto Name = *NameOrErr; 293 294 RecordLinkage Linkage = RecordLinkage::Unknown; 295 SymbolFlags RecordFlags = SymbolFlags::None; 296 297 if (Flags & SymbolRef::SF_Undefined) { 298 if (Opt.Undefineds) 299 Linkage = RecordLinkage::Undefined; 300 else 301 continue; 302 if (Flags & SymbolRef::SF_Weak) 303 RecordFlags |= SymbolFlags::WeakReferenced; 304 } else if (Flags & SymbolRef::SF_Exported) { 305 auto Exp = Exports.find(Name); 306 // This should never be possible when binaries are produced with Apple 307 // linkers. However it is possible to craft dylibs where the export trie 308 // is either malformed or has conflicting symbols compared to n_list. 309 if (Exp != Exports.end()) 310 std::tie(RecordFlags, Linkage) = Exp->second; 311 else 312 Linkage = RecordLinkage::Exported; 313 } else if (Flags & SymbolRef::SF_Hidden) { 314 Linkage = RecordLinkage::Internal; 315 } else 316 continue; 317 318 auto TypeOrErr = Sym.getType(); 319 if (!TypeOrErr) 320 return TypeOrErr.takeError(); 321 auto Type = *TypeOrErr; 322 323 GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function) 324 ? GlobalRecord::Kind::Function 325 : GlobalRecord::Kind::Variable; 326 327 if (GV == GlobalRecord::Kind::Function) 328 RecordFlags |= SymbolFlags::Text; 329 else 330 RecordFlags |= SymbolFlags::Data; 331 332 Slice.addRecord(Name, RecordFlags, GV, Linkage); 333 } 334 return Err; 335 } 336 337 static Error load(MachOObjectFile *Obj, RecordsSlice &Slice, 338 const ParseOption &Opt, const Architecture Arch) { 339 if (Arch == AK_unknown) 340 return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget); 341 342 if (Opt.MachOHeader) 343 if (auto Err = readMachOHeader(Obj, Slice)) 344 return Err; 345 346 if (Opt.SymbolTable) 347 if (auto Err = readSymbols(Obj, Slice, Opt)) 348 return Err; 349 350 return Error::success(); 351 } 352 353 Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer, 354 const ParseOption &Opt) { 355 Records Results; 356 357 auto BinOrErr = createBinary(Buffer); 358 if (!BinOrErr) 359 return BinOrErr.takeError(); 360 361 Binary &Bin = *BinOrErr.get(); 362 if (auto *Obj = dyn_cast<MachOObjectFile>(&Bin)) { 363 const auto Arch = getArchitectureFromCpuType(Obj->getHeader().cputype, 364 Obj->getHeader().cpusubtype); 365 if (!Opt.Archs.has(Arch)) 366 return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture); 367 368 auto Triples = constructTriples(Obj, Arch); 369 for (const auto &T : Triples) { 370 if (mapToPlatformType(T) == PLATFORM_UNKNOWN) 371 return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget); 372 Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T}))); 373 if (auto Err = load(Obj, *Results.back(), Opt, Arch)) 374 return std::move(Err); 375 Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier(); 376 } 377 return Results; 378 } 379 380 // Only expect MachO universal binaries at this point. 381 assert(isa<MachOUniversalBinary>(&Bin) && 382 "Expected a MachO universal binary."); 383 auto *UB = cast<MachOUniversalBinary>(&Bin); 384 385 for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) { 386 // Skip architecture if not requested. 387 auto Arch = 388 getArchitectureFromCpuType(OI->getCPUType(), OI->getCPUSubType()); 389 if (!Opt.Archs.has(Arch)) 390 continue; 391 392 // Skip unknown architectures. 393 if (Arch == AK_unknown) 394 continue; 395 396 // This can fail if the object is an archive. 397 auto ObjOrErr = OI->getAsObjectFile(); 398 399 // Skip the archive and consume the error. 400 if (!ObjOrErr) { 401 consumeError(ObjOrErr.takeError()); 402 continue; 403 } 404 405 auto &Obj = *ObjOrErr.get(); 406 switch (Obj.getHeader().filetype) { 407 default: 408 break; 409 case MachO::MH_BUNDLE: 410 case MachO::MH_DYLIB: 411 case MachO::MH_DYLIB_STUB: 412 for (const auto &T : constructTriples(&Obj, Arch)) { 413 Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T}))); 414 if (auto Err = load(&Obj, *Results.back(), Opt, Arch)) 415 return std::move(Err); 416 Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier(); 417 } 418 break; 419 } 420 } 421 422 if (Results.empty()) 423 return make_error<TextAPIError>(TextAPIErrorCode::EmptyResults); 424 return Results; 425 } 426 427 Expected<std::unique_ptr<InterfaceFile>> 428 DylibReader::get(MemoryBufferRef Buffer) { 429 ParseOption Options; 430 auto SlicesOrErr = readFile(Buffer, Options); 431 if (!SlicesOrErr) 432 return SlicesOrErr.takeError(); 433 434 return convertToInterfaceFile(*SlicesOrErr); 435 } 436 437 static void DWARFErrorHandler(Error Err) { /**/ } 438 439 static SymbolToSourceLocMap 440 accumulateLocs(MachOObjectFile &Obj, 441 const std::unique_ptr<DWARFContext> &DiCtx) { 442 SymbolToSourceLocMap LocMap; 443 for (const auto &Symbol : Obj.symbols()) { 444 Expected<uint32_t> FlagsOrErr = Symbol.getFlags(); 445 if (!FlagsOrErr) { 446 consumeError(FlagsOrErr.takeError()); 447 continue; 448 } 449 450 if (!(*FlagsOrErr & SymbolRef::SF_Exported)) 451 continue; 452 453 Expected<uint64_t> AddressOrErr = Symbol.getAddress(); 454 if (!AddressOrErr) { 455 consumeError(AddressOrErr.takeError()); 456 continue; 457 } 458 const uint64_t Address = *AddressOrErr; 459 460 auto TypeOrErr = Symbol.getType(); 461 if (!TypeOrErr) { 462 consumeError(TypeOrErr.takeError()); 463 continue; 464 } 465 const bool IsCode = (*TypeOrErr & SymbolRef::ST_Function); 466 467 auto *DWARFCU = IsCode ? DiCtx->getCompileUnitForCodeAddress(Address) 468 : DiCtx->getCompileUnitForDataAddress(Address); 469 if (!DWARFCU) 470 continue; 471 472 const DWARFDie &DIE = IsCode ? DWARFCU->getSubroutineForAddress(Address) 473 : DWARFCU->getVariableForAddress(Address); 474 const std::string File = DIE.getDeclFile( 475 llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); 476 const uint64_t Line = DIE.getDeclLine(); 477 478 auto NameOrErr = Symbol.getName(); 479 if (!NameOrErr) { 480 consumeError(NameOrErr.takeError()); 481 continue; 482 } 483 auto Name = *NameOrErr; 484 auto Sym = parseSymbol(Name); 485 486 if (!File.empty() && Line != 0) 487 LocMap.insert({Sym.Name, RecordLoc(File, Line)}); 488 } 489 490 return LocMap; 491 } 492 493 SymbolToSourceLocMap 494 DylibReader::accumulateSourceLocFromDSYM(const StringRef DSYM, 495 const Target &T) { 496 // Find sidecar file. 497 auto DSYMsOrErr = MachOObjectFile::findDsymObjectMembers(DSYM); 498 if (!DSYMsOrErr) { 499 consumeError(DSYMsOrErr.takeError()); 500 return SymbolToSourceLocMap(); 501 } 502 if (DSYMsOrErr->empty()) 503 return SymbolToSourceLocMap(); 504 505 const StringRef Path = DSYMsOrErr->front(); 506 auto BufOrErr = MemoryBuffer::getFile(Path); 507 if (auto Err = BufOrErr.getError()) 508 return SymbolToSourceLocMap(); 509 510 auto BinOrErr = createBinary(*BufOrErr.get()); 511 if (!BinOrErr) { 512 consumeError(BinOrErr.takeError()); 513 return SymbolToSourceLocMap(); 514 } 515 // Handle single arch. 516 if (auto *Single = dyn_cast<MachOObjectFile>(BinOrErr->get())) { 517 auto DiCtx = DWARFContext::create( 518 *Single, DWARFContext::ProcessDebugRelocations::Process, nullptr, "", 519 DWARFErrorHandler, DWARFErrorHandler); 520 521 return accumulateLocs(*Single, DiCtx); 522 } 523 // Handle universal companion file. 524 if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) { 525 auto ObjForArch = Fat->getObjectForArch(getArchitectureName(T.Arch)); 526 if (!ObjForArch) { 527 consumeError(ObjForArch.takeError()); 528 return SymbolToSourceLocMap(); 529 } 530 auto MachOOrErr = ObjForArch->getAsObjectFile(); 531 if (!MachOOrErr) { 532 consumeError(MachOOrErr.takeError()); 533 return SymbolToSourceLocMap(); 534 } 535 auto &Obj = **MachOOrErr; 536 auto DiCtx = DWARFContext::create( 537 Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "", 538 DWARFErrorHandler, DWARFErrorHandler); 539 540 return accumulateLocs(Obj, DiCtx); 541 } 542 return SymbolToSourceLocMap(); 543 } 544