1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation for LLVM symbolization library. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 14 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/DebugInfo/BTF/BTFContext.h" 17 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 18 #include "llvm/DebugInfo/GSYM/GsymContext.h" 19 #include "llvm/DebugInfo/GSYM/GsymReader.h" 20 #include "llvm/DebugInfo/PDB/PDB.h" 21 #include "llvm/DebugInfo/PDB/PDBContext.h" 22 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 23 #include "llvm/Demangle/Demangle.h" 24 #include "llvm/Object/BuildID.h" 25 #include "llvm/Object/COFF.h" 26 #include "llvm/Object/ELFObjectFile.h" 27 #include "llvm/Object/MachO.h" 28 #include "llvm/Object/MachOUniversal.h" 29 #include "llvm/Support/CRC.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/DataExtractor.h" 32 #include "llvm/Support/Errc.h" 33 #include "llvm/Support/FileSystem.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/Path.h" 36 #include <cassert> 37 #include <cstring> 38 39 namespace llvm { 40 namespace codeview { 41 union DebugInfo; 42 } 43 namespace symbolize { 44 45 LLVMSymbolizer::LLVMSymbolizer() = default; 46 47 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) 48 : Opts(Opts), 49 BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {} 50 51 LLVMSymbolizer::~LLVMSymbolizer() = default; 52 53 template <typename T> 54 Expected<DILineInfo> 55 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, 56 object::SectionedAddress ModuleOffset) { 57 58 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 59 if (!InfoOrErr) 60 return InfoOrErr.takeError(); 61 62 SymbolizableModule *Info = *InfoOrErr; 63 64 // A null module means an error has already been reported. Return an empty 65 // result. 66 if (!Info) 67 return DILineInfo(); 68 69 // If the user is giving us relative addresses, add the preferred base of the 70 // object to the offset before we do the query. It's what DIContext expects. 71 if (Opts.RelativeAddresses) 72 ModuleOffset.Address += Info->getModulePreferredBase(); 73 74 DILineInfo LineInfo = Info->symbolizeCode( 75 ModuleOffset, 76 DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, 77 Opts.SkipLineZero), 78 Opts.UseSymbolTable); 79 if (Opts.Demangle) 80 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 81 return LineInfo; 82 } 83 84 Expected<DILineInfo> 85 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, 86 object::SectionedAddress ModuleOffset) { 87 return symbolizeCodeCommon(Obj, ModuleOffset); 88 } 89 90 Expected<DILineInfo> 91 LLVMSymbolizer::symbolizeCode(StringRef ModuleName, 92 object::SectionedAddress ModuleOffset) { 93 return symbolizeCodeCommon(ModuleName, ModuleOffset); 94 } 95 96 Expected<DILineInfo> 97 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, 98 object::SectionedAddress ModuleOffset) { 99 return symbolizeCodeCommon(BuildID, ModuleOffset); 100 } 101 102 template <typename T> 103 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( 104 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { 105 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 106 if (!InfoOrErr) 107 return InfoOrErr.takeError(); 108 109 SymbolizableModule *Info = *InfoOrErr; 110 111 // A null module means an error has already been reported. Return an empty 112 // result. 113 if (!Info) 114 return DIInliningInfo(); 115 116 // If the user is giving us relative addresses, add the preferred base of the 117 // object to the offset before we do the query. It's what DIContext expects. 118 if (Opts.RelativeAddresses) 119 ModuleOffset.Address += Info->getModulePreferredBase(); 120 121 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 122 ModuleOffset, 123 DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, 124 Opts.SkipLineZero), 125 Opts.UseSymbolTable); 126 if (Opts.Demangle) { 127 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 128 auto *Frame = InlinedContext.getMutableFrame(i); 129 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 130 } 131 } 132 return InlinedContext; 133 } 134 135 Expected<DIInliningInfo> 136 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj, 137 object::SectionedAddress ModuleOffset) { 138 return symbolizeInlinedCodeCommon(Obj, ModuleOffset); 139 } 140 141 Expected<DIInliningInfo> 142 LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName, 143 object::SectionedAddress ModuleOffset) { 144 return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset); 145 } 146 147 Expected<DIInliningInfo> 148 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, 149 object::SectionedAddress ModuleOffset) { 150 return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); 151 } 152 153 template <typename T> 154 Expected<DIGlobal> 155 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, 156 object::SectionedAddress ModuleOffset) { 157 158 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 159 if (!InfoOrErr) 160 return InfoOrErr.takeError(); 161 162 SymbolizableModule *Info = *InfoOrErr; 163 // A null module means an error has already been reported. Return an empty 164 // result. 165 if (!Info) 166 return DIGlobal(); 167 168 // If the user is giving us relative addresses, add the preferred base of 169 // the object to the offset before we do the query. It's what DIContext 170 // expects. 171 if (Opts.RelativeAddresses) 172 ModuleOffset.Address += Info->getModulePreferredBase(); 173 174 DIGlobal Global = Info->symbolizeData(ModuleOffset); 175 if (Opts.Demangle) 176 Global.Name = DemangleName(Global.Name, Info); 177 return Global; 178 } 179 180 Expected<DIGlobal> 181 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj, 182 object::SectionedAddress ModuleOffset) { 183 return symbolizeDataCommon(Obj, ModuleOffset); 184 } 185 186 Expected<DIGlobal> 187 LLVMSymbolizer::symbolizeData(StringRef ModuleName, 188 object::SectionedAddress ModuleOffset) { 189 return symbolizeDataCommon(ModuleName, ModuleOffset); 190 } 191 192 Expected<DIGlobal> 193 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, 194 object::SectionedAddress ModuleOffset) { 195 return symbolizeDataCommon(BuildID, ModuleOffset); 196 } 197 198 template <typename T> 199 Expected<std::vector<DILocal>> 200 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, 201 object::SectionedAddress ModuleOffset) { 202 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 203 if (!InfoOrErr) 204 return InfoOrErr.takeError(); 205 206 SymbolizableModule *Info = *InfoOrErr; 207 // A null module means an error has already been reported. Return an empty 208 // result. 209 if (!Info) 210 return std::vector<DILocal>(); 211 212 // If the user is giving us relative addresses, add the preferred base of 213 // the object to the offset before we do the query. It's what DIContext 214 // expects. 215 if (Opts.RelativeAddresses) 216 ModuleOffset.Address += Info->getModulePreferredBase(); 217 218 return Info->symbolizeFrame(ModuleOffset); 219 } 220 221 Expected<std::vector<DILocal>> 222 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj, 223 object::SectionedAddress ModuleOffset) { 224 return symbolizeFrameCommon(Obj, ModuleOffset); 225 } 226 227 Expected<std::vector<DILocal>> 228 LLVMSymbolizer::symbolizeFrame(StringRef ModuleName, 229 object::SectionedAddress ModuleOffset) { 230 return symbolizeFrameCommon(ModuleName, ModuleOffset); 231 } 232 233 Expected<std::vector<DILocal>> 234 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, 235 object::SectionedAddress ModuleOffset) { 236 return symbolizeFrameCommon(BuildID, ModuleOffset); 237 } 238 239 template <typename T> 240 Expected<std::vector<DILineInfo>> 241 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol, 242 uint64_t Offset) { 243 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 244 if (!InfoOrErr) 245 return InfoOrErr.takeError(); 246 247 SymbolizableModule *Info = *InfoOrErr; 248 std::vector<DILineInfo> Result; 249 250 // A null module means an error has already been reported. Return an empty 251 // result. 252 if (!Info) 253 return Result; 254 255 for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) { 256 DILineInfo LineInfo = Info->symbolizeCode( 257 A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), 258 Opts.UseSymbolTable); 259 if (LineInfo.FileName != DILineInfo::BadString) { 260 if (Opts.Demangle) 261 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 262 Result.push_back(std::move(LineInfo)); 263 } 264 } 265 266 return Result; 267 } 268 269 Expected<std::vector<DILineInfo>> 270 LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol, 271 uint64_t Offset) { 272 return findSymbolCommon(Obj, Symbol, Offset); 273 } 274 275 Expected<std::vector<DILineInfo>> 276 LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol, 277 uint64_t Offset) { 278 return findSymbolCommon(ModuleName, Symbol, Offset); 279 } 280 281 Expected<std::vector<DILineInfo>> 282 LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol, 283 uint64_t Offset) { 284 return findSymbolCommon(BuildID, Symbol, Offset); 285 } 286 287 void LLVMSymbolizer::flush() { 288 ObjectForUBPathAndArch.clear(); 289 LRUBinaries.clear(); 290 CacheSize = 0; 291 BinaryForPath.clear(); 292 ObjectPairForPathArch.clear(); 293 Modules.clear(); 294 BuildIDPaths.clear(); 295 } 296 297 namespace { 298 299 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 300 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 301 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 302 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 303 std::string getDarwinDWARFResourceForPath(const std::string &Path, 304 const std::string &Basename) { 305 SmallString<16> ResourceName = StringRef(Path); 306 if (sys::path::extension(Path) != ".dSYM") { 307 ResourceName += ".dSYM"; 308 } 309 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 310 sys::path::append(ResourceName, Basename); 311 return std::string(ResourceName); 312 } 313 314 bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 315 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 316 MemoryBuffer::getFileOrSTDIN(Path); 317 if (!MB) 318 return false; 319 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); 320 } 321 322 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 323 uint32_t &CRCHash) { 324 if (!Obj) 325 return false; 326 for (const SectionRef &Section : Obj->sections()) { 327 StringRef Name; 328 consumeError(Section.getName().moveInto(Name)); 329 330 Name = Name.substr(Name.find_first_not_of("._")); 331 if (Name == "gnu_debuglink") { 332 Expected<StringRef> ContentsOrErr = Section.getContents(); 333 if (!ContentsOrErr) { 334 consumeError(ContentsOrErr.takeError()); 335 return false; 336 } 337 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); 338 uint64_t Offset = 0; 339 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 340 // 4-byte align the offset. 341 Offset = (Offset + 3) & ~0x3; 342 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 343 DebugName = DebugNameStr; 344 CRCHash = DE.getU32(&Offset); 345 return true; 346 } 347 } 348 break; 349 } 350 } 351 return false; 352 } 353 354 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 355 const MachOObjectFile *Obj) { 356 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 357 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 358 if (dbg_uuid.empty() || bin_uuid.empty()) 359 return false; 360 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 361 } 362 363 } // end anonymous namespace 364 365 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 366 const MachOObjectFile *MachExeObj, 367 const std::string &ArchName) { 368 // On Darwin we may find DWARF in separate object file in 369 // resource directory. 370 std::vector<std::string> DsymPaths; 371 StringRef Filename = sys::path::filename(ExePath); 372 DsymPaths.push_back( 373 getDarwinDWARFResourceForPath(ExePath, std::string(Filename))); 374 for (const auto &Path : Opts.DsymHints) { 375 DsymPaths.push_back( 376 getDarwinDWARFResourceForPath(Path, std::string(Filename))); 377 } 378 for (const auto &Path : DsymPaths) { 379 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 380 if (!DbgObjOrErr) { 381 // Ignore errors, the file might not exist. 382 consumeError(DbgObjOrErr.takeError()); 383 continue; 384 } 385 ObjectFile *DbgObj = DbgObjOrErr.get(); 386 if (!DbgObj) 387 continue; 388 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 389 if (!MachDbgObj) 390 continue; 391 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 392 return DbgObj; 393 } 394 return nullptr; 395 } 396 397 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 398 const ObjectFile *Obj, 399 const std::string &ArchName) { 400 std::string DebuglinkName; 401 uint32_t CRCHash; 402 std::string DebugBinaryPath; 403 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 404 return nullptr; 405 if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) 406 return nullptr; 407 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 408 if (!DbgObjOrErr) { 409 // Ignore errors, the file might not exist. 410 consumeError(DbgObjOrErr.takeError()); 411 return nullptr; 412 } 413 return DbgObjOrErr.get(); 414 } 415 416 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, 417 const ELFObjectFileBase *Obj, 418 const std::string &ArchName) { 419 auto BuildID = getBuildID(Obj); 420 if (BuildID.size() < 2) 421 return nullptr; 422 std::string DebugBinaryPath; 423 if (!getOrFindDebugBinary(BuildID, DebugBinaryPath)) 424 return nullptr; 425 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 426 if (!DbgObjOrErr) { 427 consumeError(DbgObjOrErr.takeError()); 428 return nullptr; 429 } 430 return DbgObjOrErr.get(); 431 } 432 433 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, 434 const std::string &DebuglinkName, 435 uint32_t CRCHash, std::string &Result) { 436 SmallString<16> OrigDir(OrigPath); 437 llvm::sys::path::remove_filename(OrigDir); 438 SmallString<16> DebugPath = OrigDir; 439 // Try relative/path/to/original_binary/debuglink_name 440 llvm::sys::path::append(DebugPath, DebuglinkName); 441 if (checkFileCRC(DebugPath, CRCHash)) { 442 Result = std::string(DebugPath); 443 return true; 444 } 445 // Try relative/path/to/original_binary/.debug/debuglink_name 446 DebugPath = OrigDir; 447 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 448 if (checkFileCRC(DebugPath, CRCHash)) { 449 Result = std::string(DebugPath); 450 return true; 451 } 452 // Make the path absolute so that lookups will go to 453 // "/usr/lib/debug/full/path/to/debug", not 454 // "/usr/lib/debug/to/debug" 455 llvm::sys::fs::make_absolute(OrigDir); 456 if (!Opts.FallbackDebugPath.empty()) { 457 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name 458 DebugPath = Opts.FallbackDebugPath; 459 } else { 460 #if defined(__NetBSD__) 461 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name 462 DebugPath = "/usr/libdata/debug"; 463 #else 464 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name 465 DebugPath = "/usr/lib/debug"; 466 #endif 467 } 468 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 469 DebuglinkName); 470 if (checkFileCRC(DebugPath, CRCHash)) { 471 Result = std::string(DebugPath); 472 return true; 473 } 474 return false; 475 } 476 477 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { 478 return StringRef(reinterpret_cast<const char *>(BuildID.data()), 479 BuildID.size()); 480 } 481 482 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, 483 std::string &Result) { 484 StringRef BuildIDStr = getBuildIDStr(BuildID); 485 auto I = BuildIDPaths.find(BuildIDStr); 486 if (I != BuildIDPaths.end()) { 487 Result = I->second; 488 return true; 489 } 490 if (!BIDFetcher) 491 return false; 492 if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) { 493 Result = *Path; 494 auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result}); 495 assert(InsertResult.second); 496 (void)InsertResult; 497 return true; 498 } 499 500 return false; 501 } 502 503 std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) { 504 if (Opts.DisableGsym) 505 return {}; 506 507 auto CheckGsymFile = [](const llvm::StringRef &GsymPath) { 508 sys::fs::file_status Status; 509 std::error_code EC = llvm::sys::fs::status(GsymPath, Status); 510 return !EC && !llvm::sys::fs::is_directory(Status); 511 }; 512 513 // First, look beside the binary file 514 if (const auto GsymPath = Path + ".gsym"; CheckGsymFile(GsymPath)) 515 return GsymPath; 516 517 // Then, look in the directories specified by GsymFileDirectory 518 519 for (const auto &Directory : Opts.GsymFileDirectory) { 520 SmallString<16> GsymPath = llvm::StringRef{Directory}; 521 llvm::sys::path::append(GsymPath, 522 llvm::sys::path::filename(Path) + ".gsym"); 523 524 if (CheckGsymFile(GsymPath)) 525 return static_cast<std::string>(GsymPath); 526 } 527 528 return {}; 529 } 530 531 Expected<LLVMSymbolizer::ObjectPair> 532 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 533 const std::string &ArchName) { 534 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 535 if (I != ObjectPairForPathArch.end()) { 536 recordAccess(BinaryForPath.find(Path)->second); 537 return I->second; 538 } 539 540 auto ObjOrErr = getOrCreateObject(Path, ArchName); 541 if (!ObjOrErr) { 542 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), 543 ObjectPair(nullptr, nullptr)); 544 return ObjOrErr.takeError(); 545 } 546 547 ObjectFile *Obj = ObjOrErr.get(); 548 assert(Obj != nullptr); 549 ObjectFile *DbgObj = nullptr; 550 551 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 552 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 553 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj)) 554 DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName); 555 if (!DbgObj) 556 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 557 if (!DbgObj) 558 DbgObj = Obj; 559 ObjectPair Res = std::make_pair(Obj, DbgObj); 560 std::string DbgObjPath = DbgObj->getFileName().str(); 561 auto Pair = 562 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); 563 BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() { 564 ObjectPairForPathArch.erase(I); 565 }); 566 return Res; 567 } 568 569 Expected<ObjectFile *> 570 LLVMSymbolizer::getOrCreateObject(const std::string &Path, 571 const std::string &ArchName) { 572 Binary *Bin; 573 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>()); 574 if (!Pair.second) { 575 Bin = Pair.first->second->getBinary(); 576 recordAccess(Pair.first->second); 577 } else { 578 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 579 if (!BinOrErr) 580 return BinOrErr.takeError(); 581 582 CachedBinary &CachedBin = Pair.first->second; 583 CachedBin = std::move(BinOrErr.get()); 584 CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); }); 585 LRUBinaries.push_back(CachedBin); 586 CacheSize += CachedBin.size(); 587 Bin = CachedBin->getBinary(); 588 } 589 590 if (!Bin) 591 return static_cast<ObjectFile *>(nullptr); 592 593 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 594 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 595 if (I != ObjectForUBPathAndArch.end()) 596 return I->second.get(); 597 598 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 599 UB->getMachOObjectForArch(ArchName); 600 if (!ObjOrErr) { 601 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 602 std::unique_ptr<ObjectFile>()); 603 return ObjOrErr.takeError(); 604 } 605 ObjectFile *Res = ObjOrErr->get(); 606 auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 607 std::move(ObjOrErr.get())); 608 BinaryForPath.find(Path)->second.pushEvictor( 609 [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); }); 610 return Res; 611 } 612 if (Bin->isObject()) { 613 return cast<ObjectFile>(Bin); 614 } 615 return errorCodeToError(object_error::arch_not_found); 616 } 617 618 Expected<SymbolizableModule *> 619 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, 620 std::unique_ptr<DIContext> Context, 621 StringRef ModuleName) { 622 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context), 623 Opts.UntagAddresses); 624 std::unique_ptr<SymbolizableModule> SymMod; 625 if (InfoOrErr) 626 SymMod = std::move(*InfoOrErr); 627 auto InsertResult = Modules.insert( 628 std::make_pair(std::string(ModuleName), std::move(SymMod))); 629 assert(InsertResult.second); 630 if (!InfoOrErr) 631 return InfoOrErr.takeError(); 632 return InsertResult.first->second.get(); 633 } 634 635 Expected<SymbolizableModule *> 636 LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) { 637 StringRef BinaryName = ModuleName; 638 StringRef ArchName = Opts.DefaultArch; 639 size_t ColonPos = ModuleName.find_last_of(':'); 640 // Verify that substring after colon form a valid arch name. 641 if (ColonPos != std::string::npos) { 642 StringRef ArchStr = ModuleName.substr(ColonPos + 1); 643 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 644 BinaryName = ModuleName.substr(0, ColonPos); 645 ArchName = ArchStr; 646 } 647 } 648 649 auto I = Modules.find(ModuleName); 650 if (I != Modules.end()) { 651 recordAccess(BinaryForPath.find(BinaryName)->second); 652 return I->second.get(); 653 } 654 655 auto ObjectsOrErr = 656 getOrCreateObjectPair(std::string{BinaryName}, std::string{ArchName}); 657 if (!ObjectsOrErr) { 658 // Failed to find valid object file. 659 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 660 return ObjectsOrErr.takeError(); 661 } 662 ObjectPair Objects = ObjectsOrErr.get(); 663 664 std::unique_ptr<DIContext> Context; 665 // If this is a COFF object containing PDB info and not containing DWARF 666 // section, use a PDBContext to symbolize. Otherwise, use DWARF. 667 // Create a DIContext to symbolize as follows: 668 // - If there is a GSYM file, create a GsymContext. 669 // - Otherwise, if this is a COFF object containing PDB info, create a 670 // PDBContext. 671 // - Otherwise, create a DWARFContext. 672 const auto GsymFile = lookUpGsymFile(BinaryName.str()); 673 if (!GsymFile.empty()) { 674 auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile); 675 676 if (ReaderOrErr) { 677 std::unique_ptr<gsym::GsymReader> Reader = 678 std::make_unique<gsym::GsymReader>(std::move(*ReaderOrErr)); 679 680 Context = std::make_unique<gsym::GsymContext>(std::move(Reader)); 681 } 682 } 683 if (!Context) { 684 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 685 const codeview::DebugInfo *DebugInfo; 686 StringRef PDBFileName; 687 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 688 // Use DWARF if there're DWARF sections. 689 bool HasDwarf = llvm::any_of( 690 Objects.first->sections(), [](SectionRef Section) -> bool { 691 if (Expected<StringRef> SectionName = Section.getName()) 692 return SectionName.get() == ".debug_info"; 693 return false; 694 }); 695 if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) { 696 #if 0 697 using namespace pdb; 698 std::unique_ptr<IPDBSession> Session; 699 700 PDB_ReaderType ReaderType = 701 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; 702 if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), 703 Session)) { 704 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 705 // Return along the PDB filename to provide more context 706 return createFileError(PDBFileName, std::move(Err)); 707 } 708 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 709 #else 710 return make_error<StringError>( 711 "PDB support not compiled in", 712 std::make_error_code(std::errc::not_supported)); 713 #endif 714 } 715 } 716 } 717 if (!Context) 718 Context = DWARFContext::create( 719 *Objects.second, DWARFContext::ProcessDebugRelocations::Process, 720 nullptr, Opts.DWPName); 721 auto ModuleOrErr = 722 createModuleInfo(Objects.first, std::move(Context), ModuleName); 723 if (ModuleOrErr) { 724 auto I = Modules.find(ModuleName); 725 BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() { 726 Modules.erase(I); 727 }); 728 } 729 return ModuleOrErr; 730 } 731 732 // For BPF programs .BTF.ext section contains line numbers information, 733 // use it if regular DWARF is not available (e.g. for stripped binary). 734 static bool useBTFContext(const ObjectFile &Obj) { 735 return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() && 736 BTFParser::hasBTFSections(Obj); 737 } 738 739 Expected<SymbolizableModule *> 740 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { 741 StringRef ObjName = Obj.getFileName(); 742 auto I = Modules.find(ObjName); 743 if (I != Modules.end()) 744 return I->second.get(); 745 746 std::unique_ptr<DIContext> Context; 747 if (useBTFContext(Obj)) 748 Context = BTFContext::create(Obj); 749 else 750 Context = DWARFContext::create(Obj); 751 // FIXME: handle COFF object with PDB info to use PDBContext 752 return createModuleInfo(&Obj, std::move(Context), ObjName); 753 } 754 755 Expected<SymbolizableModule *> 756 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { 757 std::string Path; 758 if (!getOrFindDebugBinary(BuildID, Path)) { 759 return createStringError(errc::no_such_file_or_directory, 760 "could not find build ID"); 761 } 762 return getOrCreateModuleInfo(Path); 763 } 764 765 namespace { 766 767 // Undo these various manglings for Win32 extern "C" functions: 768 // cdecl - _foo 769 // stdcall - _foo@12 770 // fastcall - @foo@12 771 // vectorcall - foo@@12 772 // These are all different linkage names for 'foo'. 773 StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 774 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 775 776 // Remove any '@[0-9]+' suffix. 777 bool HasAtNumSuffix = false; 778 if (Front != '?') { 779 size_t AtPos = SymbolName.rfind('@'); 780 if (AtPos != StringRef::npos && 781 all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) { 782 SymbolName = SymbolName.substr(0, AtPos); 783 HasAtNumSuffix = true; 784 } 785 } 786 787 // Remove any ending '@' for vectorcall. 788 bool IsVectorCall = false; 789 if (HasAtNumSuffix && SymbolName.ends_with("@")) { 790 SymbolName = SymbolName.drop_back(); 791 IsVectorCall = true; 792 } 793 794 // If not vectorcall, remove any '_' or '@' prefix. 795 if (!IsVectorCall && (Front == '_' || Front == '@')) 796 SymbolName = SymbolName.drop_front(); 797 798 return SymbolName; 799 } 800 801 } // end anonymous namespace 802 803 std::string 804 LLVMSymbolizer::DemangleName(StringRef Name, 805 const SymbolizableModule *DbiModuleDescriptor) { 806 std::string Result; 807 if (nonMicrosoftDemangle(Name, Result)) 808 return Result; 809 810 if (Name.starts_with('?')) { 811 // Only do MSVC C++ demangling on symbols starting with '?'. 812 int status = 0; 813 char *DemangledName = microsoftDemangle( 814 Name, nullptr, &status, 815 MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | 816 MSDF_NoMemberType | MSDF_NoReturnType)); 817 if (status != 0) 818 return std::string{Name}; 819 Result = DemangledName; 820 free(DemangledName); 821 return Result; 822 } 823 824 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) { 825 std::string DemangledCName(demanglePE32ExternCFunc(Name)); 826 // On i386 Windows, the C name mangling for different calling conventions 827 // may also be applied on top of the Itanium or Rust name mangling. 828 if (nonMicrosoftDemangle(DemangledCName, Result)) 829 return Result; 830 return DemangledCName; 831 } 832 return std::string{Name}; 833 } 834 835 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { 836 if (Bin->getBinary()) 837 LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator()); 838 } 839 840 void LLVMSymbolizer::pruneCache() { 841 // Evict the LRU binary until the max cache size is reached or there's <= 1 842 // item in the cache. The MRU binary is always kept to avoid thrashing if it's 843 // larger than the cache size. 844 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && 845 std::next(LRUBinaries.begin()) != LRUBinaries.end()) { 846 CachedBinary &Bin = LRUBinaries.front(); 847 CacheSize -= Bin.size(); 848 LRUBinaries.pop_front(); 849 Bin.evict(); 850 } 851 } 852 853 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { 854 if (Evictor) { 855 this->Evictor = [OldEvictor = std::move(this->Evictor), 856 NewEvictor = std::move(NewEvictor)]() { 857 NewEvictor(); 858 OldEvictor(); 859 }; 860 } else { 861 this->Evictor = std::move(NewEvictor); 862 } 863 } 864 865 } // namespace symbolize 866 } // namespace llvm 867