1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation for LLVM symbolization library. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 14 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/DebugInfo/BTF/BTFContext.h" 17 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 18 #include "llvm/DebugInfo/PDB/PDB.h" 19 #include "llvm/DebugInfo/PDB/PDBContext.h" 20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 21 #include "llvm/Demangle/Demangle.h" 22 #include "llvm/Object/BuildID.h" 23 #include "llvm/Object/COFF.h" 24 #include "llvm/Object/ELFObjectFile.h" 25 #include "llvm/Object/MachO.h" 26 #include "llvm/Object/MachOUniversal.h" 27 #include "llvm/Support/CRC.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/DataExtractor.h" 30 #include "llvm/Support/Errc.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Support/Path.h" 34 #include <algorithm> 35 #include <cassert> 36 #include <cstring> 37 38 namespace llvm { 39 namespace codeview { 40 union DebugInfo; 41 } 42 namespace symbolize { 43 44 LLVMSymbolizer::LLVMSymbolizer() = default; 45 46 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) 47 : Opts(Opts), 48 BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {} 49 50 LLVMSymbolizer::~LLVMSymbolizer() = default; 51 52 template <typename T> 53 Expected<DILineInfo> 54 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, 55 object::SectionedAddress ModuleOffset) { 56 57 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 58 if (!InfoOrErr) 59 return InfoOrErr.takeError(); 60 61 SymbolizableModule *Info = *InfoOrErr; 62 63 // A null module means an error has already been reported. Return an empty 64 // result. 65 if (!Info) 66 return DILineInfo(); 67 68 // If the user is giving us relative addresses, add the preferred base of the 69 // object to the offset before we do the query. It's what DIContext expects. 70 if (Opts.RelativeAddresses) 71 ModuleOffset.Address += Info->getModulePreferredBase(); 72 73 DILineInfo LineInfo = Info->symbolizeCode( 74 ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), 75 Opts.UseSymbolTable); 76 if (Opts.Demangle) 77 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 78 return LineInfo; 79 } 80 81 Expected<DILineInfo> 82 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, 83 object::SectionedAddress ModuleOffset) { 84 return symbolizeCodeCommon(Obj, ModuleOffset); 85 } 86 87 Expected<DILineInfo> 88 LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 89 object::SectionedAddress ModuleOffset) { 90 return symbolizeCodeCommon(ModuleName, ModuleOffset); 91 } 92 93 Expected<DILineInfo> 94 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, 95 object::SectionedAddress ModuleOffset) { 96 return symbolizeCodeCommon(BuildID, ModuleOffset); 97 } 98 99 template <typename T> 100 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( 101 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { 102 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 103 if (!InfoOrErr) 104 return InfoOrErr.takeError(); 105 106 SymbolizableModule *Info = *InfoOrErr; 107 108 // A null module means an error has already been reported. Return an empty 109 // result. 110 if (!Info) 111 return DIInliningInfo(); 112 113 // If the user is giving us relative addresses, add the preferred base of the 114 // object to the offset before we do the query. It's what DIContext expects. 115 if (Opts.RelativeAddresses) 116 ModuleOffset.Address += Info->getModulePreferredBase(); 117 118 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 119 ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), 120 Opts.UseSymbolTable); 121 if (Opts.Demangle) { 122 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 123 auto *Frame = InlinedContext.getMutableFrame(i); 124 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 125 } 126 } 127 return InlinedContext; 128 } 129 130 Expected<DIInliningInfo> 131 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj, 132 object::SectionedAddress ModuleOffset) { 133 return symbolizeInlinedCodeCommon(Obj, ModuleOffset); 134 } 135 136 Expected<DIInliningInfo> 137 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, 138 object::SectionedAddress ModuleOffset) { 139 return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset); 140 } 141 142 Expected<DIInliningInfo> 143 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, 144 object::SectionedAddress ModuleOffset) { 145 return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); 146 } 147 148 template <typename T> 149 Expected<DIGlobal> 150 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, 151 object::SectionedAddress ModuleOffset) { 152 153 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 154 if (!InfoOrErr) 155 return InfoOrErr.takeError(); 156 157 SymbolizableModule *Info = *InfoOrErr; 158 // A null module means an error has already been reported. Return an empty 159 // result. 160 if (!Info) 161 return DIGlobal(); 162 163 // If the user is giving us relative addresses, add the preferred base of 164 // the object to the offset before we do the query. It's what DIContext 165 // expects. 166 if (Opts.RelativeAddresses) 167 ModuleOffset.Address += Info->getModulePreferredBase(); 168 169 DIGlobal Global = Info->symbolizeData(ModuleOffset); 170 if (Opts.Demangle) 171 Global.Name = DemangleName(Global.Name, Info); 172 return Global; 173 } 174 175 Expected<DIGlobal> 176 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj, 177 object::SectionedAddress ModuleOffset) { 178 return symbolizeDataCommon(Obj, ModuleOffset); 179 } 180 181 Expected<DIGlobal> 182 LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 183 object::SectionedAddress ModuleOffset) { 184 return symbolizeDataCommon(ModuleName, ModuleOffset); 185 } 186 187 Expected<DIGlobal> 188 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, 189 object::SectionedAddress ModuleOffset) { 190 return symbolizeDataCommon(BuildID, ModuleOffset); 191 } 192 193 template <typename T> 194 Expected<std::vector<DILocal>> 195 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, 196 object::SectionedAddress ModuleOffset) { 197 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 198 if (!InfoOrErr) 199 return InfoOrErr.takeError(); 200 201 SymbolizableModule *Info = *InfoOrErr; 202 // A null module means an error has already been reported. Return an empty 203 // result. 204 if (!Info) 205 return std::vector<DILocal>(); 206 207 // If the user is giving us relative addresses, add the preferred base of 208 // the object to the offset before we do the query. It's what DIContext 209 // expects. 210 if (Opts.RelativeAddresses) 211 ModuleOffset.Address += Info->getModulePreferredBase(); 212 213 return Info->symbolizeFrame(ModuleOffset); 214 } 215 216 Expected<std::vector<DILocal>> 217 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj, 218 object::SectionedAddress ModuleOffset) { 219 return symbolizeFrameCommon(Obj, ModuleOffset); 220 } 221 222 Expected<std::vector<DILocal>> 223 LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName, 224 object::SectionedAddress ModuleOffset) { 225 return symbolizeFrameCommon(ModuleName, ModuleOffset); 226 } 227 228 Expected<std::vector<DILocal>> 229 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, 230 object::SectionedAddress ModuleOffset) { 231 return symbolizeFrameCommon(BuildID, ModuleOffset); 232 } 233 234 template <typename T> 235 Expected<std::vector<DILineInfo>> 236 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol, 237 uint64_t Offset) { 238 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 239 if (!InfoOrErr) 240 return InfoOrErr.takeError(); 241 242 SymbolizableModule *Info = *InfoOrErr; 243 std::vector<DILineInfo> Result; 244 245 // A null module means an error has already been reported. Return an empty 246 // result. 247 if (!Info) 248 return Result; 249 250 for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) { 251 DILineInfo LineInfo = Info->symbolizeCode( 252 A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), 253 Opts.UseSymbolTable); 254 if (LineInfo.FileName != DILineInfo::BadString) { 255 if (Opts.Demangle) 256 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 257 Result.push_back(LineInfo); 258 } 259 } 260 261 return Result; 262 } 263 264 Expected<std::vector<DILineInfo>> 265 LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol, 266 uint64_t Offset) { 267 return findSymbolCommon(Obj, Symbol, Offset); 268 } 269 270 Expected<std::vector<DILineInfo>> 271 LLVMSymbolizer::findSymbol(const std::string &ModuleName, StringRef Symbol, 272 uint64_t Offset) { 273 return findSymbolCommon(ModuleName, Symbol, Offset); 274 } 275 276 Expected<std::vector<DILineInfo>> 277 LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol, 278 uint64_t Offset) { 279 return findSymbolCommon(BuildID, Symbol, Offset); 280 } 281 282 void LLVMSymbolizer::flush() { 283 ObjectForUBPathAndArch.clear(); 284 LRUBinaries.clear(); 285 CacheSize = 0; 286 BinaryForPath.clear(); 287 ObjectPairForPathArch.clear(); 288 Modules.clear(); 289 BuildIDPaths.clear(); 290 } 291 292 namespace { 293 294 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 295 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 296 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 297 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 298 std::string getDarwinDWARFResourceForPath(const std::string &Path, 299 const std::string &Basename) { 300 SmallString<16> ResourceName = StringRef(Path); 301 if (sys::path::extension(Path) != ".dSYM") { 302 ResourceName += ".dSYM"; 303 } 304 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 305 sys::path::append(ResourceName, Basename); 306 return std::string(ResourceName); 307 } 308 309 bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 310 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 311 MemoryBuffer::getFileOrSTDIN(Path); 312 if (!MB) 313 return false; 314 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); 315 } 316 317 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 318 uint32_t &CRCHash) { 319 if (!Obj) 320 return false; 321 for (const SectionRef &Section : Obj->sections()) { 322 StringRef Name; 323 consumeError(Section.getName().moveInto(Name)); 324 325 Name = Name.substr(Name.find_first_not_of("._")); 326 if (Name == "gnu_debuglink") { 327 Expected<StringRef> ContentsOrErr = Section.getContents(); 328 if (!ContentsOrErr) { 329 consumeError(ContentsOrErr.takeError()); 330 return false; 331 } 332 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); 333 uint64_t Offset = 0; 334 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 335 // 4-byte align the offset. 336 Offset = (Offset + 3) & ~0x3; 337 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 338 DebugName = DebugNameStr; 339 CRCHash = DE.getU32(&Offset); 340 return true; 341 } 342 } 343 break; 344 } 345 } 346 return false; 347 } 348 349 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 350 const MachOObjectFile *Obj) { 351 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 352 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 353 if (dbg_uuid.empty() || bin_uuid.empty()) 354 return false; 355 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 356 } 357 358 } // end anonymous namespace 359 360 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 361 const MachOObjectFile *MachExeObj, 362 const std::string &ArchName) { 363 // On Darwin we may find DWARF in separate object file in 364 // resource directory. 365 std::vector<std::string> DsymPaths; 366 StringRef Filename = sys::path::filename(ExePath); 367 DsymPaths.push_back( 368 getDarwinDWARFResourceForPath(ExePath, std::string(Filename))); 369 for (const auto &Path : Opts.DsymHints) { 370 DsymPaths.push_back( 371 getDarwinDWARFResourceForPath(Path, std::string(Filename))); 372 } 373 for (const auto &Path : DsymPaths) { 374 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 375 if (!DbgObjOrErr) { 376 // Ignore errors, the file might not exist. 377 consumeError(DbgObjOrErr.takeError()); 378 continue; 379 } 380 ObjectFile *DbgObj = DbgObjOrErr.get(); 381 if (!DbgObj) 382 continue; 383 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 384 if (!MachDbgObj) 385 continue; 386 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 387 return DbgObj; 388 } 389 return nullptr; 390 } 391 392 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 393 const ObjectFile *Obj, 394 const std::string &ArchName) { 395 std::string DebuglinkName; 396 uint32_t CRCHash; 397 std::string DebugBinaryPath; 398 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 399 return nullptr; 400 if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) 401 return nullptr; 402 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 403 if (!DbgObjOrErr) { 404 // Ignore errors, the file might not exist. 405 consumeError(DbgObjOrErr.takeError()); 406 return nullptr; 407 } 408 return DbgObjOrErr.get(); 409 } 410 411 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, 412 const ELFObjectFileBase *Obj, 413 const std::string &ArchName) { 414 auto BuildID = getBuildID(Obj); 415 if (BuildID.size() < 2) 416 return nullptr; 417 std::string DebugBinaryPath; 418 if (!getOrFindDebugBinary(BuildID, DebugBinaryPath)) 419 return nullptr; 420 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 421 if (!DbgObjOrErr) { 422 consumeError(DbgObjOrErr.takeError()); 423 return nullptr; 424 } 425 return DbgObjOrErr.get(); 426 } 427 428 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, 429 const std::string &DebuglinkName, 430 uint32_t CRCHash, std::string &Result) { 431 SmallString<16> OrigDir(OrigPath); 432 llvm::sys::path::remove_filename(OrigDir); 433 SmallString<16> DebugPath = OrigDir; 434 // Try relative/path/to/original_binary/debuglink_name 435 llvm::sys::path::append(DebugPath, DebuglinkName); 436 if (checkFileCRC(DebugPath, CRCHash)) { 437 Result = std::string(DebugPath); 438 return true; 439 } 440 // Try relative/path/to/original_binary/.debug/debuglink_name 441 DebugPath = OrigDir; 442 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 443 if (checkFileCRC(DebugPath, CRCHash)) { 444 Result = std::string(DebugPath); 445 return true; 446 } 447 // Make the path absolute so that lookups will go to 448 // "/usr/lib/debug/full/path/to/debug", not 449 // "/usr/lib/debug/to/debug" 450 llvm::sys::fs::make_absolute(OrigDir); 451 if (!Opts.FallbackDebugPath.empty()) { 452 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name 453 DebugPath = Opts.FallbackDebugPath; 454 } else { 455 #if defined(__NetBSD__) 456 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name 457 DebugPath = "/usr/libdata/debug"; 458 #else 459 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name 460 DebugPath = "/usr/lib/debug"; 461 #endif 462 } 463 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 464 DebuglinkName); 465 if (checkFileCRC(DebugPath, CRCHash)) { 466 Result = std::string(DebugPath); 467 return true; 468 } 469 return false; 470 } 471 472 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { 473 return StringRef(reinterpret_cast<const char *>(BuildID.data()), 474 BuildID.size()); 475 } 476 477 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, 478 std::string &Result) { 479 StringRef BuildIDStr = getBuildIDStr(BuildID); 480 auto I = BuildIDPaths.find(BuildIDStr); 481 if (I != BuildIDPaths.end()) { 482 Result = I->second; 483 return true; 484 } 485 if (!BIDFetcher) 486 return false; 487 if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) { 488 Result = *Path; 489 auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result}); 490 assert(InsertResult.second); 491 (void)InsertResult; 492 return true; 493 } 494 495 return false; 496 } 497 498 Expected<LLVMSymbolizer::ObjectPair> 499 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 500 const std::string &ArchName) { 501 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 502 if (I != ObjectPairForPathArch.end()) { 503 recordAccess(BinaryForPath.find(Path)->second); 504 return I->second; 505 } 506 507 auto ObjOrErr = getOrCreateObject(Path, ArchName); 508 if (!ObjOrErr) { 509 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), 510 ObjectPair(nullptr, nullptr)); 511 return ObjOrErr.takeError(); 512 } 513 514 ObjectFile *Obj = ObjOrErr.get(); 515 assert(Obj != nullptr); 516 ObjectFile *DbgObj = nullptr; 517 518 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 519 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 520 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj)) 521 DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName); 522 if (!DbgObj) 523 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 524 if (!DbgObj) 525 DbgObj = Obj; 526 ObjectPair Res = std::make_pair(Obj, DbgObj); 527 std::string DbgObjPath = DbgObj->getFileName().str(); 528 auto Pair = 529 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); 530 BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() { 531 ObjectPairForPathArch.erase(I); 532 }); 533 return Res; 534 } 535 536 Expected<ObjectFile *> 537 LLVMSymbolizer::getOrCreateObject(const std::string &Path, 538 const std::string &ArchName) { 539 Binary *Bin; 540 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>()); 541 if (!Pair.second) { 542 Bin = Pair.first->second->getBinary(); 543 recordAccess(Pair.first->second); 544 } else { 545 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 546 if (!BinOrErr) 547 return BinOrErr.takeError(); 548 549 CachedBinary &CachedBin = Pair.first->second; 550 CachedBin = std::move(BinOrErr.get()); 551 CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); }); 552 LRUBinaries.push_back(CachedBin); 553 CacheSize += CachedBin.size(); 554 Bin = CachedBin->getBinary(); 555 } 556 557 if (!Bin) 558 return static_cast<ObjectFile *>(nullptr); 559 560 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 561 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 562 if (I != ObjectForUBPathAndArch.end()) 563 return I->second.get(); 564 565 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 566 UB->getMachOObjectForArch(ArchName); 567 if (!ObjOrErr) { 568 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 569 std::unique_ptr<ObjectFile>()); 570 return ObjOrErr.takeError(); 571 } 572 ObjectFile *Res = ObjOrErr->get(); 573 auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 574 std::move(ObjOrErr.get())); 575 BinaryForPath.find(Path)->second.pushEvictor( 576 [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); }); 577 return Res; 578 } 579 if (Bin->isObject()) { 580 return cast<ObjectFile>(Bin); 581 } 582 return errorCodeToError(object_error::arch_not_found); 583 } 584 585 Expected<SymbolizableModule *> 586 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, 587 std::unique_ptr<DIContext> Context, 588 StringRef ModuleName) { 589 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context), 590 Opts.UntagAddresses); 591 std::unique_ptr<SymbolizableModule> SymMod; 592 if (InfoOrErr) 593 SymMod = std::move(*InfoOrErr); 594 auto InsertResult = Modules.insert( 595 std::make_pair(std::string(ModuleName), std::move(SymMod))); 596 assert(InsertResult.second); 597 if (!InfoOrErr) 598 return InfoOrErr.takeError(); 599 return InsertResult.first->second.get(); 600 } 601 602 Expected<SymbolizableModule *> 603 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 604 std::string BinaryName = ModuleName; 605 std::string ArchName = Opts.DefaultArch; 606 size_t ColonPos = ModuleName.find_last_of(':'); 607 // Verify that substring after colon form a valid arch name. 608 if (ColonPos != std::string::npos) { 609 std::string ArchStr = ModuleName.substr(ColonPos + 1); 610 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 611 BinaryName = ModuleName.substr(0, ColonPos); 612 ArchName = ArchStr; 613 } 614 } 615 616 auto I = Modules.find(ModuleName); 617 if (I != Modules.end()) { 618 recordAccess(BinaryForPath.find(BinaryName)->second); 619 return I->second.get(); 620 } 621 622 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); 623 if (!ObjectsOrErr) { 624 // Failed to find valid object file. 625 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 626 return ObjectsOrErr.takeError(); 627 } 628 ObjectPair Objects = ObjectsOrErr.get(); 629 630 std::unique_ptr<DIContext> Context; 631 // If this is a COFF object containing PDB info, use a PDBContext to 632 // symbolize. Otherwise, use DWARF. 633 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 634 const codeview::DebugInfo *DebugInfo; 635 StringRef PDBFileName; 636 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 637 if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { 638 #if 0 639 using namespace pdb; 640 std::unique_ptr<IPDBSession> Session; 641 642 PDB_ReaderType ReaderType = 643 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; 644 if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), 645 Session)) { 646 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 647 // Return along the PDB filename to provide more context 648 return createFileError(PDBFileName, std::move(Err)); 649 } 650 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 651 #else 652 return make_error<StringError>( 653 "PDB support not compiled in", 654 std::make_error_code(std::errc::not_supported)); 655 #endif 656 } 657 } 658 if (!Context) 659 Context = DWARFContext::create( 660 *Objects.second, DWARFContext::ProcessDebugRelocations::Process, 661 nullptr, Opts.DWPName); 662 auto ModuleOrErr = 663 createModuleInfo(Objects.first, std::move(Context), ModuleName); 664 if (ModuleOrErr) { 665 auto I = Modules.find(ModuleName); 666 BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() { 667 Modules.erase(I); 668 }); 669 } 670 return ModuleOrErr; 671 } 672 673 // For BPF programs .BTF.ext section contains line numbers information, 674 // use it if regular DWARF is not available (e.g. for stripped binary). 675 static bool useBTFContext(const ObjectFile &Obj) { 676 return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() && 677 BTFParser::hasBTFSections(Obj); 678 } 679 680 Expected<SymbolizableModule *> 681 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { 682 StringRef ObjName = Obj.getFileName(); 683 auto I = Modules.find(ObjName); 684 if (I != Modules.end()) 685 return I->second.get(); 686 687 std::unique_ptr<DIContext> Context; 688 if (useBTFContext(Obj)) 689 Context = BTFContext::create(Obj); 690 else 691 Context = DWARFContext::create(Obj); 692 // FIXME: handle COFF object with PDB info to use PDBContext 693 return createModuleInfo(&Obj, std::move(Context), ObjName); 694 } 695 696 Expected<SymbolizableModule *> 697 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { 698 std::string Path; 699 if (!getOrFindDebugBinary(BuildID, Path)) { 700 return createStringError(errc::no_such_file_or_directory, 701 "could not find build ID"); 702 } 703 return getOrCreateModuleInfo(Path); 704 } 705 706 namespace { 707 708 // Undo these various manglings for Win32 extern "C" functions: 709 // cdecl - _foo 710 // stdcall - _foo@12 711 // fastcall - @foo@12 712 // vectorcall - foo@@12 713 // These are all different linkage names for 'foo'. 714 StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 715 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 716 717 // Remove any '@[0-9]+' suffix. 718 bool HasAtNumSuffix = false; 719 if (Front != '?') { 720 size_t AtPos = SymbolName.rfind('@'); 721 if (AtPos != StringRef::npos && 722 all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) { 723 SymbolName = SymbolName.substr(0, AtPos); 724 HasAtNumSuffix = true; 725 } 726 } 727 728 // Remove any ending '@' for vectorcall. 729 bool IsVectorCall = false; 730 if (HasAtNumSuffix && SymbolName.ends_with("@")) { 731 SymbolName = SymbolName.drop_back(); 732 IsVectorCall = true; 733 } 734 735 // If not vectorcall, remove any '_' or '@' prefix. 736 if (!IsVectorCall && (Front == '_' || Front == '@')) 737 SymbolName = SymbolName.drop_front(); 738 739 return SymbolName; 740 } 741 742 } // end anonymous namespace 743 744 std::string 745 LLVMSymbolizer::DemangleName(const std::string &Name, 746 const SymbolizableModule *DbiModuleDescriptor) { 747 std::string Result; 748 if (nonMicrosoftDemangle(Name, Result)) 749 return Result; 750 751 if (!Name.empty() && Name.front() == '?') { 752 // Only do MSVC C++ demangling on symbols starting with '?'. 753 int status = 0; 754 char *DemangledName = microsoftDemangle( 755 Name, nullptr, &status, 756 MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | 757 MSDF_NoMemberType | MSDF_NoReturnType)); 758 if (status != 0) 759 return Name; 760 Result = DemangledName; 761 free(DemangledName); 762 return Result; 763 } 764 765 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) { 766 std::string DemangledCName(demanglePE32ExternCFunc(Name)); 767 // On i386 Windows, the C name mangling for different calling conventions 768 // may also be applied on top of the Itanium or Rust name mangling. 769 if (nonMicrosoftDemangle(DemangledCName, Result)) 770 return Result; 771 return DemangledCName; 772 } 773 return Name; 774 } 775 776 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { 777 if (Bin->getBinary()) 778 LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator()); 779 } 780 781 void LLVMSymbolizer::pruneCache() { 782 // Evict the LRU binary until the max cache size is reached or there's <= 1 783 // item in the cache. The MRU binary is always kept to avoid thrashing if it's 784 // larger than the cache size. 785 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && 786 std::next(LRUBinaries.begin()) != LRUBinaries.end()) { 787 CachedBinary &Bin = LRUBinaries.front(); 788 CacheSize -= Bin.size(); 789 LRUBinaries.pop_front(); 790 Bin.evict(); 791 } 792 } 793 794 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { 795 if (Evictor) { 796 this->Evictor = [OldEvictor = std::move(this->Evictor), 797 NewEvictor = std::move(NewEvictor)]() { 798 NewEvictor(); 799 OldEvictor(); 800 }; 801 } else { 802 this->Evictor = std::move(NewEvictor); 803 } 804 } 805 806 } // namespace symbolize 807 } // namespace llvm 808