1 //===-- LLVMSymbolize.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation for LLVM symbolization library.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
14
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/DebugInfo/BTF/BTFContext.h"
17 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
18 #include "llvm/DebugInfo/GSYM/GsymContext.h"
19 #include "llvm/DebugInfo/GSYM/GsymReader.h"
20 #include "llvm/DebugInfo/PDB/PDB.h"
21 #include "llvm/DebugInfo/PDB/PDBContext.h"
22 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/Object/BuildID.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Object/ELFObjectFile.h"
27 #include "llvm/Object/MachO.h"
28 #include "llvm/Object/MachOUniversal.h"
29 #include "llvm/Support/CRC.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/DataExtractor.h"
32 #include "llvm/Support/Errc.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 #include <cassert>
37 #include <cstring>
38
39 namespace llvm {
40 namespace codeview {
41 union DebugInfo;
42 }
43 namespace symbolize {
44
45 LLVMSymbolizer::LLVMSymbolizer() = default;
46
LLVMSymbolizer(const Options & Opts)47 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts)
48 : Opts(Opts),
49 BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {}
50
51 LLVMSymbolizer::~LLVMSymbolizer() = default;
52
53 template <typename T>
54 Expected<DILineInfo>
symbolizeCodeCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)55 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
56 object::SectionedAddress ModuleOffset) {
57
58 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
59 if (!InfoOrErr)
60 return InfoOrErr.takeError();
61
62 SymbolizableModule *Info = *InfoOrErr;
63
64 // A null module means an error has already been reported. Return an empty
65 // result.
66 if (!Info)
67 return DILineInfo();
68
69 // If the user is giving us relative addresses, add the preferred base of the
70 // object to the offset before we do the query. It's what DIContext expects.
71 if (Opts.RelativeAddresses)
72 ModuleOffset.Address += Info->getModulePreferredBase();
73
74 DILineInfo LineInfo = Info->symbolizeCode(
75 ModuleOffset,
76 DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
77 Opts.SkipLineZero),
78 Opts.UseSymbolTable);
79 if (Opts.Demangle)
80 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
81 return LineInfo;
82 }
83
84 Expected<DILineInfo>
symbolizeCode(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)85 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
86 object::SectionedAddress ModuleOffset) {
87 return symbolizeCodeCommon(Obj, ModuleOffset);
88 }
89
90 Expected<DILineInfo>
symbolizeCode(StringRef ModuleName,object::SectionedAddress ModuleOffset)91 LLVMSymbolizer::symbolizeCode(StringRef ModuleName,
92 object::SectionedAddress ModuleOffset) {
93 return symbolizeCodeCommon(ModuleName, ModuleOffset);
94 }
95
96 Expected<DILineInfo>
symbolizeCode(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)97 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
98 object::SectionedAddress ModuleOffset) {
99 return symbolizeCodeCommon(BuildID, ModuleOffset);
100 }
101
102 template <typename T>
symbolizeInlinedCodeCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)103 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
104 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
105 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
106 if (!InfoOrErr)
107 return InfoOrErr.takeError();
108
109 SymbolizableModule *Info = *InfoOrErr;
110
111 // A null module means an error has already been reported. Return an empty
112 // result.
113 if (!Info)
114 return DIInliningInfo();
115
116 // If the user is giving us relative addresses, add the preferred base of the
117 // object to the offset before we do the query. It's what DIContext expects.
118 if (Opts.RelativeAddresses)
119 ModuleOffset.Address += Info->getModulePreferredBase();
120
121 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
122 ModuleOffset,
123 DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
124 Opts.SkipLineZero),
125 Opts.UseSymbolTable);
126 if (Opts.Demangle) {
127 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
128 auto *Frame = InlinedContext.getMutableFrame(i);
129 Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
130 }
131 }
132 return InlinedContext;
133 }
134
135 Expected<DIInliningInfo>
symbolizeInlinedCode(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)136 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj,
137 object::SectionedAddress ModuleOffset) {
138 return symbolizeInlinedCodeCommon(Obj, ModuleOffset);
139 }
140
141 Expected<DIInliningInfo>
symbolizeInlinedCode(StringRef ModuleName,object::SectionedAddress ModuleOffset)142 LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName,
143 object::SectionedAddress ModuleOffset) {
144 return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
145 }
146
147 Expected<DIInliningInfo>
symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)148 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
149 object::SectionedAddress ModuleOffset) {
150 return symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
151 }
152
153 template <typename T>
154 Expected<DIGlobal>
symbolizeDataCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)155 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
156 object::SectionedAddress ModuleOffset) {
157
158 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
159 if (!InfoOrErr)
160 return InfoOrErr.takeError();
161
162 SymbolizableModule *Info = *InfoOrErr;
163 // A null module means an error has already been reported. Return an empty
164 // result.
165 if (!Info)
166 return DIGlobal();
167
168 // If the user is giving us relative addresses, add the preferred base of
169 // the object to the offset before we do the query. It's what DIContext
170 // expects.
171 if (Opts.RelativeAddresses)
172 ModuleOffset.Address += Info->getModulePreferredBase();
173
174 DIGlobal Global = Info->symbolizeData(ModuleOffset);
175 if (Opts.Demangle)
176 Global.Name = DemangleName(Global.Name, Info);
177 return Global;
178 }
179
180 Expected<DIGlobal>
symbolizeData(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)181 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj,
182 object::SectionedAddress ModuleOffset) {
183 return symbolizeDataCommon(Obj, ModuleOffset);
184 }
185
186 Expected<DIGlobal>
symbolizeData(StringRef ModuleName,object::SectionedAddress ModuleOffset)187 LLVMSymbolizer::symbolizeData(StringRef ModuleName,
188 object::SectionedAddress ModuleOffset) {
189 return symbolizeDataCommon(ModuleName, ModuleOffset);
190 }
191
192 Expected<DIGlobal>
symbolizeData(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)193 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
194 object::SectionedAddress ModuleOffset) {
195 return symbolizeDataCommon(BuildID, ModuleOffset);
196 }
197
198 template <typename T>
199 Expected<std::vector<DILocal>>
symbolizeFrameCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)200 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
201 object::SectionedAddress ModuleOffset) {
202 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
203 if (!InfoOrErr)
204 return InfoOrErr.takeError();
205
206 SymbolizableModule *Info = *InfoOrErr;
207 // A null module means an error has already been reported. Return an empty
208 // result.
209 if (!Info)
210 return std::vector<DILocal>();
211
212 // If the user is giving us relative addresses, add the preferred base of
213 // the object to the offset before we do the query. It's what DIContext
214 // expects.
215 if (Opts.RelativeAddresses)
216 ModuleOffset.Address += Info->getModulePreferredBase();
217
218 return Info->symbolizeFrame(ModuleOffset);
219 }
220
221 Expected<std::vector<DILocal>>
symbolizeFrame(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)222 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj,
223 object::SectionedAddress ModuleOffset) {
224 return symbolizeFrameCommon(Obj, ModuleOffset);
225 }
226
227 Expected<std::vector<DILocal>>
symbolizeFrame(StringRef ModuleName,object::SectionedAddress ModuleOffset)228 LLVMSymbolizer::symbolizeFrame(StringRef ModuleName,
229 object::SectionedAddress ModuleOffset) {
230 return symbolizeFrameCommon(ModuleName, ModuleOffset);
231 }
232
233 Expected<std::vector<DILocal>>
symbolizeFrame(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)234 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
235 object::SectionedAddress ModuleOffset) {
236 return symbolizeFrameCommon(BuildID, ModuleOffset);
237 }
238
239 template <typename T>
240 Expected<std::vector<DILineInfo>>
findSymbolCommon(const T & ModuleSpecifier,StringRef Symbol,uint64_t Offset)241 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
242 uint64_t Offset) {
243 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
244 if (!InfoOrErr)
245 return InfoOrErr.takeError();
246
247 SymbolizableModule *Info = *InfoOrErr;
248 std::vector<DILineInfo> Result;
249
250 // A null module means an error has already been reported. Return an empty
251 // result.
252 if (!Info)
253 return Result;
254
255 for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
256 DILineInfo LineInfo = Info->symbolizeCode(
257 A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
258 Opts.UseSymbolTable);
259 if (LineInfo.FileName != DILineInfo::BadString) {
260 if (Opts.Demangle)
261 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
262 Result.push_back(std::move(LineInfo));
263 }
264 }
265
266 return Result;
267 }
268
269 Expected<std::vector<DILineInfo>>
findSymbol(const ObjectFile & Obj,StringRef Symbol,uint64_t Offset)270 LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
271 uint64_t Offset) {
272 return findSymbolCommon(Obj, Symbol, Offset);
273 }
274
275 Expected<std::vector<DILineInfo>>
findSymbol(StringRef ModuleName,StringRef Symbol,uint64_t Offset)276 LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol,
277 uint64_t Offset) {
278 return findSymbolCommon(ModuleName, Symbol, Offset);
279 }
280
281 Expected<std::vector<DILineInfo>>
findSymbol(ArrayRef<uint8_t> BuildID,StringRef Symbol,uint64_t Offset)282 LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
283 uint64_t Offset) {
284 return findSymbolCommon(BuildID, Symbol, Offset);
285 }
286
flush()287 void LLVMSymbolizer::flush() {
288 ObjectForUBPathAndArch.clear();
289 LRUBinaries.clear();
290 CacheSize = 0;
291 BinaryForPath.clear();
292 ObjectPairForPathArch.clear();
293 Modules.clear();
294 BuildIDPaths.clear();
295 }
296
297 namespace {
298
299 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in
300 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
301 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
302 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
getDarwinDWARFResourceForPath(const std::string & Path,const std::string & Basename)303 std::string getDarwinDWARFResourceForPath(const std::string &Path,
304 const std::string &Basename) {
305 SmallString<16> ResourceName = StringRef(Path);
306 if (sys::path::extension(Path) != ".dSYM") {
307 ResourceName += ".dSYM";
308 }
309 sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
310 sys::path::append(ResourceName, Basename);
311 return std::string(ResourceName);
312 }
313
checkFileCRC(StringRef Path,uint32_t CRCHash)314 bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
315 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
316 MemoryBuffer::getFileOrSTDIN(Path);
317 if (!MB)
318 return false;
319 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
320 }
321
getGNUDebuglinkContents(const ObjectFile * Obj,std::string & DebugName,uint32_t & CRCHash)322 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
323 uint32_t &CRCHash) {
324 if (!Obj)
325 return false;
326 for (const SectionRef &Section : Obj->sections()) {
327 StringRef Name;
328 consumeError(Section.getName().moveInto(Name));
329
330 Name = Name.substr(Name.find_first_not_of("._"));
331 if (Name == "gnu_debuglink") {
332 Expected<StringRef> ContentsOrErr = Section.getContents();
333 if (!ContentsOrErr) {
334 consumeError(ContentsOrErr.takeError());
335 return false;
336 }
337 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
338 uint64_t Offset = 0;
339 if (const char *DebugNameStr = DE.getCStr(&Offset)) {
340 // 4-byte align the offset.
341 Offset = (Offset + 3) & ~0x3;
342 if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
343 DebugName = DebugNameStr;
344 CRCHash = DE.getU32(&Offset);
345 return true;
346 }
347 }
348 break;
349 }
350 }
351 return false;
352 }
353
darwinDsymMatchesBinary(const MachOObjectFile * DbgObj,const MachOObjectFile * Obj)354 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
355 const MachOObjectFile *Obj) {
356 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
357 ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
358 if (dbg_uuid.empty() || bin_uuid.empty())
359 return false;
360 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
361 }
362
363 } // end anonymous namespace
364
lookUpDsymFile(const std::string & ExePath,const MachOObjectFile * MachExeObj,const std::string & ArchName)365 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
366 const MachOObjectFile *MachExeObj,
367 const std::string &ArchName) {
368 // On Darwin we may find DWARF in separate object file in
369 // resource directory.
370 std::vector<std::string> DsymPaths;
371 StringRef Filename = sys::path::filename(ExePath);
372 DsymPaths.push_back(
373 getDarwinDWARFResourceForPath(ExePath, std::string(Filename)));
374 for (const auto &Path : Opts.DsymHints) {
375 DsymPaths.push_back(
376 getDarwinDWARFResourceForPath(Path, std::string(Filename)));
377 }
378 for (const auto &Path : DsymPaths) {
379 auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
380 if (!DbgObjOrErr) {
381 // Ignore errors, the file might not exist.
382 consumeError(DbgObjOrErr.takeError());
383 continue;
384 }
385 ObjectFile *DbgObj = DbgObjOrErr.get();
386 if (!DbgObj)
387 continue;
388 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
389 if (!MachDbgObj)
390 continue;
391 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
392 return DbgObj;
393 }
394 return nullptr;
395 }
396
/// Locates and loads the debug object referenced by \p Obj's GNU debuglink.
///
/// \returns the loaded object, or nullptr when \p Obj has no usable debuglink
/// contents, no candidate file passes findDebugBinary(), or the file found
/// cannot be loaded.
ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
                                                  const ObjectFile *Obj,
                                                  const std::string &ArchName) {
  std::string LinkName;
  uint32_t LinkCRC = 0;
  if (!getGNUDebuglinkContents(Obj, LinkName, LinkCRC))
    return nullptr;

  std::string DebugFile;
  if (!findDebugBinary(Path, LinkName, LinkCRC, DebugFile))
    return nullptr;

  Expected<ObjectFile *> DbgObjOrErr = getOrCreateObject(DebugFile, ArchName);
  if (DbgObjOrErr)
    return DbgObjOrErr.get();

  // Errors are deliberately dropped: the file might not exist.
  consumeError(DbgObjOrErr.takeError());
  return nullptr;
}
415
lookUpBuildIDObject(const std::string & Path,const ELFObjectFileBase * Obj,const std::string & ArchName)416 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
417 const ELFObjectFileBase *Obj,
418 const std::string &ArchName) {
419 auto BuildID = getBuildID(Obj);
420 if (BuildID.size() < 2)
421 return nullptr;
422 std::string DebugBinaryPath;
423 if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
424 return nullptr;
425 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
426 if (!DbgObjOrErr) {
427 consumeError(DbgObjOrErr.takeError());
428 return nullptr;
429 }
430 return DbgObjOrErr.get();
431 }
432
findDebugBinary(const std::string & OrigPath,const std::string & DebuglinkName,uint32_t CRCHash,std::string & Result)433 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
434 const std::string &DebuglinkName,
435 uint32_t CRCHash, std::string &Result) {
436 SmallString<16> OrigDir(OrigPath);
437 llvm::sys::path::remove_filename(OrigDir);
438 SmallString<16> DebugPath = OrigDir;
439 // Try relative/path/to/original_binary/debuglink_name
440 llvm::sys::path::append(DebugPath, DebuglinkName);
441 if (checkFileCRC(DebugPath, CRCHash)) {
442 Result = std::string(DebugPath);
443 return true;
444 }
445 // Try relative/path/to/original_binary/.debug/debuglink_name
446 DebugPath = OrigDir;
447 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
448 if (checkFileCRC(DebugPath, CRCHash)) {
449 Result = std::string(DebugPath);
450 return true;
451 }
452 // Make the path absolute so that lookups will go to
453 // "/usr/lib/debug/full/path/to/debug", not
454 // "/usr/lib/debug/to/debug"
455 llvm::sys::fs::make_absolute(OrigDir);
456 if (!Opts.FallbackDebugPath.empty()) {
457 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
458 DebugPath = Opts.FallbackDebugPath;
459 } else {
460 #if defined(__NetBSD__)
461 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
462 DebugPath = "/usr/libdata/debug";
463 #else
464 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
465 DebugPath = "/usr/lib/debug";
466 #endif
467 }
468 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
469 DebuglinkName);
470 if (checkFileCRC(DebugPath, CRCHash)) {
471 Result = std::string(DebugPath);
472 return true;
473 }
474 return false;
475 }
476
getBuildIDStr(ArrayRef<uint8_t> BuildID)477 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
478 return StringRef(reinterpret_cast<const char *>(BuildID.data()),
479 BuildID.size());
480 }
481
getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,std::string & Result)482 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
483 std::string &Result) {
484 StringRef BuildIDStr = getBuildIDStr(BuildID);
485 auto I = BuildIDPaths.find(BuildIDStr);
486 if (I != BuildIDPaths.end()) {
487 Result = I->second;
488 return true;
489 }
490 if (!BIDFetcher)
491 return false;
492 if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) {
493 Result = *Path;
494 auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
495 assert(InsertResult.second);
496 (void)InsertResult;
497 return true;
498 }
499
500 return false;
501 }
502
lookUpGsymFile(const std::string & Path)503 std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) {
504 if (Opts.DisableGsym)
505 return {};
506
507 auto CheckGsymFile = [](const llvm::StringRef &GsymPath) {
508 sys::fs::file_status Status;
509 std::error_code EC = llvm::sys::fs::status(GsymPath, Status);
510 return !EC && !llvm::sys::fs::is_directory(Status);
511 };
512
513 // First, look beside the binary file
514 if (const auto GsymPath = Path + ".gsym"; CheckGsymFile(GsymPath))
515 return GsymPath;
516
517 // Then, look in the directories specified by GsymFileDirectory
518
519 for (const auto &Directory : Opts.GsymFileDirectory) {
520 SmallString<16> GsymPath = llvm::StringRef{Directory};
521 llvm::sys::path::append(GsymPath,
522 llvm::sys::path::filename(Path) + ".gsym");
523
524 if (CheckGsymFile(GsymPath))
525 return static_cast<std::string>(GsymPath);
526 }
527
528 return {};
529 }
530
531 Expected<LLVMSymbolizer::ObjectPair>
getOrCreateObjectPair(const std::string & Path,const std::string & ArchName)532 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
533 const std::string &ArchName) {
534 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
535 if (I != ObjectPairForPathArch.end()) {
536 recordAccess(BinaryForPath.find(Path)->second);
537 return I->second;
538 }
539
540 auto ObjOrErr = getOrCreateObject(Path, ArchName);
541 if (!ObjOrErr) {
542 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
543 ObjectPair(nullptr, nullptr));
544 return ObjOrErr.takeError();
545 }
546
547 ObjectFile *Obj = ObjOrErr.get();
548 assert(Obj != nullptr);
549 ObjectFile *DbgObj = nullptr;
550
551 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
552 DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
553 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
554 DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
555 if (!DbgObj)
556 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
557 if (!DbgObj)
558 DbgObj = Obj;
559 ObjectPair Res = std::make_pair(Obj, DbgObj);
560 std::string DbgObjPath = DbgObj->getFileName().str();
561 auto Pair =
562 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
563 BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
564 ObjectPairForPathArch.erase(I);
565 });
566 return Res;
567 }
568
569 Expected<ObjectFile *>
getOrCreateObject(const std::string & Path,const std::string & ArchName)570 LLVMSymbolizer::getOrCreateObject(const std::string &Path,
571 const std::string &ArchName) {
572 Binary *Bin;
573 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
574 if (!Pair.second) {
575 Bin = Pair.first->second->getBinary();
576 recordAccess(Pair.first->second);
577 } else {
578 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
579 if (!BinOrErr)
580 return BinOrErr.takeError();
581
582 CachedBinary &CachedBin = Pair.first->second;
583 CachedBin = std::move(BinOrErr.get());
584 CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
585 LRUBinaries.push_back(CachedBin);
586 CacheSize += CachedBin.size();
587 Bin = CachedBin->getBinary();
588 }
589
590 if (!Bin)
591 return static_cast<ObjectFile *>(nullptr);
592
593 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
594 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
595 if (I != ObjectForUBPathAndArch.end())
596 return I->second.get();
597
598 Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
599 UB->getMachOObjectForArch(ArchName);
600 if (!ObjOrErr) {
601 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
602 std::unique_ptr<ObjectFile>());
603 return ObjOrErr.takeError();
604 }
605 ObjectFile *Res = ObjOrErr->get();
606 auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
607 std::move(ObjOrErr.get()));
608 BinaryForPath.find(Path)->second.pushEvictor(
609 [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
610 return Res;
611 }
612 if (Bin->isObject()) {
613 return cast<ObjectFile>(Bin);
614 }
615 return errorCodeToError(object_error::arch_not_found);
616 }
617
618 Expected<SymbolizableModule *>
createModuleInfo(const ObjectFile * Obj,std::unique_ptr<DIContext> Context,StringRef ModuleName)619 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
620 std::unique_ptr<DIContext> Context,
621 StringRef ModuleName) {
622 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context),
623 Opts.UntagAddresses);
624 std::unique_ptr<SymbolizableModule> SymMod;
625 if (InfoOrErr)
626 SymMod = std::move(*InfoOrErr);
627 auto InsertResult = Modules.insert(
628 std::make_pair(std::string(ModuleName), std::move(SymMod)));
629 assert(InsertResult.second);
630 if (!InfoOrErr)
631 return InfoOrErr.takeError();
632 return InsertResult.first->second.get();
633 }
634
635 Expected<SymbolizableModule *>
getOrCreateModuleInfo(StringRef ModuleName)636 LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
637 StringRef BinaryName = ModuleName;
638 StringRef ArchName = Opts.DefaultArch;
639 size_t ColonPos = ModuleName.find_last_of(':');
640 // Verify that substring after colon form a valid arch name.
641 if (ColonPos != std::string::npos) {
642 StringRef ArchStr = ModuleName.substr(ColonPos + 1);
643 if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
644 BinaryName = ModuleName.substr(0, ColonPos);
645 ArchName = ArchStr;
646 }
647 }
648
649 auto I = Modules.find(ModuleName);
650 if (I != Modules.end()) {
651 recordAccess(BinaryForPath.find(BinaryName)->second);
652 return I->second.get();
653 }
654
655 auto ObjectsOrErr =
656 getOrCreateObjectPair(std::string{BinaryName}, std::string{ArchName});
657 if (!ObjectsOrErr) {
658 // Failed to find valid object file.
659 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
660 return ObjectsOrErr.takeError();
661 }
662 ObjectPair Objects = ObjectsOrErr.get();
663
664 std::unique_ptr<DIContext> Context;
665 // If this is a COFF object containing PDB info and not containing DWARF
666 // section, use a PDBContext to symbolize. Otherwise, use DWARF.
667 // Create a DIContext to symbolize as follows:
668 // - If there is a GSYM file, create a GsymContext.
669 // - Otherwise, if this is a COFF object containing PDB info, create a
670 // PDBContext.
671 // - Otherwise, create a DWARFContext.
672 const auto GsymFile = lookUpGsymFile(BinaryName.str());
673 if (!GsymFile.empty()) {
674 auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);
675
676 if (ReaderOrErr) {
677 std::unique_ptr<gsym::GsymReader> Reader =
678 std::make_unique<gsym::GsymReader>(std::move(*ReaderOrErr));
679
680 Context = std::make_unique<gsym::GsymContext>(std::move(Reader));
681 }
682 }
683 if (!Context) {
684 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
685 const codeview::DebugInfo *DebugInfo;
686 StringRef PDBFileName;
687 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
688 // Use DWARF if there're DWARF sections.
689 bool HasDwarf = llvm::any_of(
690 Objects.first->sections(), [](SectionRef Section) -> bool {
691 if (Expected<StringRef> SectionName = Section.getName())
692 return SectionName.get() == ".debug_info";
693 return false;
694 });
695 if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
696 #if 0
697 using namespace pdb;
698 std::unique_ptr<IPDBSession> Session;
699
700 PDB_ReaderType ReaderType =
701 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
702 if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
703 Session)) {
704 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
705 // Return along the PDB filename to provide more context
706 return createFileError(PDBFileName, std::move(Err));
707 }
708 Context.reset(new PDBContext(*CoffObject, std::move(Session)));
709 #else
710 return make_error<StringError>(
711 "PDB support not compiled in",
712 std::make_error_code(std::errc::not_supported));
713 #endif
714 }
715 }
716 }
717 if (!Context)
718 Context = DWARFContext::create(
719 *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
720 nullptr, Opts.DWPName);
721 auto ModuleOrErr =
722 createModuleInfo(Objects.first, std::move(Context), ModuleName);
723 if (ModuleOrErr) {
724 auto I = Modules.find(ModuleName);
725 BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
726 Modules.erase(I);
727 });
728 }
729 return ModuleOrErr;
730 }
731
732 // For BPF programs .BTF.ext section contains line numbers information,
733 // use it if regular DWARF is not available (e.g. for stripped binary).
useBTFContext(const ObjectFile & Obj)734 static bool useBTFContext(const ObjectFile &Obj) {
735 return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
736 BTFParser::hasBTFSections(Obj);
737 }
738
739 Expected<SymbolizableModule *>
getOrCreateModuleInfo(const ObjectFile & Obj)740 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
741 StringRef ObjName = Obj.getFileName();
742 auto I = Modules.find(ObjName);
743 if (I != Modules.end())
744 return I->second.get();
745
746 std::unique_ptr<DIContext> Context;
747 if (useBTFContext(Obj))
748 Context = BTFContext::create(Obj);
749 else
750 Context = DWARFContext::create(Obj);
751 // FIXME: handle COFF object with PDB info to use PDBContext
752 return createModuleInfo(&Obj, std::move(Context), ObjName);
753 }
754
755 Expected<SymbolizableModule *>
getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID)756 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
757 std::string Path;
758 if (!getOrFindDebugBinary(BuildID, Path)) {
759 return createStringError(errc::no_such_file_or_directory,
760 "could not find build ID");
761 }
762 return getOrCreateModuleInfo(Path);
763 }
764
765 namespace {
766
767 // Undo these various manglings for Win32 extern "C" functions:
768 // cdecl - _foo
769 // stdcall - _foo@12
770 // fastcall - @foo@12
771 // vectorcall - foo@@12
772 // These are all different linkage names for 'foo'.
demanglePE32ExternCFunc(StringRef SymbolName)773 StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
774 char Front = SymbolName.empty() ? '\0' : SymbolName[0];
775
776 // Remove any '@[0-9]+' suffix.
777 bool HasAtNumSuffix = false;
778 if (Front != '?') {
779 size_t AtPos = SymbolName.rfind('@');
780 if (AtPos != StringRef::npos &&
781 all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) {
782 SymbolName = SymbolName.substr(0, AtPos);
783 HasAtNumSuffix = true;
784 }
785 }
786
787 // Remove any ending '@' for vectorcall.
788 bool IsVectorCall = false;
789 if (HasAtNumSuffix && SymbolName.ends_with("@")) {
790 SymbolName = SymbolName.drop_back();
791 IsVectorCall = true;
792 }
793
794 // If not vectorcall, remove any '_' or '@' prefix.
795 if (!IsVectorCall && (Front == '_' || Front == '@'))
796 SymbolName = SymbolName.drop_front();
797
798 return SymbolName;
799 }
800
801 } // end anonymous namespace
802
803 std::string
DemangleName(StringRef Name,const SymbolizableModule * DbiModuleDescriptor)804 LLVMSymbolizer::DemangleName(StringRef Name,
805 const SymbolizableModule *DbiModuleDescriptor) {
806 std::string Result;
807 if (nonMicrosoftDemangle(Name, Result))
808 return Result;
809
810 if (Name.starts_with('?')) {
811 // Only do MSVC C++ demangling on symbols starting with '?'.
812 int status = 0;
813 char *DemangledName = microsoftDemangle(
814 Name, nullptr, &status,
815 MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
816 MSDF_NoMemberType | MSDF_NoReturnType));
817 if (status != 0)
818 return std::string{Name};
819 Result = DemangledName;
820 free(DemangledName);
821 return Result;
822 }
823
824 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
825 std::string DemangledCName(demanglePE32ExternCFunc(Name));
826 // On i386 Windows, the C name mangling for different calling conventions
827 // may also be applied on top of the Itanium or Rust name mangling.
828 if (nonMicrosoftDemangle(DemangledCName, Result))
829 return Result;
830 return DemangledCName;
831 }
832 return std::string{Name};
833 }
834
recordAccess(CachedBinary & Bin)835 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
836 if (Bin->getBinary())
837 LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
838 }
839
pruneCache()840 void LLVMSymbolizer::pruneCache() {
841 // Evict the LRU binary until the max cache size is reached or there's <= 1
842 // item in the cache. The MRU binary is always kept to avoid thrashing if it's
843 // larger than the cache size.
844 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
845 std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
846 CachedBinary &Bin = LRUBinaries.front();
847 CacheSize -= Bin.size();
848 LRUBinaries.pop_front();
849 Bin.evict();
850 }
851 }
852
pushEvictor(std::function<void ()> NewEvictor)853 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
854 if (Evictor) {
855 this->Evictor = [OldEvictor = std::move(this->Evictor),
856 NewEvictor = std::move(NewEvictor)]() {
857 NewEvictor();
858 OldEvictor();
859 };
860 } else {
861 this->Evictor = std::move(NewEvictor);
862 }
863 }
864
865 } // namespace symbolize
866 } // namespace llvm
867