1 //===-- LLVMSymbolize.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation for LLVM symbolization library.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
14
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/DebugInfo/BTF/BTFContext.h"
17 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
18 #include "llvm/DebugInfo/PDB/PDB.h"
19 #include "llvm/DebugInfo/PDB/PDBContext.h"
20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
21 #include "llvm/Demangle/Demangle.h"
22 #include "llvm/Object/BuildID.h"
23 #include "llvm/Object/COFF.h"
24 #include "llvm/Object/ELFObjectFile.h"
25 #include "llvm/Object/MachO.h"
26 #include "llvm/Object/MachOUniversal.h"
27 #include "llvm/Support/CRC.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/DataExtractor.h"
30 #include "llvm/Support/Errc.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/Path.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <cstring>
37
38 namespace llvm {
39 namespace codeview {
40 union DebugInfo;
41 }
42 namespace symbolize {
43
44 LLVMSymbolizer::LLVMSymbolizer() = default;
45
LLVMSymbolizer(const Options & Opts)46 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts)
47 : Opts(Opts),
48 BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {}
49
50 LLVMSymbolizer::~LLVMSymbolizer() = default;
51
52 template <typename T>
53 Expected<DILineInfo>
symbolizeCodeCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)54 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
55 object::SectionedAddress ModuleOffset) {
56
57 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
58 if (!InfoOrErr)
59 return InfoOrErr.takeError();
60
61 SymbolizableModule *Info = *InfoOrErr;
62
63 // A null module means an error has already been reported. Return an empty
64 // result.
65 if (!Info)
66 return DILineInfo();
67
68 // If the user is giving us relative addresses, add the preferred base of the
69 // object to the offset before we do the query. It's what DIContext expects.
70 if (Opts.RelativeAddresses)
71 ModuleOffset.Address += Info->getModulePreferredBase();
72
73 DILineInfo LineInfo = Info->symbolizeCode(
74 ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
75 Opts.UseSymbolTable);
76 if (Opts.Demangle)
77 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
78 return LineInfo;
79 }
80
81 Expected<DILineInfo>
symbolizeCode(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)82 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
83 object::SectionedAddress ModuleOffset) {
84 return symbolizeCodeCommon(Obj, ModuleOffset);
85 }
86
87 Expected<DILineInfo>
symbolizeCode(const std::string & ModuleName,object::SectionedAddress ModuleOffset)88 LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
89 object::SectionedAddress ModuleOffset) {
90 return symbolizeCodeCommon(ModuleName, ModuleOffset);
91 }
92
93 Expected<DILineInfo>
symbolizeCode(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)94 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
95 object::SectionedAddress ModuleOffset) {
96 return symbolizeCodeCommon(BuildID, ModuleOffset);
97 }
98
99 template <typename T>
symbolizeInlinedCodeCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)100 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
101 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
102 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
103 if (!InfoOrErr)
104 return InfoOrErr.takeError();
105
106 SymbolizableModule *Info = *InfoOrErr;
107
108 // A null module means an error has already been reported. Return an empty
109 // result.
110 if (!Info)
111 return DIInliningInfo();
112
113 // If the user is giving us relative addresses, add the preferred base of the
114 // object to the offset before we do the query. It's what DIContext expects.
115 if (Opts.RelativeAddresses)
116 ModuleOffset.Address += Info->getModulePreferredBase();
117
118 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
119 ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
120 Opts.UseSymbolTable);
121 if (Opts.Demangle) {
122 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
123 auto *Frame = InlinedContext.getMutableFrame(i);
124 Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
125 }
126 }
127 return InlinedContext;
128 }
129
130 Expected<DIInliningInfo>
symbolizeInlinedCode(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)131 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj,
132 object::SectionedAddress ModuleOffset) {
133 return symbolizeInlinedCodeCommon(Obj, ModuleOffset);
134 }
135
136 Expected<DIInliningInfo>
symbolizeInlinedCode(const std::string & ModuleName,object::SectionedAddress ModuleOffset)137 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
138 object::SectionedAddress ModuleOffset) {
139 return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
140 }
141
142 Expected<DIInliningInfo>
symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)143 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
144 object::SectionedAddress ModuleOffset) {
145 return symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
146 }
147
148 template <typename T>
149 Expected<DIGlobal>
symbolizeDataCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)150 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
151 object::SectionedAddress ModuleOffset) {
152
153 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
154 if (!InfoOrErr)
155 return InfoOrErr.takeError();
156
157 SymbolizableModule *Info = *InfoOrErr;
158 // A null module means an error has already been reported. Return an empty
159 // result.
160 if (!Info)
161 return DIGlobal();
162
163 // If the user is giving us relative addresses, add the preferred base of
164 // the object to the offset before we do the query. It's what DIContext
165 // expects.
166 if (Opts.RelativeAddresses)
167 ModuleOffset.Address += Info->getModulePreferredBase();
168
169 DIGlobal Global = Info->symbolizeData(ModuleOffset);
170 if (Opts.Demangle)
171 Global.Name = DemangleName(Global.Name, Info);
172 return Global;
173 }
174
175 Expected<DIGlobal>
symbolizeData(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)176 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj,
177 object::SectionedAddress ModuleOffset) {
178 return symbolizeDataCommon(Obj, ModuleOffset);
179 }
180
181 Expected<DIGlobal>
symbolizeData(const std::string & ModuleName,object::SectionedAddress ModuleOffset)182 LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
183 object::SectionedAddress ModuleOffset) {
184 return symbolizeDataCommon(ModuleName, ModuleOffset);
185 }
186
187 Expected<DIGlobal>
symbolizeData(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)188 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
189 object::SectionedAddress ModuleOffset) {
190 return symbolizeDataCommon(BuildID, ModuleOffset);
191 }
192
193 template <typename T>
194 Expected<std::vector<DILocal>>
symbolizeFrameCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)195 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
196 object::SectionedAddress ModuleOffset) {
197 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
198 if (!InfoOrErr)
199 return InfoOrErr.takeError();
200
201 SymbolizableModule *Info = *InfoOrErr;
202 // A null module means an error has already been reported. Return an empty
203 // result.
204 if (!Info)
205 return std::vector<DILocal>();
206
207 // If the user is giving us relative addresses, add the preferred base of
208 // the object to the offset before we do the query. It's what DIContext
209 // expects.
210 if (Opts.RelativeAddresses)
211 ModuleOffset.Address += Info->getModulePreferredBase();
212
213 return Info->symbolizeFrame(ModuleOffset);
214 }
215
216 Expected<std::vector<DILocal>>
symbolizeFrame(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)217 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj,
218 object::SectionedAddress ModuleOffset) {
219 return symbolizeFrameCommon(Obj, ModuleOffset);
220 }
221
222 Expected<std::vector<DILocal>>
symbolizeFrame(const std::string & ModuleName,object::SectionedAddress ModuleOffset)223 LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName,
224 object::SectionedAddress ModuleOffset) {
225 return symbolizeFrameCommon(ModuleName, ModuleOffset);
226 }
227
228 Expected<std::vector<DILocal>>
symbolizeFrame(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)229 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
230 object::SectionedAddress ModuleOffset) {
231 return symbolizeFrameCommon(BuildID, ModuleOffset);
232 }
233
234 template <typename T>
235 Expected<std::vector<DILineInfo>>
findSymbolCommon(const T & ModuleSpecifier,StringRef Symbol,uint64_t Offset)236 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
237 uint64_t Offset) {
238 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
239 if (!InfoOrErr)
240 return InfoOrErr.takeError();
241
242 SymbolizableModule *Info = *InfoOrErr;
243 std::vector<DILineInfo> Result;
244
245 // A null module means an error has already been reported. Return an empty
246 // result.
247 if (!Info)
248 return Result;
249
250 for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
251 DILineInfo LineInfo = Info->symbolizeCode(
252 A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
253 Opts.UseSymbolTable);
254 if (LineInfo.FileName != DILineInfo::BadString) {
255 if (Opts.Demangle)
256 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
257 Result.push_back(LineInfo);
258 }
259 }
260
261 return Result;
262 }
263
264 Expected<std::vector<DILineInfo>>
findSymbol(const ObjectFile & Obj,StringRef Symbol,uint64_t Offset)265 LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
266 uint64_t Offset) {
267 return findSymbolCommon(Obj, Symbol, Offset);
268 }
269
270 Expected<std::vector<DILineInfo>>
findSymbol(const std::string & ModuleName,StringRef Symbol,uint64_t Offset)271 LLVMSymbolizer::findSymbol(const std::string &ModuleName, StringRef Symbol,
272 uint64_t Offset) {
273 return findSymbolCommon(ModuleName, Symbol, Offset);
274 }
275
276 Expected<std::vector<DILineInfo>>
findSymbol(ArrayRef<uint8_t> BuildID,StringRef Symbol,uint64_t Offset)277 LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
278 uint64_t Offset) {
279 return findSymbolCommon(BuildID, Symbol, Offset);
280 }
281
flush()282 void LLVMSymbolizer::flush() {
283 ObjectForUBPathAndArch.clear();
284 LRUBinaries.clear();
285 CacheSize = 0;
286 BinaryForPath.clear();
287 ObjectPairForPathArch.clear();
288 Modules.clear();
289 BuildIDPaths.clear();
290 }
291
292 namespace {
293
294 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in
295 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
296 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
297 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
getDarwinDWARFResourceForPath(const std::string & Path,const std::string & Basename)298 std::string getDarwinDWARFResourceForPath(const std::string &Path,
299 const std::string &Basename) {
300 SmallString<16> ResourceName = StringRef(Path);
301 if (sys::path::extension(Path) != ".dSYM") {
302 ResourceName += ".dSYM";
303 }
304 sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
305 sys::path::append(ResourceName, Basename);
306 return std::string(ResourceName);
307 }
308
checkFileCRC(StringRef Path,uint32_t CRCHash)309 bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
310 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
311 MemoryBuffer::getFileOrSTDIN(Path);
312 if (!MB)
313 return false;
314 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
315 }
316
getGNUDebuglinkContents(const ObjectFile * Obj,std::string & DebugName,uint32_t & CRCHash)317 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
318 uint32_t &CRCHash) {
319 if (!Obj)
320 return false;
321 for (const SectionRef &Section : Obj->sections()) {
322 StringRef Name;
323 consumeError(Section.getName().moveInto(Name));
324
325 Name = Name.substr(Name.find_first_not_of("._"));
326 if (Name == "gnu_debuglink") {
327 Expected<StringRef> ContentsOrErr = Section.getContents();
328 if (!ContentsOrErr) {
329 consumeError(ContentsOrErr.takeError());
330 return false;
331 }
332 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
333 uint64_t Offset = 0;
334 if (const char *DebugNameStr = DE.getCStr(&Offset)) {
335 // 4-byte align the offset.
336 Offset = (Offset + 3) & ~0x3;
337 if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
338 DebugName = DebugNameStr;
339 CRCHash = DE.getU32(&Offset);
340 return true;
341 }
342 }
343 break;
344 }
345 }
346 return false;
347 }
348
darwinDsymMatchesBinary(const MachOObjectFile * DbgObj,const MachOObjectFile * Obj)349 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
350 const MachOObjectFile *Obj) {
351 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
352 ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
353 if (dbg_uuid.empty() || bin_uuid.empty())
354 return false;
355 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
356 }
357
358 } // end anonymous namespace
359
lookUpDsymFile(const std::string & ExePath,const MachOObjectFile * MachExeObj,const std::string & ArchName)360 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
361 const MachOObjectFile *MachExeObj,
362 const std::string &ArchName) {
363 // On Darwin we may find DWARF in separate object file in
364 // resource directory.
365 std::vector<std::string> DsymPaths;
366 StringRef Filename = sys::path::filename(ExePath);
367 DsymPaths.push_back(
368 getDarwinDWARFResourceForPath(ExePath, std::string(Filename)));
369 for (const auto &Path : Opts.DsymHints) {
370 DsymPaths.push_back(
371 getDarwinDWARFResourceForPath(Path, std::string(Filename)));
372 }
373 for (const auto &Path : DsymPaths) {
374 auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
375 if (!DbgObjOrErr) {
376 // Ignore errors, the file might not exist.
377 consumeError(DbgObjOrErr.takeError());
378 continue;
379 }
380 ObjectFile *DbgObj = DbgObjOrErr.get();
381 if (!DbgObj)
382 continue;
383 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
384 if (!MachDbgObj)
385 continue;
386 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
387 return DbgObj;
388 }
389 return nullptr;
390 }
391
/// Locates the separate debug object referenced by the gnu_debuglink section
/// of \p Obj. Returns nullptr when the section is missing or unreadable, when
/// no file with the recorded CRC is found, or when the file cannot be opened.
ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
                                                  const ObjectFile *Obj,
                                                  const std::string &ArchName) {
  std::string LinkName;
  uint32_t ExpectedCRC = 0;
  if (!getGNUDebuglinkContents(Obj, LinkName, ExpectedCRC))
    return nullptr;

  std::string DebugFile;
  if (!findDebugBinary(Path, LinkName, ExpectedCRC, DebugFile))
    return nullptr;

  auto DbgObjOrErr = getOrCreateObject(DebugFile, ArchName);
  if (DbgObjOrErr)
    return DbgObjOrErr.get();

  // Ignore errors, the file might not exist.
  consumeError(DbgObjOrErr.takeError());
  return nullptr;
}
410
lookUpBuildIDObject(const std::string & Path,const ELFObjectFileBase * Obj,const std::string & ArchName)411 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
412 const ELFObjectFileBase *Obj,
413 const std::string &ArchName) {
414 auto BuildID = getBuildID(Obj);
415 if (BuildID.size() < 2)
416 return nullptr;
417 std::string DebugBinaryPath;
418 if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
419 return nullptr;
420 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
421 if (!DbgObjOrErr) {
422 consumeError(DbgObjOrErr.takeError());
423 return nullptr;
424 }
425 return DbgObjOrErr.get();
426 }
427
findDebugBinary(const std::string & OrigPath,const std::string & DebuglinkName,uint32_t CRCHash,std::string & Result)428 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
429 const std::string &DebuglinkName,
430 uint32_t CRCHash, std::string &Result) {
431 SmallString<16> OrigDir(OrigPath);
432 llvm::sys::path::remove_filename(OrigDir);
433 SmallString<16> DebugPath = OrigDir;
434 // Try relative/path/to/original_binary/debuglink_name
435 llvm::sys::path::append(DebugPath, DebuglinkName);
436 if (checkFileCRC(DebugPath, CRCHash)) {
437 Result = std::string(DebugPath);
438 return true;
439 }
440 // Try relative/path/to/original_binary/.debug/debuglink_name
441 DebugPath = OrigDir;
442 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
443 if (checkFileCRC(DebugPath, CRCHash)) {
444 Result = std::string(DebugPath);
445 return true;
446 }
447 // Make the path absolute so that lookups will go to
448 // "/usr/lib/debug/full/path/to/debug", not
449 // "/usr/lib/debug/to/debug"
450 llvm::sys::fs::make_absolute(OrigDir);
451 if (!Opts.FallbackDebugPath.empty()) {
452 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
453 DebugPath = Opts.FallbackDebugPath;
454 } else {
455 #if defined(__NetBSD__)
456 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
457 DebugPath = "/usr/libdata/debug";
458 #else
459 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
460 DebugPath = "/usr/lib/debug";
461 #endif
462 }
463 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
464 DebuglinkName);
465 if (checkFileCRC(DebugPath, CRCHash)) {
466 Result = std::string(DebugPath);
467 return true;
468 }
469 return false;
470 }
471
getBuildIDStr(ArrayRef<uint8_t> BuildID)472 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
473 return StringRef(reinterpret_cast<const char *>(BuildID.data()),
474 BuildID.size());
475 }
476
getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,std::string & Result)477 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
478 std::string &Result) {
479 StringRef BuildIDStr = getBuildIDStr(BuildID);
480 auto I = BuildIDPaths.find(BuildIDStr);
481 if (I != BuildIDPaths.end()) {
482 Result = I->second;
483 return true;
484 }
485 if (!BIDFetcher)
486 return false;
487 if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) {
488 Result = *Path;
489 auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
490 assert(InsertResult.second);
491 (void)InsertResult;
492 return true;
493 }
494
495 return false;
496 }
497
498 Expected<LLVMSymbolizer::ObjectPair>
getOrCreateObjectPair(const std::string & Path,const std::string & ArchName)499 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
500 const std::string &ArchName) {
501 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
502 if (I != ObjectPairForPathArch.end()) {
503 recordAccess(BinaryForPath.find(Path)->second);
504 return I->second;
505 }
506
507 auto ObjOrErr = getOrCreateObject(Path, ArchName);
508 if (!ObjOrErr) {
509 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
510 ObjectPair(nullptr, nullptr));
511 return ObjOrErr.takeError();
512 }
513
514 ObjectFile *Obj = ObjOrErr.get();
515 assert(Obj != nullptr);
516 ObjectFile *DbgObj = nullptr;
517
518 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
519 DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
520 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
521 DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
522 if (!DbgObj)
523 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
524 if (!DbgObj)
525 DbgObj = Obj;
526 ObjectPair Res = std::make_pair(Obj, DbgObj);
527 std::string DbgObjPath = DbgObj->getFileName().str();
528 auto Pair =
529 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
530 BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
531 ObjectPairForPathArch.erase(I);
532 });
533 return Res;
534 }
535
536 Expected<ObjectFile *>
getOrCreateObject(const std::string & Path,const std::string & ArchName)537 LLVMSymbolizer::getOrCreateObject(const std::string &Path,
538 const std::string &ArchName) {
539 Binary *Bin;
540 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
541 if (!Pair.second) {
542 Bin = Pair.first->second->getBinary();
543 recordAccess(Pair.first->second);
544 } else {
545 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
546 if (!BinOrErr)
547 return BinOrErr.takeError();
548
549 CachedBinary &CachedBin = Pair.first->second;
550 CachedBin = std::move(BinOrErr.get());
551 CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
552 LRUBinaries.push_back(CachedBin);
553 CacheSize += CachedBin.size();
554 Bin = CachedBin->getBinary();
555 }
556
557 if (!Bin)
558 return static_cast<ObjectFile *>(nullptr);
559
560 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
561 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
562 if (I != ObjectForUBPathAndArch.end())
563 return I->second.get();
564
565 Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
566 UB->getMachOObjectForArch(ArchName);
567 if (!ObjOrErr) {
568 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
569 std::unique_ptr<ObjectFile>());
570 return ObjOrErr.takeError();
571 }
572 ObjectFile *Res = ObjOrErr->get();
573 auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
574 std::move(ObjOrErr.get()));
575 BinaryForPath.find(Path)->second.pushEvictor(
576 [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
577 return Res;
578 }
579 if (Bin->isObject()) {
580 return cast<ObjectFile>(Bin);
581 }
582 return errorCodeToError(object_error::arch_not_found);
583 }
584
585 Expected<SymbolizableModule *>
createModuleInfo(const ObjectFile * Obj,std::unique_ptr<DIContext> Context,StringRef ModuleName)586 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
587 std::unique_ptr<DIContext> Context,
588 StringRef ModuleName) {
589 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context),
590 Opts.UntagAddresses);
591 std::unique_ptr<SymbolizableModule> SymMod;
592 if (InfoOrErr)
593 SymMod = std::move(*InfoOrErr);
594 auto InsertResult = Modules.insert(
595 std::make_pair(std::string(ModuleName), std::move(SymMod)));
596 assert(InsertResult.second);
597 if (!InfoOrErr)
598 return InfoOrErr.takeError();
599 return InsertResult.first->second.get();
600 }
601
602 Expected<SymbolizableModule *>
getOrCreateModuleInfo(const std::string & ModuleName)603 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
604 std::string BinaryName = ModuleName;
605 std::string ArchName = Opts.DefaultArch;
606 size_t ColonPos = ModuleName.find_last_of(':');
607 // Verify that substring after colon form a valid arch name.
608 if (ColonPos != std::string::npos) {
609 std::string ArchStr = ModuleName.substr(ColonPos + 1);
610 if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
611 BinaryName = ModuleName.substr(0, ColonPos);
612 ArchName = ArchStr;
613 }
614 }
615
616 auto I = Modules.find(ModuleName);
617 if (I != Modules.end()) {
618 recordAccess(BinaryForPath.find(BinaryName)->second);
619 return I->second.get();
620 }
621
622 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
623 if (!ObjectsOrErr) {
624 // Failed to find valid object file.
625 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
626 return ObjectsOrErr.takeError();
627 }
628 ObjectPair Objects = ObjectsOrErr.get();
629
630 std::unique_ptr<DIContext> Context;
631 // If this is a COFF object containing PDB info and not containing DWARF
632 // section, use a PDBContext to symbolize. Otherwise, use DWARF.
633 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
634 const codeview::DebugInfo *DebugInfo;
635 StringRef PDBFileName;
636 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
637 // Use DWARF if there're DWARF sections.
638 bool HasDwarf =
639 llvm::any_of(Objects.first->sections(), [](SectionRef Section) -> bool {
640 if (Expected<StringRef> SectionName = Section.getName())
641 return SectionName.get() == ".debug_info";
642 return false;
643 });
644 if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
645 #if 0
646 using namespace pdb;
647 std::unique_ptr<IPDBSession> Session;
648
649 PDB_ReaderType ReaderType =
650 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
651 if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
652 Session)) {
653 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
654 // Return along the PDB filename to provide more context
655 return createFileError(PDBFileName, std::move(Err));
656 }
657 Context.reset(new PDBContext(*CoffObject, std::move(Session)));
658 #else
659 return make_error<StringError>(
660 "PDB support not compiled in",
661 std::make_error_code(std::errc::not_supported));
662 #endif
663 }
664 }
665 if (!Context)
666 Context = DWARFContext::create(
667 *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
668 nullptr, Opts.DWPName);
669 auto ModuleOrErr =
670 createModuleInfo(Objects.first, std::move(Context), ModuleName);
671 if (ModuleOrErr) {
672 auto I = Modules.find(ModuleName);
673 BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
674 Modules.erase(I);
675 });
676 }
677 return ModuleOrErr;
678 }
679
680 // For BPF programs .BTF.ext section contains line numbers information,
681 // use it if regular DWARF is not available (e.g. for stripped binary).
useBTFContext(const ObjectFile & Obj)682 static bool useBTFContext(const ObjectFile &Obj) {
683 return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
684 BTFParser::hasBTFSections(Obj);
685 }
686
687 Expected<SymbolizableModule *>
getOrCreateModuleInfo(const ObjectFile & Obj)688 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
689 StringRef ObjName = Obj.getFileName();
690 auto I = Modules.find(ObjName);
691 if (I != Modules.end())
692 return I->second.get();
693
694 std::unique_ptr<DIContext> Context;
695 if (useBTFContext(Obj))
696 Context = BTFContext::create(Obj);
697 else
698 Context = DWARFContext::create(Obj);
699 // FIXME: handle COFF object with PDB info to use PDBContext
700 return createModuleInfo(&Obj, std::move(Context), ObjName);
701 }
702
703 Expected<SymbolizableModule *>
getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID)704 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
705 std::string Path;
706 if (!getOrFindDebugBinary(BuildID, Path)) {
707 return createStringError(errc::no_such_file_or_directory,
708 "could not find build ID");
709 }
710 return getOrCreateModuleInfo(Path);
711 }
712
713 namespace {
714
715 // Undo these various manglings for Win32 extern "C" functions:
716 // cdecl - _foo
717 // stdcall - _foo@12
718 // fastcall - @foo@12
719 // vectorcall - foo@@12
720 // These are all different linkage names for 'foo'.
demanglePE32ExternCFunc(StringRef SymbolName)721 StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
722 char Front = SymbolName.empty() ? '\0' : SymbolName[0];
723
724 // Remove any '@[0-9]+' suffix.
725 bool HasAtNumSuffix = false;
726 if (Front != '?') {
727 size_t AtPos = SymbolName.rfind('@');
728 if (AtPos != StringRef::npos &&
729 all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) {
730 SymbolName = SymbolName.substr(0, AtPos);
731 HasAtNumSuffix = true;
732 }
733 }
734
735 // Remove any ending '@' for vectorcall.
736 bool IsVectorCall = false;
737 if (HasAtNumSuffix && SymbolName.ends_with("@")) {
738 SymbolName = SymbolName.drop_back();
739 IsVectorCall = true;
740 }
741
742 // If not vectorcall, remove any '_' or '@' prefix.
743 if (!IsVectorCall && (Front == '_' || Front == '@'))
744 SymbolName = SymbolName.drop_front();
745
746 return SymbolName;
747 }
748
749 } // end anonymous namespace
750
751 std::string
DemangleName(const std::string & Name,const SymbolizableModule * DbiModuleDescriptor)752 LLVMSymbolizer::DemangleName(const std::string &Name,
753 const SymbolizableModule *DbiModuleDescriptor) {
754 std::string Result;
755 if (nonMicrosoftDemangle(Name, Result))
756 return Result;
757
758 if (!Name.empty() && Name.front() == '?') {
759 // Only do MSVC C++ demangling on symbols starting with '?'.
760 int status = 0;
761 char *DemangledName = microsoftDemangle(
762 Name, nullptr, &status,
763 MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
764 MSDF_NoMemberType | MSDF_NoReturnType));
765 if (status != 0)
766 return Name;
767 Result = DemangledName;
768 free(DemangledName);
769 return Result;
770 }
771
772 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
773 std::string DemangledCName(demanglePE32ExternCFunc(Name));
774 // On i386 Windows, the C name mangling for different calling conventions
775 // may also be applied on top of the Itanium or Rust name mangling.
776 if (nonMicrosoftDemangle(DemangledCName, Result))
777 return Result;
778 return DemangledCName;
779 }
780 return Name;
781 }
782
recordAccess(CachedBinary & Bin)783 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
784 if (Bin->getBinary())
785 LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
786 }
787
pruneCache()788 void LLVMSymbolizer::pruneCache() {
789 // Evict the LRU binary until the max cache size is reached or there's <= 1
790 // item in the cache. The MRU binary is always kept to avoid thrashing if it's
791 // larger than the cache size.
792 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
793 std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
794 CachedBinary &Bin = LRUBinaries.front();
795 CacheSize -= Bin.size();
796 LRUBinaries.pop_front();
797 Bin.evict();
798 }
799 }
800
pushEvictor(std::function<void ()> NewEvictor)801 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
802 if (Evictor) {
803 this->Evictor = [OldEvictor = std::move(this->Evictor),
804 NewEvictor = std::move(NewEvictor)]() {
805 NewEvictor();
806 OldEvictor();
807 };
808 } else {
809 this->Evictor = std::move(NewEvictor);
810 }
811 }
812
813 } // namespace symbolize
814 } // namespace llvm
815