xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===-- LLVMSymbolize.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation for LLVM symbolization library.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
14 
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/DebugInfo/BTF/BTFContext.h"
17 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
18 #include "llvm/DebugInfo/GSYM/GsymContext.h"
19 #include "llvm/DebugInfo/GSYM/GsymReader.h"
20 #include "llvm/DebugInfo/PDB/PDB.h"
21 #include "llvm/DebugInfo/PDB/PDBContext.h"
22 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/Object/BuildID.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Object/ELFObjectFile.h"
27 #include "llvm/Object/MachO.h"
28 #include "llvm/Object/MachOUniversal.h"
29 #include "llvm/Support/CRC.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/DataExtractor.h"
32 #include "llvm/Support/Errc.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 #include <cassert>
37 #include <cstring>
38 
39 namespace llvm {
// Forward declaration only: this file handles codeview::DebugInfo solely
// through a pointer (see the COFF/PDB probing in getOrCreateModuleInfo), so
// the full definition is not needed here.
namespace codeview {
union DebugInfo;
}
43 namespace symbolize {
44 
// Default construction. No BuildIDFetcher is created on this path.
// NOTE(review): BIDFetcher presumably defaults to null, which
// getOrFindDebugBinary() checks for -- confirm against the header.
LLVMSymbolizer::LLVMSymbolizer() = default;

// Construct from explicit options. The options are copied into the
// symbolizer and the BuildIDFetcher is created from Opts.DebugFileDirectory.
LLVMSymbolizer::LLVMSymbolizer(const Options &Opts)
    : Opts(Opts),
      BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {}

// Defaulted, but defined out of line in this translation unit.
LLVMSymbolizer::~LLVMSymbolizer() = default;
52 
53 template <typename T>
54 Expected<DILineInfo>
55 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
56                                     object::SectionedAddress ModuleOffset) {
57 
58   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
59   if (!InfoOrErr)
60     return InfoOrErr.takeError();
61 
62   SymbolizableModule *Info = *InfoOrErr;
63 
64   // A null module means an error has already been reported. Return an empty
65   // result.
66   if (!Info)
67     return DILineInfo();
68 
69   // If the user is giving us relative addresses, add the preferred base of the
70   // object to the offset before we do the query. It's what DIContext expects.
71   if (Opts.RelativeAddresses)
72     ModuleOffset.Address += Info->getModulePreferredBase();
73 
74   DILineInfo LineInfo = Info->symbolizeCode(
75       ModuleOffset,
76       DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
77                           Opts.SkipLineZero),
78       Opts.UseSymbolTable);
79   if (Opts.Demangle)
80     LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
81   return LineInfo;
82 }
83 
// Public symbolizeCode() entry points. Each overload identifies the module in
// a different way and forwards unchanged to symbolizeCodeCommon().

// Module given as an already-loaded object file.
Expected<DILineInfo>
LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
                              object::SectionedAddress ModuleOffset) {
  return symbolizeCodeCommon(Obj, ModuleOffset);
}

// Module given by name (see getOrCreateModuleInfo(StringRef) for the accepted
// "path" / "path:arch" forms).
Expected<DILineInfo>
LLVMSymbolizer::symbolizeCode(StringRef ModuleName,
                              object::SectionedAddress ModuleOffset) {
  return symbolizeCodeCommon(ModuleName, ModuleOffset);
}

// Module given by build ID.
Expected<DILineInfo>
LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
                              object::SectionedAddress ModuleOffset) {
  return symbolizeCodeCommon(BuildID, ModuleOffset);
}
101 
102 template <typename T>
103 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
104     const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
105   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
106   if (!InfoOrErr)
107     return InfoOrErr.takeError();
108 
109   SymbolizableModule *Info = *InfoOrErr;
110 
111   // A null module means an error has already been reported. Return an empty
112   // result.
113   if (!Info)
114     return DIInliningInfo();
115 
116   // If the user is giving us relative addresses, add the preferred base of the
117   // object to the offset before we do the query. It's what DIContext expects.
118   if (Opts.RelativeAddresses)
119     ModuleOffset.Address += Info->getModulePreferredBase();
120 
121   DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
122       ModuleOffset,
123       DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
124                           Opts.SkipLineZero),
125       Opts.UseSymbolTable);
126   if (Opts.Demangle) {
127     for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
128       auto *Frame = InlinedContext.getMutableFrame(i);
129       Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
130     }
131   }
132   return InlinedContext;
133 }
134 
// Public symbolizeInlinedCode() entry points. Each overload identifies the
// module in a different way and forwards unchanged to
// symbolizeInlinedCodeCommon().

// Module given as an already-loaded object file.
Expected<DIInliningInfo>
LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj,
                                     object::SectionedAddress ModuleOffset) {
  return symbolizeInlinedCodeCommon(Obj, ModuleOffset);
}

// Module given by name.
Expected<DIInliningInfo>
LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName,
                                     object::SectionedAddress ModuleOffset) {
  return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
}

// Module given by build ID.
Expected<DIInliningInfo>
LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
                                     object::SectionedAddress ModuleOffset) {
  return symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
}
152 
153 template <typename T>
154 Expected<DIGlobal>
155 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
156                                     object::SectionedAddress ModuleOffset) {
157 
158   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
159   if (!InfoOrErr)
160     return InfoOrErr.takeError();
161 
162   SymbolizableModule *Info = *InfoOrErr;
163   // A null module means an error has already been reported. Return an empty
164   // result.
165   if (!Info)
166     return DIGlobal();
167 
168   // If the user is giving us relative addresses, add the preferred base of
169   // the object to the offset before we do the query. It's what DIContext
170   // expects.
171   if (Opts.RelativeAddresses)
172     ModuleOffset.Address += Info->getModulePreferredBase();
173 
174   DIGlobal Global = Info->symbolizeData(ModuleOffset);
175   if (Opts.Demangle)
176     Global.Name = DemangleName(Global.Name, Info);
177   return Global;
178 }
179 
// Public symbolizeData() entry points. Each overload identifies the module in
// a different way and forwards unchanged to symbolizeDataCommon().

// Module given as an already-loaded object file.
Expected<DIGlobal>
LLVMSymbolizer::symbolizeData(const ObjectFile &Obj,
                              object::SectionedAddress ModuleOffset) {
  return symbolizeDataCommon(Obj, ModuleOffset);
}

// Module given by name.
Expected<DIGlobal>
LLVMSymbolizer::symbolizeData(StringRef ModuleName,
                              object::SectionedAddress ModuleOffset) {
  return symbolizeDataCommon(ModuleName, ModuleOffset);
}

// Module given by build ID.
Expected<DIGlobal>
LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
                              object::SectionedAddress ModuleOffset) {
  return symbolizeDataCommon(BuildID, ModuleOffset);
}
197 
198 template <typename T>
199 Expected<std::vector<DILocal>>
200 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
201                                      object::SectionedAddress ModuleOffset) {
202   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
203   if (!InfoOrErr)
204     return InfoOrErr.takeError();
205 
206   SymbolizableModule *Info = *InfoOrErr;
207   // A null module means an error has already been reported. Return an empty
208   // result.
209   if (!Info)
210     return std::vector<DILocal>();
211 
212   // If the user is giving us relative addresses, add the preferred base of
213   // the object to the offset before we do the query. It's what DIContext
214   // expects.
215   if (Opts.RelativeAddresses)
216     ModuleOffset.Address += Info->getModulePreferredBase();
217 
218   return Info->symbolizeFrame(ModuleOffset);
219 }
220 
// Public symbolizeFrame() entry points. Each overload identifies the module
// in a different way and forwards unchanged to symbolizeFrameCommon().

// Module given as an already-loaded object file.
Expected<std::vector<DILocal>>
LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj,
                               object::SectionedAddress ModuleOffset) {
  return symbolizeFrameCommon(Obj, ModuleOffset);
}

// Module given by name.
Expected<std::vector<DILocal>>
LLVMSymbolizer::symbolizeFrame(StringRef ModuleName,
                               object::SectionedAddress ModuleOffset) {
  return symbolizeFrameCommon(ModuleName, ModuleOffset);
}

// Module given by build ID.
Expected<std::vector<DILocal>>
LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
                               object::SectionedAddress ModuleOffset) {
  return symbolizeFrameCommon(BuildID, ModuleOffset);
}
238 
239 template <typename T>
240 Expected<std::vector<DILineInfo>>
241 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
242                                  uint64_t Offset) {
243   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
244   if (!InfoOrErr)
245     return InfoOrErr.takeError();
246 
247   SymbolizableModule *Info = *InfoOrErr;
248   std::vector<DILineInfo> Result;
249 
250   // A null module means an error has already been reported. Return an empty
251   // result.
252   if (!Info)
253     return Result;
254 
255   for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
256     DILineInfo LineInfo = Info->symbolizeCode(
257         A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
258         Opts.UseSymbolTable);
259     if (LineInfo.FileName != DILineInfo::BadString) {
260       if (Opts.Demangle)
261         LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
262       Result.push_back(std::move(LineInfo));
263     }
264   }
265 
266   return Result;
267 }
268 
// Public findSymbol() entry points. Each overload identifies the module in a
// different way and forwards unchanged to findSymbolCommon().

// Module given as an already-loaded object file.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
                           uint64_t Offset) {
  return findSymbolCommon(Obj, Symbol, Offset);
}

// Module given by name.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol,
                           uint64_t Offset) {
  return findSymbolCommon(ModuleName, Symbol, Offset);
}

// Module given by build ID.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
                           uint64_t Offset) {
  return findSymbolCommon(BuildID, Symbol, Offset);
}
286 
// Drop every cache held by the symbolizer (objects, binaries, object pairs,
// modules and build-ID path lookups) and reset the accounted cache size to
// zero.
// NOTE(review): the clear() order is kept exactly as written; the LRU list is
// emptied before BinaryForPath, which presumably matters because the list
// links the CachedBinary values stored in that map -- confirm before
// reordering.
void LLVMSymbolizer::flush() {
  ObjectForUBPathAndArch.clear();
  LRUBinaries.clear();
  CacheSize = 0;
  BinaryForPath.clear();
  ObjectPairForPathArch.clear();
  Modules.clear();
  BuildIDPaths.clear();
}
296 
297 namespace {
298 
299 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in
300 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
301 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
302 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
303 std::string getDarwinDWARFResourceForPath(const std::string &Path,
304                                           const std::string &Basename) {
305   SmallString<16> ResourceName = StringRef(Path);
306   if (sys::path::extension(Path) != ".dSYM") {
307     ResourceName += ".dSYM";
308   }
309   sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
310   sys::path::append(ResourceName, Basename);
311   return std::string(ResourceName);
312 }
313 
314 bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
315   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
316       MemoryBuffer::getFileOrSTDIN(Path);
317   if (!MB)
318     return false;
319   return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
320 }
321 
322 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
323                              uint32_t &CRCHash) {
324   if (!Obj)
325     return false;
326   for (const SectionRef &Section : Obj->sections()) {
327     StringRef Name;
328     consumeError(Section.getName().moveInto(Name));
329 
330     Name = Name.substr(Name.find_first_not_of("._"));
331     if (Name == "gnu_debuglink") {
332       Expected<StringRef> ContentsOrErr = Section.getContents();
333       if (!ContentsOrErr) {
334         consumeError(ContentsOrErr.takeError());
335         return false;
336       }
337       DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
338       uint64_t Offset = 0;
339       if (const char *DebugNameStr = DE.getCStr(&Offset)) {
340         // 4-byte align the offset.
341         Offset = (Offset + 3) & ~0x3;
342         if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
343           DebugName = DebugNameStr;
344           CRCHash = DE.getU32(&Offset);
345           return true;
346         }
347       }
348       break;
349     }
350   }
351   return false;
352 }
353 
354 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
355                              const MachOObjectFile *Obj) {
356   ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
357   ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
358   if (dbg_uuid.empty() || bin_uuid.empty())
359     return false;
360   return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
361 }
362 
363 } // end anonymous namespace
364 
365 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
366                                            const MachOObjectFile *MachExeObj,
367                                            const std::string &ArchName) {
368   // On Darwin we may find DWARF in separate object file in
369   // resource directory.
370   std::vector<std::string> DsymPaths;
371   StringRef Filename = sys::path::filename(ExePath);
372   DsymPaths.push_back(
373       getDarwinDWARFResourceForPath(ExePath, std::string(Filename)));
374   for (const auto &Path : Opts.DsymHints) {
375     DsymPaths.push_back(
376         getDarwinDWARFResourceForPath(Path, std::string(Filename)));
377   }
378   for (const auto &Path : DsymPaths) {
379     auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
380     if (!DbgObjOrErr) {
381       // Ignore errors, the file might not exist.
382       consumeError(DbgObjOrErr.takeError());
383       continue;
384     }
385     ObjectFile *DbgObj = DbgObjOrErr.get();
386     if (!DbgObj)
387       continue;
388     const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
389     if (!MachDbgObj)
390       continue;
391     if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
392       return DbgObj;
393   }
394   return nullptr;
395 }
396 
397 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
398                                                   const ObjectFile *Obj,
399                                                   const std::string &ArchName) {
400   std::string DebuglinkName;
401   uint32_t CRCHash;
402   std::string DebugBinaryPath;
403   if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
404     return nullptr;
405   if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
406     return nullptr;
407   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
408   if (!DbgObjOrErr) {
409     // Ignore errors, the file might not exist.
410     consumeError(DbgObjOrErr.takeError());
411     return nullptr;
412   }
413   return DbgObjOrErr.get();
414 }
415 
416 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
417                                                 const ELFObjectFileBase *Obj,
418                                                 const std::string &ArchName) {
419   auto BuildID = getBuildID(Obj);
420   if (BuildID.size() < 2)
421     return nullptr;
422   std::string DebugBinaryPath;
423   if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
424     return nullptr;
425   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
426   if (!DbgObjOrErr) {
427     consumeError(DbgObjOrErr.takeError());
428     return nullptr;
429   }
430   return DbgObjOrErr.get();
431 }
432 
433 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
434                                      const std::string &DebuglinkName,
435                                      uint32_t CRCHash, std::string &Result) {
436   SmallString<16> OrigDir(OrigPath);
437   llvm::sys::path::remove_filename(OrigDir);
438   SmallString<16> DebugPath = OrigDir;
439   // Try relative/path/to/original_binary/debuglink_name
440   llvm::sys::path::append(DebugPath, DebuglinkName);
441   if (checkFileCRC(DebugPath, CRCHash)) {
442     Result = std::string(DebugPath);
443     return true;
444   }
445   // Try relative/path/to/original_binary/.debug/debuglink_name
446   DebugPath = OrigDir;
447   llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
448   if (checkFileCRC(DebugPath, CRCHash)) {
449     Result = std::string(DebugPath);
450     return true;
451   }
452   // Make the path absolute so that lookups will go to
453   // "/usr/lib/debug/full/path/to/debug", not
454   // "/usr/lib/debug/to/debug"
455   llvm::sys::fs::make_absolute(OrigDir);
456   if (!Opts.FallbackDebugPath.empty()) {
457     // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
458     DebugPath = Opts.FallbackDebugPath;
459   } else {
460 #if defined(__NetBSD__)
461     // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
462     DebugPath = "/usr/libdata/debug";
463 #else
464     // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
465     DebugPath = "/usr/lib/debug";
466 #endif
467   }
468   llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
469                           DebuglinkName);
470   if (checkFileCRC(DebugPath, CRCHash)) {
471     Result = std::string(DebugPath);
472     return true;
473   }
474   return false;
475 }
476 
477 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
478   return StringRef(reinterpret_cast<const char *>(BuildID.data()),
479                    BuildID.size());
480 }
481 
482 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
483                                           std::string &Result) {
484   StringRef BuildIDStr = getBuildIDStr(BuildID);
485   auto I = BuildIDPaths.find(BuildIDStr);
486   if (I != BuildIDPaths.end()) {
487     Result = I->second;
488     return true;
489   }
490   if (!BIDFetcher)
491     return false;
492   if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) {
493     Result = *Path;
494     auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
495     assert(InsertResult.second);
496     (void)InsertResult;
497     return true;
498   }
499 
500   return false;
501 }
502 
503 std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) {
504   if (Opts.DisableGsym)
505     return {};
506 
507   auto CheckGsymFile = [](const llvm::StringRef &GsymPath) {
508     sys::fs::file_status Status;
509     std::error_code EC = llvm::sys::fs::status(GsymPath, Status);
510     return !EC && !llvm::sys::fs::is_directory(Status);
511   };
512 
513   // First, look beside the binary file
514   if (const auto GsymPath = Path + ".gsym"; CheckGsymFile(GsymPath))
515     return GsymPath;
516 
517   // Then, look in the directories specified by GsymFileDirectory
518 
519   for (const auto &Directory : Opts.GsymFileDirectory) {
520     SmallString<16> GsymPath = llvm::StringRef{Directory};
521     llvm::sys::path::append(GsymPath,
522                             llvm::sys::path::filename(Path) + ".gsym");
523 
524     if (CheckGsymFile(GsymPath))
525       return static_cast<std::string>(GsymPath);
526   }
527 
528   return {};
529 }
530 
531 Expected<LLVMSymbolizer::ObjectPair>
532 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
533                                       const std::string &ArchName) {
534   auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
535   if (I != ObjectPairForPathArch.end()) {
536     recordAccess(BinaryForPath.find(Path)->second);
537     return I->second;
538   }
539 
540   auto ObjOrErr = getOrCreateObject(Path, ArchName);
541   if (!ObjOrErr) {
542     ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
543                                   ObjectPair(nullptr, nullptr));
544     return ObjOrErr.takeError();
545   }
546 
547   ObjectFile *Obj = ObjOrErr.get();
548   assert(Obj != nullptr);
549   ObjectFile *DbgObj = nullptr;
550 
551   if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
552     DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
553   else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
554     DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
555   if (!DbgObj)
556     DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
557   if (!DbgObj)
558     DbgObj = Obj;
559   ObjectPair Res = std::make_pair(Obj, DbgObj);
560   std::string DbgObjPath = DbgObj->getFileName().str();
561   auto Pair =
562       ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
563   BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
564     ObjectPairForPathArch.erase(I);
565   });
566   return Res;
567 }
568 
// Return the ObjectFile for Path (and, for Mach-O universal binaries, for
// the ArchName slice), loading and caching the underlying binary on first
// use.
//
// Returns:
//  - the object on success;
//  - a null pointer (not an error) when the cached entry for Path holds no
//    binary;
//  - an error when the binary cannot be created, the requested slice cannot
//    be extracted, or the binary is neither a universal binary nor an object
//    (object_error::arch_not_found).
Expected<ObjectFile *>
LLVMSymbolizer::getOrCreateObject(const std::string &Path,
                                  const std::string &ArchName) {
  Binary *Bin;
  // Insert a placeholder first; Pair.second tells us whether Path was already
  // known.
  auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
  if (!Pair.second) {
    // Already cached: reuse it and mark it as recently used.
    Bin = Pair.first->second->getBinary();
    recordAccess(Pair.first->second);
  } else {
    Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
    // On failure the placeholder entry is left in BinaryForPath, so later
    // calls for the same Path take the cached branch above.
    if (!BinOrErr)
      return BinOrErr.takeError();

    CachedBinary &CachedBin = Pair.first->second;
    CachedBin = std::move(BinOrErr.get());
    // When evicted, the binary removes its own map entry.
    CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
    // Newly loaded binaries go to the back of the LRU list and count towards
    // the cache size.
    LRUBinaries.push_back(CachedBin);
    CacheSize += CachedBin.size();
    Bin = CachedBin->getBinary();
  }

  if (!Bin)
    return static_cast<ObjectFile *>(nullptr);

  if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
    // Universal binary: slices are cached per (Path, ArchName).
    auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
    if (I != ObjectForUBPathAndArch.end())
      return I->second.get();

    Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
        UB->getMachOObjectForArch(ArchName);
    if (!ObjOrErr) {
      // Cache the failure as a null object so the lookup above short-circuits
      // next time.
      ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
                                     std::unique_ptr<ObjectFile>());
      return ObjOrErr.takeError();
    }
    // Grab the raw pointer before ownership moves into the cache.
    ObjectFile *Res = ObjOrErr->get();
    auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
                                               std::move(ObjOrErr.get()));
    // The slice is dropped together with the universal binary that backs it.
    BinaryForPath.find(Path)->second.pushEvictor(
        [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
    return Res;
  }
  if (Bin->isObject()) {
    return cast<ObjectFile>(Bin);
  }
  return errorCodeToError(object_error::arch_not_found);
}
617 
618 Expected<SymbolizableModule *>
619 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
620                                  std::unique_ptr<DIContext> Context,
621                                  StringRef ModuleName) {
622   auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context),
623                                                   Opts.UntagAddresses);
624   std::unique_ptr<SymbolizableModule> SymMod;
625   if (InfoOrErr)
626     SymMod = std::move(*InfoOrErr);
627   auto InsertResult = Modules.insert(
628       std::make_pair(std::string(ModuleName), std::move(SymMod)));
629   assert(InsertResult.second);
630   if (!InfoOrErr)
631     return InfoOrErr.takeError();
632   return InsertResult.first->second.get();
633 }
634 
635 Expected<SymbolizableModule *>
636 LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
637   StringRef BinaryName = ModuleName;
638   StringRef ArchName = Opts.DefaultArch;
639   size_t ColonPos = ModuleName.find_last_of(':');
640   // Verify that substring after colon form a valid arch name.
641   if (ColonPos != std::string::npos) {
642     StringRef ArchStr = ModuleName.substr(ColonPos + 1);
643     if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
644       BinaryName = ModuleName.substr(0, ColonPos);
645       ArchName = ArchStr;
646     }
647   }
648 
649   auto I = Modules.find(ModuleName);
650   if (I != Modules.end()) {
651     recordAccess(BinaryForPath.find(BinaryName)->second);
652     return I->second.get();
653   }
654 
655   auto ObjectsOrErr =
656       getOrCreateObjectPair(std::string{BinaryName}, std::string{ArchName});
657   if (!ObjectsOrErr) {
658     // Failed to find valid object file.
659     Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
660     return ObjectsOrErr.takeError();
661   }
662   ObjectPair Objects = ObjectsOrErr.get();
663 
664   std::unique_ptr<DIContext> Context;
665   // If this is a COFF object containing PDB info and not containing DWARF
666   // section, use a PDBContext to symbolize. Otherwise, use DWARF.
667   // Create a DIContext to symbolize as follows:
668   // - If there is a GSYM file, create a GsymContext.
669   // - Otherwise, if this is a COFF object containing PDB info, create a
670   // PDBContext.
671   // - Otherwise, create a DWARFContext.
672   const auto GsymFile = lookUpGsymFile(BinaryName.str());
673   if (!GsymFile.empty()) {
674     auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);
675 
676     if (ReaderOrErr) {
677       std::unique_ptr<gsym::GsymReader> Reader =
678           std::make_unique<gsym::GsymReader>(std::move(*ReaderOrErr));
679 
680       Context = std::make_unique<gsym::GsymContext>(std::move(Reader));
681     }
682   }
683   if (!Context) {
684     if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
685       const codeview::DebugInfo *DebugInfo;
686       StringRef PDBFileName;
687       auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
688       // Use DWARF if there're DWARF sections.
689       bool HasDwarf = llvm::any_of(
690           Objects.first->sections(), [](SectionRef Section) -> bool {
691             if (Expected<StringRef> SectionName = Section.getName())
692               return SectionName.get() == ".debug_info";
693             return false;
694           });
695       if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
696 #if 0
697         using namespace pdb;
698         std::unique_ptr<IPDBSession> Session;
699 
700         PDB_ReaderType ReaderType =
701             Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
702         if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
703                                       Session)) {
704           Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
705           // Return along the PDB filename to provide more context
706           return createFileError(PDBFileName, std::move(Err));
707         }
708         Context.reset(new PDBContext(*CoffObject, std::move(Session)));
709 #else
710         return make_error<StringError>(
711           "PDB support not compiled in",
712           std::make_error_code(std::errc::not_supported));
713 #endif
714       }
715     }
716   }
717   if (!Context)
718     Context = DWARFContext::create(
719         *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
720         nullptr, Opts.DWPName);
721   auto ModuleOrErr =
722       createModuleInfo(Objects.first, std::move(Context), ModuleName);
723   if (ModuleOrErr) {
724     auto I = Modules.find(ModuleName);
725     BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
726       Modules.erase(I);
727     });
728   }
729   return ModuleOrErr;
730 }
731 
732 // For BPF programs .BTF.ext section contains line numbers information,
733 // use it if regular DWARF is not available (e.g. for stripped binary).
734 static bool useBTFContext(const ObjectFile &Obj) {
735   return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
736          BTFParser::hasBTFSections(Obj);
737 }
738 
739 Expected<SymbolizableModule *>
740 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
741   StringRef ObjName = Obj.getFileName();
742   auto I = Modules.find(ObjName);
743   if (I != Modules.end())
744     return I->second.get();
745 
746   std::unique_ptr<DIContext> Context;
747   if (useBTFContext(Obj))
748     Context = BTFContext::create(Obj);
749   else
750     Context = DWARFContext::create(Obj);
751   // FIXME: handle COFF object with PDB info to use PDBContext
752   return createModuleInfo(&Obj, std::move(Context), ObjName);
753 }
754 
755 Expected<SymbolizableModule *>
756 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
757   std::string Path;
758   if (!getOrFindDebugBinary(BuildID, Path)) {
759     return createStringError(errc::no_such_file_or_directory,
760                              "could not find build ID");
761   }
762   return getOrCreateModuleInfo(Path);
763 }
764 
765 namespace {
766 
767 // Undo these various manglings for Win32 extern "C" functions:
768 // cdecl       - _foo
769 // stdcall     - _foo@12
770 // fastcall    - @foo@12
771 // vectorcall  - foo@@12
772 // These are all different linkage names for 'foo'.
773 StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
774   char Front = SymbolName.empty() ? '\0' : SymbolName[0];
775 
776   // Remove any '@[0-9]+' suffix.
777   bool HasAtNumSuffix = false;
778   if (Front != '?') {
779     size_t AtPos = SymbolName.rfind('@');
780     if (AtPos != StringRef::npos &&
781         all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) {
782       SymbolName = SymbolName.substr(0, AtPos);
783       HasAtNumSuffix = true;
784     }
785   }
786 
787   // Remove any ending '@' for vectorcall.
788   bool IsVectorCall = false;
789   if (HasAtNumSuffix && SymbolName.ends_with("@")) {
790     SymbolName = SymbolName.drop_back();
791     IsVectorCall = true;
792   }
793 
794   // If not vectorcall, remove any '_' or '@' prefix.
795   if (!IsVectorCall && (Front == '_' || Front == '@'))
796     SymbolName = SymbolName.drop_front();
797 
798   return SymbolName;
799 }
800 
801 } // end anonymous namespace
802 
803 std::string
804 LLVMSymbolizer::DemangleName(StringRef Name,
805                              const SymbolizableModule *DbiModuleDescriptor) {
806   std::string Result;
807   if (nonMicrosoftDemangle(Name, Result))
808     return Result;
809 
810   if (Name.starts_with('?')) {
811     // Only do MSVC C++ demangling on symbols starting with '?'.
812     int status = 0;
813     char *DemangledName = microsoftDemangle(
814         Name, nullptr, &status,
815         MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
816                         MSDF_NoMemberType | MSDF_NoReturnType));
817     if (status != 0)
818       return std::string{Name};
819     Result = DemangledName;
820     free(DemangledName);
821     return Result;
822   }
823 
824   if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
825     std::string DemangledCName(demanglePE32ExternCFunc(Name));
826     // On i386 Windows, the C name mangling for different calling conventions
827     // may also be applied on top of the Itanium or Rust name mangling.
828     if (nonMicrosoftDemangle(DemangledCName, Result))
829       return Result;
830     return DemangledCName;
831   }
832   return std::string{Name};
833 }
834 
835 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
836   if (Bin->getBinary())
837     LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
838 }
839 
840 void LLVMSymbolizer::pruneCache() {
841   // Evict the LRU binary until the max cache size is reached or there's <= 1
842   // item in the cache. The MRU binary is always kept to avoid thrashing if it's
843   // larger than the cache size.
844   while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
845          std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
846     CachedBinary &Bin = LRUBinaries.front();
847     CacheSize -= Bin.size();
848     LRUBinaries.pop_front();
849     Bin.evict();
850   }
851 }
852 
853 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
854   if (Evictor) {
855     this->Evictor = [OldEvictor = std::move(this->Evictor),
856                      NewEvictor = std::move(NewEvictor)]() {
857       NewEvictor();
858       OldEvictor();
859     };
860   } else {
861     this->Evictor = std::move(NewEvictor);
862   }
863 }
864 
865 } // namespace symbolize
866 } // namespace llvm
867