xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- DwarfTransformer.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <thread>
10 #include <unordered_set>
11 
12 #include "llvm/DebugInfo/DIContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/ThreadPool.h"
17 #include "llvm/Support/raw_ostream.h"
18 
19 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
20 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
21 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
22 #include "llvm/DebugInfo/GSYM/GsymReader.h"
23 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
24 #include "llvm/DebugInfo/GSYM/OutputAggregator.h"
25 
26 #include <optional>
27 
28 using namespace llvm;
29 using namespace gsym;
30 
31 struct llvm::gsym::CUInfo {
32   const DWARFDebugLine::LineTable *LineTable;
33   const char *CompDir;
34   std::vector<uint32_t> FileCache;
35   uint64_t Language = 0;
36   uint8_t AddrSize = 0;
37 
CUInfollvm::gsym::CUInfo38   CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
39     LineTable = DICtx.getLineTableForUnit(CU);
40     CompDir = CU->getCompilationDir();
41     FileCache.clear();
42     if (LineTable)
43       FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
44     DWARFDie Die = CU->getUnitDIE();
45     Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
46     AddrSize = CU->getAddressByteSize();
47   }
48 
49   /// Return true if Addr is the highest address for a given compile unit. The
50   /// highest address is encoded as -1, of all ones in the address. These high
51   /// addresses are used by some linkers to indicate that a function has been
52   /// dead stripped or didn't end up in the linked executable.
isHighestAddressllvm::gsym::CUInfo53   bool isHighestAddress(uint64_t Addr) const {
54     if (AddrSize == 4)
55       return Addr == UINT32_MAX;
56     else if (AddrSize == 8)
57       return Addr == UINT64_MAX;
58     return false;
59   }
60 
61   /// Convert a DWARF compile unit file index into a GSYM global file index.
62   ///
63   /// Each compile unit in DWARF has its own file table in the line table
64   /// prologue. GSYM has a single large file table that applies to all files
65   /// from all of the info in a GSYM file. This function converts between the
66   /// two and caches and DWARF CU file index that has already been converted so
67   /// the first client that asks for a compile unit file index will end up
68   /// doing the conversion, and subsequent clients will get the cached GSYM
69   /// index.
DWARFToGSYMFileIndexllvm::gsym::CUInfo70   std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
71                                                uint32_t DwarfFileIdx) {
72     if (!LineTable || DwarfFileIdx >= FileCache.size())
73       return std::nullopt;
74     uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
75     if (GsymFileIdx != UINT32_MAX)
76       return GsymFileIdx;
77     std::string File;
78     if (LineTable->getFileNameByIndex(
79             DwarfFileIdx, CompDir,
80             DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
81       GsymFileIdx = Gsym.insertFile(File);
82     else
83       GsymFileIdx = 0;
84     return GsymFileIdx;
85   }
86 };
87 
88 
GetParentDeclContextDIE(DWARFDie & Die)89 static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
90   if (DWARFDie SpecDie =
91           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
92     if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
93       return SpecParent;
94   }
95   if (DWARFDie AbstDie =
96           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
97     if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
98       return AbstParent;
99   }
100 
101   // We never want to follow parent for inlined subroutine - that would
102   // give us information about where the function is inlined, not what
103   // function is inlined
104   if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
105     return DWARFDie();
106 
107   DWARFDie ParentDie = Die.getParent();
108   if (!ParentDie)
109     return DWARFDie();
110 
111   switch (ParentDie.getTag()) {
112   case dwarf::DW_TAG_namespace:
113   case dwarf::DW_TAG_structure_type:
114   case dwarf::DW_TAG_union_type:
115   case dwarf::DW_TAG_class_type:
116   case dwarf::DW_TAG_subprogram:
117     return ParentDie; // Found parent decl context DIE
118   case dwarf::DW_TAG_lexical_block:
119     return GetParentDeclContextDIE(ParentDie);
120   default:
121     break;
122   }
123 
124   return DWARFDie();
125 }
126 
127 /// Get the GsymCreator string table offset for the qualified name for the
128 /// DIE passed in. This function will avoid making copies of any strings in
129 /// the GsymCreator when possible. We don't need to copy a string when the
130 /// string comes from our .debug_str section or is an inlined string in the
131 /// .debug_info. If we create a qualified name string in this function by
132 /// combining multiple strings in the DWARF string table or info, we will make
133 /// a copy of the string when we add it to the string table.
134 static std::optional<uint32_t>
getQualifiedNameIndex(DWARFDie & Die,uint64_t Language,GsymCreator & Gsym)135 getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
136   // If the dwarf has mangled name, use mangled name
137   if (auto LinkageName = Die.getLinkageName()) {
138     // We have seen cases were linkage name is actually empty.
139     if (strlen(LinkageName) > 0)
140       return Gsym.insertString(LinkageName, /* Copy */ false);
141   }
142 
143   StringRef ShortName(Die.getName(DINameKind::ShortName));
144   if (ShortName.empty())
145     return std::nullopt;
146 
147   // For C++ and ObjC, prepend names of all parent declaration contexts
148   if (!(Language == dwarf::DW_LANG_C_plus_plus ||
149         Language == dwarf::DW_LANG_C_plus_plus_03 ||
150         Language == dwarf::DW_LANG_C_plus_plus_11 ||
151         Language == dwarf::DW_LANG_C_plus_plus_14 ||
152         Language == dwarf::DW_LANG_ObjC_plus_plus ||
153         // This should not be needed for C, but we see C++ code marked as C
154         // in some binaries. This should hurt, so let's do it for C as well
155         Language == dwarf::DW_LANG_C))
156     return Gsym.insertString(ShortName, /* Copy */ false);
157 
158   // Some GCC optimizations create functions with names ending with .isra.<num>
159   // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
160   // If it looks like it could be the case, don't add any prefix
161   if (ShortName.starts_with("_Z") &&
162       (ShortName.contains(".isra.") || ShortName.contains(".part.")))
163     return Gsym.insertString(ShortName, /* Copy */ false);
164 
165   DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
166   if (ParentDeclCtxDie) {
167     std::string Name = ShortName.str();
168     while (ParentDeclCtxDie) {
169       StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
170       if (!ParentName.empty()) {
171         // "lambda" names are wrapped in < >. Replace with { }
172         // to be consistent with demangled names and not to confuse with
173         // templates
174         if (ParentName.front() == '<' && ParentName.back() == '>')
175           Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
176                 "::" + Name;
177         else
178           Name = ParentName.str() + "::" + Name;
179       }
180       ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
181     }
182     // Copy the name since we created a new name in a std::string.
183     return Gsym.insertString(Name, /* Copy */ true);
184   }
185   // Don't copy the name since it exists in the DWARF object file.
186   return Gsym.insertString(ShortName, /* Copy */ false);
187 }
188 
hasInlineInfo(DWARFDie Die,uint32_t Depth)189 static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
190   bool CheckChildren = true;
191   switch (Die.getTag()) {
192   case dwarf::DW_TAG_subprogram:
193     // Don't look into functions within functions.
194     CheckChildren = Depth == 0;
195     break;
196   case dwarf::DW_TAG_inlined_subroutine:
197     return true;
198   default:
199     break;
200   }
201   if (!CheckChildren)
202     return false;
203   for (DWARFDie ChildDie : Die.children()) {
204     if (hasInlineInfo(ChildDie, Depth + 1))
205       return true;
206   }
207   return false;
208 }
209 
210 static AddressRanges
ConvertDWARFRanges(const DWARFAddressRangesVector & DwarfRanges)211 ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
212   AddressRanges Ranges;
213   for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
214     if (DwarfRange.LowPC < DwarfRange.HighPC)
215       Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC});
216   }
217   return Ranges;
218 }
219 
parseInlineInfo(GsymCreator & Gsym,OutputAggregator & Out,CUInfo & CUI,DWARFDie Die,uint32_t Depth,FunctionInfo & FI,InlineInfo & Parent,const AddressRanges & AllParentRanges,bool & WarnIfEmpty)220 static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out,
221                             CUInfo &CUI, DWARFDie Die, uint32_t Depth,
222                             FunctionInfo &FI, InlineInfo &Parent,
223                             const AddressRanges &AllParentRanges,
224                             bool &WarnIfEmpty) {
225   if (!hasInlineInfo(Die, Depth))
226     return;
227 
228   dwarf::Tag Tag = Die.getTag();
229   if (Tag == dwarf::DW_TAG_inlined_subroutine) {
230     // create new InlineInfo and append to parent.children
231     InlineInfo II;
232     AddressRanges AllInlineRanges;
233     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
234     if (RangesOrError) {
235       AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
236       uint32_t EmptyCount = 0;
237       for (const AddressRange &InlineRange : AllInlineRanges) {
238         // Check for empty inline range in case inline function was outlined
239         // or has not code
240         if (InlineRange.empty()) {
241           ++EmptyCount;
242         } else {
243           if (Parent.Ranges.contains(InlineRange)) {
244             II.Ranges.insert(InlineRange);
245           } else {
246             // Only warn if the current inline range is not within any of all
247             // of the parent ranges. If we have a DW_TAG_subpgram with multiple
248             // ranges we will emit a FunctionInfo for each range of that
249             // function that only emits information within the current range,
250             // so we only want to emit an error if the DWARF has issues, not
251             // when a range currently just isn't in the range we are currently
252             // parsing for.
253             if (AllParentRanges.contains(InlineRange)) {
254               WarnIfEmpty = false;
255             } else
256               Out.Report("Function DIE has uncontained address range",
257                          [&](raw_ostream &OS) {
258                            OS << "error: inlined function DIE at "
259                               << HEX32(Die.getOffset()) << " has a range ["
260                               << HEX64(InlineRange.start()) << " - "
261                               << HEX64(InlineRange.end())
262                               << ") that isn't contained in "
263                               << "any parent address ranges, this inline range "
264                                  "will be "
265                                  "removed.\n";
266                          });
267           }
268         }
269       }
270       // If we have all empty ranges for the inlines, then don't warn if we
271       // have an empty InlineInfo at the top level as all inline functions
272       // were elided.
273       if (EmptyCount == AllInlineRanges.size())
274         WarnIfEmpty = false;
275     }
276     if (II.Ranges.empty())
277       return;
278 
279     if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
280       II.Name = *NameIndex;
281     const uint64_t DwarfFileIdx = dwarf::toUnsigned(
282         Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
283     std::optional<uint32_t> OptGSymFileIdx =
284         CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
285     if (OptGSymFileIdx) {
286       II.CallFile = OptGSymFileIdx.value();
287       II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
288       // parse all children and append to parent
289       for (DWARFDie ChildDie : Die.children())
290         parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, II,
291                         AllInlineRanges, WarnIfEmpty);
292       Parent.Children.emplace_back(std::move(II));
293     } else
294       Out.Report(
295           "Inlined function die has invlaid file index in DW_AT_call_file",
296           [&](raw_ostream &OS) {
297             OS << "error: inlined function DIE at " << HEX32(Die.getOffset())
298                << " has an invalid file index " << DwarfFileIdx
299                << " in its DW_AT_call_file attribute, this inline entry and "
300                   "all "
301                << "children will be removed.\n";
302           });
303     return;
304   }
305   if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
306     // skip this Die and just recurse down
307     for (DWARFDie ChildDie : Die.children())
308       parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, Parent,
309                       AllParentRanges, WarnIfEmpty);
310   }
311 }
312 
convertFunctionLineTable(OutputAggregator & Out,CUInfo & CUI,DWARFDie Die,GsymCreator & Gsym,FunctionInfo & FI)313 static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
314                                      DWARFDie Die, GsymCreator &Gsym,
315                                      FunctionInfo &FI) {
316   std::vector<uint32_t> RowVector;
317   const uint64_t StartAddress = FI.startAddress();
318   const uint64_t EndAddress = FI.endAddress();
319   const uint64_t RangeSize = EndAddress - StartAddress;
320   const object::SectionedAddress SecAddress{
321       StartAddress, object::SectionedAddress::UndefSection};
322 
323 
324   if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
325     // If we have a DW_TAG_subprogram but no line entries, fall back to using
326     // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
327     std::string FilePath = Die.getDeclFile(
328         DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
329     if (FilePath.empty()) {
330       // If we had a DW_AT_decl_file, but got no file then we need to emit a
331       // warning.
332       Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
333         const uint64_t DwarfFileIdx = dwarf::toUnsigned(
334             Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
335         OS << "error: function DIE at " << HEX32(Die.getOffset())
336            << " has an invalid file index " << DwarfFileIdx
337            << " in its DW_AT_decl_file attribute, unable to create a single "
338            << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
339            << "attributes.\n";
340       });
341       return;
342     }
343     if (auto Line =
344             dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
345       LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
346       FI.OptLineTable = LineTable();
347       FI.OptLineTable->push(LE);
348     }
349     return;
350   }
351 
352   FI.OptLineTable = LineTable();
353   DWARFDebugLine::Row PrevRow;
354   for (uint32_t RowIndex : RowVector) {
355     // Take file number and line/column from the row.
356     const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
357     std::optional<uint32_t> OptFileIdx =
358         CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
359     if (!OptFileIdx) {
360       Out.Report(
361           "Invalid file index in DWARF line table", [&](raw_ostream &OS) {
362             OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
363                << "a line entry with invalid DWARF file index, this entry will "
364                << "be removed:\n";
365             Row.dumpTableHeader(OS, /*Indent=*/0);
366             Row.dump(OS);
367             OS << "\n";
368           });
369       continue;
370     }
371     const uint32_t FileIdx = OptFileIdx.value();
372     uint64_t RowAddress = Row.Address.Address;
373     // Watch out for a RowAddress that is in the middle of a line table entry
374     // in the DWARF. If we pass an address in between two line table entries
375     // we will get a RowIndex for the previous valid line table row which won't
376     // be contained in our function. This is usually a bug in the DWARF due to
377     // linker problems or LTO or other DWARF re-linking so it is worth emitting
378     // an error, but not worth stopping the creation of the GSYM.
379     if (!FI.Range.contains(RowAddress)) {
380       if (RowAddress < FI.Range.start()) {
381         Out.Report("Start address lies between valid Row table entries",
382                    [&](raw_ostream &OS) {
383                      OS << "error: DIE has a start address whose LowPC is "
384                            "between the "
385                            "line table Row["
386                         << RowIndex << "] with address " << HEX64(RowAddress)
387                         << " and the next one.\n";
388                      Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
389                    });
390         RowAddress = FI.Range.start();
391       } else {
392         continue;
393       }
394     }
395 
396     LineEntry LE(RowAddress, FileIdx, Row.Line);
397     if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
398       // We have seen full duplicate line tables for functions in some
399       // DWARF files. Watch for those here by checking the last
400       // row was the function's end address (HighPC) and that the
401       // current line table entry's address is the same as the first
402       // line entry we already have in our "function_info.Lines". If
403       // so break out after printing a warning.
404       auto FirstLE = FI.OptLineTable->first();
405       if (FirstLE && *FirstLE == LE)
406         // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird
407         Out.Report("Duplicate line table detected", [&](raw_ostream &OS) {
408           OS << "warning: duplicate line table detected for DIE:\n";
409           Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
410         });
411       else
412         Out.Report("Non-monotonically increasing addresses",
413                    [&](raw_ostream &OS) {
414                      OS << "error: line table has addresses that do not "
415                         << "monotonically increase:\n";
416                      for (uint32_t RowIndex2 : RowVector)
417                        CUI.LineTable->Rows[RowIndex2].dump(OS);
418                      Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
419                    });
420       break;
421     }
422 
423     // Skip multiple line entries for the same file and line.
424     auto LastLE = FI.OptLineTable->last();
425     if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
426         continue;
427     // Only push a row if it isn't an end sequence. End sequence markers are
428     // included for the last address in a function or the last contiguous
429     // address in a sequence.
430     if (Row.EndSequence) {
431       // End sequence means that the next line entry could have a lower address
432       // that the previous entries. So we clear the previous row so we don't
433       // trigger the line table error about address that do not monotonically
434       // increase.
435       PrevRow = DWARFDebugLine::Row();
436     } else {
437       FI.OptLineTable->push(LE);
438       PrevRow = Row;
439     }
440   }
441   // If not line table rows were added, clear the line table so we don't encode
442   // on in the GSYM file.
443   if (FI.OptLineTable->empty())
444     FI.OptLineTable = std::nullopt;
445 }
446 
handleDie(OutputAggregator & Out,CUInfo & CUI,DWARFDie Die)447 void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
448                                  DWARFDie Die) {
449   switch (Die.getTag()) {
450   case dwarf::DW_TAG_subprogram: {
451     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
452     if (!RangesOrError) {
453       consumeError(RangesOrError.takeError());
454       break;
455     }
456     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
457     if (Ranges.empty())
458       break;
459     auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
460     if (!NameIndex) {
461       Out.Report("Function has no name", [&](raw_ostream &OS) {
462         OS << "error: function at " << HEX64(Die.getOffset())
463            << " has no name\n ";
464         Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
465       });
466       break;
467     }
468     // All ranges for the subprogram DIE in case it has multiple. We need to
469     // pass this down into parseInlineInfo so we don't warn about inline
470     // ranges that are not in the current subrange of a function when they
471     // actually are in another subgrange. We do this because when a function
472     // has discontiguos ranges, we create multiple function entries with only
473     // the info for that range contained inside of it.
474     AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
475 
476     // Create a function_info for each range
477     for (const DWARFAddressRange &Range : Ranges) {
478       // The low PC must be less than the high PC. Many linkers don't remove
479       // DWARF for functions that don't get linked into the final executable.
480       // If both the high and low pc have relocations, linkers will often set
481       // the address values for both to the same value to indicate the function
482       // has been remove. Other linkers have been known to set the one or both
483       // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
484       // byte addresses to indicate the function isn't valid. The check below
485       // tries to watch for these cases and abort if it runs into them.
486       if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
487         break;
488 
489       // Many linkers can't remove DWARF and might set the LowPC to zero. Since
490       // high PC can be an offset from the low PC in more recent DWARF versions
491       // we need to watch for a zero'ed low pc which we do using ValidTextRanges
492       // below.
493       if (!Gsym.IsValidTextAddress(Range.LowPC)) {
494         // We expect zero and -1 to be invalid addresses in DWARF depending
495         // on the linker of the DWARF. This indicates a function was stripped
496         // and the debug info wasn't able to be stripped from the DWARF. If
497         // the LowPC isn't zero or -1, then we should emit an error.
498         if (Range.LowPC != 0) {
499           if (!Gsym.isQuiet()) {
500             // Unexpected invalid address, emit a warning
501             Out.Report("Address range starts outside executable section",
502                        [&](raw_ostream &OS) {
503                          OS << "warning: DIE has an address range whose "
504                                "start address "
505                                "is not in any executable sections ("
506                             << *Gsym.GetValidTextRanges()
507                             << ") and will not be processed:\n";
508                          Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
509                        });
510           }
511         }
512         break;
513       }
514 
515       FunctionInfo FI;
516       FI.Range = {Range.LowPC, Range.HighPC};
517       FI.Name = *NameIndex;
518       if (CUI.LineTable)
519         convertFunctionLineTable(Out, CUI, Die, Gsym, FI);
520 
521       if (hasInlineInfo(Die, 0)) {
522         FI.Inline = InlineInfo();
523         FI.Inline->Name = *NameIndex;
524         FI.Inline->Ranges.insert(FI.Range);
525         bool WarnIfEmpty = true;
526         parseInlineInfo(Gsym, Out, CUI, Die, 0, FI, *FI.Inline,
527                         AllSubprogramRanges, WarnIfEmpty);
528         // Make sure we at least got some valid inline info other than just
529         // the top level function. If we didn't then remove the inline info
530         // from the function info. We have seen cases where LTO tries to modify
531         // the DWARF for functions and it messes up the address ranges for
532         // the inline functions so it is no longer valid.
533         //
534         // By checking if there are any valid children on the top level inline
535         // information object, we will know if we got anything valid from the
536         // debug info.
537         if (FI.Inline->Children.empty()) {
538           if (WarnIfEmpty && !Gsym.isQuiet())
539             Out.Report("DIE contains inline functions with no valid ranges",
540                        [&](raw_ostream &OS) {
541                          OS << "warning: DIE contains inline function "
542                                "information that has no valid ranges, removing "
543                                "inline information:\n";
544                          Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
545                        });
546           FI.Inline = std::nullopt;
547         }
548       }
549       Gsym.addFunctionInfo(std::move(FI));
550     }
551   } break;
552   default:
553     break;
554   }
555   for (DWARFDie ChildDie : Die.children())
556     handleDie(Out, CUI, ChildDie);
557 }
558 
convert(uint32_t NumThreads,OutputAggregator & Out)559 Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
560   size_t NumBefore = Gsym.getNumFunctionInfos();
561   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
562     DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
563     if (DwarfUnit.getDWOId()) {
564       DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
565       if (!DWOCU->isDWOUnit())
566         Out.Report(
567             "warning: Unable to retrieve DWO .debug_info section for some "
568             "object files. (Remove the --quiet flag for full output)",
569             [&](raw_ostream &OS) {
570               std::string DWOName = dwarf::toString(
571                   DwarfUnit.getUnitDIE().find(
572                       {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
573                   "");
574               OS << "warning: Unable to retrieve DWO .debug_info section for "
575                  << DWOName << "\n";
576             });
577       else {
578         ReturnDie = DWOCU->getUnitDIE(false);
579       }
580     }
581     return ReturnDie;
582   };
583   if (NumThreads == 1) {
584     // Parse all DWARF data from this thread, use the same string/file table
585     // for everything
586     for (const auto &CU : DICtx.compile_units()) {
587       DWARFDie Die = getDie(*CU);
588       CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
589       handleDie(Out, CUI, Die);
590     }
591   } else {
592     // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
593     // front before we start accessing any DIEs since there might be
594     // cross compile unit references in the DWARF. If we don't do this we can
595     // end up crashing.
596 
597     // We need to call getAbbreviations sequentially first so that getUnitDIE()
598     // only works with its local data.
599     for (const auto &CU : DICtx.compile_units())
600       CU->getAbbreviations();
601 
602     // Now parse all DIEs in case we have cross compile unit references in a
603     // thread pool.
604     DefaultThreadPool pool(hardware_concurrency(NumThreads));
605     for (const auto &CU : DICtx.compile_units())
606       pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
607     pool.wait();
608 
609     // Now convert all DWARF to GSYM in a thread pool.
610     std::mutex LogMutex;
611     for (const auto &CU : DICtx.compile_units()) {
612       DWARFDie Die = getDie(*CU);
613       if (Die) {
614         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
615         pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
616           std::string storage;
617           raw_string_ostream StrStream(storage);
618           OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
619           handleDie(ThreadOut, CUI, Die);
620           // Print ThreadLogStorage lines into an actual stream under a lock
621           std::lock_guard<std::mutex> guard(LogMutex);
622           if (Out.GetOS()) {
623             StrStream.flush();
624             Out << storage;
625           }
626           Out.Merge(ThreadOut);
627         });
628       }
629     }
630     pool.wait();
631   }
632   size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
633   Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
634   return Error::success();
635 }
636 
verify(StringRef GsymPath,OutputAggregator & Out)637 llvm::Error DwarfTransformer::verify(StringRef GsymPath,
638                                      OutputAggregator &Out) {
639   Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
640 
641   auto Gsym = GsymReader::openFile(GsymPath);
642   if (!Gsym)
643     return Gsym.takeError();
644 
645   auto NumAddrs = Gsym->getNumAddresses();
646   DILineInfoSpecifier DLIS(
647       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
648       DILineInfoSpecifier::FunctionNameKind::LinkageName);
649   std::string gsymFilename;
650   for (uint32_t I = 0; I < NumAddrs; ++I) {
651     auto FuncAddr = Gsym->getAddress(I);
652     if (!FuncAddr)
653         return createStringError(std::errc::invalid_argument,
654                                   "failed to extract address[%i]", I);
655 
656     auto FI = Gsym->getFunctionInfo(*FuncAddr);
657     if (!FI)
658       return createStringError(
659           std::errc::invalid_argument,
660           "failed to extract function info for address 0x%" PRIu64, *FuncAddr);
661 
662     for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
663       const object::SectionedAddress SectAddr{
664           Addr, object::SectionedAddress::UndefSection};
665       auto LR = Gsym->lookup(Addr);
666       if (!LR)
667         return LR.takeError();
668 
669       auto DwarfInlineInfos =
670           DICtx.getInliningInfoForAddress(SectAddr, DLIS);
671       uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
672       if (NumDwarfInlineInfos == 0) {
673         DwarfInlineInfos.addFrame(
674             DICtx.getLineInfoForAddress(SectAddr, DLIS));
675       }
676 
677       // Check for 1 entry that has no file and line info
678       if (NumDwarfInlineInfos == 1 &&
679           DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
680         DwarfInlineInfos = DIInliningInfo();
681         NumDwarfInlineInfos = 0;
682       }
683       if (NumDwarfInlineInfos > 0 &&
684           NumDwarfInlineInfos != LR->Locations.size()) {
685         if (Out.GetOS()) {
686           raw_ostream &Log = *Out.GetOS();
687           Log << "error: address " << HEX64(Addr) << " has "
688               << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
689               << LR->Locations.size() << "\n";
690           Log << "    " << NumDwarfInlineInfos << " DWARF frames:\n";
691           for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
692             const auto &dii = DwarfInlineInfos.getFrame(Idx);
693             Log << "    [" << Idx << "]: " << dii.FunctionName << " @ "
694                 << dii.FileName << ':' << dii.Line << '\n';
695           }
696           Log << "    " << LR->Locations.size() << " GSYM frames:\n";
697           for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
698                ++Idx) {
699             const auto &gii = LR->Locations[Idx];
700             Log << "    [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
701                 << '/' << gii.Base << ':' << gii.Line << '\n';
702           }
703           DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS);
704           Gsym->dump(Log, *FI);
705         }
706         continue;
707       }
708 
709       for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
710             ++Idx) {
711         const auto &gii = LR->Locations[Idx];
712         if (Idx < NumDwarfInlineInfos) {
713           const auto &dii = DwarfInlineInfos.getFrame(Idx);
714           gsymFilename = LR->getSourceFile(Idx);
715           // Verify function name
716           if (dii.FunctionName.find(gii.Name.str()) != 0)
717             Out << "error: address " << HEX64(Addr) << " DWARF function \""
718                 << dii.FunctionName.c_str()
719                 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
720 
721           // Verify source file path
722           if (dii.FileName != gsymFilename)
723             Out << "error: address " << HEX64(Addr) << " DWARF path \""
724                 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
725                 << gsymFilename.c_str() << "\"\n";
726           // Verify source file line
727           if (dii.Line != gii.Line)
728             Out << "error: address " << HEX64(Addr) << " DWARF line "
729                 << dii.Line << " != GSYM line " << gii.Line << "\n";
730         }
731       }
732     }
733   }
734   return Error::success();
735 }
736