xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- DWARFVerifier.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
9 #include "llvm/ADT/IntervalMap.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/BinaryFormat/Dwarf.h"
13 #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
14 #include "llvm/DebugInfo/DWARF/DWARFAttribute.h"
15 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
16 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
17 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
18 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
19 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
21 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
24 #include "llvm/DebugInfo/DWARF/DWARFObject.h"
25 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
26 #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
27 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
28 #include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h"
29 #include "llvm/Object/Error.h"
30 #include "llvm/Support/DJB.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/FormatVariadic.h"
35 #include "llvm/Support/JSON.h"
36 #include "llvm/Support/Parallel.h"
37 #include "llvm/Support/WithColor.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <map>
40 #include <set>
41 #include <vector>
42 
43 using namespace llvm;
44 using namespace dwarf;
45 using namespace object;
46 
47 namespace llvm {
48 class DWARFDebugInfoEntry;
49 }
50 
51 std::optional<DWARFAddressRange>
insert(const DWARFAddressRange & R)52 DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
53   auto Begin = Ranges.begin();
54   auto End = Ranges.end();
55   auto Pos = std::lower_bound(Begin, End, R);
56 
57   // Check for exact duplicates which is an allowed special case
58   if (Pos != End && *Pos == R) {
59     return std::nullopt;
60   }
61 
62   if (Pos != End) {
63     DWARFAddressRange Range(*Pos);
64     if (Pos->merge(R))
65       return Range;
66   }
67   if (Pos != Begin) {
68     auto Iter = Pos - 1;
69     DWARFAddressRange Range(*Iter);
70     if (Iter->merge(R))
71       return Range;
72   }
73 
74   Ranges.insert(Pos, R);
75   return std::nullopt;
76 }
77 
78 DWARFVerifier::DieRangeInfo::die_range_info_iterator
insert(const DieRangeInfo & RI)79 DWARFVerifier::DieRangeInfo::insert(const DieRangeInfo &RI) {
80   if (RI.Ranges.empty())
81     return Children.end();
82 
83   auto End = Children.end();
84   auto Iter = Children.begin();
85   while (Iter != End) {
86     if (Iter->intersects(RI))
87       return Iter;
88     ++Iter;
89   }
90   Children.insert(RI);
91   return Children.end();
92 }
93 
contains(const DieRangeInfo & RHS) const94 bool DWARFVerifier::DieRangeInfo::contains(const DieRangeInfo &RHS) const {
95   auto I1 = Ranges.begin(), E1 = Ranges.end();
96   auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
97   if (I2 == E2)
98     return true;
99 
100   DWARFAddressRange R = *I2;
101   while (I1 != E1) {
102     bool Covered = I1->LowPC <= R.LowPC;
103     if (R.LowPC == R.HighPC || (Covered && R.HighPC <= I1->HighPC)) {
104       if (++I2 == E2)
105         return true;
106       R = *I2;
107       continue;
108     }
109     if (!Covered)
110       return false;
111     if (R.LowPC < I1->HighPC)
112       R.LowPC = I1->HighPC;
113     ++I1;
114   }
115   return false;
116 }
117 
intersects(const DieRangeInfo & RHS) const118 bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
119   auto I1 = Ranges.begin(), E1 = Ranges.end();
120   auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
121   while (I1 != E1 && I2 != E2) {
122     if (I1->intersects(*I2)) {
123       // Exact duplicates are allowed
124       if (!(*I1 == *I2))
125         return true;
126     }
127     if (I1->LowPC < I2->LowPC)
128       ++I1;
129     else
130       ++I2;
131   }
132   return false;
133 }
134 
verifyUnitHeader(const DWARFDataExtractor DebugInfoData,uint64_t * Offset,unsigned UnitIndex,uint8_t & UnitType,bool & isUnitDWARF64)135 bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
136                                      uint64_t *Offset, unsigned UnitIndex,
137                                      uint8_t &UnitType, bool &isUnitDWARF64) {
138   uint64_t AbbrOffset, Length;
139   uint8_t AddrSize = 0;
140   uint16_t Version;
141   bool Success = true;
142 
143   bool ValidLength = false;
144   bool ValidVersion = false;
145   bool ValidAddrSize = false;
146   bool ValidType = true;
147   bool ValidAbbrevOffset = true;
148 
149   uint64_t OffsetStart = *Offset;
150   DwarfFormat Format;
151   std::tie(Length, Format) = DebugInfoData.getInitialLength(Offset);
152   isUnitDWARF64 = Format == DWARF64;
153   Version = DebugInfoData.getU16(Offset);
154 
155   if (Version >= 5) {
156     UnitType = DebugInfoData.getU8(Offset);
157     AddrSize = DebugInfoData.getU8(Offset);
158     AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
159     ValidType = dwarf::isUnitType(UnitType);
160   } else {
161     UnitType = 0;
162     AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
163     AddrSize = DebugInfoData.getU8(Offset);
164   }
165 
166   Expected<const DWARFAbbreviationDeclarationSet *> AbbrevSetOrErr =
167       DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset);
168   if (!AbbrevSetOrErr) {
169     ValidAbbrevOffset = false;
170     // FIXME: A problematic debug_abbrev section is reported below in the form
171     // of a `note:`. We should propagate this error there (or elsewhere) to
172     // avoid losing the specific problem with the debug_abbrev section.
173     consumeError(AbbrevSetOrErr.takeError());
174   }
175 
176   ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3);
177   ValidVersion = DWARFContext::isSupportedVersion(Version);
178   ValidAddrSize = DWARFContext::isAddressSizeSupported(AddrSize);
179   if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset ||
180       !ValidType) {
181     Success = false;
182     bool HeaderShown = false;
183     auto ShowHeaderOnce = [&]() {
184       if (!HeaderShown) {
185         error() << format("Units[%d] - start offset: 0x%08" PRIx64 " \n",
186                           UnitIndex, OffsetStart);
187         HeaderShown = true;
188       }
189     };
190     if (!ValidLength)
191       ErrorCategory.Report(
192           "Unit Header Length: Unit too large for .debug_info provided", [&]() {
193             ShowHeaderOnce();
194             note() << "The length for this unit is too "
195                       "large for the .debug_info provided.\n";
196           });
197     if (!ValidVersion)
198       ErrorCategory.Report(
199           "Unit Header Length: 16 bit unit header version is not valid", [&]() {
200             ShowHeaderOnce();
201             note() << "The 16 bit unit header version is not valid.\n";
202           });
203     if (!ValidType)
204       ErrorCategory.Report(
205           "Unit Header Length: Unit type encoding is not valid", [&]() {
206             ShowHeaderOnce();
207             note() << "The unit type encoding is not valid.\n";
208           });
209     if (!ValidAbbrevOffset)
210       ErrorCategory.Report(
211           "Unit Header Length: Offset into the .debug_abbrev section is not "
212           "valid",
213           [&]() {
214             ShowHeaderOnce();
215             note() << "The offset into the .debug_abbrev section is "
216                       "not valid.\n";
217           });
218     if (!ValidAddrSize)
219       ErrorCategory.Report("Unit Header Length: Address size is unsupported",
220                            [&]() {
221                              ShowHeaderOnce();
222                              note() << "The address size is unsupported.\n";
223                            });
224   }
225   *Offset = OffsetStart + Length + (isUnitDWARF64 ? 12 : 4);
226   return Success;
227 }
228 
verifyName(const DWARFDie & Die)229 bool DWARFVerifier::verifyName(const DWARFDie &Die) {
230   // FIXME Add some kind of record of which DIE names have already failed and
231   // don't bother checking a DIE that uses an already failed DIE.
232 
233   std::string ReconstructedName;
234   raw_string_ostream OS(ReconstructedName);
235   std::string OriginalFullName;
236   Die.getFullName(OS, &OriginalFullName);
237   OS.flush();
238   if (OriginalFullName.empty() || OriginalFullName == ReconstructedName)
239     return false;
240 
241   ErrorCategory.Report(
242       "Simplified template DW_AT_name could not be reconstituted", [&]() {
243         error()
244             << "Simplified template DW_AT_name could not be reconstituted:\n"
245             << formatv("         original: {0}\n"
246                        "    reconstituted: {1}\n",
247                        OriginalFullName, ReconstructedName);
248         dump(Die) << '\n';
249         dump(Die.getDwarfUnit()->getUnitDIE()) << '\n';
250       });
251   return true;
252 }
253 
verifyUnitContents(DWARFUnit & Unit,ReferenceMap & UnitLocalReferences,ReferenceMap & CrossUnitReferences)254 unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit,
255                                            ReferenceMap &UnitLocalReferences,
256                                            ReferenceMap &CrossUnitReferences) {
257   unsigned NumUnitErrors = 0;
258   unsigned NumDies = Unit.getNumDIEs();
259   for (unsigned I = 0; I < NumDies; ++I) {
260     auto Die = Unit.getDIEAtIndex(I);
261 
262     if (Die.getTag() == DW_TAG_null)
263       continue;
264 
265     for (auto AttrValue : Die.attributes()) {
266       NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
267       NumUnitErrors += verifyDebugInfoForm(Die, AttrValue, UnitLocalReferences,
268                                            CrossUnitReferences);
269     }
270 
271     NumUnitErrors += verifyName(Die);
272 
273     if (Die.hasChildren()) {
274       if (Die.getFirstChild().isValid() &&
275           Die.getFirstChild().getTag() == DW_TAG_null) {
276         warn() << dwarf::TagString(Die.getTag())
277                << " has DW_CHILDREN_yes but DIE has no children: ";
278         Die.dump(OS);
279       }
280     }
281 
282     NumUnitErrors += verifyDebugInfoCallSite(Die);
283   }
284 
285   DWARFDie Die = Unit.getUnitDIE(/* ExtractUnitDIEOnly = */ false);
286   if (!Die) {
287     ErrorCategory.Report("Compilation unit missing DIE", [&]() {
288       error() << "Compilation unit without DIE.\n";
289     });
290     NumUnitErrors++;
291     return NumUnitErrors;
292   }
293 
294   if (!dwarf::isUnitType(Die.getTag())) {
295     ErrorCategory.Report("Compilation unit root DIE is not a unit DIE", [&]() {
296       error() << "Compilation unit root DIE is not a unit DIE: "
297               << dwarf::TagString(Die.getTag()) << ".\n";
298     });
299     NumUnitErrors++;
300   }
301 
302   uint8_t UnitType = Unit.getUnitType();
303   if (!DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) {
304     ErrorCategory.Report("Mismatched unit type", [&]() {
305       error() << "Compilation unit type (" << dwarf::UnitTypeString(UnitType)
306               << ") and root DIE (" << dwarf::TagString(Die.getTag())
307               << ") do not match.\n";
308     });
309     NumUnitErrors++;
310   }
311 
312   //  According to DWARF Debugging Information Format Version 5,
313   //  3.1.2 Skeleton Compilation Unit Entries:
314   //  "A skeleton compilation unit has no children."
315   if (Die.getTag() == dwarf::DW_TAG_skeleton_unit && Die.hasChildren()) {
316     ErrorCategory.Report("Skeleton CU has children", [&]() {
317       error() << "Skeleton compilation unit has children.\n";
318     });
319     NumUnitErrors++;
320   }
321 
322   DieRangeInfo RI;
323   NumUnitErrors += verifyDieRanges(Die, RI);
324 
325   return NumUnitErrors;
326 }
327 
verifyDebugInfoCallSite(const DWARFDie & Die)328 unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
329   if (Die.getTag() != DW_TAG_call_site && Die.getTag() != DW_TAG_GNU_call_site)
330     return 0;
331 
332   DWARFDie Curr = Die.getParent();
333   for (; Curr.isValid() && !Curr.isSubprogramDIE(); Curr = Die.getParent()) {
334     if (Curr.getTag() == DW_TAG_inlined_subroutine) {
335       ErrorCategory.Report(
336           "Call site nested entry within inlined subroutine", [&]() {
337             error() << "Call site entry nested within inlined subroutine:";
338             Curr.dump(OS);
339           });
340       return 1;
341     }
342   }
343 
344   if (!Curr.isValid()) {
345     ErrorCategory.Report(
346         "Call site entry not nested within valid subprogram", [&]() {
347           error() << "Call site entry not nested within a valid subprogram:";
348           Die.dump(OS);
349         });
350     return 1;
351   }
352 
353   std::optional<DWARFFormValue> CallAttr = Curr.find(
354       {DW_AT_call_all_calls, DW_AT_call_all_source_calls,
355        DW_AT_call_all_tail_calls, DW_AT_GNU_all_call_sites,
356        DW_AT_GNU_all_source_call_sites, DW_AT_GNU_all_tail_call_sites});
357   if (!CallAttr) {
358     ErrorCategory.Report(
359         "Subprogram with call site entry has no DW_AT_call attribute", [&]() {
360           error()
361               << "Subprogram with call site entry has no DW_AT_call attribute:";
362           Curr.dump(OS);
363           Die.dump(OS, /*indent*/ 1);
364         });
365     return 1;
366   }
367 
368   return 0;
369 }
370 
verifyAbbrevSection(const DWARFDebugAbbrev * Abbrev)371 unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) {
372   if (!Abbrev)
373     return 0;
374 
375   Expected<const DWARFAbbreviationDeclarationSet *> AbbrDeclsOrErr =
376       Abbrev->getAbbreviationDeclarationSet(0);
377   if (!AbbrDeclsOrErr) {
378     std::string ErrMsg = toString(AbbrDeclsOrErr.takeError());
379     ErrorCategory.Report("Abbreviation Declaration error",
380                          [&]() { error() << ErrMsg << "\n"; });
381     return 1;
382   }
383 
384   const auto *AbbrDecls = *AbbrDeclsOrErr;
385   unsigned NumErrors = 0;
386   for (auto AbbrDecl : *AbbrDecls) {
387     SmallDenseSet<uint16_t> AttributeSet;
388     for (auto Attribute : AbbrDecl.attributes()) {
389       auto Result = AttributeSet.insert(Attribute.Attr);
390       if (!Result.second) {
391         ErrorCategory.Report(
392             "Abbreviation declartion contains multiple attributes", [&]() {
393               error() << "Abbreviation declaration contains multiple "
394                       << AttributeString(Attribute.Attr) << " attributes.\n";
395               AbbrDecl.dump(OS);
396             });
397         ++NumErrors;
398       }
399     }
400   }
401   return NumErrors;
402 }
403 
handleDebugAbbrev()404 bool DWARFVerifier::handleDebugAbbrev() {
405   OS << "Verifying .debug_abbrev...\n";
406 
407   const DWARFObject &DObj = DCtx.getDWARFObj();
408   unsigned NumErrors = 0;
409   if (!DObj.getAbbrevSection().empty())
410     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrev());
411   if (!DObj.getAbbrevDWOSection().empty())
412     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrevDWO());
413 
414   return NumErrors == 0;
415 }
416 
verifyUnits(const DWARFUnitVector & Units)417 unsigned DWARFVerifier::verifyUnits(const DWARFUnitVector &Units) {
418   unsigned NumDebugInfoErrors = 0;
419   ReferenceMap CrossUnitReferences;
420 
421   unsigned Index = 1;
422   for (const auto &Unit : Units) {
423     OS << "Verifying unit: " << Index << " / " << Units.getNumUnits();
424     if (const char* Name = Unit->getUnitDIE(true).getShortName())
425       OS << ", \"" << Name << '\"';
426     OS << '\n';
427     OS.flush();
428     ReferenceMap UnitLocalReferences;
429     NumDebugInfoErrors +=
430         verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
431     NumDebugInfoErrors += verifyDebugInfoReferences(
432         UnitLocalReferences, [&](uint64_t Offset) { return Unit.get(); });
433     ++Index;
434   }
435 
436   NumDebugInfoErrors += verifyDebugInfoReferences(
437       CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
438         if (DWARFUnit *U = Units.getUnitForOffset(Offset))
439           return U;
440         return nullptr;
441       });
442 
443   return NumDebugInfoErrors;
444 }
445 
verifyUnitSection(const DWARFSection & S)446 unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S) {
447   const DWARFObject &DObj = DCtx.getDWARFObj();
448   DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
449   unsigned NumDebugInfoErrors = 0;
450   uint64_t Offset = 0, UnitIdx = 0;
451   uint8_t UnitType = 0;
452   bool isUnitDWARF64 = false;
453   bool isHeaderChainValid = true;
454   bool hasDIE = DebugInfoData.isValidOffset(Offset);
455   DWARFUnitVector TypeUnitVector;
456   DWARFUnitVector CompileUnitVector;
457   while (hasDIE) {
458     if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
459                           isUnitDWARF64)) {
460       isHeaderChainValid = false;
461       if (isUnitDWARF64)
462         break;
463     }
464     hasDIE = DebugInfoData.isValidOffset(Offset);
465     ++UnitIdx;
466   }
467   if (UnitIdx == 0 && !hasDIE) {
468     warn() << "Section is empty.\n";
469     isHeaderChainValid = true;
470   }
471   if (!isHeaderChainValid)
472     ++NumDebugInfoErrors;
473   return NumDebugInfoErrors;
474 }
475 
verifyIndex(StringRef Name,DWARFSectionKind InfoColumnKind,StringRef IndexStr)476 unsigned DWARFVerifier::verifyIndex(StringRef Name,
477                                     DWARFSectionKind InfoColumnKind,
478                                     StringRef IndexStr) {
479   if (IndexStr.empty())
480     return 0;
481   OS << "Verifying " << Name << "...\n";
482   DWARFUnitIndex Index(InfoColumnKind);
483   DataExtractor D(IndexStr, DCtx.isLittleEndian(), 0);
484   if (!Index.parse(D))
485     return 1;
486   using MapType = IntervalMap<uint64_t, uint64_t>;
487   MapType::Allocator Alloc;
488   std::vector<std::unique_ptr<MapType>> Sections(Index.getColumnKinds().size());
489   for (const DWARFUnitIndex::Entry &E : Index.getRows()) {
490     uint64_t Sig = E.getSignature();
491     if (!E.getContributions())
492       continue;
493     for (auto E : enumerate(
494              InfoColumnKind == DW_SECT_INFO
495                  ? ArrayRef(E.getContributions(), Index.getColumnKinds().size())
496                  : ArrayRef(E.getContribution(), 1))) {
497       const DWARFUnitIndex::Entry::SectionContribution &SC = E.value();
498       int Col = E.index();
499       if (SC.getLength() == 0)
500         continue;
501       if (!Sections[Col])
502         Sections[Col] = std::make_unique<MapType>(Alloc);
503       auto &M = *Sections[Col];
504       auto I = M.find(SC.getOffset());
505       if (I != M.end() && I.start() < (SC.getOffset() + SC.getLength())) {
506         StringRef Category = InfoColumnKind == DWARFSectionKind::DW_SECT_INFO
507                                  ? "Overlapping CU index entries"
508                                  : "Overlapping TU index entries";
509         ErrorCategory.Report(Category, [&]() {
510           error() << llvm::formatv(
511               "overlapping index entries for entries {0:x16} "
512               "and {1:x16} for column {2}\n",
513               *I, Sig, toString(Index.getColumnKinds()[Col]));
514         });
515         return 1;
516       }
517       M.insert(SC.getOffset(), SC.getOffset() + SC.getLength() - 1, Sig);
518     }
519   }
520 
521   return 0;
522 }
523 
handleDebugCUIndex()524 bool DWARFVerifier::handleDebugCUIndex() {
525   return verifyIndex(".debug_cu_index", DWARFSectionKind::DW_SECT_INFO,
526                      DCtx.getDWARFObj().getCUIndexSection()) == 0;
527 }
528 
handleDebugTUIndex()529 bool DWARFVerifier::handleDebugTUIndex() {
530   return verifyIndex(".debug_tu_index", DWARFSectionKind::DW_SECT_EXT_TYPES,
531                      DCtx.getDWARFObj().getTUIndexSection()) == 0;
532 }
533 
handleDebugInfo()534 bool DWARFVerifier::handleDebugInfo() {
535   const DWARFObject &DObj = DCtx.getDWARFObj();
536   unsigned NumErrors = 0;
537 
538   OS << "Verifying .debug_info Unit Header Chain...\n";
539   DObj.forEachInfoSections([&](const DWARFSection &S) {
540     NumErrors += verifyUnitSection(S);
541   });
542 
543   OS << "Verifying .debug_types Unit Header Chain...\n";
544   DObj.forEachTypesSections([&](const DWARFSection &S) {
545     NumErrors += verifyUnitSection(S);
546   });
547 
548   OS << "Verifying non-dwo Units...\n";
549   NumErrors += verifyUnits(DCtx.getNormalUnitsVector());
550 
551   OS << "Verifying dwo Units...\n";
552   NumErrors += verifyUnits(DCtx.getDWOUnitsVector());
553   return NumErrors == 0;
554 }
555 
verifyDieRanges(const DWARFDie & Die,DieRangeInfo & ParentRI)556 unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
557                                         DieRangeInfo &ParentRI) {
558   unsigned NumErrors = 0;
559 
560   if (!Die.isValid())
561     return NumErrors;
562 
563   DWARFUnit *Unit = Die.getDwarfUnit();
564 
565   auto RangesOrError = Die.getAddressRanges();
566   if (!RangesOrError) {
567     // FIXME: Report the error.
568     if (!Unit->isDWOUnit())
569       ++NumErrors;
570     llvm::consumeError(RangesOrError.takeError());
571     return NumErrors;
572   }
573 
574   const DWARFAddressRangesVector &Ranges = RangesOrError.get();
575   // Build RI for this DIE and check that ranges within this DIE do not
576   // overlap.
577   DieRangeInfo RI(Die);
578 
579   // TODO support object files better
580   //
581   // Some object file formats (i.e. non-MachO) support COMDAT.  ELF in
582   // particular does so by placing each function into a section.  The DWARF data
583   // for the function at that point uses a section relative DW_FORM_addrp for
584   // the DW_AT_low_pc and a DW_FORM_data4 for the offset as the DW_AT_high_pc.
585   // In such a case, when the Die is the CU, the ranges will overlap, and we
586   // will flag valid conflicting ranges as invalid.
587   //
588   // For such targets, we should read the ranges from the CU and partition them
589   // by the section id.  The ranges within a particular section should be
590   // disjoint, although the ranges across sections may overlap.  We would map
591   // the child die to the entity that it references and the section with which
592   // it is associated.  The child would then be checked against the range
593   // information for the associated section.
594   //
595   // For now, simply elide the range verification for the CU DIEs if we are
596   // processing an object file.
597 
598   if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) {
599     bool DumpDieAfterError = false;
600     for (const auto &Range : Ranges) {
601       if (!Range.valid()) {
602         ++NumErrors;
603         ErrorCategory.Report("Invalid address range", [&]() {
604           error() << "Invalid address range " << Range << "\n";
605           DumpDieAfterError = true;
606         });
607         continue;
608       }
609 
610       // Verify that ranges don't intersect and also build up the DieRangeInfo
611       // address ranges. Don't break out of the loop below early, or we will
612       // think this DIE doesn't have all of the address ranges it is supposed
613       // to have. Compile units often have DW_AT_ranges that can contain one or
614       // more dead stripped address ranges which tend to all be at the same
615       // address: 0 or -1.
616       if (auto PrevRange = RI.insert(Range)) {
617         ++NumErrors;
618         ErrorCategory.Report("DIE has overlapping DW_AT_ranges", [&]() {
619           error() << "DIE has overlapping ranges in DW_AT_ranges attribute: "
620                   << *PrevRange << " and " << Range << '\n';
621           DumpDieAfterError = true;
622         });
623       }
624     }
625     if (DumpDieAfterError)
626       dump(Die, 2) << '\n';
627   }
628 
629   // Verify that children don't intersect.
630   const auto IntersectingChild = ParentRI.insert(RI);
631   if (IntersectingChild != ParentRI.Children.end()) {
632     ++NumErrors;
633     ErrorCategory.Report("DIEs have overlapping address ranges", [&]() {
634       error() << "DIEs have overlapping address ranges:";
635       dump(Die);
636       dump(IntersectingChild->Die) << '\n';
637     });
638   }
639 
640   // Verify that ranges are contained within their parent.
641   bool ShouldBeContained = !RI.Ranges.empty() && !ParentRI.Ranges.empty() &&
642                            !(Die.getTag() == DW_TAG_subprogram &&
643                              ParentRI.Die.getTag() == DW_TAG_subprogram);
644   if (ShouldBeContained && !ParentRI.contains(RI)) {
645     ++NumErrors;
646     ErrorCategory.Report(
647         "DIE address ranges are not contained by parent ranges", [&]() {
648           error()
649               << "DIE address ranges are not contained in its parent's ranges:";
650           dump(ParentRI.Die);
651           dump(Die, 2) << '\n';
652         });
653   }
654 
655   // Recursively check children.
656   for (DWARFDie Child : Die)
657     NumErrors += verifyDieRanges(Child, RI);
658 
659   return NumErrors;
660 }
661 
verifyExpressionOp(const DWARFExpression::Operation & Op,DWARFUnit * U)662 bool DWARFVerifier::verifyExpressionOp(const DWARFExpression::Operation &Op,
663                                        DWARFUnit *U) {
664   for (unsigned Operand = 0; Operand < Op.Desc.Op.size(); ++Operand) {
665     unsigned Size = Op.Desc.Op[Operand];
666 
667     if (Size == DWARFExpression::Operation::BaseTypeRef) {
668       // For DW_OP_convert the operand may be 0 to indicate that conversion to
669       // the generic type should be done, so don't look up a base type in that
670       // case. The same holds for DW_OP_reinterpret, which is currently not
671       // supported.
672       if (Op.Opcode == DW_OP_convert && Op.Operands[Operand] == 0)
673         continue;
674       auto Die = U->getDIEForOffset(U->getOffset() + Op.Operands[Operand]);
675       if (!Die || Die.getTag() != dwarf::DW_TAG_base_type)
676         return false;
677     }
678   }
679 
680   return true;
681 }
682 
verifyExpression(const DWARFExpression & E,DWARFUnit * U)683 bool DWARFVerifier::verifyExpression(const DWARFExpression &E, DWARFUnit *U) {
684   for (auto &Op : E)
685     if (!verifyExpressionOp(Op, U))
686       return false;
687 
688   return true;
689 }
690 
verifyDebugInfoAttribute(const DWARFDie & Die,DWARFAttribute & AttrValue)691 unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
692                                                  DWARFAttribute &AttrValue) {
693   unsigned NumErrors = 0;
694   auto ReportError = [&](StringRef category, const Twine &TitleMsg) {
695     ++NumErrors;
696     ErrorCategory.Report(category, [&]() {
697       error() << TitleMsg << '\n';
698       dump(Die) << '\n';
699     });
700   };
701 
702   const DWARFObject &DObj = DCtx.getDWARFObj();
703   DWARFUnit *U = Die.getDwarfUnit();
704   const auto Attr = AttrValue.Attr;
705   switch (Attr) {
706   case DW_AT_ranges:
707     // Make sure the offset in the DW_AT_ranges attribute is valid.
708     if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
709       unsigned DwarfVersion = U->getVersion();
710       const DWARFSection &RangeSection = DwarfVersion < 5
711                                              ? DObj.getRangesSection()
712                                              : DObj.getRnglistsSection();
713       if (U->isDWOUnit() && RangeSection.Data.empty())
714         break;
715       if (*SectionOffset >= RangeSection.Data.size())
716         ReportError("DW_AT_ranges offset out of bounds",
717                     "DW_AT_ranges offset is beyond " +
718                         StringRef(DwarfVersion < 5 ? ".debug_ranges"
719                                                    : ".debug_rnglists") +
720                         " bounds: " + llvm::formatv("{0:x8}", *SectionOffset));
721       break;
722     }
723     ReportError("Invalid DW_AT_ranges encoding",
724                 "DIE has invalid DW_AT_ranges encoding:");
725     break;
726   case DW_AT_stmt_list:
727     // Make sure the offset in the DW_AT_stmt_list attribute is valid.
728     if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
729       if (*SectionOffset >= U->getLineSection().Data.size())
730         ReportError("DW_AT_stmt_list offset out of bounds",
731                     "DW_AT_stmt_list offset is beyond .debug_line bounds: " +
732                         llvm::formatv("{0:x8}", *SectionOffset));
733       break;
734     }
735     ReportError("Invalid DW_AT_stmt_list encoding",
736                 "DIE has invalid DW_AT_stmt_list encoding:");
737     break;
738   case DW_AT_location: {
739     // FIXME: It might be nice if there's a way to walk location expressions
740     // without trying to resolve the address ranges - it'd be a more efficient
741     // API (since the API is currently unnecessarily resolving addresses for
742     // this use case which only wants to validate the expressions themselves) &
743     // then the expressions could be validated even if the addresses can't be
744     // resolved.
745     // That sort of API would probably look like a callback "for each
746     // expression" with some way to lazily resolve the address ranges when
747     // needed (& then the existing API used here could be built on top of that -
748     // using the callback API to build the data structure and return it).
749     if (Expected<std::vector<DWARFLocationExpression>> Loc =
750             Die.getLocations(DW_AT_location)) {
751       for (const auto &Entry : *Loc) {
752         DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), 0);
753         DWARFExpression Expression(Data, U->getAddressByteSize(),
754                                    U->getFormParams().Format);
755         bool Error =
756             any_of(Expression, [](const DWARFExpression::Operation &Op) {
757               return Op.isError();
758             });
759         if (Error || !verifyExpression(Expression, U))
760           ReportError("Invalid DWARF expressions",
761                       "DIE contains invalid DWARF expression:");
762       }
763     } else if (Error Err = handleErrors(
764                    Loc.takeError(), [&](std::unique_ptr<ResolverError> E) {
765                      return U->isDWOUnit() ? Error::success()
766                                            : Error(std::move(E));
767                    }))
768       ReportError("Invalid DW_AT_location", toString(std::move(Err)));
769     break;
770   }
771   case DW_AT_specification:
772   case DW_AT_abstract_origin: {
773     if (auto ReferencedDie = Die.getAttributeValueAsReferencedDie(Attr)) {
774       auto DieTag = Die.getTag();
775       auto RefTag = ReferencedDie.getTag();
776       if (DieTag == RefTag)
777         break;
778       if (DieTag == DW_TAG_inlined_subroutine && RefTag == DW_TAG_subprogram)
779         break;
780       if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member)
781         break;
782       // This might be reference to a function declaration.
783       if (DieTag == DW_TAG_GNU_call_site && RefTag == DW_TAG_subprogram)
784         break;
785       ReportError("Incompatible DW_AT_abstract_origin tag reference",
786                   "DIE with tag " + TagString(DieTag) + " has " +
787                       AttributeString(Attr) +
788                       " that points to DIE with "
789                       "incompatible tag " +
790                       TagString(RefTag));
791     }
792     break;
793   }
794   case DW_AT_type: {
795     DWARFDie TypeDie = Die.getAttributeValueAsReferencedDie(DW_AT_type);
796     if (TypeDie && !isType(TypeDie.getTag())) {
797       ReportError("Incompatible DW_AT_type attribute tag",
798                   "DIE has " + AttributeString(Attr) +
799                       " with incompatible tag " + TagString(TypeDie.getTag()));
800     }
801     break;
802   }
803   case DW_AT_call_file:
804   case DW_AT_decl_file: {
805     if (auto FileIdx = AttrValue.Value.getAsUnsignedConstant()) {
806       if (U->isDWOUnit() && !U->isTypeUnit())
807         break;
808       const auto *LT = U->getContext().getLineTableForUnit(U);
809       if (LT) {
810         if (!LT->hasFileAtIndex(*FileIdx)) {
811           bool IsZeroIndexed = LT->Prologue.getVersion() >= 5;
812           if (std::optional<uint64_t> LastFileIdx =
813                   LT->getLastValidFileIndex()) {
814             ReportError("Invalid file index in DW_AT_decl_file",
815                         "DIE has " + AttributeString(Attr) +
816                             " with an invalid file index " +
817                             llvm::formatv("{0}", *FileIdx) +
818                             " (valid values are [" +
819                             (IsZeroIndexed ? "0-" : "1-") +
820                             llvm::formatv("{0}", *LastFileIdx) + "])");
821           } else {
822             ReportError("Invalid file index in DW_AT_decl_file",
823                         "DIE has " + AttributeString(Attr) +
824                             " with an invalid file index " +
825                             llvm::formatv("{0}", *FileIdx) +
826                             " (the file table in the prologue is empty)");
827           }
828         }
829       } else {
830         ReportError(
831             "File index in DW_AT_decl_file reference CU with no line table",
832             "DIE has " + AttributeString(Attr) +
833                 " that references a file with index " +
834                 llvm::formatv("{0}", *FileIdx) +
835                 " and the compile unit has no line table");
836       }
837     } else {
838       ReportError("Invalid encoding in DW_AT_decl_file",
839                   "DIE has " + AttributeString(Attr) +
840                       " with invalid encoding");
841     }
842     break;
843   }
844   case DW_AT_call_line:
845   case DW_AT_decl_line: {
846     if (!AttrValue.Value.getAsUnsignedConstant()) {
847       ReportError(
848           Attr == DW_AT_call_line ? "Invalid file index in DW_AT_decl_line"
849                                   : "Invalid file index in DW_AT_call_line",
850           "DIE has " + AttributeString(Attr) + " with invalid encoding");
851     }
852     break;
853   }
854   default:
855     break;
856   }
857   return NumErrors;
858 }
859 
verifyDebugInfoForm(const DWARFDie & Die,DWARFAttribute & AttrValue,ReferenceMap & LocalReferences,ReferenceMap & CrossUnitReferences)860 unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
861                                             DWARFAttribute &AttrValue,
862                                             ReferenceMap &LocalReferences,
863                                             ReferenceMap &CrossUnitReferences) {
864   auto DieCU = Die.getDwarfUnit();
865   unsigned NumErrors = 0;
866   const auto Form = AttrValue.Value.getForm();
867   switch (Form) {
868   case DW_FORM_ref1:
869   case DW_FORM_ref2:
870   case DW_FORM_ref4:
871   case DW_FORM_ref8:
872   case DW_FORM_ref_udata: {
873     // Verify all CU relative references are valid CU offsets.
874     std::optional<uint64_t> RefVal = AttrValue.Value.getAsRelativeReference();
875     assert(RefVal);
876     if (RefVal) {
877       auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
878       auto CUOffset = AttrValue.Value.getRawUValue();
879       if (CUOffset >= CUSize) {
880         ++NumErrors;
881         ErrorCategory.Report("Invalid CU offset", [&]() {
882           error() << FormEncodingString(Form) << " CU offset "
883                   << format("0x%08" PRIx64, CUOffset)
884                   << " is invalid (must be less than CU size of "
885                   << format("0x%08" PRIx64, CUSize) << "):\n";
886           Die.dump(OS, 0, DumpOpts);
887           dump(Die) << '\n';
888         });
889       } else {
890         // Valid reference, but we will verify it points to an actual
891         // DIE later.
892         LocalReferences[AttrValue.Value.getUnit()->getOffset() + *RefVal]
893             .insert(Die.getOffset());
894       }
895     }
896     break;
897   }
898   case DW_FORM_ref_addr: {
899     // Verify all absolute DIE references have valid offsets in the
900     // .debug_info section.
901     std::optional<uint64_t> RefVal = AttrValue.Value.getAsDebugInfoReference();
902     assert(RefVal);
903     if (RefVal) {
904       if (*RefVal >= DieCU->getInfoSection().Data.size()) {
905         ++NumErrors;
906         ErrorCategory.Report("DW_FORM_ref_addr offset out of bounds", [&]() {
907           error() << "DW_FORM_ref_addr offset beyond .debug_info "
908                      "bounds:\n";
909           dump(Die) << '\n';
910         });
911       } else {
912         // Valid reference, but we will verify it points to an actual
913         // DIE later.
914         CrossUnitReferences[*RefVal].insert(Die.getOffset());
915       }
916     }
917     break;
918   }
919   case DW_FORM_strp:
920   case DW_FORM_strx:
921   case DW_FORM_strx1:
922   case DW_FORM_strx2:
923   case DW_FORM_strx3:
924   case DW_FORM_strx4:
925   case DW_FORM_line_strp: {
926     if (Error E = AttrValue.Value.getAsCString().takeError()) {
927       ++NumErrors;
928       std::string ErrMsg = toString(std::move(E));
929       ErrorCategory.Report("Invalid DW_FORM attribute", [&]() {
930         error() << ErrMsg << ":\n";
931         dump(Die) << '\n';
932       });
933     }
934     break;
935   }
936   default:
937     break;
938   }
939   return NumErrors;
940 }
941 
verifyDebugInfoReferences(const ReferenceMap & References,llvm::function_ref<DWARFUnit * (uint64_t)> GetUnitForOffset)942 unsigned DWARFVerifier::verifyDebugInfoReferences(
943     const ReferenceMap &References,
944     llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForOffset) {
945   auto GetDIEForOffset = [&](uint64_t Offset) {
946     if (DWARFUnit *U = GetUnitForOffset(Offset))
947       return U->getDIEForOffset(Offset);
948     return DWARFDie();
949   };
950   unsigned NumErrors = 0;
951   for (const std::pair<const uint64_t, std::set<uint64_t>> &Pair :
952        References) {
953     if (GetDIEForOffset(Pair.first))
954       continue;
955     ++NumErrors;
956     ErrorCategory.Report("Invalid DIE reference", [&]() {
957       error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
958               << ". Offset is in between DIEs:\n";
959       for (auto Offset : Pair.second)
960         dump(GetDIEForOffset(Offset)) << '\n';
961       OS << "\n";
962     });
963   }
964   return NumErrors;
965 }
966 
verifyDebugLineStmtOffsets()967 void DWARFVerifier::verifyDebugLineStmtOffsets() {
968   std::map<uint64_t, DWARFDie> StmtListToDie;
969   for (const auto &CU : DCtx.compile_units()) {
970     auto Die = CU->getUnitDIE();
971     // Get the attribute value as a section offset. No need to produce an
972     // error here if the encoding isn't correct because we validate this in
973     // the .debug_info verifier.
974     auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list));
975     if (!StmtSectionOffset)
976       continue;
977     const uint64_t LineTableOffset = *StmtSectionOffset;
978     auto LineTable = DCtx.getLineTableForUnit(CU.get());
979     if (LineTableOffset < DCtx.getDWARFObj().getLineSection().Data.size()) {
980       if (!LineTable) {
981         ++NumDebugLineErrors;
982         ErrorCategory.Report("Unparsable .debug_line entry", [&]() {
983           error() << ".debug_line[" << format("0x%08" PRIx64, LineTableOffset)
984                   << "] was not able to be parsed for CU:\n";
985           dump(Die) << '\n';
986         });
987         continue;
988       }
989     } else {
990       // Make sure we don't get a valid line table back if the offset is wrong.
991       assert(LineTable == nullptr);
992       // Skip this line table as it isn't valid. No need to create an error
993       // here because we validate this in the .debug_info verifier.
994       continue;
995     }
996     auto [Iter, Inserted] = StmtListToDie.try_emplace(LineTableOffset, Die);
997     if (!Inserted) {
998       ++NumDebugLineErrors;
999       const auto &OldDie = Iter->second;
1000       ErrorCategory.Report("Identical DW_AT_stmt_list section offset", [&]() {
1001         error() << "two compile unit DIEs, "
1002                 << format("0x%08" PRIx64, OldDie.getOffset()) << " and "
1003                 << format("0x%08" PRIx64, Die.getOffset())
1004                 << ", have the same DW_AT_stmt_list section offset:\n";
1005         dump(OldDie);
1006         dump(Die) << '\n';
1007       });
1008       // Already verified this line table before, no need to do it again.
1009     }
1010   }
1011 }
1012 
verifyDebugLineRows()1013 void DWARFVerifier::verifyDebugLineRows() {
1014   for (const auto &CU : DCtx.compile_units()) {
1015     auto Die = CU->getUnitDIE();
1016     auto LineTable = DCtx.getLineTableForUnit(CU.get());
1017     // If there is no line table we will have created an error in the
1018     // .debug_info verifier or in verifyDebugLineStmtOffsets().
1019     if (!LineTable)
1020       continue;
1021 
1022     // Verify prologue.
1023     bool isDWARF5 = LineTable->Prologue.getVersion() >= 5;
1024     uint32_t MaxDirIndex = LineTable->Prologue.IncludeDirectories.size();
1025     uint32_t MinFileIndex = isDWARF5 ? 0 : 1;
1026     uint32_t FileIndex = MinFileIndex;
1027     StringMap<uint16_t> FullPathMap;
1028     for (const auto &FileName : LineTable->Prologue.FileNames) {
1029       // Verify directory index.
1030       if (FileName.DirIdx > MaxDirIndex) {
1031         ++NumDebugLineErrors;
1032         ErrorCategory.Report(
1033             "Invalid index in .debug_line->prologue.file_names->dir_idx",
1034             [&]() {
1035               error() << ".debug_line["
1036                       << format("0x%08" PRIx64,
1037                                 *toSectionOffset(Die.find(DW_AT_stmt_list)))
1038                       << "].prologue.file_names[" << FileIndex
1039                       << "].dir_idx contains an invalid index: "
1040                       << FileName.DirIdx << "\n";
1041             });
1042       }
1043 
1044       // Check file paths for duplicates.
1045       std::string FullPath;
1046       const bool HasFullPath = LineTable->getFileNameByIndex(
1047           FileIndex, CU->getCompilationDir(),
1048           DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FullPath);
1049       assert(HasFullPath && "Invalid index?");
1050       (void)HasFullPath;
1051       auto [It, Inserted] = FullPathMap.try_emplace(FullPath, FileIndex);
1052       if (!Inserted && It->second != FileIndex && DumpOpts.Verbose) {
1053         warn() << ".debug_line["
1054                << format("0x%08" PRIx64,
1055                          *toSectionOffset(Die.find(DW_AT_stmt_list)))
1056                << "].prologue.file_names[" << FileIndex
1057                << "] is a duplicate of file_names[" << It->second << "]\n";
1058       }
1059 
1060       FileIndex++;
1061     }
1062 
1063     // Nothing to verify in a line table with a single row containing the end
1064     // sequence.
1065     if (LineTable->Rows.size() == 1 && LineTable->Rows.front().EndSequence)
1066       continue;
1067 
1068     // Verify rows.
1069     uint64_t PrevAddress = 0;
1070     uint32_t RowIndex = 0;
1071     for (const auto &Row : LineTable->Rows) {
1072       // Verify row address.
1073       if (Row.Address.Address < PrevAddress) {
1074         ++NumDebugLineErrors;
1075         ErrorCategory.Report(
1076             "decreasing address between debug_line rows", [&]() {
1077               error() << ".debug_line["
1078                       << format("0x%08" PRIx64,
1079                                 *toSectionOffset(Die.find(DW_AT_stmt_list)))
1080                       << "] row[" << RowIndex
1081                       << "] decreases in address from previous row:\n";
1082 
1083               DWARFDebugLine::Row::dumpTableHeader(OS, 0);
1084               if (RowIndex > 0)
1085                 LineTable->Rows[RowIndex - 1].dump(OS);
1086               Row.dump(OS);
1087               OS << '\n';
1088             });
1089       }
1090 
1091       if (!LineTable->hasFileAtIndex(Row.File)) {
1092         ++NumDebugLineErrors;
1093         ErrorCategory.Report("Invalid file index in debug_line", [&]() {
1094           error() << ".debug_line["
1095                   << format("0x%08" PRIx64,
1096                             *toSectionOffset(Die.find(DW_AT_stmt_list)))
1097                   << "][" << RowIndex << "] has invalid file index " << Row.File
1098                   << " (valid values are [" << MinFileIndex << ','
1099                   << LineTable->Prologue.FileNames.size()
1100                   << (isDWARF5 ? ")" : "]") << "):\n";
1101           DWARFDebugLine::Row::dumpTableHeader(OS, 0);
1102           Row.dump(OS);
1103           OS << '\n';
1104         });
1105       }
1106       if (Row.EndSequence)
1107         PrevAddress = 0;
1108       else
1109         PrevAddress = Row.Address.Address;
1110       ++RowIndex;
1111     }
1112   }
1113 }
1114 
DWARFVerifier(raw_ostream & S,DWARFContext & D,DIDumpOptions DumpOpts)1115 DWARFVerifier::DWARFVerifier(raw_ostream &S, DWARFContext &D,
1116                              DIDumpOptions DumpOpts)
1117     : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)), IsObjectFile(false),
1118       IsMachOObject(false) {
1119   ErrorCategory.ShowDetail(this->DumpOpts.Verbose ||
1120                            !this->DumpOpts.ShowAggregateErrors);
1121   if (const auto *F = DCtx.getDWARFObj().getFile()) {
1122     IsObjectFile = F->isRelocatableObject();
1123     IsMachOObject = F->isMachO();
1124   }
1125 }
1126 
handleDebugLine()1127 bool DWARFVerifier::handleDebugLine() {
1128   NumDebugLineErrors = 0;
1129   OS << "Verifying .debug_line...\n";
1130   verifyDebugLineStmtOffsets();
1131   verifyDebugLineRows();
1132   return NumDebugLineErrors == 0;
1133 }
1134 
verifyAppleAccelTable(const DWARFSection * AccelSection,DataExtractor * StrData,const char * SectionName)1135 void DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
1136                                           DataExtractor *StrData,
1137                                           const char *SectionName) {
1138   DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), *AccelSection,
1139                                       DCtx.isLittleEndian(), 0);
1140   AppleAcceleratorTable AccelTable(AccelSectionData, *StrData);
1141 
1142   OS << "Verifying " << SectionName << "...\n";
1143 
1144   // Verify that the fixed part of the header is not too short.
1145   if (!AccelSectionData.isValidOffset(AccelTable.getSizeHdr())) {
1146     ErrorCategory.Report("Section is too small to fit a section header", [&]() {
1147       error() << "Section is too small to fit a section header.\n";
1148     });
1149     return;
1150   }
1151 
1152   // Verify that the section is not too short.
1153   if (Error E = AccelTable.extract()) {
1154     std::string Msg = toString(std::move(E));
1155     ErrorCategory.Report("Section is too small to fit a section header",
1156                          [&]() { error() << Msg << '\n'; });
1157     return;
1158   }
1159 
1160   // Verify that all buckets have a valid hash index or are empty.
1161   uint32_t NumBuckets = AccelTable.getNumBuckets();
1162   uint32_t NumHashes = AccelTable.getNumHashes();
1163 
1164   uint64_t BucketsOffset =
1165       AccelTable.getSizeHdr() + AccelTable.getHeaderDataLength();
1166   uint64_t HashesBase = BucketsOffset + NumBuckets * 4;
1167   uint64_t OffsetsBase = HashesBase + NumHashes * 4;
1168   for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) {
1169     uint32_t HashIdx = AccelSectionData.getU32(&BucketsOffset);
1170     if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
1171       ErrorCategory.Report("Invalid hash index", [&]() {
1172         error() << format("Bucket[%d] has invalid hash index: %u.\n", BucketIdx,
1173                           HashIdx);
1174       });
1175     }
1176   }
1177   uint32_t NumAtoms = AccelTable.getAtomsDesc().size();
1178   if (NumAtoms == 0) {
1179     ErrorCategory.Report("No atoms", [&]() {
1180       error() << "No atoms: failed to read HashData.\n";
1181     });
1182     return;
1183   }
1184   if (!AccelTable.validateForms()) {
1185     ErrorCategory.Report("Unsupported form", [&]() {
1186       error() << "Unsupported form: failed to read HashData.\n";
1187     });
1188     return;
1189   }
1190 
1191   for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) {
1192     uint64_t HashOffset = HashesBase + 4 * HashIdx;
1193     uint64_t DataOffset = OffsetsBase + 4 * HashIdx;
1194     uint32_t Hash = AccelSectionData.getU32(&HashOffset);
1195     uint64_t HashDataOffset = AccelSectionData.getU32(&DataOffset);
1196     if (!AccelSectionData.isValidOffsetForDataOfSize(HashDataOffset,
1197                                                      sizeof(uint64_t))) {
1198       ErrorCategory.Report("Invalid HashData offset", [&]() {
1199         error() << format("Hash[%d] has invalid HashData offset: "
1200                           "0x%08" PRIx64 ".\n",
1201                           HashIdx, HashDataOffset);
1202       });
1203     }
1204 
1205     uint64_t StrpOffset;
1206     uint64_t StringOffset;
1207     uint32_t StringCount = 0;
1208     uint64_t Offset;
1209     unsigned Tag;
1210     while ((StrpOffset = AccelSectionData.getU32(&HashDataOffset)) != 0) {
1211       const uint32_t NumHashDataObjects =
1212           AccelSectionData.getU32(&HashDataOffset);
1213       for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects;
1214            ++HashDataIdx) {
1215         std::tie(Offset, Tag) = AccelTable.readAtoms(&HashDataOffset);
1216         auto Die = DCtx.getDIEForOffset(Offset);
1217         if (!Die) {
1218           const uint32_t BucketIdx =
1219               NumBuckets ? (Hash % NumBuckets) : UINT32_MAX;
1220           StringOffset = StrpOffset;
1221           const char *Name = StrData->getCStr(&StringOffset);
1222           if (!Name)
1223             Name = "<NULL>";
1224 
1225           ErrorCategory.Report("Invalid DIE offset", [&]() {
1226             error() << format(
1227                 "%s Bucket[%d] Hash[%d] = 0x%08x "
1228                 "Str[%u] = 0x%08" PRIx64 " DIE[%d] = 0x%08" PRIx64 " "
1229                 "is not a valid DIE offset for \"%s\".\n",
1230                 SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset,
1231                 HashDataIdx, Offset, Name);
1232           });
1233           continue;
1234         }
1235         if ((Tag != dwarf::DW_TAG_null) && (Die.getTag() != Tag)) {
1236           ErrorCategory.Report("Mismatched Tag in accellerator table", [&]() {
1237             error() << "Tag " << dwarf::TagString(Tag)
1238                     << " in accelerator table does not match Tag "
1239                     << dwarf::TagString(Die.getTag()) << " of DIE["
1240                     << HashDataIdx << "].\n";
1241           });
1242         }
1243       }
1244     }
1245   }
1246 }
1247 
verifyDebugNamesCULists(const DWARFDebugNames & AccelTable)1248 void DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
1249   // A map from CU offset to the (first) Name Index offset which claims to index
1250   // this CU.
1251   DenseMap<uint64_t, uint64_t> CUMap;
1252   CUMap.reserve(DCtx.getNumCompileUnits());
1253 
1254   DenseSet<uint64_t> CUOffsets;
1255   for (const auto &CU : DCtx.compile_units())
1256     CUOffsets.insert(CU->getOffset());
1257 
1258   parallelForEach(AccelTable, [&](const DWARFDebugNames::NameIndex &NI) {
1259     if (NI.getCUCount() == 0) {
1260       ErrorCategory.Report("Name Index doesn't index any CU", [&]() {
1261         error() << formatv("Name Index @ {0:x} does not index any CU\n",
1262                            NI.getUnitOffset());
1263       });
1264       return;
1265     }
1266     for (uint32_t CU = 0, End = NI.getCUCount(); CU < End; ++CU) {
1267       uint64_t Offset = NI.getCUOffset(CU);
1268       if (!CUOffsets.count(Offset)) {
1269         ErrorCategory.Report("Name Index references non-existing CU", [&]() {
1270           error() << formatv(
1271               "Name Index @ {0:x} references a non-existing CU @ {1:x}\n",
1272               NI.getUnitOffset(), Offset);
1273         });
1274         continue;
1275       }
1276       uint64_t DuplicateCUOffset = 0;
1277       {
1278         std::lock_guard<std::mutex> Lock(AccessMutex);
1279         auto Iter = CUMap.find(Offset);
1280         if (Iter != CUMap.end())
1281           DuplicateCUOffset = Iter->second;
1282         else
1283           CUMap[Offset] = NI.getUnitOffset();
1284       }
1285       if (DuplicateCUOffset) {
1286         ErrorCategory.Report("Duplicate Name Index", [&]() {
1287           error() << formatv(
1288               "Name Index @ {0:x} references a CU @ {1:x}, but "
1289               "this CU is already indexed by Name Index @ {2:x}\n",
1290               NI.getUnitOffset(), Offset, DuplicateCUOffset);
1291         });
1292         continue;
1293       }
1294     }
1295   });
1296 
1297   for (const auto &CU : DCtx.compile_units()) {
1298     if (CUMap.count(CU->getOffset()) == 0)
1299       warn() << formatv("CU @ {0:x} not covered by any Name Index\n",
1300                         CU->getOffset());
1301   }
1302 }
1303 
verifyNameIndexBuckets(const DWARFDebugNames::NameIndex & NI,const DataExtractor & StrData)1304 void DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
1305                                            const DataExtractor &StrData) {
1306   struct BucketInfo {
1307     uint32_t Bucket;
1308     uint32_t Index;
1309 
1310     constexpr BucketInfo(uint32_t Bucket, uint32_t Index)
1311         : Bucket(Bucket), Index(Index) {}
1312     bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; }
1313   };
1314 
1315   if (NI.getBucketCount() == 0) {
1316     warn() << formatv("Name Index @ {0:x} does not contain a hash table.\n",
1317                       NI.getUnitOffset());
1318     return;
1319   }
1320 
1321   // Build up a list of (Bucket, Index) pairs. We use this later to verify that
1322   // each Name is reachable from the appropriate bucket.
1323   std::vector<BucketInfo> BucketStarts;
1324   BucketStarts.reserve(NI.getBucketCount() + 1);
1325   const uint64_t OrigNumberOfErrors = ErrorCategory.GetNumErrors();
1326   for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; ++Bucket) {
1327     uint32_t Index = NI.getBucketArrayEntry(Bucket);
1328     if (Index > NI.getNameCount()) {
1329       ErrorCategory.Report("Name Index Bucket contains invalid value", [&]() {
1330         error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid "
1331                            "value {2}. Valid range is [0, {3}].\n",
1332                            Bucket, NI.getUnitOffset(), Index,
1333                            NI.getNameCount());
1334       });
1335       continue;
1336     }
1337     if (Index > 0)
1338       BucketStarts.emplace_back(Bucket, Index);
1339   }
1340 
1341   // If there were any buckets with invalid values, skip further checks as they
1342   // will likely produce many errors which will only confuse the actual root
1343   // problem.
1344   if (OrigNumberOfErrors != ErrorCategory.GetNumErrors())
1345     return;
1346 
1347   // Sort the list in the order of increasing "Index" entries.
1348   array_pod_sort(BucketStarts.begin(), BucketStarts.end());
1349 
1350   // Insert a sentinel entry at the end, so we can check that the end of the
1351   // table is covered in the loop below.
1352   BucketStarts.emplace_back(NI.getBucketCount(), NI.getNameCount() + 1);
1353 
1354   // Loop invariant: NextUncovered is the (1-based) index of the first Name
1355   // which is not reachable by any of the buckets we processed so far (and
1356   // hasn't been reported as uncovered).
1357   uint32_t NextUncovered = 1;
1358   for (const BucketInfo &B : BucketStarts) {
1359     // Under normal circumstances B.Index be equal to NextUncovered, but it can
1360     // be less if a bucket points to names which are already known to be in some
1361     // bucket we processed earlier. In that case, we won't trigger this error,
1362     // but report the mismatched hash value error instead. (We know the hash
1363     // will not match because we have already verified that the name's hash
1364     // puts it into the previous bucket.)
1365     if (B.Index > NextUncovered) {
1366       ErrorCategory.Report("Name table entries uncovered by hash table", [&]() {
1367         error() << formatv("Name Index @ {0:x}: Name table entries [{1}, {2}] "
1368                            "are not covered by the hash table.\n",
1369                            NI.getUnitOffset(), NextUncovered, B.Index - 1);
1370       });
1371     }
1372     uint32_t Idx = B.Index;
1373 
1374     // The rest of the checks apply only to non-sentinel entries.
1375     if (B.Bucket == NI.getBucketCount())
1376       break;
1377 
1378     // This triggers if a non-empty bucket points to a name with a mismatched
1379     // hash. Clients are likely to interpret this as an empty bucket, because a
1380     // mismatched hash signals the end of a bucket, but if this is indeed an
1381     // empty bucket, the producer should have signalled this by marking the
1382     // bucket as empty.
1383     uint32_t FirstHash = NI.getHashArrayEntry(Idx);
1384     if (FirstHash % NI.getBucketCount() != B.Bucket) {
1385       ErrorCategory.Report("Name Index point to mismatched hash value", [&]() {
1386         error() << formatv(
1387             "Name Index @ {0:x}: Bucket {1} is not empty but points to a "
1388             "mismatched hash value {2:x} (belonging to bucket {3}).\n",
1389             NI.getUnitOffset(), B.Bucket, FirstHash,
1390             FirstHash % NI.getBucketCount());
1391       });
1392     }
1393 
1394     // This find the end of this bucket and also verifies that all the hashes in
1395     // this bucket are correct by comparing the stored hashes to the ones we
1396     // compute ourselves.
1397     while (Idx <= NI.getNameCount()) {
1398       uint32_t Hash = NI.getHashArrayEntry(Idx);
1399       if (Hash % NI.getBucketCount() != B.Bucket)
1400         break;
1401 
1402       const char *Str = NI.getNameTableEntry(Idx).getString();
1403       if (caseFoldingDjbHash(Str) != Hash) {
1404         ErrorCategory.Report(
1405             "String hash doesn't match Name Index hash", [&]() {
1406               error() << formatv(
1407                   "Name Index @ {0:x}: String ({1}) at index {2} "
1408                   "hashes to {3:x}, but "
1409                   "the Name Index hash is {4:x}\n",
1410                   NI.getUnitOffset(), Str, Idx, caseFoldingDjbHash(Str), Hash);
1411             });
1412       }
1413       ++Idx;
1414     }
1415     NextUncovered = std::max(NextUncovered, Idx);
1416   }
1417 }
1418 
verifyNameIndexAttribute(const DWARFDebugNames::NameIndex & NI,const DWARFDebugNames::Abbrev & Abbr,DWARFDebugNames::AttributeEncoding AttrEnc)1419 void DWARFVerifier::verifyNameIndexAttribute(
1420     const DWARFDebugNames::NameIndex &NI, const DWARFDebugNames::Abbrev &Abbr,
1421     DWARFDebugNames::AttributeEncoding AttrEnc) {
1422   StringRef FormName = dwarf::FormEncodingString(AttrEnc.Form);
1423   if (FormName.empty()) {
1424     ErrorCategory.Report("Unknown NameIndex Abbreviation", [&]() {
1425       error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
1426                          "unknown form: {3}.\n",
1427                          NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
1428                          AttrEnc.Form);
1429     });
1430     return;
1431   }
1432 
1433   if (AttrEnc.Index == DW_IDX_type_hash) {
1434     if (AttrEnc.Form != dwarf::DW_FORM_data8) {
1435       ErrorCategory.Report("Unexpected NameIndex Abbreviation", [&]() {
1436         error() << formatv(
1437             "NameIndex @ {0:x}: Abbreviation {1:x}: DW_IDX_type_hash "
1438             "uses an unexpected form {2} (should be {3}).\n",
1439             NI.getUnitOffset(), Abbr.Code, AttrEnc.Form, dwarf::DW_FORM_data8);
1440       });
1441       return;
1442     }
1443     return;
1444   }
1445 
1446   if (AttrEnc.Index == dwarf::DW_IDX_parent) {
1447     constexpr static auto AllowedForms = {dwarf::Form::DW_FORM_flag_present,
1448                                           dwarf::Form::DW_FORM_ref4};
1449     if (!is_contained(AllowedForms, AttrEnc.Form)) {
1450       ErrorCategory.Report("Unexpected NameIndex Abbreviation", [&]() {
1451         error() << formatv(
1452             "NameIndex @ {0:x}: Abbreviation {1:x}: DW_IDX_parent "
1453             "uses an unexpected form {2} (should be "
1454             "DW_FORM_ref4 or DW_FORM_flag_present).\n",
1455             NI.getUnitOffset(), Abbr.Code, AttrEnc.Form);
1456       });
1457       return;
1458     }
1459     return;
1460   }
1461 
1462   // A list of known index attributes and their expected form classes.
1463   // DW_IDX_type_hash is handled specially in the check above, as it has a
1464   // specific form (not just a form class) we should expect.
1465   struct FormClassTable {
1466     dwarf::Index Index;
1467     DWARFFormValue::FormClass Class;
1468     StringLiteral ClassName;
1469   };
1470   static constexpr FormClassTable Table[] = {
1471       {dwarf::DW_IDX_compile_unit, DWARFFormValue::FC_Constant, {"constant"}},
1472       {dwarf::DW_IDX_type_unit, DWARFFormValue::FC_Constant, {"constant"}},
1473       {dwarf::DW_IDX_die_offset, DWARFFormValue::FC_Reference, {"reference"}},
1474   };
1475 
1476   ArrayRef<FormClassTable> TableRef(Table);
1477   auto Iter = find_if(TableRef, [AttrEnc](const FormClassTable &T) {
1478     return T.Index == AttrEnc.Index;
1479   });
1480   if (Iter == TableRef.end()) {
1481     warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains an "
1482                       "unknown index attribute: {2}.\n",
1483                       NI.getUnitOffset(), Abbr.Code, AttrEnc.Index);
1484     return;
1485   }
1486 
1487   if (!DWARFFormValue(AttrEnc.Form).isFormClass(Iter->Class)) {
1488     ErrorCategory.Report("Unexpected NameIndex Abbreviation", [&]() {
1489       error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
1490                          "unexpected form {3} (expected form class {4}).\n",
1491                          NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
1492                          AttrEnc.Form, Iter->ClassName);
1493     });
1494     return;
1495   }
1496 }
1497 
verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex & NI)1498 void DWARFVerifier::verifyNameIndexAbbrevs(
1499     const DWARFDebugNames::NameIndex &NI) {
1500   for (const auto &Abbrev : NI.getAbbrevs()) {
1501     StringRef TagName = dwarf::TagString(Abbrev.Tag);
1502     if (TagName.empty()) {
1503       warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} references an "
1504                         "unknown tag: {2}.\n",
1505                         NI.getUnitOffset(), Abbrev.Code, Abbrev.Tag);
1506     }
1507     SmallSet<unsigned, 5> Attributes;
1508     for (const auto &AttrEnc : Abbrev.Attributes) {
1509       if (!Attributes.insert(AttrEnc.Index).second) {
1510         ErrorCategory.Report(
1511             "NameIndex Abbreviateion contains multiple attributes", [&]() {
1512               error() << formatv(
1513                   "NameIndex @ {0:x}: Abbreviation {1:x} contains "
1514                   "multiple {2} attributes.\n",
1515                   NI.getUnitOffset(), Abbrev.Code, AttrEnc.Index);
1516             });
1517         continue;
1518       }
1519       verifyNameIndexAttribute(NI, Abbrev, AttrEnc);
1520     }
1521 
1522     if (NI.getCUCount() > 1 && !Attributes.count(dwarf::DW_IDX_compile_unit) &&
1523         !Attributes.count(dwarf::DW_IDX_type_unit)) {
1524       ErrorCategory.Report("Abbreviation contains no attribute", [&]() {
1525         error() << formatv("NameIndex @ {0:x}: Indexing multiple compile units "
1526                            "and abbreviation {1:x} has no DW_IDX_compile_unit "
1527                            "or DW_IDX_type_unit attribute.\n",
1528                            NI.getUnitOffset(), Abbrev.Code);
1529       });
1530     }
1531     if (!Attributes.count(dwarf::DW_IDX_die_offset)) {
1532       ErrorCategory.Report("Abbreviate in NameIndex missing attribute", [&]() {
1533         error() << formatv(
1534             "NameIndex @ {0:x}: Abbreviation {1:x} has no {2} attribute.\n",
1535             NI.getUnitOffset(), Abbrev.Code, dwarf::DW_IDX_die_offset);
1536       });
1537     }
1538   }
1539 }
1540 
1541 /// Constructs a full name for a DIE. Potentially it does recursive lookup on
1542 /// DIEs. This can lead to extraction of DIEs in a different CU or TU.
getNames(const DWARFDie & DIE,bool IncludeStrippedTemplateNames,bool IncludeObjCNames=true,bool IncludeLinkageName=true)1543 static SmallVector<std::string, 3> getNames(const DWARFDie &DIE,
1544                                             bool IncludeStrippedTemplateNames,
1545                                             bool IncludeObjCNames = true,
1546                                             bool IncludeLinkageName = true) {
1547   SmallVector<std::string, 3> Result;
1548   if (const char *Str = DIE.getShortName()) {
1549     StringRef Name(Str);
1550     Result.emplace_back(Name);
1551     if (IncludeStrippedTemplateNames) {
1552       if (std::optional<StringRef> StrippedName =
1553               StripTemplateParameters(Result.back()))
1554         // Convert to std::string and push; emplacing the StringRef may trigger
1555         // a vector resize which may destroy the StringRef memory.
1556         Result.push_back(StrippedName->str());
1557     }
1558 
1559     if (IncludeObjCNames) {
1560       if (std::optional<ObjCSelectorNames> ObjCNames =
1561               getObjCNamesIfSelector(Name)) {
1562         Result.emplace_back(ObjCNames->ClassName);
1563         Result.emplace_back(ObjCNames->Selector);
1564         if (ObjCNames->ClassNameNoCategory)
1565           Result.emplace_back(*ObjCNames->ClassNameNoCategory);
1566         if (ObjCNames->MethodNameNoCategory)
1567           Result.push_back(std::move(*ObjCNames->MethodNameNoCategory));
1568       }
1569     }
1570   } else if (DIE.getTag() == dwarf::DW_TAG_namespace)
1571     Result.emplace_back("(anonymous namespace)");
1572 
1573   if (IncludeLinkageName) {
1574     if (const char *Str = DIE.getLinkageName())
1575       Result.emplace_back(Str);
1576   }
1577 
1578   return Result;
1579 }
1580 
verifyNameIndexEntries(const DWARFDebugNames::NameIndex & NI,const DWARFDebugNames::NameTableEntry & NTE,const DenseMap<uint64_t,DWARFUnit * > & CUOffsetsToDUMap)1581 void DWARFVerifier::verifyNameIndexEntries(
1582     const DWARFDebugNames::NameIndex &NI,
1583     const DWARFDebugNames::NameTableEntry &NTE,
1584     const DenseMap<uint64_t, DWARFUnit *> &CUOffsetsToDUMap) {
1585   const char *CStr = NTE.getString();
1586   if (!CStr) {
1587     ErrorCategory.Report("Unable to get string associated with name", [&]() {
1588       error() << formatv("Name Index @ {0:x}: Unable to get string associated "
1589                          "with name {1}.\n",
1590                          NI.getUnitOffset(), NTE.getIndex());
1591     });
1592     return;
1593   }
1594   StringRef Str(CStr);
1595   unsigned NumEntries = 0;
1596   uint64_t EntryID = NTE.getEntryOffset();
1597   uint64_t NextEntryID = EntryID;
1598   Expected<DWARFDebugNames::Entry> EntryOr = NI.getEntry(&NextEntryID);
1599   for (; EntryOr; ++NumEntries, EntryID = NextEntryID,
1600                                 EntryOr = NI.getEntry(&NextEntryID)) {
1601 
1602     std::optional<uint64_t> CUIndex = EntryOr->getRelatedCUIndex();
1603     std::optional<uint64_t> TUIndex = EntryOr->getTUIndex();
1604     if (CUIndex && *CUIndex >= NI.getCUCount()) {
1605       ErrorCategory.Report("Name Index entry contains invalid CU index", [&]() {
1606         error() << formatv("Name Index @ {0:x}: Entry @ {1:x} contains an "
1607                            "invalid CU index ({2}).\n",
1608                            NI.getUnitOffset(), EntryID, *CUIndex);
1609       });
1610       continue;
1611     }
1612     const uint32_t NumLocalTUs = NI.getLocalTUCount();
1613     const uint32_t NumForeignTUs = NI.getForeignTUCount();
1614     if (TUIndex && *TUIndex >= (NumLocalTUs + NumForeignTUs)) {
1615       ErrorCategory.Report("Name Index entry contains invalid TU index", [&]() {
1616         error() << formatv("Name Index @ {0:x}: Entry @ {1:x} contains an "
1617                            "invalid TU index ({2}).\n",
1618                            NI.getUnitOffset(), EntryID, *TUIndex);
1619       });
1620       continue;
1621     }
1622     std::optional<uint64_t> UnitOffset;
1623     if (TUIndex) {
1624       // We have a local or foreign type unit.
1625       if (*TUIndex >= NumLocalTUs) {
1626         // This is a foreign type unit, we will find the right type unit by
1627         // type unit signature later in this function.
1628 
1629         // Foreign type units must have a valid CU index, either from a
1630         // DW_IDX_comp_unit attribute value or from the .debug_names table only
1631         // having a single compile unit. We need the originating compile unit
1632         // because foreign type units can come from any .dwo file, yet only one
1633         // copy of the type unit will end up in the .dwp file.
1634         if (CUIndex) {
1635           // We need the local skeleton unit offset for the code below.
1636           UnitOffset = NI.getCUOffset(*CUIndex);
1637         } else {
1638           ErrorCategory.Report(
1639               "Name Index entry contains foreign TU index with invalid CU "
1640               "index",
1641               [&]() {
1642                 error() << formatv(
1643                     "Name Index @ {0:x}: Entry @ {1:x} contains an "
1644                     "foreign TU index ({2}) with no CU index.\n",
1645                     NI.getUnitOffset(), EntryID, *TUIndex);
1646               });
1647           continue;
1648         }
1649       } else {
1650         // Local type unit, get the DWARF unit offset for the type unit.
1651         UnitOffset = NI.getLocalTUOffset(*TUIndex);
1652       }
1653     } else if (CUIndex) {
1654       // Local CU entry, get the DWARF unit offset for the CU.
1655       UnitOffset = NI.getCUOffset(*CUIndex);
1656     }
1657 
1658     // Watch for tombstoned type unit entries.
1659     if (!UnitOffset || UnitOffset == UINT32_MAX)
1660       continue;
1661     // For split DWARF entries we need to make sure we find the non skeleton
1662     // DWARF unit that is needed and use that's DWARF unit offset as the
1663     // DIE offset to add the DW_IDX_die_offset to.
1664     DWARFUnit *DU = DCtx.getUnitForOffset(*UnitOffset);
1665     if (DU == nullptr || DU->getOffset() != *UnitOffset) {
1666       // If we didn't find a DWARF Unit from the UnitOffset, or if the offset
1667       // of the unit doesn't match exactly, report an error.
1668       ErrorCategory.Report(
1669           "Name Index entry contains invalid CU or TU offset", [&]() {
1670             error() << formatv("Name Index @ {0:x}: Entry @ {1:x} contains an "
1671                                "invalid CU or TU offset {2:x}.\n",
1672                                NI.getUnitOffset(), EntryID, *UnitOffset);
1673           });
1674       continue;
1675     }
1676     // This function will try to get the non skeleton unit DIE, but if it is
1677     // unable to load the .dwo file from the .dwo or .dwp, it will return the
1678     // unit DIE of the DWARFUnit in "DU". So we need to check if the DWARFUnit
1679     // has a .dwo file, but we couldn't load it.
1680 
1681     // FIXME: Need a follow up patch to fix usage of
1682     // DWARFUnit::getNonSkeletonUnitDIE() so that it returns an empty DWARFDie
1683     // if the .dwo file isn't available and clean up other uses of this function
1684     // call to properly deal with it. It isn't clear that getNonSkeletonUnitDIE
1685     // will return the unit DIE of DU if we aren't able to get the .dwo file,
1686     // but that is what the function currently does.
1687     DWARFUnit *NonSkeletonUnit = nullptr;
1688     if (DU->getDWOId()) {
1689       auto Iter = CUOffsetsToDUMap.find(DU->getOffset());
1690       NonSkeletonUnit = Iter->second;
1691     } else {
1692       NonSkeletonUnit = DU;
1693     }
1694     DWARFDie UnitDie = DU->getUnitDIE();
1695     if (DU->getDWOId() && !NonSkeletonUnit->isDWOUnit()) {
1696       ErrorCategory.Report("Unable to get load .dwo file", [&]() {
1697         error() << formatv(
1698             "Name Index @ {0:x}: Entry @ {1:x} unable to load "
1699             ".dwo file \"{2}\" for DWARF unit @ {3:x}.\n",
1700             NI.getUnitOffset(), EntryID,
1701             dwarf::toString(UnitDie.find({DW_AT_dwo_name, DW_AT_GNU_dwo_name})),
1702             *UnitOffset);
1703       });
1704       continue;
1705     }
1706 
1707     if (TUIndex && *TUIndex >= NumLocalTUs) {
1708       // We have a foreign TU index, which either means we have a .dwo file
1709       // that has one or more type units, or we have a .dwp file with one or
1710       // more type units. We need to get the type unit from the DWARFContext
1711       // of the .dwo. We got the NonSkeletonUnitDie above that has the .dwo
1712       // or .dwp DWARF context, so we have to get the type unit from that file.
1713       // We have also verified that NonSkeletonUnitDie points to a DWO file
1714       // above, so we know we have the right file.
1715       const uint32_t ForeignTUIdx = *TUIndex - NumLocalTUs;
1716       const uint64_t TypeSig = NI.getForeignTUSignature(ForeignTUIdx);
1717       llvm::DWARFContext &NonSkeletonDCtx = NonSkeletonUnit->getContext();
1718       // Now find the type unit from the type signature and then update the
1719       // NonSkeletonUnitDie to point to the actual type unit in the .dwo/.dwp.
1720       NonSkeletonUnit =
1721           NonSkeletonDCtx.getTypeUnitForHash(TypeSig, /*IsDWO=*/true);
1722       // If we have foreign type unit in a DWP file, then we need to ignore
1723       // any entries from type units that don't match the one that made it into
1724       // the .dwp file.
1725       if (NonSkeletonDCtx.isDWP()) {
1726         DWARFDie NonSkeletonUnitDie = NonSkeletonUnit->getUnitDIE(true);
1727         StringRef DUDwoName = dwarf::toStringRef(
1728             UnitDie.find({DW_AT_dwo_name, DW_AT_GNU_dwo_name}));
1729         StringRef TUDwoName = dwarf::toStringRef(
1730             NonSkeletonUnitDie.find({DW_AT_dwo_name, DW_AT_GNU_dwo_name}));
1731         if (DUDwoName != TUDwoName)
1732           continue; // Skip this TU, it isn't the one in the .dwp file.
1733       }
1734     }
1735     uint64_t DIEOffset =
1736         NonSkeletonUnit->getOffset() + *EntryOr->getDIEUnitOffset();
1737     const uint64_t NextUnitOffset = NonSkeletonUnit->getNextUnitOffset();
1738     // DIE offsets are relative to the specified CU or TU. Make sure the DIE
1739     // offsets is a valid relative offset.
1740     if (DIEOffset >= NextUnitOffset) {
1741       ErrorCategory.Report("NameIndex relative DIE offset too large", [&]() {
1742         error() << formatv("Name Index @ {0:x}: Entry @ {1:x} references a "
1743                            "DIE @ {2:x} when CU or TU ends at {3:x}.\n",
1744                            NI.getUnitOffset(), EntryID, DIEOffset,
1745                            NextUnitOffset);
1746       });
1747       continue;
1748     }
1749     DWARFDie DIE = NonSkeletonUnit->getDIEForOffset(DIEOffset);
1750     if (!DIE) {
1751       ErrorCategory.Report("NameIndex references nonexistent DIE", [&]() {
1752         error() << formatv("Name Index @ {0:x}: Entry @ {1:x} references a "
1753                            "non-existing DIE @ {2:x}.\n",
1754                            NI.getUnitOffset(), EntryID, DIEOffset);
1755       });
1756       continue;
1757     }
1758     // Only compare the DIE we found's DWARFUnit offset if the DIE lives in
1759     // the DWARFUnit from the DW_IDX_comp_unit or DW_IDX_type_unit. If we are
1760     // using split DWARF, then the DIE's DWARFUnit doesn't need to match the
1761     // skeleton unit.
1762     if (DIE.getDwarfUnit() == DU &&
1763         DIE.getDwarfUnit()->getOffset() != *UnitOffset) {
1764       ErrorCategory.Report("Name index contains mismatched CU of DIE", [&]() {
1765         error() << formatv(
1766             "Name Index @ {0:x}: Entry @ {1:x}: mismatched CU of "
1767             "DIE @ {2:x}: index - {3:x}; debug_info - {4:x}.\n",
1768             NI.getUnitOffset(), EntryID, DIEOffset, *UnitOffset,
1769             DIE.getDwarfUnit()->getOffset());
1770       });
1771     }
1772     if (DIE.getTag() != EntryOr->tag()) {
1773       ErrorCategory.Report("Name Index contains mismatched Tag of DIE", [&]() {
1774         error() << formatv(
1775             "Name Index @ {0:x}: Entry @ {1:x}: mismatched Tag of "
1776             "DIE @ {2:x}: index - {3}; debug_info - {4}.\n",
1777             NI.getUnitOffset(), EntryID, DIEOffset, EntryOr->tag(),
1778             DIE.getTag());
1779       });
1780     }
1781 
1782     // We allow an extra name for functions: their name without any template
1783     // parameters.
1784     auto IncludeStrippedTemplateNames =
1785         DIE.getTag() == DW_TAG_subprogram ||
1786         DIE.getTag() == DW_TAG_inlined_subroutine;
1787     auto EntryNames = getNames(DIE, IncludeStrippedTemplateNames);
1788     if (!is_contained(EntryNames, Str)) {
1789       ErrorCategory.Report("Name Index contains mismatched name of DIE", [&]() {
1790         error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched Name "
1791                            "of DIE @ {2:x}: index - {3}; debug_info - {4}.\n",
1792                            NI.getUnitOffset(), EntryID, DIEOffset, Str,
1793                            make_range(EntryNames.begin(), EntryNames.end()));
1794       });
1795     }
1796   }
1797   handleAllErrors(
1798       EntryOr.takeError(),
1799       [&](const DWARFDebugNames::SentinelError &) {
1800         if (NumEntries > 0)
1801           return;
1802         ErrorCategory.Report(
1803             "NameIndex Name is not associated with any entries", [&]() {
1804               error() << formatv("Name Index @ {0:x}: Name {1} ({2}) is "
1805                                  "not associated with any entries.\n",
1806                                  NI.getUnitOffset(), NTE.getIndex(), Str);
1807             });
1808       },
1809       [&](const ErrorInfoBase &Info) {
1810         ErrorCategory.Report("Uncategorized NameIndex error", [&]() {
1811           error() << formatv("Name Index @ {0:x}: Name {1} ({2}): {3}\n",
1812                              NI.getUnitOffset(), NTE.getIndex(), Str,
1813                              Info.message());
1814         });
1815       });
1816 }
1817 
isVariableIndexable(const DWARFDie & Die,DWARFContext & DCtx)1818 static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
1819   Expected<std::vector<DWARFLocationExpression>> Loc =
1820       Die.getLocations(DW_AT_location);
1821   if (!Loc) {
1822     consumeError(Loc.takeError());
1823     return false;
1824   }
1825   DWARFUnit *U = Die.getDwarfUnit();
1826   for (const auto &Entry : *Loc) {
1827     DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(),
1828                        U->getAddressByteSize());
1829     DWARFExpression Expression(Data, U->getAddressByteSize(),
1830                                U->getFormParams().Format);
1831     bool IsInteresting =
1832         any_of(Expression, [](const DWARFExpression::Operation &Op) {
1833           return !Op.isError() && (Op.getCode() == DW_OP_addr ||
1834                                    Op.getCode() == DW_OP_form_tls_address ||
1835                                    Op.getCode() == DW_OP_GNU_push_tls_address);
1836         });
1837     if (IsInteresting)
1838       return true;
1839   }
1840   return false;
1841 }
1842 
verifyNameIndexCompleteness(const DWARFDie & Die,const DWARFDebugNames::NameIndex & NI,const StringMap<DenseSet<uint64_t>> & NamesToDieOffsets)1843 void DWARFVerifier::verifyNameIndexCompleteness(
1844     const DWARFDie &Die, const DWARFDebugNames::NameIndex &NI,
1845     const StringMap<DenseSet<uint64_t>> &NamesToDieOffsets) {
1846 
1847   // First check, if the Die should be indexed. The code follows the DWARF v5
1848   // wording as closely as possible.
1849 
1850   // "All non-defining declarations (that is, debugging information entries
1851   // with a DW_AT_declaration attribute) are excluded."
1852   if (Die.find(DW_AT_declaration))
1853     return;
1854 
1855   // "DW_TAG_namespace debugging information entries without a DW_AT_name
1856   // attribute are included with the name “(anonymous namespace)”.
1857   // All other debugging information entries without a DW_AT_name attribute
1858   // are excluded."
1859   // "If a subprogram or inlined subroutine is included, and has a
1860   // DW_AT_linkage_name attribute, there will be an additional index entry for
1861   // the linkage name."
1862   auto IncludeLinkageName = Die.getTag() == DW_TAG_subprogram ||
1863                             Die.getTag() == DW_TAG_inlined_subroutine;
1864   // We *allow* stripped template names / ObjectiveC names as extra entries into
1865   // the table, but we don't *require* them to pass the completeness test.
1866   auto IncludeStrippedTemplateNames = false;
1867   auto IncludeObjCNames = false;
1868   auto EntryNames = getNames(Die, IncludeStrippedTemplateNames,
1869                              IncludeObjCNames, IncludeLinkageName);
1870   if (EntryNames.empty())
1871     return;
1872 
1873   // We deviate from the specification here, which says:
1874   // "The name index must contain an entry for each debugging information entry
1875   // that defines a named subprogram, label, variable, type, or namespace,
1876   // subject to ..."
1877   // Explicitly exclude all TAGs that we know shouldn't be indexed.
1878   switch (Die.getTag()) {
1879   // Compile units and modules have names but shouldn't be indexed.
1880   case DW_TAG_compile_unit:
1881   case DW_TAG_module:
1882     return;
1883 
1884   // Function and template parameters are not globally visible, so we shouldn't
1885   // index them.
1886   case DW_TAG_formal_parameter:
1887   case DW_TAG_template_value_parameter:
1888   case DW_TAG_template_type_parameter:
1889   case DW_TAG_GNU_template_parameter_pack:
1890   case DW_TAG_GNU_template_template_param:
1891     return;
1892 
1893   // Object members aren't globally visible.
1894   case DW_TAG_member:
1895     return;
1896 
1897   // According to a strict reading of the specification, enumerators should not
1898   // be indexed (and LLVM currently does not do that). However, this causes
1899   // problems for the debuggers, so we may need to reconsider this.
1900   case DW_TAG_enumerator:
1901     return;
1902 
1903   // Imported declarations should not be indexed according to the specification
1904   // and LLVM currently does not do that.
1905   case DW_TAG_imported_declaration:
1906     return;
1907 
1908   // "DW_TAG_subprogram, DW_TAG_inlined_subroutine, and DW_TAG_label debugging
1909   // information entries without an address attribute (DW_AT_low_pc,
1910   // DW_AT_high_pc, DW_AT_ranges, or DW_AT_entry_pc) are excluded."
1911   case DW_TAG_subprogram:
1912   case DW_TAG_inlined_subroutine:
1913   case DW_TAG_label:
1914     if (Die.findRecursively(
1915             {DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_entry_pc}))
1916       break;
1917     return;
1918 
1919   // "DW_TAG_variable debugging information entries with a DW_AT_location
1920   // attribute that includes a DW_OP_addr or DW_OP_form_tls_address operator are
1921   // included; otherwise, they are excluded."
1922   //
1923   // LLVM extension: We also add DW_OP_GNU_push_tls_address to this list.
1924   case DW_TAG_variable:
1925     if (isVariableIndexable(Die, DCtx))
1926       break;
1927     return;
1928 
1929   default:
1930     break;
1931   }
1932 
1933   // Now we know that our Die should be present in the Index. Let's check if
1934   // that's the case.
1935   uint64_t DieUnitOffset = Die.getOffset() - Die.getDwarfUnit()->getOffset();
1936   for (StringRef Name : EntryNames) {
1937     auto iter = NamesToDieOffsets.find(Name);
1938     if (iter == NamesToDieOffsets.end() || !iter->second.count(DieUnitOffset)) {
1939       ErrorCategory.Report(
1940           "Name Index DIE entry missing name",
1941           llvm::dwarf::TagString(Die.getTag()), [&]() {
1942             error() << formatv(
1943                 "Name Index @ {0:x}: Entry for DIE @ {1:x} ({2}) with "
1944                 "name {3} missing.\n",
1945                 NI.getUnitOffset(), Die.getOffset(), Die.getTag(), Name);
1946           });
1947     }
1948   }
1949 }
1950 
1951 /// Extracts all the data for CU/TUs so we can access it in parallel without
1952 /// locks.
extractCUsTus(DWARFContext & DCtx)1953 static void extractCUsTus(DWARFContext &DCtx) {
1954   // Abbrev DeclSet is shared beween the units.
1955   for (auto &CUTU : DCtx.normal_units()) {
1956     CUTU->getUnitDIE();
1957     CUTU->getBaseAddress();
1958   }
1959   parallelForEach(DCtx.normal_units(), [&](const auto &CUTU) {
1960     if (Error E = CUTU->tryExtractDIEsIfNeeded(false))
1961       DCtx.getRecoverableErrorHandler()(std::move(E));
1962   });
1963 
1964   // Invoking getNonSkeletonUnitDIE() sets up all the base pointers for DWO
1965   // Units. This is needed for getBaseAddress().
1966   for (const auto &CU : DCtx.compile_units()) {
1967     if (!CU->getDWOId())
1968       continue;
1969     DWARFContext &NonSkeletonContext =
1970         CU->getNonSkeletonUnitDIE().getDwarfUnit()->getContext();
1971     // Iterates over CUs and TUs.
1972     for (auto &CUTU : NonSkeletonContext.dwo_units()) {
1973       CUTU->getUnitDIE();
1974       CUTU->getBaseAddress();
1975     }
1976     parallelForEach(NonSkeletonContext.dwo_units(), [&](const auto &CUTU) {
1977       if (Error E = CUTU->tryExtractDIEsIfNeeded(false))
1978         DCtx.getRecoverableErrorHandler()(std::move(E));
1979     });
1980     // If context is for DWP we only need to extract once.
1981     if (NonSkeletonContext.isDWP())
1982       break;
1983   }
1984 }
1985 
verifyDebugNames(const DWARFSection & AccelSection,const DataExtractor & StrData)1986 void DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
1987                                      const DataExtractor &StrData) {
1988   DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), AccelSection,
1989                                       DCtx.isLittleEndian(), 0);
1990   DWARFDebugNames AccelTable(AccelSectionData, StrData);
1991 
1992   OS << "Verifying .debug_names...\n";
1993 
1994   // This verifies that we can read individual name indices and their
1995   // abbreviation tables.
1996   if (Error E = AccelTable.extract()) {
1997     std::string Msg = toString(std::move(E));
1998     ErrorCategory.Report("Accelerator Table Error",
1999                          [&]() { error() << Msg << '\n'; });
2000     return;
2001   }
2002   const uint64_t OriginalNumErrors = ErrorCategory.GetNumErrors();
2003   verifyDebugNamesCULists(AccelTable);
2004   for (const auto &NI : AccelTable)
2005     verifyNameIndexBuckets(NI, StrData);
2006   parallelForEach(AccelTable, [&](const DWARFDebugNames::NameIndex &NI) {
2007     verifyNameIndexAbbrevs(NI);
2008   });
2009 
2010   // Don't attempt Entry validation if any of the previous checks found errors
2011   if (OriginalNumErrors != ErrorCategory.GetNumErrors())
2012     return;
2013   DenseMap<uint64_t, DWARFUnit *> CUOffsetsToDUMap;
2014   for (const auto &CU : DCtx.compile_units()) {
2015     if (!(CU->getVersion() >= 5 && CU->getDWOId()))
2016       continue;
2017     CUOffsetsToDUMap[CU->getOffset()] =
2018         CU->getNonSkeletonUnitDIE().getDwarfUnit();
2019   }
2020   extractCUsTus(DCtx);
2021   for (const DWARFDebugNames::NameIndex &NI : AccelTable) {
2022     parallelForEach(NI, [&](DWARFDebugNames::NameTableEntry NTE) {
2023       verifyNameIndexEntries(NI, NTE, CUOffsetsToDUMap);
2024     });
2025   }
2026 
2027   auto populateNameToOffset =
2028       [&](const DWARFDebugNames::NameIndex &NI,
2029           StringMap<DenseSet<uint64_t>> &NamesToDieOffsets) {
2030         for (const DWARFDebugNames::NameTableEntry &NTE : NI) {
2031           const char *tName = NTE.getString();
2032           const std::string Name = tName ? std::string(tName) : "";
2033           uint64_t EntryID = NTE.getEntryOffset();
2034           Expected<DWARFDebugNames::Entry> EntryOr = NI.getEntry(&EntryID);
2035           auto Iter = NamesToDieOffsets.insert({Name, DenseSet<uint64_t>(3)});
2036           for (; EntryOr; EntryOr = NI.getEntry(&EntryID)) {
2037             if (std::optional<uint64_t> DieOffset = EntryOr->getDIEUnitOffset())
2038               Iter.first->second.insert(*DieOffset);
2039           }
2040           handleAllErrors(
2041               EntryOr.takeError(),
2042               [&](const DWARFDebugNames::SentinelError &) {
2043                 if (!NamesToDieOffsets.empty())
2044                   return;
2045                 ErrorCategory.Report(
2046                     "NameIndex Name is not associated with any entries", [&]() {
2047                       error()
2048                           << formatv("Name Index @ {0:x}: Name {1} ({2}) is "
2049                                      "not associated with any entries.\n",
2050                                      NI.getUnitOffset(), NTE.getIndex(), Name);
2051                     });
2052               },
2053               [&](const ErrorInfoBase &Info) {
2054                 ErrorCategory.Report("Uncategorized NameIndex error", [&]() {
2055                   error() << formatv(
2056                       "Name Index @ {0:x}: Name {1} ({2}): {3}\n",
2057                       NI.getUnitOffset(), NTE.getIndex(), Name, Info.message());
2058                 });
2059               });
2060         }
2061       };
2062   // NameIndex can have multiple CUs. For example if it was created by BOLT.
2063   // So better to iterate over NI, and then over CUs in it.
2064   for (const DWARFDebugNames::NameIndex &NI : AccelTable) {
2065     StringMap<DenseSet<uint64_t>> NamesToDieOffsets(NI.getNameCount());
2066     populateNameToOffset(NI, NamesToDieOffsets);
2067     for (uint32_t i = 0, iEnd = NI.getCUCount(); i < iEnd; ++i) {
2068       const uint64_t CUOffset = NI.getCUOffset(i);
2069       DWARFUnit *U = DCtx.getUnitForOffset(CUOffset);
2070       DWARFCompileUnit *CU = dyn_cast<DWARFCompileUnit>(U);
2071       if (CU) {
2072         if (CU->getDWOId()) {
2073           DWARFDie CUDie = CU->getUnitDIE(true);
2074           DWARFDie NonSkeletonUnitDie =
2075               CUDie.getDwarfUnit()->getNonSkeletonUnitDIE(false);
2076           if (CUDie != NonSkeletonUnitDie) {
2077             parallelForEach(
2078                 NonSkeletonUnitDie.getDwarfUnit()->dies(),
2079                 [&](const DWARFDebugInfoEntry &Die) {
2080                   verifyNameIndexCompleteness(
2081                       DWARFDie(NonSkeletonUnitDie.getDwarfUnit(), &Die), NI,
2082                       NamesToDieOffsets);
2083                 });
2084           }
2085         } else {
2086           parallelForEach(CU->dies(), [&](const DWARFDebugInfoEntry &Die) {
2087             verifyNameIndexCompleteness(DWARFDie(CU, &Die), NI,
2088                                         NamesToDieOffsets);
2089           });
2090         }
2091       }
2092     }
2093   }
2094 }
2095 
handleAccelTables()2096 bool DWARFVerifier::handleAccelTables() {
2097   const DWARFObject &D = DCtx.getDWARFObj();
2098   DataExtractor StrData(D.getStrSection(), DCtx.isLittleEndian(), 0);
2099   if (!D.getAppleNamesSection().Data.empty())
2100     verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData, ".apple_names");
2101   if (!D.getAppleTypesSection().Data.empty())
2102     verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData, ".apple_types");
2103   if (!D.getAppleNamespacesSection().Data.empty())
2104     verifyAppleAccelTable(&D.getAppleNamespacesSection(), &StrData,
2105                           ".apple_namespaces");
2106   if (!D.getAppleObjCSection().Data.empty())
2107     verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc");
2108 
2109   if (!D.getNamesSection().Data.empty())
2110     verifyDebugNames(D.getNamesSection(), StrData);
2111   return ErrorCategory.GetNumErrors() == 0;
2112 }
2113 
handleDebugStrOffsets()2114 bool DWARFVerifier::handleDebugStrOffsets() {
2115   OS << "Verifying .debug_str_offsets...\n";
2116   const DWARFObject &DObj = DCtx.getDWARFObj();
2117   bool Success = true;
2118 
2119   // dwo sections may contain the legacy debug_str_offsets format (and they
2120   // can't be mixed with dwarf 5's format). This section format contains no
2121   // header.
2122   // As such, check the version from debug_info and, if we are in the legacy
2123   // mode (Dwarf <= 4), extract Dwarf32/Dwarf64.
2124   std::optional<DwarfFormat> DwoLegacyDwarf4Format;
2125   DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
2126     if (DwoLegacyDwarf4Format)
2127       return;
2128     DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
2129     uint64_t Offset = 0;
2130     DwarfFormat InfoFormat = DebugInfoData.getInitialLength(&Offset).second;
2131     if (uint16_t InfoVersion = DebugInfoData.getU16(&Offset); InfoVersion <= 4)
2132       DwoLegacyDwarf4Format = InfoFormat;
2133   });
2134 
2135   Success &= verifyDebugStrOffsets(
2136       DwoLegacyDwarf4Format, ".debug_str_offsets.dwo",
2137       DObj.getStrOffsetsDWOSection(), DObj.getStrDWOSection());
2138   Success &= verifyDebugStrOffsets(
2139       /*LegacyFormat=*/std::nullopt, ".debug_str_offsets",
2140       DObj.getStrOffsetsSection(), DObj.getStrSection());
2141   return Success;
2142 }
2143 
verifyDebugStrOffsets(std::optional<DwarfFormat> LegacyFormat,StringRef SectionName,const DWARFSection & Section,StringRef StrData)2144 bool DWARFVerifier::verifyDebugStrOffsets(
2145     std::optional<DwarfFormat> LegacyFormat, StringRef SectionName,
2146     const DWARFSection &Section, StringRef StrData) {
2147   const DWARFObject &DObj = DCtx.getDWARFObj();
2148 
2149   DWARFDataExtractor DA(DObj, Section, DCtx.isLittleEndian(), 0);
2150   DataExtractor::Cursor C(0);
2151   uint64_t NextUnit = 0;
2152   bool Success = true;
2153   while (C.seek(NextUnit), C.tell() < DA.getData().size()) {
2154     DwarfFormat Format;
2155     uint64_t Length;
2156     uint64_t StartOffset = C.tell();
2157     if (LegacyFormat) {
2158       Format = *LegacyFormat;
2159       Length = DA.getData().size();
2160       NextUnit = C.tell() + Length;
2161     } else {
2162       std::tie(Length, Format) = DA.getInitialLength(C);
2163       if (!C)
2164         break;
2165       if (C.tell() + Length > DA.getData().size()) {
2166         ErrorCategory.Report(
2167             "Section contribution length exceeds available space", [&]() {
2168               error() << formatv(
2169                   "{0}: contribution {1:X}: length exceeds available space "
2170                   "(contribution "
2171                   "offset ({1:X}) + length field space ({2:X}) + length "
2172                   "({3:X}) == "
2173                   "{4:X} > section size {5:X})\n",
2174                   SectionName, StartOffset, C.tell() - StartOffset, Length,
2175                   C.tell() + Length, DA.getData().size());
2176             });
2177         Success = false;
2178         // Nothing more to do - no other contributions to try.
2179         break;
2180       }
2181       NextUnit = C.tell() + Length;
2182       uint8_t Version = DA.getU16(C);
2183       if (C && Version != 5) {
2184         ErrorCategory.Report("Invalid Section version", [&]() {
2185           error() << formatv("{0}: contribution {1:X}: invalid version {2}\n",
2186                              SectionName, StartOffset, Version);
2187         });
2188         Success = false;
2189         // Can't parse the rest of this contribution, since we don't know the
2190         // version, but we can pick up with the next contribution.
2191         continue;
2192       }
2193       (void)DA.getU16(C); // padding
2194     }
2195     uint64_t OffsetByteSize = getDwarfOffsetByteSize(Format);
2196     DA.setAddressSize(OffsetByteSize);
2197     uint64_t Remainder = (Length - 4) % OffsetByteSize;
2198     if (Remainder != 0) {
2199       ErrorCategory.Report("Invalid section contribution length", [&]() {
2200         error() << formatv(
2201             "{0}: contribution {1:X}: invalid length ((length ({2:X}) "
2202             "- header (0x4)) % offset size {3:X} == {4:X} != 0)\n",
2203             SectionName, StartOffset, Length, OffsetByteSize, Remainder);
2204       });
2205       Success = false;
2206     }
2207     for (uint64_t Index = 0; C && C.tell() + OffsetByteSize <= NextUnit; ++Index) {
2208       uint64_t OffOff = C.tell();
2209       uint64_t StrOff = DA.getAddress(C);
2210       // check StrOff refers to the start of a string
2211       if (StrOff == 0)
2212         continue;
2213       if (StrData.size() <= StrOff) {
2214         ErrorCategory.Report(
2215             "String offset out of bounds of string section", [&]() {
2216               error() << formatv(
2217                   "{0}: contribution {1:X}: index {2:X}: invalid string "
2218                   "offset *{3:X} == {4:X}, is beyond the bounds of the string "
2219                   "section of length {5:X}\n",
2220                   SectionName, StartOffset, Index, OffOff, StrOff,
2221                   StrData.size());
2222             });
2223         continue;
2224       }
2225       if (StrData[StrOff - 1] == '\0')
2226         continue;
2227       ErrorCategory.Report(
2228           "Section contribution contains invalid string offset", [&]() {
2229             error() << formatv(
2230                 "{0}: contribution {1:X}: index {2:X}: invalid string "
2231                 "offset *{3:X} == {4:X}, is neither zero nor "
2232                 "immediately following a null character\n",
2233                 SectionName, StartOffset, Index, OffOff, StrOff);
2234           });
2235       Success = false;
2236     }
2237   }
2238 
2239   if (Error E = C.takeError()) {
2240     std::string Msg = toString(std::move(E));
2241     ErrorCategory.Report("String offset error", [&]() {
2242       error() << SectionName << ": " << Msg << '\n';
2243       return false;
2244     });
2245   }
2246   return Success;
2247 }
2248 
Report(StringRef s,std::function<void (void)> detailCallback)2249 void OutputCategoryAggregator::Report(
2250     StringRef s, std::function<void(void)> detailCallback) {
2251   this->Report(s, "", detailCallback);
2252 }
2253 
Report(StringRef category,StringRef sub_category,std::function<void (void)> detailCallback)2254 void OutputCategoryAggregator::Report(
2255     StringRef category, StringRef sub_category,
2256     std::function<void(void)> detailCallback) {
2257   std::lock_guard<std::mutex> Lock(WriteMutex);
2258   ++NumErrors;
2259   std::string category_str = std::string(category);
2260   AggregationData &Agg = Aggregation[category_str];
2261   Agg.OverallCount++;
2262   if (!sub_category.empty()) {
2263     Agg.DetailedCounts[std::string(sub_category)]++;
2264   }
2265   if (IncludeDetail)
2266     detailCallback();
2267 }
2268 
EnumerateResults(std::function<void (StringRef,unsigned)> handleCounts)2269 void OutputCategoryAggregator::EnumerateResults(
2270     std::function<void(StringRef, unsigned)> handleCounts) {
2271   for (const auto &[name, aggData] : Aggregation) {
2272     handleCounts(name, aggData.OverallCount);
2273   }
2274 }
EnumerateDetailedResultsFor(StringRef category,std::function<void (StringRef,unsigned)> handleCounts)2275 void OutputCategoryAggregator::EnumerateDetailedResultsFor(
2276     StringRef category, std::function<void(StringRef, unsigned)> handleCounts) {
2277   const auto Agg = Aggregation.find(category);
2278   if (Agg != Aggregation.end()) {
2279     for (const auto &[name, aggData] : Agg->second.DetailedCounts) {
2280       handleCounts(name, aggData);
2281     }
2282   }
2283 }
2284 
summarize()2285 void DWARFVerifier::summarize() {
2286   if (DumpOpts.ShowAggregateErrors && ErrorCategory.GetNumCategories()) {
2287     error() << "Aggregated error counts:\n";
2288     ErrorCategory.EnumerateResults([&](StringRef s, unsigned count) {
2289       error() << s << " occurred " << count << " time(s).\n";
2290     });
2291   }
2292   if (!DumpOpts.JsonErrSummaryFile.empty()) {
2293     std::error_code EC;
2294     raw_fd_ostream JsonStream(DumpOpts.JsonErrSummaryFile, EC,
2295                               sys::fs::OF_Text);
2296     if (EC) {
2297       error() << "unable to open json summary file '"
2298               << DumpOpts.JsonErrSummaryFile
2299               << "' for writing: " << EC.message() << '\n';
2300       return;
2301     }
2302 
2303     llvm::json::Object Categories;
2304     uint64_t ErrorCount = 0;
2305     ErrorCategory.EnumerateResults([&](StringRef Category, unsigned Count) {
2306       llvm::json::Object Val;
2307       Val.try_emplace("count", Count);
2308       llvm::json::Object Details;
2309       ErrorCategory.EnumerateDetailedResultsFor(
2310           Category, [&](StringRef SubCategory, unsigned SubCount) {
2311             Details.try_emplace(SubCategory, SubCount);
2312           });
2313       Val.try_emplace("details", std::move(Details));
2314       Categories.try_emplace(Category, std::move(Val));
2315       ErrorCount += Count;
2316     });
2317     llvm::json::Object RootNode;
2318     RootNode.try_emplace("error-categories", std::move(Categories));
2319     RootNode.try_emplace("error-count", ErrorCount);
2320 
2321     JsonStream << llvm::json::Value(std::move(RootNode));
2322   }
2323 }
2324 
error() const2325 raw_ostream &DWARFVerifier::error() const { return WithColor::error(OS); }
2326 
warn() const2327 raw_ostream &DWARFVerifier::warn() const { return WithColor::warning(OS); }
2328 
note() const2329 raw_ostream &DWARFVerifier::note() const { return WithColor::note(OS); }
2330 
dump(const DWARFDie & Die,unsigned indent) const2331 raw_ostream &DWARFVerifier::dump(const DWARFDie &Die, unsigned indent) const {
2332   Die.dump(OS, indent, DumpOpts);
2333   return OS;
2334 }
2335