xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- LVDWARFReader.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the LVDWARFReader class.
10 // It supports ELF, Mach-O and Wasm binary formats.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h"
15 #include "llvm/DebugInfo/DIContext.h"
16 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
17 #include "llvm/DebugInfo/DWARF/DWARFExpressionPrinter.h"
18 #include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h"
19 #include "llvm/DebugInfo/LogicalView/Core/LVLine.h"
20 #include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
21 #include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h"
22 #include "llvm/DebugInfo/LogicalView/Core/LVType.h"
23 #include "llvm/Object/MachO.h"
24 #include "llvm/Support/FormatVariadic.h"
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::logicalview;
29 
30 #define DEBUG_TYPE "DWARFReader"
31 
processOneAttribute(const DWARFDie & Die,LVOffset * OffsetPtr,const AttributeSpec & AttrSpec)32 void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
33                                         LVOffset *OffsetPtr,
34                                         const AttributeSpec &AttrSpec) {
35   uint64_t OffsetOnEntry = *OffsetPtr;
36   DWARFUnit *U = Die.getDwarfUnit();
37   const DWARFFormValue &FormValue =
38       DWARFFormValue::createFromUnit(AttrSpec.Form, U, OffsetPtr);
39 
40   // We are processing .debug_info section, implicit_const attribute
41   // values are not really stored here, but in .debug_abbrev section.
42   auto GetAsUnsignedConstant = [&]() -> int64_t {
43     if (AttrSpec.isImplicitConst())
44       return AttrSpec.getImplicitConstValue();
45     if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
46       return *Val;
47     return 0;
48   };
49 
50   auto GetFlag = [](const DWARFFormValue &FormValue) -> bool {
51     return FormValue.isFormClass(DWARFFormValue::FC_Flag);
52   };
53 
54   auto GetBoundValue = [&AttrSpec](const DWARFFormValue &FormValue) -> int64_t {
55     switch (FormValue.getForm()) {
56     case dwarf::DW_FORM_ref_addr:
57     case dwarf::DW_FORM_ref1:
58     case dwarf::DW_FORM_ref2:
59     case dwarf::DW_FORM_ref4:
60     case dwarf::DW_FORM_ref8:
61     case dwarf::DW_FORM_ref_udata:
62     case dwarf::DW_FORM_ref_sig8:
63       return *FormValue.getAsReferenceUVal();
64     case dwarf::DW_FORM_data1:
65     case dwarf::DW_FORM_flag:
66     case dwarf::DW_FORM_data2:
67     case dwarf::DW_FORM_data4:
68     case dwarf::DW_FORM_data8:
69     case dwarf::DW_FORM_udata:
70     case dwarf::DW_FORM_ref_sup4:
71     case dwarf::DW_FORM_ref_sup8:
72       return *FormValue.getAsUnsignedConstant();
73     case dwarf::DW_FORM_sdata:
74       return *FormValue.getAsSignedConstant();
75     case dwarf::DW_FORM_implicit_const:
76       return AttrSpec.getImplicitConstValue();
77     default:
78       return 0;
79     }
80   };
81 
82   LLVM_DEBUG({
83     dbgs() << "     " << hexValue(OffsetOnEntry)
84            << formatv(" {0}", AttrSpec.Attr) << "\n";
85   });
86 
87   switch (AttrSpec.Attr) {
88   case dwarf::DW_AT_accessibility:
89     CurrentElement->setAccessibilityCode(GetAsUnsignedConstant());
90     break;
91   case dwarf::DW_AT_artificial:
92     CurrentElement->setIsArtificial();
93     break;
94   case dwarf::DW_AT_bit_size:
95     CurrentElement->setBitSize(GetAsUnsignedConstant());
96     break;
97   case dwarf::DW_AT_byte_size:
98     CurrentElement->setBitSize(GetAsUnsignedConstant() * DWARF_CHAR_BIT);
99     break;
100   case dwarf::DW_AT_call_file:
101     CurrentElement->setCallFilenameIndex(IncrementFileIndex
102                                              ? GetAsUnsignedConstant() + 1
103                                              : GetAsUnsignedConstant());
104     break;
105   case dwarf::DW_AT_call_line:
106     CurrentElement->setCallLineNumber(GetAsUnsignedConstant());
107     break;
108   case dwarf::DW_AT_comp_dir:
109     CompileUnit->setCompilationDirectory(dwarf::toStringRef(FormValue));
110     break;
111   case dwarf::DW_AT_const_value:
112     if (FormValue.isFormClass(DWARFFormValue::FC_Block)) {
113       ArrayRef<uint8_t> Expr = *FormValue.getAsBlock();
114       // Store the expression as a hexadecimal string.
115       CurrentElement->setValue(
116           llvm::toHex(llvm::toStringRef(Expr), /*LowerCase=*/true));
117     } else if (FormValue.isFormClass(DWARFFormValue::FC_Constant)) {
118       // In the case of negative values, generate the string representation
119       // for a positive value prefixed with the negative sign.
120       if (FormValue.getForm() == dwarf::DW_FORM_sdata) {
121         std::stringstream Stream;
122         int64_t Value = *FormValue.getAsSignedConstant();
123         if (Value < 0) {
124           Stream << "-";
125           Value = std::abs(Value);
126         }
127         Stream << hexString(Value, 2);
128         CurrentElement->setValue(Stream.str());
129       } else
130         CurrentElement->setValue(hexString(GetAsUnsignedConstant(), 2));
131     } else
132       CurrentElement->setValue(dwarf::toStringRef(FormValue));
133     break;
134   case dwarf::DW_AT_count:
135     CurrentElement->setCount(GetAsUnsignedConstant());
136     break;
137   case dwarf::DW_AT_decl_line:
138     CurrentElement->setLineNumber(GetAsUnsignedConstant());
139     break;
140   case dwarf::DW_AT_decl_file:
141     CurrentElement->setFilenameIndex(IncrementFileIndex
142                                          ? GetAsUnsignedConstant() + 1
143                                          : GetAsUnsignedConstant());
144     break;
145   case dwarf::DW_AT_enum_class:
146     if (GetFlag(FormValue))
147       CurrentElement->setIsEnumClass();
148     break;
149   case dwarf::DW_AT_external:
150     if (GetFlag(FormValue))
151       CurrentElement->setIsExternal();
152     break;
153   case dwarf::DW_AT_GNU_discriminator:
154     CurrentElement->setDiscriminator(GetAsUnsignedConstant());
155     break;
156   case dwarf::DW_AT_inline:
157     CurrentElement->setInlineCode(GetAsUnsignedConstant());
158     break;
159   case dwarf::DW_AT_lower_bound:
160     CurrentElement->setLowerBound(GetBoundValue(FormValue));
161     break;
162   case dwarf::DW_AT_name:
163     CurrentElement->setName(dwarf::toStringRef(FormValue));
164     break;
165   case dwarf::DW_AT_GNU_template_name:
166     CurrentElement->setValue(dwarf::toStringRef(FormValue));
167     break;
168   case dwarf::DW_AT_linkage_name:
169   case dwarf::DW_AT_MIPS_linkage_name:
170     CurrentElement->setLinkageName(dwarf::toStringRef(FormValue));
171     break;
172   case dwarf::DW_AT_producer:
173     if (options().getAttributeProducer())
174       CurrentElement->setProducer(dwarf::toStringRef(FormValue));
175     break;
176   case dwarf::DW_AT_language:
177     if (options().getAttributeLanguage())
178       CurrentElement->setSourceLanguage(LVSourceLanguage{
179           static_cast<llvm::dwarf::SourceLanguage>(GetAsUnsignedConstant())});
180     break;
181   case dwarf::DW_AT_upper_bound:
182     CurrentElement->setUpperBound(GetBoundValue(FormValue));
183     break;
184   case dwarf::DW_AT_virtuality:
185     CurrentElement->setVirtualityCode(GetAsUnsignedConstant());
186     break;
187 
188   case dwarf::DW_AT_abstract_origin:
189   case dwarf::DW_AT_call_origin:
190   case dwarf::DW_AT_extension:
191   case dwarf::DW_AT_import:
192   case dwarf::DW_AT_specification:
193   case dwarf::DW_AT_type:
194     updateReference(AttrSpec.Attr, FormValue);
195     break;
196 
197   case dwarf::DW_AT_low_pc:
198     if (options().getGeneralCollectRanges()) {
199       FoundLowPC = true;
200       // For toolchains that support the removal of unused code, the linker
201       // marks functions that have been removed, by setting the value for the
202       // low_pc to the max address.
203       if (std::optional<uint64_t> Value = FormValue.getAsAddress()) {
204         CurrentLowPC = *Value;
205       } else {
206         uint64_t UValue = FormValue.getRawUValue();
207         if (U->getAddrOffsetSectionItem(UValue)) {
208           CurrentLowPC = *FormValue.getAsAddress();
209         } else {
210           FoundLowPC = false;
211           // We are dealing with an index into the .debug_addr section.
212           LLVM_DEBUG({
213             dbgs() << format("indexed (%8.8x) address = ", (uint32_t)UValue);
214           });
215         }
216       }
217       if (FoundLowPC) {
218         if (CurrentLowPC == getTombstoneAddress())
219           CurrentElement->setIsDiscarded();
220         else
221           // Consider the case of WebAssembly.
222           CurrentLowPC += WasmCodeSectionOffset;
223         if (CurrentElement->isCompileUnit())
224           setCUBaseAddress(CurrentLowPC);
225       }
226     }
227     break;
228 
229   case dwarf::DW_AT_high_pc:
230     if (options().getGeneralCollectRanges()) {
231       FoundHighPC = true;
232       if (std::optional<uint64_t> Address = FormValue.getAsAddress())
233         // High PC is an address.
234         CurrentHighPC = *Address;
235       if (std::optional<uint64_t> Offset = FormValue.getAsUnsignedConstant())
236         // High PC is an offset from LowPC.
237         // Don't add the WebAssembly offset if we have seen a DW_AT_low_pc, as
238         // the CurrentLowPC has already that offset added. Basically, use the
239         // original DW_AT_loc_pc value.
240         CurrentHighPC =
241             (FoundLowPC ? CurrentLowPC - WasmCodeSectionOffset : CurrentLowPC) +
242             *Offset;
243       // Store the real upper limit for the address range.
244       if (UpdateHighAddress && CurrentHighPC > 0)
245         --CurrentHighPC;
246       // Consider the case of WebAssembly.
247       CurrentHighPC += WasmCodeSectionOffset;
248       if (CurrentElement->isCompileUnit())
249         setCUHighAddress(CurrentHighPC);
250     }
251     break;
252 
253   case dwarf::DW_AT_ranges:
254     if (RangesDataAvailable && options().getGeneralCollectRanges()) {
255       auto GetRanges = [](const DWARFFormValue &FormValue,
256                           DWARFUnit *U) -> Expected<DWARFAddressRangesVector> {
257         if (FormValue.getForm() == dwarf::DW_FORM_rnglistx)
258           return U->findRnglistFromIndex(*FormValue.getAsSectionOffset());
259         return U->findRnglistFromOffset(*FormValue.getAsSectionOffset());
260       };
261       Expected<DWARFAddressRangesVector> RangesOrError =
262           GetRanges(FormValue, U);
263       if (!RangesOrError) {
264         LLVM_DEBUG({
265           std::string TheError(toString(RangesOrError.takeError()));
266           dbgs() << format("error decoding address ranges = ",
267                            TheError.c_str());
268         });
269         consumeError(RangesOrError.takeError());
270         break;
271       }
272       // The address ranges are absolute. There is no need to add any addend.
273       DWARFAddressRangesVector Ranges = RangesOrError.get();
274       for (DWARFAddressRange &Range : Ranges) {
275         // This seems to be a tombstone for empty ranges.
276         if ((Range.LowPC == Range.HighPC) ||
277             (Range.LowPC = getTombstoneAddress()))
278           continue;
279         // Store the real upper limit for the address range.
280         if (UpdateHighAddress && Range.HighPC > 0)
281           --Range.HighPC;
282         // Consider the case of WebAssembly.
283         Range.LowPC += WasmCodeSectionOffset;
284         Range.HighPC += WasmCodeSectionOffset;
285         // Add the pair of addresses.
286         CurrentScope->addObject(Range.LowPC, Range.HighPC);
287         // If the scope is the CU, do not update the ranges set.
288         if (!CurrentElement->isCompileUnit())
289           CurrentRanges.emplace_back(Range.LowPC, Range.HighPC);
290       }
291     }
292     break;
293 
294   // Get the location list for the symbol.
295   case dwarf::DW_AT_data_member_location:
296     if (options().getAttributeAnyLocation())
297       processLocationMember(AttrSpec.Attr, FormValue, Die, OffsetOnEntry);
298     break;
299 
300   // Get the location list for the symbol.
301   case dwarf::DW_AT_location:
302   case dwarf::DW_AT_string_length:
303   case dwarf::DW_AT_use_location:
304     if (options().getAttributeAnyLocation() && CurrentSymbol)
305       processLocationList(AttrSpec.Attr, FormValue, Die, OffsetOnEntry);
306     break;
307 
308   case dwarf::DW_AT_call_data_value:
309   case dwarf::DW_AT_call_value:
310   case dwarf::DW_AT_GNU_call_site_data_value:
311   case dwarf::DW_AT_GNU_call_site_value:
312     if (options().getAttributeAnyLocation() && CurrentSymbol)
313       processLocationList(AttrSpec.Attr, FormValue, Die, OffsetOnEntry,
314                           /*CallSiteLocation=*/true);
315     break;
316 
317   default:
318     break;
319   }
320 }
321 
processOneDie(const DWARFDie & InputDIE,LVScope * Parent,DWARFDie & SkeletonDie)322 LVScope *LVDWARFReader::processOneDie(const DWARFDie &InputDIE, LVScope *Parent,
323                                       DWARFDie &SkeletonDie) {
324   // If the input DIE corresponds to the compile unit, it can be:
325   // a) Simple DWARF: a standard DIE. Ignore the skeleton DIE (is empty).
326   // b) Split DWARF: the DIE for the split DWARF. The skeleton is the DIE
327   //    for the skeleton DWARF. Process both DIEs.
328   const DWARFDie &DIE = SkeletonDie.isValid() ? SkeletonDie : InputDIE;
329   DWARFDataExtractor DebugInfoData =
330       DIE.getDwarfUnit()->getDebugInfoExtractor();
331   LVOffset Offset = DIE.getOffset();
332 
333   // Reset values for the current DIE.
334   CurrentLowPC = 0;
335   CurrentHighPC = 0;
336   CurrentOffset = Offset;
337   CurrentEndOffset = 0;
338   FoundLowPC = false;
339   FoundHighPC = false;
340 
341   // Process supported attributes.
342   if (DebugInfoData.isValidOffset(Offset)) {
343 
344     LLVM_DEBUG({
345       dbgs() << "DIE: " << hexValue(Offset) << formatv(" {0}", DIE.getTag())
346              << "\n";
347     });
348 
349     // Create the logical view element for the current DIE.
350     dwarf::Tag Tag = DIE.getTag();
351     CurrentElement = createElement(Tag);
352     if (!CurrentElement)
353       return CurrentScope;
354 
355     CurrentElement->setTag(Tag);
356     CurrentElement->setOffset(Offset);
357 
358     if (options().getAttributeAnySource() && CurrentElement->isCompileUnit())
359       addCompileUnitOffset(Offset,
360                            static_cast<LVScopeCompileUnit *>(CurrentElement));
361 
362     // Insert the newly created element into the element symbol table. If the
363     // element is in the list, it means there are previously created elements
364     // referencing this element.
365     auto [It, Inserted] = ElementTable.try_emplace(Offset, CurrentElement);
366     if (!Inserted) {
367       // There are previous references to this element. We need to update the
368       // element and all the references pointing to this element.
369       LVElementEntry &Reference = ElementTable[Offset];
370       Reference.Element = CurrentElement;
371       // Traverse the element set and update the elements (backtracking).
372       for (LVElement *Target : Reference.References)
373         Target->setReference(CurrentElement);
374       for (LVElement *Target : Reference.Types)
375         Target->setType(CurrentElement);
376       // Clear the pending elements.
377       Reference.References.clear();
378       Reference.Types.clear();
379     }
380 
381     // Add the current element to its parent as there are attributes
382     // (locations) that require the scope level.
383     if (CurrentScope)
384       Parent->addElement(CurrentScope);
385     else if (CurrentSymbol)
386       Parent->addElement(CurrentSymbol);
387     else if (CurrentType)
388       Parent->addElement(CurrentType);
389 
390     // Process the attributes for the given DIE.
391     auto ProcessAttributes = [&](const DWARFDie &TheDIE,
392                                  DWARFDataExtractor &DebugData) {
393       CurrentEndOffset = Offset;
394       uint32_t abbrCode = DebugData.getULEB128(&CurrentEndOffset);
395       if (abbrCode) {
396         if (const DWARFAbbreviationDeclaration *AbbrevDecl =
397                 TheDIE.getAbbreviationDeclarationPtr())
398           if (AbbrevDecl)
399             for (const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec :
400                  AbbrevDecl->attributes())
401               processOneAttribute(TheDIE, &CurrentEndOffset, AttrSpec);
402       }
403     };
404 
405     ProcessAttributes(DIE, DebugInfoData);
406 
407     // If the input DIE is for a compile unit, process its attributes in
408     // the case of split DWARF, to override any common attribute values.
409     if (SkeletonDie.isValid()) {
410       DWARFDataExtractor DebugInfoData =
411           InputDIE.getDwarfUnit()->getDebugInfoExtractor();
412       LVOffset Offset = InputDIE.getOffset();
413       if (DebugInfoData.isValidOffset(Offset))
414         ProcessAttributes(InputDIE, DebugInfoData);
415     }
416   }
417 
418   if (CurrentScope) {
419     if (CurrentScope->getCanHaveRanges()) {
420       // If the scope has ranges, they are already added to the scope.
421       // Add any collected LowPC/HighPC values.
422       bool IsCompileUnit = CurrentScope->getIsCompileUnit();
423       if (FoundLowPC && FoundHighPC) {
424         CurrentScope->addObject(CurrentLowPC, CurrentHighPC);
425         if (!IsCompileUnit) {
426           // If the scope is a function, add it to the public names.
427           if ((options().getAttributePublics() ||
428                options().getPrintAnyLine()) &&
429               CurrentScope->getIsFunction() &&
430               !CurrentScope->getIsInlinedFunction())
431             CompileUnit->addPublicName(CurrentScope, CurrentLowPC,
432                                        CurrentHighPC);
433         }
434       }
435 
436       // Look for scopes with ranges and no linkage name information that
437       // are referencing another scopes via DW_AT_specification. They are
438       // possible candidates for a comdat scope.
439       if (CurrentScope->getHasRanges() &&
440           !CurrentScope->getLinkageNameIndex() &&
441           CurrentScope->getHasReferenceSpecification()) {
442         // Get the linkage name in order to search for a possible comdat.
443         std::optional<DWARFFormValue> LinkageDIE =
444             DIE.findRecursively(dwarf::DW_AT_linkage_name);
445         if (LinkageDIE.has_value()) {
446           StringRef Name(dwarf::toStringRef(LinkageDIE));
447           if (!Name.empty())
448             CurrentScope->setLinkageName(Name);
449         }
450       }
451 
452       // If the current scope is in the 'LinkageNames' table, update its
453       // logical scope. For other scopes, always we will assume the default
454       // ".text" section index.
455       LVSectionIndex SectionIndex = updateSymbolTable(CurrentScope);
456       if (CurrentScope->getIsComdat())
457         CompileUnit->setHasComdatScopes();
458 
459       // Update section index contained ranges.
460       if (SectionIndex) {
461         if (!CurrentRanges.empty()) {
462           for (LVAddressRange &Range : CurrentRanges)
463             addSectionRange(SectionIndex, CurrentScope, Range.first,
464                             Range.second);
465           CurrentRanges.clear();
466         }
467         // If the scope is the CU, do not update the ranges set.
468         if (FoundLowPC && FoundHighPC && !IsCompileUnit) {
469           addSectionRange(SectionIndex, CurrentScope, CurrentLowPC,
470                           CurrentHighPC);
471         }
472       }
473     }
474     // Mark member functions.
475     if (Parent->getIsAggregate())
476       CurrentScope->setIsMember();
477   }
478 
479   // Keep track of symbols with locations.
480   if (options().getAttributeAnyLocation() && CurrentSymbol &&
481       CurrentSymbol->getHasLocation())
482     SymbolsWithLocations.push_back(CurrentSymbol);
483 
484   // If we have template parameters, mark the parent as template.
485   if (CurrentType && CurrentType->getIsTemplateParam())
486     Parent->setIsTemplate();
487 
488   return CurrentScope;
489 }
490 
traverseDieAndChildren(DWARFDie & DIE,LVScope * Parent,DWARFDie & SkeletonDie)491 void LVDWARFReader::traverseDieAndChildren(DWARFDie &DIE, LVScope *Parent,
492                                            DWARFDie &SkeletonDie) {
493   // Process the current DIE.
494   LVScope *Scope = processOneDie(DIE, Parent, SkeletonDie);
495   if (Scope) {
496     LVOffset Lower = DIE.getOffset();
497     LVOffset Upper = CurrentEndOffset;
498     DWARFDie DummyDie;
499     // Traverse the children chain.
500     DWARFDie Child = DIE.getFirstChild();
501     while (Child) {
502       traverseDieAndChildren(Child, Scope, DummyDie);
503       Upper = Child.getOffset();
504       Child = Child.getSibling();
505     }
506     // Calculate contributions to the debug info section.
507     if (options().getPrintSizes() && Upper)
508       CompileUnit->addSize(Scope, Lower, Upper);
509   }
510 }
511 
processLocationGaps()512 void LVDWARFReader::processLocationGaps() {
513   if (options().getAttributeAnyLocation())
514     for (LVSymbol *Symbol : SymbolsWithLocations)
515       Symbol->fillLocationGaps();
516 }
517 
createLineAndFileRecords(const DWARFDebugLine::LineTable * Lines)518 void LVDWARFReader::createLineAndFileRecords(
519     const DWARFDebugLine::LineTable *Lines) {
520   if (!Lines)
521     return;
522 
523   // Get the source filenames.
524   if (!Lines->Prologue.FileNames.empty())
525     for (const DWARFDebugLine::FileNameEntry &Entry :
526          Lines->Prologue.FileNames) {
527       std::string Directory;
528       if (Lines->getDirectoryForEntry(Entry, Directory))
529         Directory = transformPath(Directory);
530       if (Directory.empty())
531         Directory = std::string(CompileUnit->getCompilationDirectory());
532       std::string File = transformPath(dwarf::toStringRef(Entry.Name));
533       std::string String;
534       raw_string_ostream(String) << Directory << "/" << File;
535       CompileUnit->addFilename(String);
536     }
537 
538   // In DWARF5 the file indexes start at 0;
539   bool IncrementIndex = Lines->Prologue.getVersion() >= 5;
540 
541   // Get the source lines if requested by command line option.
542   if (options().getPrintLines() && Lines->Rows.size())
543     for (const DWARFDebugLine::Row &Row : Lines->Rows) {
544       // Here we collect logical debug lines in CULines. Later on,
545       // the 'processLines()' function will move each created logical line
546       // to its enclosing logical scope, using the debug ranges information
547       // and they will be released when its scope parent is deleted.
548       LVLineDebug *Line = createLineDebug();
549       CULines.push_back(Line);
550       // Consider the case of WebAssembly.
551       Line->setAddress(Row.Address.Address + WasmCodeSectionOffset);
552       Line->setFilename(
553           CompileUnit->getFilename(IncrementIndex ? Row.File + 1 : Row.File));
554       Line->setLineNumber(Row.Line);
555       if (Row.Discriminator)
556         Line->setDiscriminator(Row.Discriminator);
557       if (Row.IsStmt)
558         Line->setIsNewStatement();
559       if (Row.BasicBlock)
560         Line->setIsBasicBlock();
561       if (Row.EndSequence)
562         Line->setIsEndSequence();
563       if (Row.EpilogueBegin)
564         Line->setIsEpilogueBegin();
565       if (Row.PrologueEnd)
566         Line->setIsPrologueEnd();
567       LLVM_DEBUG({
568         dbgs() << "Address: " << hexValue(Line->getAddress())
569                << " Line: " << Line->lineNumberAsString(/*ShowZero=*/true)
570                << "\n";
571       });
572     }
573 }
574 
getRegisterName(LVSmall Opcode,ArrayRef<uint64_t> Operands)575 std::string LVDWARFReader::getRegisterName(LVSmall Opcode,
576                                            ArrayRef<uint64_t> Operands) {
577   // The 'prettyPrintRegisterOp' function uses the DWARFUnit to support
578   // DW_OP_regval_type. At this point we are operating on a logical view
579   // item, with no access to the underlying DWARF data used by LLVM.
580   // We do not support DW_OP_regval_type here.
581   if (Opcode == dwarf::DW_OP_regval_type)
582     return {};
583 
584   std::string string;
585   raw_string_ostream Stream(string);
586   DIDumpOptions DumpOpts;
587   auto *MCRegInfo = MRI.get();
588   auto GetRegName = [&MCRegInfo](uint64_t DwarfRegNum, bool IsEH) -> StringRef {
589     if (!MCRegInfo)
590       return {};
591     if (std::optional<MCRegister> LLVMRegNum =
592             MCRegInfo->getLLVMRegNum(DwarfRegNum, IsEH))
593       if (const char *RegName = MCRegInfo->getName(*LLVMRegNum))
594         return StringRef(RegName);
595     return {};
596   };
597   DumpOpts.GetNameForDWARFReg = GetRegName;
598   prettyPrintRegisterOp(/*U=*/nullptr, Stream, DumpOpts, Opcode, Operands);
599   return Stream.str();
600 }
601 
createScopes()602 Error LVDWARFReader::createScopes() {
603   LLVM_DEBUG({
604     W.startLine() << "\n";
605     W.printString("File", Obj.getFileName().str());
606     W.printString("Format", FileFormatName);
607   });
608 
609   if (Error Err = LVReader::createScopes())
610     return Err;
611 
612   // As the DwarfContext object is valid only during the scopes creation,
613   // we need to create our own Target information, to be used during the
614   // logical view printing, in the case of instructions being requested.
615   std::unique_ptr<DWARFContext> DwarfContext = DWARFContext::create(Obj);
616   if (!DwarfContext)
617     return createStringError(errc::invalid_argument,
618                              "Could not create DWARF information: %s",
619                              getFilename().str().c_str());
620 
621   if (Error Err = loadTargetInfo(Obj))
622     return Err;
623 
624   // Create a mapping for virtual addresses.
625   mapVirtualAddress(Obj);
626 
627   // Select the correct compile unit range, depending if we are dealing with
628   // a standard or split DWARF object.
629   DWARFContext::compile_unit_range CompileUnits =
630       DwarfContext->getNumCompileUnits() ? DwarfContext->compile_units()
631                                          : DwarfContext->dwo_compile_units();
632   for (const std::unique_ptr<DWARFUnit> &CU : CompileUnits) {
633 
634     // Take into account the address byte size for a correct 'tombstone'
635     // value identification.
636     setTombstoneAddress(
637         dwarf::computeTombstoneAddress(CU->getAddressByteSize()));
638 
639     // Deduction of index used for the line records.
640     //
641     // For the following test case: test.cpp
642     //  void foo(void ParamPtr) { }
643 
644     // Both GCC and Clang generate DWARF-5 .debug_line layout.
645 
646     // * GCC (GNU C++17 11.3.0) - All DW_AT_decl_file use index 1.
647     //
648     //   .debug_info:
649     //     format = DWARF32, version = 0x0005
650     //     DW_TAG_compile_unit
651     //       DW_AT_name	("test.cpp")
652     //       DW_TAG_subprogram ("foo")
653     //         DW_AT_decl_file (1)
654     //         DW_TAG_formal_parameter ("ParamPtr")
655     //           DW_AT_decl_file (1)
656     //   .debug_line:
657     //     Line table prologue: format (DWARF32), version (5)
658     //     include_directories[0] = "..."
659     //     file_names[0]: name ("test.cpp"), dir_index (0)
660     //     file_names[1]: name ("test.cpp"), dir_index (0)
661 
662     // * Clang (14.0.6) - All DW_AT_decl_file use index 0.
663     //
664     //   .debug_info:
665     //     format = DWARF32, version = 0x0005
666     //     DW_AT_producer	("clang version 14.0.6")
667     //     DW_AT_name	("test.cpp")
668     //
669     //     DW_TAG_subprogram ("foo")
670     //       DW_AT_decl_file (0)
671     //       DW_TAG_formal_parameter ("ParamPtr")
672     //         DW_AT_decl_file (0)
673     //   .debug_line:
674     //     Line table prologue: format (DWARF32), version (5)
675     //     include_directories[0] = "..."
676     //     file_names[0]: name ("test.cpp"), dir_index (0)
677 
678     // From DWARFDebugLine::getFileNameByIndex documentation:
679     //   In Dwarf 4, the files are 1-indexed.
680     //   In Dwarf 5, the files are 0-indexed.
681     // Additional discussions here:
682     // https://www.mail-archive.com/dwarf-discuss@lists.dwarfstd.org/msg00883.html
683 
684     // The DWARF reader is expecting the files are 1-indexed, so using
685     // the .debug_line header information decide if the indexed require
686     // an internal adjustment.
687 
688     // For the case of GCC (DWARF5), if the entries[0] and [1] are the
689     // same, do not perform any adjustment.
690     auto DeduceIncrementFileIndex = [&]() -> bool {
691       if (CU->getVersion() < 5)
692         // DWARF-4 or earlier -> Don't increment index.
693         return false;
694 
695       if (const DWARFDebugLine::LineTable *LT =
696               CU->getContext().getLineTableForUnit(CU.get())) {
697         // Check if there are at least 2 entries and if they are the same.
698         if (LT->hasFileAtIndex(0) && LT->hasFileAtIndex(1)) {
699           const DWARFDebugLine::FileNameEntry &EntryZero =
700               LT->Prologue.getFileNameEntry(0);
701           const DWARFDebugLine::FileNameEntry &EntryOne =
702               LT->Prologue.getFileNameEntry(1);
703           // Check directory indexes.
704           if (EntryZero.DirIdx != EntryOne.DirIdx)
705             // DWARF-5 -> Increment index.
706             return true;
707           // Check filename.
708           std::string FileZero;
709           std::string FileOne;
710           StringRef None;
711           LT->getFileNameByIndex(
712               0, None, DILineInfoSpecifier::FileLineInfoKind::RawValue,
713               FileZero);
714           LT->getFileNameByIndex(
715               1, None, DILineInfoSpecifier::FileLineInfoKind::RawValue,
716               FileOne);
717           return FileZero != FileOne;
718         }
719       }
720 
721       // DWARF-5 -> Increment index.
722       return true;
723     };
724     // The DWARF reader expects the indexes as 1-indexed.
725     IncrementFileIndex = DeduceIncrementFileIndex();
726 
727     DWARFDie UnitDie = CU->getUnitDIE();
728     SmallString<16> DWOAlternativeLocation;
729     if (UnitDie) {
730       std::optional<const char *> DWOFileName =
731           CU->getVersion() >= 5
732               ? dwarf::toString(UnitDie.find(dwarf::DW_AT_dwo_name))
733               : dwarf::toString(UnitDie.find(dwarf::DW_AT_GNU_dwo_name));
734       StringRef From(DWOFileName.value_or(""));
735       DWOAlternativeLocation = createAlternativePath(From);
736     }
737 
738     // The current CU can be a normal compile unit (standard) or a skeleton
739     // compile unit (split). For both cases, the returned die, will be used
740     // to create the logical scopes.
741     DWARFDie CUDie = CU->getNonSkeletonUnitDIE(
742         /*ExtractUnitDIEOnly=*/false,
743         /*DWOAlternativeLocation=*/DWOAlternativeLocation);
744     if (!CUDie.isValid())
745       continue;
746 
747     // The current unit corresponds to the .dwo file. We need to get the
748     // skeleton unit and query for any ranges that will enclose any ranges
749     // in the non-skeleton unit.
750     DWARFDie DummyDie;
751     DWARFDie SkeletonDie =
752         CUDie.getDwarfUnit()->isDWOUnit() ? CU->getUnitDIE(false) : DummyDie;
753     // Disable the ranges processing if we have just a single .dwo object,
754     // as any DW_AT_ranges will access not available range information.
755     RangesDataAvailable =
756         (!CUDie.getDwarfUnit()->isDWOUnit() ||
757          (SkeletonDie.isValid() ? !SkeletonDie.getDwarfUnit()->isDWOUnit()
758                                 : true));
759 
760     traverseDieAndChildren(CUDie, Root, SkeletonDie);
761 
762     createLineAndFileRecords(DwarfContext->getLineTableForUnit(CU.get()));
763     if (Error Err = createInstructions())
764       return Err;
765 
766     // Process the compilation unit, as there are cases where enclosed
767     // functions have the same ranges values. Insert the compilation unit
768     // ranges at the end, to allow enclosing ranges to be first in the list.
769     LVSectionIndex SectionIndex = getSectionIndex(CompileUnit);
770     addSectionRange(SectionIndex, CompileUnit);
771     LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
772     ScopesWithRanges->sort();
773 
774     processLines(&CULines, SectionIndex);
775     processLocationGaps();
776 
777     // These are per compile unit.
778     ScopesWithRanges->clear();
779     SymbolsWithLocations.clear();
780     CULines.clear();
781   }
782 
783   return Error::success();
784 }
785 
786 // Get the location information for the associated attribute.
processLocationList(dwarf::Attribute Attr,const DWARFFormValue & FormValue,const DWARFDie & Die,uint64_t OffsetOnEntry,bool CallSiteLocation)787 void LVDWARFReader::processLocationList(dwarf::Attribute Attr,
788                                         const DWARFFormValue &FormValue,
789                                         const DWARFDie &Die,
790                                         uint64_t OffsetOnEntry,
791                                         bool CallSiteLocation) {
792 
793   auto ProcessLocationExpression = [&](const DWARFExpression &Expression) {
794     for (const DWARFExpression::Operation &Op : Expression)
795       CurrentSymbol->addLocationOperands(Op.getCode(), Op.getRawOperands());
796   };
797 
798   DWARFUnit *U = Die.getDwarfUnit();
799   DWARFContext &DwarfContext = U->getContext();
800   bool IsLittleEndian = DwarfContext.isLittleEndian();
801   if (FormValue.isFormClass(DWARFFormValue::FC_Block) ||
802       (DWARFAttribute::mayHaveLocationExpr(Attr) &&
803        FormValue.isFormClass(DWARFFormValue::FC_Exprloc))) {
804     ArrayRef<uint8_t> Expr = *FormValue.getAsBlock();
805     DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()),
806                        IsLittleEndian, 0);
807     DWARFExpression Expression(Data, U->getAddressByteSize(),
808                                U->getFormParams().Format);
809 
810     // Add location and operation entries.
811     CurrentSymbol->addLocation(Attr, /*LowPC=*/0, /*HighPC=*/-1,
812                                /*SectionOffset=*/0, OffsetOnEntry,
813                                CallSiteLocation);
814     ProcessLocationExpression(Expression);
815     return;
816   }
817 
818   if (DWARFAttribute::mayHaveLocationList(Attr) &&
819       FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) {
820     uint64_t Offset = *FormValue.getAsSectionOffset();
821     if (FormValue.getForm() == dwarf::DW_FORM_loclistx) {
822       std::optional<uint64_t> LoclistOffset = U->getLoclistOffset(Offset);
823       if (!LoclistOffset)
824         return;
825       Offset = *LoclistOffset;
826     }
827     uint64_t BaseAddr = 0;
828     if (std::optional<SectionedAddress> BA = U->getBaseAddress())
829       BaseAddr = BA->Address;
830     LVAddress LowPC = 0;
831     LVAddress HighPC = 0;
832 
833     auto ProcessLocationEntry = [&](const DWARFLocationEntry &Entry) {
834       if (Entry.Kind == dwarf::DW_LLE_base_address) {
835         BaseAddr = Entry.Value0;
836         return;
837       }
838       if (Entry.Kind == dwarf::DW_LLE_offset_pair) {
839         LowPC = BaseAddr + Entry.Value0;
840         HighPC = BaseAddr + Entry.Value1;
841         DWARFAddressRange Range{LowPC, HighPC, Entry.SectionIndex};
842         if (Range.SectionIndex == SectionedAddress::UndefSection)
843           Range.SectionIndex = Entry.SectionIndex;
844         DWARFLocationExpression Loc{Range, Entry.Loc};
845         DWARFDataExtractor Data(Loc.Expr, IsLittleEndian,
846                                 U->getAddressByteSize());
847         DWARFExpression Expression(Data, U->getAddressByteSize());
848 
849         // Store the real upper limit for the address range.
850         if (UpdateHighAddress && HighPC > 0)
851           --HighPC;
852         // Add location and operation entries.
853         CurrentSymbol->addLocation(Attr, LowPC, HighPC, Offset, OffsetOnEntry,
854                                    CallSiteLocation);
855         ProcessLocationExpression(Expression);
856       }
857     };
858     Error E = U->getLocationTable().visitLocationList(
859         &Offset, [&](const DWARFLocationEntry &E) {
860           ProcessLocationEntry(E);
861           return true;
862         });
863     if (E)
864       consumeError(std::move(E));
865   }
866 }
867 
processLocationMember(dwarf::Attribute Attr,const DWARFFormValue & FormValue,const DWARFDie & Die,uint64_t OffsetOnEntry)868 void LVDWARFReader::processLocationMember(dwarf::Attribute Attr,
869                                           const DWARFFormValue &FormValue,
870                                           const DWARFDie &Die,
871                                           uint64_t OffsetOnEntry) {
872   // Check if the value is an integer constant.
873   if (FormValue.isFormClass(DWARFFormValue::FC_Constant))
874     // Add a record to hold a constant as location.
875     CurrentSymbol->addLocationConstant(Attr, *FormValue.getAsUnsignedConstant(),
876                                        OffsetOnEntry);
877   else
878     // This is a location description, or a reference to one.
879     processLocationList(Attr, FormValue, Die, OffsetOnEntry);
880 }
881 
882 // Update the current element with the reference.
updateReference(dwarf::Attribute Attr,const DWARFFormValue & FormValue)883 void LVDWARFReader::updateReference(dwarf::Attribute Attr,
884                                     const DWARFFormValue &FormValue) {
885   // FIXME: We are assuming that at most one Reference (DW_AT_specification,
886   // DW_AT_abstract_origin, ...) and at most one Type (DW_AT_import, DW_AT_type)
887   // appear in any single DIE, but this may not be true.
888   uint64_t Offset;
889   if (std::optional<uint64_t> Off = FormValue.getAsRelativeReference())
890     Offset = FormValue.getUnit()->getOffset() + *Off;
891   else if (Off = FormValue.getAsDebugInfoReference(); Off)
892     Offset = *Off;
893   else
894     llvm_unreachable("Unsupported reference type");
895 
896   // Get target for the given reference, if already created.
897   LVElement *Target = getElementForOffset(
898       Offset, CurrentElement,
899       /*IsType=*/Attr == dwarf::DW_AT_import || Attr == dwarf::DW_AT_type);
900   // Check if we are dealing with cross CU references.
901   if (FormValue.getForm() == dwarf::DW_FORM_ref_addr) {
902     if (Target) {
903       // The global reference is ready. Mark it as global.
904       Target->setIsGlobalReference();
905       // Remove global reference from the unseen list.
906       removeGlobalOffset(Offset);
907     } else
908       // Record the unseen cross CU reference.
909       addGlobalOffset(Offset);
910   }
911 
912   // At this point, 'Target' can be null, in the case of the target element
913   // not being seen. But the correct bit is set, to indicate that the target
914   // is being referenced by (abstract_origin, extension, specification) or
915   // (import, type).
916   // We must differentiate between the kind of reference. This is needed to
917   // complete inlined function instances with dropped abstract references,
918   // in order to facilitate a logical comparison.
919   switch (Attr) {
920   case dwarf::DW_AT_abstract_origin:
921   case dwarf::DW_AT_call_origin:
922     CurrentElement->setReference(Target);
923     CurrentElement->setHasReferenceAbstract();
924     break;
925   case dwarf::DW_AT_extension:
926     CurrentElement->setReference(Target);
927     CurrentElement->setHasReferenceExtension();
928     break;
929   case dwarf::DW_AT_specification:
930     CurrentElement->setReference(Target);
931     CurrentElement->setHasReferenceSpecification();
932     break;
933   case dwarf::DW_AT_import:
934   case dwarf::DW_AT_type:
935     CurrentElement->setType(Target);
936     break;
937   default:
938     break;
939   }
940 }
941 
942 // Get an element given the DIE offset.
getElementForOffset(LVOffset Offset,LVElement * Element,bool IsType)943 LVElement *LVDWARFReader::getElementForOffset(LVOffset Offset,
944                                               LVElement *Element, bool IsType) {
945   // Update the element and all the references pointing to this element.
946   LVElementEntry &Entry = ElementTable[Offset];
947   if (!Entry.Element) {
948     if (IsType)
949       Entry.Types.insert(Element);
950     else
951       Entry.References.insert(Element);
952   }
953   return Entry.Element;
954 }
955 
loadTargetInfo(const ObjectFile & Obj)956 Error LVDWARFReader::loadTargetInfo(const ObjectFile &Obj) {
957   // Detect the architecture from the object file. We usually don't need OS
958   // info to lookup a target and create register info.
959   Triple TT;
960   TT.setArch(Triple::ArchType(Obj.getArch()));
961   TT.setVendor(Triple::UnknownVendor);
962   TT.setOS(Triple::UnknownOS);
963 
964   // Features to be passed to target/subtarget
965   Expected<SubtargetFeatures> Features = Obj.getFeatures();
966   SubtargetFeatures FeaturesValue;
967   if (!Features) {
968     consumeError(Features.takeError());
969     FeaturesValue = SubtargetFeatures();
970   }
971   FeaturesValue = *Features;
972   return loadGenericTargetInfo(TT.str(), FeaturesValue.getString());
973 }
974 
mapRangeAddress(const ObjectFile & Obj)975 void LVDWARFReader::mapRangeAddress(const ObjectFile &Obj) {
976   for (auto Iter = Obj.symbol_begin(); Iter != Obj.symbol_end(); ++Iter) {
977     const SymbolRef &Symbol = *Iter;
978 
979     Expected<SymbolRef::Type> TypeOrErr = Symbol.getType();
980     if (!TypeOrErr) {
981       consumeError(TypeOrErr.takeError());
982       continue;
983     }
984 
985     // Process only symbols that represent a function.
986     SymbolRef::Type Type = *TypeOrErr;
987     if (Type != SymbolRef::ST_Function)
988       continue;
989 
990     // In the case of a Mach-O STAB symbol, get its section only if
991     // the STAB symbol's section field refers to a valid section index.
992     // Otherwise the symbol may error trying to load a section that
993     // does not exist.
994     const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&Obj);
995     bool IsSTAB = false;
996     if (MachO) {
997       DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
998       uint8_t NType =
999           (MachO->is64Bit() ? MachO->getSymbol64TableEntry(SymDRI).n_type
1000                             : MachO->getSymbolTableEntry(SymDRI).n_type);
1001       if (NType & MachO::N_STAB)
1002         IsSTAB = true;
1003     }
1004 
1005     Expected<section_iterator> IterOrErr = Symbol.getSection();
1006     if (!IterOrErr) {
1007       consumeError(IterOrErr.takeError());
1008       continue;
1009     }
1010     section_iterator Section = IsSTAB ? Obj.section_end() : *IterOrErr;
1011     if (Section == Obj.section_end())
1012       continue;
1013 
1014     // Get the symbol value.
1015     Expected<uint64_t> AddressOrErr = Symbol.getAddress();
1016     if (!AddressOrErr) {
1017       consumeError(AddressOrErr.takeError());
1018       continue;
1019     }
1020     uint64_t Address = *AddressOrErr;
1021 
1022     // Get symbol name.
1023     StringRef Name;
1024     Expected<StringRef> NameOrErr = Symbol.getName();
1025     if (!NameOrErr) {
1026       consumeError(NameOrErr.takeError());
1027       continue;
1028     }
1029     Name = *NameOrErr;
1030 
1031     // Check if the symbol is Comdat.
1032     Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
1033     if (!FlagsOrErr) {
1034       consumeError(FlagsOrErr.takeError());
1035       continue;
1036     }
1037     uint32_t Flags = *FlagsOrErr;
1038 
1039     // Mark the symbol as 'comdat' in any of the following cases:
1040     // - Symbol has the SF_Weak flag or
1041     // - Symbol section index different from the DotTextSectionIndex.
1042     LVSectionIndex SectionIndex = Section->getIndex();
1043     bool IsComdat =
1044         (Flags & SymbolRef::SF_Weak) || (SectionIndex != DotTextSectionIndex);
1045 
1046     // Record the symbol name (linkage) and its loading address.
1047     addToSymbolTable(Name, Address, SectionIndex, IsComdat);
1048   }
1049 }
1050 
sortScopes()1051 void LVDWARFReader::sortScopes() { Root->sort(); }
1052 
print(raw_ostream & OS) const1053 void LVDWARFReader::print(raw_ostream &OS) const {
1054   OS << "LVType\n";
1055   LLVM_DEBUG(dbgs() << "CreateReaders\n");
1056 }
1057