xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- LVBinaryReader.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the LVBinaryReader class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/FormatAdapters.h"
16 #include "llvm/Support/FormatVariadic.h"
17 
18 using namespace llvm;
19 using namespace llvm::logicalview;
20 
21 #define DEBUG_TYPE "BinaryReader"
22 
23 // Function names extracted from the object symbol table.
add(StringRef Name,LVScope * Function,LVSectionIndex SectionIndex)24 void LVSymbolTable::add(StringRef Name, LVScope *Function,
25                         LVSectionIndex SectionIndex) {
26   std::string SymbolName(Name);
27   auto [It, Inserted] =
28       SymbolNames.try_emplace(SymbolName, Function, 0, SectionIndex, false);
29   if (!Inserted) {
30     // Update a recorded entry with its logical scope and section index.
31     It->second.Scope = Function;
32     if (SectionIndex)
33       It->second.SectionIndex = SectionIndex;
34   }
35 
36   if (Function && It->second.IsComdat)
37     Function->setIsComdat();
38 
39   LLVM_DEBUG({ print(dbgs()); });
40 }
41 
add(StringRef Name,LVAddress Address,LVSectionIndex SectionIndex,bool IsComdat)42 void LVSymbolTable::add(StringRef Name, LVAddress Address,
43                         LVSectionIndex SectionIndex, bool IsComdat) {
44   std::string SymbolName(Name);
45   auto [It, Inserted] = SymbolNames.try_emplace(SymbolName, nullptr, Address,
46                                                 SectionIndex, IsComdat);
47   if (!Inserted)
48     // Update a recorded symbol name with its logical scope.
49     It->second.Address = Address;
50 
51   LVScope *Function = It->second.Scope;
52   if (Function && IsComdat)
53     Function->setIsComdat();
54   LLVM_DEBUG({ print(dbgs()); });
55 }
56 
update(LVScope * Function)57 LVSectionIndex LVSymbolTable::update(LVScope *Function) {
58   LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex();
59   StringRef Name = Function->getLinkageName();
60   if (Name.empty())
61     Name = Function->getName();
62   std::string SymbolName(Name);
63 
64   if (SymbolName.empty())
65     return SectionIndex;
66 
67   auto It = SymbolNames.find(SymbolName);
68   if (It == SymbolNames.end())
69     return SectionIndex;
70 
71   // Update a recorded entry with its logical scope, only if the scope has
72   // ranges. That is the case when in DWARF there are 2 DIEs connected via
73   // the DW_AT_specification.
74   if (Function->getHasRanges()) {
75     It->second.Scope = Function;
76     SectionIndex = It->second.SectionIndex;
77   } else {
78     SectionIndex = UndefinedSectionIndex;
79   }
80 
81   if (It->second.IsComdat)
82     Function->setIsComdat();
83 
84   LLVM_DEBUG({ print(dbgs()); });
85   return SectionIndex;
86 }
87 
getEntry(StringRef Name)88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) {
89   static LVSymbolTableEntry Empty = LVSymbolTableEntry();
90   LVSymbolNames::iterator Iter = SymbolNames.find(Name);
91   return Iter != SymbolNames.end() ? Iter->second : Empty;
92 }
getAddress(StringRef Name)93 LVAddress LVSymbolTable::getAddress(StringRef Name) {
94   LVSymbolNames::iterator Iter = SymbolNames.find(Name);
95   return Iter != SymbolNames.end() ? Iter->second.Address : 0;
96 }
getIndex(StringRef Name)97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) {
98   LVSymbolNames::iterator Iter = SymbolNames.find(Name);
99   return Iter != SymbolNames.end() ? Iter->second.SectionIndex
100                                    : getReader().getDotTextSectionIndex();
101 }
getIsComdat(StringRef Name)102 bool LVSymbolTable::getIsComdat(StringRef Name) {
103   LVSymbolNames::iterator Iter = SymbolNames.find(Name);
104   return Iter != SymbolNames.end() ? Iter->second.IsComdat : false;
105 }
106 
print(raw_ostream & OS)107 void LVSymbolTable::print(raw_ostream &OS) {
108   OS << "Symbol Table\n";
109   for (LVSymbolNames::reference Entry : SymbolNames) {
110     LVSymbolTableEntry &SymbolName = Entry.second;
111     LVScope *Scope = SymbolName.Scope;
112     LVOffset Offset = Scope ? Scope->getOffset() : 0;
113     OS << "Index: " << hexValue(SymbolName.SectionIndex, 5)
114        << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N")
115        << " Scope: " << hexValue(Offset)
116        << " Address: " << hexValue(SymbolName.Address)
117        << " Name: " << Entry.first << "\n";
118   }
119 }
120 
addToSymbolTable(StringRef Name,LVScope * Function,LVSectionIndex SectionIndex)121 void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function,
122                                       LVSectionIndex SectionIndex) {
123   SymbolTable.add(Name, Function, SectionIndex);
124 }
addToSymbolTable(StringRef Name,LVAddress Address,LVSectionIndex SectionIndex,bool IsComdat)125 void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address,
126                                       LVSectionIndex SectionIndex,
127                                       bool IsComdat) {
128   SymbolTable.add(Name, Address, SectionIndex, IsComdat);
129 }
updateSymbolTable(LVScope * Function)130 LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) {
131   return SymbolTable.update(Function);
132 }
133 
getSymbolTableEntry(StringRef Name)134 const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) {
135   return SymbolTable.getEntry(Name);
136 }
getSymbolTableAddress(StringRef Name)137 LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) {
138   return SymbolTable.getAddress(Name);
139 }
getSymbolTableIndex(StringRef Name)140 LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) {
141   return SymbolTable.getIndex(Name);
142 }
getSymbolTableIsComdat(StringRef Name)143 bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) {
144   return SymbolTable.getIsComdat(Name);
145 }
146 
mapVirtualAddress(const object::ObjectFile & Obj)147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
148   for (const object::SectionRef &Section : Obj.sections()) {
149     LLVM_DEBUG({
150       Expected<StringRef> SectionNameOrErr = Section.getName();
151       StringRef Name;
152       if (!SectionNameOrErr)
153         consumeError(SectionNameOrErr.takeError());
154       else
155         Name = *SectionNameOrErr;
156       dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", "
157              << "Address: " << hexValue(Section.getAddress()) << ", "
158              << "Size: " << hexValue(Section.getSize()) << ", "
159              << "Name: " << Name << "\n";
160       dbgs() << "isCompressed:   " << Section.isCompressed() << ", "
161              << "isText:         " << Section.isText() << ", "
162              << "isData:         " << Section.isData() << ", "
163              << "isBSS:          " << Section.isBSS() << ", "
164              << "isVirtual:      " << Section.isVirtual() << "\n";
165       dbgs() << "isBitcode:      " << Section.isBitcode() << ", "
166              << "isStripped:     " << Section.isStripped() << ", "
167              << "isBerkeleyText: " << Section.isBerkeleyText() << ", "
168              << "isBerkeleyData: " << Section.isBerkeleyData() << ", "
169              << "isDebugSection: " << Section.isDebugSection() << "\n";
170       dbgs() << "\n";
171     });
172 
173     if (!Section.isText() || Section.isVirtual() || !Section.getSize())
174       continue;
175 
176     // Record section information required for symbol resolution.
177     // Note: The section index returned by 'getIndex()' is one based.
178     Sections.emplace(Section.getIndex(), Section);
179     addSectionAddress(Section);
180 
181     // Identify the ".text" section.
182     Expected<StringRef> SectionNameOrErr = Section.getName();
183     if (!SectionNameOrErr) {
184       consumeError(SectionNameOrErr.takeError());
185       continue;
186     }
187     if (*SectionNameOrErr == ".text" || *SectionNameOrErr == "CODE" ||
188         *SectionNameOrErr == ".code") {
189       DotTextSectionIndex = Section.getIndex();
190       // If the object is WebAssembly, update the address offset that
191       // will be added to DWARF DW_AT_* attributes.
192       if (Obj.isWasm())
193         WasmCodeSectionOffset = Section.getAddress();
194     }
195   }
196 
197   // Process the symbol table.
198   mapRangeAddress(Obj);
199 
200   LLVM_DEBUG({
201     dbgs() << "\nSections Information:\n";
202     for (LVSections::reference Entry : Sections) {
203       LVSectionIndex SectionIndex = Entry.first;
204       const object::SectionRef Section = Entry.second;
205       Expected<StringRef> SectionNameOrErr = Section.getName();
206       if (!SectionNameOrErr)
207         consumeError(SectionNameOrErr.takeError());
208       dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
209              << " Name: " << *SectionNameOrErr << "\n"
210              << "Size: " << hexValue(Section.getSize()) << "\n"
211              << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
212              << "SectionAddress: " << hexValue(Section.getAddress()) << "\n";
213     }
214     dbgs() << "\nObject Section Information:\n";
215     for (LVSectionAddresses::const_reference Entry : SectionAddresses)
216       dbgs() << "[" << hexValue(Entry.first) << ":"
217              << hexValue(Entry.first + Entry.second.getSize())
218              << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
219   });
220 }
221 
mapVirtualAddress(const object::COFFObjectFile & COFFObj)222 void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
223   ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
224   if (ImageBase)
225     ImageBaseAddress = ImageBase.get();
226 
227   LLVM_DEBUG({
228     dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
229   });
230 
231   uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT;
232 
233   for (const object::SectionRef &Section : COFFObj.sections()) {
234     if (!Section.isText() || Section.isVirtual() || !Section.getSize())
235       continue;
236 
237     const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
238     VirtualAddress = COFFSection->VirtualAddress;
239     bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
240 
241     // Record section information required for symbol resolution.
242     // Note: The section index returned by 'getIndex()' is zero based.
243     Sections.emplace(Section.getIndex() + 1, Section);
244     addSectionAddress(Section);
245 
246     // Additional initialization on the specific object format.
247     mapRangeAddress(COFFObj, Section, IsComdat);
248   }
249 
250   LLVM_DEBUG({
251     dbgs() << "\nSections Information:\n";
252     for (LVSections::reference Entry : Sections) {
253       LVSectionIndex SectionIndex = Entry.first;
254       const object::SectionRef Section = Entry.second;
255       const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
256       Expected<StringRef> SectionNameOrErr = Section.getName();
257       if (!SectionNameOrErr)
258         consumeError(SectionNameOrErr.takeError());
259       dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
260              << " Name: " << *SectionNameOrErr << "\n"
261              << "Size: " << hexValue(Section.getSize()) << "\n"
262              << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
263              << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
264              << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
265              << "\n"
266              << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
267              << "\n";
268     }
269     dbgs() << "\nObject Section Information:\n";
270     for (LVSectionAddresses::const_reference Entry : SectionAddresses)
271       dbgs() << "[" << hexValue(Entry.first) << ":"
272              << hexValue(Entry.first + Entry.second.getSize())
273              << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
274   });
275 }
276 
loadGenericTargetInfo(StringRef TheTriple,StringRef TheFeatures)277 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
278                                             StringRef TheFeatures) {
279   std::string TargetLookupError;
280   const Target *TheTarget =
281       TargetRegistry::lookupTarget(TheTriple, TargetLookupError);
282   if (!TheTarget)
283     return createStringError(errc::invalid_argument, TargetLookupError.c_str());
284 
285   // Register information.
286   MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple);
287   if (!RegisterInfo)
288     return createStringError(errc::invalid_argument,
289                              "no register info for target " + TheTriple);
290   MRI.reset(RegisterInfo);
291 
292   // Assembler properties and features.
293   MCTargetOptions MCOptions;
294   MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions));
295   if (!AsmInfo)
296     return createStringError(errc::invalid_argument,
297                              "no assembly info for target " + TheTriple);
298   MAI.reset(AsmInfo);
299 
300   // Target subtargets.
301   StringRef CPU;
302   MCSubtargetInfo *SubtargetInfo(
303       TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures));
304   if (!SubtargetInfo)
305     return createStringError(errc::invalid_argument,
306                              "no subtarget info for target " + TheTriple);
307   STI.reset(SubtargetInfo);
308 
309   // Instructions Info.
310   MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo());
311   if (!InstructionInfo)
312     return createStringError(errc::invalid_argument,
313                              "no instruction info for target " + TheTriple);
314   MII.reset(InstructionInfo);
315 
316   MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(),
317                                    STI.get());
318 
319   // Assembler.
320   MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC));
321   if (!DisAsm)
322     return createStringError(errc::invalid_argument,
323                              "no disassembler for target " + TheTriple);
324   MD.reset(DisAsm);
325 
326   MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter(
327       Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI));
328   if (!InstructionPrinter)
329     return createStringError(errc::invalid_argument,
330                              "no target assembly language printer for target " +
331                                  TheTriple);
332   MIP.reset(InstructionPrinter);
333   InstructionPrinter->setPrintImmHex(true);
334 
335   return Error::success();
336 }
337 
338 Expected<std::pair<uint64_t, object::SectionRef>>
getSection(LVScope * Scope,LVAddress Address,LVSectionIndex SectionIndex)339 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address,
340                            LVSectionIndex SectionIndex) {
341   // Return the 'text' section with the code for this logical scope.
342   // COFF: SectionIndex is zero. Use 'SectionAddresses' data.
343   // ELF: SectionIndex is the section index in the file.
344   if (SectionIndex) {
345     LVSections::iterator Iter = Sections.find(SectionIndex);
346     if (Iter == Sections.end()) {
347       return createStringError(errc::invalid_argument,
348                                "invalid section index for: '%s'",
349                                Scope->getName().str().c_str());
350     }
351     const object::SectionRef Section = Iter->second;
352     return std::make_pair(Section.getAddress(), Section);
353   }
354 
355   // Ensure a valid starting address for the public names.
356   LVSectionAddresses::const_iterator Iter =
357       SectionAddresses.upper_bound(Address);
358   if (Iter == SectionAddresses.begin())
359     return createStringError(errc::invalid_argument,
360                              "invalid section address for: '%s'",
361                              Scope->getName().str().c_str());
362 
363   // Get section that contains the code for this function.
364   Iter = SectionAddresses.lower_bound(Address);
365   if (Iter != SectionAddresses.begin())
366     --Iter;
367   return std::make_pair(Iter->first, Iter->second);
368 }
369 
createInstructions(LVScope * Scope,LVSectionIndex SectionIndex,const LVNameInfo & NameInfo)370 Error LVBinaryReader::createInstructions(LVScope *Scope,
371                                          LVSectionIndex SectionIndex,
372                                          const LVNameInfo &NameInfo) {
373   assert(Scope && "Scope is null.");
374 
375   // Skip stripped functions.
376   if (Scope->getIsDiscarded())
377     return Error::success();
378 
379   // Find associated address and size for the given function entry point.
380   LVAddress Address = NameInfo.first;
381   uint64_t Size = NameInfo.second;
382 
383   LLVM_DEBUG({
384     dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '"
385            << Scope->getLinkageName() << "'\n"
386            << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
387            << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n";
388   });
389 
390   Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr =
391       getSection(Scope, Address, SectionIndex);
392   if (!SectionOrErr)
393     return SectionOrErr.takeError();
394   const object::SectionRef Section = (*SectionOrErr).second;
395   uint64_t SectionAddress = (*SectionOrErr).first;
396 
397   Expected<StringRef> SectionContentsOrErr = Section.getContents();
398   if (!SectionContentsOrErr)
399     return SectionOrErr.takeError();
400 
401   // There are cases where the section size is smaller than the [LowPC,HighPC]
402   // range; it causes us to decode invalid addresses. The recorded size in the
403   // logical scope is one less than the real size.
404   LLVM_DEBUG({
405     dbgs() << " Size: " << hexValue(Size)
406            << ", Section Size: " << hexValue(Section.getSize()) << "\n";
407   });
408   Size = std::min(Size + 1, Section.getSize());
409 
410   ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr);
411   uint64_t Offset = Address - SectionAddress;
412   if (Offset > Bytes.size()) {
413     LLVM_DEBUG({
414       dbgs() << "offset (" << hexValue(Offset) << ") is beyond section size ("
415              << hexValue(Bytes.size()) << "); malformed input?\n";
416     });
417     return createStringError(
418         errc::bad_address,
419         "Failed to parse instructions; offset beyond section size");
420   }
421   uint8_t const *Begin = Bytes.data() + Offset;
422   uint8_t const *End = Bytes.data() + Offset + Size;
423 
424   LLVM_DEBUG({
425     Expected<StringRef> SectionNameOrErr = Section.getName();
426     if (!SectionNameOrErr)
427       consumeError(SectionNameOrErr.takeError());
428     else
429       dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " ["
430              << hexValue((uint64_t)Section.getAddress()) << ":"
431              << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10)
432              << "] Name: '" << *SectionNameOrErr << "'\n"
433              << "Begin: " << hexValue((uint64_t)Begin)
434              << ", End: " << hexValue((uint64_t)End) << "\n";
435   });
436 
437   // Address for first instruction line.
438   LVAddress FirstAddress = Address;
439   auto InstructionsSP = std::make_unique<LVLines>();
440   LVLines &Instructions = *InstructionsSP;
441   DiscoveredLines.emplace_back(std::move(InstructionsSP));
442 
443   while (Begin < End) {
444     MCInst Instruction;
445     uint64_t BytesConsumed = 0;
446     SmallVector<char, 64> InsnStr;
447     raw_svector_ostream Annotations(InsnStr);
448     MCDisassembler::DecodeStatus const S =
449         MD->getInstruction(Instruction, BytesConsumed,
450                            ArrayRef<uint8_t>(Begin, End), Address, outs());
451     switch (S) {
452     case MCDisassembler::Fail:
453       LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; });
454       if (BytesConsumed == 0)
455         // Skip invalid bytes
456         BytesConsumed = 1;
457       break;
458     case MCDisassembler::SoftFail:
459       LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
460       [[fallthrough]];
461     case MCDisassembler::Success: {
462       std::string Buffer;
463       raw_string_ostream Stream(Buffer);
464       StringRef AnnotationsStr = Annotations.str();
465       MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream);
466       LLVM_DEBUG({
467         std::string BufferCodes;
468         raw_string_ostream StreamCodes(BufferCodes);
469         StreamCodes << format_bytes(
470             ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16,
471             16);
472         dbgs() << "[" << hexValue((uint64_t)Begin) << "] "
473                << "Size: " << format_decimal(BytesConsumed, 2) << " ("
474                << formatv("{0}",
475                           fmt_align(StreamCodes.str(), AlignStyle::Left, 32))
476                << ") " << hexValue((uint64_t)Address) << ": " << Stream.str()
477                << "\n";
478       });
479       // Here we add logical lines to the Instructions. Later on,
480       // the 'processLines()' function will move each created logical line
481       // to its enclosing logical scope, using the debug ranges information
482       // and they will be released when its scope parent is deleted.
483       LVLineAssembler *Line = createLineAssembler();
484       Line->setAddress(Address);
485       Line->setName(StringRef(Stream.str()).trim());
486       Instructions.push_back(Line);
487       break;
488     }
489     }
490     Address += BytesConsumed;
491     Begin += BytesConsumed;
492   }
493 
494   LLVM_DEBUG({
495     size_t Index = 0;
496     dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
497            << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
498            << "Address: " << hexValue(FirstAddress)
499            << format(" - Collected instructions lines: %d\n",
500                      Instructions.size());
501     for (const LVLine *Line : Instructions)
502       dbgs() << format_decimal(++Index, 5) << ": "
503              << hexValue(Line->getOffset()) << ", (" << Line->getName()
504              << ")\n";
505   });
506 
507   // The scope in the assembler names is linked to its own instructions.
508   ScopeInstructions.add(SectionIndex, Scope, &Instructions);
509   AssemblerMappings.add(SectionIndex, FirstAddress, Scope);
510 
511   return Error::success();
512 }
513 
createInstructions(LVScope * Function,LVSectionIndex SectionIndex)514 Error LVBinaryReader::createInstructions(LVScope *Function,
515                                          LVSectionIndex SectionIndex) {
516   if (!options().getPrintInstructions())
517     return Error::success();
518 
519   LVNameInfo Name = CompileUnit->findPublicName(Function);
520   if (Name.first != LVAddress(UINT64_MAX))
521     return createInstructions(Function, SectionIndex, Name);
522 
523   return Error::success();
524 }
525 
createInstructions()526 Error LVBinaryReader::createInstructions() {
527   if (!options().getPrintInstructions())
528     return Error::success();
529 
530   LLVM_DEBUG({
531     size_t Index = 1;
532     dbgs() << "\nPublic Names (Scope):\n";
533     for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
534       LVScope *Scope = Name.first;
535       const LVNameInfo &NameInfo = Name.second;
536       LVAddress Address = NameInfo.first;
537       uint64_t Size = NameInfo.second;
538       dbgs() << format_decimal(Index++, 5) << ": "
539              << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
540              << hexValue(Address) << ":" << hexValue(Address + Size) << "] "
541              << "Name: '" << Scope->getName() << "' / '"
542              << Scope->getLinkageName() << "'\n";
543     }
544   });
545 
546   // For each public name in the current compile unit, create the line
547   // records that represent the executable instructions.
548   for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
549     LVScope *Scope = Name.first;
550     // The symbol table extracted from the object file always contains a
551     // non-empty name (linkage name). However, the logical scope does not
552     // guarantee to have a name for the linkage name (main is one case).
553     // For those cases, set the linkage name the same as the name.
554     if (!Scope->getLinkageNameIndex())
555       Scope->setLinkageName(Scope->getName());
556     LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName());
557     if (Error Err = createInstructions(Scope, SectionIndex, Name.second))
558       return Err;
559   }
560 
561   return Error::success();
562 }
563 
564 // During the traversal of the debug information sections, we created the
565 // logical lines representing the disassembled instructions from the text
566 // section and the logical lines representing the line records from the
567 // debug line section. Using the ranges associated with the logical scopes,
568 // we will allocate those logical lines to their logical scopes.
processLines(LVLines * DebugLines,LVSectionIndex SectionIndex,LVScope * Function)569 void LVBinaryReader::processLines(LVLines *DebugLines,
570                                   LVSectionIndex SectionIndex,
571                                   LVScope *Function) {
572   assert(DebugLines && "DebugLines is null.");
573 
574   // Just return if this compilation unit does not have any line records
575   // and no instruction lines were created.
576   if (DebugLines->empty() && !options().getPrintInstructions())
577     return;
578 
579   // Merge the debug lines and instruction lines using their text address;
580   // the logical line representing the debug line record is followed by the
581   // line(s) representing the disassembled instructions, whose addresses are
582   // equal or greater that the line address and less than the address of the
583   // next debug line record.
584   LLVM_DEBUG({
585     size_t Index = 1;
586     size_t PerLine = 4;
587     dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size());
588     for (const LVLine *Line : *DebugLines) {
589       dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset())
590              << ", (" << Line->getLineNumber() << ")"
591              << ((Index % PerLine) ? "  " : "\n");
592       ++Index;
593     }
594     dbgs() << ((Index % PerLine) ? "\n" : "");
595   });
596 
597   bool TraverseLines = true;
598   LVLines::iterator Iter = DebugLines->begin();
599   while (TraverseLines && Iter != DebugLines->end()) {
600     uint64_t DebugAddress = (*Iter)->getAddress();
601 
602     // Get the function with an entry point that matches this line and
603     // its associated assembler entries. In the case of COMDAT, the input
604     // 'Function' is not null. Use it to find its address ranges.
605     LVScope *Scope = Function;
606     if (!Function) {
607       Scope = AssemblerMappings.find(SectionIndex, DebugAddress);
608       if (!Scope) {
609         ++Iter;
610         continue;
611       }
612     }
613 
614     // Get the associated instructions for the found 'Scope'.
615     LVLines InstructionLines;
616     LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope);
617     if (Lines)
618       InstructionLines = std::move(*Lines);
619 
620     LLVM_DEBUG({
621       size_t Index = 0;
622       dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
623              << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
624              << format("Process instruction lines: %d\n",
625                        InstructionLines.size());
626       for (const LVLine *Line : InstructionLines)
627         dbgs() << format_decimal(++Index, 5) << ": "
628                << hexValue(Line->getOffset()) << ", (" << Line->getName()
629                << ")\n";
630     });
631 
632     // Continue with next debug line if there are not instructions lines.
633     if (InstructionLines.empty()) {
634       ++Iter;
635       continue;
636     }
637 
638     for (LVLine *InstructionLine : InstructionLines) {
639       uint64_t InstructionAddress = InstructionLine->getAddress();
640       LLVM_DEBUG({
641         dbgs() << "Instruction address: " << hexValue(InstructionAddress)
642                << "\n";
643       });
644       if (TraverseLines) {
645         while (Iter != DebugLines->end()) {
646           DebugAddress = (*Iter)->getAddress();
647           LLVM_DEBUG({
648             bool IsDebug = (*Iter)->getIsLineDebug();
649             dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " ["
650                    << hexValue(DebugAddress) << "]";
651             if (IsDebug)
652               dbgs() << format(" %d", (*Iter)->getLineNumber());
653             dbgs() << "\n";
654           });
655           // Instruction address before debug line.
656           if (InstructionAddress < DebugAddress) {
657             LLVM_DEBUG({
658               dbgs() << "Inserted instruction address: "
659                      << hexValue(InstructionAddress) << " before line: "
660                      << format("%d", (*Iter)->getLineNumber()) << " ["
661                      << hexValue(DebugAddress) << "]\n";
662             });
663             Iter = DebugLines->insert(Iter, InstructionLine);
664             // The returned iterator points to the inserted instruction.
665             // Skip it and point to the line acting as reference.
666             ++Iter;
667             break;
668           }
669           ++Iter;
670         }
671         if (Iter == DebugLines->end()) {
672           // We have reached the end of the source lines and the current
673           // instruction line address is greater than the last source line.
674           TraverseLines = false;
675           DebugLines->push_back(InstructionLine);
676         }
677       } else {
678         DebugLines->push_back(InstructionLine);
679       }
680     }
681   }
682 
683   LLVM_DEBUG({
684     dbgs() << format("Lines after merge: %d\n", DebugLines->size());
685     size_t Index = 0;
686     for (const LVLine *Line : *DebugLines) {
687       dbgs() << format_decimal(++Index, 5) << ": "
688              << hexValue(Line->getOffset()) << ", ("
689              << ((Line->getIsLineDebug())
690                      ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
691                      : Line->getName())
692              << ")\n";
693     }
694   });
695 
696   // If this compilation unit does not have line records, traverse its scopes
697   // and take any collected instruction lines as the working set in order
698   // to move them to their associated scope.
699   if (DebugLines->empty()) {
700     if (const LVScopes *Scopes = CompileUnit->getScopes())
701       for (LVScope *Scope : *Scopes) {
702         LVLines *Lines = ScopeInstructions.find(Scope);
703         if (Lines) {
704 
705           LLVM_DEBUG({
706             size_t Index = 0;
707             dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
708                    << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
709                    << format("Instruction lines: %d\n", Lines->size());
710             for (const LVLine *Line : *Lines)
711               dbgs() << format_decimal(++Index, 5) << ": "
712                      << hexValue(Line->getOffset()) << ", (" << Line->getName()
713                      << ")\n";
714           });
715 
716           if (Scope->getIsArtificial()) {
717             // Add the instruction lines to their artificial scope.
718             for (LVLine *Line : *Lines)
719               Scope->addElement(Line);
720           } else {
721             DebugLines->append(*Lines);
722           }
723           Lines->clear();
724         }
725       }
726   }
727 
728   LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
729   ScopesWithRanges->startSearch();
730 
731   // Process collected lines.
732   LVScope *Scope;
733   for (LVLine *Line : *DebugLines) {
734     // Using the current line address, get its associated lexical scope and
735     // add the line information to it.
736     Scope = ScopesWithRanges->getEntry(Line->getAddress());
737     if (!Scope) {
738       // If missing scope, use the compile unit.
739       Scope = CompileUnit;
740       LLVM_DEBUG({
741         dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", ("
742                << ((Line->getIsLineDebug())
743                        ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
744                        : Line->getName())
745                << ")\n";
746       });
747     }
748 
749     // Add line object to scope.
750     Scope->addElement(Line);
751 
752     // Report any line zero.
753     if (options().getWarningLines() && Line->getIsLineDebug() &&
754         !Line->getLineNumber())
755       CompileUnit->addLineZero(Line);
756 
757     // Some compilers generate ranges in the compile unit; other compilers
758     // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global
759     // variables, we need to generate the map ranges for the compile unit.
760     // If we use the ranges stored at the scope level, there are cases where
761     // the address referenced by a symbol location, is not in the enclosing
762     // scope, but in an outer one. By using the ranges stored in the compile
763     // unit, we can catch all those addresses.
764     if (Line->getIsLineDebug())
765       CompileUnit->addMapping(Line, SectionIndex);
766 
767     // Resolve any given pattern.
768     patterns().resolvePatternMatch(Line);
769   }
770 
771   ScopesWithRanges->endSearch();
772 }
773 
processLines(LVLines * DebugLines,LVSectionIndex SectionIndex)774 void LVBinaryReader::processLines(LVLines *DebugLines,
775                                   LVSectionIndex SectionIndex) {
776   assert(DebugLines && "DebugLines is null.");
777   if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex))
778     return;
779 
780   // If the Compile Unit does not contain comdat functions, use the whole
781   // set of debug lines, as the addresses don't have conflicts.
782   if (!CompileUnit->getHasComdatScopes()) {
783     processLines(DebugLines, SectionIndex, nullptr);
784     return;
785   }
786 
787   // Find the indexes for the lines whose address is zero.
788   std::vector<size_t> AddressZero;
789   LVLines::iterator It = llvm::find_if(
790       *DebugLines, [](LVLine *Line) { return !Line->getAddress(); });
791   while (It != std::end(*DebugLines)) {
792     AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It));
793     It = std::find_if(std::next(It), std::end(*DebugLines),
794                       [](LVLine *Line) { return !Line->getAddress(); });
795   }
796 
797   // If the set of debug lines does not contain any line with address zero,
798   // use the whole set. It means we are dealing with an initialization
799   // section from a fully linked binary.
800   if (AddressZero.empty()) {
801     processLines(DebugLines, SectionIndex, nullptr);
802     return;
803   }
804 
805   // The Compile unit contains comdat functions. Traverse the collected
806   // debug lines and identify logical groups based on their start and
807   // address. Each group starts with a zero address.
808   // Begin, End, Address, IsDone.
809   using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>;
810   std::vector<LVBucket> Buckets;
811 
812   LVAddress Address;
813   size_t Begin = 0;
814   size_t End = 0;
815   size_t Index = 0;
816   for (Index = 0; Index < AddressZero.size() - 1; ++Index) {
817     Begin = AddressZero[Index];
818     End = AddressZero[Index + 1] - 1;
819     Address = (*DebugLines)[End]->getAddress();
820     Buckets.emplace_back(Begin, End, Address, false);
821   }
822 
823   // Add the last bucket.
824   if (Index) {
825     Begin = AddressZero[Index];
826     End = DebugLines->size() - 1;
827     Address = (*DebugLines)[End]->getAddress();
828     Buckets.emplace_back(Begin, End, Address, false);
829   }
830 
831   LLVM_DEBUG({
832     dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n";
833     for (LVBucket &Bucket : Buckets) {
834       dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", "
835              << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", "
836              << "Address: " << hexValue(std::get<2>(Bucket)) << "\n";
837     }
838   });
839 
840   // Traverse the sections and buckets looking for matches on the section
841   // sizes. In the unlikely event of different buckets with the same size
842   // process them in order and mark them as done.
843   LVLines Group;
844   for (LVSections::reference Entry : Sections) {
845     LVSectionIndex SectionIndex = Entry.first;
846     const object::SectionRef Section = Entry.second;
847     uint64_t Size = Section.getSize();
848     LLVM_DEBUG({
849       dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3)
850              << " , Section Size: " << hexValue(Section.getSize())
851              << " , Section Address: " << hexValue(Section.getAddress())
852              << "\n";
853     });
854 
855     for (LVBucket &Bucket : Buckets) {
856       if (std::get<3>(Bucket))
857         // Already done for previous section.
858         continue;
859       if (Size == std::get<2>(Bucket)) {
860         // We have a match on the section size.
861         Group.clear();
862         LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket);
863         LVLines::iterator IterEnd =
864             DebugLines->begin() + std::get<1>(Bucket) + 1;
865         for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter)
866           Group.push_back(*Iter);
867         processLines(&Group, SectionIndex, /*Function=*/nullptr);
868         std::get<3>(Bucket) = true;
869         break;
870       }
871     }
872   }
873 }
874 
875 // Traverse the scopes for the given 'Function' looking for any inlined
876 // scopes with inlined lines, which are found in 'CUInlineeLines'.
includeInlineeLines(LVSectionIndex SectionIndex,LVScope * Function)877 void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex,
878                                          LVScope *Function) {
879   SmallVector<LVInlineeLine::iterator> InlineeIters;
880   std::function<void(LVScope * Parent)> FindInlinedScopes =
881       [&](LVScope *Parent) {
882         if (const LVScopes *Scopes = Parent->getScopes())
883           for (LVScope *Scope : *Scopes) {
884             LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope);
885             if (Iter != CUInlineeLines.end())
886               InlineeIters.push_back(Iter);
887             FindInlinedScopes(Scope);
888           }
889       };
890 
891   // Find all inlined scopes for the given 'Function'.
892   FindInlinedScopes(Function);
893   for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
894     LVScope *Scope = InlineeIter->first;
895     addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex);
896 
897     // TODO: Convert this into a reference.
898     LVLines *InlineeLines = InlineeIter->second.get();
899     LLVM_DEBUG({
900       dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
901       for (const LVLine *Line : *InlineeLines)
902         dbgs() << "[" << hexValue(Line->getAddress()) << "] "
903                << Line->getLineNumber() << "\n";
904       dbgs() << format("Debug lines: %d\n", CULines.size());
905       for (const LVLine *Line : CULines)
906         dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
907                << Line->getLineNumber() << ")\n";
908       ;
909     });
910 
911     // The inlined lines must be merged using its address, in order to keep
912     // the real order of the instructions. The inlined lines are mixed with
913     // the other non-inlined lines.
914     if (InlineeLines->size()) {
915       // First address of inlinee code.
916       uint64_t InlineeStart = (InlineeLines->front())->getAddress();
917       LVLines::iterator Iter =
918           llvm::find_if(CULines, [&](LVLine *Item) -> bool {
919             return Item->getAddress() == InlineeStart;
920           });
921       if (Iter != CULines.end()) {
922         // 'Iter' points to the line where the inlined function is called.
923         // Emulate the DW_AT_call_line attribute.
924         Scope->setCallLineNumber((*Iter)->getLineNumber());
925         // Mark the referenced line as the start of the inlined function.
926         // Skip the first line during the insertion, as the address and
927         // line number as the same. Otherwise we have to erase and insert.
928         (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
929         ++Iter;
930         CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end());
931       }
932     }
933 
934     // Remove this set of lines from the container; each inlined function
935     // creates an unique set of lines. Remove only the created container.
936     CUInlineeLines.erase(InlineeIter);
937     InlineeLines->clear();
938   }
939   LLVM_DEBUG({
940     dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
941     dbgs() << format("Debug lines: %d\n", CULines.size());
942     for (const LVLine *Line : CULines)
943       dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
944              << Line->getLineNumber() << ")\n";
945     ;
946   });
947 }
948 
print(raw_ostream & OS) const949 void LVBinaryReader::print(raw_ostream &OS) const {
950   OS << "LVBinaryReader\n";
951   LLVM_DEBUG(dbgs() << "PrintReader\n");
952 }
953