1 //===-- LVBinaryReader.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the LVBinaryReader class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/FormatAdapters.h"
16 #include "llvm/Support/FormatVariadic.h"
17
18 using namespace llvm;
19 using namespace llvm::logicalview;
20
21 #define DEBUG_TYPE "BinaryReader"
22
23 // Function names extracted from the object symbol table.
add(StringRef Name,LVScope * Function,LVSectionIndex SectionIndex)24 void LVSymbolTable::add(StringRef Name, LVScope *Function,
25 LVSectionIndex SectionIndex) {
26 std::string SymbolName(Name);
27 if (SymbolNames.find(SymbolName) == SymbolNames.end()) {
28 SymbolNames.emplace(
29 std::piecewise_construct, std::forward_as_tuple(SymbolName),
30 std::forward_as_tuple(Function, 0, SectionIndex, false));
31 } else {
32 // Update a recorded entry with its logical scope and section index.
33 SymbolNames[SymbolName].Scope = Function;
34 if (SectionIndex)
35 SymbolNames[SymbolName].SectionIndex = SectionIndex;
36 }
37
38 if (Function && SymbolNames[SymbolName].IsComdat)
39 Function->setIsComdat();
40
41 LLVM_DEBUG({ print(dbgs()); });
42 }
43
add(StringRef Name,LVAddress Address,LVSectionIndex SectionIndex,bool IsComdat)44 void LVSymbolTable::add(StringRef Name, LVAddress Address,
45 LVSectionIndex SectionIndex, bool IsComdat) {
46 std::string SymbolName(Name);
47 if (SymbolNames.find(SymbolName) == SymbolNames.end())
48 SymbolNames.emplace(
49 std::piecewise_construct, std::forward_as_tuple(SymbolName),
50 std::forward_as_tuple(nullptr, Address, SectionIndex, IsComdat));
51 else
52 // Update a recorded symbol name with its logical scope.
53 SymbolNames[SymbolName].Address = Address;
54
55 LVScope *Function = SymbolNames[SymbolName].Scope;
56 if (Function && IsComdat)
57 Function->setIsComdat();
58 LLVM_DEBUG({ print(dbgs()); });
59 }
60
update(LVScope * Function)61 LVSectionIndex LVSymbolTable::update(LVScope *Function) {
62 LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex();
63 StringRef Name = Function->getLinkageName();
64 if (Name.empty())
65 Name = Function->getName();
66 std::string SymbolName(Name);
67
68 if (SymbolName.empty() || (SymbolNames.find(SymbolName) == SymbolNames.end()))
69 return SectionIndex;
70
71 // Update a recorded entry with its logical scope, only if the scope has
72 // ranges. That is the case when in DWARF there are 2 DIEs connected via
73 // the DW_AT_specification.
74 if (Function->getHasRanges()) {
75 SymbolNames[SymbolName].Scope = Function;
76 SectionIndex = SymbolNames[SymbolName].SectionIndex;
77 } else {
78 SectionIndex = UndefinedSectionIndex;
79 }
80
81 if (SymbolNames[SymbolName].IsComdat)
82 Function->setIsComdat();
83
84 LLVM_DEBUG({ print(dbgs()); });
85 return SectionIndex;
86 }
87
getEntry(StringRef Name)88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) {
89 static LVSymbolTableEntry Empty = LVSymbolTableEntry();
90 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
91 return Iter != SymbolNames.end() ? Iter->second : Empty;
92 }
getAddress(StringRef Name)93 LVAddress LVSymbolTable::getAddress(StringRef Name) {
94 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
95 return Iter != SymbolNames.end() ? Iter->second.Address : 0;
96 }
getIndex(StringRef Name)97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) {
98 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex
100 : getReader().getDotTextSectionIndex();
101 }
getIsComdat(StringRef Name)102 bool LVSymbolTable::getIsComdat(StringRef Name) {
103 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false;
105 }
106
print(raw_ostream & OS)107 void LVSymbolTable::print(raw_ostream &OS) {
108 OS << "Symbol Table\n";
109 for (LVSymbolNames::reference Entry : SymbolNames) {
110 LVSymbolTableEntry &SymbolName = Entry.second;
111 LVScope *Scope = SymbolName.Scope;
112 LVOffset Offset = Scope ? Scope->getOffset() : 0;
113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5)
114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N")
115 << " Scope: " << hexValue(Offset)
116 << " Address: " << hexValue(SymbolName.Address)
117 << " Name: " << Entry.first << "\n";
118 }
119 }
120
// Forward to the reader's symbol table: record 'Name' together with its
// logical scope 'Function' and 'SectionIndex'.
void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function,
                                      LVSectionIndex SectionIndex) {
  SymbolTable.add(Name, Function, SectionIndex);
}
// Forward to the reader's symbol table: record 'Name' together with its
// 'Address', 'SectionIndex' and comdat flag.
void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address,
                                      LVSectionIndex SectionIndex,
                                      bool IsComdat) {
  SymbolTable.add(Name, Address, SectionIndex, IsComdat);
}
// Forward to the reader's symbol table and return the section index it
// reports for 'Function'.
LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) {
  return SymbolTable.update(Function);
}
133
// Return the symbol table entry for 'Name', as reported by
// SymbolTable.getEntry().
const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) {
  return SymbolTable.getEntry(Name);
}
// Return the address for 'Name', as reported by SymbolTable.getAddress().
LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) {
  return SymbolTable.getAddress(Name);
}
// Return the section index for 'Name', as reported by
// SymbolTable.getIndex().
LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) {
  return SymbolTable.getIndex(Name);
}
// Return the comdat flag for 'Name', as reported by
// SymbolTable.getIsComdat().
bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) {
  return SymbolTable.getIsComdat(Name);
}
146
mapVirtualAddress(const object::ObjectFile & Obj)147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
148 for (const object::SectionRef &Section : Obj.sections()) {
149 LLVM_DEBUG({
150 Expected<StringRef> SectionNameOrErr = Section.getName();
151 StringRef Name;
152 if (!SectionNameOrErr)
153 consumeError(SectionNameOrErr.takeError());
154 else
155 Name = *SectionNameOrErr;
156 dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", "
157 << "Address: " << hexValue(Section.getAddress()) << ", "
158 << "Size: " << hexValue(Section.getSize()) << ", "
159 << "Name: " << Name << "\n";
160 dbgs() << "isCompressed: " << Section.isCompressed() << ", "
161 << "isText: " << Section.isText() << ", "
162 << "isData: " << Section.isData() << ", "
163 << "isBSS: " << Section.isBSS() << ", "
164 << "isVirtual: " << Section.isVirtual() << "\n";
165 dbgs() << "isBitcode: " << Section.isBitcode() << ", "
166 << "isStripped: " << Section.isStripped() << ", "
167 << "isBerkeleyText: " << Section.isBerkeleyText() << ", "
168 << "isBerkeleyData: " << Section.isBerkeleyData() << ", "
169 << "isDebugSection: " << Section.isDebugSection() << "\n";
170 dbgs() << "\n";
171 });
172
173 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
174 continue;
175
176 // Record section information required for symbol resolution.
177 // Note: The section index returned by 'getIndex()' is one based.
178 Sections.emplace(Section.getIndex(), Section);
179 addSectionAddress(Section);
180
181 // Identify the ".text" section.
182 Expected<StringRef> SectionNameOrErr = Section.getName();
183 if (!SectionNameOrErr) {
184 consumeError(SectionNameOrErr.takeError());
185 continue;
186 }
187 if (*SectionNameOrErr == ".text" || *SectionNameOrErr == "CODE" ||
188 *SectionNameOrErr == ".code") {
189 DotTextSectionIndex = Section.getIndex();
190 // If the object is WebAssembly, update the address offset that
191 // will be added to DWARF DW_AT_* attributes.
192 if (Obj.isWasm())
193 WasmCodeSectionOffset = Section.getAddress();
194 }
195 }
196
197 // Process the symbol table.
198 mapRangeAddress(Obj);
199
200 LLVM_DEBUG({
201 dbgs() << "\nSections Information:\n";
202 for (LVSections::reference Entry : Sections) {
203 LVSectionIndex SectionIndex = Entry.first;
204 const object::SectionRef Section = Entry.second;
205 Expected<StringRef> SectionNameOrErr = Section.getName();
206 if (!SectionNameOrErr)
207 consumeError(SectionNameOrErr.takeError());
208 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
209 << " Name: " << *SectionNameOrErr << "\n"
210 << "Size: " << hexValue(Section.getSize()) << "\n"
211 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
212 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n";
213 }
214 dbgs() << "\nObject Section Information:\n";
215 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
216 dbgs() << "[" << hexValue(Entry.first) << ":"
217 << hexValue(Entry.first + Entry.second.getSize())
218 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
219 });
220 }
221
mapVirtualAddress(const object::COFFObjectFile & COFFObj)222 void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
223 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
224 if (ImageBase)
225 ImageBaseAddress = ImageBase.get();
226
227 LLVM_DEBUG({
228 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
229 });
230
231 uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT;
232
233 for (const object::SectionRef &Section : COFFObj.sections()) {
234 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
235 continue;
236
237 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
238 VirtualAddress = COFFSection->VirtualAddress;
239 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
240
241 // Record section information required for symbol resolution.
242 // Note: The section index returned by 'getIndex()' is zero based.
243 Sections.emplace(Section.getIndex() + 1, Section);
244 addSectionAddress(Section);
245
246 // Additional initialization on the specific object format.
247 mapRangeAddress(COFFObj, Section, IsComdat);
248 }
249
250 LLVM_DEBUG({
251 dbgs() << "\nSections Information:\n";
252 for (LVSections::reference Entry : Sections) {
253 LVSectionIndex SectionIndex = Entry.first;
254 const object::SectionRef Section = Entry.second;
255 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
256 Expected<StringRef> SectionNameOrErr = Section.getName();
257 if (!SectionNameOrErr)
258 consumeError(SectionNameOrErr.takeError());
259 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
260 << " Name: " << *SectionNameOrErr << "\n"
261 << "Size: " << hexValue(Section.getSize()) << "\n"
262 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
263 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
264 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
265 << "\n"
266 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
267 << "\n";
268 }
269 dbgs() << "\nObject Section Information:\n";
270 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
271 dbgs() << "[" << hexValue(Entry.first) << ":"
272 << hexValue(Entry.first + Entry.second.getSize())
273 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
274 });
275 }
276
loadGenericTargetInfo(StringRef TheTriple,StringRef TheFeatures)277 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
278 StringRef TheFeatures) {
279 std::string TargetLookupError;
280 const Target *TheTarget =
281 TargetRegistry::lookupTarget(std::string(TheTriple), TargetLookupError);
282 if (!TheTarget)
283 return createStringError(errc::invalid_argument, TargetLookupError.c_str());
284
285 // Register information.
286 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple);
287 if (!RegisterInfo)
288 return createStringError(errc::invalid_argument,
289 "no register info for target " + TheTriple);
290 MRI.reset(RegisterInfo);
291
292 // Assembler properties and features.
293 MCTargetOptions MCOptions;
294 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions));
295 if (!AsmInfo)
296 return createStringError(errc::invalid_argument,
297 "no assembly info for target " + TheTriple);
298 MAI.reset(AsmInfo);
299
300 // Target subtargets.
301 StringRef CPU;
302 MCSubtargetInfo *SubtargetInfo(
303 TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures));
304 if (!SubtargetInfo)
305 return createStringError(errc::invalid_argument,
306 "no subtarget info for target " + TheTriple);
307 STI.reset(SubtargetInfo);
308
309 // Instructions Info.
310 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo());
311 if (!InstructionInfo)
312 return createStringError(errc::invalid_argument,
313 "no instruction info for target " + TheTriple);
314 MII.reset(InstructionInfo);
315
316 MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(),
317 STI.get());
318
319 // Assembler.
320 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC));
321 if (!DisAsm)
322 return createStringError(errc::invalid_argument,
323 "no disassembler for target " + TheTriple);
324 MD.reset(DisAsm);
325
326 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter(
327 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI));
328 if (!InstructionPrinter)
329 return createStringError(errc::invalid_argument,
330 "no target assembly language printer for target " +
331 TheTriple);
332 MIP.reset(InstructionPrinter);
333 InstructionPrinter->setPrintImmHex(true);
334
335 return Error::success();
336 }
337
338 Expected<std::pair<uint64_t, object::SectionRef>>
getSection(LVScope * Scope,LVAddress Address,LVSectionIndex SectionIndex)339 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address,
340 LVSectionIndex SectionIndex) {
341 // Return the 'text' section with the code for this logical scope.
342 // COFF: SectionIndex is zero. Use 'SectionAddresses' data.
343 // ELF: SectionIndex is the section index in the file.
344 if (SectionIndex) {
345 LVSections::iterator Iter = Sections.find(SectionIndex);
346 if (Iter == Sections.end()) {
347 return createStringError(errc::invalid_argument,
348 "invalid section index for: '%s'",
349 Scope->getName().str().c_str());
350 }
351 const object::SectionRef Section = Iter->second;
352 return std::make_pair(Section.getAddress(), Section);
353 }
354
355 // Ensure a valid starting address for the public names.
356 LVSectionAddresses::const_iterator Iter =
357 SectionAddresses.upper_bound(Address);
358 if (Iter == SectionAddresses.begin())
359 return createStringError(errc::invalid_argument,
360 "invalid section address for: '%s'",
361 Scope->getName().str().c_str());
362
363 // Get section that contains the code for this function.
364 Iter = SectionAddresses.lower_bound(Address);
365 if (Iter != SectionAddresses.begin())
366 --Iter;
367 return std::make_pair(Iter->first, Iter->second);
368 }
369
addSectionRange(LVSectionIndex SectionIndex,LVScope * Scope)370 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex,
371 LVScope *Scope) {
372 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
373 ScopesWithRanges->addEntry(Scope);
374 }
375
addSectionRange(LVSectionIndex SectionIndex,LVScope * Scope,LVAddress LowerAddress,LVAddress UpperAddress)376 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex,
377 LVScope *Scope, LVAddress LowerAddress,
378 LVAddress UpperAddress) {
379 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
380 ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress);
381 }
382
getSectionRanges(LVSectionIndex SectionIndex)383 LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) {
384 // Check if we already have a mapping for this section index.
385 LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex);
386 if (IterSection == SectionRanges.end())
387 IterSection =
388 SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first;
389 LVRange *Range = IterSection->second.get();
390 assert(Range && "Range is null.");
391 return Range;
392 }
393
createInstructions(LVScope * Scope,LVSectionIndex SectionIndex,const LVNameInfo & NameInfo)394 Error LVBinaryReader::createInstructions(LVScope *Scope,
395 LVSectionIndex SectionIndex,
396 const LVNameInfo &NameInfo) {
397 assert(Scope && "Scope is null.");
398
399 // Skip stripped functions.
400 if (Scope->getIsDiscarded())
401 return Error::success();
402
403 // Find associated address and size for the given function entry point.
404 LVAddress Address = NameInfo.first;
405 uint64_t Size = NameInfo.second;
406
407 LLVM_DEBUG({
408 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '"
409 << Scope->getLinkageName() << "'\n"
410 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
411 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n";
412 });
413
414 Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr =
415 getSection(Scope, Address, SectionIndex);
416 if (!SectionOrErr)
417 return SectionOrErr.takeError();
418 const object::SectionRef Section = (*SectionOrErr).second;
419 uint64_t SectionAddress = (*SectionOrErr).first;
420
421 Expected<StringRef> SectionContentsOrErr = Section.getContents();
422 if (!SectionContentsOrErr)
423 return SectionOrErr.takeError();
424
425 // There are cases where the section size is smaller than the [LowPC,HighPC]
426 // range; it causes us to decode invalid addresses. The recorded size in the
427 // logical scope is one less than the real size.
428 LLVM_DEBUG({
429 dbgs() << " Size: " << hexValue(Size)
430 << ", Section Size: " << hexValue(Section.getSize()) << "\n";
431 });
432 Size = std::min(Size + 1, Section.getSize());
433
434 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr);
435 uint64_t Offset = Address - SectionAddress;
436 uint8_t const *Begin = Bytes.data() + Offset;
437 uint8_t const *End = Bytes.data() + Offset + Size;
438
439 LLVM_DEBUG({
440 Expected<StringRef> SectionNameOrErr = Section.getName();
441 if (!SectionNameOrErr)
442 consumeError(SectionNameOrErr.takeError());
443 else
444 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " ["
445 << hexValue((uint64_t)Section.getAddress()) << ":"
446 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10)
447 << "] Name: '" << *SectionNameOrErr << "'\n"
448 << "Begin: " << hexValue((uint64_t)Begin)
449 << ", End: " << hexValue((uint64_t)End) << "\n";
450 });
451
452 // Address for first instruction line.
453 LVAddress FirstAddress = Address;
454 auto InstructionsSP = std::make_unique<LVLines>();
455 LVLines &Instructions = *InstructionsSP;
456 DiscoveredLines.emplace_back(std::move(InstructionsSP));
457
458 while (Begin < End) {
459 MCInst Instruction;
460 uint64_t BytesConsumed = 0;
461 SmallVector<char, 64> InsnStr;
462 raw_svector_ostream Annotations(InsnStr);
463 MCDisassembler::DecodeStatus const S =
464 MD->getInstruction(Instruction, BytesConsumed,
465 ArrayRef<uint8_t>(Begin, End), Address, outs());
466 switch (S) {
467 case MCDisassembler::Fail:
468 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; });
469 if (BytesConsumed == 0)
470 // Skip invalid bytes
471 BytesConsumed = 1;
472 break;
473 case MCDisassembler::SoftFail:
474 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
475 [[fallthrough]];
476 case MCDisassembler::Success: {
477 std::string Buffer;
478 raw_string_ostream Stream(Buffer);
479 StringRef AnnotationsStr = Annotations.str();
480 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream);
481 LLVM_DEBUG({
482 std::string BufferCodes;
483 raw_string_ostream StreamCodes(BufferCodes);
484 StreamCodes << format_bytes(
485 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16,
486 16);
487 dbgs() << "[" << hexValue((uint64_t)Begin) << "] "
488 << "Size: " << format_decimal(BytesConsumed, 2) << " ("
489 << formatv("{0}",
490 fmt_align(StreamCodes.str(), AlignStyle::Left, 32))
491 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str()
492 << "\n";
493 });
494 // Here we add logical lines to the Instructions. Later on,
495 // the 'processLines()' function will move each created logical line
496 // to its enclosing logical scope, using the debug ranges information
497 // and they will be released when its scope parent is deleted.
498 LVLineAssembler *Line = createLineAssembler();
499 Line->setAddress(Address);
500 Line->setName(StringRef(Stream.str()).trim());
501 Instructions.push_back(Line);
502 break;
503 }
504 }
505 Address += BytesConsumed;
506 Begin += BytesConsumed;
507 }
508
509 LLVM_DEBUG({
510 size_t Index = 0;
511 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
512 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
513 << "Address: " << hexValue(FirstAddress)
514 << format(" - Collected instructions lines: %d\n",
515 Instructions.size());
516 for (const LVLine *Line : Instructions)
517 dbgs() << format_decimal(++Index, 5) << ": "
518 << hexValue(Line->getOffset()) << ", (" << Line->getName()
519 << ")\n";
520 });
521
522 // The scope in the assembler names is linked to its own instructions.
523 ScopeInstructions.add(SectionIndex, Scope, &Instructions);
524 AssemblerMappings.add(SectionIndex, FirstAddress, Scope);
525
526 return Error::success();
527 }
528
createInstructions(LVScope * Function,LVSectionIndex SectionIndex)529 Error LVBinaryReader::createInstructions(LVScope *Function,
530 LVSectionIndex SectionIndex) {
531 if (!options().getPrintInstructions())
532 return Error::success();
533
534 LVNameInfo Name = CompileUnit->findPublicName(Function);
535 if (Name.first != LVAddress(UINT64_MAX))
536 return createInstructions(Function, SectionIndex, Name);
537
538 return Error::success();
539 }
540
createInstructions()541 Error LVBinaryReader::createInstructions() {
542 if (!options().getPrintInstructions())
543 return Error::success();
544
545 LLVM_DEBUG({
546 size_t Index = 1;
547 dbgs() << "\nPublic Names (Scope):\n";
548 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
549 LVScope *Scope = Name.first;
550 const LVNameInfo &NameInfo = Name.second;
551 LVAddress Address = NameInfo.first;
552 uint64_t Size = NameInfo.second;
553 dbgs() << format_decimal(Index++, 5) << ": "
554 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
555 << hexValue(Address) << ":" << hexValue(Address + Size) << "] "
556 << "Name: '" << Scope->getName() << "' / '"
557 << Scope->getLinkageName() << "'\n";
558 }
559 });
560
561 // For each public name in the current compile unit, create the line
562 // records that represent the executable instructions.
563 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
564 LVScope *Scope = Name.first;
565 // The symbol table extracted from the object file always contains a
566 // non-empty name (linkage name). However, the logical scope does not
567 // guarantee to have a name for the linkage name (main is one case).
568 // For those cases, set the linkage name the same as the name.
569 if (!Scope->getLinkageNameIndex())
570 Scope->setLinkageName(Scope->getName());
571 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName());
572 if (Error Err = createInstructions(Scope, SectionIndex, Name.second))
573 return Err;
574 }
575
576 return Error::success();
577 }
578
579 // During the traversal of the debug information sections, we created the
580 // logical lines representing the disassembled instructions from the text
581 // section and the logical lines representing the line records from the
582 // debug line section. Using the ranges associated with the logical scopes,
583 // we will allocate those logical lines to their logical scopes.
//   DebugLines:   line records to process (must not be null); instruction
//                 lines are merged into this list in place.
//   SectionIndex: section used to look up assembler mappings, scope
//                 instructions and scope ranges.
//   Function:     when not null, the scope whose instructions are merged;
//                 when null, the scope is looked up from each line address.
void LVBinaryReader::processLines(LVLines *DebugLines,
                                  LVSectionIndex SectionIndex,
                                  LVScope *Function) {
  assert(DebugLines && "DebugLines is null.");

  // Just return if this compilation unit does not have any line records
  // and no instruction lines were created.
  if (DebugLines->empty() && !options().getPrintInstructions())
    return;

  // Merge the debug lines and instruction lines using their text address;
  // the logical line representing the debug line record is followed by the
  // line(s) representing the disassembled instructions, whose addresses are
  // equal or greater than the line address and less than the address of the
  // next debug line record.
  LLVM_DEBUG({
    size_t Index = 1;
    size_t PerLine = 4;
    dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size());
    for (const LVLine *Line : *DebugLines) {
      dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset())
             << ", (" << Line->getLineNumber() << ")"
             << ((Index % PerLine) ? " " : "\n");
      ++Index;
    }
    dbgs() << ((Index % PerLine) ? "\n" : "");
  });

  // 'TraverseLines' stays true while there are still debug lines ahead of
  // the merge position; once the end is reached, the remaining instruction
  // lines are simply appended.
  bool TraverseLines = true;
  LVLines::iterator Iter = DebugLines->begin();
  while (TraverseLines && Iter != DebugLines->end()) {
    uint64_t DebugAddress = (*Iter)->getAddress();

    // Get the function with an entry point that matches this line and
    // its associated assembler entries. In the case of COMDAT, the input
    // 'Function' is not null. Use it to find its address ranges.
    LVScope *Scope = Function;
    if (!Function) {
      Scope = AssemblerMappings.find(SectionIndex, DebugAddress);
      if (!Scope) {
        ++Iter;
        continue;
      }
    }

    // Get the associated instructions for the found 'Scope'. The lines are
    // taken out of the stored list by move.
    LVLines InstructionLines;
    LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope);
    if (Lines)
      InstructionLines = std::move(*Lines);

    LLVM_DEBUG({
      size_t Index = 0;
      dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
             << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
             << format("Process instruction lines: %d\n",
                       InstructionLines.size());
      for (const LVLine *Line : InstructionLines)
        dbgs() << format_decimal(++Index, 5) << ": "
               << hexValue(Line->getOffset()) << ", (" << Line->getName()
               << ")\n";
    });

    // Continue with the next debug line if there are no instruction lines.
    if (InstructionLines.empty()) {
      ++Iter;
      continue;
    }

    for (LVLine *InstructionLine : InstructionLines) {
      uint64_t InstructionAddress = InstructionLine->getAddress();
      LLVM_DEBUG({
        dbgs() << "Instruction address: " << hexValue(InstructionAddress)
               << "\n";
      });
      if (TraverseLines) {
        // Advance over the debug lines until one with an address greater
        // than the instruction address is found; the instruction line is
        // inserted right before it.
        while (Iter != DebugLines->end()) {
          DebugAddress = (*Iter)->getAddress();
          LLVM_DEBUG({
            bool IsDebug = (*Iter)->getIsLineDebug();
            dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " ["
                   << hexValue(DebugAddress) << "]";
            if (IsDebug)
              dbgs() << format(" %d", (*Iter)->getLineNumber());
            dbgs() << "\n";
          });
          // Instruction address before debug line.
          if (InstructionAddress < DebugAddress) {
            LLVM_DEBUG({
              dbgs() << "Inserted instruction address: "
                     << hexValue(InstructionAddress) << " before line: "
                     << format("%d", (*Iter)->getLineNumber()) << " ["
                     << hexValue(DebugAddress) << "]\n";
            });
            Iter = DebugLines->insert(Iter, InstructionLine);
            // The returned iterator points to the inserted instruction.
            // Skip it and point to the line acting as reference.
            ++Iter;
            break;
          }
          ++Iter;
        }
        if (Iter == DebugLines->end()) {
          // We have reached the end of the source lines and the current
          // instruction line address is greater than the last source line.
          TraverseLines = false;
          DebugLines->push_back(InstructionLine);
        }
      } else {
        DebugLines->push_back(InstructionLine);
      }
    }
  }

  LLVM_DEBUG({
    dbgs() << format("Lines after merge: %d\n", DebugLines->size());
    size_t Index = 0;
    for (const LVLine *Line : *DebugLines) {
      dbgs() << format_decimal(++Index, 5) << ": "
             << hexValue(Line->getOffset()) << ", ("
             << ((Line->getIsLineDebug())
                     ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
                     : Line->getName())
             << ")\n";
    }
  });

  // If this compilation unit does not have line records, traverse its scopes
  // and take any collected instruction lines as the working set in order
  // to move them to their associated scope.
  if (DebugLines->empty()) {
    if (const LVScopes *Scopes = CompileUnit->getScopes())
      for (LVScope *Scope : *Scopes) {
        LVLines *Lines = ScopeInstructions.find(Scope);
        if (Lines) {

          LLVM_DEBUG({
            size_t Index = 0;
            dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
                   << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
                   << format("Instruction lines: %d\n", Lines->size());
            for (const LVLine *Line : *Lines)
              dbgs() << format_decimal(++Index, 5) << ": "
                     << hexValue(Line->getOffset()) << ", (" << Line->getName()
                     << ")\n";
          });

          if (Scope->getIsArtificial()) {
            // Add the instruction lines to their artificial scope.
            for (LVLine *Line : *Lines)
              Scope->addElement(Line);
          } else {
            // Non-artificial scopes: queue the lines so the loop below
            // assigns them to a scope by address.
            DebugLines->append(*Lines);
          }
          Lines->clear();
        }
      }
  }

  LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
  ScopesWithRanges->startSearch();

  // Process collected lines.
  LVScope *Scope;
  for (LVLine *Line : *DebugLines) {
    // Using the current line address, get its associated lexical scope and
    // add the line information to it.
    Scope = ScopesWithRanges->getEntry(Line->getAddress());
    if (!Scope) {
      // If missing scope, use the compile unit.
      Scope = CompileUnit;
      LLVM_DEBUG({
        dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", ("
               << ((Line->getIsLineDebug())
                       ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
                       : Line->getName())
               << ")\n";
      });
    }

    // Add line object to scope.
    Scope->addElement(Line);

    // Report any line zero.
    if (options().getWarningLines() && Line->getIsLineDebug() &&
        !Line->getLineNumber())
      CompileUnit->addLineZero(Line);

    // Some compilers generate ranges in the compile unit; other compilers
    // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global
    // variables, we need to generate the map ranges for the compile unit.
    // If we use the ranges stored at the scope level, there are cases where
    // the address referenced by a symbol location, is not in the enclosing
    // scope, but in an outer one. By using the ranges stored in the compile
    // unit, we can catch all those addresses.
    if (Line->getIsLineDebug())
      CompileUnit->addMapping(Line, SectionIndex);

    // Resolve any given pattern.
    patterns().resolvePatternMatch(Line);
  }

  ScopesWithRanges->endSearch();
}
788
processLines(LVLines * DebugLines,LVSectionIndex SectionIndex)789 void LVBinaryReader::processLines(LVLines *DebugLines,
790 LVSectionIndex SectionIndex) {
791 assert(DebugLines && "DebugLines is null.");
792 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex))
793 return;
794
795 // If the Compile Unit does not contain comdat functions, use the whole
796 // set of debug lines, as the addresses don't have conflicts.
797 if (!CompileUnit->getHasComdatScopes()) {
798 processLines(DebugLines, SectionIndex, nullptr);
799 return;
800 }
801
802 // Find the indexes for the lines whose address is zero.
803 std::vector<size_t> AddressZero;
804 LVLines::iterator It =
805 std::find_if(std::begin(*DebugLines), std::end(*DebugLines),
806 [](LVLine *Line) { return !Line->getAddress(); });
807 while (It != std::end(*DebugLines)) {
808 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It));
809 It = std::find_if(std::next(It), std::end(*DebugLines),
810 [](LVLine *Line) { return !Line->getAddress(); });
811 }
812
813 // If the set of debug lines does not contain any line with address zero,
814 // use the whole set. It means we are dealing with an initialization
815 // section from a fully linked binary.
816 if (AddressZero.empty()) {
817 processLines(DebugLines, SectionIndex, nullptr);
818 return;
819 }
820
821 // The Compile unit contains comdat functions. Traverse the collected
822 // debug lines and identify logical groups based on their start and
823 // address. Each group starts with a zero address.
824 // Begin, End, Address, IsDone.
825 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>;
826 std::vector<LVBucket> Buckets;
827
828 LVAddress Address;
829 size_t Begin = 0;
830 size_t End = 0;
831 size_t Index = 0;
832 for (Index = 0; Index < AddressZero.size() - 1; ++Index) {
833 Begin = AddressZero[Index];
834 End = AddressZero[Index + 1] - 1;
835 Address = (*DebugLines)[End]->getAddress();
836 Buckets.emplace_back(Begin, End, Address, false);
837 }
838
839 // Add the last bucket.
840 if (Index) {
841 Begin = AddressZero[Index];
842 End = DebugLines->size() - 1;
843 Address = (*DebugLines)[End]->getAddress();
844 Buckets.emplace_back(Begin, End, Address, false);
845 }
846
847 LLVM_DEBUG({
848 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n";
849 for (LVBucket &Bucket : Buckets) {
850 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", "
851 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", "
852 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n";
853 }
854 });
855
856 // Traverse the sections and buckets looking for matches on the section
857 // sizes. In the unlikely event of different buckets with the same size
858 // process them in order and mark them as done.
859 LVLines Group;
860 for (LVSections::reference Entry : Sections) {
861 LVSectionIndex SectionIndex = Entry.first;
862 const object::SectionRef Section = Entry.second;
863 uint64_t Size = Section.getSize();
864 LLVM_DEBUG({
865 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3)
866 << " , Section Size: " << hexValue(Section.getSize())
867 << " , Section Address: " << hexValue(Section.getAddress())
868 << "\n";
869 });
870
871 for (LVBucket &Bucket : Buckets) {
872 if (std::get<3>(Bucket))
873 // Already done for previous section.
874 continue;
875 if (Size == std::get<2>(Bucket)) {
876 // We have a match on the section size.
877 Group.clear();
878 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket);
879 LVLines::iterator IterEnd =
880 DebugLines->begin() + std::get<1>(Bucket) + 1;
881 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter)
882 Group.push_back(*Iter);
883 processLines(&Group, SectionIndex, /*Function=*/nullptr);
884 std::get<3>(Bucket) = true;
885 break;
886 }
887 }
888 }
889 }
890
891 // Traverse the scopes for the given 'Function' looking for any inlined
892 // scopes with inlined lines, which are found in 'CUInlineeLines'.
includeInlineeLines(LVSectionIndex SectionIndex,LVScope * Function)893 void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex,
894 LVScope *Function) {
895 SmallVector<LVInlineeLine::iterator> InlineeIters;
896 std::function<void(LVScope * Parent)> FindInlinedScopes =
897 [&](LVScope *Parent) {
898 if (const LVScopes *Scopes = Parent->getScopes())
899 for (LVScope *Scope : *Scopes) {
900 LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope);
901 if (Iter != CUInlineeLines.end())
902 InlineeIters.push_back(Iter);
903 FindInlinedScopes(Scope);
904 }
905 };
906
907 // Find all inlined scopes for the given 'Function'.
908 FindInlinedScopes(Function);
909 for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
910 LVScope *Scope = InlineeIter->first;
911 addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex);
912
913 // TODO: Convert this into a reference.
914 LVLines *InlineeLines = InlineeIter->second.get();
915 LLVM_DEBUG({
916 dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
917 for (const LVLine *Line : *InlineeLines)
918 dbgs() << "[" << hexValue(Line->getAddress()) << "] "
919 << Line->getLineNumber() << "\n";
920 dbgs() << format("Debug lines: %d\n", CULines.size());
921 for (const LVLine *Line : CULines)
922 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
923 << Line->getLineNumber() << ")\n";
924 ;
925 });
926
927 // The inlined lines must be merged using its address, in order to keep
928 // the real order of the instructions. The inlined lines are mixed with
929 // the other non-inlined lines.
930 if (InlineeLines->size()) {
931 // First address of inlinee code.
932 uint64_t InlineeStart = (InlineeLines->front())->getAddress();
933 LVLines::iterator Iter = std::find_if(
934 CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool {
935 return Item->getAddress() == InlineeStart;
936 });
937 if (Iter != CULines.end()) {
938 // 'Iter' points to the line where the inlined function is called.
939 // Emulate the DW_AT_call_line attribute.
940 Scope->setCallLineNumber((*Iter)->getLineNumber());
941 // Mark the referenced line as the start of the inlined function.
942 // Skip the first line during the insertion, as the address and
943 // line number as the same. Otherwise we have to erase and insert.
944 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
945 ++Iter;
946 CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end());
947 }
948 }
949
950 // Remove this set of lines from the container; each inlined function
951 // creates an unique set of lines. Remove only the created container.
952 CUInlineeLines.erase(InlineeIter);
953 InlineeLines->clear();
954 }
955 LLVM_DEBUG({
956 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
957 dbgs() << format("Debug lines: %d\n", CULines.size());
958 for (const LVLine *Line : CULines)
959 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
960 << Line->getLineNumber() << ")\n";
961 ;
962 });
963 }
964
print(raw_ostream & OS) const965 void LVBinaryReader::print(raw_ostream &OS) const {
966 OS << "LVBinaryReader\n";
967 LLVM_DEBUG(dbgs() << "PrintReader\n");
968 }
969