1 //===-- LVBinaryReader.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements the LVBinaryReader class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h" 14 #include "llvm/Support/Errc.h" 15 #include "llvm/Support/FormatAdapters.h" 16 #include "llvm/Support/FormatVariadic.h" 17 18 using namespace llvm; 19 using namespace llvm::logicalview; 20 21 #define DEBUG_TYPE "BinaryReader" 22 23 // Function names extracted from the object symbol table. 24 void LVSymbolTable::add(StringRef Name, LVScope *Function, 25 LVSectionIndex SectionIndex) { 26 std::string SymbolName(Name); 27 if (SymbolNames.find(SymbolName) == SymbolNames.end()) { 28 SymbolNames.emplace( 29 std::piecewise_construct, std::forward_as_tuple(SymbolName), 30 std::forward_as_tuple(Function, 0, SectionIndex, false)); 31 } else { 32 // Update a recorded entry with its logical scope and section index. 33 SymbolNames[SymbolName].Scope = Function; 34 if (SectionIndex) 35 SymbolNames[SymbolName].SectionIndex = SectionIndex; 36 } 37 38 if (Function && SymbolNames[SymbolName].IsComdat) 39 Function->setIsComdat(); 40 41 LLVM_DEBUG({ print(dbgs()); }); 42 } 43 44 void LVSymbolTable::add(StringRef Name, LVAddress Address, 45 LVSectionIndex SectionIndex, bool IsComdat) { 46 std::string SymbolName(Name); 47 if (SymbolNames.find(SymbolName) == SymbolNames.end()) 48 SymbolNames.emplace( 49 std::piecewise_construct, std::forward_as_tuple(SymbolName), 50 std::forward_as_tuple(nullptr, Address, SectionIndex, IsComdat)); 51 else 52 // Update a recorded symbol name with its logical scope. 53 SymbolNames[SymbolName].Address = Address; 54 55 LVScope *Function = SymbolNames[SymbolName].Scope; 56 if (Function && IsComdat) 57 Function->setIsComdat(); 58 LLVM_DEBUG({ print(dbgs()); }); 59 } 60 61 LVSectionIndex LVSymbolTable::update(LVScope *Function) { 62 LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex(); 63 StringRef Name = Function->getLinkageName(); 64 if (Name.empty()) 65 Name = Function->getName(); 66 std::string SymbolName(Name); 67 68 if (SymbolName.empty() || (SymbolNames.find(SymbolName) == SymbolNames.end())) 69 return SectionIndex; 70 71 // Update a recorded entry with its logical scope, only if the scope has 72 // ranges. That is the case when in DWARF there are 2 DIEs connected via 73 // the DW_AT_specification. 74 if (Function->getHasRanges()) { 75 SymbolNames[SymbolName].Scope = Function; 76 SectionIndex = SymbolNames[SymbolName].SectionIndex; 77 } else { 78 SectionIndex = UndefinedSectionIndex; 79 } 80 81 if (SymbolNames[SymbolName].IsComdat) 82 Function->setIsComdat(); 83 84 LLVM_DEBUG({ print(dbgs()); }); 85 return SectionIndex; 86 } 87 88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) { 89 static LVSymbolTableEntry Empty = LVSymbolTableEntry(); 90 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 91 return Iter != SymbolNames.end() ? Iter->second : Empty; 92 } 93 LVAddress LVSymbolTable::getAddress(StringRef Name) { 94 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 95 return Iter != SymbolNames.end() ? Iter->second.Address : 0; 96 } 97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) { 98 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex 100 : getReader().getDotTextSectionIndex(); 101 } 102 bool LVSymbolTable::getIsComdat(StringRef Name) { 103 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false; 105 } 106 107 void LVSymbolTable::print(raw_ostream &OS) { 108 OS << "Symbol Table\n"; 109 for (LVSymbolNames::reference Entry : SymbolNames) { 110 LVSymbolTableEntry &SymbolName = Entry.second; 111 LVScope *Scope = SymbolName.Scope; 112 LVOffset Offset = Scope ? Scope->getOffset() : 0; 113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5) 114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N") 115 << " Scope: " << hexValue(Offset) 116 << " Address: " << hexValue(SymbolName.Address) 117 << " Name: " << Entry.first << "\n"; 118 } 119 } 120 121 void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function, 122 LVSectionIndex SectionIndex) { 123 SymbolTable.add(Name, Function, SectionIndex); 124 } 125 void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address, 126 LVSectionIndex SectionIndex, 127 bool IsComdat) { 128 SymbolTable.add(Name, Address, SectionIndex, IsComdat); 129 } 130 LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) { 131 return SymbolTable.update(Function); 132 } 133 134 const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) { 135 return SymbolTable.getEntry(Name); 136 } 137 LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) { 138 return SymbolTable.getAddress(Name); 139 } 140 LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) { 141 return SymbolTable.getIndex(Name); 142 } 143 bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) { 144 return SymbolTable.getIsComdat(Name); 145 } 146 147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) { 148 for (const object::SectionRef &Section : Obj.sections()) { 149 if (!Section.isText() || Section.isVirtual() || !Section.getSize()) 150 continue; 151 152 // Record section information required for symbol resolution. 153 // Note: The section index returned by 'getIndex()' is one based. 154 Sections.emplace(Section.getIndex(), Section); 155 addSectionAddress(Section); 156 157 // Identify the ".text" section. 158 Expected<StringRef> SectionNameOrErr = Section.getName(); 159 if (!SectionNameOrErr) { 160 consumeError(SectionNameOrErr.takeError()); 161 continue; 162 } 163 if ((*SectionNameOrErr).equals(".text") || 164 (*SectionNameOrErr).equals(".code")) 165 DotTextSectionIndex = Section.getIndex(); 166 } 167 168 // Process the symbol table. 169 mapRangeAddress(Obj); 170 171 LLVM_DEBUG({ 172 dbgs() << "\nSections Information:\n"; 173 for (LVSections::reference Entry : Sections) { 174 LVSectionIndex SectionIndex = Entry.first; 175 const object::SectionRef Section = Entry.second; 176 Expected<StringRef> SectionNameOrErr = Section.getName(); 177 if (!SectionNameOrErr) 178 consumeError(SectionNameOrErr.takeError()); 179 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) 180 << " Name: " << *SectionNameOrErr << "\n" 181 << "Size: " << hexValue(Section.getSize()) << "\n" 182 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" 183 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"; 184 } 185 dbgs() << "\nObject Section Information:\n"; 186 for (LVSectionAddresses::const_reference Entry : SectionAddresses) 187 dbgs() << "[" << hexValue(Entry.first) << ":" 188 << hexValue(Entry.first + Entry.second.getSize()) 189 << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; 190 }); 191 } 192 193 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, 194 StringRef TheFeatures) { 195 std::string TargetLookupError; 196 const Target *TheTarget = 197 TargetRegistry::lookupTarget(std::string(TheTriple), TargetLookupError); 198 if (!TheTarget) 199 return createStringError(errc::invalid_argument, TargetLookupError.c_str()); 200 201 // Register information. 202 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple); 203 if (!RegisterInfo) 204 return createStringError(errc::invalid_argument, 205 "no register info for target " + TheTriple); 206 MRI.reset(RegisterInfo); 207 208 // Assembler properties and features. 209 MCTargetOptions MCOptions; 210 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions)); 211 if (!AsmInfo) 212 return createStringError(errc::invalid_argument, 213 "no assembly info for target " + TheTriple); 214 MAI.reset(AsmInfo); 215 216 // Target subtargets. 217 StringRef CPU; 218 MCSubtargetInfo *SubtargetInfo( 219 TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures)); 220 if (!SubtargetInfo) 221 return createStringError(errc::invalid_argument, 222 "no subtarget info for target " + TheTriple); 223 STI.reset(SubtargetInfo); 224 225 // Instructions Info. 226 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo()); 227 if (!InstructionInfo) 228 return createStringError(errc::invalid_argument, 229 "no instruction info for target " + TheTriple); 230 MII.reset(InstructionInfo); 231 232 MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(), 233 STI.get()); 234 235 // Assembler. 236 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC)); 237 if (!DisAsm) 238 return createStringError(errc::invalid_argument, 239 "no disassembler for target " + TheTriple); 240 MD.reset(DisAsm); 241 242 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter( 243 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI)); 244 if (!InstructionPrinter) 245 return createStringError(errc::invalid_argument, 246 "no target assembly language printer for target " + 247 TheTriple); 248 MIP.reset(InstructionPrinter); 249 InstructionPrinter->setPrintImmHex(true); 250 251 return Error::success(); 252 } 253 254 Expected<std::pair<uint64_t, object::SectionRef>> 255 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address, 256 LVSectionIndex SectionIndex) { 257 // Return the 'text' section with the code for this logical scope. 258 // COFF: SectionIndex is zero. Use 'SectionAddresses' data. 259 // ELF: SectionIndex is the section index in the file. 260 if (SectionIndex) { 261 LVSections::iterator Iter = Sections.find(SectionIndex); 262 if (Iter == Sections.end()) { 263 return createStringError(errc::invalid_argument, 264 "invalid section index for: '%s'", 265 Scope->getName().str().c_str()); 266 } 267 const object::SectionRef Section = Iter->second; 268 return std::make_pair(Section.getAddress(), Section); 269 } 270 271 // Ensure a valid starting address for the public names. 272 LVSectionAddresses::const_iterator Iter = 273 SectionAddresses.upper_bound(Address); 274 if (Iter == SectionAddresses.begin()) 275 return createStringError(errc::invalid_argument, 276 "invalid section address for: '%s'", 277 Scope->getName().str().c_str()); 278 279 // Get section that contains the code for this function. 280 Iter = SectionAddresses.lower_bound(Address); 281 if (Iter != SectionAddresses.begin()) 282 --Iter; 283 return std::make_pair(Iter->first, Iter->second); 284 } 285 286 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, 287 LVScope *Scope) { 288 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 289 ScopesWithRanges->addEntry(Scope); 290 } 291 292 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, 293 LVScope *Scope, LVAddress LowerAddress, 294 LVAddress UpperAddress) { 295 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 296 ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress); 297 } 298 299 LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) { 300 LVRange *Range = nullptr; 301 // Check if we already have a mapping for this section index. 302 LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex); 303 if (IterSection == SectionRanges.end()) { 304 Range = new LVRange(); 305 SectionRanges.emplace(SectionIndex, Range); 306 } else { 307 Range = IterSection->second; 308 } 309 assert(Range && "Range is null."); 310 return Range; 311 } 312 313 LVBinaryReader::~LVBinaryReader() { 314 // Delete the lines created by 'createInstructions'. 315 std::vector<LVLines *> AllInstructionLines = ScopeInstructions.find(); 316 for (LVLines *Entry : AllInstructionLines) 317 delete Entry; 318 // Delete the ranges created by 'getSectionRanges'. 319 for (LVSectionRanges::reference Entry : SectionRanges) 320 delete Entry.second; 321 } 322 323 Error LVBinaryReader::createInstructions(LVScope *Scope, 324 LVSectionIndex SectionIndex, 325 const LVNameInfo &NameInfo) { 326 assert(Scope && "Scope is null."); 327 328 // Skip stripped functions. 329 if (Scope->getIsDiscarded()) 330 return Error::success(); 331 332 // Find associated address and size for the given function entry point. 333 LVAddress Address = NameInfo.first; 334 uint64_t Size = NameInfo.second; 335 336 LLVM_DEBUG({ 337 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '" 338 << Scope->getLinkageName() << "'\n" 339 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 340 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n"; 341 }); 342 343 Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr = 344 getSection(Scope, Address, SectionIndex); 345 if (!SectionOrErr) 346 return SectionOrErr.takeError(); 347 const object::SectionRef Section = (*SectionOrErr).second; 348 uint64_t SectionAddress = (*SectionOrErr).first; 349 350 Expected<StringRef> SectionContentsOrErr = Section.getContents(); 351 if (!SectionContentsOrErr) 352 return SectionOrErr.takeError(); 353 354 // There are cases where the section size is smaller than the [LowPC,HighPC] 355 // range; it causes us to decode invalid addresses. The recorded size in the 356 // logical scope is one less than the real size. 357 LLVM_DEBUG({ 358 dbgs() << " Size: " << hexValue(Size) 359 << ", Section Size: " << hexValue(Section.getSize()) << "\n"; 360 }); 361 Size = std::min(Size + 1, Section.getSize()); 362 363 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr); 364 uint64_t Offset = Address - SectionAddress; 365 uint8_t const *Begin = Bytes.data() + Offset; 366 uint8_t const *End = Bytes.data() + Offset + Size; 367 368 LLVM_DEBUG({ 369 Expected<StringRef> SectionNameOrErr = Section.getName(); 370 if (!SectionNameOrErr) 371 consumeError(SectionNameOrErr.takeError()); 372 else 373 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " [" 374 << hexValue((uint64_t)Section.getAddress()) << ":" 375 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10) 376 << "] Name: '" << *SectionNameOrErr << "'\n" 377 << "Begin: " << hexValue((uint64_t)Begin) 378 << ", End: " << hexValue((uint64_t)End) << "\n"; 379 }); 380 381 // Address for first instruction line. 382 LVAddress FirstAddress = Address; 383 LVLines *Instructions = new LVLines(); 384 385 while (Begin < End) { 386 MCInst Instruction; 387 uint64_t BytesConsumed = 0; 388 SmallVector<char, 64> InsnStr; 389 raw_svector_ostream Annotations(InsnStr); 390 MCDisassembler::DecodeStatus const S = 391 MD->getInstruction(Instruction, BytesConsumed, 392 ArrayRef<uint8_t>(Begin, End), Address, outs()); 393 switch (S) { 394 case MCDisassembler::Fail: 395 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; }); 396 if (BytesConsumed == 0) 397 // Skip invalid bytes 398 BytesConsumed = 1; 399 break; 400 case MCDisassembler::SoftFail: 401 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; }); 402 LLVM_FALLTHROUGH; 403 case MCDisassembler::Success: { 404 std::string Buffer; 405 raw_string_ostream Stream(Buffer); 406 StringRef AnnotationsStr = Annotations.str(); 407 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream); 408 LLVM_DEBUG({ 409 std::string BufferCodes; 410 raw_string_ostream StreamCodes(BufferCodes); 411 StreamCodes << format_bytes( 412 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16, 413 16); 414 dbgs() << "[" << hexValue((uint64_t)Begin) << "] " 415 << "Size: " << format_decimal(BytesConsumed, 2) << " (" 416 << formatv("{0}", 417 fmt_align(StreamCodes.str(), AlignStyle::Left, 32)) 418 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str() 419 << "\n"; 420 }); 421 // Here we add logical lines to the Instructions. Later on, 422 // the 'processLines()' function will move each created logical line 423 // to its enclosing logical scope, using the debug ranges information 424 // and they will be released when its scope parent is deleted. 425 LVLineAssembler *Line = new LVLineAssembler(); 426 Line->setAddress(Address); 427 Line->setName(StringRef(Stream.str()).trim()); 428 Instructions->push_back(Line); 429 break; 430 } 431 } 432 Address += BytesConsumed; 433 Begin += BytesConsumed; 434 } 435 436 LLVM_DEBUG({ 437 size_t Index = 0; 438 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 439 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 440 << "Address: " << hexValue(FirstAddress) 441 << format(" - Collected instructions lines: %d\n", 442 Instructions->size()); 443 for (const LVLine *Line : *Instructions) 444 dbgs() << format_decimal(++Index, 5) << ": " 445 << hexValue(Line->getOffset()) << ", (" << Line->getName() 446 << ")\n"; 447 }); 448 449 // The scope in the assembler names is linked to its own instructions. 450 ScopeInstructions.add(SectionIndex, Scope, Instructions); 451 AssemblerMappings.add(SectionIndex, FirstAddress, Scope); 452 453 return Error::success(); 454 } 455 456 Error LVBinaryReader::createInstructions(LVScope *Function, 457 LVSectionIndex SectionIndex) { 458 if (!options().getPrintInstructions()) 459 return Error::success(); 460 461 LVNameInfo Name = CompileUnit->findPublicName(Function); 462 if (Name.first != LVAddress(UINT64_MAX)) 463 return createInstructions(Function, SectionIndex, Name); 464 465 return Error::success(); 466 } 467 468 Error LVBinaryReader::createInstructions() { 469 if (!options().getPrintInstructions()) 470 return Error::success(); 471 472 LLVM_DEBUG({ 473 size_t Index = 1; 474 dbgs() << "\nPublic Names (Scope):\n"; 475 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 476 LVScope *Scope = Name.first; 477 const LVNameInfo &NameInfo = Name.second; 478 LVAddress Address = NameInfo.first; 479 uint64_t Size = NameInfo.second; 480 dbgs() << format_decimal(Index++, 5) << ": " 481 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 482 << hexValue(Address) << ":" << hexValue(Address + Size) << "] " 483 << "Name: '" << Scope->getName() << "' / '" 484 << Scope->getLinkageName() << "'\n"; 485 } 486 }); 487 488 // For each public name in the current compile unit, create the line 489 // records that represent the executable instructions. 490 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 491 LVScope *Scope = Name.first; 492 // The symbol table extracted from the object file always contains a 493 // non-empty name (linkage name). However, the logical scope does not 494 // guarantee to have a name for the linkage name (main is one case). 495 // For those cases, set the linkage name the same as the name. 496 if (!Scope->getLinkageNameIndex()) 497 Scope->setLinkageName(Scope->getName()); 498 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName()); 499 if (Error Err = createInstructions(Scope, SectionIndex, Name.second)) 500 return Err; 501 } 502 503 return Error::success(); 504 } 505 506 // During the traversal of the debug information sections, we created the 507 // logical lines representing the disassembled instructions from the text 508 // section and the logical lines representing the line records from the 509 // debug line section. Using the ranges associated with the logical scopes, 510 // we will allocate those logical lines to their logical scopes. 511 void LVBinaryReader::processLines(LVLines *DebugLines, 512 LVSectionIndex SectionIndex, 513 LVScope *Function) { 514 assert(DebugLines && "DebugLines is null."); 515 516 // Just return if this compilation unit does not have any line records 517 // and no instruction lines were created. 518 if (DebugLines->empty() && !options().getPrintInstructions()) 519 return; 520 521 // Merge the debug lines and instruction lines using their text address; 522 // the logical line representing the debug line record is followed by the 523 // line(s) representing the disassembled instructions, whose addresses are 524 // equal or greater that the line address and less than the address of the 525 // next debug line record. 526 LLVM_DEBUG({ 527 size_t Index = 1; 528 size_t PerLine = 4; 529 dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size()); 530 for (const LVLine *Line : *DebugLines) { 531 dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset()) 532 << ", (" << Line->getLineNumber() << ")" 533 << ((Index % PerLine) ? " " : "\n"); 534 ++Index; 535 } 536 dbgs() << ((Index % PerLine) ? "\n" : ""); 537 }); 538 539 bool TraverseLines = true; 540 LVLines::iterator Iter = DebugLines->begin(); 541 while (TraverseLines && Iter != DebugLines->end()) { 542 uint64_t DebugAddress = (*Iter)->getAddress(); 543 544 // Get the function with an entry point that matches this line and 545 // its associated assembler entries. In the case of COMDAT, the input 546 // 'Function' is not null. Use it to find its address ranges. 547 LVScope *Scope = Function; 548 if (!Function) { 549 Scope = AssemblerMappings.find(SectionIndex, DebugAddress); 550 if (!Scope) { 551 ++Iter; 552 continue; 553 } 554 } 555 556 // Get the associated instructions for the found 'Scope'. 557 LVLines InstructionLines; 558 LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope); 559 if (Lines) 560 InstructionLines = std::move(*Lines); 561 562 LLVM_DEBUG({ 563 size_t Index = 0; 564 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 565 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 566 << format("Process instruction lines: %d\n", 567 InstructionLines.size()); 568 for (const LVLine *Line : InstructionLines) 569 dbgs() << format_decimal(++Index, 5) << ": " 570 << hexValue(Line->getOffset()) << ", (" << Line->getName() 571 << ")\n"; 572 }); 573 574 // Continue with next debug line if there are not instructions lines. 575 if (InstructionLines.empty()) { 576 ++Iter; 577 continue; 578 } 579 580 for (LVLine *InstructionLine : InstructionLines) { 581 uint64_t InstructionAddress = InstructionLine->getAddress(); 582 LLVM_DEBUG({ 583 dbgs() << "Instruction address: " << hexValue(InstructionAddress) 584 << "\n"; 585 }); 586 if (TraverseLines) { 587 while (Iter != DebugLines->end()) { 588 DebugAddress = (*Iter)->getAddress(); 589 LLVM_DEBUG({ 590 bool IsDebug = (*Iter)->getIsLineDebug(); 591 dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " [" 592 << hexValue(DebugAddress) << "]"; 593 if (IsDebug) 594 dbgs() << format(" %d", (*Iter)->getLineNumber()); 595 dbgs() << "\n"; 596 }); 597 // Instruction address before debug line. 598 if (InstructionAddress < DebugAddress) { 599 LLVM_DEBUG({ 600 dbgs() << "Inserted instruction address: " 601 << hexValue(InstructionAddress) << " before line: " 602 << format("%d", (*Iter)->getLineNumber()) << " [" 603 << hexValue(DebugAddress) << "]\n"; 604 }); 605 Iter = DebugLines->insert(Iter, InstructionLine); 606 // The returned iterator points to the inserted instruction. 607 // Skip it and point to the line acting as reference. 608 ++Iter; 609 break; 610 } 611 ++Iter; 612 } 613 if (Iter == DebugLines->end()) { 614 // We have reached the end of the source lines and the current 615 // instruction line address is greater than the last source line. 616 TraverseLines = false; 617 DebugLines->push_back(InstructionLine); 618 } 619 } else { 620 DebugLines->push_back(InstructionLine); 621 } 622 } 623 } 624 625 LLVM_DEBUG({ 626 dbgs() << format("Lines after merge: %d\n", DebugLines->size()); 627 size_t Index = 0; 628 for (const LVLine *Line : *DebugLines) { 629 dbgs() << format_decimal(++Index, 5) << ": " 630 << hexValue(Line->getOffset()) << ", (" 631 << ((Line->getIsLineDebug()) 632 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 633 : Line->getName()) 634 << ")\n"; 635 } 636 }); 637 638 // If this compilation unit does not have line records, traverse its scopes 639 // and take any collected instruction lines as the working set in order 640 // to move them to their associated scope. 641 if (DebugLines->empty()) { 642 if (const LVScopes *Scopes = CompileUnit->getScopes()) 643 for (LVScope *Scope : *Scopes) { 644 LVLines *Lines = ScopeInstructions.find(Scope); 645 if (Lines) { 646 647 LLVM_DEBUG({ 648 size_t Index = 0; 649 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 650 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 651 << format("Instruction lines: %d\n", Lines->size()); 652 for (const LVLine *Line : *Lines) 653 dbgs() << format_decimal(++Index, 5) << ": " 654 << hexValue(Line->getOffset()) << ", (" << Line->getName() 655 << ")\n"; 656 }); 657 658 if (Scope->getIsArtificial()) { 659 // Add the instruction lines to their artificial scope. 660 for (LVLine *Line : *Lines) 661 Scope->addElement(Line); 662 } else { 663 DebugLines->append(*Lines); 664 } 665 Lines->clear(); 666 } 667 } 668 } 669 670 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 671 ScopesWithRanges->startSearch(); 672 673 // Process collected lines. 674 LVScope *Scope; 675 for (LVLine *Line : *DebugLines) { 676 // Using the current line address, get its associated lexical scope and 677 // add the line information to it. 678 Scope = ScopesWithRanges->getEntry(Line->getAddress()); 679 if (!Scope) { 680 // If missing scope, use the compile unit. 681 Scope = CompileUnit; 682 LLVM_DEBUG({ 683 dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", (" 684 << ((Line->getIsLineDebug()) 685 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 686 : Line->getName()) 687 << ")\n"; 688 }); 689 } 690 691 // Add line object to scope. 692 Scope->addElement(Line); 693 694 // Report any line zero. 695 if (options().getWarningLines() && Line->getIsLineDebug() && 696 !Line->getLineNumber()) 697 CompileUnit->addLineZero(Line); 698 699 // Some compilers generate ranges in the compile unit; other compilers 700 // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global 701 // variables, we need to generate the map ranges for the compile unit. 702 // If we use the ranges stored at the scope level, there are cases where 703 // the address referenced by a symbol location, is not in the enclosing 704 // scope, but in an outer one. By using the ranges stored in the compile 705 // unit, we can catch all those addresses. 706 if (Line->getIsLineDebug()) 707 CompileUnit->addMapping(Line, SectionIndex); 708 709 // Resolve any given pattern. 710 patterns().resolvePatternMatch(Line); 711 } 712 713 ScopesWithRanges->endSearch(); 714 } 715 716 void LVBinaryReader::processLines(LVLines *DebugLines, 717 LVSectionIndex SectionIndex) { 718 assert(DebugLines && "DebugLines is null."); 719 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex)) 720 return; 721 722 // If the Compile Unit does not contain comdat functions, use the whole 723 // set of debug lines, as the addresses don't have conflicts. 724 if (!CompileUnit->getHasComdatScopes()) { 725 processLines(DebugLines, SectionIndex, nullptr); 726 return; 727 } 728 729 // Find the indexes for the lines whose address is zero. 730 std::vector<size_t> AddressZero; 731 LVLines::iterator It = 732 std::find_if(std::begin(*DebugLines), std::end(*DebugLines), 733 [](LVLine *Line) { return !Line->getAddress(); }); 734 while (It != std::end(*DebugLines)) { 735 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It)); 736 It = std::find_if(std::next(It), std::end(*DebugLines), 737 [](LVLine *Line) { return !Line->getAddress(); }); 738 } 739 740 // If the set of debug lines does not contain any line with address zero, 741 // use the whole set. It means we are dealing with an initialization 742 // section from a fully linked binary. 743 if (AddressZero.empty()) { 744 processLines(DebugLines, SectionIndex, nullptr); 745 return; 746 } 747 748 // The Compile unit contains comdat functions. Traverse the collected 749 // debug lines and identify logical groups based on their start and 750 // address. Each group starts with a zero address. 751 // Begin, End, Address, IsDone. 752 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>; 753 std::vector<LVBucket> Buckets; 754 755 LVAddress Address; 756 size_t Begin = 0; 757 size_t End = 0; 758 size_t Index = 0; 759 for (Index = 0; Index < AddressZero.size() - 1; ++Index) { 760 Begin = AddressZero[Index]; 761 End = AddressZero[Index + 1] - 1; 762 Address = (*DebugLines)[End]->getAddress(); 763 Buckets.emplace_back(Begin, End, Address, false); 764 } 765 766 // Add the last bucket. 767 if (Index) { 768 Begin = AddressZero[Index]; 769 End = DebugLines->size() - 1; 770 Address = (*DebugLines)[End]->getAddress(); 771 Buckets.emplace_back(Begin, End, Address, false); 772 } 773 774 LLVM_DEBUG({ 775 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n"; 776 for (LVBucket &Bucket : Buckets) { 777 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", " 778 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", " 779 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n"; 780 } 781 }); 782 783 // Traverse the sections and buckets looking for matches on the section 784 // sizes. In the unlikely event of different buckets with the same size 785 // process them in order and mark them as done. 786 LVLines Group; 787 for (LVSections::reference Entry : Sections) { 788 LVSectionIndex SectionIndex = Entry.first; 789 const object::SectionRef Section = Entry.second; 790 uint64_t Size = Section.getSize(); 791 LLVM_DEBUG({ 792 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3) 793 << " , Section Size: " << hexValue(Section.getSize()) 794 << " , Section Address: " << hexValue(Section.getAddress()) 795 << "\n"; 796 }); 797 798 for (LVBucket &Bucket : Buckets) { 799 if (std::get<3>(Bucket)) 800 // Already done for previous section. 801 continue; 802 if (Size == std::get<2>(Bucket)) { 803 // We have a match on the section size. 804 Group.clear(); 805 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket); 806 LVLines::iterator IterEnd = 807 DebugLines->begin() + std::get<1>(Bucket) + 1; 808 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter) 809 Group.push_back(*Iter); 810 processLines(&Group, SectionIndex, /*Function=*/nullptr); 811 std::get<3>(Bucket) = true; 812 break; 813 } 814 } 815 } 816 } 817 818 void LVBinaryReader::print(raw_ostream &OS) const { 819 OS << "LVBinaryReader\n"; 820 LLVM_DEBUG(dbgs() << "PrintReader\n"); 821 } 822