1 //===-- LVBinaryReader.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements the LVBinaryReader class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h" 14 #include "llvm/Support/Errc.h" 15 #include "llvm/Support/FormatAdapters.h" 16 #include "llvm/Support/FormatVariadic.h" 17 18 using namespace llvm; 19 using namespace llvm::logicalview; 20 21 #define DEBUG_TYPE "BinaryReader" 22 23 // Function names extracted from the object symbol table. 24 void LVSymbolTable::add(StringRef Name, LVScope *Function, 25 LVSectionIndex SectionIndex) { 26 std::string SymbolName(Name); 27 if (SymbolNames.find(SymbolName) == SymbolNames.end()) { 28 SymbolNames.emplace( 29 std::piecewise_construct, std::forward_as_tuple(SymbolName), 30 std::forward_as_tuple(Function, 0, SectionIndex, false)); 31 } else { 32 // Update a recorded entry with its logical scope and section index. 33 SymbolNames[SymbolName].Scope = Function; 34 if (SectionIndex) 35 SymbolNames[SymbolName].SectionIndex = SectionIndex; 36 } 37 38 if (Function && SymbolNames[SymbolName].IsComdat) 39 Function->setIsComdat(); 40 41 LLVM_DEBUG({ print(dbgs()); }); 42 } 43 44 void LVSymbolTable::add(StringRef Name, LVAddress Address, 45 LVSectionIndex SectionIndex, bool IsComdat) { 46 std::string SymbolName(Name); 47 if (SymbolNames.find(SymbolName) == SymbolNames.end()) 48 SymbolNames.emplace( 49 std::piecewise_construct, std::forward_as_tuple(SymbolName), 50 std::forward_as_tuple(nullptr, Address, SectionIndex, IsComdat)); 51 else 52 // Update a recorded symbol name with its logical scope. 53 SymbolNames[SymbolName].Address = Address; 54 55 LVScope *Function = SymbolNames[SymbolName].Scope; 56 if (Function && IsComdat) 57 Function->setIsComdat(); 58 LLVM_DEBUG({ print(dbgs()); }); 59 } 60 61 LVSectionIndex LVSymbolTable::update(LVScope *Function) { 62 LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex(); 63 StringRef Name = Function->getLinkageName(); 64 if (Name.empty()) 65 Name = Function->getName(); 66 std::string SymbolName(Name); 67 68 if (SymbolName.empty() || (SymbolNames.find(SymbolName) == SymbolNames.end())) 69 return SectionIndex; 70 71 // Update a recorded entry with its logical scope, only if the scope has 72 // ranges. That is the case when in DWARF there are 2 DIEs connected via 73 // the DW_AT_specification. 74 if (Function->getHasRanges()) { 75 SymbolNames[SymbolName].Scope = Function; 76 SectionIndex = SymbolNames[SymbolName].SectionIndex; 77 } else { 78 SectionIndex = UndefinedSectionIndex; 79 } 80 81 if (SymbolNames[SymbolName].IsComdat) 82 Function->setIsComdat(); 83 84 LLVM_DEBUG({ print(dbgs()); }); 85 return SectionIndex; 86 } 87 88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) { 89 static LVSymbolTableEntry Empty = LVSymbolTableEntry(); 90 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 91 return Iter != SymbolNames.end() ? Iter->second : Empty; 92 } 93 LVAddress LVSymbolTable::getAddress(StringRef Name) { 94 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 95 return Iter != SymbolNames.end() ? Iter->second.Address : 0; 96 } 97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) { 98 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex 100 : getReader().getDotTextSectionIndex(); 101 } 102 bool LVSymbolTable::getIsComdat(StringRef Name) { 103 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false; 105 } 106 107 void LVSymbolTable::print(raw_ostream &OS) { 108 OS << "Symbol Table\n"; 109 for (LVSymbolNames::reference Entry : SymbolNames) { 110 LVSymbolTableEntry &SymbolName = Entry.second; 111 LVScope *Scope = SymbolName.Scope; 112 LVOffset Offset = Scope ? Scope->getOffset() : 0; 113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5) 114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N") 115 << " Scope: " << hexValue(Offset) 116 << " Address: " << hexValue(SymbolName.Address) 117 << " Name: " << Entry.first << "\n"; 118 } 119 } 120 121 void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function, 122 LVSectionIndex SectionIndex) { 123 SymbolTable.add(Name, Function, SectionIndex); 124 } 125 void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address, 126 LVSectionIndex SectionIndex, 127 bool IsComdat) { 128 SymbolTable.add(Name, Address, SectionIndex, IsComdat); 129 } 130 LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) { 131 return SymbolTable.update(Function); 132 } 133 134 const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) { 135 return SymbolTable.getEntry(Name); 136 } 137 LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) { 138 return SymbolTable.getAddress(Name); 139 } 140 LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) { 141 return SymbolTable.getIndex(Name); 142 } 143 bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) { 144 return SymbolTable.getIsComdat(Name); 145 } 146 147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) { 148 for (const object::SectionRef &Section : Obj.sections()) { 149 if (!Section.isText() || Section.isVirtual() || !Section.getSize()) 150 continue; 151 152 // Record section information required for symbol resolution. 153 // Note: The section index returned by 'getIndex()' is one based. 154 Sections.emplace(Section.getIndex(), Section); 155 addSectionAddress(Section); 156 157 // Identify the ".text" section. 158 Expected<StringRef> SectionNameOrErr = Section.getName(); 159 if (!SectionNameOrErr) { 160 consumeError(SectionNameOrErr.takeError()); 161 continue; 162 } 163 if ((*SectionNameOrErr).equals(".text") || 164 (*SectionNameOrErr).equals(".code")) 165 DotTextSectionIndex = Section.getIndex(); 166 } 167 168 // Process the symbol table. 169 mapRangeAddress(Obj); 170 171 LLVM_DEBUG({ 172 dbgs() << "\nSections Information:\n"; 173 for (LVSections::reference Entry : Sections) { 174 LVSectionIndex SectionIndex = Entry.first; 175 const object::SectionRef Section = Entry.second; 176 Expected<StringRef> SectionNameOrErr = Section.getName(); 177 if (!SectionNameOrErr) 178 consumeError(SectionNameOrErr.takeError()); 179 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) 180 << " Name: " << *SectionNameOrErr << "\n" 181 << "Size: " << hexValue(Section.getSize()) << "\n" 182 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" 183 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"; 184 } 185 dbgs() << "\nObject Section Information:\n"; 186 for (LVSectionAddresses::const_reference Entry : SectionAddresses) 187 dbgs() << "[" << hexValue(Entry.first) << ":" 188 << hexValue(Entry.first + Entry.second.getSize()) 189 << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; 190 }); 191 } 192 193 void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) { 194 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase(); 195 if (ImageBase) 196 ImageBaseAddress = ImageBase.get(); 197 198 LLVM_DEBUG({ 199 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n"; 200 }); 201 202 uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT; 203 204 for (const object::SectionRef &Section : COFFObj.sections()) { 205 if (!Section.isText() || Section.isVirtual() || !Section.getSize()) 206 continue; 207 208 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); 209 VirtualAddress = COFFSection->VirtualAddress; 210 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags; 211 212 // Record section information required for symbol resolution. 213 // Note: The section index returned by 'getIndex()' is zero based. 214 Sections.emplace(Section.getIndex() + 1, Section); 215 addSectionAddress(Section); 216 217 // Additional initialization on the specific object format. 218 mapRangeAddress(COFFObj, Section, IsComdat); 219 } 220 221 LLVM_DEBUG({ 222 dbgs() << "\nSections Information:\n"; 223 for (LVSections::reference Entry : Sections) { 224 LVSectionIndex SectionIndex = Entry.first; 225 const object::SectionRef Section = Entry.second; 226 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); 227 Expected<StringRef> SectionNameOrErr = Section.getName(); 228 if (!SectionNameOrErr) 229 consumeError(SectionNameOrErr.takeError()); 230 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) 231 << " Name: " << *SectionNameOrErr << "\n" 232 << "Size: " << hexValue(Section.getSize()) << "\n" 233 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" 234 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n" 235 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData) 236 << "\n" 237 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData) 238 << "\n"; 239 } 240 dbgs() << "\nObject Section Information:\n"; 241 for (LVSectionAddresses::const_reference Entry : SectionAddresses) 242 dbgs() << "[" << hexValue(Entry.first) << ":" 243 << hexValue(Entry.first + Entry.second.getSize()) 244 << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; 245 }); 246 } 247 248 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, 249 StringRef TheFeatures) { 250 std::string TargetLookupError; 251 const Target *TheTarget = 252 TargetRegistry::lookupTarget(std::string(TheTriple), TargetLookupError); 253 if (!TheTarget) 254 return createStringError(errc::invalid_argument, TargetLookupError.c_str()); 255 256 // Register information. 257 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple); 258 if (!RegisterInfo) 259 return createStringError(errc::invalid_argument, 260 "no register info for target " + TheTriple); 261 MRI.reset(RegisterInfo); 262 263 // Assembler properties and features. 264 MCTargetOptions MCOptions; 265 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions)); 266 if (!AsmInfo) 267 return createStringError(errc::invalid_argument, 268 "no assembly info for target " + TheTriple); 269 MAI.reset(AsmInfo); 270 271 // Target subtargets. 272 StringRef CPU; 273 MCSubtargetInfo *SubtargetInfo( 274 TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures)); 275 if (!SubtargetInfo) 276 return createStringError(errc::invalid_argument, 277 "no subtarget info for target " + TheTriple); 278 STI.reset(SubtargetInfo); 279 280 // Instructions Info. 281 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo()); 282 if (!InstructionInfo) 283 return createStringError(errc::invalid_argument, 284 "no instruction info for target " + TheTriple); 285 MII.reset(InstructionInfo); 286 287 MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(), 288 STI.get()); 289 290 // Assembler. 291 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC)); 292 if (!DisAsm) 293 return createStringError(errc::invalid_argument, 294 "no disassembler for target " + TheTriple); 295 MD.reset(DisAsm); 296 297 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter( 298 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI)); 299 if (!InstructionPrinter) 300 return createStringError(errc::invalid_argument, 301 "no target assembly language printer for target " + 302 TheTriple); 303 MIP.reset(InstructionPrinter); 304 InstructionPrinter->setPrintImmHex(true); 305 306 return Error::success(); 307 } 308 309 Expected<std::pair<uint64_t, object::SectionRef>> 310 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address, 311 LVSectionIndex SectionIndex) { 312 // Return the 'text' section with the code for this logical scope. 313 // COFF: SectionIndex is zero. Use 'SectionAddresses' data. 314 // ELF: SectionIndex is the section index in the file. 315 if (SectionIndex) { 316 LVSections::iterator Iter = Sections.find(SectionIndex); 317 if (Iter == Sections.end()) { 318 return createStringError(errc::invalid_argument, 319 "invalid section index for: '%s'", 320 Scope->getName().str().c_str()); 321 } 322 const object::SectionRef Section = Iter->second; 323 return std::make_pair(Section.getAddress(), Section); 324 } 325 326 // Ensure a valid starting address for the public names. 327 LVSectionAddresses::const_iterator Iter = 328 SectionAddresses.upper_bound(Address); 329 if (Iter == SectionAddresses.begin()) 330 return createStringError(errc::invalid_argument, 331 "invalid section address for: '%s'", 332 Scope->getName().str().c_str()); 333 334 // Get section that contains the code for this function. 335 Iter = SectionAddresses.lower_bound(Address); 336 if (Iter != SectionAddresses.begin()) 337 --Iter; 338 return std::make_pair(Iter->first, Iter->second); 339 } 340 341 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, 342 LVScope *Scope) { 343 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 344 ScopesWithRanges->addEntry(Scope); 345 } 346 347 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, 348 LVScope *Scope, LVAddress LowerAddress, 349 LVAddress UpperAddress) { 350 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 351 ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress); 352 } 353 354 LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) { 355 // Check if we already have a mapping for this section index. 356 LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex); 357 if (IterSection == SectionRanges.end()) 358 IterSection = 359 SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first; 360 LVRange *Range = IterSection->second.get(); 361 assert(Range && "Range is null."); 362 return Range; 363 } 364 365 Error LVBinaryReader::createInstructions(LVScope *Scope, 366 LVSectionIndex SectionIndex, 367 const LVNameInfo &NameInfo) { 368 assert(Scope && "Scope is null."); 369 370 // Skip stripped functions. 371 if (Scope->getIsDiscarded()) 372 return Error::success(); 373 374 // Find associated address and size for the given function entry point. 375 LVAddress Address = NameInfo.first; 376 uint64_t Size = NameInfo.second; 377 378 LLVM_DEBUG({ 379 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '" 380 << Scope->getLinkageName() << "'\n" 381 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 382 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n"; 383 }); 384 385 Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr = 386 getSection(Scope, Address, SectionIndex); 387 if (!SectionOrErr) 388 return SectionOrErr.takeError(); 389 const object::SectionRef Section = (*SectionOrErr).second; 390 uint64_t SectionAddress = (*SectionOrErr).first; 391 392 Expected<StringRef> SectionContentsOrErr = Section.getContents(); 393 if (!SectionContentsOrErr) 394 return SectionOrErr.takeError(); 395 396 // There are cases where the section size is smaller than the [LowPC,HighPC] 397 // range; it causes us to decode invalid addresses. The recorded size in the 398 // logical scope is one less than the real size. 399 LLVM_DEBUG({ 400 dbgs() << " Size: " << hexValue(Size) 401 << ", Section Size: " << hexValue(Section.getSize()) << "\n"; 402 }); 403 Size = std::min(Size + 1, Section.getSize()); 404 405 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr); 406 uint64_t Offset = Address - SectionAddress; 407 uint8_t const *Begin = Bytes.data() + Offset; 408 uint8_t const *End = Bytes.data() + Offset + Size; 409 410 LLVM_DEBUG({ 411 Expected<StringRef> SectionNameOrErr = Section.getName(); 412 if (!SectionNameOrErr) 413 consumeError(SectionNameOrErr.takeError()); 414 else 415 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " [" 416 << hexValue((uint64_t)Section.getAddress()) << ":" 417 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10) 418 << "] Name: '" << *SectionNameOrErr << "'\n" 419 << "Begin: " << hexValue((uint64_t)Begin) 420 << ", End: " << hexValue((uint64_t)End) << "\n"; 421 }); 422 423 // Address for first instruction line. 424 LVAddress FirstAddress = Address; 425 auto InstructionsSP = std::make_unique<LVLines>(); 426 LVLines &Instructions = *InstructionsSP; 427 DiscoveredLines.emplace_back(std::move(InstructionsSP)); 428 429 while (Begin < End) { 430 MCInst Instruction; 431 uint64_t BytesConsumed = 0; 432 SmallVector<char, 64> InsnStr; 433 raw_svector_ostream Annotations(InsnStr); 434 MCDisassembler::DecodeStatus const S = 435 MD->getInstruction(Instruction, BytesConsumed, 436 ArrayRef<uint8_t>(Begin, End), Address, outs()); 437 switch (S) { 438 case MCDisassembler::Fail: 439 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; }); 440 if (BytesConsumed == 0) 441 // Skip invalid bytes 442 BytesConsumed = 1; 443 break; 444 case MCDisassembler::SoftFail: 445 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; }); 446 [[fallthrough]]; 447 case MCDisassembler::Success: { 448 std::string Buffer; 449 raw_string_ostream Stream(Buffer); 450 StringRef AnnotationsStr = Annotations.str(); 451 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream); 452 LLVM_DEBUG({ 453 std::string BufferCodes; 454 raw_string_ostream StreamCodes(BufferCodes); 455 StreamCodes << format_bytes( 456 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16, 457 16); 458 dbgs() << "[" << hexValue((uint64_t)Begin) << "] " 459 << "Size: " << format_decimal(BytesConsumed, 2) << " (" 460 << formatv("{0}", 461 fmt_align(StreamCodes.str(), AlignStyle::Left, 32)) 462 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str() 463 << "\n"; 464 }); 465 // Here we add logical lines to the Instructions. Later on, 466 // the 'processLines()' function will move each created logical line 467 // to its enclosing logical scope, using the debug ranges information 468 // and they will be released when its scope parent is deleted. 469 LVLineAssembler *Line = createLineAssembler(); 470 Line->setAddress(Address); 471 Line->setName(StringRef(Stream.str()).trim()); 472 Instructions.push_back(Line); 473 break; 474 } 475 } 476 Address += BytesConsumed; 477 Begin += BytesConsumed; 478 } 479 480 LLVM_DEBUG({ 481 size_t Index = 0; 482 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 483 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 484 << "Address: " << hexValue(FirstAddress) 485 << format(" - Collected instructions lines: %d\n", 486 Instructions.size()); 487 for (const LVLine *Line : Instructions) 488 dbgs() << format_decimal(++Index, 5) << ": " 489 << hexValue(Line->getOffset()) << ", (" << Line->getName() 490 << ")\n"; 491 }); 492 493 // The scope in the assembler names is linked to its own instructions. 494 ScopeInstructions.add(SectionIndex, Scope, &Instructions); 495 AssemblerMappings.add(SectionIndex, FirstAddress, Scope); 496 497 return Error::success(); 498 } 499 500 Error LVBinaryReader::createInstructions(LVScope *Function, 501 LVSectionIndex SectionIndex) { 502 if (!options().getPrintInstructions()) 503 return Error::success(); 504 505 LVNameInfo Name = CompileUnit->findPublicName(Function); 506 if (Name.first != LVAddress(UINT64_MAX)) 507 return createInstructions(Function, SectionIndex, Name); 508 509 return Error::success(); 510 } 511 512 Error LVBinaryReader::createInstructions() { 513 if (!options().getPrintInstructions()) 514 return Error::success(); 515 516 LLVM_DEBUG({ 517 size_t Index = 1; 518 dbgs() << "\nPublic Names (Scope):\n"; 519 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 520 LVScope *Scope = Name.first; 521 const LVNameInfo &NameInfo = Name.second; 522 LVAddress Address = NameInfo.first; 523 uint64_t Size = NameInfo.second; 524 dbgs() << format_decimal(Index++, 5) << ": " 525 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 526 << hexValue(Address) << ":" << hexValue(Address + Size) << "] " 527 << "Name: '" << Scope->getName() << "' / '" 528 << Scope->getLinkageName() << "'\n"; 529 } 530 }); 531 532 // For each public name in the current compile unit, create the line 533 // records that represent the executable instructions. 534 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 535 LVScope *Scope = Name.first; 536 // The symbol table extracted from the object file always contains a 537 // non-empty name (linkage name). However, the logical scope does not 538 // guarantee to have a name for the linkage name (main is one case). 539 // For those cases, set the linkage name the same as the name. 540 if (!Scope->getLinkageNameIndex()) 541 Scope->setLinkageName(Scope->getName()); 542 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName()); 543 if (Error Err = createInstructions(Scope, SectionIndex, Name.second)) 544 return Err; 545 } 546 547 return Error::success(); 548 } 549 550 // During the traversal of the debug information sections, we created the 551 // logical lines representing the disassembled instructions from the text 552 // section and the logical lines representing the line records from the 553 // debug line section. Using the ranges associated with the logical scopes, 554 // we will allocate those logical lines to their logical scopes. 555 void LVBinaryReader::processLines(LVLines *DebugLines, 556 LVSectionIndex SectionIndex, 557 LVScope *Function) { 558 assert(DebugLines && "DebugLines is null."); 559 560 // Just return if this compilation unit does not have any line records 561 // and no instruction lines were created. 562 if (DebugLines->empty() && !options().getPrintInstructions()) 563 return; 564 565 // Merge the debug lines and instruction lines using their text address; 566 // the logical line representing the debug line record is followed by the 567 // line(s) representing the disassembled instructions, whose addresses are 568 // equal or greater that the line address and less than the address of the 569 // next debug line record. 570 LLVM_DEBUG({ 571 size_t Index = 1; 572 size_t PerLine = 4; 573 dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size()); 574 for (const LVLine *Line : *DebugLines) { 575 dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset()) 576 << ", (" << Line->getLineNumber() << ")" 577 << ((Index % PerLine) ? " " : "\n"); 578 ++Index; 579 } 580 dbgs() << ((Index % PerLine) ? "\n" : ""); 581 }); 582 583 bool TraverseLines = true; 584 LVLines::iterator Iter = DebugLines->begin(); 585 while (TraverseLines && Iter != DebugLines->end()) { 586 uint64_t DebugAddress = (*Iter)->getAddress(); 587 588 // Get the function with an entry point that matches this line and 589 // its associated assembler entries. In the case of COMDAT, the input 590 // 'Function' is not null. Use it to find its address ranges. 591 LVScope *Scope = Function; 592 if (!Function) { 593 Scope = AssemblerMappings.find(SectionIndex, DebugAddress); 594 if (!Scope) { 595 ++Iter; 596 continue; 597 } 598 } 599 600 // Get the associated instructions for the found 'Scope'. 601 LVLines InstructionLines; 602 LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope); 603 if (Lines) 604 InstructionLines = std::move(*Lines); 605 606 LLVM_DEBUG({ 607 size_t Index = 0; 608 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 609 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 610 << format("Process instruction lines: %d\n", 611 InstructionLines.size()); 612 for (const LVLine *Line : InstructionLines) 613 dbgs() << format_decimal(++Index, 5) << ": " 614 << hexValue(Line->getOffset()) << ", (" << Line->getName() 615 << ")\n"; 616 }); 617 618 // Continue with next debug line if there are not instructions lines. 619 if (InstructionLines.empty()) { 620 ++Iter; 621 continue; 622 } 623 624 for (LVLine *InstructionLine : InstructionLines) { 625 uint64_t InstructionAddress = InstructionLine->getAddress(); 626 LLVM_DEBUG({ 627 dbgs() << "Instruction address: " << hexValue(InstructionAddress) 628 << "\n"; 629 }); 630 if (TraverseLines) { 631 while (Iter != DebugLines->end()) { 632 DebugAddress = (*Iter)->getAddress(); 633 LLVM_DEBUG({ 634 bool IsDebug = (*Iter)->getIsLineDebug(); 635 dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " [" 636 << hexValue(DebugAddress) << "]"; 637 if (IsDebug) 638 dbgs() << format(" %d", (*Iter)->getLineNumber()); 639 dbgs() << "\n"; 640 }); 641 // Instruction address before debug line. 642 if (InstructionAddress < DebugAddress) { 643 LLVM_DEBUG({ 644 dbgs() << "Inserted instruction address: " 645 << hexValue(InstructionAddress) << " before line: " 646 << format("%d", (*Iter)->getLineNumber()) << " [" 647 << hexValue(DebugAddress) << "]\n"; 648 }); 649 Iter = DebugLines->insert(Iter, InstructionLine); 650 // The returned iterator points to the inserted instruction. 651 // Skip it and point to the line acting as reference. 652 ++Iter; 653 break; 654 } 655 ++Iter; 656 } 657 if (Iter == DebugLines->end()) { 658 // We have reached the end of the source lines and the current 659 // instruction line address is greater than the last source line. 660 TraverseLines = false; 661 DebugLines->push_back(InstructionLine); 662 } 663 } else { 664 DebugLines->push_back(InstructionLine); 665 } 666 } 667 } 668 669 LLVM_DEBUG({ 670 dbgs() << format("Lines after merge: %d\n", DebugLines->size()); 671 size_t Index = 0; 672 for (const LVLine *Line : *DebugLines) { 673 dbgs() << format_decimal(++Index, 5) << ": " 674 << hexValue(Line->getOffset()) << ", (" 675 << ((Line->getIsLineDebug()) 676 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 677 : Line->getName()) 678 << ")\n"; 679 } 680 }); 681 682 // If this compilation unit does not have line records, traverse its scopes 683 // and take any collected instruction lines as the working set in order 684 // to move them to their associated scope. 685 if (DebugLines->empty()) { 686 if (const LVScopes *Scopes = CompileUnit->getScopes()) 687 for (LVScope *Scope : *Scopes) { 688 LVLines *Lines = ScopeInstructions.find(Scope); 689 if (Lines) { 690 691 LLVM_DEBUG({ 692 size_t Index = 0; 693 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 694 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 695 << format("Instruction lines: %d\n", Lines->size()); 696 for (const LVLine *Line : *Lines) 697 dbgs() << format_decimal(++Index, 5) << ": " 698 << hexValue(Line->getOffset()) << ", (" << Line->getName() 699 << ")\n"; 700 }); 701 702 if (Scope->getIsArtificial()) { 703 // Add the instruction lines to their artificial scope. 704 for (LVLine *Line : *Lines) 705 Scope->addElement(Line); 706 } else { 707 DebugLines->append(*Lines); 708 } 709 Lines->clear(); 710 } 711 } 712 } 713 714 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 715 ScopesWithRanges->startSearch(); 716 717 // Process collected lines. 718 LVScope *Scope; 719 for (LVLine *Line : *DebugLines) { 720 // Using the current line address, get its associated lexical scope and 721 // add the line information to it. 722 Scope = ScopesWithRanges->getEntry(Line->getAddress()); 723 if (!Scope) { 724 // If missing scope, use the compile unit. 725 Scope = CompileUnit; 726 LLVM_DEBUG({ 727 dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", (" 728 << ((Line->getIsLineDebug()) 729 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 730 : Line->getName()) 731 << ")\n"; 732 }); 733 } 734 735 // Add line object to scope. 736 Scope->addElement(Line); 737 738 // Report any line zero. 739 if (options().getWarningLines() && Line->getIsLineDebug() && 740 !Line->getLineNumber()) 741 CompileUnit->addLineZero(Line); 742 743 // Some compilers generate ranges in the compile unit; other compilers 744 // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global 745 // variables, we need to generate the map ranges for the compile unit. 746 // If we use the ranges stored at the scope level, there are cases where 747 // the address referenced by a symbol location, is not in the enclosing 748 // scope, but in an outer one. By using the ranges stored in the compile 749 // unit, we can catch all those addresses. 750 if (Line->getIsLineDebug()) 751 CompileUnit->addMapping(Line, SectionIndex); 752 753 // Resolve any given pattern. 754 patterns().resolvePatternMatch(Line); 755 } 756 757 ScopesWithRanges->endSearch(); 758 } 759 760 void LVBinaryReader::processLines(LVLines *DebugLines, 761 LVSectionIndex SectionIndex) { 762 assert(DebugLines && "DebugLines is null."); 763 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex)) 764 return; 765 766 // If the Compile Unit does not contain comdat functions, use the whole 767 // set of debug lines, as the addresses don't have conflicts. 768 if (!CompileUnit->getHasComdatScopes()) { 769 processLines(DebugLines, SectionIndex, nullptr); 770 return; 771 } 772 773 // Find the indexes for the lines whose address is zero. 774 std::vector<size_t> AddressZero; 775 LVLines::iterator It = 776 std::find_if(std::begin(*DebugLines), std::end(*DebugLines), 777 [](LVLine *Line) { return !Line->getAddress(); }); 778 while (It != std::end(*DebugLines)) { 779 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It)); 780 It = std::find_if(std::next(It), std::end(*DebugLines), 781 [](LVLine *Line) { return !Line->getAddress(); }); 782 } 783 784 // If the set of debug lines does not contain any line with address zero, 785 // use the whole set. It means we are dealing with an initialization 786 // section from a fully linked binary. 787 if (AddressZero.empty()) { 788 processLines(DebugLines, SectionIndex, nullptr); 789 return; 790 } 791 792 // The Compile unit contains comdat functions. Traverse the collected 793 // debug lines and identify logical groups based on their start and 794 // address. Each group starts with a zero address. 795 // Begin, End, Address, IsDone. 796 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>; 797 std::vector<LVBucket> Buckets; 798 799 LVAddress Address; 800 size_t Begin = 0; 801 size_t End = 0; 802 size_t Index = 0; 803 for (Index = 0; Index < AddressZero.size() - 1; ++Index) { 804 Begin = AddressZero[Index]; 805 End = AddressZero[Index + 1] - 1; 806 Address = (*DebugLines)[End]->getAddress(); 807 Buckets.emplace_back(Begin, End, Address, false); 808 } 809 810 // Add the last bucket. 811 if (Index) { 812 Begin = AddressZero[Index]; 813 End = DebugLines->size() - 1; 814 Address = (*DebugLines)[End]->getAddress(); 815 Buckets.emplace_back(Begin, End, Address, false); 816 } 817 818 LLVM_DEBUG({ 819 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n"; 820 for (LVBucket &Bucket : Buckets) { 821 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", " 822 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", " 823 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n"; 824 } 825 }); 826 827 // Traverse the sections and buckets looking for matches on the section 828 // sizes. In the unlikely event of different buckets with the same size 829 // process them in order and mark them as done. 830 LVLines Group; 831 for (LVSections::reference Entry : Sections) { 832 LVSectionIndex SectionIndex = Entry.first; 833 const object::SectionRef Section = Entry.second; 834 uint64_t Size = Section.getSize(); 835 LLVM_DEBUG({ 836 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3) 837 << " , Section Size: " << hexValue(Section.getSize()) 838 << " , Section Address: " << hexValue(Section.getAddress()) 839 << "\n"; 840 }); 841 842 for (LVBucket &Bucket : Buckets) { 843 if (std::get<3>(Bucket)) 844 // Already done for previous section. 845 continue; 846 if (Size == std::get<2>(Bucket)) { 847 // We have a match on the section size. 848 Group.clear(); 849 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket); 850 LVLines::iterator IterEnd = 851 DebugLines->begin() + std::get<1>(Bucket) + 1; 852 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter) 853 Group.push_back(*Iter); 854 processLines(&Group, SectionIndex, /*Function=*/nullptr); 855 std::get<3>(Bucket) = true; 856 break; 857 } 858 } 859 } 860 } 861 862 // Traverse the scopes for the given 'Function' looking for any inlined 863 // scopes with inlined lines, which are found in 'CUInlineeLines'. 864 void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex, 865 LVScope *Function) { 866 SmallVector<LVInlineeLine::iterator> InlineeIters; 867 std::function<void(LVScope * Parent)> FindInlinedScopes = 868 [&](LVScope *Parent) { 869 if (const LVScopes *Scopes = Parent->getScopes()) 870 for (LVScope *Scope : *Scopes) { 871 LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope); 872 if (Iter != CUInlineeLines.end()) 873 InlineeIters.push_back(Iter); 874 FindInlinedScopes(Scope); 875 } 876 }; 877 878 // Find all inlined scopes for the given 'Function'. 879 FindInlinedScopes(Function); 880 for (LVInlineeLine::iterator InlineeIter : InlineeIters) { 881 LVScope *Scope = InlineeIter->first; 882 addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex); 883 884 // TODO: Convert this into a reference. 885 LVLines *InlineeLines = InlineeIter->second.get(); 886 LLVM_DEBUG({ 887 dbgs() << "Inlined lines for: " << Scope->getName() << "\n"; 888 for (const LVLine *Line : *InlineeLines) 889 dbgs() << "[" << hexValue(Line->getAddress()) << "] " 890 << Line->getLineNumber() << "\n"; 891 dbgs() << format("Debug lines: %d\n", CULines.size()); 892 for (const LVLine *Line : CULines) 893 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" 894 << Line->getLineNumber() << ")\n"; 895 ; 896 }); 897 898 // The inlined lines must be merged using its address, in order to keep 899 // the real order of the instructions. The inlined lines are mixed with 900 // the other non-inlined lines. 901 if (InlineeLines->size()) { 902 // First address of inlinee code. 903 uint64_t InlineeStart = (InlineeLines->front())->getAddress(); 904 LVLines::iterator Iter = std::find_if( 905 CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool { 906 return Item->getAddress() == InlineeStart; 907 }); 908 if (Iter != CULines.end()) { 909 // 'Iter' points to the line where the inlined function is called. 910 // Emulate the DW_AT_call_line attribute. 911 Scope->setCallLineNumber((*Iter)->getLineNumber()); 912 // Mark the referenced line as the start of the inlined function. 913 // Skip the first line during the insertion, as the address and 914 // line number as the same. Otherwise we have to erase and insert. 915 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber()); 916 ++Iter; 917 CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end()); 918 } 919 } 920 921 // Remove this set of lines from the container; each inlined function 922 // creates an unique set of lines. Remove only the created container. 923 CUInlineeLines.erase(InlineeIter); 924 InlineeLines->clear(); 925 } 926 LLVM_DEBUG({ 927 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n"; 928 dbgs() << format("Debug lines: %d\n", CULines.size()); 929 for (const LVLine *Line : CULines) 930 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" 931 << Line->getLineNumber() << ")\n"; 932 ; 933 }); 934 } 935 936 void LVBinaryReader::print(raw_ostream &OS) const { 937 OS << "LVBinaryReader\n"; 938 LLVM_DEBUG(dbgs() << "PrintReader\n"); 939 } 940