1 //===-- LVBinaryReader.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements the LVBinaryReader class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h" 14 #include "llvm/Support/Errc.h" 15 #include "llvm/Support/FormatAdapters.h" 16 #include "llvm/Support/FormatVariadic.h" 17 18 using namespace llvm; 19 using namespace llvm::logicalview; 20 21 #define DEBUG_TYPE "BinaryReader" 22 23 // Function names extracted from the object symbol table. 24 void LVSymbolTable::add(StringRef Name, LVScope *Function, 25 LVSectionIndex SectionIndex) { 26 std::string SymbolName(Name); 27 auto [It, Inserted] = 28 SymbolNames.try_emplace(SymbolName, Function, 0, SectionIndex, false); 29 if (!Inserted) { 30 // Update a recorded entry with its logical scope and section index. 31 It->second.Scope = Function; 32 if (SectionIndex) 33 It->second.SectionIndex = SectionIndex; 34 } 35 36 if (Function && It->second.IsComdat) 37 Function->setIsComdat(); 38 39 LLVM_DEBUG({ print(dbgs()); }); 40 } 41 42 void LVSymbolTable::add(StringRef Name, LVAddress Address, 43 LVSectionIndex SectionIndex, bool IsComdat) { 44 std::string SymbolName(Name); 45 auto [It, Inserted] = SymbolNames.try_emplace(SymbolName, nullptr, Address, 46 SectionIndex, IsComdat); 47 if (!Inserted) 48 // Update a recorded symbol name with its logical scope. 49 It->second.Address = Address; 50 51 LVScope *Function = It->second.Scope; 52 if (Function && IsComdat) 53 Function->setIsComdat(); 54 LLVM_DEBUG({ print(dbgs()); }); 55 } 56 57 LVSectionIndex LVSymbolTable::update(LVScope *Function) { 58 LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex(); 59 StringRef Name = Function->getLinkageName(); 60 if (Name.empty()) 61 Name = Function->getName(); 62 std::string SymbolName(Name); 63 64 if (SymbolName.empty()) 65 return SectionIndex; 66 67 auto It = SymbolNames.find(SymbolName); 68 if (It == SymbolNames.end()) 69 return SectionIndex; 70 71 // Update a recorded entry with its logical scope, only if the scope has 72 // ranges. That is the case when in DWARF there are 2 DIEs connected via 73 // the DW_AT_specification. 74 if (Function->getHasRanges()) { 75 It->second.Scope = Function; 76 SectionIndex = It->second.SectionIndex; 77 } else { 78 SectionIndex = UndefinedSectionIndex; 79 } 80 81 if (It->second.IsComdat) 82 Function->setIsComdat(); 83 84 LLVM_DEBUG({ print(dbgs()); }); 85 return SectionIndex; 86 } 87 88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) { 89 static LVSymbolTableEntry Empty = LVSymbolTableEntry(); 90 LVSymbolNames::iterator Iter = SymbolNames.find(Name); 91 return Iter != SymbolNames.end() ? Iter->second : Empty; 92 } 93 LVAddress LVSymbolTable::getAddress(StringRef Name) { 94 LVSymbolNames::iterator Iter = SymbolNames.find(Name); 95 return Iter != SymbolNames.end() ? Iter->second.Address : 0; 96 } 97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) { 98 LVSymbolNames::iterator Iter = SymbolNames.find(Name); 99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex 100 : getReader().getDotTextSectionIndex(); 101 } 102 bool LVSymbolTable::getIsComdat(StringRef Name) { 103 LVSymbolNames::iterator Iter = SymbolNames.find(Name); 104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false; 105 } 106 107 void LVSymbolTable::print(raw_ostream &OS) { 108 OS << "Symbol Table\n"; 109 for (LVSymbolNames::reference Entry : SymbolNames) { 110 LVSymbolTableEntry &SymbolName = Entry.second; 111 LVScope *Scope = SymbolName.Scope; 112 LVOffset Offset = Scope ? Scope->getOffset() : 0; 113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5) 114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N") 115 << " Scope: " << hexValue(Offset) 116 << " Address: " << hexValue(SymbolName.Address) 117 << " Name: " << Entry.first << "\n"; 118 } 119 } 120 121 void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function, 122 LVSectionIndex SectionIndex) { 123 SymbolTable.add(Name, Function, SectionIndex); 124 } 125 void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address, 126 LVSectionIndex SectionIndex, 127 bool IsComdat) { 128 SymbolTable.add(Name, Address, SectionIndex, IsComdat); 129 } 130 LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) { 131 return SymbolTable.update(Function); 132 } 133 134 const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) { 135 return SymbolTable.getEntry(Name); 136 } 137 LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) { 138 return SymbolTable.getAddress(Name); 139 } 140 LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) { 141 return SymbolTable.getIndex(Name); 142 } 143 bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) { 144 return SymbolTable.getIsComdat(Name); 145 } 146 147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) { 148 for (const object::SectionRef &Section : Obj.sections()) { 149 LLVM_DEBUG({ 150 Expected<StringRef> SectionNameOrErr = Section.getName(); 151 StringRef Name; 152 if (!SectionNameOrErr) 153 consumeError(SectionNameOrErr.takeError()); 154 else 155 Name = *SectionNameOrErr; 156 dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", " 157 << "Address: " << hexValue(Section.getAddress()) << ", " 158 << "Size: " << hexValue(Section.getSize()) << ", " 159 << "Name: " << Name << "\n"; 160 dbgs() << "isCompressed: " << Section.isCompressed() << ", " 161 << "isText: " << Section.isText() << ", " 162 << "isData: " << Section.isData() << ", " 163 << "isBSS: " << Section.isBSS() << ", " 164 << "isVirtual: " << Section.isVirtual() << "\n"; 165 dbgs() << "isBitcode: " << Section.isBitcode() << ", " 166 << "isStripped: " << Section.isStripped() << ", " 167 << "isBerkeleyText: " << Section.isBerkeleyText() << ", " 168 << "isBerkeleyData: " << Section.isBerkeleyData() << ", " 169 << "isDebugSection: " << Section.isDebugSection() << "\n"; 170 dbgs() << "\n"; 171 }); 172 173 if (!Section.isText() || Section.isVirtual() || !Section.getSize()) 174 continue; 175 176 // Record section information required for symbol resolution. 177 // Note: The section index returned by 'getIndex()' is one based. 178 Sections.emplace(Section.getIndex(), Section); 179 addSectionAddress(Section); 180 181 // Identify the ".text" section. 182 Expected<StringRef> SectionNameOrErr = Section.getName(); 183 if (!SectionNameOrErr) { 184 consumeError(SectionNameOrErr.takeError()); 185 continue; 186 } 187 if (*SectionNameOrErr == ".text" || *SectionNameOrErr == "CODE" || 188 *SectionNameOrErr == ".code") { 189 DotTextSectionIndex = Section.getIndex(); 190 // If the object is WebAssembly, update the address offset that 191 // will be added to DWARF DW_AT_* attributes. 192 if (Obj.isWasm()) 193 WasmCodeSectionOffset = Section.getAddress(); 194 } 195 } 196 197 // Process the symbol table. 198 mapRangeAddress(Obj); 199 200 LLVM_DEBUG({ 201 dbgs() << "\nSections Information:\n"; 202 for (LVSections::reference Entry : Sections) { 203 LVSectionIndex SectionIndex = Entry.first; 204 const object::SectionRef Section = Entry.second; 205 Expected<StringRef> SectionNameOrErr = Section.getName(); 206 if (!SectionNameOrErr) 207 consumeError(SectionNameOrErr.takeError()); 208 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) 209 << " Name: " << *SectionNameOrErr << "\n" 210 << "Size: " << hexValue(Section.getSize()) << "\n" 211 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" 212 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"; 213 } 214 dbgs() << "\nObject Section Information:\n"; 215 for (LVSectionAddresses::const_reference Entry : SectionAddresses) 216 dbgs() << "[" << hexValue(Entry.first) << ":" 217 << hexValue(Entry.first + Entry.second.getSize()) 218 << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; 219 }); 220 } 221 222 void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) { 223 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase(); 224 if (ImageBase) 225 ImageBaseAddress = ImageBase.get(); 226 227 LLVM_DEBUG({ 228 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n"; 229 }); 230 231 uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT; 232 233 for (const object::SectionRef &Section : COFFObj.sections()) { 234 if (!Section.isText() || Section.isVirtual() || !Section.getSize()) 235 continue; 236 237 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); 238 VirtualAddress = COFFSection->VirtualAddress; 239 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags; 240 241 // Record section information required for symbol resolution. 242 // Note: The section index returned by 'getIndex()' is zero based. 243 Sections.emplace(Section.getIndex() + 1, Section); 244 addSectionAddress(Section); 245 246 // Additional initialization on the specific object format. 247 mapRangeAddress(COFFObj, Section, IsComdat); 248 } 249 250 LLVM_DEBUG({ 251 dbgs() << "\nSections Information:\n"; 252 for (LVSections::reference Entry : Sections) { 253 LVSectionIndex SectionIndex = Entry.first; 254 const object::SectionRef Section = Entry.second; 255 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); 256 Expected<StringRef> SectionNameOrErr = Section.getName(); 257 if (!SectionNameOrErr) 258 consumeError(SectionNameOrErr.takeError()); 259 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) 260 << " Name: " << *SectionNameOrErr << "\n" 261 << "Size: " << hexValue(Section.getSize()) << "\n" 262 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" 263 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n" 264 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData) 265 << "\n" 266 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData) 267 << "\n"; 268 } 269 dbgs() << "\nObject Section Information:\n"; 270 for (LVSectionAddresses::const_reference Entry : SectionAddresses) 271 dbgs() << "[" << hexValue(Entry.first) << ":" 272 << hexValue(Entry.first + Entry.second.getSize()) 273 << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; 274 }); 275 } 276 277 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, 278 StringRef TheFeatures) { 279 std::string TargetLookupError; 280 const Target *TheTarget = 281 TargetRegistry::lookupTarget(TheTriple, TargetLookupError); 282 if (!TheTarget) 283 return createStringError(errc::invalid_argument, TargetLookupError.c_str()); 284 285 // Register information. 286 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple); 287 if (!RegisterInfo) 288 return createStringError(errc::invalid_argument, 289 "no register info for target " + TheTriple); 290 MRI.reset(RegisterInfo); 291 292 // Assembler properties and features. 293 MCTargetOptions MCOptions; 294 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions)); 295 if (!AsmInfo) 296 return createStringError(errc::invalid_argument, 297 "no assembly info for target " + TheTriple); 298 MAI.reset(AsmInfo); 299 300 // Target subtargets. 301 StringRef CPU; 302 MCSubtargetInfo *SubtargetInfo( 303 TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures)); 304 if (!SubtargetInfo) 305 return createStringError(errc::invalid_argument, 306 "no subtarget info for target " + TheTriple); 307 STI.reset(SubtargetInfo); 308 309 // Instructions Info. 310 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo()); 311 if (!InstructionInfo) 312 return createStringError(errc::invalid_argument, 313 "no instruction info for target " + TheTriple); 314 MII.reset(InstructionInfo); 315 316 MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(), 317 STI.get()); 318 319 // Assembler. 320 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC)); 321 if (!DisAsm) 322 return createStringError(errc::invalid_argument, 323 "no disassembler for target " + TheTriple); 324 MD.reset(DisAsm); 325 326 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter( 327 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI)); 328 if (!InstructionPrinter) 329 return createStringError(errc::invalid_argument, 330 "no target assembly language printer for target " + 331 TheTriple); 332 MIP.reset(InstructionPrinter); 333 InstructionPrinter->setPrintImmHex(true); 334 335 return Error::success(); 336 } 337 338 Expected<std::pair<uint64_t, object::SectionRef>> 339 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address, 340 LVSectionIndex SectionIndex) { 341 // Return the 'text' section with the code for this logical scope. 342 // COFF: SectionIndex is zero. Use 'SectionAddresses' data. 343 // ELF: SectionIndex is the section index in the file. 344 if (SectionIndex) { 345 LVSections::iterator Iter = Sections.find(SectionIndex); 346 if (Iter == Sections.end()) { 347 return createStringError(errc::invalid_argument, 348 "invalid section index for: '%s'", 349 Scope->getName().str().c_str()); 350 } 351 const object::SectionRef Section = Iter->second; 352 return std::make_pair(Section.getAddress(), Section); 353 } 354 355 // Ensure a valid starting address for the public names. 356 LVSectionAddresses::const_iterator Iter = 357 SectionAddresses.upper_bound(Address); 358 if (Iter == SectionAddresses.begin()) 359 return createStringError(errc::invalid_argument, 360 "invalid section address for: '%s'", 361 Scope->getName().str().c_str()); 362 363 // Get section that contains the code for this function. 364 Iter = SectionAddresses.lower_bound(Address); 365 if (Iter != SectionAddresses.begin()) 366 --Iter; 367 return std::make_pair(Iter->first, Iter->second); 368 } 369 370 Error LVBinaryReader::createInstructions(LVScope *Scope, 371 LVSectionIndex SectionIndex, 372 const LVNameInfo &NameInfo) { 373 assert(Scope && "Scope is null."); 374 375 // Skip stripped functions. 376 if (Scope->getIsDiscarded()) 377 return Error::success(); 378 379 // Find associated address and size for the given function entry point. 380 LVAddress Address = NameInfo.first; 381 uint64_t Size = NameInfo.second; 382 383 LLVM_DEBUG({ 384 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '" 385 << Scope->getLinkageName() << "'\n" 386 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 387 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n"; 388 }); 389 390 Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr = 391 getSection(Scope, Address, SectionIndex); 392 if (!SectionOrErr) 393 return SectionOrErr.takeError(); 394 const object::SectionRef Section = (*SectionOrErr).second; 395 uint64_t SectionAddress = (*SectionOrErr).first; 396 397 Expected<StringRef> SectionContentsOrErr = Section.getContents(); 398 if (!SectionContentsOrErr) 399 return SectionOrErr.takeError(); 400 401 // There are cases where the section size is smaller than the [LowPC,HighPC] 402 // range; it causes us to decode invalid addresses. The recorded size in the 403 // logical scope is one less than the real size. 404 LLVM_DEBUG({ 405 dbgs() << " Size: " << hexValue(Size) 406 << ", Section Size: " << hexValue(Section.getSize()) << "\n"; 407 }); 408 Size = std::min(Size + 1, Section.getSize()); 409 410 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr); 411 uint64_t Offset = Address - SectionAddress; 412 if (Offset > Bytes.size()) { 413 LLVM_DEBUG({ 414 dbgs() << "offset (" << hexValue(Offset) << ") is beyond section size (" 415 << hexValue(Bytes.size()) << "); malformed input?\n"; 416 }); 417 return createStringError( 418 errc::bad_address, 419 "Failed to parse instructions; offset beyond section size"); 420 } 421 uint8_t const *Begin = Bytes.data() + Offset; 422 uint8_t const *End = Bytes.data() + Offset + Size; 423 424 LLVM_DEBUG({ 425 Expected<StringRef> SectionNameOrErr = Section.getName(); 426 if (!SectionNameOrErr) 427 consumeError(SectionNameOrErr.takeError()); 428 else 429 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " [" 430 << hexValue((uint64_t)Section.getAddress()) << ":" 431 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10) 432 << "] Name: '" << *SectionNameOrErr << "'\n" 433 << "Begin: " << hexValue((uint64_t)Begin) 434 << ", End: " << hexValue((uint64_t)End) << "\n"; 435 }); 436 437 // Address for first instruction line. 438 LVAddress FirstAddress = Address; 439 auto InstructionsSP = std::make_unique<LVLines>(); 440 LVLines &Instructions = *InstructionsSP; 441 DiscoveredLines.emplace_back(std::move(InstructionsSP)); 442 443 while (Begin < End) { 444 MCInst Instruction; 445 uint64_t BytesConsumed = 0; 446 SmallVector<char, 64> InsnStr; 447 raw_svector_ostream Annotations(InsnStr); 448 MCDisassembler::DecodeStatus const S = 449 MD->getInstruction(Instruction, BytesConsumed, 450 ArrayRef<uint8_t>(Begin, End), Address, outs()); 451 switch (S) { 452 case MCDisassembler::Fail: 453 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; }); 454 if (BytesConsumed == 0) 455 // Skip invalid bytes 456 BytesConsumed = 1; 457 break; 458 case MCDisassembler::SoftFail: 459 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; }); 460 [[fallthrough]]; 461 case MCDisassembler::Success: { 462 std::string Buffer; 463 raw_string_ostream Stream(Buffer); 464 StringRef AnnotationsStr = Annotations.str(); 465 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream); 466 LLVM_DEBUG({ 467 std::string BufferCodes; 468 raw_string_ostream StreamCodes(BufferCodes); 469 StreamCodes << format_bytes( 470 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16, 471 16); 472 dbgs() << "[" << hexValue((uint64_t)Begin) << "] " 473 << "Size: " << format_decimal(BytesConsumed, 2) << " (" 474 << formatv("{0}", 475 fmt_align(StreamCodes.str(), AlignStyle::Left, 32)) 476 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str() 477 << "\n"; 478 }); 479 // Here we add logical lines to the Instructions. Later on, 480 // the 'processLines()' function will move each created logical line 481 // to its enclosing logical scope, using the debug ranges information 482 // and they will be released when its scope parent is deleted. 483 LVLineAssembler *Line = createLineAssembler(); 484 Line->setAddress(Address); 485 Line->setName(StringRef(Stream.str()).trim()); 486 Instructions.push_back(Line); 487 break; 488 } 489 } 490 Address += BytesConsumed; 491 Begin += BytesConsumed; 492 } 493 494 LLVM_DEBUG({ 495 size_t Index = 0; 496 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 497 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 498 << "Address: " << hexValue(FirstAddress) 499 << format(" - Collected instructions lines: %d\n", 500 Instructions.size()); 501 for (const LVLine *Line : Instructions) 502 dbgs() << format_decimal(++Index, 5) << ": " 503 << hexValue(Line->getOffset()) << ", (" << Line->getName() 504 << ")\n"; 505 }); 506 507 // The scope in the assembler names is linked to its own instructions. 508 ScopeInstructions.add(SectionIndex, Scope, &Instructions); 509 AssemblerMappings.add(SectionIndex, FirstAddress, Scope); 510 511 return Error::success(); 512 } 513 514 Error LVBinaryReader::createInstructions(LVScope *Function, 515 LVSectionIndex SectionIndex) { 516 if (!options().getPrintInstructions()) 517 return Error::success(); 518 519 LVNameInfo Name = CompileUnit->findPublicName(Function); 520 if (Name.first != LVAddress(UINT64_MAX)) 521 return createInstructions(Function, SectionIndex, Name); 522 523 return Error::success(); 524 } 525 526 Error LVBinaryReader::createInstructions() { 527 if (!options().getPrintInstructions()) 528 return Error::success(); 529 530 LLVM_DEBUG({ 531 size_t Index = 1; 532 dbgs() << "\nPublic Names (Scope):\n"; 533 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 534 LVScope *Scope = Name.first; 535 const LVNameInfo &NameInfo = Name.second; 536 LVAddress Address = NameInfo.first; 537 uint64_t Size = NameInfo.second; 538 dbgs() << format_decimal(Index++, 5) << ": " 539 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 540 << hexValue(Address) << ":" << hexValue(Address + Size) << "] " 541 << "Name: '" << Scope->getName() << "' / '" 542 << Scope->getLinkageName() << "'\n"; 543 } 544 }); 545 546 // For each public name in the current compile unit, create the line 547 // records that represent the executable instructions. 548 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 549 LVScope *Scope = Name.first; 550 // The symbol table extracted from the object file always contains a 551 // non-empty name (linkage name). However, the logical scope does not 552 // guarantee to have a name for the linkage name (main is one case). 553 // For those cases, set the linkage name the same as the name. 554 if (!Scope->getLinkageNameIndex()) 555 Scope->setLinkageName(Scope->getName()); 556 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName()); 557 if (Error Err = createInstructions(Scope, SectionIndex, Name.second)) 558 return Err; 559 } 560 561 return Error::success(); 562 } 563 564 // During the traversal of the debug information sections, we created the 565 // logical lines representing the disassembled instructions from the text 566 // section and the logical lines representing the line records from the 567 // debug line section. Using the ranges associated with the logical scopes, 568 // we will allocate those logical lines to their logical scopes. 569 void LVBinaryReader::processLines(LVLines *DebugLines, 570 LVSectionIndex SectionIndex, 571 LVScope *Function) { 572 assert(DebugLines && "DebugLines is null."); 573 574 // Just return if this compilation unit does not have any line records 575 // and no instruction lines were created. 576 if (DebugLines->empty() && !options().getPrintInstructions()) 577 return; 578 579 // Merge the debug lines and instruction lines using their text address; 580 // the logical line representing the debug line record is followed by the 581 // line(s) representing the disassembled instructions, whose addresses are 582 // equal or greater that the line address and less than the address of the 583 // next debug line record. 584 LLVM_DEBUG({ 585 size_t Index = 1; 586 size_t PerLine = 4; 587 dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size()); 588 for (const LVLine *Line : *DebugLines) { 589 dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset()) 590 << ", (" << Line->getLineNumber() << ")" 591 << ((Index % PerLine) ? " " : "\n"); 592 ++Index; 593 } 594 dbgs() << ((Index % PerLine) ? "\n" : ""); 595 }); 596 597 bool TraverseLines = true; 598 LVLines::iterator Iter = DebugLines->begin(); 599 while (TraverseLines && Iter != DebugLines->end()) { 600 uint64_t DebugAddress = (*Iter)->getAddress(); 601 602 // Get the function with an entry point that matches this line and 603 // its associated assembler entries. In the case of COMDAT, the input 604 // 'Function' is not null. Use it to find its address ranges. 605 LVScope *Scope = Function; 606 if (!Function) { 607 Scope = AssemblerMappings.find(SectionIndex, DebugAddress); 608 if (!Scope) { 609 ++Iter; 610 continue; 611 } 612 } 613 614 // Get the associated instructions for the found 'Scope'. 615 LVLines InstructionLines; 616 LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope); 617 if (Lines) 618 InstructionLines = std::move(*Lines); 619 620 LLVM_DEBUG({ 621 size_t Index = 0; 622 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 623 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 624 << format("Process instruction lines: %d\n", 625 InstructionLines.size()); 626 for (const LVLine *Line : InstructionLines) 627 dbgs() << format_decimal(++Index, 5) << ": " 628 << hexValue(Line->getOffset()) << ", (" << Line->getName() 629 << ")\n"; 630 }); 631 632 // Continue with next debug line if there are not instructions lines. 633 if (InstructionLines.empty()) { 634 ++Iter; 635 continue; 636 } 637 638 for (LVLine *InstructionLine : InstructionLines) { 639 uint64_t InstructionAddress = InstructionLine->getAddress(); 640 LLVM_DEBUG({ 641 dbgs() << "Instruction address: " << hexValue(InstructionAddress) 642 << "\n"; 643 }); 644 if (TraverseLines) { 645 while (Iter != DebugLines->end()) { 646 DebugAddress = (*Iter)->getAddress(); 647 LLVM_DEBUG({ 648 bool IsDebug = (*Iter)->getIsLineDebug(); 649 dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " [" 650 << hexValue(DebugAddress) << "]"; 651 if (IsDebug) 652 dbgs() << format(" %d", (*Iter)->getLineNumber()); 653 dbgs() << "\n"; 654 }); 655 // Instruction address before debug line. 656 if (InstructionAddress < DebugAddress) { 657 LLVM_DEBUG({ 658 dbgs() << "Inserted instruction address: " 659 << hexValue(InstructionAddress) << " before line: " 660 << format("%d", (*Iter)->getLineNumber()) << " [" 661 << hexValue(DebugAddress) << "]\n"; 662 }); 663 Iter = DebugLines->insert(Iter, InstructionLine); 664 // The returned iterator points to the inserted instruction. 665 // Skip it and point to the line acting as reference. 666 ++Iter; 667 break; 668 } 669 ++Iter; 670 } 671 if (Iter == DebugLines->end()) { 672 // We have reached the end of the source lines and the current 673 // instruction line address is greater than the last source line. 674 TraverseLines = false; 675 DebugLines->push_back(InstructionLine); 676 } 677 } else { 678 DebugLines->push_back(InstructionLine); 679 } 680 } 681 } 682 683 LLVM_DEBUG({ 684 dbgs() << format("Lines after merge: %d\n", DebugLines->size()); 685 size_t Index = 0; 686 for (const LVLine *Line : *DebugLines) { 687 dbgs() << format_decimal(++Index, 5) << ": " 688 << hexValue(Line->getOffset()) << ", (" 689 << ((Line->getIsLineDebug()) 690 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 691 : Line->getName()) 692 << ")\n"; 693 } 694 }); 695 696 // If this compilation unit does not have line records, traverse its scopes 697 // and take any collected instruction lines as the working set in order 698 // to move them to their associated scope. 699 if (DebugLines->empty()) { 700 if (const LVScopes *Scopes = CompileUnit->getScopes()) 701 for (LVScope *Scope : *Scopes) { 702 LVLines *Lines = ScopeInstructions.find(Scope); 703 if (Lines) { 704 705 LLVM_DEBUG({ 706 size_t Index = 0; 707 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 708 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 709 << format("Instruction lines: %d\n", Lines->size()); 710 for (const LVLine *Line : *Lines) 711 dbgs() << format_decimal(++Index, 5) << ": " 712 << hexValue(Line->getOffset()) << ", (" << Line->getName() 713 << ")\n"; 714 }); 715 716 if (Scope->getIsArtificial()) { 717 // Add the instruction lines to their artificial scope. 718 for (LVLine *Line : *Lines) 719 Scope->addElement(Line); 720 } else { 721 DebugLines->append(*Lines); 722 } 723 Lines->clear(); 724 } 725 } 726 } 727 728 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 729 ScopesWithRanges->startSearch(); 730 731 // Process collected lines. 732 LVScope *Scope; 733 for (LVLine *Line : *DebugLines) { 734 // Using the current line address, get its associated lexical scope and 735 // add the line information to it. 736 Scope = ScopesWithRanges->getEntry(Line->getAddress()); 737 if (!Scope) { 738 // If missing scope, use the compile unit. 739 Scope = CompileUnit; 740 LLVM_DEBUG({ 741 dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", (" 742 << ((Line->getIsLineDebug()) 743 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 744 : Line->getName()) 745 << ")\n"; 746 }); 747 } 748 749 // Add line object to scope. 750 Scope->addElement(Line); 751 752 // Report any line zero. 753 if (options().getWarningLines() && Line->getIsLineDebug() && 754 !Line->getLineNumber()) 755 CompileUnit->addLineZero(Line); 756 757 // Some compilers generate ranges in the compile unit; other compilers 758 // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global 759 // variables, we need to generate the map ranges for the compile unit. 760 // If we use the ranges stored at the scope level, there are cases where 761 // the address referenced by a symbol location, is not in the enclosing 762 // scope, but in an outer one. By using the ranges stored in the compile 763 // unit, we can catch all those addresses. 764 if (Line->getIsLineDebug()) 765 CompileUnit->addMapping(Line, SectionIndex); 766 767 // Resolve any given pattern. 768 patterns().resolvePatternMatch(Line); 769 } 770 771 ScopesWithRanges->endSearch(); 772 } 773 774 void LVBinaryReader::processLines(LVLines *DebugLines, 775 LVSectionIndex SectionIndex) { 776 assert(DebugLines && "DebugLines is null."); 777 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex)) 778 return; 779 780 // If the Compile Unit does not contain comdat functions, use the whole 781 // set of debug lines, as the addresses don't have conflicts. 782 if (!CompileUnit->getHasComdatScopes()) { 783 processLines(DebugLines, SectionIndex, nullptr); 784 return; 785 } 786 787 // Find the indexes for the lines whose address is zero. 788 std::vector<size_t> AddressZero; 789 LVLines::iterator It = llvm::find_if( 790 *DebugLines, [](LVLine *Line) { return !Line->getAddress(); }); 791 while (It != std::end(*DebugLines)) { 792 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It)); 793 It = std::find_if(std::next(It), std::end(*DebugLines), 794 [](LVLine *Line) { return !Line->getAddress(); }); 795 } 796 797 // If the set of debug lines does not contain any line with address zero, 798 // use the whole set. It means we are dealing with an initialization 799 // section from a fully linked binary. 800 if (AddressZero.empty()) { 801 processLines(DebugLines, SectionIndex, nullptr); 802 return; 803 } 804 805 // The Compile unit contains comdat functions. Traverse the collected 806 // debug lines and identify logical groups based on their start and 807 // address. Each group starts with a zero address. 808 // Begin, End, Address, IsDone. 809 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>; 810 std::vector<LVBucket> Buckets; 811 812 LVAddress Address; 813 size_t Begin = 0; 814 size_t End = 0; 815 size_t Index = 0; 816 for (Index = 0; Index < AddressZero.size() - 1; ++Index) { 817 Begin = AddressZero[Index]; 818 End = AddressZero[Index + 1] - 1; 819 Address = (*DebugLines)[End]->getAddress(); 820 Buckets.emplace_back(Begin, End, Address, false); 821 } 822 823 // Add the last bucket. 824 if (Index) { 825 Begin = AddressZero[Index]; 826 End = DebugLines->size() - 1; 827 Address = (*DebugLines)[End]->getAddress(); 828 Buckets.emplace_back(Begin, End, Address, false); 829 } 830 831 LLVM_DEBUG({ 832 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n"; 833 for (LVBucket &Bucket : Buckets) { 834 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", " 835 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", " 836 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n"; 837 } 838 }); 839 840 // Traverse the sections and buckets looking for matches on the section 841 // sizes. In the unlikely event of different buckets with the same size 842 // process them in order and mark them as done. 843 LVLines Group; 844 for (LVSections::reference Entry : Sections) { 845 LVSectionIndex SectionIndex = Entry.first; 846 const object::SectionRef Section = Entry.second; 847 uint64_t Size = Section.getSize(); 848 LLVM_DEBUG({ 849 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3) 850 << " , Section Size: " << hexValue(Section.getSize()) 851 << " , Section Address: " << hexValue(Section.getAddress()) 852 << "\n"; 853 }); 854 855 for (LVBucket &Bucket : Buckets) { 856 if (std::get<3>(Bucket)) 857 // Already done for previous section. 858 continue; 859 if (Size == std::get<2>(Bucket)) { 860 // We have a match on the section size. 861 Group.clear(); 862 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket); 863 LVLines::iterator IterEnd = 864 DebugLines->begin() + std::get<1>(Bucket) + 1; 865 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter) 866 Group.push_back(*Iter); 867 processLines(&Group, SectionIndex, /*Function=*/nullptr); 868 std::get<3>(Bucket) = true; 869 break; 870 } 871 } 872 } 873 } 874 875 // Traverse the scopes for the given 'Function' looking for any inlined 876 // scopes with inlined lines, which are found in 'CUInlineeLines'. 877 void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex, 878 LVScope *Function) { 879 SmallVector<LVInlineeLine::iterator> InlineeIters; 880 std::function<void(LVScope * Parent)> FindInlinedScopes = 881 [&](LVScope *Parent) { 882 if (const LVScopes *Scopes = Parent->getScopes()) 883 for (LVScope *Scope : *Scopes) { 884 LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope); 885 if (Iter != CUInlineeLines.end()) 886 InlineeIters.push_back(Iter); 887 FindInlinedScopes(Scope); 888 } 889 }; 890 891 // Find all inlined scopes for the given 'Function'. 892 FindInlinedScopes(Function); 893 for (LVInlineeLine::iterator InlineeIter : InlineeIters) { 894 LVScope *Scope = InlineeIter->first; 895 addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex); 896 897 // TODO: Convert this into a reference. 898 LVLines *InlineeLines = InlineeIter->second.get(); 899 LLVM_DEBUG({ 900 dbgs() << "Inlined lines for: " << Scope->getName() << "\n"; 901 for (const LVLine *Line : *InlineeLines) 902 dbgs() << "[" << hexValue(Line->getAddress()) << "] " 903 << Line->getLineNumber() << "\n"; 904 dbgs() << format("Debug lines: %d\n", CULines.size()); 905 for (const LVLine *Line : CULines) 906 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" 907 << Line->getLineNumber() << ")\n"; 908 ; 909 }); 910 911 // The inlined lines must be merged using its address, in order to keep 912 // the real order of the instructions. The inlined lines are mixed with 913 // the other non-inlined lines. 914 if (InlineeLines->size()) { 915 // First address of inlinee code. 916 uint64_t InlineeStart = (InlineeLines->front())->getAddress(); 917 LVLines::iterator Iter = 918 llvm::find_if(CULines, [&](LVLine *Item) -> bool { 919 return Item->getAddress() == InlineeStart; 920 }); 921 if (Iter != CULines.end()) { 922 // 'Iter' points to the line where the inlined function is called. 923 // Emulate the DW_AT_call_line attribute. 924 Scope->setCallLineNumber((*Iter)->getLineNumber()); 925 // Mark the referenced line as the start of the inlined function. 926 // Skip the first line during the insertion, as the address and 927 // line number as the same. Otherwise we have to erase and insert. 928 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber()); 929 ++Iter; 930 CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end()); 931 } 932 } 933 934 // Remove this set of lines from the container; each inlined function 935 // creates an unique set of lines. Remove only the created container. 936 CUInlineeLines.erase(InlineeIter); 937 InlineeLines->clear(); 938 } 939 LLVM_DEBUG({ 940 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n"; 941 dbgs() << format("Debug lines: %d\n", CULines.size()); 942 for (const LVLine *Line : CULines) 943 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" 944 << Line->getLineNumber() << ")\n"; 945 ; 946 }); 947 } 948 949 void LVBinaryReader::print(raw_ostream &OS) const { 950 OS << "LVBinaryReader\n"; 951 LLVM_DEBUG(dbgs() << "PrintReader\n"); 952 } 953