1 //===- DwarfTransformer.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/DIContext.h" 10 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 11 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 12 #include "llvm/Support/Error.h" 13 #include "llvm/Support/ThreadPool.h" 14 #include "llvm/Support/raw_ostream.h" 15 16 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" 17 #include "llvm/DebugInfo/GSYM/FunctionInfo.h" 18 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 19 #include "llvm/DebugInfo/GSYM/GsymReader.h" 20 #include "llvm/DebugInfo/GSYM/InlineInfo.h" 21 #include "llvm/DebugInfo/GSYM/OutputAggregator.h" 22 23 #include <optional> 24 25 using namespace llvm; 26 using namespace gsym; 27 28 struct llvm::gsym::CUInfo { 29 const DWARFDebugLine::LineTable *LineTable; 30 const char *CompDir; 31 std::vector<uint32_t> FileCache; 32 uint64_t Language = 0; 33 uint8_t AddrSize = 0; 34 35 CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { 36 LineTable = DICtx.getLineTableForUnit(CU); 37 CompDir = CU->getCompilationDir(); 38 FileCache.clear(); 39 if (LineTable) 40 FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); 41 DWARFDie Die = CU->getUnitDIE(); 42 Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); 43 AddrSize = CU->getAddressByteSize(); 44 } 45 46 /// Return true if Addr is the highest address for a given compile unit. The 47 /// highest address is encoded as -1, of all ones in the address. These high 48 /// addresses are used by some linkers to indicate that a function has been 49 /// dead stripped or didn't end up in the linked executable. 50 bool isHighestAddress(uint64_t Addr) const { 51 if (AddrSize == 4) 52 return Addr == UINT32_MAX; 53 else if (AddrSize == 8) 54 return Addr == UINT64_MAX; 55 return false; 56 } 57 58 /// Convert a DWARF compile unit file index into a GSYM global file index. 59 /// 60 /// Each compile unit in DWARF has its own file table in the line table 61 /// prologue. GSYM has a single large file table that applies to all files 62 /// from all of the info in a GSYM file. This function converts between the 63 /// two and caches and DWARF CU file index that has already been converted so 64 /// the first client that asks for a compile unit file index will end up 65 /// doing the conversion, and subsequent clients will get the cached GSYM 66 /// index. 67 std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym, 68 uint32_t DwarfFileIdx) { 69 if (!LineTable || DwarfFileIdx >= FileCache.size()) 70 return std::nullopt; 71 uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; 72 if (GsymFileIdx != UINT32_MAX) 73 return GsymFileIdx; 74 std::string File; 75 if (LineTable->getFileNameByIndex( 76 DwarfFileIdx, CompDir, 77 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) 78 GsymFileIdx = Gsym.insertFile(File); 79 else 80 GsymFileIdx = 0; 81 return GsymFileIdx; 82 } 83 }; 84 85 86 static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { 87 if (DWARFDie SpecDie = 88 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { 89 if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) 90 return SpecParent; 91 } 92 if (DWARFDie AbstDie = 93 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { 94 if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) 95 return AbstParent; 96 } 97 98 // We never want to follow parent for inlined subroutine - that would 99 // give us information about where the function is inlined, not what 100 // function is inlined 101 if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) 102 return DWARFDie(); 103 104 DWARFDie ParentDie = Die.getParent(); 105 if (!ParentDie) 106 return DWARFDie(); 107 108 switch (ParentDie.getTag()) { 109 case dwarf::DW_TAG_namespace: 110 case dwarf::DW_TAG_structure_type: 111 case dwarf::DW_TAG_union_type: 112 case dwarf::DW_TAG_class_type: 113 case dwarf::DW_TAG_subprogram: 114 return ParentDie; // Found parent decl context DIE 115 case dwarf::DW_TAG_lexical_block: 116 return GetParentDeclContextDIE(ParentDie); 117 default: 118 break; 119 } 120 121 return DWARFDie(); 122 } 123 124 /// Get the GsymCreator string table offset for the qualified name for the 125 /// DIE passed in. This function will avoid making copies of any strings in 126 /// the GsymCreator when possible. We don't need to copy a string when the 127 /// string comes from our .debug_str section or is an inlined string in the 128 /// .debug_info. If we create a qualified name string in this function by 129 /// combining multiple strings in the DWARF string table or info, we will make 130 /// a copy of the string when we add it to the string table. 131 static std::optional<uint32_t> 132 getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) { 133 // If the dwarf has mangled name, use mangled name 134 if (auto LinkageName = Die.getLinkageName()) { 135 // We have seen cases were linkage name is actually empty. 136 if (strlen(LinkageName) > 0) 137 return Gsym.insertString(LinkageName, /* Copy */ false); 138 } 139 140 StringRef ShortName(Die.getName(DINameKind::ShortName)); 141 if (ShortName.empty()) 142 return std::nullopt; 143 144 // For C++ and ObjC, prepend names of all parent declaration contexts 145 if (!(Language == dwarf::DW_LANG_C_plus_plus || 146 Language == dwarf::DW_LANG_C_plus_plus_03 || 147 Language == dwarf::DW_LANG_C_plus_plus_11 || 148 Language == dwarf::DW_LANG_C_plus_plus_14 || 149 Language == dwarf::DW_LANG_ObjC_plus_plus || 150 // This should not be needed for C, but we see C++ code marked as C 151 // in some binaries. This should hurt, so let's do it for C as well 152 Language == dwarf::DW_LANG_C)) 153 return Gsym.insertString(ShortName, /* Copy */ false); 154 155 // Some GCC optimizations create functions with names ending with .isra.<num> 156 // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name 157 // If it looks like it could be the case, don't add any prefix 158 if (ShortName.starts_with("_Z") && 159 (ShortName.contains(".isra.") || ShortName.contains(".part."))) 160 return Gsym.insertString(ShortName, /* Copy */ false); 161 162 DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); 163 if (ParentDeclCtxDie) { 164 std::string Name = ShortName.str(); 165 while (ParentDeclCtxDie) { 166 StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); 167 if (!ParentName.empty()) { 168 // "lambda" names are wrapped in < >. Replace with { } 169 // to be consistent with demangled names and not to confuse with 170 // templates 171 if (ParentName.front() == '<' && ParentName.back() == '>') 172 Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + 173 "::" + Name; 174 else 175 Name = ParentName.str() + "::" + Name; 176 } 177 ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); 178 } 179 // Copy the name since we created a new name in a std::string. 180 return Gsym.insertString(Name, /* Copy */ true); 181 } 182 // Don't copy the name since it exists in the DWARF object file. 183 return Gsym.insertString(ShortName, /* Copy */ false); 184 } 185 186 static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { 187 bool CheckChildren = true; 188 switch (Die.getTag()) { 189 case dwarf::DW_TAG_subprogram: 190 // Don't look into functions within functions. 191 CheckChildren = Depth == 0; 192 break; 193 case dwarf::DW_TAG_inlined_subroutine: 194 return true; 195 default: 196 break; 197 } 198 if (!CheckChildren) 199 return false; 200 for (DWARFDie ChildDie : Die.children()) { 201 if (hasInlineInfo(ChildDie, Depth + 1)) 202 return true; 203 } 204 return false; 205 } 206 207 static AddressRanges 208 ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) { 209 AddressRanges Ranges; 210 for (const DWARFAddressRange &DwarfRange : DwarfRanges) { 211 if (DwarfRange.LowPC < DwarfRange.HighPC) 212 Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC}); 213 } 214 return Ranges; 215 } 216 217 static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out, 218 CUInfo &CUI, DWARFDie Die, uint32_t Depth, 219 FunctionInfo &FI, InlineInfo &Parent, 220 const AddressRanges &AllParentRanges, 221 bool &WarnIfEmpty) { 222 if (!hasInlineInfo(Die, Depth)) 223 return; 224 225 dwarf::Tag Tag = Die.getTag(); 226 if (Tag == dwarf::DW_TAG_inlined_subroutine) { 227 // create new InlineInfo and append to parent.children 228 InlineInfo II; 229 AddressRanges AllInlineRanges; 230 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 231 if (RangesOrError) { 232 AllInlineRanges = ConvertDWARFRanges(RangesOrError.get()); 233 uint32_t EmptyCount = 0; 234 for (const AddressRange &InlineRange : AllInlineRanges) { 235 // Check for empty inline range in case inline function was outlined 236 // or has not code 237 if (InlineRange.empty()) { 238 ++EmptyCount; 239 } else { 240 if (Parent.Ranges.contains(InlineRange)) { 241 II.Ranges.insert(InlineRange); 242 } else { 243 // Only warn if the current inline range is not within any of all 244 // of the parent ranges. If we have a DW_TAG_subpgram with multiple 245 // ranges we will emit a FunctionInfo for each range of that 246 // function that only emits information within the current range, 247 // so we only want to emit an error if the DWARF has issues, not 248 // when a range currently just isn't in the range we are currently 249 // parsing for. 250 if (AllParentRanges.contains(InlineRange)) { 251 WarnIfEmpty = false; 252 } else 253 Out.Report("Function DIE has uncontained address range", 254 [&](raw_ostream &OS) { 255 OS << "error: inlined function DIE at " 256 << HEX32(Die.getOffset()) << " has a range [" 257 << HEX64(InlineRange.start()) << " - " 258 << HEX64(InlineRange.end()) 259 << ") that isn't contained in " 260 << "any parent address ranges, this inline range " 261 "will be " 262 "removed.\n"; 263 }); 264 } 265 } 266 } 267 // If we have all empty ranges for the inlines, then don't warn if we 268 // have an empty InlineInfo at the top level as all inline functions 269 // were elided. 270 if (EmptyCount == AllInlineRanges.size()) 271 WarnIfEmpty = false; 272 } 273 if (II.Ranges.empty()) 274 return; 275 276 if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) 277 II.Name = *NameIndex; 278 const uint64_t DwarfFileIdx = dwarf::toUnsigned( 279 Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX); 280 std::optional<uint32_t> OptGSymFileIdx = 281 CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx); 282 if (OptGSymFileIdx) { 283 II.CallFile = OptGSymFileIdx.value(); 284 II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); 285 // parse all children and append to parent 286 for (DWARFDie ChildDie : Die.children()) 287 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, II, 288 AllInlineRanges, WarnIfEmpty); 289 Parent.Children.emplace_back(std::move(II)); 290 } else 291 Out.Report( 292 "Inlined function die has invlaid file index in DW_AT_call_file", 293 [&](raw_ostream &OS) { 294 OS << "error: inlined function DIE at " << HEX32(Die.getOffset()) 295 << " has an invalid file index " << DwarfFileIdx 296 << " in its DW_AT_call_file attribute, this inline entry and " 297 "all " 298 << "children will be removed.\n"; 299 }); 300 return; 301 } 302 if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { 303 // skip this Die and just recurse down 304 for (DWARFDie ChildDie : Die.children()) 305 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, Parent, 306 AllParentRanges, WarnIfEmpty); 307 } 308 } 309 310 static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, 311 DWARFDie Die, GsymCreator &Gsym, 312 FunctionInfo &FI) { 313 std::vector<uint32_t> RowVector; 314 const uint64_t StartAddress = FI.startAddress(); 315 const uint64_t EndAddress = FI.endAddress(); 316 const uint64_t RangeSize = EndAddress - StartAddress; 317 const object::SectionedAddress SecAddress{ 318 StartAddress, object::SectionedAddress::UndefSection}; 319 320 // Attempt to retrieve DW_AT_LLVM_stmt_sequence if present. 321 std::optional<uint64_t> StmtSeqOffset; 322 if (auto StmtSeqAttr = Die.find(llvm::dwarf::DW_AT_LLVM_stmt_sequence)) { 323 // The `DW_AT_LLVM_stmt_sequence` attribute might be set to `UINT64_MAX` 324 // when it refers to an empty line sequence. In such cases, the DWARF linker 325 // will exclude the empty sequence from the final output and assign 326 // `UINT64_MAX` to the `DW_AT_LLVM_stmt_sequence` attribute. 327 uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, UINT64_MAX); 328 if (StmtSeqVal != UINT64_MAX) 329 StmtSeqOffset = StmtSeqVal; 330 } 331 332 if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector, 333 StmtSeqOffset)) { 334 // If we have a DW_TAG_subprogram but no line entries, fall back to using 335 // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. 336 std::string FilePath = Die.getDeclFile( 337 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); 338 if (FilePath.empty()) { 339 // If we had a DW_AT_decl_file, but got no file then we need to emit a 340 // warning. 341 Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) { 342 const uint64_t DwarfFileIdx = dwarf::toUnsigned( 343 Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); 344 OS << "error: function DIE at " << HEX32(Die.getOffset()) 345 << " has an invalid file index " << DwarfFileIdx 346 << " in its DW_AT_decl_file attribute, unable to create a single " 347 << "line entry from the DW_AT_decl_file/DW_AT_decl_line " 348 << "attributes.\n"; 349 }); 350 return; 351 } 352 if (auto Line = 353 dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { 354 LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line); 355 FI.OptLineTable = LineTable(); 356 FI.OptLineTable->push(LE); 357 } 358 return; 359 } 360 361 FI.OptLineTable = LineTable(); 362 DWARFDebugLine::Row PrevRow; 363 for (uint32_t RowIndex : RowVector) { 364 // Take file number and line/column from the row. 365 const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; 366 std::optional<uint32_t> OptFileIdx = 367 CUI.DWARFToGSYMFileIndex(Gsym, Row.File); 368 if (!OptFileIdx) { 369 Out.Report( 370 "Invalid file index in DWARF line table", [&](raw_ostream &OS) { 371 OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has " 372 << "a line entry with invalid DWARF file index, this entry will " 373 << "be removed:\n"; 374 Row.dumpTableHeader(OS, /*Indent=*/0); 375 Row.dump(OS); 376 OS << "\n"; 377 }); 378 continue; 379 } 380 const uint32_t FileIdx = OptFileIdx.value(); 381 uint64_t RowAddress = Row.Address.Address; 382 // Watch out for a RowAddress that is in the middle of a line table entry 383 // in the DWARF. If we pass an address in between two line table entries 384 // we will get a RowIndex for the previous valid line table row which won't 385 // be contained in our function. This is usually a bug in the DWARF due to 386 // linker problems or LTO or other DWARF re-linking so it is worth emitting 387 // an error, but not worth stopping the creation of the GSYM. 388 if (!FI.Range.contains(RowAddress)) { 389 if (RowAddress < FI.Range.start()) { 390 Out.Report("Start address lies between valid Row table entries", 391 [&](raw_ostream &OS) { 392 OS << "error: DIE has a start address whose LowPC is " 393 "between the " 394 "line table Row[" 395 << RowIndex << "] with address " << HEX64(RowAddress) 396 << " and the next one.\n"; 397 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 398 }); 399 RowAddress = FI.Range.start(); 400 } else { 401 continue; 402 } 403 } 404 405 LineEntry LE(RowAddress, FileIdx, Row.Line); 406 if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { 407 // We have seen full duplicate line tables for functions in some 408 // DWARF files. Watch for those here by checking the last 409 // row was the function's end address (HighPC) and that the 410 // current line table entry's address is the same as the first 411 // line entry we already have in our "function_info.Lines". If 412 // so break out after printing a warning. 413 auto FirstLE = FI.OptLineTable->first(); 414 if (FirstLE && *FirstLE == LE) 415 // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird 416 Out.Report("Duplicate line table detected", [&](raw_ostream &OS) { 417 OS << "warning: duplicate line table detected for DIE:\n"; 418 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 419 }); 420 else 421 Out.Report("Non-monotonically increasing addresses", 422 [&](raw_ostream &OS) { 423 OS << "error: line table has addresses that do not " 424 << "monotonically increase:\n"; 425 for (uint32_t RowIndex2 : RowVector) 426 CUI.LineTable->Rows[RowIndex2].dump(OS); 427 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 428 }); 429 break; 430 } 431 432 // Skip multiple line entries for the same file and line. 433 auto LastLE = FI.OptLineTable->last(); 434 if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) 435 continue; 436 // Only push a row if it isn't an end sequence. End sequence markers are 437 // included for the last address in a function or the last contiguous 438 // address in a sequence. 439 if (Row.EndSequence) { 440 // End sequence means that the next line entry could have a lower address 441 // that the previous entries. So we clear the previous row so we don't 442 // trigger the line table error about address that do not monotonically 443 // increase. 444 PrevRow = DWARFDebugLine::Row(); 445 } else { 446 FI.OptLineTable->push(LE); 447 PrevRow = Row; 448 } 449 } 450 // If not line table rows were added, clear the line table so we don't encode 451 // on in the GSYM file. 452 if (FI.OptLineTable->empty()) 453 FI.OptLineTable = std::nullopt; 454 } 455 456 void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI, 457 DWARFDie Die) { 458 switch (Die.getTag()) { 459 case dwarf::DW_TAG_subprogram: { 460 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 461 if (!RangesOrError) { 462 consumeError(RangesOrError.takeError()); 463 break; 464 } 465 const DWARFAddressRangesVector &Ranges = RangesOrError.get(); 466 if (Ranges.empty()) 467 break; 468 auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); 469 if (!NameIndex) { 470 Out.Report("Function has no name", [&](raw_ostream &OS) { 471 OS << "error: function at " << HEX64(Die.getOffset()) 472 << " has no name\n "; 473 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 474 }); 475 break; 476 } 477 // All ranges for the subprogram DIE in case it has multiple. We need to 478 // pass this down into parseInlineInfo so we don't warn about inline 479 // ranges that are not in the current subrange of a function when they 480 // actually are in another subgrange. We do this because when a function 481 // has discontiguos ranges, we create multiple function entries with only 482 // the info for that range contained inside of it. 483 AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges); 484 485 // Create a function_info for each range 486 for (const DWARFAddressRange &Range : Ranges) { 487 // The low PC must be less than the high PC. Many linkers don't remove 488 // DWARF for functions that don't get linked into the final executable. 489 // If both the high and low pc have relocations, linkers will often set 490 // the address values for both to the same value to indicate the function 491 // has been remove. Other linkers have been known to set the one or both 492 // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 493 // byte addresses to indicate the function isn't valid. The check below 494 // tries to watch for these cases and abort if it runs into them. 495 if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) 496 break; 497 498 // Many linkers can't remove DWARF and might set the LowPC to zero. Since 499 // high PC can be an offset from the low PC in more recent DWARF versions 500 // we need to watch for a zero'ed low pc which we do using ValidTextRanges 501 // below. 502 if (!Gsym.IsValidTextAddress(Range.LowPC)) { 503 // We expect zero and -1 to be invalid addresses in DWARF depending 504 // on the linker of the DWARF. This indicates a function was stripped 505 // and the debug info wasn't able to be stripped from the DWARF. If 506 // the LowPC isn't zero or -1, then we should emit an error. 507 if (Range.LowPC != 0) { 508 if (!Gsym.isQuiet()) { 509 // Unexpected invalid address, emit a warning 510 Out.Report("Address range starts outside executable section", 511 [&](raw_ostream &OS) { 512 OS << "warning: DIE has an address range whose " 513 "start address " 514 "is not in any executable sections (" 515 << *Gsym.GetValidTextRanges() 516 << ") and will not be processed:\n"; 517 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 518 }); 519 } 520 } 521 break; 522 } 523 524 FunctionInfo FI; 525 FI.Range = {Range.LowPC, Range.HighPC}; 526 FI.Name = *NameIndex; 527 if (CUI.LineTable) 528 convertFunctionLineTable(Out, CUI, Die, Gsym, FI); 529 530 if (hasInlineInfo(Die, 0)) { 531 FI.Inline = InlineInfo(); 532 FI.Inline->Name = *NameIndex; 533 FI.Inline->Ranges.insert(FI.Range); 534 bool WarnIfEmpty = true; 535 parseInlineInfo(Gsym, Out, CUI, Die, 0, FI, *FI.Inline, 536 AllSubprogramRanges, WarnIfEmpty); 537 // Make sure we at least got some valid inline info other than just 538 // the top level function. If we didn't then remove the inline info 539 // from the function info. We have seen cases where LTO tries to modify 540 // the DWARF for functions and it messes up the address ranges for 541 // the inline functions so it is no longer valid. 542 // 543 // By checking if there are any valid children on the top level inline 544 // information object, we will know if we got anything valid from the 545 // debug info. 546 if (FI.Inline->Children.empty()) { 547 if (WarnIfEmpty && !Gsym.isQuiet()) 548 Out.Report("DIE contains inline functions with no valid ranges", 549 [&](raw_ostream &OS) { 550 OS << "warning: DIE contains inline function " 551 "information that has no valid ranges, removing " 552 "inline information:\n"; 553 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 554 }); 555 FI.Inline = std::nullopt; 556 } 557 } 558 559 // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs. 560 if (LoadDwarfCallSites) 561 parseCallSiteInfoFromDwarf(CUI, Die, FI); 562 563 Gsym.addFunctionInfo(std::move(FI)); 564 } 565 } break; 566 default: 567 break; 568 } 569 for (DWARFDie ChildDie : Die.children()) 570 handleDie(Out, CUI, ChildDie); 571 } 572 573 void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, 574 FunctionInfo &FI) { 575 // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE. 576 // DWARF specification: 577 // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset. 578 // - DW_AT_call_origin might point to a DIE of the function being called. 579 // For simplicity, we will just extract return_offset and possibly target name 580 // if available. 581 582 CallSiteInfoCollection CSIC; 583 584 for (DWARFDie Child : Die.children()) { 585 if (Child.getTag() != dwarf::DW_TAG_call_site) 586 continue; 587 588 CallSiteInfo CSI; 589 // DW_AT_call_return_pc: the return PC (address). We'll convert it to 590 // offset relative to FI's start. 591 auto ReturnPC = 592 dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc)); 593 if (!ReturnPC || !FI.Range.contains(*ReturnPC)) 594 continue; 595 596 CSI.ReturnOffset = *ReturnPC - FI.startAddress(); 597 598 // Attempt to get function name from DW_AT_call_origin. If present, we can 599 // insert it as a match regex. 600 if (DWARFDie OriginDie = 601 Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) { 602 603 // Include the full unmangled name if available, otherwise the short name. 604 if (const char *LinkName = OriginDie.getLinkageName()) { 605 uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false); 606 CSI.MatchRegex.push_back(LinkNameOff); 607 } else if (const char *ShortName = OriginDie.getShortName()) { 608 uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false); 609 CSI.MatchRegex.push_back(ShortNameOff); 610 } 611 } 612 613 // For now, we won't attempt to deduce InternalCall/ExternalCall flags 614 // from DWARF. 615 CSI.Flags = CallSiteInfo::Flags::None; 616 617 CSIC.CallSites.push_back(CSI); 618 } 619 620 if (!CSIC.CallSites.empty()) { 621 if (!FI.CallSites) 622 FI.CallSites = CallSiteInfoCollection(); 623 // Append parsed DWARF callsites: 624 llvm::append_range(FI.CallSites->CallSites, CSIC.CallSites); 625 } 626 } 627 628 Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { 629 size_t NumBefore = Gsym.getNumFunctionInfos(); 630 auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { 631 DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); 632 if (DwarfUnit.getDWOId()) { 633 DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); 634 if (!DWOCU->isDWOUnit()) 635 Out.Report( 636 "warning: Unable to retrieve DWO .debug_info section for some " 637 "object files. (Remove the --quiet flag for full output)", 638 [&](raw_ostream &OS) { 639 std::string DWOName = dwarf::toString( 640 DwarfUnit.getUnitDIE().find( 641 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 642 ""); 643 OS << "warning: Unable to retrieve DWO .debug_info section for " 644 << DWOName << "\n"; 645 }); 646 else { 647 ReturnDie = DWOCU->getUnitDIE(false); 648 } 649 } 650 return ReturnDie; 651 }; 652 if (NumThreads == 1) { 653 // Parse all DWARF data from this thread, use the same string/file table 654 // for everything 655 for (const auto &CU : DICtx.compile_units()) { 656 DWARFDie Die = getDie(*CU); 657 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 658 handleDie(Out, CUI, Die); 659 } 660 } else { 661 // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up 662 // front before we start accessing any DIEs since there might be 663 // cross compile unit references in the DWARF. If we don't do this we can 664 // end up crashing. 665 666 // We need to call getAbbreviations sequentially first so that getUnitDIE() 667 // only works with its local data. 668 for (const auto &CU : DICtx.compile_units()) 669 CU->getAbbreviations(); 670 671 // Now parse all DIEs in case we have cross compile unit references in a 672 // thread pool. 673 DefaultThreadPool pool(hardware_concurrency(NumThreads)); 674 for (const auto &CU : DICtx.compile_units()) 675 pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); 676 pool.wait(); 677 678 // Now convert all DWARF to GSYM in a thread pool. 679 std::mutex LogMutex; 680 for (const auto &CU : DICtx.compile_units()) { 681 DWARFDie Die = getDie(*CU); 682 if (Die) { 683 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 684 pool.async([this, CUI, &LogMutex, &Out, Die]() mutable { 685 std::string storage; 686 raw_string_ostream StrStream(storage); 687 OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr); 688 handleDie(ThreadOut, CUI, Die); 689 // Print ThreadLogStorage lines into an actual stream under a lock 690 std::lock_guard<std::mutex> guard(LogMutex); 691 if (Out.GetOS()) { 692 Out << storage; 693 } 694 Out.Merge(ThreadOut); 695 }); 696 } 697 } 698 pool.wait(); 699 } 700 size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; 701 Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; 702 return Error::success(); 703 } 704 705 llvm::Error DwarfTransformer::verify(StringRef GsymPath, 706 OutputAggregator &Out) { 707 Out << "Verifying GSYM file \"" << GsymPath << "\":\n"; 708 709 auto Gsym = GsymReader::openFile(GsymPath); 710 if (!Gsym) 711 return Gsym.takeError(); 712 713 auto NumAddrs = Gsym->getNumAddresses(); 714 DILineInfoSpecifier DLIS( 715 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, 716 DILineInfoSpecifier::FunctionNameKind::LinkageName); 717 std::string gsymFilename; 718 for (uint32_t I = 0; I < NumAddrs; ++I) { 719 auto FuncAddr = Gsym->getAddress(I); 720 if (!FuncAddr) 721 return createStringError(std::errc::invalid_argument, 722 "failed to extract address[%i]", I); 723 724 auto FI = Gsym->getFunctionInfo(*FuncAddr); 725 if (!FI) 726 return createStringError( 727 std::errc::invalid_argument, 728 "failed to extract function info for address 0x%" PRIu64, *FuncAddr); 729 730 for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { 731 const object::SectionedAddress SectAddr{ 732 Addr, object::SectionedAddress::UndefSection}; 733 auto LR = Gsym->lookup(Addr); 734 if (!LR) 735 return LR.takeError(); 736 737 auto DwarfInlineInfos = 738 DICtx.getInliningInfoForAddress(SectAddr, DLIS); 739 uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); 740 if (NumDwarfInlineInfos == 0) { 741 DwarfInlineInfos.addFrame( 742 DICtx.getLineInfoForAddress(SectAddr, DLIS).value_or(DILineInfo())); 743 } 744 745 // Check for 1 entry that has no file and line info 746 if (NumDwarfInlineInfos == 1 && 747 DwarfInlineInfos.getFrame(0).FileName == "<invalid>") { 748 DwarfInlineInfos = DIInliningInfo(); 749 NumDwarfInlineInfos = 0; 750 } 751 if (NumDwarfInlineInfos > 0 && 752 NumDwarfInlineInfos != LR->Locations.size()) { 753 if (Out.GetOS()) { 754 raw_ostream &Log = *Out.GetOS(); 755 Log << "error: address " << HEX64(Addr) << " has " 756 << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " 757 << LR->Locations.size() << "\n"; 758 Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; 759 for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { 760 const auto &dii = DwarfInlineInfos.getFrame(Idx); 761 Log << " [" << Idx << "]: " << dii.FunctionName << " @ " 762 << dii.FileName << ':' << dii.Line << '\n'; 763 } 764 Log << " " << LR->Locations.size() << " GSYM frames:\n"; 765 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; 766 ++Idx) { 767 const auto &gii = LR->Locations[Idx]; 768 Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir 769 << '/' << gii.Base << ':' << gii.Line << '\n'; 770 } 771 Gsym->dump(Log, *FI); 772 } 773 continue; 774 } 775 776 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; 777 ++Idx) { 778 const auto &gii = LR->Locations[Idx]; 779 if (Idx < NumDwarfInlineInfos) { 780 const auto &dii = DwarfInlineInfos.getFrame(Idx); 781 gsymFilename = LR->getSourceFile(Idx); 782 // Verify function name 783 if (!StringRef(dii.FunctionName).starts_with(gii.Name)) 784 Out << "error: address " << HEX64(Addr) << " DWARF function \"" 785 << dii.FunctionName.c_str() 786 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; 787 788 // Verify source file path 789 if (dii.FileName != gsymFilename) 790 Out << "error: address " << HEX64(Addr) << " DWARF path \"" 791 << dii.FileName.c_str() << "\" doesn't match GSYM path \"" 792 << gsymFilename.c_str() << "\"\n"; 793 // Verify source file line 794 if (dii.Line != gii.Line) 795 Out << "error: address " << HEX64(Addr) << " DWARF line " 796 << dii.Line << " != GSYM line " << gii.Line << "\n"; 797 } 798 } 799 } 800 } 801 return Error::success(); 802 } 803