1 //===- GsymCreator.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 //===----------------------------------------------------------------------===// 7 8 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 9 #include "llvm/DebugInfo/GSYM/FileWriter.h" 10 #include "llvm/DebugInfo/GSYM/Header.h" 11 #include "llvm/DebugInfo/GSYM/LineTable.h" 12 #include "llvm/DebugInfo/GSYM/OutputAggregator.h" 13 #include "llvm/MC/StringTableBuilder.h" 14 #include "llvm/Support/raw_ostream.h" 15 16 #include <algorithm> 17 #include <cassert> 18 #include <functional> 19 #include <vector> 20 21 using namespace llvm; 22 using namespace gsym; 23 24 GsymCreator::GsymCreator(bool Quiet) 25 : StrTab(StringTableBuilder::ELF), Quiet(Quiet) { 26 insertFile(StringRef()); 27 } 28 29 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { 30 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 31 llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 32 // We must insert the strings first, then call the FileEntry constructor. 33 // If we inline the insertString() function call into the constructor, the 34 // call order is undefined due to parameter lists not having any ordering 35 // requirements. 36 const uint32_t Dir = insertString(directory); 37 const uint32_t Base = insertString(filename); 38 return insertFileEntry(FileEntry(Dir, Base)); 39 } 40 41 uint32_t GsymCreator::insertFileEntry(FileEntry FE) { 42 std::lock_guard<std::mutex> Guard(Mutex); 43 const auto NextIndex = Files.size(); 44 // Find FE in hash map and insert if not present. 45 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 46 if (R.second) 47 Files.emplace_back(FE); 48 return R.first->second; 49 } 50 51 uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) { 52 // File index zero is reserved for a FileEntry with no directory and no 53 // filename. Any other file and we need to copy the strings for the directory 54 // and filename. 55 if (FileIdx == 0) 56 return 0; 57 const FileEntry SrcFE = SrcGC.Files[FileIdx]; 58 // Copy the strings for the file and then add the newly converted file entry. 59 uint32_t Dir = 60 SrcFE.Dir == 0 61 ? 0 62 : StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second); 63 uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second); 64 FileEntry DstFE(Dir, Base); 65 return insertFileEntry(DstFE); 66 } 67 68 llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder, 69 std::optional<uint64_t> SegmentSize) const { 70 if (SegmentSize) 71 return saveSegments(Path, ByteOrder, *SegmentSize); 72 std::error_code EC; 73 raw_fd_ostream OutStrm(Path, EC); 74 if (EC) 75 return llvm::errorCodeToError(EC); 76 FileWriter O(OutStrm, ByteOrder); 77 return encode(O); 78 } 79 80 llvm::Error GsymCreator::encode(FileWriter &O) const { 81 std::lock_guard<std::mutex> Guard(Mutex); 82 if (Funcs.empty()) 83 return createStringError(std::errc::invalid_argument, 84 "no functions to encode"); 85 if (!Finalized) 86 return createStringError(std::errc::invalid_argument, 87 "GsymCreator wasn't finalized prior to encoding"); 88 89 if (Funcs.size() > UINT32_MAX) 90 return createStringError(std::errc::invalid_argument, 91 "too many FunctionInfos"); 92 93 std::optional<uint64_t> BaseAddress = getBaseAddress(); 94 // Base address should be valid if we have any functions. 95 if (!BaseAddress) 96 return createStringError(std::errc::invalid_argument, 97 "invalid base address"); 98 Header Hdr; 99 Hdr.Magic = GSYM_MAGIC; 100 Hdr.Version = GSYM_VERSION; 101 Hdr.AddrOffSize = getAddressOffsetSize(); 102 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 103 Hdr.BaseAddress = *BaseAddress; 104 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 105 Hdr.StrtabOffset = 0; // We will fix this up later. 106 Hdr.StrtabSize = 0; // We will fix this up later. 107 memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 108 if (UUID.size() > sizeof(Hdr.UUID)) 109 return createStringError(std::errc::invalid_argument, 110 "invalid UUID size %u", (uint32_t)UUID.size()); 111 // Copy the UUID value if we have one. 112 if (UUID.size() > 0) 113 memcpy(Hdr.UUID, UUID.data(), UUID.size()); 114 // Write out the header. 115 llvm::Error Err = Hdr.encode(O); 116 if (Err) 117 return Err; 118 119 const uint64_t MaxAddressOffset = getMaxAddressOffset(); 120 // Write out the address offsets. 121 O.alignTo(Hdr.AddrOffSize); 122 for (const auto &FuncInfo : Funcs) { 123 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 124 // Make sure we calculated the address offsets byte size correctly by 125 // verifying the current address offset is within ranges. We have seen bugs 126 // introduced when the code changes that can cause problems here so it is 127 // good to catch this during testing. 128 assert(AddrOffset <= MaxAddressOffset); 129 (void)MaxAddressOffset; 130 switch (Hdr.AddrOffSize) { 131 case 1: 132 O.writeU8(static_cast<uint8_t>(AddrOffset)); 133 break; 134 case 2: 135 O.writeU16(static_cast<uint16_t>(AddrOffset)); 136 break; 137 case 4: 138 O.writeU32(static_cast<uint32_t>(AddrOffset)); 139 break; 140 case 8: 141 O.writeU64(AddrOffset); 142 break; 143 } 144 } 145 146 // Write out all zeros for the AddrInfoOffsets. 147 O.alignTo(4); 148 const off_t AddrInfoOffsetsOffset = O.tell(); 149 for (size_t i = 0, n = Funcs.size(); i < n; ++i) 150 O.writeU32(0); 151 152 // Write out the file table 153 O.alignTo(4); 154 assert(!Files.empty()); 155 assert(Files[0].Dir == 0); 156 assert(Files[0].Base == 0); 157 size_t NumFiles = Files.size(); 158 if (NumFiles > UINT32_MAX) 159 return createStringError(std::errc::invalid_argument, "too many files"); 160 O.writeU32(static_cast<uint32_t>(NumFiles)); 161 for (auto File : Files) { 162 O.writeU32(File.Dir); 163 O.writeU32(File.Base); 164 } 165 166 // Write out the string table. 167 const off_t StrtabOffset = O.tell(); 168 StrTab.write(O.get_stream()); 169 const off_t StrtabSize = O.tell() - StrtabOffset; 170 std::vector<uint32_t> AddrInfoOffsets; 171 172 // Write out the address infos for each function info. 173 for (const auto &FuncInfo : Funcs) { 174 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 175 AddrInfoOffsets.push_back(OffsetOrErr.get()); 176 else 177 return OffsetOrErr.takeError(); 178 } 179 // Fixup the string table offset and size in the header 180 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 181 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 182 183 // Fixup all address info offsets 184 uint64_t Offset = 0; 185 for (auto AddrInfoOffset : AddrInfoOffsets) { 186 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 187 Offset += 4; 188 } 189 return ErrorSuccess(); 190 } 191 192 llvm::Error GsymCreator::finalize(OutputAggregator &Out) { 193 std::lock_guard<std::mutex> Guard(Mutex); 194 if (Finalized) 195 return createStringError(std::errc::invalid_argument, "already finalized"); 196 Finalized = true; 197 198 // Don't let the string table indexes change by finalizing in order. 199 StrTab.finalizeInOrder(); 200 201 // Remove duplicates function infos that have both entries from debug info 202 // (DWARF or Breakpad) and entries from the SymbolTable. 203 // 204 // Also handle overlapping function. Usually there shouldn't be any, but they 205 // can and do happen in some rare cases. 206 // 207 // (a) (b) (c) 208 // ^ ^ ^ ^ 209 // |X |Y |X ^ |X 210 // | | | |Y | ^ 211 // | | | v v |Y 212 // v v v v 213 // 214 // In (a) and (b), Y is ignored and X will be reported for the full range. 215 // In (c), both functions will be included in the result and lookups for an 216 // address in the intersection will return Y because of binary search. 217 // 218 // Note that in case of (b), we cannot include Y in the result because then 219 // we wouldn't find any function for range (end of Y, end of X) 220 // with binary search 221 222 const auto NumBefore = Funcs.size(); 223 // Only sort and unique if this isn't a segment. If this is a segment we 224 // already finalized the main GsymCreator with all of the function infos 225 // and then the already sorted and uniqued function infos were added to this 226 // object. 227 if (!IsSegment) { 228 if (NumBefore > 1) { 229 // Sort function infos so we can emit sorted functions. 230 llvm::sort(Funcs); 231 std::vector<FunctionInfo> FinalizedFuncs; 232 FinalizedFuncs.reserve(Funcs.size()); 233 FinalizedFuncs.emplace_back(std::move(Funcs.front())); 234 for (size_t Idx=1; Idx < NumBefore; ++Idx) { 235 FunctionInfo &Prev = FinalizedFuncs.back(); 236 FunctionInfo &Curr = Funcs[Idx]; 237 // Empty ranges won't intersect, but we still need to 238 // catch the case where we have multiple symbols at the 239 // same address and coalesce them. 240 const bool ranges_equal = Prev.Range == Curr.Range; 241 if (ranges_equal || Prev.Range.intersects(Curr.Range)) { 242 // Overlapping ranges or empty identical ranges. 243 if (ranges_equal) { 244 // Same address range. Check if one is from debug 245 // info and the other is from a symbol table. If 246 // so, then keep the one with debug info. Our 247 // sorting guarantees that entries with matching 248 // address ranges that have debug info are last in 249 // the sort. 250 if (!(Prev == Curr)) { 251 if (Prev.hasRichInfo() && Curr.hasRichInfo()) 252 Out.Report( 253 "Duplicate address ranges with different debug info.", 254 [&](raw_ostream &OS) { 255 OS << "warning: same address range contains " 256 "different debug " 257 << "info. Removing:\n" 258 << Prev << "\nIn favor of this one:\n" 259 << Curr << "\n"; 260 }); 261 262 // We want to swap the current entry with the previous since 263 // later entries with the same range always have more debug info 264 // or different debug info. 265 std::swap(Prev, Curr); 266 } 267 } else { 268 Out.Report("Overlapping function ranges", [&](raw_ostream &OS) { 269 // print warnings about overlaps 270 OS << "warning: function ranges overlap:\n" 271 << Prev << "\n" 272 << Curr << "\n"; 273 }); 274 FinalizedFuncs.emplace_back(std::move(Curr)); 275 } 276 } else { 277 if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) { 278 // Symbols on macOS don't have address ranges, so if the range 279 // doesn't match and the size is zero, then we replace the empty 280 // symbol function info with the current one. 281 std::swap(Prev, Curr); 282 } else { 283 FinalizedFuncs.emplace_back(std::move(Curr)); 284 } 285 } 286 } 287 std::swap(Funcs, FinalizedFuncs); 288 } 289 // If our last function info entry doesn't have a size and if we have valid 290 // text ranges, we should set the size of the last entry since any search for 291 // a high address might match our last entry. By fixing up this size, we can 292 // help ensure we don't cause lookups to always return the last symbol that 293 // has no size when doing lookups. 294 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 295 if (auto Range = 296 ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) { 297 Funcs.back().Range = {Funcs.back().Range.start(), Range->end()}; 298 } 299 } 300 Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 301 << Funcs.size() << " total\n"; 302 } 303 return Error::success(); 304 } 305 306 uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) { 307 // String offset at zero is always the empty string, no copying needed. 308 if (StrOff == 0) 309 return 0; 310 return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second); 311 } 312 313 uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 314 if (S.empty()) 315 return 0; 316 317 // The hash can be calculated outside the lock. 318 CachedHashStringRef CHStr(S); 319 std::lock_guard<std::mutex> Guard(Mutex); 320 if (Copy) { 321 // We need to provide backing storage for the string if requested 322 // since StringTableBuilder stores references to strings. Any string 323 // that comes from a section in an object file doesn't need to be 324 // copied, but any string created by code will need to be copied. 325 // This allows GsymCreator to be really fast when parsing DWARF and 326 // other object files as most strings don't need to be copied. 327 if (!StrTab.contains(CHStr)) 328 CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), 329 CHStr.hash()}; 330 } 331 const uint32_t StrOff = StrTab.add(CHStr); 332 // Save a mapping of string offsets to the cached string reference in case 333 // we need to segment the GSYM file and copy string from one string table to 334 // another. 335 if (StringOffsetMap.count(StrOff) == 0) 336 StringOffsetMap.insert(std::make_pair(StrOff, CHStr)); 337 return StrOff; 338 } 339 340 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 341 std::lock_guard<std::mutex> Guard(Mutex); 342 Funcs.emplace_back(std::move(FI)); 343 } 344 345 void GsymCreator::forEachFunctionInfo( 346 std::function<bool(FunctionInfo &)> const &Callback) { 347 std::lock_guard<std::mutex> Guard(Mutex); 348 for (auto &FI : Funcs) { 349 if (!Callback(FI)) 350 break; 351 } 352 } 353 354 void GsymCreator::forEachFunctionInfo( 355 std::function<bool(const FunctionInfo &)> const &Callback) const { 356 std::lock_guard<std::mutex> Guard(Mutex); 357 for (const auto &FI : Funcs) { 358 if (!Callback(FI)) 359 break; 360 } 361 } 362 363 size_t GsymCreator::getNumFunctionInfos() const { 364 std::lock_guard<std::mutex> Guard(Mutex); 365 return Funcs.size(); 366 } 367 368 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 369 if (ValidTextRanges) 370 return ValidTextRanges->contains(Addr); 371 return true; // No valid text ranges has been set, so accept all ranges. 372 } 373 374 std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const { 375 // If we have finalized then Funcs are sorted. If we are a segment then 376 // Funcs will be sorted as well since function infos get added from an 377 // already finalized GsymCreator object where its functions were sorted and 378 // uniqued. 379 if ((Finalized || IsSegment) && !Funcs.empty()) 380 return std::optional<uint64_t>(Funcs.front().startAddress()); 381 return std::nullopt; 382 } 383 384 std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const { 385 // If we have finalized then Funcs are sorted. If we are a segment then 386 // Funcs will be sorted as well since function infos get added from an 387 // already finalized GsymCreator object where its functions were sorted and 388 // uniqued. 389 if ((Finalized || IsSegment) && !Funcs.empty()) 390 return std::optional<uint64_t>(Funcs.back().startAddress()); 391 return std::nullopt; 392 } 393 394 std::optional<uint64_t> GsymCreator::getBaseAddress() const { 395 if (BaseAddress) 396 return BaseAddress; 397 return getFirstFunctionAddress(); 398 } 399 400 uint64_t GsymCreator::getMaxAddressOffset() const { 401 switch (getAddressOffsetSize()) { 402 case 1: return UINT8_MAX; 403 case 2: return UINT16_MAX; 404 case 4: return UINT32_MAX; 405 case 8: return UINT64_MAX; 406 } 407 llvm_unreachable("invalid address offset"); 408 } 409 410 uint8_t GsymCreator::getAddressOffsetSize() const { 411 const std::optional<uint64_t> BaseAddress = getBaseAddress(); 412 const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress(); 413 if (BaseAddress && LastFuncAddr) { 414 const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress; 415 if (AddrDelta <= UINT8_MAX) 416 return 1; 417 else if (AddrDelta <= UINT16_MAX) 418 return 2; 419 else if (AddrDelta <= UINT32_MAX) 420 return 4; 421 return 8; 422 } 423 return 1; 424 } 425 426 uint64_t GsymCreator::calculateHeaderAndTableSize() const { 427 uint64_t Size = sizeof(Header); 428 const size_t NumFuncs = Funcs.size(); 429 // Add size of address offset table 430 Size += NumFuncs * getAddressOffsetSize(); 431 // Add size of address info offsets which are 32 bit integers in version 1. 432 Size += NumFuncs * sizeof(uint32_t); 433 // Add file table size 434 Size += Files.size() * sizeof(FileEntry); 435 // Add string table size 436 Size += StrTab.getSize(); 437 438 return Size; 439 } 440 441 // This function takes a InlineInfo class that was copy constructed from an 442 // InlineInfo from the \a SrcGC and updates all members that point to strings 443 // and files to point to strings and files from this GsymCreator. 444 void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) { 445 II.Name = copyString(SrcGC, II.Name); 446 II.CallFile = copyFile(SrcGC, II.CallFile); 447 for (auto &ChildII: II.Children) 448 fixupInlineInfo(SrcGC, ChildII); 449 } 450 451 uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) { 452 // To copy a function info we need to copy any files and strings over into 453 // this GsymCreator and then copy the function info and update the string 454 // table offsets to match the new offsets. 455 const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx]; 456 457 FunctionInfo DstFI; 458 DstFI.Range = SrcFI.Range; 459 DstFI.Name = copyString(SrcGC, SrcFI.Name); 460 // Copy the line table if there is one. 461 if (SrcFI.OptLineTable) { 462 // Copy the entire line table. 463 DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value()); 464 // Fixup all LineEntry::File entries which are indexes in the the file table 465 // from SrcGC and must be converted to file indexes from this GsymCreator. 466 LineTable &DstLT = DstFI.OptLineTable.value(); 467 const size_t NumLines = DstLT.size(); 468 for (size_t I=0; I<NumLines; ++I) { 469 LineEntry &LE = DstLT.get(I); 470 LE.File = copyFile(SrcGC, LE.File); 471 } 472 } 473 // Copy the inline information if needed. 474 if (SrcFI.Inline) { 475 // Make a copy of the source inline information. 476 DstFI.Inline = SrcFI.Inline.value(); 477 // Fixup all strings and files in the copied inline information. 478 fixupInlineInfo(SrcGC, *DstFI.Inline); 479 } 480 std::lock_guard<std::mutex> Guard(Mutex); 481 Funcs.emplace_back(DstFI); 482 return Funcs.back().cacheEncoding(); 483 } 484 485 llvm::Error GsymCreator::saveSegments(StringRef Path, 486 llvm::endianness ByteOrder, 487 uint64_t SegmentSize) const { 488 if (SegmentSize == 0) 489 return createStringError(std::errc::invalid_argument, 490 "invalid segment size zero"); 491 492 size_t FuncIdx = 0; 493 const size_t NumFuncs = Funcs.size(); 494 while (FuncIdx < NumFuncs) { 495 llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC = 496 createSegment(SegmentSize, FuncIdx); 497 if (ExpectedGC) { 498 GsymCreator *GC = ExpectedGC->get(); 499 if (GC == NULL) 500 break; // We had not more functions to encode. 501 // Don't collect any messages at all 502 OutputAggregator Out(nullptr); 503 llvm::Error Err = GC->finalize(Out); 504 if (Err) 505 return Err; 506 std::string SegmentedGsymPath; 507 raw_string_ostream SGP(SegmentedGsymPath); 508 std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress(); 509 if (FirstFuncAddr) { 510 SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1); 511 SGP.flush(); 512 Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt); 513 if (Err) 514 return Err; 515 } 516 } else { 517 return ExpectedGC.takeError(); 518 } 519 } 520 return Error::success(); 521 } 522 523 llvm::Expected<std::unique_ptr<GsymCreator>> 524 GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const { 525 // No function entries, return empty unique pointer 526 if (FuncIdx >= Funcs.size()) 527 return std::unique_ptr<GsymCreator>(); 528 529 std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true)); 530 531 // Tell the creator that this is a segment. 532 GC->setIsSegment(); 533 534 // Set the base address if there is one. 535 if (BaseAddress) 536 GC->setBaseAddress(*BaseAddress); 537 // Copy the UUID value from this object into the new creator. 538 GC->setUUID(UUID); 539 const size_t NumFuncs = Funcs.size(); 540 // Track how big the function infos are for the current segment so we can 541 // emit segments that are close to the requested size. It is quick math to 542 // determine the current header and tables sizes, so we can do that each loop. 543 uint64_t SegmentFuncInfosSize = 0; 544 for (; FuncIdx < NumFuncs; ++FuncIdx) { 545 const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize(); 546 if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) { 547 if (SegmentFuncInfosSize == 0) 548 return createStringError(std::errc::invalid_argument, 549 "a segment size of %" PRIu64 " is to small to " 550 "fit any function infos, specify a larger value", 551 SegmentSize); 552 553 break; 554 } 555 SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4); 556 } 557 return std::move(GC); 558 } 559