1 //===- GsymCreator.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 //===----------------------------------------------------------------------===// 7 8 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 9 #include "llvm/DebugInfo/GSYM/FileWriter.h" 10 #include "llvm/DebugInfo/GSYM/Header.h" 11 #include "llvm/DebugInfo/GSYM/LineTable.h" 12 #include "llvm/MC/StringTableBuilder.h" 13 #include "llvm/Support/raw_ostream.h" 14 15 #include <algorithm> 16 #include <cassert> 17 #include <functional> 18 #include <vector> 19 20 using namespace llvm; 21 using namespace gsym; 22 23 GsymCreator::GsymCreator(bool Quiet) 24 : StrTab(StringTableBuilder::ELF), Quiet(Quiet) { 25 insertFile(StringRef()); 26 } 27 28 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { 29 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 30 llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 31 // We must insert the strings first, then call the FileEntry constructor. 32 // If we inline the insertString() function call into the constructor, the 33 // call order is undefined due to parameter lists not having any ordering 34 // requirements. 35 const uint32_t Dir = insertString(directory); 36 const uint32_t Base = insertString(filename); 37 return insertFileEntry(FileEntry(Dir, Base)); 38 } 39 40 uint32_t GsymCreator::insertFileEntry(FileEntry FE) { 41 std::lock_guard<std::mutex> Guard(Mutex); 42 const auto NextIndex = Files.size(); 43 // Find FE in hash map and insert if not present. 44 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 45 if (R.second) 46 Files.emplace_back(FE); 47 return R.first->second; 48 } 49 50 uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) { 51 // File index zero is reserved for a FileEntry with no directory and no 52 // filename. Any other file and we need to copy the strings for the directory 53 // and filename. 54 if (FileIdx == 0) 55 return 0; 56 const FileEntry SrcFE = SrcGC.Files[FileIdx]; 57 // Copy the strings for the file and then add the newly converted file entry. 58 uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second); 59 uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second); 60 FileEntry DstFE(Dir, Base); 61 return insertFileEntry(DstFE); 62 } 63 64 llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder, 65 std::optional<uint64_t> SegmentSize) const { 66 if (SegmentSize) 67 return saveSegments(Path, ByteOrder, *SegmentSize); 68 std::error_code EC; 69 raw_fd_ostream OutStrm(Path, EC); 70 if (EC) 71 return llvm::errorCodeToError(EC); 72 FileWriter O(OutStrm, ByteOrder); 73 return encode(O); 74 } 75 76 llvm::Error GsymCreator::encode(FileWriter &O) const { 77 std::lock_guard<std::mutex> Guard(Mutex); 78 if (Funcs.empty()) 79 return createStringError(std::errc::invalid_argument, 80 "no functions to encode"); 81 if (!Finalized) 82 return createStringError(std::errc::invalid_argument, 83 "GsymCreator wasn't finalized prior to encoding"); 84 85 if (Funcs.size() > UINT32_MAX) 86 return createStringError(std::errc::invalid_argument, 87 "too many FunctionInfos"); 88 89 std::optional<uint64_t> BaseAddress = getBaseAddress(); 90 // Base address should be valid if we have any functions. 91 if (!BaseAddress) 92 return createStringError(std::errc::invalid_argument, 93 "invalid base address"); 94 Header Hdr; 95 Hdr.Magic = GSYM_MAGIC; 96 Hdr.Version = GSYM_VERSION; 97 Hdr.AddrOffSize = getAddressOffsetSize(); 98 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 99 Hdr.BaseAddress = *BaseAddress; 100 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 101 Hdr.StrtabOffset = 0; // We will fix this up later. 102 Hdr.StrtabSize = 0; // We will fix this up later. 103 memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 104 if (UUID.size() > sizeof(Hdr.UUID)) 105 return createStringError(std::errc::invalid_argument, 106 "invalid UUID size %u", (uint32_t)UUID.size()); 107 // Copy the UUID value if we have one. 108 if (UUID.size() > 0) 109 memcpy(Hdr.UUID, UUID.data(), UUID.size()); 110 // Write out the header. 111 llvm::Error Err = Hdr.encode(O); 112 if (Err) 113 return Err; 114 115 const uint64_t MaxAddressOffset = getMaxAddressOffset(); 116 // Write out the address offsets. 117 O.alignTo(Hdr.AddrOffSize); 118 for (const auto &FuncInfo : Funcs) { 119 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 120 // Make sure we calculated the address offsets byte size correctly by 121 // verifying the current address offset is within ranges. We have seen bugs 122 // introduced when the code changes that can cause problems here so it is 123 // good to catch this during testing. 124 assert(AddrOffset <= MaxAddressOffset); 125 (void)MaxAddressOffset; 126 switch (Hdr.AddrOffSize) { 127 case 1: 128 O.writeU8(static_cast<uint8_t>(AddrOffset)); 129 break; 130 case 2: 131 O.writeU16(static_cast<uint16_t>(AddrOffset)); 132 break; 133 case 4: 134 O.writeU32(static_cast<uint32_t>(AddrOffset)); 135 break; 136 case 8: 137 O.writeU64(AddrOffset); 138 break; 139 } 140 } 141 142 // Write out all zeros for the AddrInfoOffsets. 143 O.alignTo(4); 144 const off_t AddrInfoOffsetsOffset = O.tell(); 145 for (size_t i = 0, n = Funcs.size(); i < n; ++i) 146 O.writeU32(0); 147 148 // Write out the file table 149 O.alignTo(4); 150 assert(!Files.empty()); 151 assert(Files[0].Dir == 0); 152 assert(Files[0].Base == 0); 153 size_t NumFiles = Files.size(); 154 if (NumFiles > UINT32_MAX) 155 return createStringError(std::errc::invalid_argument, "too many files"); 156 O.writeU32(static_cast<uint32_t>(NumFiles)); 157 for (auto File : Files) { 158 O.writeU32(File.Dir); 159 O.writeU32(File.Base); 160 } 161 162 // Write out the string table. 163 const off_t StrtabOffset = O.tell(); 164 StrTab.write(O.get_stream()); 165 const off_t StrtabSize = O.tell() - StrtabOffset; 166 std::vector<uint32_t> AddrInfoOffsets; 167 168 // Write out the address infos for each function info. 169 for (const auto &FuncInfo : Funcs) { 170 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 171 AddrInfoOffsets.push_back(OffsetOrErr.get()); 172 else 173 return OffsetOrErr.takeError(); 174 } 175 // Fixup the string table offset and size in the header 176 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 177 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 178 179 // Fixup all address info offsets 180 uint64_t Offset = 0; 181 for (auto AddrInfoOffset : AddrInfoOffsets) { 182 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 183 Offset += 4; 184 } 185 return ErrorSuccess(); 186 } 187 188 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 189 std::lock_guard<std::mutex> Guard(Mutex); 190 if (Finalized) 191 return createStringError(std::errc::invalid_argument, "already finalized"); 192 Finalized = true; 193 194 // Don't let the string table indexes change by finalizing in order. 195 StrTab.finalizeInOrder(); 196 197 // Remove duplicates function infos that have both entries from debug info 198 // (DWARF or Breakpad) and entries from the SymbolTable. 199 // 200 // Also handle overlapping function. Usually there shouldn't be any, but they 201 // can and do happen in some rare cases. 202 // 203 // (a) (b) (c) 204 // ^ ^ ^ ^ 205 // |X |Y |X ^ |X 206 // | | | |Y | ^ 207 // | | | v v |Y 208 // v v v v 209 // 210 // In (a) and (b), Y is ignored and X will be reported for the full range. 211 // In (c), both functions will be included in the result and lookups for an 212 // address in the intersection will return Y because of binary search. 213 // 214 // Note that in case of (b), we cannot include Y in the result because then 215 // we wouldn't find any function for range (end of Y, end of X) 216 // with binary search 217 218 const auto NumBefore = Funcs.size(); 219 // Only sort and unique if this isn't a segment. If this is a segment we 220 // already finalized the main GsymCreator with all of the function infos 221 // and then the already sorted and uniqued function infos were added to this 222 // object. 223 if (!IsSegment) { 224 if (NumBefore > 1) { 225 // Sort function infos so we can emit sorted functions. 226 llvm::sort(Funcs); 227 std::vector<FunctionInfo> FinalizedFuncs; 228 FinalizedFuncs.reserve(Funcs.size()); 229 FinalizedFuncs.emplace_back(std::move(Funcs.front())); 230 for (size_t Idx=1; Idx < NumBefore; ++Idx) { 231 FunctionInfo &Prev = FinalizedFuncs.back(); 232 FunctionInfo &Curr = Funcs[Idx]; 233 // Empty ranges won't intersect, but we still need to 234 // catch the case where we have multiple symbols at the 235 // same address and coalesce them. 236 const bool ranges_equal = Prev.Range == Curr.Range; 237 if (ranges_equal || Prev.Range.intersects(Curr.Range)) { 238 // Overlapping ranges or empty identical ranges. 239 if (ranges_equal) { 240 // Same address range. Check if one is from debug 241 // info and the other is from a symbol table. If 242 // so, then keep the one with debug info. Our 243 // sorting guarantees that entries with matching 244 // address ranges that have debug info are last in 245 // the sort. 246 if (!(Prev == Curr)) { 247 if (Prev.hasRichInfo() && Curr.hasRichInfo()) { 248 if (!Quiet) { 249 OS << "warning: same address range contains " 250 "different debug " 251 << "info. Removing:\n" 252 << Prev << "\nIn favor of this one:\n" 253 << Curr << "\n"; 254 } 255 } 256 // We want to swap the current entry with the previous since 257 // later entries with the same range always have more debug info 258 // or different debug info. 259 std::swap(Prev, Curr); 260 } 261 } else { 262 if (!Quiet) { // print warnings about overlaps 263 OS << "warning: function ranges overlap:\n" 264 << Prev << "\n" 265 << Curr << "\n"; 266 } 267 FinalizedFuncs.emplace_back(std::move(Curr)); 268 } 269 } else { 270 if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) { 271 // Symbols on macOS don't have address ranges, so if the range 272 // doesn't match and the size is zero, then we replace the empty 273 // symbol function info with the current one. 274 std::swap(Prev, Curr); 275 } else { 276 FinalizedFuncs.emplace_back(std::move(Curr)); 277 } 278 } 279 } 280 std::swap(Funcs, FinalizedFuncs); 281 } 282 // If our last function info entry doesn't have a size and if we have valid 283 // text ranges, we should set the size of the last entry since any search for 284 // a high address might match our last entry. By fixing up this size, we can 285 // help ensure we don't cause lookups to always return the last symbol that 286 // has no size when doing lookups. 287 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 288 if (auto Range = 289 ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) { 290 Funcs.back().Range = {Funcs.back().Range.start(), Range->end()}; 291 } 292 } 293 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 294 << Funcs.size() << " total\n"; 295 } 296 return Error::success(); 297 } 298 299 uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) { 300 // String offset at zero is always the empty string, no copying needed. 301 if (StrOff == 0) 302 return 0; 303 return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second); 304 } 305 306 uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 307 if (S.empty()) 308 return 0; 309 310 // The hash can be calculated outside the lock. 311 CachedHashStringRef CHStr(S); 312 std::lock_guard<std::mutex> Guard(Mutex); 313 if (Copy) { 314 // We need to provide backing storage for the string if requested 315 // since StringTableBuilder stores references to strings. Any string 316 // that comes from a section in an object file doesn't need to be 317 // copied, but any string created by code will need to be copied. 318 // This allows GsymCreator to be really fast when parsing DWARF and 319 // other object files as most strings don't need to be copied. 320 if (!StrTab.contains(CHStr)) 321 CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), 322 CHStr.hash()}; 323 } 324 const uint32_t StrOff = StrTab.add(CHStr); 325 // Save a mapping of string offsets to the cached string reference in case 326 // we need to segment the GSYM file and copy string from one string table to 327 // another. 328 if (StringOffsetMap.count(StrOff) == 0) 329 StringOffsetMap.insert(std::make_pair(StrOff, CHStr)); 330 return StrOff; 331 } 332 333 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 334 std::lock_guard<std::mutex> Guard(Mutex); 335 Funcs.emplace_back(std::move(FI)); 336 } 337 338 void GsymCreator::forEachFunctionInfo( 339 std::function<bool(FunctionInfo &)> const &Callback) { 340 std::lock_guard<std::mutex> Guard(Mutex); 341 for (auto &FI : Funcs) { 342 if (!Callback(FI)) 343 break; 344 } 345 } 346 347 void GsymCreator::forEachFunctionInfo( 348 std::function<bool(const FunctionInfo &)> const &Callback) const { 349 std::lock_guard<std::mutex> Guard(Mutex); 350 for (const auto &FI : Funcs) { 351 if (!Callback(FI)) 352 break; 353 } 354 } 355 356 size_t GsymCreator::getNumFunctionInfos() const { 357 std::lock_guard<std::mutex> Guard(Mutex); 358 return Funcs.size(); 359 } 360 361 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 362 if (ValidTextRanges) 363 return ValidTextRanges->contains(Addr); 364 return true; // No valid text ranges has been set, so accept all ranges. 365 } 366 367 std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const { 368 // If we have finalized then Funcs are sorted. If we are a segment then 369 // Funcs will be sorted as well since function infos get added from an 370 // already finalized GsymCreator object where its functions were sorted and 371 // uniqued. 372 if ((Finalized || IsSegment) && !Funcs.empty()) 373 return std::optional<uint64_t>(Funcs.front().startAddress()); 374 return std::nullopt; 375 } 376 377 std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const { 378 // If we have finalized then Funcs are sorted. If we are a segment then 379 // Funcs will be sorted as well since function infos get added from an 380 // already finalized GsymCreator object where its functions were sorted and 381 // uniqued. 382 if ((Finalized || IsSegment) && !Funcs.empty()) 383 return std::optional<uint64_t>(Funcs.back().startAddress()); 384 return std::nullopt; 385 } 386 387 std::optional<uint64_t> GsymCreator::getBaseAddress() const { 388 if (BaseAddress) 389 return BaseAddress; 390 return getFirstFunctionAddress(); 391 } 392 393 uint64_t GsymCreator::getMaxAddressOffset() const { 394 switch (getAddressOffsetSize()) { 395 case 1: return UINT8_MAX; 396 case 2: return UINT16_MAX; 397 case 4: return UINT32_MAX; 398 case 8: return UINT64_MAX; 399 } 400 llvm_unreachable("invalid address offset"); 401 } 402 403 uint8_t GsymCreator::getAddressOffsetSize() const { 404 const std::optional<uint64_t> BaseAddress = getBaseAddress(); 405 const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress(); 406 if (BaseAddress && LastFuncAddr) { 407 const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress; 408 if (AddrDelta <= UINT8_MAX) 409 return 1; 410 else if (AddrDelta <= UINT16_MAX) 411 return 2; 412 else if (AddrDelta <= UINT32_MAX) 413 return 4; 414 return 8; 415 } 416 return 1; 417 } 418 419 uint64_t GsymCreator::calculateHeaderAndTableSize() const { 420 uint64_t Size = sizeof(Header); 421 const size_t NumFuncs = Funcs.size(); 422 // Add size of address offset table 423 Size += NumFuncs * getAddressOffsetSize(); 424 // Add size of address info offsets which are 32 bit integers in version 1. 425 Size += NumFuncs * sizeof(uint32_t); 426 // Add file table size 427 Size += Files.size() * sizeof(FileEntry); 428 // Add string table size 429 Size += StrTab.getSize(); 430 431 return Size; 432 } 433 434 // This function takes a InlineInfo class that was copy constructed from an 435 // InlineInfo from the \a SrcGC and updates all members that point to strings 436 // and files to point to strings and files from this GsymCreator. 437 void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) { 438 II.Name = copyString(SrcGC, II.Name); 439 II.CallFile = copyFile(SrcGC, II.CallFile); 440 for (auto &ChildII: II.Children) 441 fixupInlineInfo(SrcGC, ChildII); 442 } 443 444 uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) { 445 // To copy a function info we need to copy any files and strings over into 446 // this GsymCreator and then copy the function info and update the string 447 // table offsets to match the new offsets. 448 const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx]; 449 450 FunctionInfo DstFI; 451 DstFI.Range = SrcFI.Range; 452 DstFI.Name = copyString(SrcGC, SrcFI.Name); 453 // Copy the line table if there is one. 454 if (SrcFI.OptLineTable) { 455 // Copy the entire line table. 456 DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value()); 457 // Fixup all LineEntry::File entries which are indexes in the the file table 458 // from SrcGC and must be converted to file indexes from this GsymCreator. 459 LineTable &DstLT = DstFI.OptLineTable.value(); 460 const size_t NumLines = DstLT.size(); 461 for (size_t I=0; I<NumLines; ++I) { 462 LineEntry &LE = DstLT.get(I); 463 LE.File = copyFile(SrcGC, LE.File); 464 } 465 } 466 // Copy the inline information if needed. 467 if (SrcFI.Inline) { 468 // Make a copy of the source inline information. 469 DstFI.Inline = SrcFI.Inline.value(); 470 // Fixup all strings and files in the copied inline information. 471 fixupInlineInfo(SrcGC, *DstFI.Inline); 472 } 473 std::lock_guard<std::mutex> Guard(Mutex); 474 Funcs.emplace_back(DstFI); 475 return Funcs.back().cacheEncoding(); 476 } 477 478 llvm::Error GsymCreator::saveSegments(StringRef Path, 479 llvm::endianness ByteOrder, 480 uint64_t SegmentSize) const { 481 if (SegmentSize == 0) 482 return createStringError(std::errc::invalid_argument, 483 "invalid segment size zero"); 484 485 size_t FuncIdx = 0; 486 const size_t NumFuncs = Funcs.size(); 487 while (FuncIdx < NumFuncs) { 488 llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC = 489 createSegment(SegmentSize, FuncIdx); 490 if (ExpectedGC) { 491 GsymCreator *GC = ExpectedGC->get(); 492 if (GC == NULL) 493 break; // We had not more functions to encode. 494 raw_null_ostream ErrorStrm; 495 llvm::Error Err = GC->finalize(ErrorStrm); 496 if (Err) 497 return Err; 498 std::string SegmentedGsymPath; 499 raw_string_ostream SGP(SegmentedGsymPath); 500 std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress(); 501 if (FirstFuncAddr) { 502 SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1); 503 SGP.flush(); 504 Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt); 505 if (Err) 506 return Err; 507 } 508 } else { 509 return ExpectedGC.takeError(); 510 } 511 } 512 return Error::success(); 513 } 514 515 llvm::Expected<std::unique_ptr<GsymCreator>> 516 GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const { 517 // No function entries, return empty unique pointer 518 if (FuncIdx >= Funcs.size()) 519 return std::unique_ptr<GsymCreator>(); 520 521 std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true)); 522 523 // Tell the creator that this is a segment. 524 GC->setIsSegment(); 525 526 // Set the base address if there is one. 527 if (BaseAddress) 528 GC->setBaseAddress(*BaseAddress); 529 // Copy the UUID value from this object into the new creator. 530 GC->setUUID(UUID); 531 const size_t NumFuncs = Funcs.size(); 532 // Track how big the function infos are for the current segment so we can 533 // emit segments that are close to the requested size. It is quick math to 534 // determine the current header and tables sizes, so we can do that each loop. 535 uint64_t SegmentFuncInfosSize = 0; 536 for (; FuncIdx < NumFuncs; ++FuncIdx) { 537 const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize(); 538 if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) { 539 if (SegmentFuncInfosSize == 0) 540 return createStringError(std::errc::invalid_argument, 541 "a segment size of %" PRIu64 " is to small to " 542 "fit any function infos, specify a larger value", 543 SegmentSize); 544 545 break; 546 } 547 SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4); 548 } 549 return std::move(GC); 550 } 551