1 //===- GsymReader.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/GSYM/GsymReader.h" 10 11 #include <assert.h> 12 #include <inttypes.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 16 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 17 #include "llvm/DebugInfo/GSYM/InlineInfo.h" 18 #include "llvm/DebugInfo/GSYM/LineTable.h" 19 #include "llvm/Support/BinaryStreamReader.h" 20 #include "llvm/Support/DataExtractor.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 23 using namespace llvm; 24 using namespace gsym; 25 26 GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) 27 : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {} 28 29 GsymReader::GsymReader(GsymReader &&RHS) = default; 30 31 GsymReader::~GsymReader() = default; 32 33 llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) { 34 // Open the input file and return an appropriate error if needed. 35 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = 36 MemoryBuffer::getFileOrSTDIN(Filename); 37 auto Err = BuffOrErr.getError(); 38 if (Err) 39 return llvm::errorCodeToError(Err); 40 return create(BuffOrErr.get()); 41 } 42 43 llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) { 44 auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); 45 return create(MemBuffer); 46 } 47 48 llvm::Expected<llvm::gsym::GsymReader> 49 GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) { 50 if (!MemBuffer) 51 return createStringError(std::errc::invalid_argument, 52 "invalid memory buffer"); 53 GsymReader GR(std::move(MemBuffer)); 54 llvm::Error Err = GR.parse(); 55 if (Err) 56 return std::move(Err); 57 return std::move(GR); 58 } 59 60 llvm::Error 61 GsymReader::parse() { 62 BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native); 63 // Check for the magic bytes. This file format is designed to be mmap'ed 64 // into a process and accessed as read only. This is done for performance 65 // and efficiency for symbolicating and parsing GSYM data. 66 if (FileData.readObject(Hdr)) 67 return createStringError(std::errc::invalid_argument, 68 "not enough data for a GSYM header"); 69 70 const auto HostByteOrder = llvm::endianness::native; 71 switch (Hdr->Magic) { 72 case GSYM_MAGIC: 73 Endian = HostByteOrder; 74 break; 75 case GSYM_CIGAM: 76 // This is a GSYM file, but not native endianness. 77 Endian = sys::IsBigEndianHost ? llvm::endianness::little 78 : llvm::endianness::big; 79 Swap.reset(new SwappedData); 80 break; 81 default: 82 return createStringError(std::errc::invalid_argument, 83 "not a GSYM file"); 84 } 85 86 bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little; 87 // Read a correctly byte swapped header if we need to. 88 if (Swap) { 89 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); 90 if (auto ExpectedHdr = Header::decode(Data)) 91 Swap->Hdr = ExpectedHdr.get(); 92 else 93 return ExpectedHdr.takeError(); 94 Hdr = &Swap->Hdr; 95 } 96 97 // Detect errors in the header and report any that are found. If we make it 98 // past this without errors, we know we have a good magic value, a supported 99 // version number, verified address offset size and a valid UUID size. 100 if (Error Err = Hdr->checkForError()) 101 return Err; 102 103 if (!Swap) { 104 // This is the native endianness case that is most common and optimized for 105 // efficient lookups. Here we just grab pointers to the native data and 106 // use ArrayRef objects to allow efficient read only access. 107 108 // Read the address offsets. 109 if (FileData.padToAlignment(Hdr->AddrOffSize) || 110 FileData.readArray(AddrOffsets, 111 Hdr->NumAddresses * Hdr->AddrOffSize)) 112 return createStringError(std::errc::invalid_argument, 113 "failed to read address table"); 114 115 // Read the address info offsets. 116 if (FileData.padToAlignment(4) || 117 FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)) 118 return createStringError(std::errc::invalid_argument, 119 "failed to read address info offsets table"); 120 121 // Read the file table. 122 uint32_t NumFiles = 0; 123 if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles)) 124 return createStringError(std::errc::invalid_argument, 125 "failed to read file table"); 126 127 // Get the string table. 128 FileData.setOffset(Hdr->StrtabOffset); 129 if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)) 130 return createStringError(std::errc::invalid_argument, 131 "failed to read string table"); 132 } else { 133 // This is the non native endianness case that is not common and not 134 // optimized for lookups. Here we decode the important tables into local 135 // storage and then set the ArrayRef objects to point to these swapped 136 // copies of the read only data so lookups can be as efficient as possible. 137 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); 138 139 // Read the address offsets. 140 uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize); 141 Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize); 142 switch (Hdr->AddrOffSize) { 143 case 1: 144 if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses)) 145 return createStringError(std::errc::invalid_argument, 146 "failed to read address table"); 147 break; 148 case 2: 149 if (!Data.getU16(&Offset, 150 reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()), 151 Hdr->NumAddresses)) 152 return createStringError(std::errc::invalid_argument, 153 "failed to read address table"); 154 break; 155 case 4: 156 if (!Data.getU32(&Offset, 157 reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()), 158 Hdr->NumAddresses)) 159 return createStringError(std::errc::invalid_argument, 160 "failed to read address table"); 161 break; 162 case 8: 163 if (!Data.getU64(&Offset, 164 reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()), 165 Hdr->NumAddresses)) 166 return createStringError(std::errc::invalid_argument, 167 "failed to read address table"); 168 } 169 AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets); 170 171 // Read the address info offsets. 172 Offset = alignTo(Offset, 4); 173 Swap->AddrInfoOffsets.resize(Hdr->NumAddresses); 174 if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses)) 175 AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets); 176 else 177 return createStringError(std::errc::invalid_argument, 178 "failed to read address table"); 179 // Read the file table. 180 const uint32_t NumFiles = Data.getU32(&Offset); 181 if (NumFiles > 0) { 182 Swap->Files.resize(NumFiles); 183 if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2)) 184 Files = ArrayRef<FileEntry>(Swap->Files); 185 else 186 return createStringError(std::errc::invalid_argument, 187 "failed to read file table"); 188 } 189 // Get the string table. 190 StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset, 191 Hdr->StrtabSize); 192 if (StrTab.Data.empty()) 193 return createStringError(std::errc::invalid_argument, 194 "failed to read string table"); 195 } 196 return Error::success(); 197 198 } 199 200 const Header &GsymReader::getHeader() const { 201 // The only way to get a GsymReader is from GsymReader::openFile(...) or 202 // GsymReader::copyBuffer() and the header must be valid and initialized to 203 // a valid pointer value, so the assert below should not trigger. 204 assert(Hdr); 205 return *Hdr; 206 } 207 208 std::optional<uint64_t> GsymReader::getAddress(size_t Index) const { 209 switch (Hdr->AddrOffSize) { 210 case 1: return addressForIndex<uint8_t>(Index); 211 case 2: return addressForIndex<uint16_t>(Index); 212 case 4: return addressForIndex<uint32_t>(Index); 213 case 8: return addressForIndex<uint64_t>(Index); 214 } 215 return std::nullopt; 216 } 217 218 std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const { 219 const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); 220 if (Index < NumAddrInfoOffsets) 221 return AddrInfoOffsets[Index]; 222 return std::nullopt; 223 } 224 225 Expected<uint64_t> 226 GsymReader::getAddressIndex(const uint64_t Addr) const { 227 if (Addr >= Hdr->BaseAddress) { 228 const uint64_t AddrOffset = Addr - Hdr->BaseAddress; 229 std::optional<uint64_t> AddrOffsetIndex; 230 switch (Hdr->AddrOffSize) { 231 case 1: 232 AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset); 233 break; 234 case 2: 235 AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset); 236 break; 237 case 4: 238 AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset); 239 break; 240 case 8: 241 AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset); 242 break; 243 default: 244 return createStringError(std::errc::invalid_argument, 245 "unsupported address offset size %u", 246 Hdr->AddrOffSize); 247 } 248 if (AddrOffsetIndex) 249 return *AddrOffsetIndex; 250 } 251 return createStringError(std::errc::invalid_argument, 252 "address 0x%" PRIx64 " is not in GSYM", Addr); 253 254 } 255 256 llvm::Expected<DataExtractor> 257 GsymReader::getFunctionInfoDataForAddress(uint64_t Addr, 258 uint64_t &FuncStartAddr) const { 259 Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr); 260 if (!ExpectedAddrIdx) 261 return ExpectedAddrIdx.takeError(); 262 const uint64_t FirstAddrIdx = *ExpectedAddrIdx; 263 // The AddrIdx is the first index of the function info entries that match 264 // \a Addr. We need to iterate over all function info objects that start with 265 // the same address until we find a range that contains \a Addr. 266 std::optional<uint64_t> FirstFuncStartAddr; 267 const size_t NumAddresses = getNumAddresses(); 268 for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) { 269 auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr); 270 // If there was an error, return the error. 271 if (!ExpextedData) 272 return ExpextedData; 273 274 // Remember the first function start address if it hasn't already been set. 275 // If it is already valid, check to see if it matches the first function 276 // start address and only continue if it matches. 277 if (FirstFuncStartAddr.has_value()) { 278 if (*FirstFuncStartAddr != FuncStartAddr) 279 break; // Done with consecutive function entries with same address. 280 } else { 281 FirstFuncStartAddr = FuncStartAddr; 282 } 283 // Make sure the current function address ranges contains \a Addr. 284 // Some symbols on Darwin don't have valid sizes, so if we run into a 285 // symbol with zero size, then we have found a match for our address. 286 287 // The first thing the encoding of a FunctionInfo object is the function 288 // size. 289 uint64_t Offset = 0; 290 uint32_t FuncSize = ExpextedData->getU32(&Offset); 291 if (FuncSize == 0 || 292 AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr)) 293 return ExpextedData; 294 } 295 return createStringError(std::errc::invalid_argument, 296 "address 0x%" PRIx64 " is not in GSYM", Addr); 297 } 298 299 llvm::Expected<DataExtractor> 300 GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx, 301 uint64_t &FuncStartAddr) const { 302 if (AddrIdx >= getNumAddresses()) 303 return createStringError(std::errc::invalid_argument, 304 "invalid address index %" PRIu64, AddrIdx); 305 const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx]; 306 assert((Endian == endianness::big || Endian == endianness::little) && 307 "Endian must be either big or little"); 308 StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset); 309 if (Bytes.empty()) 310 return createStringError(std::errc::invalid_argument, 311 "invalid address info offset 0x%" PRIx32, 312 AddrInfoOffset); 313 std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx); 314 if (!OptFuncStartAddr) 315 return createStringError(std::errc::invalid_argument, 316 "failed to extract address[%" PRIu64 "]", AddrIdx); 317 FuncStartAddr = *OptFuncStartAddr; 318 return DataExtractor(Bytes, Endian == llvm::endianness::little, 4); 319 } 320 321 llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const { 322 uint64_t FuncStartAddr = 0; 323 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr)) 324 return FunctionInfo::decode(*ExpectedData, FuncStartAddr); 325 else 326 return ExpectedData.takeError(); 327 } 328 329 llvm::Expected<FunctionInfo> 330 GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const { 331 uint64_t FuncStartAddr = 0; 332 if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr)) 333 return FunctionInfo::decode(*ExpectedData, FuncStartAddr); 334 else 335 return ExpectedData.takeError(); 336 } 337 338 llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const { 339 uint64_t FuncStartAddr = 0; 340 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr)) 341 return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr); 342 else 343 return ExpectedData.takeError(); 344 } 345 346 void GsymReader::dump(raw_ostream &OS) { 347 const auto &Header = getHeader(); 348 // Dump the GSYM header. 349 OS << Header << "\n"; 350 // Dump the address table. 351 OS << "Address Table:\n"; 352 OS << "INDEX OFFSET"; 353 354 switch (Hdr->AddrOffSize) { 355 case 1: OS << "8 "; break; 356 case 2: OS << "16"; break; 357 case 4: OS << "32"; break; 358 case 8: OS << "64"; break; 359 default: OS << "??"; break; 360 } 361 OS << " (ADDRESS)\n"; 362 OS << "====== =============================== \n"; 363 for (uint32_t I = 0; I < Header.NumAddresses; ++I) { 364 OS << format("[%4u] ", I); 365 switch (Hdr->AddrOffSize) { 366 case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break; 367 case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break; 368 case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break; 369 case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break; 370 default: break; 371 } 372 OS << " (" << HEX64(*getAddress(I)) << ")\n"; 373 } 374 // Dump the address info offsets table. 375 OS << "\nAddress Info Offsets:\n"; 376 OS << "INDEX Offset\n"; 377 OS << "====== ==========\n"; 378 for (uint32_t I = 0; I < Header.NumAddresses; ++I) 379 OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n"; 380 // Dump the file table. 381 OS << "\nFiles:\n"; 382 OS << "INDEX DIRECTORY BASENAME PATH\n"; 383 OS << "====== ========== ========== ==============================\n"; 384 for (uint32_t I = 0; I < Files.size(); ++I) { 385 OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' ' 386 << HEX32(Files[I].Base) << ' '; 387 dump(OS, getFile(I)); 388 OS << "\n"; 389 } 390 OS << "\n" << StrTab << "\n"; 391 392 for (uint32_t I = 0; I < Header.NumAddresses; ++I) { 393 OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": "; 394 if (auto FI = getFunctionInfoAtIndex(I)) 395 dump(OS, *FI); 396 else 397 logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:"); 398 } 399 } 400 401 void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) { 402 OS << FI.Range << " \"" << getString(FI.Name) << "\"\n"; 403 if (FI.OptLineTable) 404 dump(OS, *FI.OptLineTable); 405 if (FI.Inline) 406 dump(OS, *FI.Inline); 407 } 408 409 void GsymReader::dump(raw_ostream &OS, const LineTable <) { 410 OS << "LineTable:\n"; 411 for (auto &LE: LT) { 412 OS << " " << HEX64(LE.Addr) << ' '; 413 if (LE.File) 414 dump(OS, getFile(LE.File)); 415 OS << ':' << LE.Line << '\n'; 416 } 417 } 418 419 void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) { 420 if (Indent == 0) 421 OS << "InlineInfo:\n"; 422 else 423 OS.indent(Indent); 424 OS << II.Ranges << ' ' << getString(II.Name); 425 if (II.CallFile != 0) { 426 if (auto File = getFile(II.CallFile)) { 427 OS << " called from "; 428 dump(OS, File); 429 OS << ':' << II.CallLine; 430 } 431 } 432 OS << '\n'; 433 for (const auto &ChildII: II.Children) 434 dump(OS, ChildII, Indent + 2); 435 } 436 437 void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) { 438 if (FE) { 439 // IF we have the file from index 0, then don't print anything 440 if (FE->Dir == 0 && FE->Base == 0) 441 return; 442 StringRef Dir = getString(FE->Dir); 443 StringRef Base = getString(FE->Base); 444 if (!Dir.empty()) { 445 OS << Dir; 446 if (Dir.contains('\\') && !Dir.contains('/')) 447 OS << '\\'; 448 else 449 OS << '/'; 450 } 451 if (!Base.empty()) { 452 OS << Base; 453 } 454 if (!Dir.empty() || !Base.empty()) 455 return; 456 } 457 OS << "<invalid-file>"; 458 } 459