1 //===-- BreakpadRecords.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" 10 #include "lldb/lldb-defines.h" 11 #include "llvm/ADT/StringExtras.h" 12 #include "llvm/ADT/StringSwitch.h" 13 #include "llvm/Support/Endian.h" 14 #include "llvm/Support/FormatVariadic.h" 15 #include <optional> 16 17 using namespace lldb_private; 18 using namespace lldb_private::breakpad; 19 20 namespace { 21 enum class Token { 22 Unknown, 23 Module, 24 Info, 25 CodeID, 26 File, 27 Func, 28 Inline, 29 InlineOrigin, 30 Public, 31 Stack, 32 CFI, 33 Init, 34 Win, 35 }; 36 } 37 38 template<typename T> 39 static T stringTo(llvm::StringRef Str); 40 41 template <> Token stringTo<Token>(llvm::StringRef Str) { 42 return llvm::StringSwitch<Token>(Str) 43 .Case("MODULE", Token::Module) 44 .Case("INFO", Token::Info) 45 .Case("CODE_ID", Token::CodeID) 46 .Case("FILE", Token::File) 47 .Case("FUNC", Token::Func) 48 .Case("INLINE", Token::Inline) 49 .Case("INLINE_ORIGIN", Token::InlineOrigin) 50 .Case("PUBLIC", Token::Public) 51 .Case("STACK", Token::Stack) 52 .Case("CFI", Token::CFI) 53 .Case("INIT", Token::Init) 54 .Case("WIN", Token::Win) 55 .Default(Token::Unknown); 56 } 57 58 template <> 59 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) { 60 using llvm::Triple; 61 return llvm::StringSwitch<Triple::OSType>(Str) 62 .Case("Linux", Triple::Linux) 63 .Case("mac", Triple::MacOSX) 64 .Case("windows", Triple::Win32) 65 .Default(Triple::UnknownOS); 66 } 67 68 template <> 69 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) { 70 using llvm::Triple; 71 return llvm::StringSwitch<Triple::ArchType>(Str) 72 .Case("arm", Triple::arm) 73 .Cases("arm64", "arm64e", Triple::aarch64) 74 .Case("mips", Triple::mips) 75 .Case("msp430", Triple::msp430) 76 .Case("ppc", Triple::ppc) 77 .Case("ppc64", Triple::ppc64) 78 .Case("s390", Triple::systemz) 79 .Case("sparc", Triple::sparc) 80 .Case("sparcv9", Triple::sparcv9) 81 .Case("x86", Triple::x86) 82 .Cases("x86_64", "x86_64h", Triple::x86_64) 83 .Default(Triple::UnknownArch); 84 } 85 86 template<typename T> 87 static T consume(llvm::StringRef &Str) { 88 llvm::StringRef Token; 89 std::tie(Token, Str) = getToken(Str); 90 return stringTo<T>(Token); 91 } 92 93 /// Return the number of hex digits needed to encode an (POD) object of a given 94 /// type. 95 template <typename T> static constexpr size_t hex_digits() { 96 return 2 * sizeof(T); 97 } 98 99 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 100 struct data_t { 101 using uuid_t = uint8_t[16]; 102 uuid_t uuid; 103 llvm::support::ubig32_t age; 104 } data; 105 static_assert(sizeof(data) == 20); 106 // The textual module id encoding should be between 33 and 40 bytes long, 107 // depending on the size of the age field, which is of variable length. 108 // The first three chunks of the id are encoded in big endian, so we need to 109 // byte-swap those. 110 if (str.size() <= hex_digits<data_t::uuid_t>() || 111 str.size() > hex_digits<data_t>()) 112 return UUID(); 113 if (!all_of(str, llvm::isHexDigit)) 114 return UUID(); 115 116 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>()); 117 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>()); 118 119 llvm::copy(fromHex(uuid_str), data.uuid); 120 uint32_t age; 121 bool success = to_integer(age_str, age, 16); 122 assert(success); 123 UNUSED_IF_ASSERT_DISABLED(success); 124 data.age = age; 125 126 // On non-windows, the age field should always be zero, so we don't include to 127 // match the native uuid format of these platforms. 128 return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data) 129 : sizeof(data.uuid)); 130 } 131 132 std::optional<Record::Kind> Record::classify(llvm::StringRef Line) { 133 Token Tok = consume<Token>(Line); 134 switch (Tok) { 135 case Token::Module: 136 return Record::Module; 137 case Token::Info: 138 return Record::Info; 139 case Token::File: 140 return Record::File; 141 case Token::Func: 142 return Record::Func; 143 case Token::Public: 144 return Record::Public; 145 case Token::Stack: 146 Tok = consume<Token>(Line); 147 switch (Tok) { 148 case Token::CFI: 149 return Record::StackCFI; 150 case Token::Win: 151 return Record::StackWin; 152 default: 153 return std::nullopt; 154 } 155 case Token::Inline: 156 return Record::Inline; 157 case Token::InlineOrigin: 158 return Record::InlineOrigin; 159 case Token::Unknown: 160 // Optimistically assume that any unrecognised token means this is a line 161 // record, those don't have a special keyword and start directly with a 162 // hex number. 163 return Record::Line; 164 165 case Token::CodeID: 166 case Token::CFI: 167 case Token::Init: 168 case Token::Win: 169 // These should never appear at the start of a valid record. 170 return std::nullopt; 171 } 172 llvm_unreachable("Fully covered switch above!"); 173 } 174 175 std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { 176 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 177 if (consume<Token>(Line) != Token::Module) 178 return std::nullopt; 179 180 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line); 181 if (OS == llvm::Triple::UnknownOS) 182 return std::nullopt; 183 184 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line); 185 if (Arch == llvm::Triple::UnknownArch) 186 return std::nullopt; 187 188 llvm::StringRef Str; 189 std::tie(Str, Line) = getToken(Line); 190 UUID ID = parseModuleId(OS, Str); 191 if (!ID) 192 return std::nullopt; 193 194 return ModuleRecord(OS, Arch, std::move(ID)); 195 } 196 197 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 198 const ModuleRecord &R) { 199 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " 200 << llvm::Triple::getArchTypeName(R.Arch) << " " 201 << R.ID.GetAsString(); 202 } 203 204 std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { 205 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 206 if (consume<Token>(Line) != Token::Info) 207 return std::nullopt; 208 209 if (consume<Token>(Line) != Token::CodeID) 210 return std::nullopt; 211 212 llvm::StringRef Str; 213 std::tie(Str, Line) = getToken(Line); 214 // If we don't have any text following the code ID (e.g. on linux), we should 215 // use this as the UUID. Otherwise, we should revert back to the module ID. 216 UUID ID; 217 if (Line.trim().empty()) { 218 if (Str.empty() || !ID.SetFromStringRef(Str)) 219 return std::nullopt; 220 } 221 return InfoRecord(std::move(ID)); 222 } 223 224 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 225 const InfoRecord &R) { 226 return OS << "INFO CODE_ID " << R.ID.GetAsString(); 227 } 228 229 template <typename T> 230 static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) { 231 // TOKEN number name 232 if (consume<Token>(Line) != TokenType) 233 return std::nullopt; 234 235 llvm::StringRef Str; 236 size_t Number; 237 std::tie(Str, Line) = getToken(Line); 238 if (!to_integer(Str, Number)) 239 return std::nullopt; 240 241 llvm::StringRef Name = Line.trim(); 242 if (Name.empty()) 243 return std::nullopt; 244 245 return T(Number, Name); 246 } 247 248 std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { 249 // FILE number name 250 return parseNumberName<FileRecord>(Line, Token::File); 251 } 252 253 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 254 const FileRecord &R) { 255 return OS << "FILE " << R.Number << " " << R.Name; 256 } 257 258 std::optional<InlineOriginRecord> 259 InlineOriginRecord::parse(llvm::StringRef Line) { 260 // INLINE_ORIGIN number name 261 return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin); 262 } 263 264 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 265 const InlineOriginRecord &R) { 266 return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name; 267 } 268 269 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, 270 lldb::addr_t &Address, lldb::addr_t *Size, 271 lldb::addr_t &ParamSize, llvm::StringRef &Name) { 272 // PUBLIC [m] address param_size name 273 // or 274 // FUNC [m] address size param_size name 275 276 Token Tok = Size ? Token::Func : Token::Public; 277 278 if (consume<Token>(Line) != Tok) 279 return false; 280 281 llvm::StringRef Str; 282 std::tie(Str, Line) = getToken(Line); 283 Multiple = Str == "m"; 284 285 if (Multiple) 286 std::tie(Str, Line) = getToken(Line); 287 if (!to_integer(Str, Address, 16)) 288 return false; 289 290 if (Tok == Token::Func) { 291 std::tie(Str, Line) = getToken(Line); 292 if (!to_integer(Str, *Size, 16)) 293 return false; 294 } 295 296 std::tie(Str, Line) = getToken(Line); 297 if (!to_integer(Str, ParamSize, 16)) 298 return false; 299 300 Name = Line.trim(); 301 if (Name.empty()) 302 return false; 303 304 return true; 305 } 306 307 std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { 308 bool Multiple; 309 lldb::addr_t Address, Size, ParamSize; 310 llvm::StringRef Name; 311 312 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) 313 return FuncRecord(Multiple, Address, Size, ParamSize, Name); 314 315 return std::nullopt; 316 } 317 318 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { 319 return L.Multiple == R.Multiple && L.Address == R.Address && 320 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; 321 } 322 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 323 const FuncRecord &R) { 324 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", 325 R.Multiple ? "m " : "", R.Address, R.Size, 326 R.ParamSize, R.Name); 327 } 328 329 std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) { 330 // INLINE inline_nest_level call_site_line call_site_file_num origin_num 331 // [address size]+ 332 if (consume<Token>(Line) != Token::Inline) 333 return std::nullopt; 334 335 llvm::SmallVector<llvm::StringRef> Tokens; 336 SplitString(Line, Tokens, " "); 337 if (Tokens.size() < 6 || Tokens.size() % 2 == 1) 338 return std::nullopt; 339 340 size_t InlineNestLevel; 341 uint32_t CallSiteLineNum; 342 size_t CallSiteFileNum; 343 size_t OriginNum; 344 if (!(to_integer(Tokens[0], InlineNestLevel) && 345 to_integer(Tokens[1], CallSiteLineNum) && 346 to_integer(Tokens[2], CallSiteFileNum) && 347 to_integer(Tokens[3], OriginNum))) 348 return std::nullopt; 349 350 InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum, 351 CallSiteFileNum, OriginNum); 352 for (size_t i = 4; i < Tokens.size(); i += 2) { 353 lldb::addr_t Address; 354 if (!to_integer(Tokens[i], Address, 16)) 355 return std::nullopt; 356 lldb::addr_t Size; 357 if (!to_integer(Tokens[i + 1].trim(), Size, 16)) 358 return std::nullopt; 359 Record.Ranges.emplace_back(Address, Size); 360 } 361 return Record; 362 } 363 364 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) { 365 return L.InlineNestLevel == R.InlineNestLevel && 366 L.CallSiteLineNum == R.CallSiteLineNum && 367 L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum && 368 L.Ranges == R.Ranges; 369 } 370 371 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 372 const InlineRecord &R) { 373 OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel, 374 R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum); 375 for (const auto &range : R.Ranges) { 376 OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second); 377 } 378 return OS; 379 } 380 381 std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { 382 lldb::addr_t Address; 383 llvm::StringRef Str; 384 std::tie(Str, Line) = getToken(Line); 385 if (!to_integer(Str, Address, 16)) 386 return std::nullopt; 387 388 lldb::addr_t Size; 389 std::tie(Str, Line) = getToken(Line); 390 if (!to_integer(Str, Size, 16)) 391 return std::nullopt; 392 393 uint32_t LineNum; 394 std::tie(Str, Line) = getToken(Line); 395 if (!to_integer(Str, LineNum)) 396 return std::nullopt; 397 398 size_t FileNum; 399 std::tie(Str, Line) = getToken(Line); 400 if (!to_integer(Str, FileNum)) 401 return std::nullopt; 402 403 return LineRecord(Address, Size, LineNum, FileNum); 404 } 405 406 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { 407 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && 408 L.FileNum == R.FileNum; 409 } 410 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 411 const LineRecord &R) { 412 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, 413 R.LineNum, R.FileNum); 414 } 415 416 std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { 417 bool Multiple; 418 lldb::addr_t Address, ParamSize; 419 llvm::StringRef Name; 420 421 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) 422 return PublicRecord(Multiple, Address, ParamSize, Name); 423 424 return std::nullopt; 425 } 426 427 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { 428 return L.Multiple == R.Multiple && L.Address == R.Address && 429 L.ParamSize == R.ParamSize && L.Name == R.Name; 430 } 431 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 432 const PublicRecord &R) { 433 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", 434 R.Multiple ? "m " : "", R.Address, R.ParamSize, 435 R.Name); 436 } 437 438 std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) { 439 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ... 440 // or 441 // STACK CFI address reg1: expr1 reg2: expr2 ... 442 // No token in exprN ends with a colon. 443 444 if (consume<Token>(Line) != Token::Stack) 445 return std::nullopt; 446 if (consume<Token>(Line) != Token::CFI) 447 return std::nullopt; 448 449 llvm::StringRef Str; 450 std::tie(Str, Line) = getToken(Line); 451 452 bool IsInitRecord = stringTo<Token>(Str) == Token::Init; 453 if (IsInitRecord) 454 std::tie(Str, Line) = getToken(Line); 455 456 lldb::addr_t Address; 457 if (!to_integer(Str, Address, 16)) 458 return std::nullopt; 459 460 std::optional<lldb::addr_t> Size; 461 if (IsInitRecord) { 462 Size.emplace(); 463 std::tie(Str, Line) = getToken(Line); 464 if (!to_integer(Str, *Size, 16)) 465 return std::nullopt; 466 } 467 468 return StackCFIRecord(Address, Size, Line.trim()); 469 } 470 471 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) { 472 return L.Address == R.Address && L.Size == R.Size && 473 L.UnwindRules == R.UnwindRules; 474 } 475 476 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 477 const StackCFIRecord &R) { 478 OS << "STACK CFI "; 479 if (R.Size) 480 OS << "INIT "; 481 OS << llvm::formatv("{0:x-} ", R.Address); 482 if (R.Size) 483 OS << llvm::formatv("{0:x-} ", *R.Size); 484 return OS << " " << R.UnwindRules; 485 } 486 487 std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) { 488 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size 489 // saved_register_size local_size max_stack_size has_program_string 490 // program_string_OR_allocates_base_pointer 491 492 if (consume<Token>(Line) != Token::Stack) 493 return std::nullopt; 494 if (consume<Token>(Line) != Token::Win) 495 return std::nullopt; 496 497 llvm::StringRef Str; 498 uint8_t Type; 499 std::tie(Str, Line) = getToken(Line); 500 // Right now we only support the "FrameData" frame type. 501 if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData) 502 return std::nullopt; 503 504 lldb::addr_t RVA; 505 std::tie(Str, Line) = getToken(Line); 506 if (!to_integer(Str, RVA, 16)) 507 return std::nullopt; 508 509 lldb::addr_t CodeSize; 510 std::tie(Str, Line) = getToken(Line); 511 if (!to_integer(Str, CodeSize, 16)) 512 return std::nullopt; 513 514 // Skip fields which we aren't using right now. 515 std::tie(Str, Line) = getToken(Line); // prologue_size 516 std::tie(Str, Line) = getToken(Line); // epilogue_size 517 518 lldb::addr_t ParameterSize; 519 std::tie(Str, Line) = getToken(Line); 520 if (!to_integer(Str, ParameterSize, 16)) 521 return std::nullopt; 522 523 lldb::addr_t SavedRegisterSize; 524 std::tie(Str, Line) = getToken(Line); 525 if (!to_integer(Str, SavedRegisterSize, 16)) 526 return std::nullopt; 527 528 lldb::addr_t LocalSize; 529 std::tie(Str, Line) = getToken(Line); 530 if (!to_integer(Str, LocalSize, 16)) 531 return std::nullopt; 532 533 std::tie(Str, Line) = getToken(Line); // max_stack_size 534 535 uint8_t HasProgramString; 536 std::tie(Str, Line) = getToken(Line); 537 if (!to_integer(Str, HasProgramString)) 538 return std::nullopt; 539 // FrameData records should always have a program string. 540 if (!HasProgramString) 541 return std::nullopt; 542 543 return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize, 544 LocalSize, Line.trim()); 545 } 546 547 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) { 548 return L.RVA == R.RVA && L.CodeSize == R.CodeSize && 549 L.ParameterSize == R.ParameterSize && 550 L.SavedRegisterSize == R.SavedRegisterSize && 551 L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString; 552 } 553 554 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 555 const StackWinRecord &R) { 556 return OS << llvm::formatv( 557 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA, 558 R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize, 559 R.ProgramString); 560 } 561 562 llvm::StringRef breakpad::toString(Record::Kind K) { 563 switch (K) { 564 case Record::Module: 565 return "MODULE"; 566 case Record::Info: 567 return "INFO"; 568 case Record::File: 569 return "FILE"; 570 case Record::Func: 571 return "FUNC"; 572 case Record::Inline: 573 return "INLINE"; 574 case Record::InlineOrigin: 575 return "INLINE_ORIGIN"; 576 case Record::Line: 577 return "LINE"; 578 case Record::Public: 579 return "PUBLIC"; 580 case Record::StackCFI: 581 return "STACK CFI"; 582 case Record::StackWin: 583 return "STACK WIN"; 584 } 585 llvm_unreachable("Unknown record kind!"); 586 } 587