xref: /freebsd/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "lldb/lldb-defines.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/Support/Endian.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <optional>
16 
17 using namespace lldb_private;
18 using namespace lldb_private::breakpad;
19 
namespace {
/// Keywords that can be recognised in the text of a symbol file record.
/// `Unknown` stands for any text that is not one of the known keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Inline,
  InlineOrigin,
  Public,
  Stack,
  CFI,
  Init,
  Win,
};
} // namespace
37 
/// Convert the text \p Str into a value of type \p T. This is only a
/// declaration; the behaviour for each supported type comes from an explicit
/// specialization.
template<typename T>
static T stringTo(llvm::StringRef Str);
40 
41 template <> Token stringTo<Token>(llvm::StringRef Str) {
42   return llvm::StringSwitch<Token>(Str)
43       .Case("MODULE", Token::Module)
44       .Case("INFO", Token::Info)
45       .Case("CODE_ID", Token::CodeID)
46       .Case("FILE", Token::File)
47       .Case("FUNC", Token::Func)
48       .Case("INLINE", Token::Inline)
49       .Case("INLINE_ORIGIN", Token::InlineOrigin)
50       .Case("PUBLIC", Token::Public)
51       .Case("STACK", Token::Stack)
52       .Case("CFI", Token::CFI)
53       .Case("INIT", Token::Init)
54       .Case("WIN", Token::Win)
55       .Default(Token::Unknown);
56 }
57 
58 template <>
59 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
60   using llvm::Triple;
61   return llvm::StringSwitch<Triple::OSType>(Str)
62       .Case("Linux", Triple::Linux)
63       .Case("mac", Triple::MacOSX)
64       .Case("windows", Triple::Win32)
65       .Default(Triple::UnknownOS);
66 }
67 
68 template <>
69 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
70   using llvm::Triple;
71   return llvm::StringSwitch<Triple::ArchType>(Str)
72       .Case("arm", Triple::arm)
73       .Cases("arm64", "arm64e", Triple::aarch64)
74       .Case("mips", Triple::mips)
75       .Case("msp430", Triple::msp430)
76       .Case("ppc", Triple::ppc)
77       .Case("ppc64", Triple::ppc64)
78       .Case("s390", Triple::systemz)
79       .Case("sparc", Triple::sparc)
80       .Case("sparcv9", Triple::sparcv9)
81       .Case("x86", Triple::x86)
82       .Cases("x86_64", "x86_64h", Triple::x86_64)
83       .Default(Triple::UnknownArch);
84 }
85 
86 template<typename T>
87 static T consume(llvm::StringRef &Str) {
88   llvm::StringRef Token;
89   std::tie(Token, Str) = getToken(Str);
90   return stringTo<T>(Token);
91 }
92 
/// Number of hexadecimal digits required to spell out every byte of an object
/// of type \p T (two digits per byte).
template <typename T> static constexpr size_t hex_digits() {
  constexpr size_t DigitsPerByte = 2;
  return sizeof(T) * DigitsPerByte;
}
98 
99 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
100   struct data_t {
101     using uuid_t = uint8_t[16];
102     uuid_t uuid;
103     llvm::support::ubig32_t age;
104   } data;
105   static_assert(sizeof(data) == 20);
106   // The textual module id encoding should be between 33 and 40 bytes long,
107   // depending on the size of the age field, which is of variable length.
108   // The first three chunks of the id are encoded in big endian, so we need to
109   // byte-swap those.
110   if (str.size() <= hex_digits<data_t::uuid_t>() ||
111       str.size() > hex_digits<data_t>())
112     return UUID();
113   if (!all_of(str, llvm::isHexDigit))
114     return UUID();
115 
116   llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
117   llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
118 
119   llvm::copy(fromHex(uuid_str), data.uuid);
120   uint32_t age;
121   bool success = to_integer(age_str, age, 16);
122   assert(success);
123   UNUSED_IF_ASSERT_DISABLED(success);
124   data.age = age;
125 
126   // On non-windows, the age field should always be zero, so we don't include to
127   // match the native uuid format of these platforms.
128   return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data)
129                                                : sizeof(data.uuid));
130 }
131 
132 std::optional<Record::Kind> Record::classify(llvm::StringRef Line) {
133   Token Tok = consume<Token>(Line);
134   switch (Tok) {
135   case Token::Module:
136     return Record::Module;
137   case Token::Info:
138     return Record::Info;
139   case Token::File:
140     return Record::File;
141   case Token::Func:
142     return Record::Func;
143   case Token::Public:
144     return Record::Public;
145   case Token::Stack:
146     Tok = consume<Token>(Line);
147     switch (Tok) {
148     case Token::CFI:
149       return Record::StackCFI;
150     case Token::Win:
151       return Record::StackWin;
152     default:
153       return std::nullopt;
154     }
155   case Token::Inline:
156     return Record::Inline;
157   case Token::InlineOrigin:
158     return Record::InlineOrigin;
159   case Token::Unknown:
160     // Optimistically assume that any unrecognised token means this is a line
161     // record, those don't have a special keyword and start directly with a
162     // hex number.
163     return Record::Line;
164 
165   case Token::CodeID:
166   case Token::CFI:
167   case Token::Init:
168   case Token::Win:
169     // These should never appear at the start of a valid record.
170     return std::nullopt;
171   }
172   llvm_unreachable("Fully covered switch above!");
173 }
174 
175 std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
176   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
177   if (consume<Token>(Line) != Token::Module)
178     return std::nullopt;
179 
180   llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
181   if (OS == llvm::Triple::UnknownOS)
182     return std::nullopt;
183 
184   llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
185   if (Arch == llvm::Triple::UnknownArch)
186     return std::nullopt;
187 
188   llvm::StringRef Str;
189   std::tie(Str, Line) = getToken(Line);
190   UUID ID = parseModuleId(OS, Str);
191   if (!ID)
192     return std::nullopt;
193 
194   return ModuleRecord(OS, Arch, std::move(ID));
195 }
196 
197 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
198                                         const ModuleRecord &R) {
199   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
200             << llvm::Triple::getArchTypeName(R.Arch) << " "
201             << R.ID.GetAsString();
202 }
203 
204 std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
205   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
206   if (consume<Token>(Line) != Token::Info)
207     return std::nullopt;
208 
209   if (consume<Token>(Line) != Token::CodeID)
210     return std::nullopt;
211 
212   llvm::StringRef Str;
213   std::tie(Str, Line) = getToken(Line);
214   // If we don't have any text following the code ID (e.g. on linux), we should
215   // use this as the UUID. Otherwise, we should revert back to the module ID.
216   UUID ID;
217   if (Line.trim().empty()) {
218     if (Str.empty() || !ID.SetFromStringRef(Str))
219       return std::nullopt;
220   }
221   return InfoRecord(std::move(ID));
222 }
223 
224 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
225                                         const InfoRecord &R) {
226   return OS << "INFO CODE_ID " << R.ID.GetAsString();
227 }
228 
229 template <typename T>
230 static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) {
231   // TOKEN number name
232   if (consume<Token>(Line) != TokenType)
233     return std::nullopt;
234 
235   llvm::StringRef Str;
236   size_t Number;
237   std::tie(Str, Line) = getToken(Line);
238   if (!to_integer(Str, Number))
239     return std::nullopt;
240 
241   llvm::StringRef Name = Line.trim();
242   if (Name.empty())
243     return std::nullopt;
244 
245   return T(Number, Name);
246 }
247 
248 std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
249   // FILE number name
250   return parseNumberName<FileRecord>(Line, Token::File);
251 }
252 
253 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
254                                         const FileRecord &R) {
255   return OS << "FILE " << R.Number << " " << R.Name;
256 }
257 
258 std::optional<InlineOriginRecord>
259 InlineOriginRecord::parse(llvm::StringRef Line) {
260   // INLINE_ORIGIN number name
261   return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin);
262 }
263 
264 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
265                                         const InlineOriginRecord &R) {
266   return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
267 }
268 
269 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
270                               lldb::addr_t &Address, lldb::addr_t *Size,
271                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
272   // PUBLIC [m] address param_size name
273   // or
274   // FUNC [m] address size param_size name
275 
276   Token Tok = Size ? Token::Func : Token::Public;
277 
278   if (consume<Token>(Line) != Tok)
279     return false;
280 
281   llvm::StringRef Str;
282   std::tie(Str, Line) = getToken(Line);
283   Multiple = Str == "m";
284 
285   if (Multiple)
286     std::tie(Str, Line) = getToken(Line);
287   if (!to_integer(Str, Address, 16))
288     return false;
289 
290   if (Tok == Token::Func) {
291     std::tie(Str, Line) = getToken(Line);
292     if (!to_integer(Str, *Size, 16))
293       return false;
294   }
295 
296   std::tie(Str, Line) = getToken(Line);
297   if (!to_integer(Str, ParamSize, 16))
298     return false;
299 
300   Name = Line.trim();
301   if (Name.empty())
302     return false;
303 
304   return true;
305 }
306 
307 std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
308   bool Multiple;
309   lldb::addr_t Address, Size, ParamSize;
310   llvm::StringRef Name;
311 
312   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
313     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
314 
315   return std::nullopt;
316 }
317 
318 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
319   return L.Multiple == R.Multiple && L.Address == R.Address &&
320          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
321 }
322 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
323                                         const FuncRecord &R) {
324   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
325                              R.Multiple ? "m " : "", R.Address, R.Size,
326                              R.ParamSize, R.Name);
327 }
328 
329 std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
330   // INLINE inline_nest_level call_site_line call_site_file_num origin_num
331   // [address size]+
332   if (consume<Token>(Line) != Token::Inline)
333     return std::nullopt;
334 
335   llvm::SmallVector<llvm::StringRef> Tokens;
336   SplitString(Line, Tokens, " ");
337   if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
338     return std::nullopt;
339 
340   size_t InlineNestLevel;
341   uint32_t CallSiteLineNum;
342   size_t CallSiteFileNum;
343   size_t OriginNum;
344   if (!(to_integer(Tokens[0], InlineNestLevel) &&
345         to_integer(Tokens[1], CallSiteLineNum) &&
346         to_integer(Tokens[2], CallSiteFileNum) &&
347         to_integer(Tokens[3], OriginNum)))
348     return std::nullopt;
349 
350   InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum,
351                                      CallSiteFileNum, OriginNum);
352   for (size_t i = 4; i < Tokens.size(); i += 2) {
353     lldb::addr_t Address;
354     if (!to_integer(Tokens[i], Address, 16))
355       return std::nullopt;
356     lldb::addr_t Size;
357     if (!to_integer(Tokens[i + 1].trim(), Size, 16))
358       return std::nullopt;
359     Record.Ranges.emplace_back(Address, Size);
360   }
361   return Record;
362 }
363 
364 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) {
365   return L.InlineNestLevel == R.InlineNestLevel &&
366          L.CallSiteLineNum == R.CallSiteLineNum &&
367          L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum &&
368          L.Ranges == R.Ranges;
369 }
370 
371 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
372                                         const InlineRecord &R) {
373   OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel,
374                       R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum);
375   for (const auto &range : R.Ranges) {
376     OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second);
377   }
378   return OS;
379 }
380 
381 std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
382   lldb::addr_t Address;
383   llvm::StringRef Str;
384   std::tie(Str, Line) = getToken(Line);
385   if (!to_integer(Str, Address, 16))
386     return std::nullopt;
387 
388   lldb::addr_t Size;
389   std::tie(Str, Line) = getToken(Line);
390   if (!to_integer(Str, Size, 16))
391     return std::nullopt;
392 
393   uint32_t LineNum;
394   std::tie(Str, Line) = getToken(Line);
395   if (!to_integer(Str, LineNum))
396     return std::nullopt;
397 
398   size_t FileNum;
399   std::tie(Str, Line) = getToken(Line);
400   if (!to_integer(Str, FileNum))
401     return std::nullopt;
402 
403   return LineRecord(Address, Size, LineNum, FileNum);
404 }
405 
406 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
407   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
408          L.FileNum == R.FileNum;
409 }
410 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
411                                         const LineRecord &R) {
412   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
413                              R.LineNum, R.FileNum);
414 }
415 
416 std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
417   bool Multiple;
418   lldb::addr_t Address, ParamSize;
419   llvm::StringRef Name;
420 
421   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
422     return PublicRecord(Multiple, Address, ParamSize, Name);
423 
424   return std::nullopt;
425 }
426 
427 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
428   return L.Multiple == R.Multiple && L.Address == R.Address &&
429          L.ParamSize == R.ParamSize && L.Name == R.Name;
430 }
431 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
432                                         const PublicRecord &R) {
433   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
434                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
435                              R.Name);
436 }
437 
438 std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
439   // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
440   // or
441   // STACK CFI address reg1: expr1 reg2: expr2 ...
442   // No token in exprN ends with a colon.
443 
444   if (consume<Token>(Line) != Token::Stack)
445     return std::nullopt;
446   if (consume<Token>(Line) != Token::CFI)
447     return std::nullopt;
448 
449   llvm::StringRef Str;
450   std::tie(Str, Line) = getToken(Line);
451 
452   bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
453   if (IsInitRecord)
454     std::tie(Str, Line) = getToken(Line);
455 
456   lldb::addr_t Address;
457   if (!to_integer(Str, Address, 16))
458     return std::nullopt;
459 
460   std::optional<lldb::addr_t> Size;
461   if (IsInitRecord) {
462     Size.emplace();
463     std::tie(Str, Line) = getToken(Line);
464     if (!to_integer(Str, *Size, 16))
465       return std::nullopt;
466   }
467 
468   return StackCFIRecord(Address, Size, Line.trim());
469 }
470 
471 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
472   return L.Address == R.Address && L.Size == R.Size &&
473          L.UnwindRules == R.UnwindRules;
474 }
475 
476 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
477                                         const StackCFIRecord &R) {
478   OS << "STACK CFI ";
479   if (R.Size)
480     OS << "INIT ";
481   OS << llvm::formatv("{0:x-} ", R.Address);
482   if (R.Size)
483     OS << llvm::formatv("{0:x-} ", *R.Size);
484   return OS << " " << R.UnwindRules;
485 }
486 
487 std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
488   // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
489   //     saved_register_size local_size max_stack_size has_program_string
490   //     program_string_OR_allocates_base_pointer
491 
492   if (consume<Token>(Line) != Token::Stack)
493     return std::nullopt;
494   if (consume<Token>(Line) != Token::Win)
495     return std::nullopt;
496 
497   llvm::StringRef Str;
498   uint8_t Type;
499   std::tie(Str, Line) = getToken(Line);
500   // Right now we only support the "FrameData" frame type.
501   if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
502     return std::nullopt;
503 
504   lldb::addr_t RVA;
505   std::tie(Str, Line) = getToken(Line);
506   if (!to_integer(Str, RVA, 16))
507     return std::nullopt;
508 
509   lldb::addr_t CodeSize;
510   std::tie(Str, Line) = getToken(Line);
511   if (!to_integer(Str, CodeSize, 16))
512     return std::nullopt;
513 
514   // Skip fields which we aren't using right now.
515   std::tie(Str, Line) = getToken(Line); // prologue_size
516   std::tie(Str, Line) = getToken(Line); // epilogue_size
517 
518   lldb::addr_t ParameterSize;
519   std::tie(Str, Line) = getToken(Line);
520   if (!to_integer(Str, ParameterSize, 16))
521     return std::nullopt;
522 
523   lldb::addr_t SavedRegisterSize;
524   std::tie(Str, Line) = getToken(Line);
525   if (!to_integer(Str, SavedRegisterSize, 16))
526     return std::nullopt;
527 
528   lldb::addr_t LocalSize;
529   std::tie(Str, Line) = getToken(Line);
530   if (!to_integer(Str, LocalSize, 16))
531     return std::nullopt;
532 
533   std::tie(Str, Line) = getToken(Line); // max_stack_size
534 
535   uint8_t HasProgramString;
536   std::tie(Str, Line) = getToken(Line);
537   if (!to_integer(Str, HasProgramString))
538     return std::nullopt;
539   // FrameData records should always have a program string.
540   if (!HasProgramString)
541     return std::nullopt;
542 
543   return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
544                         LocalSize, Line.trim());
545 }
546 
547 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) {
548   return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
549          L.ParameterSize == R.ParameterSize &&
550          L.SavedRegisterSize == R.SavedRegisterSize &&
551          L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString;
552 }
553 
554 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
555                                         const StackWinRecord &R) {
556   return OS << llvm::formatv(
557              "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
558              R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize,
559              R.ProgramString);
560 }
561 
562 llvm::StringRef breakpad::toString(Record::Kind K) {
563   switch (K) {
564   case Record::Module:
565     return "MODULE";
566   case Record::Info:
567     return "INFO";
568   case Record::File:
569     return "FILE";
570   case Record::Func:
571     return "FUNC";
572   case Record::Inline:
573     return "INLINE";
574   case Record::InlineOrigin:
575     return "INLINE_ORIGIN";
576   case Record::Line:
577     return "LINE";
578   case Record::Public:
579     return "PUBLIC";
580   case Record::StackCFI:
581     return "STACK CFI";
582   case Record::StackWin:
583     return "STACK WIN";
584   }
585   llvm_unreachable("Unknown record kind!");
586 }
587