xref: /freebsd/contrib/llvm-project/lldb/include/lldb/Target/TraceDumper.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- TraceDumper.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Symbol/SymbolContext.h"
10 #include "lldb/Target/TraceCursor.h"
11 #include <optional>
12 #include <stack>
13 
14 #ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
15 #define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
16 
17 namespace lldb_private {
18 
/// Configuration consumed by \a TraceDumper when traversing and dumping
/// instructions. Every flag defaults to \b false and every optional value
/// defaults to empty.
struct TraceDumperOptions {
  /// If \b true, the cursor will be iterated forwards starting from the
  /// oldest instruction. Otherwise, the iteration starts from the most
  /// recent instruction.
  bool forwards = false;
  /// Dump only instruction addresses without disassembly nor symbol
  /// information.
  bool raw = false;
  /// Dump in json format.
  bool json = false;
  /// When dumping in JSON format, pretty print the output.
  bool pretty_print_json = false;
  /// For each trace item, print the corresponding timestamp in nanoseconds if
  /// available.
  bool show_timestamps = false;
  /// Dump the events that happened between instructions.
  bool show_events = false;
  /// Dump events and none of the instructions.
  bool only_events = false;
  /// For each instruction, print the instruction kind.
  bool show_control_flow_kind = false;
  /// Optional custom id to start traversing from.
  std::optional<uint64_t> id;
  /// Optional number of instructions to skip from the starting position
  /// of the cursor.
  std::optional<size_t> skip;
};
48 
49 /// Class used to dump the instructions of a \a TraceCursor using its current
50 /// state and granularity.
51 class TraceDumper {
52 public:
53   /// Helper struct that holds symbol, disassembly and address information of an
54   /// instruction.
55   struct SymbolInfo {
56     SymbolContext sc;
57     Address address;
58     lldb::DisassemblerSP disassembler;
59     lldb::InstructionSP instruction;
60     lldb_private::ExecutionContext exe_ctx;
61   };
62 
63   /// Helper struct that holds all the information we know about a trace item
64   struct TraceItem {
65     lldb::user_id_t id;
66     lldb::addr_t load_address;
67     std::optional<double> timestamp;
68     std::optional<uint64_t> hw_clock;
69     std::optional<std::string> sync_point_metadata;
70     std::optional<llvm::StringRef> error;
71     std::optional<lldb::TraceEvent> event;
72     std::optional<SymbolInfo> symbol_info;
73     std::optional<SymbolInfo> prev_symbol_info;
74     std::optional<lldb::cpu_id_t> cpu_id;
75   };
76 
77   /// An object representing a traced function call.
78   ///
79   /// A function call is represented using segments and subcalls.
80   ///
81   /// TracedSegment:
82   ///   A traced segment is a maximal list of consecutive traced instructions
83   ///   that belong to the same function call. A traced segment will end in
84   ///   three possible ways:
  ///     - With a call to a function deeper in the callstack. In this case,
  ///       most of the time this nested call will return and resume with the
  ///       next segment of this segment's owning function call. More on this
  ///       later.
  ///     - Abruptly due to end of trace. In this case, we weren't able to
  ///       trace the end of this function call.
  ///     - Simply a return higher in the callstack.
92   ///
  ///   In terms of implementation details, a segment can be represented with
  ///   the beginning and ending instruction IDs from the instruction trace.
95   ///
96   ///  UntracedPrefixSegment:
97   ///   It might happen that we didn't trace the beginning of a function and we
98   ///   saw it for the first time as part of a return. As a way to signal these
99   ///   cases, we have a placeholder UntracedPrefixSegment class that completes the
100   ///   callgraph.
101   ///
102   ///  Example:
103   ///   We might have this piece of execution:
104   ///
105   ///     main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4]
  ///       foo()  [offset 0x00 to 0x80] [traced instruction ids 5 to 20]
  ///       # main invoked foo
108   ///     main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30]
109   ///
110   ///   In this case, our function main invokes foo. We have 3 segments: main
111   ///   [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset
112   ///   0x24 to 0x40]. We also have the instruction ids from the corresponding
113   ///   linear instruction trace for each segment.
114   ///
115   ///   But what if we started tracing since the middle of foo? Then we'd have
116   ///   an incomplete trace
117   ///
118   ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
119   ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
120   ///
121   ///   Notice that we changed the instruction ids because this is a new trace.
122   ///   Here, in order to have a somewhat complete tree with good traversal
123   ///   capabilities, we can create an UntracedPrefixSegment to signal the portion of
124   ///   main() that we didn't trace. We don't know if this segment was in fact
125   ///   multiple segments with many function calls. We'll never know. The
126   ///   resulting tree looks like the following:
127   ///
128   ///     main() [untraced]
129   ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
130   ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
131   ///
132   ///   And in pseudo-code:
133   ///
  ///     FunctionCall [
  ///       UntracedPrefixSegment {
  ///         symbol: main()
  ///         # this untraced segment has a nested call
  ///         nestedCall: FunctionCall [
  ///           TracedSegment {
  ///             symbol: foo()
  ///             fromInstructionId: 1
  ///             toInstructionId: 10
  ///             nestedCall: none # this doesn't have a nested call
  ///           }
  ///         ]
  ///       },
  ///       TracedSegment {
  ///         symbol: main()
  ///         fromInstructionId: 11
  ///         toInstructionId: 20
  ///         nestedCall: none # this also doesn't have a nested call
  ///       }
  ///     ]
154   ///
155   ///   We can see the nested structure and how instructions are represented as
156   ///   segments.
157   ///
158   ///
159   ///   Returns:
160   ///     Code doesn't always behave intuitively. Some interesting functions
161   ///     might modify the stack and thus change the behavior of common
162   ///     instructions like CALL and RET. We try to identify these cases, and
  ///     the result is that the return edge from a segment might connect with a
  ///     function call very high in the stack. For example, you might have
  ///
  ///     main()
  ///       foo()
  ///         bar()
  ///         # here bar modifies the stack and pops foo() from it. Then it
  ///         # finishes with a RET (return)
171   ///     main() # we came back directly to main()
172   ///
173   ///     I have observed some trampolines doing this, as well as some std
174   ///     functions (like ostream functions). So consumers should be aware of
175   ///     this.
176   ///
177   ///     There are all sorts of "abnormal" behaviors you can see in code, and
178   ///     whenever we fail at identifying what's going on, we prefer to create a
179   ///     new tree.
180   ///
181   ///   Function call forest:
182   ///     A single tree would suffice if a trace didn't contain errors nor
183   ///     abnormal behaviors that made our algorithms fail. Sadly these
184   ///     anomalies exist and we prefer not to use too many heuristics and
185   ///     probably end up lying to the user. So we create a new tree from the
186   ///     point we can't continue using the previous tree. This results in
  ///     having a forest instead of a single tree. This is probably the best we
  ///     can do if consumers want to use this data to perform performance
  ///     analysis or reverse debugging.
190   ///
191   ///   Non-functions:
192   ///     Not everything in a program is a function. There are blocks of
  ///     instructions that are simply labeled or even regions without symbol
  ///     information that we don't know what they are. We treat all of them as
195   ///     functions for simplicity.
196   ///
197   ///   Errors:
198   ///     Whenever an error is found, a new tree with a single segment is
199   ///     created. All consecutive errors after the original one are then
200   ///     appended to this segment. As a note, something that GDB does is to use
201   ///     some heuristics to merge trees that were interrupted by errors. We are
202   ///     leaving that out of scope until a feature like that one is really
203   ///     needed.
204 
205   /// Forward declaration
206   class FunctionCall;
207   using FunctionCallUP = std::unique_ptr<FunctionCall>;
208 
209   class FunctionCall {
210   public:
211     class TracedSegment {
212     public:
213       /// \param[in] cursor_sp
214       ///   A cursor pointing to the beginning of the segment.
215       ///
216       /// \param[in] symbol_info
217       ///   The symbol information of the first instruction of the segment.
218       ///
219       /// \param[in] call
220       ///   The FunctionCall object that owns this segment.
TracedSegment(const lldb::TraceCursorSP & cursor_sp,const SymbolInfo & symbol_info,FunctionCall & owning_call)221       TracedSegment(const lldb::TraceCursorSP &cursor_sp,
222                     const SymbolInfo &symbol_info, FunctionCall &owning_call)
223           : m_first_insn_id(cursor_sp->GetId()),
224             m_last_insn_id(cursor_sp->GetId()),
225             m_first_symbol_info(symbol_info), m_last_symbol_info(symbol_info),
226             m_owning_call(owning_call) {}
227 
228       /// \return
229       ///   The chronologically first instruction ID in this segment.
230       lldb::user_id_t GetFirstInstructionID() const;
231       /// \return
232       ///   The chronologically last instruction ID in this segment.
233       lldb::user_id_t GetLastInstructionID() const;
234 
235       /// \return
236       ///   The symbol information of the chronologically first instruction ID
237       ///   in this segment.
238       const SymbolInfo &GetFirstInstructionSymbolInfo() const;
239 
240       /// \return
241       ///   The symbol information of the chronologically last instruction ID in
242       ///   this segment.
243       const SymbolInfo &GetLastInstructionSymbolInfo() const;
244 
245       /// \return
246       ///   Get the call that owns this segment.
247       const FunctionCall &GetOwningCall() const;
248 
249       /// Append a new instruction to this segment.
250       ///
251       /// \param[in] cursor_sp
252       ///   A cursor pointing to the new instruction.
253       ///
254       /// \param[in] symbol_info
255       ///   The symbol information of the new instruction.
256       void AppendInsn(const lldb::TraceCursorSP &cursor_sp,
257                       const SymbolInfo &symbol_info);
258 
259       /// Create a nested call at the end of this segment.
260       ///
261       /// \param[in] cursor_sp
262       ///   A cursor pointing to the first instruction of the nested call.
263       ///
264       /// \param[in] symbol_info
265       ///   The symbol information of the first instruction of the nested call.
266       FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp,
267                                      const SymbolInfo &symbol_info);
268 
269       /// Executed the given callback if there's a nested call at the end of
270       /// this segment.
271       void IfNestedCall(std::function<void(const FunctionCall &function_call)>
272                             callback) const;
273 
274     private:
275       TracedSegment(const TracedSegment &) = delete;
276       TracedSegment &operator=(TracedSegment const &);
277 
278       /// Delimiting instruction IDs taken chronologically.
279       /// \{
280       lldb::user_id_t m_first_insn_id;
281       lldb::user_id_t m_last_insn_id;
282       /// \}
283       /// An optional nested call starting at the end of this segment.
284       FunctionCallUP m_nested_call;
285       /// The symbol information of the delimiting instructions
286       /// \{
287       SymbolInfo m_first_symbol_info;
288       SymbolInfo m_last_symbol_info;
289       /// \}
290       FunctionCall &m_owning_call;
291     };
292 
293     class UntracedPrefixSegment {
294     public:
295       /// Note: Untraced segments can only exist if have also seen a traced
296       /// segment of the same function call. Thus, we can use those traced
297       /// segments if we want symbol information and such.
298 
UntracedPrefixSegment(FunctionCallUP && nested_call)299       UntracedPrefixSegment(FunctionCallUP &&nested_call)
300           : m_nested_call(std::move(nested_call)) {}
301 
302       const FunctionCall &GetNestedCall() const;
303 
304     private:
305       UntracedPrefixSegment(const UntracedPrefixSegment &) = delete;
306       UntracedPrefixSegment &operator=(UntracedPrefixSegment const &);
307       FunctionCallUP m_nested_call;
308     };
309 
310     /// Create a new function call given an instruction. This will also create a
311     /// segment for that instruction.
312     ///
313     /// \param[in] cursor_sp
314     ///   A cursor pointing to the first instruction of that function call.
315     ///
316     /// \param[in] symbol_info
317     ///   The symbol information of that first instruction.
318     FunctionCall(const lldb::TraceCursorSP &cursor_sp,
319                  const SymbolInfo &symbol_info);
320 
321     /// Append a new traced segment to this function call.
322     ///
323     /// \param[in] cursor_sp
324     ///   A cursor pointing to the first instruction of the new segment.
325     ///
326     /// \param[in] symbol_info
327     ///   The symbol information of that first instruction.
328     void AppendSegment(const lldb::TraceCursorSP &cursor_sp,
329                        const SymbolInfo &symbol_info);
330 
331     /// \return
332     ///   The symbol info of some traced instruction of this call.
333     const SymbolInfo &GetSymbolInfo() const;
334 
335     /// \return
336     ///   \b true if and only if the instructions in this function call are
337     ///   trace errors, in which case this function call is a fake one.
338     bool IsError() const;
339 
340     /// \return
341     ///   The list of traced segments of this call.
342     const std::deque<TracedSegment> &GetTracedSegments() const;
343 
344     /// \return
345     ///   A non-const reference to the most-recent traced segment.
346     TracedSegment &GetLastTracedSegment();
347 
348     /// Create an untraced segment for this call that jumps to the provided
349     /// nested call.
350     void SetUntracedPrefixSegment(FunctionCallUP &&nested_call);
351 
352     /// \return
353     ///   A optional to the untraced prefix segment of this call.
354     const std::optional<UntracedPrefixSegment> &
355     GetUntracedPrefixSegment() const;
356 
357     /// \return
358     ///   A pointer to the parent call. It may be \b nullptr.
359     FunctionCall *GetParentCall() const;
360 
361     void SetParentCall(FunctionCall &parent_call);
362 
363   private:
364     /// An optional untraced segment that precedes all the traced segments.
365     std::optional<UntracedPrefixSegment> m_untraced_prefix_segment;
366     /// The traced segments in order. We used a deque to prevent moving these
367     /// objects when appending to the list, which would happen with vector.
368     std::deque<TracedSegment> m_traced_segments;
369     /// The parent call, which might be null. Useful for reconstructing
370     /// callstacks.
371     FunctionCall *m_parent_call = nullptr;
372     /// Whether this call represents a list of consecutive errors.
373     bool m_is_error;
374   };
375 
376   /// Interface used to abstract away the format in which the instruction
377   /// information will be dumped.
378   class OutputWriter {
379   public:
380     virtual ~OutputWriter() = default;
381 
382     /// Notify this writer that the cursor ran out of data.
NoMoreData()383     virtual void NoMoreData() {}
384 
385     /// Dump a trace item (instruction, error or event).
386     virtual void TraceItem(const TraceItem &item) = 0;
387 
388     /// Dump a function call forest.
389     virtual void
390     FunctionCallForest(const std::vector<FunctionCallUP> &forest) = 0;
391   };
392 
393   /// Create a instruction dumper for the cursor.
394   ///
395   /// \param[in] cursor
396   ///     The cursor whose instructions will be dumped.
397   ///
398   /// \param[in] s
399   ///     The stream where to dump the instructions to.
400   ///
401   /// \param[in] options
402   ///     Additional options for configuring the dumping.
403   TraceDumper(lldb::TraceCursorSP cursor_sp, Stream &s,
404               const TraceDumperOptions &options);
405 
406   /// Dump \a count instructions of the thread trace starting at the current
407   /// cursor position.
408   ///
409   /// This effectively moves the cursor to the next unvisited position, so that
410   /// a subsequent call to this method continues where it left off.
411   ///
412   /// \param[in] count
413   ///     The number of instructions to print.
414   ///
415   /// \return
416   ///     The instruction id of the last traversed instruction, or \b
417   ///     std::nullopt if no instructions were visited.
418   std::optional<lldb::user_id_t> DumpInstructions(size_t count);
419 
420   /// Dump all function calls forwards chronologically and hierarchically
421   void DumpFunctionCalls();
422 
423 private:
424   /// Create a trace item for the current position without symbol information.
425   TraceItem CreatRawTraceItem();
426 
427   lldb::TraceCursorSP m_cursor_sp;
428   TraceDumperOptions m_options;
429   std::unique_ptr<OutputWriter> m_writer_up;
430 };
431 
432 } // namespace lldb_private
433 
434 #endif // LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
435