1 //===-- TraceDumper.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Symbol/SymbolContext.h" 10 #include "lldb/Target/TraceCursor.h" 11 #include <optional> 12 #include <stack> 13 14 #ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H 15 #define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H 16 17 namespace lldb_private { 18 19 /// Class that holds the configuration used by \a TraceDumper for 20 /// traversing and dumping instructions. 21 struct TraceDumperOptions { 22 /// If \b true, the cursor will be iterated forwards starting from the 23 /// oldest instruction. Otherwise, the iteration starts from the most 24 /// recent instruction. 25 bool forwards = false; 26 /// Dump only instruction addresses without disassembly nor symbol 27 /// information. 28 bool raw = false; 29 /// Dump in json format. 30 bool json = false; 31 /// When dumping in JSON format, pretty print the output. 32 bool pretty_print_json = false; 33 /// For each trace item, print the corresponding timestamp in nanoseconds if 34 /// available. 35 bool show_timestamps = false; 36 /// Dump the events that happened between instructions. 37 bool show_events = false; 38 /// Dump events and none of the instructions. 39 bool only_events = false; 40 /// For each instruction, print the instruction kind. 41 bool show_control_flow_kind = false; 42 /// Optional custom id to start traversing from. 43 std::optional<uint64_t> id; 44 /// Optional number of instructions to skip from the starting position 45 /// of the cursor. 46 std::optional<size_t> skip; 47 }; 48 49 /// Class used to dump the instructions of a \a TraceCursor using its current 50 /// state and granularity. 51 class TraceDumper { 52 public: 53 /// Helper struct that holds symbol, disassembly and address information of an 54 /// instruction. 55 struct SymbolInfo { 56 SymbolContext sc; 57 Address address; 58 lldb::DisassemblerSP disassembler; 59 lldb::InstructionSP instruction; 60 lldb_private::ExecutionContext exe_ctx; 61 }; 62 63 /// Helper struct that holds all the information we know about a trace item 64 struct TraceItem { 65 lldb::user_id_t id; 66 lldb::addr_t load_address; 67 std::optional<double> timestamp; 68 std::optional<uint64_t> hw_clock; 69 std::optional<std::string> sync_point_metadata; 70 std::optional<llvm::StringRef> error; 71 std::optional<lldb::TraceEvent> event; 72 std::optional<SymbolInfo> symbol_info; 73 std::optional<SymbolInfo> prev_symbol_info; 74 std::optional<lldb::cpu_id_t> cpu_id; 75 }; 76 77 /// An object representing a traced function call. 78 /// 79 /// A function call is represented using segments and subcalls. 80 /// 81 /// TracedSegment: 82 /// A traced segment is a maximal list of consecutive traced instructions 83 /// that belong to the same function call. A traced segment will end in 84 /// three possible ways: 85 /// - With a call to a function deeper in the callstack. In this case, 86 /// most of the times this nested call will return 87 /// and resume with the next segment of this segment's owning function 88 /// call. More on this later. 89 /// - Abruptly due to end of trace. In this case, we weren't able to trace 90 /// the end of this function call. 91 /// - Simply a return higher in the callstack. 92 /// 93 /// In terms of implementation details, as segment can be represented with 94 /// the beginning and ending instruction IDs from the instruction trace. 95 /// 96 /// UntracedPrefixSegment: 97 /// It might happen that we didn't trace the beginning of a function and we 98 /// saw it for the first time as part of a return. As a way to signal these 99 /// cases, we have a placeholder UntracedPrefixSegment class that completes the 100 /// callgraph. 101 /// 102 /// Example: 103 /// We might have this piece of execution: 104 /// 105 /// main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4] 106 /// foo() [offset 0x00 to 0x80] [traced instruction ids 5 to 20] # main 107 /// invoked foo 108 /// main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30] 109 /// 110 /// In this case, our function main invokes foo. We have 3 segments: main 111 /// [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset 112 /// 0x24 to 0x40]. We also have the instruction ids from the corresponding 113 /// linear instruction trace for each segment. 114 /// 115 /// But what if we started tracing since the middle of foo? Then we'd have 116 /// an incomplete trace 117 /// 118 /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] 119 /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] 120 /// 121 /// Notice that we changed the instruction ids because this is a new trace. 122 /// Here, in order to have a somewhat complete tree with good traversal 123 /// capabilities, we can create an UntracedPrefixSegment to signal the portion of 124 /// main() that we didn't trace. We don't know if this segment was in fact 125 /// multiple segments with many function calls. We'll never know. The 126 /// resulting tree looks like the following: 127 /// 128 /// main() [untraced] 129 /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] 130 /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] 131 /// 132 /// And in pseudo-code: 133 /// 134 /// FunctionCall [ 135 /// UntracedPrefixSegment { 136 /// symbol: main() 137 /// nestedCall: FunctionCall [ # this untraced segment has a nested 138 /// call 139 /// TracedSegment { 140 /// symbol: foo() 141 /// fromInstructionId: 1 142 /// toInstructionId: 10 143 /// nestedCall: none # this doesn't have a nested call 144 /// } 145 /// } 146 /// ], 147 /// TracedSegment { 148 /// symbol: main() 149 /// fromInstructionId: 11 150 /// toInstructionId: 20 151 /// nestedCall: none # this also doesn't have a nested call 152 /// } 153 /// ] 154 /// 155 /// We can see the nested structure and how instructions are represented as 156 /// segments. 157 /// 158 /// 159 /// Returns: 160 /// Code doesn't always behave intuitively. Some interesting functions 161 /// might modify the stack and thus change the behavior of common 162 /// instructions like CALL and RET. We try to identify these cases, and 163 /// the result is that the return edge from a segment might connect with a 164 /// function call very high the stack. For example, you might have 165 /// 166 /// main() 167 /// foo() 168 /// bar() 169 /// # here bar modifies the stack and pops foo() from it. Then it 170 /// finished the a RET (return) 171 /// main() # we came back directly to main() 172 /// 173 /// I have observed some trampolines doing this, as well as some std 174 /// functions (like ostream functions). So consumers should be aware of 175 /// this. 176 /// 177 /// There are all sorts of "abnormal" behaviors you can see in code, and 178 /// whenever we fail at identifying what's going on, we prefer to create a 179 /// new tree. 180 /// 181 /// Function call forest: 182 /// A single tree would suffice if a trace didn't contain errors nor 183 /// abnormal behaviors that made our algorithms fail. Sadly these 184 /// anomalies exist and we prefer not to use too many heuristics and 185 /// probably end up lying to the user. So we create a new tree from the 186 /// point we can't continue using the previous tree. This results in 187 /// having a forest instead of a single tree. This is probably the best we 188 /// can do if we consumers want to use this data to perform performance 189 /// analysis or reverse debugging. 190 /// 191 /// Non-functions: 192 /// Not everything in a program is a function. There are blocks of 193 /// instructions that are simply labeled or even regions without symbol 194 /// information that we don't what they are. We treat all of them as 195 /// functions for simplicity. 196 /// 197 /// Errors: 198 /// Whenever an error is found, a new tree with a single segment is 199 /// created. All consecutive errors after the original one are then 200 /// appended to this segment. As a note, something that GDB does is to use 201 /// some heuristics to merge trees that were interrupted by errors. We are 202 /// leaving that out of scope until a feature like that one is really 203 /// needed. 204 205 /// Forward declaration 206 class FunctionCall; 207 using FunctionCallUP = std::unique_ptr<FunctionCall>; 208 209 class FunctionCall { 210 public: 211 class TracedSegment { 212 public: 213 /// \param[in] cursor_sp 214 /// A cursor pointing to the beginning of the segment. 215 /// 216 /// \param[in] symbol_info 217 /// The symbol information of the first instruction of the segment. 218 /// 219 /// \param[in] call 220 /// The FunctionCall object that owns this segment. TracedSegment(const lldb::TraceCursorSP & cursor_sp,const SymbolInfo & symbol_info,FunctionCall & owning_call)221 TracedSegment(const lldb::TraceCursorSP &cursor_sp, 222 const SymbolInfo &symbol_info, FunctionCall &owning_call) 223 : m_first_insn_id(cursor_sp->GetId()), 224 m_last_insn_id(cursor_sp->GetId()), 225 m_first_symbol_info(symbol_info), m_last_symbol_info(symbol_info), 226 m_owning_call(owning_call) {} 227 228 /// \return 229 /// The chronologically first instruction ID in this segment. 230 lldb::user_id_t GetFirstInstructionID() const; 231 /// \return 232 /// The chronologically last instruction ID in this segment. 233 lldb::user_id_t GetLastInstructionID() const; 234 235 /// \return 236 /// The symbol information of the chronologically first instruction ID 237 /// in this segment. 238 const SymbolInfo &GetFirstInstructionSymbolInfo() const; 239 240 /// \return 241 /// The symbol information of the chronologically last instruction ID in 242 /// this segment. 243 const SymbolInfo &GetLastInstructionSymbolInfo() const; 244 245 /// \return 246 /// Get the call that owns this segment. 247 const FunctionCall &GetOwningCall() const; 248 249 /// Append a new instruction to this segment. 250 /// 251 /// \param[in] cursor_sp 252 /// A cursor pointing to the new instruction. 253 /// 254 /// \param[in] symbol_info 255 /// The symbol information of the new instruction. 256 void AppendInsn(const lldb::TraceCursorSP &cursor_sp, 257 const SymbolInfo &symbol_info); 258 259 /// Create a nested call at the end of this segment. 260 /// 261 /// \param[in] cursor_sp 262 /// A cursor pointing to the first instruction of the nested call. 263 /// 264 /// \param[in] symbol_info 265 /// The symbol information of the first instruction of the nested call. 266 FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp, 267 const SymbolInfo &symbol_info); 268 269 /// Executed the given callback if there's a nested call at the end of 270 /// this segment. 271 void IfNestedCall(std::function<void(const FunctionCall &function_call)> 272 callback) const; 273 274 private: 275 TracedSegment(const TracedSegment &) = delete; 276 TracedSegment &operator=(TracedSegment const &); 277 278 /// Delimiting instruction IDs taken chronologically. 279 /// \{ 280 lldb::user_id_t m_first_insn_id; 281 lldb::user_id_t m_last_insn_id; 282 /// \} 283 /// An optional nested call starting at the end of this segment. 284 FunctionCallUP m_nested_call; 285 /// The symbol information of the delimiting instructions 286 /// \{ 287 SymbolInfo m_first_symbol_info; 288 SymbolInfo m_last_symbol_info; 289 /// \} 290 FunctionCall &m_owning_call; 291 }; 292 293 class UntracedPrefixSegment { 294 public: 295 /// Note: Untraced segments can only exist if have also seen a traced 296 /// segment of the same function call. Thus, we can use those traced 297 /// segments if we want symbol information and such. 298 UntracedPrefixSegment(FunctionCallUP && nested_call)299 UntracedPrefixSegment(FunctionCallUP &&nested_call) 300 : m_nested_call(std::move(nested_call)) {} 301 302 const FunctionCall &GetNestedCall() const; 303 304 private: 305 UntracedPrefixSegment(const UntracedPrefixSegment &) = delete; 306 UntracedPrefixSegment &operator=(UntracedPrefixSegment const &); 307 FunctionCallUP m_nested_call; 308 }; 309 310 /// Create a new function call given an instruction. This will also create a 311 /// segment for that instruction. 312 /// 313 /// \param[in] cursor_sp 314 /// A cursor pointing to the first instruction of that function call. 315 /// 316 /// \param[in] symbol_info 317 /// The symbol information of that first instruction. 318 FunctionCall(const lldb::TraceCursorSP &cursor_sp, 319 const SymbolInfo &symbol_info); 320 321 /// Append a new traced segment to this function call. 322 /// 323 /// \param[in] cursor_sp 324 /// A cursor pointing to the first instruction of the new segment. 325 /// 326 /// \param[in] symbol_info 327 /// The symbol information of that first instruction. 328 void AppendSegment(const lldb::TraceCursorSP &cursor_sp, 329 const SymbolInfo &symbol_info); 330 331 /// \return 332 /// The symbol info of some traced instruction of this call. 333 const SymbolInfo &GetSymbolInfo() const; 334 335 /// \return 336 /// \b true if and only if the instructions in this function call are 337 /// trace errors, in which case this function call is a fake one. 338 bool IsError() const; 339 340 /// \return 341 /// The list of traced segments of this call. 342 const std::deque<TracedSegment> &GetTracedSegments() const; 343 344 /// \return 345 /// A non-const reference to the most-recent traced segment. 346 TracedSegment &GetLastTracedSegment(); 347 348 /// Create an untraced segment for this call that jumps to the provided 349 /// nested call. 350 void SetUntracedPrefixSegment(FunctionCallUP &&nested_call); 351 352 /// \return 353 /// A optional to the untraced prefix segment of this call. 354 const std::optional<UntracedPrefixSegment> & 355 GetUntracedPrefixSegment() const; 356 357 /// \return 358 /// A pointer to the parent call. It may be \b nullptr. 359 FunctionCall *GetParentCall() const; 360 361 void SetParentCall(FunctionCall &parent_call); 362 363 private: 364 /// An optional untraced segment that precedes all the traced segments. 365 std::optional<UntracedPrefixSegment> m_untraced_prefix_segment; 366 /// The traced segments in order. We used a deque to prevent moving these 367 /// objects when appending to the list, which would happen with vector. 368 std::deque<TracedSegment> m_traced_segments; 369 /// The parent call, which might be null. Useful for reconstructing 370 /// callstacks. 371 FunctionCall *m_parent_call = nullptr; 372 /// Whether this call represents a list of consecutive errors. 373 bool m_is_error; 374 }; 375 376 /// Interface used to abstract away the format in which the instruction 377 /// information will be dumped. 378 class OutputWriter { 379 public: 380 virtual ~OutputWriter() = default; 381 382 /// Notify this writer that the cursor ran out of data. NoMoreData()383 virtual void NoMoreData() {} 384 385 /// Dump a trace item (instruction, error or event). 386 virtual void TraceItem(const TraceItem &item) = 0; 387 388 /// Dump a function call forest. 389 virtual void 390 FunctionCallForest(const std::vector<FunctionCallUP> &forest) = 0; 391 }; 392 393 /// Create a instruction dumper for the cursor. 394 /// 395 /// \param[in] cursor 396 /// The cursor whose instructions will be dumped. 397 /// 398 /// \param[in] s 399 /// The stream where to dump the instructions to. 400 /// 401 /// \param[in] options 402 /// Additional options for configuring the dumping. 403 TraceDumper(lldb::TraceCursorSP cursor_sp, Stream &s, 404 const TraceDumperOptions &options); 405 406 /// Dump \a count instructions of the thread trace starting at the current 407 /// cursor position. 408 /// 409 /// This effectively moves the cursor to the next unvisited position, so that 410 /// a subsequent call to this method continues where it left off. 411 /// 412 /// \param[in] count 413 /// The number of instructions to print. 414 /// 415 /// \return 416 /// The instruction id of the last traversed instruction, or \b 417 /// std::nullopt if no instructions were visited. 418 std::optional<lldb::user_id_t> DumpInstructions(size_t count); 419 420 /// Dump all function calls forwards chronologically and hierarchically 421 void DumpFunctionCalls(); 422 423 private: 424 /// Create a trace item for the current position without symbol information. 425 TraceItem CreatRawTraceItem(); 426 427 lldb::TraceCursorSP m_cursor_sp; 428 TraceDumperOptions m_options; 429 std::unique_ptr<OutputWriter> m_writer_up; 430 }; 431 432 } // namespace lldb_private 433 434 #endif // LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H 435