1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_CORE_DISASSEMBLER_H 10 #define LLDB_CORE_DISASSEMBLER_H 11 12 #include "lldb/Core/Address.h" 13 #include "lldb/Core/EmulateInstruction.h" 14 #include "lldb/Core/FormatEntity.h" 15 #include "lldb/Core/Opcode.h" 16 #include "lldb/Core/PluginInterface.h" 17 #include "lldb/Interpreter/OptionValue.h" 18 #include "lldb/Symbol/LineEntry.h" 19 #include "lldb/Target/ExecutionContext.h" 20 #include "lldb/Utility/ArchSpec.h" 21 #include "lldb/Utility/ConstString.h" 22 #include "lldb/Utility/FileSpec.h" 23 #include "lldb/lldb-defines.h" 24 #include "lldb/lldb-forward.h" 25 #include "lldb/lldb-private-enumerations.h" 26 #include "lldb/lldb-types.h" 27 28 #include "llvm/ADT/StringRef.h" 29 30 #include <functional> 31 #include <map> 32 #include <memory> 33 #include <set> 34 #include <string> 35 #include <vector> 36 37 #include <cstddef> 38 #include <cstdint> 39 #include <cstdio> 40 41 namespace llvm { 42 template <typename T> class SmallVectorImpl; 43 } 44 45 namespace lldb_private { 46 class AddressRange; 47 class DataExtractor; 48 class Debugger; 49 class Disassembler; 50 class Module; 51 class StackFrame; 52 class Stream; 53 class SymbolContext; 54 class SymbolContextList; 55 class Target; 56 struct RegisterInfo; 57 58 class Instruction { 59 public: 60 Instruction(const Address &address, 61 AddressClass addr_class = AddressClass::eInvalid); 62 63 virtual ~Instruction(); 64 GetAddress()65 const Address &GetAddress() const { return m_address; } 66 67 const char *GetMnemonic(const ExecutionContext *exe_ctx, 68 bool markup = false) { 69 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 70 return markup ? m_markup_opcode_name.c_str() : m_opcode_name.c_str(); 71 } 72 73 const char *GetOperands(const ExecutionContext *exe_ctx, 74 bool markup = false) { 75 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 76 return markup ? m_markup_mnemonics.c_str() : m_mnemonics.c_str(); 77 } 78 GetComment(const ExecutionContext * exe_ctx)79 const char *GetComment(const ExecutionContext *exe_ctx) { 80 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 81 return m_comment.c_str(); 82 } 83 84 /// \return 85 /// The control flow kind of this instruction, or 86 /// eInstructionControlFlowKindUnknown if the instruction 87 /// can't be classified. 88 virtual lldb::InstructionControlFlowKind GetControlFlowKind(const ExecutionContext * exe_ctx)89 GetControlFlowKind(const ExecutionContext *exe_ctx) { 90 return lldb::eInstructionControlFlowKindUnknown; 91 } 92 93 virtual void 94 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; 95 96 AddressClass GetAddressClass(); 97 SetAddress(const Address & addr)98 void SetAddress(const Address &addr) { 99 // Invalidate the address class to lazily discover it if we need to. 100 m_address_class = AddressClass::eInvalid; 101 m_address = addr; 102 } 103 104 /// Dump the text representation of this Instruction to a Stream 105 /// 106 /// Print the (optional) address, (optional) bytes, opcode, 107 /// operands, and instruction comments to a stream. 108 /// 109 /// \param[in] s 110 /// The Stream to add the text to. 111 /// 112 /// \param[in] show_address 113 /// Whether the address (using disassembly_addr_format_spec formatting) 114 /// should be printed. 115 /// 116 /// \param[in] show_bytes 117 /// Whether the bytes of the assembly instruction should be printed. 118 /// 119 /// \param[in] show_control_flow_kind 120 /// Whether the control flow kind of the instruction should be printed. 121 /// 122 /// \param[in] max_opcode_byte_size 123 /// The size (in bytes) of the largest instruction in the list that 124 /// we are printing (for text justification/alignment purposes) 125 /// Only needed if show_bytes is true. 126 /// 127 /// \param[in] exe_ctx 128 /// The current execution context, if available. May be used in 129 /// the assembling of the operands+comments for this instruction. 130 /// Pass NULL if not applicable. 131 /// 132 /// \param[in] sym_ctx 133 /// The SymbolContext for this instruction. 134 /// Pass NULL if not available/computed. 135 /// Only needed if show_address is true. 136 /// 137 /// \param[in] prev_sym_ctx 138 /// The SymbolContext for the previous instruction. Depending on 139 /// the disassembly address format specification, a change in 140 /// Symbol / Function may mean that a line is printed with the new 141 /// symbol/function name. 142 /// Pass NULL if unavailable, or if this is the first instruction of 143 /// the InstructionList. 144 /// Only needed if show_address is true. 145 /// 146 /// \param[in] disassembly_addr_format 147 /// The format specification for how addresses are printed. 148 /// Only needed if show_address is true. 149 /// 150 /// \param[in] max_address_text_size 151 /// The length of the longest address string at the start of the 152 /// disassembly line that will be printed (the 153 /// Debugger::FormatDisassemblerAddress() string) 154 /// so this method can properly align the instruction opcodes. 155 /// May be 0 to indicate no indentation/alignment of the opcodes. 156 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, 157 bool show_bytes, bool show_control_flow_kind, 158 const ExecutionContext *exe_ctx, 159 const SymbolContext *sym_ctx, 160 const SymbolContext *prev_sym_ctx, 161 const FormatEntity::Entry *disassembly_addr_format, 162 size_t max_address_text_size); 163 164 virtual bool DoesBranch() = 0; 165 166 virtual bool HasDelaySlot(); 167 168 virtual bool IsLoad() = 0; 169 170 virtual bool IsAuthenticated() = 0; 171 172 bool CanSetBreakpoint (); 173 174 virtual size_t Decode(const Disassembler &disassembler, 175 const DataExtractor &data, 176 lldb::offset_t data_offset) = 0; 177 SetDescription(llvm::StringRef)178 virtual void SetDescription(llvm::StringRef) { 179 } // May be overridden in sub-classes that have descriptions. 180 181 lldb::OptionValueSP ReadArray(FILE *in_file, Stream &out_stream, 182 OptionValue::Type data_type); 183 184 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream &out_stream); 185 186 bool DumpEmulation(const ArchSpec &arch); 187 188 virtual bool TestEmulation(Stream &stream, const char *test_file_name); 189 190 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, 191 EmulateInstruction::ReadMemoryCallback read_mem_callback, 192 EmulateInstruction::WriteMemoryCallback write_mem_calback, 193 EmulateInstruction::ReadRegisterCallback read_reg_callback, 194 EmulateInstruction::WriteRegisterCallback write_reg_callback); 195 GetOpcode()196 const Opcode &GetOpcode() const { return m_opcode; } 197 198 uint32_t GetData(DataExtractor &data); 199 200 struct Operand { 201 enum class Type { 202 Invalid = 0, 203 Register, 204 Immediate, 205 Dereference, 206 Sum, 207 Product 208 } m_type = Type::Invalid; 209 std::vector<Operand> m_children; 210 lldb::addr_t m_immediate = 0; 211 ConstString m_register; 212 bool m_negative = false; 213 bool m_clobbered = false; 214 IsValidOperand215 bool IsValid() { return m_type != Type::Invalid; } 216 217 static Operand BuildRegister(ConstString &r); 218 static Operand BuildImmediate(lldb::addr_t imm, bool neg); 219 static Operand BuildImmediate(int64_t imm); 220 static Operand BuildDereference(const Operand &ref); 221 static Operand BuildSum(const Operand &lhs, const Operand &rhs); 222 static Operand BuildProduct(const Operand &lhs, const Operand &rhs); 223 }; 224 ParseOperands(llvm::SmallVectorImpl<Operand> & operands)225 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) { 226 return false; 227 } 228 IsCall()229 virtual bool IsCall() { return false; } 230 231 static const char *GetNameForInstructionControlFlowKind( 232 lldb::InstructionControlFlowKind instruction_control_flow_kind); 233 234 protected: 235 Address m_address; // The section offset address of this instruction 236 // We include an address class in the Instruction class to 237 // allow the instruction specify the 238 // AddressClass::eCodeAlternateISA (currently used for 239 // thumb), and also to specify data (AddressClass::eData). 240 // The usual value will be AddressClass::eCode, but often 241 // when disassembling memory, you might run into data. 242 // This can help us to disassemble appropriately. 243 private: 244 AddressClass m_address_class; // Use GetAddressClass () accessor function! 245 246 protected: 247 Opcode m_opcode; // The opcode for this instruction 248 std::string m_opcode_name; 249 std::string m_markup_opcode_name; 250 std::string m_mnemonics; 251 std::string m_markup_mnemonics; 252 std::string m_comment; 253 bool m_calculated_strings; 254 255 void CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext * exe_ctx)256 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { 257 if (!m_calculated_strings) { 258 m_calculated_strings = true; 259 CalculateMnemonicOperandsAndComment(exe_ctx); 260 } 261 } 262 }; 263 264 namespace OperandMatchers { 265 std::function<bool(const Instruction::Operand &)> 266 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base, 267 std::function<bool(const Instruction::Operand &)> left, 268 std::function<bool(const Instruction::Operand &)> right); 269 270 std::function<bool(const Instruction::Operand &)> 271 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base, 272 std::function<bool(const Instruction::Operand &)> child); 273 274 std::function<bool(const Instruction::Operand &)> 275 MatchRegOp(const RegisterInfo &info); 276 277 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString ®); 278 279 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm); 280 281 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm); 282 283 std::function<bool(const Instruction::Operand &)> 284 MatchOpType(Instruction::Operand::Type type); 285 } 286 287 class InstructionList { 288 public: 289 InstructionList(); 290 ~InstructionList(); 291 292 size_t GetSize() const; 293 294 uint32_t GetMaxOpcocdeByteSize() const; 295 296 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; 297 298 /// Get the instruction at the given address. 299 /// 300 /// \return 301 /// A valid \a InstructionSP if the address could be found, or null 302 /// otherwise. 303 lldb::InstructionSP GetInstructionAtAddress(const Address &addr); 304 305 //------------------------------------------------------------------ 306 /// Get the index of the next branch instruction. 307 /// 308 /// Given a list of instructions, find the next branch instruction 309 /// in the list by returning an index. 310 /// 311 /// @param[in] start 312 /// The instruction index of the first instruction to check. 313 /// 314 /// @param[in] ignore_calls 315 /// It true, then fine the first branch instruction that isn't 316 /// a function call (a branch that calls and returns to the next 317 /// instruction). If false, find the instruction index of any 318 /// branch in the list. 319 /// 320 /// @param[out] found_calls 321 /// If non-null, this will be set to true if any calls were found in 322 /// extending the range. 323 /// 324 /// @return 325 /// The instruction index of the first branch that is at or past 326 /// \a start. Returns UINT32_MAX if no matching branches are 327 /// found. 328 //------------------------------------------------------------------ 329 uint32_t GetIndexOfNextBranchInstruction(uint32_t start, 330 bool ignore_calls, 331 bool *found_calls) const; 332 333 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 334 Target &target); 335 336 uint32_t GetIndexOfInstructionAtAddress(const Address &addr); 337 338 void Clear(); 339 340 void Append(lldb::InstructionSP &inst_sp); 341 342 void Dump(Stream *s, bool show_address, bool show_bytes, 343 bool show_control_flow_kind, const ExecutionContext *exe_ctx); 344 345 private: 346 typedef std::vector<lldb::InstructionSP> collection; 347 typedef collection::iterator iterator; 348 typedef collection::const_iterator const_iterator; 349 350 collection m_instructions; 351 }; 352 353 class PseudoInstruction : public Instruction { 354 public: 355 PseudoInstruction(); 356 357 ~PseudoInstruction() override; 358 359 bool DoesBranch() override; 360 361 bool HasDelaySlot() override; 362 363 bool IsLoad() override; 364 365 bool IsAuthenticated() override; 366 CalculateMnemonicOperandsAndComment(const ExecutionContext * exe_ctx)367 void CalculateMnemonicOperandsAndComment( 368 const ExecutionContext *exe_ctx) override { 369 // TODO: fill this in and put opcode name into Instruction::m_opcode_name, 370 // mnemonic into Instruction::m_mnemonics, and any comment into 371 // Instruction::m_comment 372 } 373 374 size_t Decode(const Disassembler &disassembler, const DataExtractor &data, 375 lldb::offset_t data_offset) override; 376 377 void SetOpcode(size_t opcode_size, void *opcode_data); 378 379 void SetDescription(llvm::StringRef description) override; 380 381 protected: 382 std::string m_description; 383 384 PseudoInstruction(const PseudoInstruction &) = delete; 385 const PseudoInstruction &operator=(const PseudoInstruction &) = delete; 386 }; 387 388 class Disassembler : public std::enable_shared_from_this<Disassembler>, 389 public PluginInterface { 390 public: 391 enum { 392 eOptionNone = 0u, 393 eOptionShowBytes = (1u << 0), 394 eOptionRawOuput = (1u << 1), 395 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains 396 // the current PC (mixed mode only) 397 eOptionMarkPCAddress = 398 (1u << 3), // Mark the disassembly line the contains the PC 399 eOptionShowControlFlowKind = (1u << 4), 400 }; 401 402 enum HexImmediateStyle { 403 eHexStyleC, 404 eHexStyleAsm, 405 }; 406 407 // FindPlugin should be lax about the flavor string (it is too annoying to 408 // have various internal uses of the disassembler fail because the global 409 // flavor string gets set wrong. Instead, if you get a flavor string you 410 // don't understand, use the default. Folks who care to check can use the 411 // FlavorValidForArchSpec method on the disassembler they got back. 412 static lldb::DisassemblerSP 413 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); 414 415 // This version will use the value in the Target settings if flavor is NULL; 416 static lldb::DisassemblerSP FindPluginForTarget(const Target &target, 417 const ArchSpec &arch, 418 const char *flavor, 419 const char *plugin_name); 420 421 struct Limit { 422 enum { Bytes, Instructions } kind; 423 lldb::addr_t value; 424 }; 425 426 static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch, 427 const char *plugin_name, 428 const char *flavor, 429 Target &target, 430 const AddressRange &disasm_range, 431 bool force_live_memory = false); 432 433 static lldb::DisassemblerSP 434 DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 435 const char *flavor, const Address &start, const void *bytes, 436 size_t length, uint32_t max_num_instructions, 437 bool data_from_file); 438 439 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 440 const char *plugin_name, const char *flavor, 441 const ExecutionContext &exe_ctx, const Address &start, 442 Limit limit, bool mixed_source_and_assembly, 443 uint32_t num_mixed_context_lines, uint32_t options, 444 Stream &strm); 445 446 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 447 StackFrame &frame, Stream &strm); 448 449 // Constructors and Destructors 450 Disassembler(const ArchSpec &arch, const char *flavor); 451 ~Disassembler() override; 452 453 void PrintInstructions(Debugger &debugger, const ArchSpec &arch, 454 const ExecutionContext &exe_ctx, 455 bool mixed_source_and_assembly, 456 uint32_t num_mixed_context_lines, uint32_t options, 457 Stream &strm); 458 459 size_t ParseInstructions(Target &target, Address address, Limit limit, 460 Stream *error_strm_ptr, 461 bool force_live_memory = false); 462 463 virtual size_t DecodeInstructions(const Address &base_addr, 464 const DataExtractor &data, 465 lldb::offset_t data_offset, 466 size_t num_instructions, bool append, 467 bool data_from_file) = 0; 468 469 InstructionList &GetInstructionList(); 470 471 const InstructionList &GetInstructionList() const; 472 GetArchitecture()473 const ArchSpec &GetArchitecture() const { return m_arch; } 474 GetFlavor()475 const char *GetFlavor() const { return m_flavor.c_str(); } 476 477 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, 478 const char *flavor) = 0; 479 480 protected: 481 // SourceLine and SourceLinesToDisplay structures are only used in the mixed 482 // source and assembly display methods internal to this class. 483 484 struct SourceLine { 485 FileSpec file; 486 uint32_t line = LLDB_INVALID_LINE_NUMBER; 487 uint32_t column = 0; 488 489 SourceLine() = default; 490 491 bool operator==(const SourceLine &rhs) const { 492 return file == rhs.file && line == rhs.line && rhs.column == column; 493 } 494 495 bool operator!=(const SourceLine &rhs) const { 496 return file != rhs.file || line != rhs.line || column != rhs.column; 497 } 498 IsValidSourceLine499 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } 500 }; 501 502 struct SourceLinesToDisplay { 503 std::vector<SourceLine> lines; 504 505 // index of the "current" source line, if we want to highlight that when 506 // displaying the source lines. (as opposed to the surrounding source 507 // lines provided to give context) 508 size_t current_source_line = -1; 509 510 // Whether to print a blank line at the end of the source lines. 511 bool print_source_context_end_eol = true; 512 513 SourceLinesToDisplay() = default; 514 }; 515 516 // Get the function's declaration line number, hopefully a line number 517 // earlier than the opening curly brace at the start of the function body. 518 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); 519 520 // Add the provided SourceLine to the map of filenames-to-source-lines-seen. 521 static void AddLineToSourceLineTables( 522 SourceLine &line, 523 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen); 524 525 // Given a source line, determine if we should print it when we're doing 526 // mixed source & assembly output. We're currently using the 527 // target.process.thread.step-avoid-regexp setting (which is used for 528 // stepping over inlined STL functions by default) to determine what source 529 // lines to avoid showing. 530 // 531 // Returns true if this source line should be elided (if the source line 532 // should not be displayed). 533 static bool 534 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 535 const SymbolContext &sc, SourceLine &line); 536 537 static bool ElideMixedSourceAndDisassemblyLine(const ExecutionContext & exe_ctx,const SymbolContext & sc,LineEntry & line)538 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 539 const SymbolContext &sc, LineEntry &line) { 540 SourceLine sl; 541 sl.file = line.GetFile(); 542 sl.line = line.line; 543 sl.column = line.column; 544 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl); 545 }; 546 547 // Classes that inherit from Disassembler can see and modify these 548 ArchSpec m_arch; 549 InstructionList m_instruction_list; 550 std::string m_flavor; 551 552 private: 553 // For Disassembler only 554 Disassembler(const Disassembler &) = delete; 555 const Disassembler &operator=(const Disassembler &) = delete; 556 }; 557 558 } // namespace lldb_private 559 560 #endif // LLDB_CORE_DISASSEMBLER_H 561