1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "DisassemblerLLVMC.h" 10 11 #include "llvm-c/Disassembler.h" 12 #include "llvm/ADT/SmallString.h" 13 #include "llvm/ADT/StringExtras.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" 18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" 19 #include "llvm/MC/MCInst.h" 20 #include "llvm/MC/MCInstPrinter.h" 21 #include "llvm/MC/MCInstrInfo.h" 22 #include "llvm/MC/MCRegisterInfo.h" 23 #include "llvm/MC/MCSubtargetInfo.h" 24 #include "llvm/MC/MCTargetOptions.h" 25 #include "llvm/MC/TargetRegistry.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include "llvm/Support/ScopedPrinter.h" 28 #include "llvm/Support/TargetSelect.h" 29 #include "llvm/TargetParser/AArch64TargetParser.h" 30 31 #include "lldb/Core/Address.h" 32 #include "lldb/Core/Module.h" 33 #include "lldb/Symbol/SymbolContext.h" 34 #include "lldb/Target/ExecutionContext.h" 35 #include "lldb/Target/Process.h" 36 #include "lldb/Target/RegisterContext.h" 37 #include "lldb/Target/SectionLoadList.h" 38 #include "lldb/Target/StackFrame.h" 39 #include "lldb/Target/Target.h" 40 #include "lldb/Utility/DataExtractor.h" 41 #include "lldb/Utility/LLDBLog.h" 42 #include "lldb/Utility/Log.h" 43 #include "lldb/Utility/RegularExpression.h" 44 #include "lldb/Utility/Stream.h" 45 #include <optional> 46 47 using namespace lldb; 48 using namespace lldb_private; 49 50 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC) 51 52 class DisassemblerLLVMC::MCDisasmInstance { 53 public: 54 static std::unique_ptr<MCDisasmInstance> 55 Create(const char *triple, const char *cpu, const char *features_str, 56 unsigned flavor, DisassemblerLLVMC &owner); 57 58 ~MCDisasmInstance() = default; 59 60 uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, 61 lldb::addr_t pc, llvm::MCInst &mc_inst) const; 62 void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string, 63 std::string &comments_string); 64 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); 65 bool CanBranch(llvm::MCInst &mc_inst) const; 66 bool HasDelaySlot(llvm::MCInst &mc_inst) const; 67 bool IsCall(llvm::MCInst &mc_inst) const; 68 bool IsLoad(llvm::MCInst &mc_inst) const; 69 bool IsAuthenticated(llvm::MCInst &mc_inst) const; 70 71 private: 72 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 73 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 74 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 75 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 76 std::unique_ptr<llvm::MCContext> &&context_up, 77 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 78 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up); 79 80 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; 81 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; 82 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; 83 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; 84 std::unique_ptr<llvm::MCContext> m_context_up; 85 std::unique_ptr<llvm::MCDisassembler> m_disasm_up; 86 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; 87 }; 88 89 namespace x86 { 90 91 /// These are the three values deciding instruction control flow kind. 92 /// InstructionLengthDecode function decodes an instruction and get this struct. 93 /// 94 /// primary_opcode 95 /// Primary opcode of the instruction. 96 /// For one-byte opcode instruction, it's the first byte after prefix. 97 /// For two- and three-byte opcodes, it's the second byte. 98 /// 99 /// opcode_len 100 /// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3. 101 /// 102 /// modrm 103 /// ModR/M byte of the instruction. 104 /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0] 105 /// may contain a register or specify an addressing mode, depending on MOD. 106 struct InstructionOpcodeAndModrm { 107 uint8_t primary_opcode; 108 uint8_t opcode_len; 109 uint8_t modrm; 110 }; 111 112 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. 113 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and 114 /// instruction set. 115 /// 116 /// \param[in] opcode_and_modrm 117 /// Contains primary_opcode byte, its length, and ModR/M byte. 118 /// Refer to the struct InstructionOpcodeAndModrm for details. 119 /// 120 /// \return 121 /// The control flow kind of the instruction or 122 /// eInstructionControlFlowKindOther if the instruction doesn't affect 123 /// the control flow of the program. 124 lldb::InstructionControlFlowKind 125 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { 126 uint8_t opcode = opcode_and_modrm.primary_opcode; 127 uint8_t opcode_len = opcode_and_modrm.opcode_len; 128 uint8_t modrm = opcode_and_modrm.modrm; 129 130 if (opcode_len > 2) 131 return lldb::eInstructionControlFlowKindOther; 132 133 if (opcode >= 0x70 && opcode <= 0x7F) { 134 if (opcode_len == 1) 135 return lldb::eInstructionControlFlowKindCondJump; 136 else 137 return lldb::eInstructionControlFlowKindOther; 138 } 139 140 if (opcode >= 0x80 && opcode <= 0x8F) { 141 if (opcode_len == 2) 142 return lldb::eInstructionControlFlowKindCondJump; 143 else 144 return lldb::eInstructionControlFlowKindOther; 145 } 146 147 switch (opcode) { 148 case 0x9A: 149 if (opcode_len == 1) 150 return lldb::eInstructionControlFlowKindFarCall; 151 break; 152 case 0xFF: 153 if (opcode_len == 1) { 154 uint8_t modrm_reg = (modrm >> 3) & 7; 155 if (modrm_reg == 2) 156 return lldb::eInstructionControlFlowKindCall; 157 else if (modrm_reg == 3) 158 return lldb::eInstructionControlFlowKindFarCall; 159 else if (modrm_reg == 4) 160 return lldb::eInstructionControlFlowKindJump; 161 else if (modrm_reg == 5) 162 return lldb::eInstructionControlFlowKindFarJump; 163 } 164 break; 165 case 0xE8: 166 if (opcode_len == 1) 167 return lldb::eInstructionControlFlowKindCall; 168 break; 169 case 0xCD: 170 case 0xCC: 171 case 0xCE: 172 case 0xF1: 173 if (opcode_len == 1) 174 return lldb::eInstructionControlFlowKindFarCall; 175 break; 176 case 0xCF: 177 if (opcode_len == 1) 178 return lldb::eInstructionControlFlowKindFarReturn; 179 break; 180 case 0xE9: 181 case 0xEB: 182 if (opcode_len == 1) 183 return lldb::eInstructionControlFlowKindJump; 184 break; 185 case 0xEA: 186 if (opcode_len == 1) 187 return lldb::eInstructionControlFlowKindFarJump; 188 break; 189 case 0xE3: 190 case 0xE0: 191 case 0xE1: 192 case 0xE2: 193 if (opcode_len == 1) 194 return lldb::eInstructionControlFlowKindCondJump; 195 break; 196 case 0xC3: 197 case 0xC2: 198 if (opcode_len == 1) 199 return lldb::eInstructionControlFlowKindReturn; 200 break; 201 case 0xCB: 202 case 0xCA: 203 if (opcode_len == 1) 204 return lldb::eInstructionControlFlowKindFarReturn; 205 break; 206 case 0x05: 207 case 0x34: 208 if (opcode_len == 2) 209 return lldb::eInstructionControlFlowKindFarCall; 210 break; 211 case 0x35: 212 case 0x07: 213 if (opcode_len == 2) 214 return lldb::eInstructionControlFlowKindFarReturn; 215 break; 216 case 0x01: 217 if (opcode_len == 2) { 218 switch (modrm) { 219 case 0xc1: 220 return lldb::eInstructionControlFlowKindFarCall; 221 case 0xc2: 222 case 0xc3: 223 return lldb::eInstructionControlFlowKindFarReturn; 224 default: 225 break; 226 } 227 } 228 break; 229 default: 230 break; 231 } 232 233 return lldb::eInstructionControlFlowKindOther; 234 } 235 236 /// Decode an instruction into opcode, modrm and opcode_len. 237 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. 238 /// Opcodes in x86 are generally the first byte of instruction, though two-byte 239 /// instructions and prefixes exist. ModR/M is the byte following the opcode 240 /// and adds additional information for how the instruction is executed. 241 /// 242 /// \param[in] inst_bytes 243 /// Raw bytes of the instruction 244 /// 245 /// 246 /// \param[in] bytes_len 247 /// The length of the inst_bytes array. 248 /// 249 /// \param[in] is_exec_mode_64b 250 /// If true, the execution mode is 64 bit. 251 /// 252 /// \return 253 /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding 254 /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition 255 /// for more details. 256 /// Otherwise if the given instruction is invalid, returns std::nullopt. 257 std::optional<InstructionOpcodeAndModrm> 258 InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, 259 bool is_exec_mode_64b) { 260 int op_idx = 0; 261 bool prefix_done = false; 262 InstructionOpcodeAndModrm ret = {0, 0, 0}; 263 264 // In most cases, the primary_opcode is the first byte of the instruction 265 // but some instructions have a prefix to be skipped for these calculations. 266 // The following mapping is inspired from libipt's instruction decoding logic 267 // in `src/pt_ild.c` 268 while (!prefix_done) { 269 if (op_idx >= bytes_len) 270 return std::nullopt; 271 272 ret.primary_opcode = inst_bytes[op_idx]; 273 switch (ret.primary_opcode) { 274 // prefix_ignore 275 case 0x26: 276 case 0x2e: 277 case 0x36: 278 case 0x3e: 279 case 0x64: 280 case 0x65: 281 // prefix_osz, prefix_asz 282 case 0x66: 283 case 0x67: 284 // prefix_lock, prefix_f2, prefix_f3 285 case 0xf0: 286 case 0xf2: 287 case 0xf3: 288 op_idx++; 289 break; 290 291 // prefix_rex 292 case 0x40: 293 case 0x41: 294 case 0x42: 295 case 0x43: 296 case 0x44: 297 case 0x45: 298 case 0x46: 299 case 0x47: 300 case 0x48: 301 case 0x49: 302 case 0x4a: 303 case 0x4b: 304 case 0x4c: 305 case 0x4d: 306 case 0x4e: 307 case 0x4f: 308 if (is_exec_mode_64b) 309 op_idx++; 310 else 311 prefix_done = true; 312 break; 313 314 // prefix_vex_c4, c5 315 case 0xc5: 316 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 317 prefix_done = true; 318 break; 319 } 320 321 ret.opcode_len = 2; 322 ret.primary_opcode = inst_bytes[op_idx + 2]; 323 ret.modrm = inst_bytes[op_idx + 3]; 324 return ret; 325 326 case 0xc4: 327 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 328 prefix_done = true; 329 break; 330 } 331 ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; 332 ret.primary_opcode = inst_bytes[op_idx + 3]; 333 ret.modrm = inst_bytes[op_idx + 4]; 334 return ret; 335 336 // prefix_evex 337 case 0x62: 338 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 339 prefix_done = true; 340 break; 341 } 342 ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; 343 ret.primary_opcode = inst_bytes[op_idx + 4]; 344 ret.modrm = inst_bytes[op_idx + 5]; 345 return ret; 346 347 default: 348 prefix_done = true; 349 break; 350 } 351 } // prefix done 352 353 ret.primary_opcode = inst_bytes[op_idx]; 354 ret.modrm = inst_bytes[op_idx + 1]; 355 ret.opcode_len = 1; 356 357 // If the first opcode is 0F, it's two- or three- byte opcodes. 358 if (ret.primary_opcode == 0x0F) { 359 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 360 361 if (ret.primary_opcode == 0x38) { 362 ret.opcode_len = 3; 363 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 364 ret.modrm = inst_bytes[op_idx + 1]; 365 } else if (ret.primary_opcode == 0x3A) { 366 ret.opcode_len = 3; 367 ret.primary_opcode = inst_bytes[++op_idx]; 368 ret.modrm = inst_bytes[op_idx + 1]; 369 } else if ((ret.primary_opcode & 0xf8) == 0x38) { 370 ret.opcode_len = 0; 371 ret.primary_opcode = inst_bytes[++op_idx]; 372 ret.modrm = inst_bytes[op_idx + 1]; 373 } else if (ret.primary_opcode == 0x0F) { 374 ret.opcode_len = 3; 375 // opcode is 0x0F, no needs to update 376 ret.modrm = inst_bytes[op_idx + 1]; 377 } else { 378 ret.opcode_len = 2; 379 ret.modrm = inst_bytes[op_idx + 1]; 380 } 381 } 382 383 return ret; 384 } 385 386 lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, 387 Opcode m_opcode) { 388 std::optional<InstructionOpcodeAndModrm> ret; 389 390 if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { 391 // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes 392 return lldb::eInstructionControlFlowKindUnknown; 393 } 394 395 // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. 396 // These are the three values deciding instruction control flow kind. 397 ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), 398 m_opcode.GetByteSize(), is_exec_mode_64b); 399 if (!ret) 400 return lldb::eInstructionControlFlowKindUnknown; 401 else 402 return MapOpcodeIntoControlFlowKind(*ret); 403 } 404 405 } // namespace x86 406 407 class InstructionLLVMC : public lldb_private::Instruction { 408 public: 409 InstructionLLVMC(DisassemblerLLVMC &disasm, 410 const lldb_private::Address &address, 411 AddressClass addr_class) 412 : Instruction(address, addr_class), 413 m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( 414 disasm.shared_from_this())) {} 415 416 ~InstructionLLVMC() override = default; 417 418 bool DoesBranch() override { 419 VisitInstruction(); 420 return m_does_branch; 421 } 422 423 bool HasDelaySlot() override { 424 VisitInstruction(); 425 return m_has_delay_slot; 426 } 427 428 bool IsLoad() override { 429 VisitInstruction(); 430 return m_is_load; 431 } 432 433 bool IsAuthenticated() override { 434 VisitInstruction(); 435 return m_is_authenticated; 436 } 437 438 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { 439 DisassemblerScope disasm(*this); 440 return GetDisasmToUse(is_alternate_isa, disasm); 441 } 442 443 size_t Decode(const lldb_private::Disassembler &disassembler, 444 const lldb_private::DataExtractor &data, 445 lldb::offset_t data_offset) override { 446 // All we have to do is read the opcode which can be easy for some 447 // architectures 448 bool got_op = false; 449 DisassemblerScope disasm(*this); 450 if (disasm) { 451 const ArchSpec &arch = disasm->GetArchitecture(); 452 const lldb::ByteOrder byte_order = data.GetByteOrder(); 453 454 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); 455 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); 456 if (min_op_byte_size == max_op_byte_size) { 457 // Fixed size instructions, just read that amount of data. 458 if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size)) 459 return false; 460 461 switch (min_op_byte_size) { 462 case 1: 463 m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order); 464 got_op = true; 465 break; 466 467 case 2: 468 m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order); 469 got_op = true; 470 break; 471 472 case 4: 473 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 474 got_op = true; 475 break; 476 477 case 8: 478 m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order); 479 got_op = true; 480 break; 481 482 default: 483 m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), 484 min_op_byte_size); 485 got_op = true; 486 break; 487 } 488 } 489 if (!got_op) { 490 bool is_alternate_isa = false; 491 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 492 GetDisasmToUse(is_alternate_isa, disasm); 493 494 const llvm::Triple::ArchType machine = arch.GetMachine(); 495 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { 496 if (machine == llvm::Triple::thumb || is_alternate_isa) { 497 uint32_t thumb_opcode = data.GetU16(&data_offset); 498 if ((thumb_opcode & 0xe000) != 0xe000 || 499 ((thumb_opcode & 0x1800u) == 0)) { 500 m_opcode.SetOpcode16(thumb_opcode, byte_order); 501 m_is_valid = true; 502 } else { 503 thumb_opcode <<= 16; 504 thumb_opcode |= data.GetU16(&data_offset); 505 m_opcode.SetOpcode16_2(thumb_opcode, byte_order); 506 m_is_valid = true; 507 } 508 } else { 509 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 510 m_is_valid = true; 511 } 512 } else { 513 // The opcode isn't evenly sized, so we need to actually use the llvm 514 // disassembler to parse it and get the size. 515 uint8_t *opcode_data = 516 const_cast<uint8_t *>(data.PeekData(data_offset, 1)); 517 const size_t opcode_data_len = data.BytesLeft(data_offset); 518 const addr_t pc = m_address.GetFileAddress(); 519 llvm::MCInst inst; 520 521 const size_t inst_size = 522 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 523 if (inst_size == 0) 524 m_opcode.Clear(); 525 else { 526 m_opcode.SetOpcodeBytes(opcode_data, inst_size); 527 m_is_valid = true; 528 } 529 } 530 } 531 return m_opcode.GetByteSize(); 532 } 533 return 0; 534 } 535 536 void AppendComment(std::string &description) { 537 if (m_comment.empty()) 538 m_comment.swap(description); 539 else { 540 m_comment.append(", "); 541 m_comment.append(description); 542 } 543 } 544 545 lldb::InstructionControlFlowKind 546 GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override { 547 DisassemblerScope disasm(*this, exe_ctx); 548 if (disasm){ 549 if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86) 550 return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode); 551 else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64) 552 return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode); 553 } 554 555 return eInstructionControlFlowKindUnknown; 556 } 557 558 void CalculateMnemonicOperandsAndComment( 559 const lldb_private::ExecutionContext *exe_ctx) override { 560 DataExtractor data; 561 const AddressClass address_class = GetAddressClass(); 562 563 if (m_opcode.GetData(data)) { 564 std::string out_string; 565 std::string comment_string; 566 567 DisassemblerScope disasm(*this, exe_ctx); 568 if (disasm) { 569 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; 570 571 if (address_class == AddressClass::eCodeAlternateISA) 572 mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); 573 else 574 mc_disasm_ptr = disasm->m_disasm_up.get(); 575 576 lldb::addr_t pc = m_address.GetFileAddress(); 577 m_using_file_addr = true; 578 579 const bool data_from_file = disasm->m_data_from_file; 580 bool use_hex_immediates = true; 581 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; 582 583 if (exe_ctx) { 584 Target *target = exe_ctx->GetTargetPtr(); 585 if (target) { 586 use_hex_immediates = target->GetUseHexImmediates(); 587 hex_style = target->GetHexImmediateStyle(); 588 589 if (!data_from_file) { 590 const lldb::addr_t load_addr = m_address.GetLoadAddress(target); 591 if (load_addr != LLDB_INVALID_ADDRESS) { 592 pc = load_addr; 593 m_using_file_addr = false; 594 } 595 } 596 } 597 } 598 599 const uint8_t *opcode_data = data.GetDataStart(); 600 const size_t opcode_data_len = data.GetByteSize(); 601 llvm::MCInst inst; 602 size_t inst_size = 603 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 604 605 if (inst_size > 0) { 606 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); 607 mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string); 608 609 if (!comment_string.empty()) { 610 AppendComment(comment_string); 611 } 612 } 613 614 if (inst_size == 0) { 615 m_comment.assign("unknown opcode"); 616 inst_size = m_opcode.GetByteSize(); 617 StreamString mnemonic_strm; 618 lldb::offset_t offset = 0; 619 lldb::ByteOrder byte_order = data.GetByteOrder(); 620 switch (inst_size) { 621 case 1: { 622 const uint8_t uval8 = data.GetU8(&offset); 623 m_opcode.SetOpcode8(uval8, byte_order); 624 m_opcode_name.assign(".byte"); 625 mnemonic_strm.Printf("0x%2.2x", uval8); 626 } break; 627 case 2: { 628 const uint16_t uval16 = data.GetU16(&offset); 629 m_opcode.SetOpcode16(uval16, byte_order); 630 m_opcode_name.assign(".short"); 631 mnemonic_strm.Printf("0x%4.4x", uval16); 632 } break; 633 case 4: { 634 const uint32_t uval32 = data.GetU32(&offset); 635 m_opcode.SetOpcode32(uval32, byte_order); 636 m_opcode_name.assign(".long"); 637 mnemonic_strm.Printf("0x%8.8x", uval32); 638 } break; 639 case 8: { 640 const uint64_t uval64 = data.GetU64(&offset); 641 m_opcode.SetOpcode64(uval64, byte_order); 642 m_opcode_name.assign(".quad"); 643 mnemonic_strm.Printf("0x%16.16" PRIx64, uval64); 644 } break; 645 default: 646 if (inst_size == 0) 647 return; 648 else { 649 const uint8_t *bytes = data.PeekData(offset, inst_size); 650 if (bytes == nullptr) 651 return; 652 m_opcode_name.assign(".byte"); 653 m_opcode.SetOpcodeBytes(bytes, inst_size); 654 mnemonic_strm.Printf("0x%2.2x", bytes[0]); 655 for (uint32_t i = 1; i < inst_size; ++i) 656 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]); 657 } 658 break; 659 } 660 m_mnemonics = std::string(mnemonic_strm.GetString()); 661 return; 662 } 663 664 static RegularExpression s_regex( 665 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?")); 666 667 llvm::SmallVector<llvm::StringRef, 4> matches; 668 if (s_regex.Execute(out_string, &matches)) { 669 m_opcode_name = matches[1].str(); 670 m_mnemonics = matches[2].str(); 671 } 672 } 673 } 674 } 675 676 bool IsValid() const { return m_is_valid; } 677 678 bool UsingFileAddress() const { return m_using_file_addr; } 679 size_t GetByteSize() const { return m_opcode.GetByteSize(); } 680 681 /// Grants exclusive access to the disassembler and initializes it with the 682 /// given InstructionLLVMC and an optional ExecutionContext. 683 class DisassemblerScope { 684 std::shared_ptr<DisassemblerLLVMC> m_disasm; 685 686 public: 687 explicit DisassemblerScope( 688 InstructionLLVMC &i, 689 const lldb_private::ExecutionContext *exe_ctx = nullptr) 690 : m_disasm(i.m_disasm_wp.lock()) { 691 m_disasm->m_mutex.lock(); 692 m_disasm->m_inst = &i; 693 m_disasm->m_exe_ctx = exe_ctx; 694 } 695 ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } 696 697 /// Evaluates to true if this scope contains a valid disassembler. 698 operator bool() const { return static_cast<bool>(m_disasm); } 699 700 std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } 701 }; 702 703 static llvm::StringRef::const_iterator 704 ConsumeWhitespace(llvm::StringRef::const_iterator osi, 705 llvm::StringRef::const_iterator ose) { 706 while (osi != ose) { 707 switch (*osi) { 708 default: 709 return osi; 710 case ' ': 711 case '\t': 712 break; 713 } 714 ++osi; 715 } 716 717 return osi; 718 } 719 720 static std::pair<bool, llvm::StringRef::const_iterator> 721 ConsumeChar(llvm::StringRef::const_iterator osi, const char c, 722 llvm::StringRef::const_iterator ose) { 723 bool found = false; 724 725 osi = ConsumeWhitespace(osi, ose); 726 if (osi != ose && *osi == c) { 727 found = true; 728 ++osi; 729 } 730 731 return std::make_pair(found, osi); 732 } 733 734 static std::pair<Operand, llvm::StringRef::const_iterator> 735 ParseRegisterName(llvm::StringRef::const_iterator osi, 736 llvm::StringRef::const_iterator ose) { 737 Operand ret; 738 ret.m_type = Operand::Type::Register; 739 std::string str; 740 741 osi = ConsumeWhitespace(osi, ose); 742 743 while (osi != ose) { 744 if (*osi >= '0' && *osi <= '9') { 745 if (str.empty()) { 746 return std::make_pair(Operand(), osi); 747 } else { 748 str.push_back(*osi); 749 } 750 } else if (*osi >= 'a' && *osi <= 'z') { 751 str.push_back(*osi); 752 } else { 753 switch (*osi) { 754 default: 755 if (str.empty()) { 756 return std::make_pair(Operand(), osi); 757 } else { 758 ret.m_register = ConstString(str); 759 return std::make_pair(ret, osi); 760 } 761 case '%': 762 if (!str.empty()) { 763 return std::make_pair(Operand(), osi); 764 } 765 break; 766 } 767 } 768 ++osi; 769 } 770 771 ret.m_register = ConstString(str); 772 return std::make_pair(ret, osi); 773 } 774 775 static std::pair<Operand, llvm::StringRef::const_iterator> 776 ParseImmediate(llvm::StringRef::const_iterator osi, 777 llvm::StringRef::const_iterator ose) { 778 Operand ret; 779 ret.m_type = Operand::Type::Immediate; 780 std::string str; 781 bool is_hex = false; 782 783 osi = ConsumeWhitespace(osi, ose); 784 785 while (osi != ose) { 786 if (*osi >= '0' && *osi <= '9') { 787 str.push_back(*osi); 788 } else if (*osi >= 'a' && *osi <= 'f') { 789 if (is_hex) { 790 str.push_back(*osi); 791 } else { 792 return std::make_pair(Operand(), osi); 793 } 794 } else { 795 switch (*osi) { 796 default: 797 if (str.empty()) { 798 return std::make_pair(Operand(), osi); 799 } else { 800 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 801 return std::make_pair(ret, osi); 802 } 803 case 'x': 804 if (!str.compare("0")) { 805 is_hex = true; 806 str.push_back(*osi); 807 } else { 808 return std::make_pair(Operand(), osi); 809 } 810 break; 811 case '#': 812 case '$': 813 if (!str.empty()) { 814 return std::make_pair(Operand(), osi); 815 } 816 break; 817 case '-': 818 if (str.empty()) { 819 ret.m_negative = true; 820 } else { 821 return std::make_pair(Operand(), osi); 822 } 823 } 824 } 825 ++osi; 826 } 827 828 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 829 return std::make_pair(ret, osi); 830 } 831 832 // -0x5(%rax,%rax,2) 833 static std::pair<Operand, llvm::StringRef::const_iterator> 834 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, 835 llvm::StringRef::const_iterator ose) { 836 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 837 ParseImmediate(osi, ose); 838 if (offset_and_iterator.first.IsValid()) { 839 osi = offset_and_iterator.second; 840 } 841 842 bool found = false; 843 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 844 if (!found) { 845 return std::make_pair(Operand(), osi); 846 } 847 848 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 849 ParseRegisterName(osi, ose); 850 if (base_and_iterator.first.IsValid()) { 851 osi = base_and_iterator.second; 852 } else { 853 return std::make_pair(Operand(), osi); 854 } 855 856 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 857 if (!found) { 858 return std::make_pair(Operand(), osi); 859 } 860 861 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = 862 ParseRegisterName(osi, ose); 863 if (index_and_iterator.first.IsValid()) { 864 osi = index_and_iterator.second; 865 } else { 866 return std::make_pair(Operand(), osi); 867 } 868 869 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 870 if (!found) { 871 return std::make_pair(Operand(), osi); 872 } 873 874 std::pair<Operand, llvm::StringRef::const_iterator> 875 multiplier_and_iterator = ParseImmediate(osi, ose); 876 if (index_and_iterator.first.IsValid()) { 877 osi = index_and_iterator.second; 878 } else { 879 return std::make_pair(Operand(), osi); 880 } 881 882 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 883 if (!found) { 884 return std::make_pair(Operand(), osi); 885 } 886 887 Operand product; 888 product.m_type = Operand::Type::Product; 889 product.m_children.push_back(index_and_iterator.first); 890 product.m_children.push_back(multiplier_and_iterator.first); 891 892 Operand index; 893 index.m_type = Operand::Type::Sum; 894 index.m_children.push_back(base_and_iterator.first); 895 index.m_children.push_back(product); 896 897 if (offset_and_iterator.first.IsValid()) { 898 Operand offset; 899 offset.m_type = Operand::Type::Sum; 900 offset.m_children.push_back(offset_and_iterator.first); 901 offset.m_children.push_back(index); 902 903 Operand deref; 904 deref.m_type = Operand::Type::Dereference; 905 deref.m_children.push_back(offset); 906 return std::make_pair(deref, osi); 907 } else { 908 Operand deref; 909 deref.m_type = Operand::Type::Dereference; 910 deref.m_children.push_back(index); 911 return std::make_pair(deref, osi); 912 } 913 } 914 915 // -0x10(%rbp) 916 static std::pair<Operand, llvm::StringRef::const_iterator> 917 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, 918 llvm::StringRef::const_iterator ose) { 919 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 920 ParseImmediate(osi, ose); 921 if (offset_and_iterator.first.IsValid()) { 922 osi = offset_and_iterator.second; 923 } 924 925 bool found = false; 926 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 927 if (!found) { 928 return std::make_pair(Operand(), osi); 929 } 930 931 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 932 ParseRegisterName(osi, ose); 933 if (base_and_iterator.first.IsValid()) { 934 osi = base_and_iterator.second; 935 } else { 936 return std::make_pair(Operand(), osi); 937 } 938 939 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 940 if (!found) { 941 return std::make_pair(Operand(), osi); 942 } 943 944 if (offset_and_iterator.first.IsValid()) { 945 Operand offset; 946 offset.m_type = Operand::Type::Sum; 947 offset.m_children.push_back(offset_and_iterator.first); 948 offset.m_children.push_back(base_and_iterator.first); 949 950 Operand deref; 951 deref.m_type = Operand::Type::Dereference; 952 deref.m_children.push_back(offset); 953 return std::make_pair(deref, osi); 954 } else { 955 Operand deref; 956 deref.m_type = Operand::Type::Dereference; 957 deref.m_children.push_back(base_and_iterator.first); 958 return std::make_pair(deref, osi); 959 } 960 } 961 962 // [sp, #8]! 963 static std::pair<Operand, llvm::StringRef::const_iterator> 964 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, 965 llvm::StringRef::const_iterator ose) { 966 bool found = false; 967 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 968 if (!found) { 969 return std::make_pair(Operand(), osi); 970 } 971 972 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 973 ParseRegisterName(osi, ose); 974 if (base_and_iterator.first.IsValid()) { 975 osi = base_and_iterator.second; 976 } else { 977 return std::make_pair(Operand(), osi); 978 } 979 980 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 981 if (!found) { 982 return std::make_pair(Operand(), osi); 983 } 984 985 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 986 ParseImmediate(osi, ose); 987 if (offset_and_iterator.first.IsValid()) { 988 osi = offset_and_iterator.second; 989 } 990 991 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 992 if (!found) { 993 return std::make_pair(Operand(), osi); 994 } 995 996 Operand offset; 997 offset.m_type = Operand::Type::Sum; 998 offset.m_children.push_back(offset_and_iterator.first); 999 offset.m_children.push_back(base_and_iterator.first); 1000 1001 Operand deref; 1002 deref.m_type = Operand::Type::Dereference; 1003 deref.m_children.push_back(offset); 1004 return std::make_pair(deref, osi); 1005 } 1006 1007 // [sp] 1008 static std::pair<Operand, llvm::StringRef::const_iterator> 1009 ParseARMDerefAccess(llvm::StringRef::const_iterator osi, 1010 llvm::StringRef::const_iterator ose) { 1011 bool found = false; 1012 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 1013 if (!found) { 1014 return std::make_pair(Operand(), osi); 1015 } 1016 1017 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 1018 ParseRegisterName(osi, ose); 1019 if (base_and_iterator.first.IsValid()) { 1020 osi = base_and_iterator.second; 1021 } else { 1022 return std::make_pair(Operand(), osi); 1023 } 1024 1025 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 1026 if (!found) { 1027 return std::make_pair(Operand(), osi); 1028 } 1029 1030 Operand deref; 1031 deref.m_type = Operand::Type::Dereference; 1032 deref.m_children.push_back(base_and_iterator.first); 1033 return std::make_pair(deref, osi); 1034 } 1035 1036 static void DumpOperand(const Operand &op, Stream &s) { 1037 switch (op.m_type) { 1038 case Operand::Type::Dereference: 1039 s.PutCString("*"); 1040 DumpOperand(op.m_children[0], s); 1041 break; 1042 case Operand::Type::Immediate: 1043 if (op.m_negative) { 1044 s.PutCString("-"); 1045 } 1046 s.PutCString(llvm::to_string(op.m_immediate)); 1047 break; 1048 case Operand::Type::Invalid: 1049 s.PutCString("Invalid"); 1050 break; 1051 case Operand::Type::Product: 1052 s.PutCString("("); 1053 DumpOperand(op.m_children[0], s); 1054 s.PutCString("*"); 1055 DumpOperand(op.m_children[1], s); 1056 s.PutCString(")"); 1057 break; 1058 case Operand::Type::Register: 1059 s.PutCString(op.m_register.GetStringRef()); 1060 break; 1061 case Operand::Type::Sum: 1062 s.PutCString("("); 1063 DumpOperand(op.m_children[0], s); 1064 s.PutCString("+"); 1065 DumpOperand(op.m_children[1], s); 1066 s.PutCString(")"); 1067 break; 1068 } 1069 } 1070 1071 bool ParseOperands( 1072 llvm::SmallVectorImpl<Instruction::Operand> &operands) override { 1073 const char *operands_string = GetOperands(nullptr); 1074 1075 if (!operands_string) { 1076 return false; 1077 } 1078 1079 llvm::StringRef operands_ref(operands_string); 1080 1081 llvm::StringRef::const_iterator osi = operands_ref.begin(); 1082 llvm::StringRef::const_iterator ose = operands_ref.end(); 1083 1084 while (osi != ose) { 1085 Operand operand; 1086 llvm::StringRef::const_iterator iter; 1087 1088 if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose), 1089 operand.IsValid()) || 1090 (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose), 1091 operand.IsValid()) || 1092 (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose), 1093 operand.IsValid()) || 1094 (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose), 1095 operand.IsValid()) || 1096 (std::tie(operand, iter) = ParseRegisterName(osi, ose), 1097 operand.IsValid()) || 1098 (std::tie(operand, iter) = ParseImmediate(osi, ose), 1099 operand.IsValid())) { 1100 osi = iter; 1101 operands.push_back(operand); 1102 } else { 1103 return false; 1104 } 1105 1106 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = 1107 ConsumeChar(osi, ',', ose); 1108 if (found_and_iter.first) { 1109 osi = found_and_iter.second; 1110 } 1111 1112 osi = ConsumeWhitespace(osi, ose); 1113 } 1114 1115 DisassemblerSP disasm_sp = m_disasm_wp.lock(); 1116 1117 if (disasm_sp && operands.size() > 1) { 1118 // TODO tie this into the MC Disassembler's notion of clobbers. 1119 switch (disasm_sp->GetArchitecture().GetMachine()) { 1120 default: 1121 break; 1122 case llvm::Triple::x86: 1123 case llvm::Triple::x86_64: 1124 operands[operands.size() - 1].m_clobbered = true; 1125 break; 1126 case llvm::Triple::arm: 1127 operands[0].m_clobbered = true; 1128 break; 1129 } 1130 } 1131 1132 if (Log *log = GetLog(LLDBLog::Process)) { 1133 StreamString ss; 1134 1135 ss.Printf("[%s] expands to %zu operands:\n", operands_string, 1136 operands.size()); 1137 for (const Operand &operand : operands) { 1138 ss.PutCString(" "); 1139 DumpOperand(operand, ss); 1140 ss.PutCString("\n"); 1141 } 1142 1143 log->PutString(ss.GetString()); 1144 } 1145 1146 return true; 1147 } 1148 1149 bool IsCall() override { 1150 VisitInstruction(); 1151 return m_is_call; 1152 } 1153 1154 protected: 1155 std::weak_ptr<DisassemblerLLVMC> m_disasm_wp; 1156 1157 bool m_is_valid = false; 1158 bool m_using_file_addr = false; 1159 bool m_has_visited_instruction = false; 1160 1161 // Be conservative. If we didn't understand the instruction, say it: 1162 // - Might branch 1163 // - Does not have a delay slot 1164 // - Is not a call 1165 // - Is not a load 1166 // - Is not an authenticated instruction 1167 bool m_does_branch = true; 1168 bool m_has_delay_slot = false; 1169 bool m_is_call = false; 1170 bool m_is_load = false; 1171 bool m_is_authenticated = false; 1172 1173 void VisitInstruction() { 1174 if (m_has_visited_instruction) 1175 return; 1176 1177 DisassemblerScope disasm(*this); 1178 if (!disasm) 1179 return; 1180 1181 DataExtractor data; 1182 if (!m_opcode.GetData(data)) 1183 return; 1184 1185 bool is_alternate_isa; 1186 lldb::addr_t pc = m_address.GetFileAddress(); 1187 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 1188 GetDisasmToUse(is_alternate_isa, disasm); 1189 const uint8_t *opcode_data = data.GetDataStart(); 1190 const size_t opcode_data_len = data.GetByteSize(); 1191 llvm::MCInst inst; 1192 const size_t inst_size = 1193 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 1194 if (inst_size == 0) 1195 return; 1196 1197 m_has_visited_instruction = true; 1198 m_does_branch = mc_disasm_ptr->CanBranch(inst); 1199 m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst); 1200 m_is_call = mc_disasm_ptr->IsCall(inst); 1201 m_is_load = mc_disasm_ptr->IsLoad(inst); 1202 m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst); 1203 } 1204 1205 private: 1206 DisassemblerLLVMC::MCDisasmInstance * 1207 GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { 1208 is_alternate_isa = false; 1209 if (disasm) { 1210 if (disasm->m_alternate_disasm_up) { 1211 const AddressClass address_class = GetAddressClass(); 1212 1213 if (address_class == AddressClass::eCodeAlternateISA) { 1214 is_alternate_isa = true; 1215 return disasm->m_alternate_disasm_up.get(); 1216 } 1217 } 1218 return disasm->m_disasm_up.get(); 1219 } 1220 return nullptr; 1221 } 1222 }; 1223 1224 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> 1225 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, 1226 const char *features_str, 1227 unsigned flavor, 1228 DisassemblerLLVMC &owner) { 1229 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; 1230 1231 std::string Status; 1232 const llvm::Target *curr_target = 1233 llvm::TargetRegistry::lookupTarget(triple, Status); 1234 if (!curr_target) 1235 return Instance(); 1236 1237 std::unique_ptr<llvm::MCInstrInfo> instr_info_up( 1238 curr_target->createMCInstrInfo()); 1239 if (!instr_info_up) 1240 return Instance(); 1241 1242 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( 1243 curr_target->createMCRegInfo(triple)); 1244 if (!reg_info_up) 1245 return Instance(); 1246 1247 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( 1248 curr_target->createMCSubtargetInfo(triple, cpu, features_str)); 1249 if (!subtarget_info_up) 1250 return Instance(); 1251 1252 llvm::MCTargetOptions MCOptions; 1253 std::unique_ptr<llvm::MCAsmInfo> asm_info_up( 1254 curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions)); 1255 if (!asm_info_up) 1256 return Instance(); 1257 1258 std::unique_ptr<llvm::MCContext> context_up( 1259 new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), 1260 reg_info_up.get(), subtarget_info_up.get())); 1261 if (!context_up) 1262 return Instance(); 1263 1264 std::unique_ptr<llvm::MCDisassembler> disasm_up( 1265 curr_target->createMCDisassembler(*subtarget_info_up, *context_up)); 1266 if (!disasm_up) 1267 return Instance(); 1268 1269 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( 1270 curr_target->createMCRelocationInfo(triple, *context_up)); 1271 if (!rel_info_up) 1272 return Instance(); 1273 1274 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( 1275 curr_target->createMCSymbolizer( 1276 triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner, 1277 context_up.get(), std::move(rel_info_up))); 1278 disasm_up->setSymbolizer(std::move(symbolizer_up)); 1279 1280 unsigned asm_printer_variant = 1281 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; 1282 1283 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( 1284 curr_target->createMCInstPrinter(llvm::Triple{triple}, 1285 asm_printer_variant, *asm_info_up, 1286 *instr_info_up, *reg_info_up)); 1287 if (!instr_printer_up) 1288 return Instance(); 1289 1290 return Instance( 1291 new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up), 1292 std::move(subtarget_info_up), std::move(asm_info_up), 1293 std::move(context_up), std::move(disasm_up), 1294 std::move(instr_printer_up))); 1295 } 1296 1297 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( 1298 std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 1299 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 1300 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 1301 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 1302 std::unique_ptr<llvm::MCContext> &&context_up, 1303 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 1304 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up) 1305 : m_instr_info_up(std::move(instr_info_up)), 1306 m_reg_info_up(std::move(reg_info_up)), 1307 m_subtarget_info_up(std::move(subtarget_info_up)), 1308 m_asm_info_up(std::move(asm_info_up)), 1309 m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), 1310 m_instr_printer_up(std::move(instr_printer_up)) { 1311 assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && 1312 m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); 1313 } 1314 1315 uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( 1316 const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, 1317 llvm::MCInst &mc_inst) const { 1318 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); 1319 llvm::MCDisassembler::DecodeStatus status; 1320 1321 uint64_t new_inst_size; 1322 status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc, 1323 llvm::nulls()); 1324 if (status == llvm::MCDisassembler::Success) 1325 return new_inst_size; 1326 else 1327 return 0; 1328 } 1329 1330 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( 1331 llvm::MCInst &mc_inst, std::string &inst_string, 1332 std::string &comments_string) { 1333 llvm::raw_string_ostream inst_stream(inst_string); 1334 llvm::raw_string_ostream comments_stream(comments_string); 1335 1336 m_instr_printer_up->setCommentStream(comments_stream); 1337 m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(), 1338 *m_subtarget_info_up, inst_stream); 1339 m_instr_printer_up->setCommentStream(llvm::nulls()); 1340 comments_stream.flush(); 1341 1342 static std::string g_newlines("\r\n"); 1343 1344 for (size_t newline_pos = 0; 1345 (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) != 1346 comments_string.npos; 1347 /**/) { 1348 comments_string.replace(comments_string.begin() + newline_pos, 1349 comments_string.begin() + newline_pos + 1, 1, ' '); 1350 } 1351 } 1352 1353 void DisassemblerLLVMC::MCDisasmInstance::SetStyle( 1354 bool use_hex_immed, HexImmediateStyle hex_style) { 1355 m_instr_printer_up->setPrintImmHex(use_hex_immed); 1356 switch (hex_style) { 1357 case eHexStyleC: 1358 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); 1359 break; 1360 case eHexStyleAsm: 1361 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); 1362 break; 1363 } 1364 } 1365 1366 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( 1367 llvm::MCInst &mc_inst) const { 1368 return m_instr_info_up->get(mc_inst.getOpcode()) 1369 .mayAffectControlFlow(mc_inst, *m_reg_info_up); 1370 } 1371 1372 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( 1373 llvm::MCInst &mc_inst) const { 1374 return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot(); 1375 } 1376 1377 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { 1378 return m_instr_info_up->get(mc_inst.getOpcode()).isCall(); 1379 } 1380 1381 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { 1382 return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad(); 1383 } 1384 1385 bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( 1386 llvm::MCInst &mc_inst) const { 1387 const auto &InstrDesc = m_instr_info_up->get(mc_inst.getOpcode()); 1388 1389 // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 1390 // == 'a' + 'c') as authenticated instructions for reporting purposes, in 1391 // addition to the standard authenticated instructions specified in ARMv8.3. 1392 bool IsBrkC47x = false; 1393 if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { 1394 const llvm::MCOperand &Op0 = mc_inst.getOperand(0); 1395 if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) 1396 IsBrkC47x = true; 1397 } 1398 1399 return InstrDesc.isAuthenticated() || IsBrkC47x; 1400 } 1401 1402 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, 1403 const char *flavor_string) 1404 : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), 1405 m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS), 1406 m_adrp_insn() { 1407 if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) { 1408 m_flavor.assign("default"); 1409 } 1410 1411 unsigned flavor = ~0U; 1412 llvm::Triple triple = arch.GetTriple(); 1413 1414 // So far the only supported flavor is "intel" on x86. The base class will 1415 // set this correctly coming in. 1416 if (triple.getArch() == llvm::Triple::x86 || 1417 triple.getArch() == llvm::Triple::x86_64) { 1418 if (m_flavor == "intel") { 1419 flavor = 1; 1420 } else if (m_flavor == "att") { 1421 flavor = 0; 1422 } 1423 } 1424 1425 ArchSpec thumb_arch(arch); 1426 if (triple.getArch() == llvm::Triple::arm) { 1427 std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); 1428 // Replace "arm" with "thumb" so we get all thumb variants correct 1429 if (thumb_arch_name.size() > 3) { 1430 thumb_arch_name.erase(0, 3); 1431 thumb_arch_name.insert(0, "thumb"); 1432 } else { 1433 thumb_arch_name = "thumbv9.3a"; 1434 } 1435 thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); 1436 } 1437 1438 // If no sub architecture specified then use the most recent arm architecture 1439 // so the disassembler will return all instructions. Without it we will see a 1440 // lot of unknown opcodes if the code uses instructions which are not 1441 // available in the oldest arm version (which is used when no sub architecture 1442 // is specified). 1443 if (triple.getArch() == llvm::Triple::arm && 1444 triple.getSubArch() == llvm::Triple::NoSubArch) 1445 triple.setArchName("armv9.3a"); 1446 1447 std::string features_str; 1448 const char *triple_str = triple.getTriple().c_str(); 1449 1450 // ARM Cortex M0-M7 devices only execute thumb instructions 1451 if (arch.IsAlwaysThumbInstructions()) { 1452 triple_str = thumb_arch.GetTriple().getTriple().c_str(); 1453 features_str += "+fp-armv8,"; 1454 } 1455 1456 const char *cpu = ""; 1457 1458 switch (arch.GetCore()) { 1459 case ArchSpec::eCore_mips32: 1460 case ArchSpec::eCore_mips32el: 1461 cpu = "mips32"; 1462 break; 1463 case ArchSpec::eCore_mips32r2: 1464 case ArchSpec::eCore_mips32r2el: 1465 cpu = "mips32r2"; 1466 break; 1467 case ArchSpec::eCore_mips32r3: 1468 case ArchSpec::eCore_mips32r3el: 1469 cpu = "mips32r3"; 1470 break; 1471 case ArchSpec::eCore_mips32r5: 1472 case ArchSpec::eCore_mips32r5el: 1473 cpu = "mips32r5"; 1474 break; 1475 case ArchSpec::eCore_mips32r6: 1476 case ArchSpec::eCore_mips32r6el: 1477 cpu = "mips32r6"; 1478 break; 1479 case ArchSpec::eCore_mips64: 1480 case ArchSpec::eCore_mips64el: 1481 cpu = "mips64"; 1482 break; 1483 case ArchSpec::eCore_mips64r2: 1484 case ArchSpec::eCore_mips64r2el: 1485 cpu = "mips64r2"; 1486 break; 1487 case ArchSpec::eCore_mips64r3: 1488 case ArchSpec::eCore_mips64r3el: 1489 cpu = "mips64r3"; 1490 break; 1491 case ArchSpec::eCore_mips64r5: 1492 case ArchSpec::eCore_mips64r5el: 1493 cpu = "mips64r5"; 1494 break; 1495 case ArchSpec::eCore_mips64r6: 1496 case ArchSpec::eCore_mips64r6el: 1497 cpu = "mips64r6"; 1498 break; 1499 default: 1500 cpu = ""; 1501 break; 1502 } 1503 1504 if (arch.IsMIPS()) { 1505 uint32_t arch_flags = arch.GetFlags(); 1506 if (arch_flags & ArchSpec::eMIPSAse_msa) 1507 features_str += "+msa,"; 1508 if (arch_flags & ArchSpec::eMIPSAse_dsp) 1509 features_str += "+dsp,"; 1510 if (arch_flags & ArchSpec::eMIPSAse_dspr2) 1511 features_str += "+dspr2,"; 1512 } 1513 1514 // If any AArch64 variant, enable latest ISA with all extensions. 1515 if (triple.isAArch64()) { 1516 features_str += "+all,"; 1517 1518 if (triple.getVendor() == llvm::Triple::Apple) 1519 cpu = "apple-latest"; 1520 } 1521 1522 if (triple.isRISCV()) { 1523 uint32_t arch_flags = arch.GetFlags(); 1524 if (arch_flags & ArchSpec::eRISCV_rvc) 1525 features_str += "+c,"; 1526 if (arch_flags & ArchSpec::eRISCV_rve) 1527 features_str += "+e,"; 1528 if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == 1529 ArchSpec::eRISCV_float_abi_single) 1530 features_str += "+f,"; 1531 if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == 1532 ArchSpec::eRISCV_float_abi_double) 1533 features_str += "+f,+d,"; 1534 if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == 1535 ArchSpec::eRISCV_float_abi_quad) 1536 features_str += "+f,+d,+q,"; 1537 // FIXME: how do we detect features such as `+a`, `+m`? 1538 } 1539 1540 // We use m_disasm_up.get() to tell whether we are valid or not, so if this 1541 // isn't good for some reason, we won't be valid and FindPlugin will fail and 1542 // we won't get used. 1543 m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(), 1544 flavor, *this); 1545 1546 llvm::Triple::ArchType llvm_arch = triple.getArch(); 1547 1548 // For arm CPUs that can execute arm or thumb instructions, also create a 1549 // thumb instruction disassembler. 1550 if (llvm_arch == llvm::Triple::arm) { 1551 std::string thumb_triple(thumb_arch.GetTriple().getTriple()); 1552 m_alternate_disasm_up = 1553 MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(), 1554 flavor, *this); 1555 if (!m_alternate_disasm_up) 1556 m_disasm_up.reset(); 1557 1558 } else if (arch.IsMIPS()) { 1559 /* Create alternate disassembler for MIPS16 and microMIPS */ 1560 uint32_t arch_flags = arch.GetFlags(); 1561 if (arch_flags & ArchSpec::eMIPSAse_mips16) 1562 features_str += "+mips16,"; 1563 else if (arch_flags & ArchSpec::eMIPSAse_micromips) 1564 features_str += "+micromips,"; 1565 1566 m_alternate_disasm_up = MCDisasmInstance::Create( 1567 triple_str, cpu, features_str.c_str(), flavor, *this); 1568 if (!m_alternate_disasm_up) 1569 m_disasm_up.reset(); 1570 } 1571 } 1572 1573 DisassemblerLLVMC::~DisassemblerLLVMC() = default; 1574 1575 lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, 1576 const char *flavor) { 1577 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { 1578 auto disasm_sp = std::make_shared<DisassemblerLLVMC>(arch, flavor); 1579 if (disasm_sp && disasm_sp->IsValid()) 1580 return disasm_sp; 1581 } 1582 return lldb::DisassemblerSP(); 1583 } 1584 1585 size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr, 1586 const DataExtractor &data, 1587 lldb::offset_t data_offset, 1588 size_t num_instructions, 1589 bool append, bool data_from_file) { 1590 if (!append) 1591 m_instruction_list.Clear(); 1592 1593 if (!IsValid()) 1594 return 0; 1595 1596 m_data_from_file = data_from_file; 1597 uint32_t data_cursor = data_offset; 1598 const size_t data_byte_size = data.GetByteSize(); 1599 uint32_t instructions_parsed = 0; 1600 Address inst_addr(base_addr); 1601 1602 while (data_cursor < data_byte_size && 1603 instructions_parsed < num_instructions) { 1604 1605 AddressClass address_class = AddressClass::eCode; 1606 1607 if (m_alternate_disasm_up) 1608 address_class = inst_addr.GetAddressClass(); 1609 1610 InstructionSP inst_sp( 1611 new InstructionLLVMC(*this, inst_addr, address_class)); 1612 1613 if (!inst_sp) 1614 break; 1615 1616 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor); 1617 1618 if (inst_size == 0) 1619 break; 1620 1621 m_instruction_list.Append(inst_sp); 1622 data_cursor += inst_size; 1623 inst_addr.Slide(inst_size); 1624 instructions_parsed++; 1625 } 1626 1627 return data_cursor - data_offset; 1628 } 1629 1630 void DisassemblerLLVMC::Initialize() { 1631 PluginManager::RegisterPlugin(GetPluginNameStatic(), 1632 "Disassembler that uses LLVM MC to disassemble " 1633 "i386, x86_64, ARM, and ARM64.", 1634 CreateInstance); 1635 1636 llvm::InitializeAllTargetInfos(); 1637 llvm::InitializeAllTargetMCs(); 1638 llvm::InitializeAllAsmParsers(); 1639 llvm::InitializeAllDisassemblers(); 1640 } 1641 1642 void DisassemblerLLVMC::Terminate() { 1643 PluginManager::UnregisterPlugin(CreateInstance); 1644 } 1645 1646 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, 1647 uint64_t offset, uint64_t size, 1648 int tag_type, void *tag_bug) { 1649 return static_cast<DisassemblerLLVMC *>(disassembler) 1650 ->OpInfo(pc, offset, size, tag_type, tag_bug); 1651 } 1652 1653 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, 1654 uint64_t value, 1655 uint64_t *type, uint64_t pc, 1656 const char **name) { 1657 return static_cast<DisassemblerLLVMC *>(disassembler) 1658 ->SymbolLookup(value, type, pc, name); 1659 } 1660 1661 bool DisassemblerLLVMC::FlavorValidForArchSpec( 1662 const lldb_private::ArchSpec &arch, const char *flavor) { 1663 llvm::Triple triple = arch.GetTriple(); 1664 if (flavor == nullptr || strcmp(flavor, "default") == 0) 1665 return true; 1666 1667 if (triple.getArch() == llvm::Triple::x86 || 1668 triple.getArch() == llvm::Triple::x86_64) { 1669 return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0; 1670 } else 1671 return false; 1672 } 1673 1674 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } 1675 1676 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, 1677 int tag_type, void *tag_bug) { 1678 switch (tag_type) { 1679 default: 1680 break; 1681 case 1: 1682 memset(tag_bug, 0, sizeof(::LLVMOpInfo1)); 1683 break; 1684 } 1685 return 0; 1686 } 1687 1688 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, 1689 uint64_t pc, const char **name) { 1690 if (*type_ptr) { 1691 if (m_exe_ctx && m_inst) { 1692 // std::string remove_this_prior_to_checkin; 1693 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; 1694 Address value_so_addr; 1695 Address pc_so_addr; 1696 if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || 1697 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || 1698 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { 1699 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { 1700 m_adrp_address = pc; 1701 m_adrp_insn = value; 1702 *name = nullptr; 1703 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1704 return nullptr; 1705 } 1706 // If this instruction is an ADD and 1707 // the previous instruction was an ADRP and 1708 // the ADRP's register and this ADD's register are the same, 1709 // then this is a pc-relative address calculation. 1710 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && 1711 m_adrp_insn && m_adrp_address == pc - 4 && 1712 (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) { 1713 uint32_t addxri_inst; 1714 uint64_t adrp_imm, addxri_imm; 1715 // Get immlo and immhi bits, OR them together to get the ADRP imm 1716 // value. 1717 adrp_imm = 1718 ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3); 1719 // if high bit of immhi after right-shifting set, sign extend 1720 if (adrp_imm & (1ULL << 20)) 1721 adrp_imm |= ~((1ULL << 21) - 1); 1722 1723 addxri_inst = value; 1724 addxri_imm = (addxri_inst >> 10) & 0xfff; 1725 // check if 'sh' bit is set, shift imm value up if so 1726 // (this would make no sense, ADRP already gave us this part) 1727 if ((addxri_inst >> (12 + 5 + 5)) & 1) 1728 addxri_imm <<= 12; 1729 value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + 1730 addxri_imm; 1731 } 1732 m_adrp_address = LLDB_INVALID_ADDRESS; 1733 m_adrp_insn.reset(); 1734 } 1735 1736 if (m_inst->UsingFileAddress()) { 1737 ModuleSP module_sp(m_inst->GetAddress().GetModule()); 1738 if (module_sp) { 1739 module_sp->ResolveFileAddress(value, value_so_addr); 1740 module_sp->ResolveFileAddress(pc, pc_so_addr); 1741 } 1742 } else if (target && !target->GetSectionLoadList().IsEmpty()) { 1743 target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr); 1744 target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr); 1745 } 1746 1747 SymbolContext sym_ctx; 1748 const SymbolContextItem resolve_scope = 1749 eSymbolContextFunction | eSymbolContextSymbol; 1750 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { 1751 pc_so_addr.GetModule()->ResolveSymbolContextForAddress( 1752 pc_so_addr, resolve_scope, sym_ctx); 1753 } 1754 1755 if (value_so_addr.IsValid() && value_so_addr.GetSection()) { 1756 StreamString ss; 1757 1758 bool format_omitting_current_func_name = false; 1759 if (sym_ctx.symbol || sym_ctx.function) { 1760 AddressRange range; 1761 if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) && 1762 range.GetBaseAddress().IsValid() && 1763 range.ContainsLoadAddress(value_so_addr, target)) { 1764 format_omitting_current_func_name = true; 1765 } 1766 } 1767 1768 // If the "value" address (the target address we're symbolicating) is 1769 // inside the same SymbolContext as the current instruction pc 1770 // (pc_so_addr), don't print the full function name - just print it 1771 // with DumpStyleNoFunctionName style, e.g. "<+36>". 1772 if (format_omitting_current_func_name) { 1773 value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName, 1774 Address::DumpStyleSectionNameOffset); 1775 } else { 1776 value_so_addr.Dump( 1777 &ss, target, 1778 Address::DumpStyleResolvedDescriptionNoFunctionArguments, 1779 Address::DumpStyleSectionNameOffset); 1780 } 1781 1782 if (!ss.GetString().empty()) { 1783 // If Address::Dump returned a multi-line description, most commonly 1784 // seen when we have multiple levels of inlined functions at an 1785 // address, only show the first line. 1786 std::string str = std::string(ss.GetString()); 1787 size_t first_eol_char = str.find_first_of("\r\n"); 1788 if (first_eol_char != std::string::npos) { 1789 str.erase(first_eol_char); 1790 } 1791 m_inst->AppendComment(str); 1792 } 1793 } 1794 } 1795 } 1796 1797 // TODO: llvm-objdump sets the type_ptr to the 1798 // LLVMDisassembler_ReferenceType_Out_* values 1799 // based on where value_so_addr is pointing, with 1800 // Mach-O specific augmentations in MachODump.cpp. e.g. 1801 // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand 1802 // handles. 1803 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1804 *name = nullptr; 1805 return nullptr; 1806 } 1807