1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "DisassemblerLLVMC.h" 10 11 #include "llvm-c/Disassembler.h" 12 #include "llvm/ADT/SmallString.h" 13 #include "llvm/ADT/StringExtras.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" 18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" 19 #include "llvm/MC/MCInst.h" 20 #include "llvm/MC/MCInstPrinter.h" 21 #include "llvm/MC/MCInstrInfo.h" 22 #include "llvm/MC/MCRegisterInfo.h" 23 #include "llvm/MC/MCSubtargetInfo.h" 24 #include "llvm/MC/MCTargetOptions.h" 25 #include "llvm/MC/TargetRegistry.h" 26 #include "llvm/Support/AArch64TargetParser.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include "llvm/Support/ScopedPrinter.h" 29 #include "llvm/Support/TargetSelect.h" 30 31 #include "lldb/Core/Address.h" 32 #include "lldb/Core/Module.h" 33 #include "lldb/Symbol/SymbolContext.h" 34 #include "lldb/Target/ExecutionContext.h" 35 #include "lldb/Target/Process.h" 36 #include "lldb/Target/RegisterContext.h" 37 #include "lldb/Target/SectionLoadList.h" 38 #include "lldb/Target/StackFrame.h" 39 #include "lldb/Target/Target.h" 40 #include "lldb/Utility/DataExtractor.h" 41 #include "lldb/Utility/LLDBLog.h" 42 #include "lldb/Utility/Log.h" 43 #include "lldb/Utility/RegularExpression.h" 44 #include "lldb/Utility/Stream.h" 45 46 using namespace lldb; 47 using namespace lldb_private; 48 49 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC) 50 51 class DisassemblerLLVMC::MCDisasmInstance { 52 public: 53 static std::unique_ptr<MCDisasmInstance> 54 Create(const char *triple, const char *cpu, const char *features_str, 55 unsigned flavor, DisassemblerLLVMC &owner); 56 57 ~MCDisasmInstance() = default; 58 59 uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, 60 lldb::addr_t pc, llvm::MCInst &mc_inst) const; 61 void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string, 62 std::string &comments_string); 63 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); 64 bool CanBranch(llvm::MCInst &mc_inst) const; 65 bool HasDelaySlot(llvm::MCInst &mc_inst) const; 66 bool IsCall(llvm::MCInst &mc_inst) const; 67 bool IsLoad(llvm::MCInst &mc_inst) const; 68 bool IsAuthenticated(llvm::MCInst &mc_inst) const; 69 70 private: 71 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 72 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 73 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 74 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 75 std::unique_ptr<llvm::MCContext> &&context_up, 76 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 77 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up); 78 79 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; 80 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; 81 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; 82 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; 83 std::unique_ptr<llvm::MCContext> m_context_up; 84 std::unique_ptr<llvm::MCDisassembler> m_disasm_up; 85 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; 86 }; 87 88 namespace x86 { 89 90 /// These are the three values deciding instruction control flow kind. 91 /// InstructionLengthDecode function decodes an instruction and get this struct. 92 /// 93 /// primary_opcode 94 /// Primary opcode of the instruction. 95 /// For one-byte opcode instruction, it's the first byte after prefix. 96 /// For two- and three-byte opcodes, it's the second byte. 97 /// 98 /// opcode_len 99 /// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3. 100 /// 101 /// modrm 102 /// ModR/M byte of the instruction. 103 /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0] 104 /// may contain a register or specify an addressing mode, depending on MOD. 105 struct InstructionOpcodeAndModrm { 106 uint8_t primary_opcode; 107 uint8_t opcode_len; 108 uint8_t modrm; 109 }; 110 111 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. 112 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and 113 /// instruction set. 114 /// 115 /// \param[in] opcode_and_modrm 116 /// Contains primary_opcode byte, its length, and ModR/M byte. 117 /// Refer to the struct InstructionOpcodeAndModrm for details. 118 /// 119 /// \return 120 /// The control flow kind of the instruction or 121 /// eInstructionControlFlowKindOther if the instruction doesn't affect 122 /// the control flow of the program. 123 lldb::InstructionControlFlowKind 124 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { 125 uint8_t opcode = opcode_and_modrm.primary_opcode; 126 uint8_t opcode_len = opcode_and_modrm.opcode_len; 127 uint8_t modrm = opcode_and_modrm.modrm; 128 129 if (opcode_len > 2) 130 return lldb::eInstructionControlFlowKindOther; 131 132 if (opcode >= 0x70 && opcode <= 0x7F) { 133 if (opcode_len == 1) 134 return lldb::eInstructionControlFlowKindCondJump; 135 else 136 return lldb::eInstructionControlFlowKindOther; 137 } 138 139 if (opcode >= 0x80 && opcode <= 0x8F) { 140 if (opcode_len == 2) 141 return lldb::eInstructionControlFlowKindCondJump; 142 else 143 return lldb::eInstructionControlFlowKindOther; 144 } 145 146 switch (opcode) { 147 case 0x9A: 148 if (opcode_len == 1) 149 return lldb::eInstructionControlFlowKindFarCall; 150 break; 151 case 0xFF: 152 if (opcode_len == 1) { 153 uint8_t modrm_reg = (modrm >> 3) & 7; 154 if (modrm_reg == 2) 155 return lldb::eInstructionControlFlowKindCall; 156 else if (modrm_reg == 3) 157 return lldb::eInstructionControlFlowKindFarCall; 158 else if (modrm_reg == 4) 159 return lldb::eInstructionControlFlowKindJump; 160 else if (modrm_reg == 5) 161 return lldb::eInstructionControlFlowKindFarJump; 162 } 163 break; 164 case 0xE8: 165 if (opcode_len == 1) 166 return lldb::eInstructionControlFlowKindCall; 167 break; 168 case 0xCD: 169 case 0xCC: 170 case 0xCE: 171 case 0xF1: 172 if (opcode_len == 1) 173 return lldb::eInstructionControlFlowKindFarCall; 174 break; 175 case 0xCF: 176 if (opcode_len == 1) 177 return lldb::eInstructionControlFlowKindFarReturn; 178 break; 179 case 0xE9: 180 case 0xEB: 181 if (opcode_len == 1) 182 return lldb::eInstructionControlFlowKindJump; 183 break; 184 case 0xEA: 185 if (opcode_len == 1) 186 return lldb::eInstructionControlFlowKindFarJump; 187 break; 188 case 0xE3: 189 case 0xE0: 190 case 0xE1: 191 case 0xE2: 192 if (opcode_len == 1) 193 return lldb::eInstructionControlFlowKindCondJump; 194 break; 195 case 0xC3: 196 case 0xC2: 197 if (opcode_len == 1) 198 return lldb::eInstructionControlFlowKindReturn; 199 break; 200 case 0xCB: 201 case 0xCA: 202 if (opcode_len == 1) 203 return lldb::eInstructionControlFlowKindFarReturn; 204 break; 205 case 0x05: 206 case 0x34: 207 if (opcode_len == 2) 208 return lldb::eInstructionControlFlowKindFarCall; 209 break; 210 case 0x35: 211 case 0x07: 212 if (opcode_len == 2) 213 return lldb::eInstructionControlFlowKindFarReturn; 214 break; 215 case 0x01: 216 if (opcode_len == 2) { 217 switch (modrm) { 218 case 0xc1: 219 return lldb::eInstructionControlFlowKindFarCall; 220 case 0xc2: 221 case 0xc3: 222 return lldb::eInstructionControlFlowKindFarReturn; 223 default: 224 break; 225 } 226 } 227 break; 228 default: 229 break; 230 } 231 232 return lldb::eInstructionControlFlowKindOther; 233 } 234 235 /// Decode an instruction into opcode, modrm and opcode_len. 236 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. 237 /// Opcodes in x86 are generally the first byte of instruction, though two-byte 238 /// instructions and prefixes exist. ModR/M is the byte following the opcode 239 /// and adds additional information for how the instruction is executed. 240 /// 241 /// \param[in] inst_bytes 242 /// Raw bytes of the instruction 243 /// 244 /// 245 /// \param[in] bytes_len 246 /// The length of the inst_bytes array. 247 /// 248 /// \param[in] is_exec_mode_64b 249 /// If true, the execution mode is 64 bit. 250 /// 251 /// \return 252 /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding 253 /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition 254 /// for more details. 255 /// Otherwise if the given instruction is invalid, returns None. 256 llvm::Optional<InstructionOpcodeAndModrm> 257 InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, 258 bool is_exec_mode_64b) { 259 int op_idx = 0; 260 bool prefix_done = false; 261 InstructionOpcodeAndModrm ret = {0, 0, 0}; 262 263 // In most cases, the primary_opcode is the first byte of the instruction 264 // but some instructions have a prefix to be skipped for these calculations. 265 // The following mapping is inspired from libipt's instruction decoding logic 266 // in `src/pt_ild.c` 267 while (!prefix_done) { 268 if (op_idx >= bytes_len) 269 return llvm::None; 270 271 ret.primary_opcode = inst_bytes[op_idx]; 272 switch (ret.primary_opcode) { 273 // prefix_ignore 274 case 0x26: 275 case 0x2e: 276 case 0x36: 277 case 0x3e: 278 case 0x64: 279 case 0x65: 280 // prefix_osz, prefix_asz 281 case 0x66: 282 case 0x67: 283 // prefix_lock, prefix_f2, prefix_f3 284 case 0xf0: 285 case 0xf2: 286 case 0xf3: 287 op_idx++; 288 break; 289 290 // prefix_rex 291 case 0x40: 292 case 0x41: 293 case 0x42: 294 case 0x43: 295 case 0x44: 296 case 0x45: 297 case 0x46: 298 case 0x47: 299 case 0x48: 300 case 0x49: 301 case 0x4a: 302 case 0x4b: 303 case 0x4c: 304 case 0x4d: 305 case 0x4e: 306 case 0x4f: 307 if (is_exec_mode_64b) 308 op_idx++; 309 else 310 prefix_done = true; 311 break; 312 313 // prefix_vex_c4, c5 314 case 0xc5: 315 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 316 prefix_done = true; 317 break; 318 } 319 320 ret.opcode_len = 2; 321 ret.primary_opcode = inst_bytes[op_idx + 2]; 322 ret.modrm = inst_bytes[op_idx + 3]; 323 return ret; 324 325 case 0xc4: 326 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 327 prefix_done = true; 328 break; 329 } 330 ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; 331 ret.primary_opcode = inst_bytes[op_idx + 3]; 332 ret.modrm = inst_bytes[op_idx + 4]; 333 return ret; 334 335 // prefix_evex 336 case 0x62: 337 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 338 prefix_done = true; 339 break; 340 } 341 ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; 342 ret.primary_opcode = inst_bytes[op_idx + 4]; 343 ret.modrm = inst_bytes[op_idx + 5]; 344 return ret; 345 346 default: 347 prefix_done = true; 348 break; 349 } 350 } // prefix done 351 352 ret.primary_opcode = inst_bytes[op_idx]; 353 ret.modrm = inst_bytes[op_idx + 1]; 354 ret.opcode_len = 1; 355 356 // If the first opcode is 0F, it's two- or three- byte opcodes. 357 if (ret.primary_opcode == 0x0F) { 358 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 359 360 if (ret.primary_opcode == 0x38) { 361 ret.opcode_len = 3; 362 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 363 ret.modrm = inst_bytes[op_idx + 1]; 364 } else if (ret.primary_opcode == 0x3A) { 365 ret.opcode_len = 3; 366 ret.primary_opcode = inst_bytes[++op_idx]; 367 ret.modrm = inst_bytes[op_idx + 1]; 368 } else if ((ret.primary_opcode & 0xf8) == 0x38) { 369 ret.opcode_len = 0; 370 ret.primary_opcode = inst_bytes[++op_idx]; 371 ret.modrm = inst_bytes[op_idx + 1]; 372 } else if (ret.primary_opcode == 0x0F) { 373 ret.opcode_len = 3; 374 // opcode is 0x0F, no needs to update 375 ret.modrm = inst_bytes[op_idx + 1]; 376 } else { 377 ret.opcode_len = 2; 378 ret.modrm = inst_bytes[op_idx + 1]; 379 } 380 } 381 382 return ret; 383 } 384 385 lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, 386 Opcode m_opcode) { 387 llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None; 388 389 if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { 390 // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes 391 return lldb::eInstructionControlFlowKindUnknown; 392 } 393 394 // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. 395 // These are the three values deciding instruction control flow kind. 396 ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), 397 m_opcode.GetByteSize(), is_exec_mode_64b); 398 if (!ret) 399 return lldb::eInstructionControlFlowKindUnknown; 400 else 401 return MapOpcodeIntoControlFlowKind(ret.value()); 402 } 403 404 } // namespace x86 405 406 class InstructionLLVMC : public lldb_private::Instruction { 407 public: 408 InstructionLLVMC(DisassemblerLLVMC &disasm, 409 const lldb_private::Address &address, 410 AddressClass addr_class) 411 : Instruction(address, addr_class), 412 m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( 413 disasm.shared_from_this())) {} 414 415 ~InstructionLLVMC() override = default; 416 417 bool DoesBranch() override { 418 VisitInstruction(); 419 return m_does_branch; 420 } 421 422 bool HasDelaySlot() override { 423 VisitInstruction(); 424 return m_has_delay_slot; 425 } 426 427 bool IsLoad() override { 428 VisitInstruction(); 429 return m_is_load; 430 } 431 432 bool IsAuthenticated() override { 433 VisitInstruction(); 434 return m_is_authenticated; 435 } 436 437 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { 438 DisassemblerScope disasm(*this); 439 return GetDisasmToUse(is_alternate_isa, disasm); 440 } 441 442 size_t Decode(const lldb_private::Disassembler &disassembler, 443 const lldb_private::DataExtractor &data, 444 lldb::offset_t data_offset) override { 445 // All we have to do is read the opcode which can be easy for some 446 // architectures 447 bool got_op = false; 448 DisassemblerScope disasm(*this); 449 if (disasm) { 450 const ArchSpec &arch = disasm->GetArchitecture(); 451 const lldb::ByteOrder byte_order = data.GetByteOrder(); 452 453 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); 454 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); 455 if (min_op_byte_size == max_op_byte_size) { 456 // Fixed size instructions, just read that amount of data. 457 if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size)) 458 return false; 459 460 switch (min_op_byte_size) { 461 case 1: 462 m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order); 463 got_op = true; 464 break; 465 466 case 2: 467 m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order); 468 got_op = true; 469 break; 470 471 case 4: 472 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 473 got_op = true; 474 break; 475 476 case 8: 477 m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order); 478 got_op = true; 479 break; 480 481 default: 482 m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), 483 min_op_byte_size); 484 got_op = true; 485 break; 486 } 487 } 488 if (!got_op) { 489 bool is_alternate_isa = false; 490 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 491 GetDisasmToUse(is_alternate_isa, disasm); 492 493 const llvm::Triple::ArchType machine = arch.GetMachine(); 494 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { 495 if (machine == llvm::Triple::thumb || is_alternate_isa) { 496 uint32_t thumb_opcode = data.GetU16(&data_offset); 497 if ((thumb_opcode & 0xe000) != 0xe000 || 498 ((thumb_opcode & 0x1800u) == 0)) { 499 m_opcode.SetOpcode16(thumb_opcode, byte_order); 500 m_is_valid = true; 501 } else { 502 thumb_opcode <<= 16; 503 thumb_opcode |= data.GetU16(&data_offset); 504 m_opcode.SetOpcode16_2(thumb_opcode, byte_order); 505 m_is_valid = true; 506 } 507 } else { 508 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 509 m_is_valid = true; 510 } 511 } else { 512 // The opcode isn't evenly sized, so we need to actually use the llvm 513 // disassembler to parse it and get the size. 514 uint8_t *opcode_data = 515 const_cast<uint8_t *>(data.PeekData(data_offset, 1)); 516 const size_t opcode_data_len = data.BytesLeft(data_offset); 517 const addr_t pc = m_address.GetFileAddress(); 518 llvm::MCInst inst; 519 520 const size_t inst_size = 521 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 522 if (inst_size == 0) 523 m_opcode.Clear(); 524 else { 525 m_opcode.SetOpcodeBytes(opcode_data, inst_size); 526 m_is_valid = true; 527 } 528 } 529 } 530 return m_opcode.GetByteSize(); 531 } 532 return 0; 533 } 534 535 void AppendComment(std::string &description) { 536 if (m_comment.empty()) 537 m_comment.swap(description); 538 else { 539 m_comment.append(", "); 540 m_comment.append(description); 541 } 542 } 543 544 lldb::InstructionControlFlowKind 545 GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override { 546 DisassemblerScope disasm(*this, exe_ctx); 547 if (disasm){ 548 if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86) 549 return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode); 550 else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64) 551 return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode); 552 } 553 554 return eInstructionControlFlowKindUnknown; 555 } 556 557 void CalculateMnemonicOperandsAndComment( 558 const lldb_private::ExecutionContext *exe_ctx) override { 559 DataExtractor data; 560 const AddressClass address_class = GetAddressClass(); 561 562 if (m_opcode.GetData(data)) { 563 std::string out_string; 564 std::string comment_string; 565 566 DisassemblerScope disasm(*this, exe_ctx); 567 if (disasm) { 568 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; 569 570 if (address_class == AddressClass::eCodeAlternateISA) 571 mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); 572 else 573 mc_disasm_ptr = disasm->m_disasm_up.get(); 574 575 lldb::addr_t pc = m_address.GetFileAddress(); 576 m_using_file_addr = true; 577 578 const bool data_from_file = disasm->m_data_from_file; 579 bool use_hex_immediates = true; 580 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; 581 582 if (exe_ctx) { 583 Target *target = exe_ctx->GetTargetPtr(); 584 if (target) { 585 use_hex_immediates = target->GetUseHexImmediates(); 586 hex_style = target->GetHexImmediateStyle(); 587 588 if (!data_from_file) { 589 const lldb::addr_t load_addr = m_address.GetLoadAddress(target); 590 if (load_addr != LLDB_INVALID_ADDRESS) { 591 pc = load_addr; 592 m_using_file_addr = false; 593 } 594 } 595 } 596 } 597 598 const uint8_t *opcode_data = data.GetDataStart(); 599 const size_t opcode_data_len = data.GetByteSize(); 600 llvm::MCInst inst; 601 size_t inst_size = 602 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 603 604 if (inst_size > 0) { 605 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); 606 mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string); 607 608 if (!comment_string.empty()) { 609 AppendComment(comment_string); 610 } 611 } 612 613 if (inst_size == 0) { 614 m_comment.assign("unknown opcode"); 615 inst_size = m_opcode.GetByteSize(); 616 StreamString mnemonic_strm; 617 lldb::offset_t offset = 0; 618 lldb::ByteOrder byte_order = data.GetByteOrder(); 619 switch (inst_size) { 620 case 1: { 621 const uint8_t uval8 = data.GetU8(&offset); 622 m_opcode.SetOpcode8(uval8, byte_order); 623 m_opcode_name.assign(".byte"); 624 mnemonic_strm.Printf("0x%2.2x", uval8); 625 } break; 626 case 2: { 627 const uint16_t uval16 = data.GetU16(&offset); 628 m_opcode.SetOpcode16(uval16, byte_order); 629 m_opcode_name.assign(".short"); 630 mnemonic_strm.Printf("0x%4.4x", uval16); 631 } break; 632 case 4: { 633 const uint32_t uval32 = data.GetU32(&offset); 634 m_opcode.SetOpcode32(uval32, byte_order); 635 m_opcode_name.assign(".long"); 636 mnemonic_strm.Printf("0x%8.8x", uval32); 637 } break; 638 case 8: { 639 const uint64_t uval64 = data.GetU64(&offset); 640 m_opcode.SetOpcode64(uval64, byte_order); 641 m_opcode_name.assign(".quad"); 642 mnemonic_strm.Printf("0x%16.16" PRIx64, uval64); 643 } break; 644 default: 645 if (inst_size == 0) 646 return; 647 else { 648 const uint8_t *bytes = data.PeekData(offset, inst_size); 649 if (bytes == nullptr) 650 return; 651 m_opcode_name.assign(".byte"); 652 m_opcode.SetOpcodeBytes(bytes, inst_size); 653 mnemonic_strm.Printf("0x%2.2x", bytes[0]); 654 for (uint32_t i = 1; i < inst_size; ++i) 655 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]); 656 } 657 break; 658 } 659 m_mnemonics = std::string(mnemonic_strm.GetString()); 660 return; 661 } 662 663 static RegularExpression s_regex( 664 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?")); 665 666 llvm::SmallVector<llvm::StringRef, 4> matches; 667 if (s_regex.Execute(out_string, &matches)) { 668 m_opcode_name = matches[1].str(); 669 m_mnemonics = matches[2].str(); 670 } 671 } 672 } 673 } 674 675 bool IsValid() const { return m_is_valid; } 676 677 bool UsingFileAddress() const { return m_using_file_addr; } 678 size_t GetByteSize() const { return m_opcode.GetByteSize(); } 679 680 /// Grants exclusive access to the disassembler and initializes it with the 681 /// given InstructionLLVMC and an optional ExecutionContext. 682 class DisassemblerScope { 683 std::shared_ptr<DisassemblerLLVMC> m_disasm; 684 685 public: 686 explicit DisassemblerScope( 687 InstructionLLVMC &i, 688 const lldb_private::ExecutionContext *exe_ctx = nullptr) 689 : m_disasm(i.m_disasm_wp.lock()) { 690 m_disasm->m_mutex.lock(); 691 m_disasm->m_inst = &i; 692 m_disasm->m_exe_ctx = exe_ctx; 693 } 694 ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } 695 696 /// Evaluates to true if this scope contains a valid disassembler. 697 operator bool() const { return static_cast<bool>(m_disasm); } 698 699 std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } 700 }; 701 702 static llvm::StringRef::const_iterator 703 ConsumeWhitespace(llvm::StringRef::const_iterator osi, 704 llvm::StringRef::const_iterator ose) { 705 while (osi != ose) { 706 switch (*osi) { 707 default: 708 return osi; 709 case ' ': 710 case '\t': 711 break; 712 } 713 ++osi; 714 } 715 716 return osi; 717 } 718 719 static std::pair<bool, llvm::StringRef::const_iterator> 720 ConsumeChar(llvm::StringRef::const_iterator osi, const char c, 721 llvm::StringRef::const_iterator ose) { 722 bool found = false; 723 724 osi = ConsumeWhitespace(osi, ose); 725 if (osi != ose && *osi == c) { 726 found = true; 727 ++osi; 728 } 729 730 return std::make_pair(found, osi); 731 } 732 733 static std::pair<Operand, llvm::StringRef::const_iterator> 734 ParseRegisterName(llvm::StringRef::const_iterator osi, 735 llvm::StringRef::const_iterator ose) { 736 Operand ret; 737 ret.m_type = Operand::Type::Register; 738 std::string str; 739 740 osi = ConsumeWhitespace(osi, ose); 741 742 while (osi != ose) { 743 if (*osi >= '0' && *osi <= '9') { 744 if (str.empty()) { 745 return std::make_pair(Operand(), osi); 746 } else { 747 str.push_back(*osi); 748 } 749 } else if (*osi >= 'a' && *osi <= 'z') { 750 str.push_back(*osi); 751 } else { 752 switch (*osi) { 753 default: 754 if (str.empty()) { 755 return std::make_pair(Operand(), osi); 756 } else { 757 ret.m_register = ConstString(str); 758 return std::make_pair(ret, osi); 759 } 760 case '%': 761 if (!str.empty()) { 762 return std::make_pair(Operand(), osi); 763 } 764 break; 765 } 766 } 767 ++osi; 768 } 769 770 ret.m_register = ConstString(str); 771 return std::make_pair(ret, osi); 772 } 773 774 static std::pair<Operand, llvm::StringRef::const_iterator> 775 ParseImmediate(llvm::StringRef::const_iterator osi, 776 llvm::StringRef::const_iterator ose) { 777 Operand ret; 778 ret.m_type = Operand::Type::Immediate; 779 std::string str; 780 bool is_hex = false; 781 782 osi = ConsumeWhitespace(osi, ose); 783 784 while (osi != ose) { 785 if (*osi >= '0' && *osi <= '9') { 786 str.push_back(*osi); 787 } else if (*osi >= 'a' && *osi <= 'f') { 788 if (is_hex) { 789 str.push_back(*osi); 790 } else { 791 return std::make_pair(Operand(), osi); 792 } 793 } else { 794 switch (*osi) { 795 default: 796 if (str.empty()) { 797 return std::make_pair(Operand(), osi); 798 } else { 799 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 800 return std::make_pair(ret, osi); 801 } 802 case 'x': 803 if (!str.compare("0")) { 804 is_hex = true; 805 str.push_back(*osi); 806 } else { 807 return std::make_pair(Operand(), osi); 808 } 809 break; 810 case '#': 811 case '$': 812 if (!str.empty()) { 813 return std::make_pair(Operand(), osi); 814 } 815 break; 816 case '-': 817 if (str.empty()) { 818 ret.m_negative = true; 819 } else { 820 return std::make_pair(Operand(), osi); 821 } 822 } 823 } 824 ++osi; 825 } 826 827 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 828 return std::make_pair(ret, osi); 829 } 830 831 // -0x5(%rax,%rax,2) 832 static std::pair<Operand, llvm::StringRef::const_iterator> 833 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, 834 llvm::StringRef::const_iterator ose) { 835 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 836 ParseImmediate(osi, ose); 837 if (offset_and_iterator.first.IsValid()) { 838 osi = offset_and_iterator.second; 839 } 840 841 bool found = false; 842 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 843 if (!found) { 844 return std::make_pair(Operand(), osi); 845 } 846 847 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 848 ParseRegisterName(osi, ose); 849 if (base_and_iterator.first.IsValid()) { 850 osi = base_and_iterator.second; 851 } else { 852 return std::make_pair(Operand(), osi); 853 } 854 855 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 856 if (!found) { 857 return std::make_pair(Operand(), osi); 858 } 859 860 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = 861 ParseRegisterName(osi, ose); 862 if (index_and_iterator.first.IsValid()) { 863 osi = index_and_iterator.second; 864 } else { 865 return std::make_pair(Operand(), osi); 866 } 867 868 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 869 if (!found) { 870 return std::make_pair(Operand(), osi); 871 } 872 873 std::pair<Operand, llvm::StringRef::const_iterator> 874 multiplier_and_iterator = ParseImmediate(osi, ose); 875 if (index_and_iterator.first.IsValid()) { 876 osi = index_and_iterator.second; 877 } else { 878 return std::make_pair(Operand(), osi); 879 } 880 881 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 882 if (!found) { 883 return std::make_pair(Operand(), osi); 884 } 885 886 Operand product; 887 product.m_type = Operand::Type::Product; 888 product.m_children.push_back(index_and_iterator.first); 889 product.m_children.push_back(multiplier_and_iterator.first); 890 891 Operand index; 892 index.m_type = Operand::Type::Sum; 893 index.m_children.push_back(base_and_iterator.first); 894 index.m_children.push_back(product); 895 896 if (offset_and_iterator.first.IsValid()) { 897 Operand offset; 898 offset.m_type = Operand::Type::Sum; 899 offset.m_children.push_back(offset_and_iterator.first); 900 offset.m_children.push_back(index); 901 902 Operand deref; 903 deref.m_type = Operand::Type::Dereference; 904 deref.m_children.push_back(offset); 905 return std::make_pair(deref, osi); 906 } else { 907 Operand deref; 908 deref.m_type = Operand::Type::Dereference; 909 deref.m_children.push_back(index); 910 return std::make_pair(deref, osi); 911 } 912 } 913 914 // -0x10(%rbp) 915 static std::pair<Operand, llvm::StringRef::const_iterator> 916 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, 917 llvm::StringRef::const_iterator ose) { 918 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 919 ParseImmediate(osi, ose); 920 if (offset_and_iterator.first.IsValid()) { 921 osi = offset_and_iterator.second; 922 } 923 924 bool found = false; 925 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 926 if (!found) { 927 return std::make_pair(Operand(), osi); 928 } 929 930 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 931 ParseRegisterName(osi, ose); 932 if (base_and_iterator.first.IsValid()) { 933 osi = base_and_iterator.second; 934 } else { 935 return std::make_pair(Operand(), osi); 936 } 937 938 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 939 if (!found) { 940 return std::make_pair(Operand(), osi); 941 } 942 943 if (offset_and_iterator.first.IsValid()) { 944 Operand offset; 945 offset.m_type = Operand::Type::Sum; 946 offset.m_children.push_back(offset_and_iterator.first); 947 offset.m_children.push_back(base_and_iterator.first); 948 949 Operand deref; 950 deref.m_type = Operand::Type::Dereference; 951 deref.m_children.push_back(offset); 952 return std::make_pair(deref, osi); 953 } else { 954 Operand deref; 955 deref.m_type = Operand::Type::Dereference; 956 deref.m_children.push_back(base_and_iterator.first); 957 return std::make_pair(deref, osi); 958 } 959 } 960 961 // [sp, #8]! 962 static std::pair<Operand, llvm::StringRef::const_iterator> 963 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, 964 llvm::StringRef::const_iterator ose) { 965 bool found = false; 966 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 967 if (!found) { 968 return std::make_pair(Operand(), osi); 969 } 970 971 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 972 ParseRegisterName(osi, ose); 973 if (base_and_iterator.first.IsValid()) { 974 osi = base_and_iterator.second; 975 } else { 976 return std::make_pair(Operand(), osi); 977 } 978 979 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 980 if (!found) { 981 return std::make_pair(Operand(), osi); 982 } 983 984 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 985 ParseImmediate(osi, ose); 986 if (offset_and_iterator.first.IsValid()) { 987 osi = offset_and_iterator.second; 988 } 989 990 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 991 if (!found) { 992 return std::make_pair(Operand(), osi); 993 } 994 995 Operand offset; 996 offset.m_type = Operand::Type::Sum; 997 offset.m_children.push_back(offset_and_iterator.first); 998 offset.m_children.push_back(base_and_iterator.first); 999 1000 Operand deref; 1001 deref.m_type = Operand::Type::Dereference; 1002 deref.m_children.push_back(offset); 1003 return std::make_pair(deref, osi); 1004 } 1005 1006 // [sp] 1007 static std::pair<Operand, llvm::StringRef::const_iterator> 1008 ParseARMDerefAccess(llvm::StringRef::const_iterator osi, 1009 llvm::StringRef::const_iterator ose) { 1010 bool found = false; 1011 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 1012 if (!found) { 1013 return std::make_pair(Operand(), osi); 1014 } 1015 1016 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 1017 ParseRegisterName(osi, ose); 1018 if (base_and_iterator.first.IsValid()) { 1019 osi = base_and_iterator.second; 1020 } else { 1021 return std::make_pair(Operand(), osi); 1022 } 1023 1024 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 1025 if (!found) { 1026 return std::make_pair(Operand(), osi); 1027 } 1028 1029 Operand deref; 1030 deref.m_type = Operand::Type::Dereference; 1031 deref.m_children.push_back(base_and_iterator.first); 1032 return std::make_pair(deref, osi); 1033 } 1034 1035 static void DumpOperand(const Operand &op, Stream &s) { 1036 switch (op.m_type) { 1037 case Operand::Type::Dereference: 1038 s.PutCString("*"); 1039 DumpOperand(op.m_children[0], s); 1040 break; 1041 case Operand::Type::Immediate: 1042 if (op.m_negative) { 1043 s.PutCString("-"); 1044 } 1045 s.PutCString(llvm::to_string(op.m_immediate)); 1046 break; 1047 case Operand::Type::Invalid: 1048 s.PutCString("Invalid"); 1049 break; 1050 case Operand::Type::Product: 1051 s.PutCString("("); 1052 DumpOperand(op.m_children[0], s); 1053 s.PutCString("*"); 1054 DumpOperand(op.m_children[1], s); 1055 s.PutCString(")"); 1056 break; 1057 case Operand::Type::Register: 1058 s.PutCString(op.m_register.GetStringRef()); 1059 break; 1060 case Operand::Type::Sum: 1061 s.PutCString("("); 1062 DumpOperand(op.m_children[0], s); 1063 s.PutCString("+"); 1064 DumpOperand(op.m_children[1], s); 1065 s.PutCString(")"); 1066 break; 1067 } 1068 } 1069 1070 bool ParseOperands( 1071 llvm::SmallVectorImpl<Instruction::Operand> &operands) override { 1072 const char *operands_string = GetOperands(nullptr); 1073 1074 if (!operands_string) { 1075 return false; 1076 } 1077 1078 llvm::StringRef operands_ref(operands_string); 1079 1080 llvm::StringRef::const_iterator osi = operands_ref.begin(); 1081 llvm::StringRef::const_iterator ose = operands_ref.end(); 1082 1083 while (osi != ose) { 1084 Operand operand; 1085 llvm::StringRef::const_iterator iter; 1086 1087 if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose), 1088 operand.IsValid()) || 1089 (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose), 1090 operand.IsValid()) || 1091 (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose), 1092 operand.IsValid()) || 1093 (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose), 1094 operand.IsValid()) || 1095 (std::tie(operand, iter) = ParseRegisterName(osi, ose), 1096 operand.IsValid()) || 1097 (std::tie(operand, iter) = ParseImmediate(osi, ose), 1098 operand.IsValid())) { 1099 osi = iter; 1100 operands.push_back(operand); 1101 } else { 1102 return false; 1103 } 1104 1105 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = 1106 ConsumeChar(osi, ',', ose); 1107 if (found_and_iter.first) { 1108 osi = found_and_iter.second; 1109 } 1110 1111 osi = ConsumeWhitespace(osi, ose); 1112 } 1113 1114 DisassemblerSP disasm_sp = m_disasm_wp.lock(); 1115 1116 if (disasm_sp && operands.size() > 1) { 1117 // TODO tie this into the MC Disassembler's notion of clobbers. 1118 switch (disasm_sp->GetArchitecture().GetMachine()) { 1119 default: 1120 break; 1121 case llvm::Triple::x86: 1122 case llvm::Triple::x86_64: 1123 operands[operands.size() - 1].m_clobbered = true; 1124 break; 1125 case llvm::Triple::arm: 1126 operands[0].m_clobbered = true; 1127 break; 1128 } 1129 } 1130 1131 if (Log *log = GetLog(LLDBLog::Process)) { 1132 StreamString ss; 1133 1134 ss.Printf("[%s] expands to %zu operands:\n", operands_string, 1135 operands.size()); 1136 for (const Operand &operand : operands) { 1137 ss.PutCString(" "); 1138 DumpOperand(operand, ss); 1139 ss.PutCString("\n"); 1140 } 1141 1142 log->PutString(ss.GetString()); 1143 } 1144 1145 return true; 1146 } 1147 1148 bool IsCall() override { 1149 VisitInstruction(); 1150 return m_is_call; 1151 } 1152 1153 protected: 1154 std::weak_ptr<DisassemblerLLVMC> m_disasm_wp; 1155 1156 bool m_is_valid = false; 1157 bool m_using_file_addr = false; 1158 bool m_has_visited_instruction = false; 1159 1160 // Be conservative. If we didn't understand the instruction, say it: 1161 // - Might branch 1162 // - Does not have a delay slot 1163 // - Is not a call 1164 // - Is not a load 1165 // - Is not an authenticated instruction 1166 bool m_does_branch = true; 1167 bool m_has_delay_slot = false; 1168 bool m_is_call = false; 1169 bool m_is_load = false; 1170 bool m_is_authenticated = false; 1171 1172 void VisitInstruction() { 1173 if (m_has_visited_instruction) 1174 return; 1175 1176 DisassemblerScope disasm(*this); 1177 if (!disasm) 1178 return; 1179 1180 DataExtractor data; 1181 if (!m_opcode.GetData(data)) 1182 return; 1183 1184 bool is_alternate_isa; 1185 lldb::addr_t pc = m_address.GetFileAddress(); 1186 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 1187 GetDisasmToUse(is_alternate_isa, disasm); 1188 const uint8_t *opcode_data = data.GetDataStart(); 1189 const size_t opcode_data_len = data.GetByteSize(); 1190 llvm::MCInst inst; 1191 const size_t inst_size = 1192 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 1193 if (inst_size == 0) 1194 return; 1195 1196 m_has_visited_instruction = true; 1197 m_does_branch = mc_disasm_ptr->CanBranch(inst); 1198 m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst); 1199 m_is_call = mc_disasm_ptr->IsCall(inst); 1200 m_is_load = mc_disasm_ptr->IsLoad(inst); 1201 m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst); 1202 } 1203 1204 private: 1205 DisassemblerLLVMC::MCDisasmInstance * 1206 GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { 1207 is_alternate_isa = false; 1208 if (disasm) { 1209 if (disasm->m_alternate_disasm_up) { 1210 const AddressClass address_class = GetAddressClass(); 1211 1212 if (address_class == AddressClass::eCodeAlternateISA) { 1213 is_alternate_isa = true; 1214 return disasm->m_alternate_disasm_up.get(); 1215 } 1216 } 1217 return disasm->m_disasm_up.get(); 1218 } 1219 return nullptr; 1220 } 1221 }; 1222 1223 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> 1224 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, 1225 const char *features_str, 1226 unsigned flavor, 1227 DisassemblerLLVMC &owner) { 1228 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; 1229 1230 std::string Status; 1231 const llvm::Target *curr_target = 1232 llvm::TargetRegistry::lookupTarget(triple, Status); 1233 if (!curr_target) 1234 return Instance(); 1235 1236 std::unique_ptr<llvm::MCInstrInfo> instr_info_up( 1237 curr_target->createMCInstrInfo()); 1238 if (!instr_info_up) 1239 return Instance(); 1240 1241 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( 1242 curr_target->createMCRegInfo(triple)); 1243 if (!reg_info_up) 1244 return Instance(); 1245 1246 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( 1247 curr_target->createMCSubtargetInfo(triple, cpu, features_str)); 1248 if (!subtarget_info_up) 1249 return Instance(); 1250 1251 llvm::MCTargetOptions MCOptions; 1252 std::unique_ptr<llvm::MCAsmInfo> asm_info_up( 1253 curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions)); 1254 if (!asm_info_up) 1255 return Instance(); 1256 1257 std::unique_ptr<llvm::MCContext> context_up( 1258 new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), 1259 reg_info_up.get(), subtarget_info_up.get())); 1260 if (!context_up) 1261 return Instance(); 1262 1263 std::unique_ptr<llvm::MCDisassembler> disasm_up( 1264 curr_target->createMCDisassembler(*subtarget_info_up, *context_up)); 1265 if (!disasm_up) 1266 return Instance(); 1267 1268 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( 1269 curr_target->createMCRelocationInfo(triple, *context_up)); 1270 if (!rel_info_up) 1271 return Instance(); 1272 1273 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( 1274 curr_target->createMCSymbolizer( 1275 triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner, 1276 context_up.get(), std::move(rel_info_up))); 1277 disasm_up->setSymbolizer(std::move(symbolizer_up)); 1278 1279 unsigned asm_printer_variant = 1280 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; 1281 1282 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( 1283 curr_target->createMCInstPrinter(llvm::Triple{triple}, 1284 asm_printer_variant, *asm_info_up, 1285 *instr_info_up, *reg_info_up)); 1286 if (!instr_printer_up) 1287 return Instance(); 1288 1289 return Instance( 1290 new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up), 1291 std::move(subtarget_info_up), std::move(asm_info_up), 1292 std::move(context_up), std::move(disasm_up), 1293 std::move(instr_printer_up))); 1294 } 1295 1296 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( 1297 std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 1298 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 1299 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 1300 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 1301 std::unique_ptr<llvm::MCContext> &&context_up, 1302 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 1303 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up) 1304 : m_instr_info_up(std::move(instr_info_up)), 1305 m_reg_info_up(std::move(reg_info_up)), 1306 m_subtarget_info_up(std::move(subtarget_info_up)), 1307 m_asm_info_up(std::move(asm_info_up)), 1308 m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), 1309 m_instr_printer_up(std::move(instr_printer_up)) { 1310 assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && 1311 m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); 1312 } 1313 1314 uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( 1315 const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, 1316 llvm::MCInst &mc_inst) const { 1317 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); 1318 llvm::MCDisassembler::DecodeStatus status; 1319 1320 uint64_t new_inst_size; 1321 status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc, 1322 llvm::nulls()); 1323 if (status == llvm::MCDisassembler::Success) 1324 return new_inst_size; 1325 else 1326 return 0; 1327 } 1328 1329 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( 1330 llvm::MCInst &mc_inst, std::string &inst_string, 1331 std::string &comments_string) { 1332 llvm::raw_string_ostream inst_stream(inst_string); 1333 llvm::raw_string_ostream comments_stream(comments_string); 1334 1335 m_instr_printer_up->setCommentStream(comments_stream); 1336 m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(), 1337 *m_subtarget_info_up, inst_stream); 1338 m_instr_printer_up->setCommentStream(llvm::nulls()); 1339 comments_stream.flush(); 1340 1341 static std::string g_newlines("\r\n"); 1342 1343 for (size_t newline_pos = 0; 1344 (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) != 1345 comments_string.npos; 1346 /**/) { 1347 comments_string.replace(comments_string.begin() + newline_pos, 1348 comments_string.begin() + newline_pos + 1, 1, ' '); 1349 } 1350 } 1351 1352 void DisassemblerLLVMC::MCDisasmInstance::SetStyle( 1353 bool use_hex_immed, HexImmediateStyle hex_style) { 1354 m_instr_printer_up->setPrintImmHex(use_hex_immed); 1355 switch (hex_style) { 1356 case eHexStyleC: 1357 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); 1358 break; 1359 case eHexStyleAsm: 1360 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); 1361 break; 1362 } 1363 } 1364 1365 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( 1366 llvm::MCInst &mc_inst) const { 1367 return m_instr_info_up->get(mc_inst.getOpcode()) 1368 .mayAffectControlFlow(mc_inst, *m_reg_info_up); 1369 } 1370 1371 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( 1372 llvm::MCInst &mc_inst) const { 1373 return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot(); 1374 } 1375 1376 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { 1377 return m_instr_info_up->get(mc_inst.getOpcode()).isCall(); 1378 } 1379 1380 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { 1381 return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad(); 1382 } 1383 1384 bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( 1385 llvm::MCInst &mc_inst) const { 1386 auto InstrDesc = m_instr_info_up->get(mc_inst.getOpcode()); 1387 1388 // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 1389 // == 'a' + 'c') as authenticated instructions for reporting purposes, in 1390 // addition to the standard authenticated instructions specified in ARMv8.3. 1391 bool IsBrkC47x = false; 1392 if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { 1393 const llvm::MCOperand &Op0 = mc_inst.getOperand(0); 1394 if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) 1395 IsBrkC47x = true; 1396 } 1397 1398 return InstrDesc.isAuthenticated() || IsBrkC47x; 1399 } 1400 1401 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, 1402 const char *flavor_string) 1403 : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), 1404 m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS), 1405 m_adrp_insn() { 1406 if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) { 1407 m_flavor.assign("default"); 1408 } 1409 1410 unsigned flavor = ~0U; 1411 llvm::Triple triple = arch.GetTriple(); 1412 1413 // So far the only supported flavor is "intel" on x86. The base class will 1414 // set this correctly coming in. 1415 if (triple.getArch() == llvm::Triple::x86 || 1416 triple.getArch() == llvm::Triple::x86_64) { 1417 if (m_flavor == "intel") { 1418 flavor = 1; 1419 } else if (m_flavor == "att") { 1420 flavor = 0; 1421 } 1422 } 1423 1424 ArchSpec thumb_arch(arch); 1425 if (triple.getArch() == llvm::Triple::arm) { 1426 std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); 1427 // Replace "arm" with "thumb" so we get all thumb variants correct 1428 if (thumb_arch_name.size() > 3) { 1429 thumb_arch_name.erase(0, 3); 1430 thumb_arch_name.insert(0, "thumb"); 1431 } else { 1432 thumb_arch_name = "thumbv9.3a"; 1433 } 1434 thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); 1435 } 1436 1437 // If no sub architecture specified then use the most recent arm architecture 1438 // so the disassembler will return all instructions. Without it we will see a 1439 // lot of unknown opcodes if the code uses instructions which are not 1440 // available in the oldest arm version (which is used when no sub architecture 1441 // is specified). 1442 if (triple.getArch() == llvm::Triple::arm && 1443 triple.getSubArch() == llvm::Triple::NoSubArch) 1444 triple.setArchName("armv9.3a"); 1445 1446 std::string features_str; 1447 const char *triple_str = triple.getTriple().c_str(); 1448 1449 // ARM Cortex M0-M7 devices only execute thumb instructions 1450 if (arch.IsAlwaysThumbInstructions()) { 1451 triple_str = thumb_arch.GetTriple().getTriple().c_str(); 1452 features_str += "+fp-armv8,"; 1453 } 1454 1455 const char *cpu = ""; 1456 1457 switch (arch.GetCore()) { 1458 case ArchSpec::eCore_mips32: 1459 case ArchSpec::eCore_mips32el: 1460 cpu = "mips32"; 1461 break; 1462 case ArchSpec::eCore_mips32r2: 1463 case ArchSpec::eCore_mips32r2el: 1464 cpu = "mips32r2"; 1465 break; 1466 case ArchSpec::eCore_mips32r3: 1467 case ArchSpec::eCore_mips32r3el: 1468 cpu = "mips32r3"; 1469 break; 1470 case ArchSpec::eCore_mips32r5: 1471 case ArchSpec::eCore_mips32r5el: 1472 cpu = "mips32r5"; 1473 break; 1474 case ArchSpec::eCore_mips32r6: 1475 case ArchSpec::eCore_mips32r6el: 1476 cpu = "mips32r6"; 1477 break; 1478 case ArchSpec::eCore_mips64: 1479 case ArchSpec::eCore_mips64el: 1480 cpu = "mips64"; 1481 break; 1482 case ArchSpec::eCore_mips64r2: 1483 case ArchSpec::eCore_mips64r2el: 1484 cpu = "mips64r2"; 1485 break; 1486 case ArchSpec::eCore_mips64r3: 1487 case ArchSpec::eCore_mips64r3el: 1488 cpu = "mips64r3"; 1489 break; 1490 case ArchSpec::eCore_mips64r5: 1491 case ArchSpec::eCore_mips64r5el: 1492 cpu = "mips64r5"; 1493 break; 1494 case ArchSpec::eCore_mips64r6: 1495 case ArchSpec::eCore_mips64r6el: 1496 cpu = "mips64r6"; 1497 break; 1498 default: 1499 cpu = ""; 1500 break; 1501 } 1502 1503 if (arch.IsMIPS()) { 1504 uint32_t arch_flags = arch.GetFlags(); 1505 if (arch_flags & ArchSpec::eMIPSAse_msa) 1506 features_str += "+msa,"; 1507 if (arch_flags & ArchSpec::eMIPSAse_dsp) 1508 features_str += "+dsp,"; 1509 if (arch_flags & ArchSpec::eMIPSAse_dspr2) 1510 features_str += "+dspr2,"; 1511 } 1512 1513 // If any AArch64 variant, enable latest ISA with all extensions. 1514 if (triple.isAArch64()) { 1515 features_str += "+all,"; 1516 1517 if (triple.getVendor() == llvm::Triple::Apple) 1518 cpu = "apple-latest"; 1519 } 1520 1521 if (triple.isRISCV()) { 1522 uint32_t arch_flags = arch.GetFlags(); 1523 if (arch_flags & ArchSpec::eRISCV_rvc) 1524 features_str += "+c,"; 1525 if (arch_flags & ArchSpec::eRISCV_rve) 1526 features_str += "+e,"; 1527 if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == 1528 ArchSpec::eRISCV_float_abi_single) 1529 features_str += "+f,"; 1530 if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == 1531 ArchSpec::eRISCV_float_abi_double) 1532 features_str += "+f,+d,"; 1533 if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == 1534 ArchSpec::eRISCV_float_abi_quad) 1535 features_str += "+f,+d,+q,"; 1536 // FIXME: how do we detect features such as `+a`, `+m`? 1537 } 1538 1539 // We use m_disasm_up.get() to tell whether we are valid or not, so if this 1540 // isn't good for some reason, we won't be valid and FindPlugin will fail and 1541 // we won't get used. 1542 m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(), 1543 flavor, *this); 1544 1545 llvm::Triple::ArchType llvm_arch = triple.getArch(); 1546 1547 // For arm CPUs that can execute arm or thumb instructions, also create a 1548 // thumb instruction disassembler. 1549 if (llvm_arch == llvm::Triple::arm) { 1550 std::string thumb_triple(thumb_arch.GetTriple().getTriple()); 1551 m_alternate_disasm_up = 1552 MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(), 1553 flavor, *this); 1554 if (!m_alternate_disasm_up) 1555 m_disasm_up.reset(); 1556 1557 } else if (arch.IsMIPS()) { 1558 /* Create alternate disassembler for MIPS16 and microMIPS */ 1559 uint32_t arch_flags = arch.GetFlags(); 1560 if (arch_flags & ArchSpec::eMIPSAse_mips16) 1561 features_str += "+mips16,"; 1562 else if (arch_flags & ArchSpec::eMIPSAse_micromips) 1563 features_str += "+micromips,"; 1564 1565 m_alternate_disasm_up = MCDisasmInstance::Create( 1566 triple_str, cpu, features_str.c_str(), flavor, *this); 1567 if (!m_alternate_disasm_up) 1568 m_disasm_up.reset(); 1569 } 1570 } 1571 1572 DisassemblerLLVMC::~DisassemblerLLVMC() = default; 1573 1574 Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, 1575 const char *flavor) { 1576 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { 1577 std::unique_ptr<DisassemblerLLVMC> disasm_up( 1578 new DisassemblerLLVMC(arch, flavor)); 1579 1580 if (disasm_up.get() && disasm_up->IsValid()) 1581 return disasm_up.release(); 1582 } 1583 return nullptr; 1584 } 1585 1586 size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr, 1587 const DataExtractor &data, 1588 lldb::offset_t data_offset, 1589 size_t num_instructions, 1590 bool append, bool data_from_file) { 1591 if (!append) 1592 m_instruction_list.Clear(); 1593 1594 if (!IsValid()) 1595 return 0; 1596 1597 m_data_from_file = data_from_file; 1598 uint32_t data_cursor = data_offset; 1599 const size_t data_byte_size = data.GetByteSize(); 1600 uint32_t instructions_parsed = 0; 1601 Address inst_addr(base_addr); 1602 1603 while (data_cursor < data_byte_size && 1604 instructions_parsed < num_instructions) { 1605 1606 AddressClass address_class = AddressClass::eCode; 1607 1608 if (m_alternate_disasm_up) 1609 address_class = inst_addr.GetAddressClass(); 1610 1611 InstructionSP inst_sp( 1612 new InstructionLLVMC(*this, inst_addr, address_class)); 1613 1614 if (!inst_sp) 1615 break; 1616 1617 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor); 1618 1619 if (inst_size == 0) 1620 break; 1621 1622 m_instruction_list.Append(inst_sp); 1623 data_cursor += inst_size; 1624 inst_addr.Slide(inst_size); 1625 instructions_parsed++; 1626 } 1627 1628 return data_cursor - data_offset; 1629 } 1630 1631 void DisassemblerLLVMC::Initialize() { 1632 PluginManager::RegisterPlugin(GetPluginNameStatic(), 1633 "Disassembler that uses LLVM MC to disassemble " 1634 "i386, x86_64, ARM, and ARM64.", 1635 CreateInstance); 1636 1637 llvm::InitializeAllTargetInfos(); 1638 llvm::InitializeAllTargetMCs(); 1639 llvm::InitializeAllAsmParsers(); 1640 llvm::InitializeAllDisassemblers(); 1641 } 1642 1643 void DisassemblerLLVMC::Terminate() { 1644 PluginManager::UnregisterPlugin(CreateInstance); 1645 } 1646 1647 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, 1648 uint64_t offset, uint64_t size, 1649 int tag_type, void *tag_bug) { 1650 return static_cast<DisassemblerLLVMC *>(disassembler) 1651 ->OpInfo(pc, offset, size, tag_type, tag_bug); 1652 } 1653 1654 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, 1655 uint64_t value, 1656 uint64_t *type, uint64_t pc, 1657 const char **name) { 1658 return static_cast<DisassemblerLLVMC *>(disassembler) 1659 ->SymbolLookup(value, type, pc, name); 1660 } 1661 1662 bool DisassemblerLLVMC::FlavorValidForArchSpec( 1663 const lldb_private::ArchSpec &arch, const char *flavor) { 1664 llvm::Triple triple = arch.GetTriple(); 1665 if (flavor == nullptr || strcmp(flavor, "default") == 0) 1666 return true; 1667 1668 if (triple.getArch() == llvm::Triple::x86 || 1669 triple.getArch() == llvm::Triple::x86_64) { 1670 return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0; 1671 } else 1672 return false; 1673 } 1674 1675 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } 1676 1677 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, 1678 int tag_type, void *tag_bug) { 1679 switch (tag_type) { 1680 default: 1681 break; 1682 case 1: 1683 memset(tag_bug, 0, sizeof(::LLVMOpInfo1)); 1684 break; 1685 } 1686 return 0; 1687 } 1688 1689 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, 1690 uint64_t pc, const char **name) { 1691 if (*type_ptr) { 1692 if (m_exe_ctx && m_inst) { 1693 // std::string remove_this_prior_to_checkin; 1694 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; 1695 Address value_so_addr; 1696 Address pc_so_addr; 1697 if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || 1698 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || 1699 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { 1700 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { 1701 m_adrp_address = pc; 1702 m_adrp_insn = value; 1703 *name = nullptr; 1704 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1705 return nullptr; 1706 } 1707 // If this instruction is an ADD and 1708 // the previous instruction was an ADRP and 1709 // the ADRP's register and this ADD's register are the same, 1710 // then this is a pc-relative address calculation. 1711 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && 1712 m_adrp_insn && m_adrp_address == pc - 4 && 1713 (m_adrp_insn.value() & 0x1f) == ((value >> 5) & 0x1f)) { 1714 uint32_t addxri_inst; 1715 uint64_t adrp_imm, addxri_imm; 1716 // Get immlo and immhi bits, OR them together to get the ADRP imm 1717 // value. 1718 adrp_imm = ((m_adrp_insn.value() & 0x00ffffe0) >> 3) | 1719 ((m_adrp_insn.value() >> 29) & 0x3); 1720 // if high bit of immhi after right-shifting set, sign extend 1721 if (adrp_imm & (1ULL << 20)) 1722 adrp_imm |= ~((1ULL << 21) - 1); 1723 1724 addxri_inst = value; 1725 addxri_imm = (addxri_inst >> 10) & 0xfff; 1726 // check if 'sh' bit is set, shift imm value up if so 1727 // (this would make no sense, ADRP already gave us this part) 1728 if ((addxri_inst >> (12 + 5 + 5)) & 1) 1729 addxri_imm <<= 12; 1730 value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + 1731 addxri_imm; 1732 } 1733 m_adrp_address = LLDB_INVALID_ADDRESS; 1734 m_adrp_insn.reset(); 1735 } 1736 1737 if (m_inst->UsingFileAddress()) { 1738 ModuleSP module_sp(m_inst->GetAddress().GetModule()); 1739 if (module_sp) { 1740 module_sp->ResolveFileAddress(value, value_so_addr); 1741 module_sp->ResolveFileAddress(pc, pc_so_addr); 1742 } 1743 } else if (target && !target->GetSectionLoadList().IsEmpty()) { 1744 target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr); 1745 target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr); 1746 } 1747 1748 SymbolContext sym_ctx; 1749 const SymbolContextItem resolve_scope = 1750 eSymbolContextFunction | eSymbolContextSymbol; 1751 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { 1752 pc_so_addr.GetModule()->ResolveSymbolContextForAddress( 1753 pc_so_addr, resolve_scope, sym_ctx); 1754 } 1755 1756 if (value_so_addr.IsValid() && value_so_addr.GetSection()) { 1757 StreamString ss; 1758 1759 bool format_omitting_current_func_name = false; 1760 if (sym_ctx.symbol || sym_ctx.function) { 1761 AddressRange range; 1762 if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) && 1763 range.GetBaseAddress().IsValid() && 1764 range.ContainsLoadAddress(value_so_addr, target)) { 1765 format_omitting_current_func_name = true; 1766 } 1767 } 1768 1769 // If the "value" address (the target address we're symbolicating) is 1770 // inside the same SymbolContext as the current instruction pc 1771 // (pc_so_addr), don't print the full function name - just print it 1772 // with DumpStyleNoFunctionName style, e.g. "<+36>". 1773 if (format_omitting_current_func_name) { 1774 value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName, 1775 Address::DumpStyleSectionNameOffset); 1776 } else { 1777 value_so_addr.Dump( 1778 &ss, target, 1779 Address::DumpStyleResolvedDescriptionNoFunctionArguments, 1780 Address::DumpStyleSectionNameOffset); 1781 } 1782 1783 if (!ss.GetString().empty()) { 1784 // If Address::Dump returned a multi-line description, most commonly 1785 // seen when we have multiple levels of inlined functions at an 1786 // address, only show the first line. 1787 std::string str = std::string(ss.GetString()); 1788 size_t first_eol_char = str.find_first_of("\r\n"); 1789 if (first_eol_char != std::string::npos) { 1790 str.erase(first_eol_char); 1791 } 1792 m_inst->AppendComment(str); 1793 } 1794 } 1795 } 1796 } 1797 1798 // TODO: llvm-objdump sets the type_ptr to the 1799 // LLVMDisassembler_ReferenceType_Out_* values 1800 // based on where value_so_addr is pointing, with 1801 // Mach-O specific augmentations in MachODump.cpp. e.g. 1802 // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand 1803 // handles. 1804 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1805 *name = nullptr; 1806 return nullptr; 1807 } 1808