1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "DisassemblerLLVMC.h" 10 11 #include "llvm-c/Disassembler.h" 12 #include "llvm/ADT/SmallString.h" 13 #include "llvm/ADT/StringExtras.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" 18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" 19 #include "llvm/MC/MCInst.h" 20 #include "llvm/MC/MCInstPrinter.h" 21 #include "llvm/MC/MCInstrAnalysis.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCRegisterInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/MC/MCTargetOptions.h" 26 #include "llvm/MC/TargetRegistry.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include "llvm/Support/ScopedPrinter.h" 29 #include "llvm/Support/TargetSelect.h" 30 #include "llvm/TargetParser/AArch64TargetParser.h" 31 32 #include "lldb/Core/Address.h" 33 #include "lldb/Core/Module.h" 34 #include "lldb/Symbol/Function.h" 35 #include "lldb/Symbol/SymbolContext.h" 36 #include "lldb/Target/ExecutionContext.h" 37 #include "lldb/Target/Process.h" 38 #include "lldb/Target/RegisterContext.h" 39 #include "lldb/Target/SectionLoadList.h" 40 #include "lldb/Target/StackFrame.h" 41 #include "lldb/Target/Target.h" 42 #include "lldb/Utility/DataExtractor.h" 43 #include "lldb/Utility/LLDBLog.h" 44 #include "lldb/Utility/Log.h" 45 #include "lldb/Utility/RegularExpression.h" 46 #include "lldb/Utility/Stream.h" 47 #include <optional> 48 49 using namespace lldb; 50 using namespace lldb_private; 51 52 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC) 53 54 class DisassemblerLLVMC::MCDisasmInstance { 55 public: 56 static std::unique_ptr<MCDisasmInstance> 57 Create(const char *triple, const char *cpu, const char *features_str, 58 unsigned flavor, DisassemblerLLVMC &owner); 59 60 ~MCDisasmInstance() = default; 61 62 bool GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, 63 lldb::addr_t pc, llvm::MCInst &mc_inst, uint64_t &size) const; 64 void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc, 65 std::string &inst_string, std::string &comments_string); 66 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); 67 void SetUseColor(bool use_color); 68 bool GetUseColor() const; 69 bool CanBranch(llvm::MCInst &mc_inst) const; 70 bool HasDelaySlot(llvm::MCInst &mc_inst) const; 71 bool IsCall(llvm::MCInst &mc_inst) const; 72 bool IsLoad(llvm::MCInst &mc_inst) const; 73 bool IsAuthenticated(llvm::MCInst &mc_inst) const; 74 75 private: 76 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 77 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 78 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 79 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 80 std::unique_ptr<llvm::MCContext> &&context_up, 81 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 82 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, 83 std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up); 84 85 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; 86 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; 87 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; 88 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; 89 std::unique_ptr<llvm::MCContext> m_context_up; 90 std::unique_ptr<llvm::MCDisassembler> m_disasm_up; 91 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; 92 std::unique_ptr<llvm::MCInstrAnalysis> m_instr_analysis_up; 93 }; 94 95 namespace x86 { 96 97 /// These are the three values deciding instruction control flow kind. 98 /// InstructionLengthDecode function decodes an instruction and get this struct. 99 /// 100 /// primary_opcode 101 /// Primary opcode of the instruction. 102 /// For one-byte opcode instruction, it's the first byte after prefix. 103 /// For two- and three-byte opcodes, it's the second byte. 104 /// 105 /// opcode_len 106 /// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3. 107 /// 108 /// modrm 109 /// ModR/M byte of the instruction. 110 /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0] 111 /// may contain a register or specify an addressing mode, depending on MOD. 112 struct InstructionOpcodeAndModrm { 113 uint8_t primary_opcode; 114 uint8_t opcode_len; 115 uint8_t modrm; 116 }; 117 118 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. 119 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and 120 /// instruction set. 121 /// 122 /// \param[in] opcode_and_modrm 123 /// Contains primary_opcode byte, its length, and ModR/M byte. 124 /// Refer to the struct InstructionOpcodeAndModrm for details. 125 /// 126 /// \return 127 /// The control flow kind of the instruction or 128 /// eInstructionControlFlowKindOther if the instruction doesn't affect 129 /// the control flow of the program. 130 lldb::InstructionControlFlowKind 131 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { 132 uint8_t opcode = opcode_and_modrm.primary_opcode; 133 uint8_t opcode_len = opcode_and_modrm.opcode_len; 134 uint8_t modrm = opcode_and_modrm.modrm; 135 136 if (opcode_len > 2) 137 return lldb::eInstructionControlFlowKindOther; 138 139 if (opcode >= 0x70 && opcode <= 0x7F) { 140 if (opcode_len == 1) 141 return lldb::eInstructionControlFlowKindCondJump; 142 else 143 return lldb::eInstructionControlFlowKindOther; 144 } 145 146 if (opcode >= 0x80 && opcode <= 0x8F) { 147 if (opcode_len == 2) 148 return lldb::eInstructionControlFlowKindCondJump; 149 else 150 return lldb::eInstructionControlFlowKindOther; 151 } 152 153 switch (opcode) { 154 case 0x9A: 155 if (opcode_len == 1) 156 return lldb::eInstructionControlFlowKindFarCall; 157 break; 158 case 0xFF: 159 if (opcode_len == 1) { 160 uint8_t modrm_reg = (modrm >> 3) & 7; 161 if (modrm_reg == 2) 162 return lldb::eInstructionControlFlowKindCall; 163 else if (modrm_reg == 3) 164 return lldb::eInstructionControlFlowKindFarCall; 165 else if (modrm_reg == 4) 166 return lldb::eInstructionControlFlowKindJump; 167 else if (modrm_reg == 5) 168 return lldb::eInstructionControlFlowKindFarJump; 169 } 170 break; 171 case 0xE8: 172 if (opcode_len == 1) 173 return lldb::eInstructionControlFlowKindCall; 174 break; 175 case 0xCD: 176 case 0xCC: 177 case 0xCE: 178 case 0xF1: 179 if (opcode_len == 1) 180 return lldb::eInstructionControlFlowKindFarCall; 181 break; 182 case 0xCF: 183 if (opcode_len == 1) 184 return lldb::eInstructionControlFlowKindFarReturn; 185 break; 186 case 0xE9: 187 case 0xEB: 188 if (opcode_len == 1) 189 return lldb::eInstructionControlFlowKindJump; 190 break; 191 case 0xEA: 192 if (opcode_len == 1) 193 return lldb::eInstructionControlFlowKindFarJump; 194 break; 195 case 0xE3: 196 case 0xE0: 197 case 0xE1: 198 case 0xE2: 199 if (opcode_len == 1) 200 return lldb::eInstructionControlFlowKindCondJump; 201 break; 202 case 0xC3: 203 case 0xC2: 204 if (opcode_len == 1) 205 return lldb::eInstructionControlFlowKindReturn; 206 break; 207 case 0xCB: 208 case 0xCA: 209 if (opcode_len == 1) 210 return lldb::eInstructionControlFlowKindFarReturn; 211 break; 212 case 0x05: 213 case 0x34: 214 if (opcode_len == 2) 215 return lldb::eInstructionControlFlowKindFarCall; 216 break; 217 case 0x35: 218 case 0x07: 219 if (opcode_len == 2) 220 return lldb::eInstructionControlFlowKindFarReturn; 221 break; 222 case 0x01: 223 if (opcode_len == 2) { 224 switch (modrm) { 225 case 0xc1: 226 return lldb::eInstructionControlFlowKindFarCall; 227 case 0xc2: 228 case 0xc3: 229 return lldb::eInstructionControlFlowKindFarReturn; 230 default: 231 break; 232 } 233 } 234 break; 235 default: 236 break; 237 } 238 239 return lldb::eInstructionControlFlowKindOther; 240 } 241 242 /// Decode an instruction into opcode, modrm and opcode_len. 243 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. 244 /// Opcodes in x86 are generally the first byte of instruction, though two-byte 245 /// instructions and prefixes exist. ModR/M is the byte following the opcode 246 /// and adds additional information for how the instruction is executed. 247 /// 248 /// \param[in] inst_bytes 249 /// Raw bytes of the instruction 250 /// 251 /// 252 /// \param[in] bytes_len 253 /// The length of the inst_bytes array. 254 /// 255 /// \param[in] is_exec_mode_64b 256 /// If true, the execution mode is 64 bit. 257 /// 258 /// \return 259 /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding 260 /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition 261 /// for more details. 262 /// Otherwise if the given instruction is invalid, returns std::nullopt. 263 std::optional<InstructionOpcodeAndModrm> 264 InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, 265 bool is_exec_mode_64b) { 266 int op_idx = 0; 267 bool prefix_done = false; 268 InstructionOpcodeAndModrm ret = {0, 0, 0}; 269 270 // In most cases, the primary_opcode is the first byte of the instruction 271 // but some instructions have a prefix to be skipped for these calculations. 272 // The following mapping is inspired from libipt's instruction decoding logic 273 // in `src/pt_ild.c` 274 while (!prefix_done) { 275 if (op_idx >= bytes_len) 276 return std::nullopt; 277 278 ret.primary_opcode = inst_bytes[op_idx]; 279 switch (ret.primary_opcode) { 280 // prefix_ignore 281 case 0x26: 282 case 0x2e: 283 case 0x36: 284 case 0x3e: 285 case 0x64: 286 case 0x65: 287 // prefix_osz, prefix_asz 288 case 0x66: 289 case 0x67: 290 // prefix_lock, prefix_f2, prefix_f3 291 case 0xf0: 292 case 0xf2: 293 case 0xf3: 294 op_idx++; 295 break; 296 297 // prefix_rex 298 case 0x40: 299 case 0x41: 300 case 0x42: 301 case 0x43: 302 case 0x44: 303 case 0x45: 304 case 0x46: 305 case 0x47: 306 case 0x48: 307 case 0x49: 308 case 0x4a: 309 case 0x4b: 310 case 0x4c: 311 case 0x4d: 312 case 0x4e: 313 case 0x4f: 314 if (is_exec_mode_64b) 315 op_idx++; 316 else 317 prefix_done = true; 318 break; 319 320 // prefix_vex_c4, c5 321 case 0xc5: 322 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 323 prefix_done = true; 324 break; 325 } 326 327 ret.opcode_len = 2; 328 ret.primary_opcode = inst_bytes[op_idx + 2]; 329 ret.modrm = inst_bytes[op_idx + 3]; 330 return ret; 331 332 case 0xc4: 333 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 334 prefix_done = true; 335 break; 336 } 337 ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; 338 ret.primary_opcode = inst_bytes[op_idx + 3]; 339 ret.modrm = inst_bytes[op_idx + 4]; 340 return ret; 341 342 // prefix_evex 343 case 0x62: 344 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 345 prefix_done = true; 346 break; 347 } 348 ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; 349 ret.primary_opcode = inst_bytes[op_idx + 4]; 350 ret.modrm = inst_bytes[op_idx + 5]; 351 return ret; 352 353 default: 354 prefix_done = true; 355 break; 356 } 357 } // prefix done 358 359 ret.primary_opcode = inst_bytes[op_idx]; 360 ret.modrm = inst_bytes[op_idx + 1]; 361 ret.opcode_len = 1; 362 363 // If the first opcode is 0F, it's two- or three- byte opcodes. 364 if (ret.primary_opcode == 0x0F) { 365 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 366 367 if (ret.primary_opcode == 0x38) { 368 ret.opcode_len = 3; 369 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 370 ret.modrm = inst_bytes[op_idx + 1]; 371 } else if (ret.primary_opcode == 0x3A) { 372 ret.opcode_len = 3; 373 ret.primary_opcode = inst_bytes[++op_idx]; 374 ret.modrm = inst_bytes[op_idx + 1]; 375 } else if ((ret.primary_opcode & 0xf8) == 0x38) { 376 ret.opcode_len = 0; 377 ret.primary_opcode = inst_bytes[++op_idx]; 378 ret.modrm = inst_bytes[op_idx + 1]; 379 } else if (ret.primary_opcode == 0x0F) { 380 ret.opcode_len = 3; 381 // opcode is 0x0F, no needs to update 382 ret.modrm = inst_bytes[op_idx + 1]; 383 } else { 384 ret.opcode_len = 2; 385 ret.modrm = inst_bytes[op_idx + 1]; 386 } 387 } 388 389 return ret; 390 } 391 392 lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, 393 Opcode m_opcode) { 394 std::optional<InstructionOpcodeAndModrm> ret; 395 396 if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { 397 // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes 398 return lldb::eInstructionControlFlowKindUnknown; 399 } 400 401 // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. 402 // These are the three values deciding instruction control flow kind. 403 ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), 404 m_opcode.GetByteSize(), is_exec_mode_64b); 405 if (!ret) 406 return lldb::eInstructionControlFlowKindUnknown; 407 else 408 return MapOpcodeIntoControlFlowKind(*ret); 409 } 410 411 } // namespace x86 412 413 class InstructionLLVMC : public lldb_private::Instruction { 414 public: 415 InstructionLLVMC(DisassemblerLLVMC &disasm, 416 const lldb_private::Address &address, 417 AddressClass addr_class) 418 : Instruction(address, addr_class), 419 m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( 420 disasm.shared_from_this())) {} 421 422 ~InstructionLLVMC() override = default; 423 424 bool DoesBranch() override { 425 VisitInstruction(); 426 return m_does_branch; 427 } 428 429 bool HasDelaySlot() override { 430 VisitInstruction(); 431 return m_has_delay_slot; 432 } 433 434 bool IsLoad() override { 435 VisitInstruction(); 436 return m_is_load; 437 } 438 439 bool IsAuthenticated() override { 440 VisitInstruction(); 441 return m_is_authenticated; 442 } 443 444 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { 445 DisassemblerScope disasm(*this); 446 return GetDisasmToUse(is_alternate_isa, disasm); 447 } 448 449 size_t Decode(const lldb_private::Disassembler &disassembler, 450 const lldb_private::DataExtractor &data, 451 lldb::offset_t data_offset) override { 452 // All we have to do is read the opcode which can be easy for some 453 // architectures 454 bool got_op = false; 455 DisassemblerScope disasm(*this); 456 if (disasm) { 457 const ArchSpec &arch = disasm->GetArchitecture(); 458 const lldb::ByteOrder byte_order = data.GetByteOrder(); 459 460 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); 461 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); 462 if (min_op_byte_size == max_op_byte_size) { 463 // Fixed size instructions, just read that amount of data. 464 if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size)) 465 return false; 466 467 switch (min_op_byte_size) { 468 case 1: 469 m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order); 470 got_op = true; 471 break; 472 473 case 2: 474 m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order); 475 got_op = true; 476 break; 477 478 case 4: 479 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 480 got_op = true; 481 break; 482 483 case 8: 484 m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order); 485 got_op = true; 486 break; 487 488 default: 489 if (arch.GetTriple().isRISCV()) 490 m_opcode.SetOpcode16_32TupleBytes( 491 data.PeekData(data_offset, min_op_byte_size), min_op_byte_size, 492 byte_order); 493 else 494 m_opcode.SetOpcodeBytes( 495 data.PeekData(data_offset, min_op_byte_size), min_op_byte_size); 496 got_op = true; 497 break; 498 } 499 } 500 if (!got_op) { 501 bool is_alternate_isa = false; 502 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 503 GetDisasmToUse(is_alternate_isa, disasm); 504 505 const llvm::Triple::ArchType machine = arch.GetMachine(); 506 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { 507 if (machine == llvm::Triple::thumb || is_alternate_isa) { 508 uint32_t thumb_opcode = data.GetU16(&data_offset); 509 if ((thumb_opcode & 0xe000) != 0xe000 || 510 ((thumb_opcode & 0x1800u) == 0)) { 511 m_opcode.SetOpcode16(thumb_opcode, byte_order); 512 m_is_valid = true; 513 } else { 514 thumb_opcode <<= 16; 515 thumb_opcode |= data.GetU16(&data_offset); 516 m_opcode.SetOpcode16_2(thumb_opcode, byte_order); 517 m_is_valid = true; 518 } 519 } else { 520 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 521 m_is_valid = true; 522 } 523 } else { 524 // The opcode isn't evenly sized, so we need to actually use the llvm 525 // disassembler to parse it and get the size. 526 uint8_t *opcode_data = 527 const_cast<uint8_t *>(data.PeekData(data_offset, 1)); 528 const size_t opcode_data_len = data.BytesLeft(data_offset); 529 const addr_t pc = m_address.GetFileAddress(); 530 llvm::MCInst inst; 531 532 uint64_t inst_size = 0; 533 m_is_valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, 534 pc, inst, inst_size); 535 m_opcode.Clear(); 536 if (inst_size != 0) { 537 if (arch.GetTriple().isRISCV()) 538 m_opcode.SetOpcode16_32TupleBytes(opcode_data, inst_size, 539 byte_order); 540 else 541 m_opcode.SetOpcodeBytes(opcode_data, inst_size); 542 } 543 } 544 } 545 return m_opcode.GetByteSize(); 546 } 547 return 0; 548 } 549 550 void AppendComment(std::string &description) { 551 if (m_comment.empty()) 552 m_comment.swap(description); 553 else { 554 m_comment.append(", "); 555 m_comment.append(description); 556 } 557 } 558 559 lldb::InstructionControlFlowKind 560 GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override { 561 DisassemblerScope disasm(*this, exe_ctx); 562 if (disasm){ 563 if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86) 564 return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode); 565 else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64) 566 return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode); 567 } 568 569 return eInstructionControlFlowKindUnknown; 570 } 571 572 void CalculateMnemonicOperandsAndComment( 573 const lldb_private::ExecutionContext *exe_ctx) override { 574 DataExtractor data; 575 const AddressClass address_class = GetAddressClass(); 576 577 if (m_opcode.GetData(data)) { 578 std::string out_string; 579 std::string markup_out_string; 580 std::string comment_string; 581 std::string markup_comment_string; 582 583 DisassemblerScope disasm(*this, exe_ctx); 584 if (disasm) { 585 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; 586 587 if (address_class == AddressClass::eCodeAlternateISA) 588 mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); 589 else 590 mc_disasm_ptr = disasm->m_disasm_up.get(); 591 592 lldb::addr_t pc = m_address.GetFileAddress(); 593 m_using_file_addr = true; 594 595 bool use_hex_immediates = true; 596 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; 597 598 if (exe_ctx) { 599 Target *target = exe_ctx->GetTargetPtr(); 600 if (target) { 601 use_hex_immediates = target->GetUseHexImmediates(); 602 hex_style = target->GetHexImmediateStyle(); 603 604 const lldb::addr_t load_addr = m_address.GetLoadAddress(target); 605 if (load_addr != LLDB_INVALID_ADDRESS) { 606 pc = load_addr; 607 m_using_file_addr = false; 608 } 609 } 610 } 611 612 const uint8_t *opcode_data = data.GetDataStart(); 613 const size_t opcode_data_len = data.GetByteSize(); 614 llvm::MCInst inst; 615 uint64_t inst_size = 0; 616 bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, 617 inst, inst_size); 618 619 if (valid && inst_size > 0) { 620 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); 621 622 const bool saved_use_color = mc_disasm_ptr->GetUseColor(); 623 mc_disasm_ptr->SetUseColor(false); 624 mc_disasm_ptr->PrintMCInst(inst, pc, out_string, comment_string); 625 mc_disasm_ptr->SetUseColor(true); 626 mc_disasm_ptr->PrintMCInst(inst, pc, markup_out_string, 627 markup_comment_string); 628 mc_disasm_ptr->SetUseColor(saved_use_color); 629 630 if (!comment_string.empty()) { 631 AppendComment(comment_string); 632 } 633 } 634 635 if (inst_size == 0) { 636 m_comment.assign("unknown opcode"); 637 inst_size = m_opcode.GetByteSize(); 638 StreamString mnemonic_strm; 639 lldb::offset_t offset = 0; 640 lldb::ByteOrder byte_order = data.GetByteOrder(); 641 switch (inst_size) { 642 case 1: { 643 const uint8_t uval8 = data.GetU8(&offset); 644 m_opcode.SetOpcode8(uval8, byte_order); 645 m_opcode_name.assign(".byte"); 646 mnemonic_strm.Printf("0x%2.2x", uval8); 647 } break; 648 case 2: { 649 const uint16_t uval16 = data.GetU16(&offset); 650 m_opcode.SetOpcode16(uval16, byte_order); 651 m_opcode_name.assign(".short"); 652 mnemonic_strm.Printf("0x%4.4x", uval16); 653 } break; 654 case 4: { 655 const uint32_t uval32 = data.GetU32(&offset); 656 m_opcode.SetOpcode32(uval32, byte_order); 657 m_opcode_name.assign(".long"); 658 mnemonic_strm.Printf("0x%8.8x", uval32); 659 } break; 660 case 8: { 661 const uint64_t uval64 = data.GetU64(&offset); 662 m_opcode.SetOpcode64(uval64, byte_order); 663 m_opcode_name.assign(".quad"); 664 mnemonic_strm.Printf("0x%16.16" PRIx64, uval64); 665 } break; 666 default: 667 if (inst_size == 0) 668 return; 669 else { 670 const uint8_t *bytes = data.PeekData(offset, inst_size); 671 if (bytes == nullptr) 672 return; 673 m_opcode_name.assign(".byte"); 674 m_opcode.SetOpcodeBytes(bytes, inst_size); 675 mnemonic_strm.Printf("0x%2.2x", bytes[0]); 676 for (uint32_t i = 1; i < inst_size; ++i) 677 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]); 678 } 679 break; 680 } 681 m_mnemonics = std::string(mnemonic_strm.GetString()); 682 return; 683 } 684 685 static RegularExpression s_regex( 686 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?")); 687 688 llvm::SmallVector<llvm::StringRef, 4> matches; 689 if (s_regex.Execute(out_string, &matches)) { 690 m_opcode_name = matches[1].str(); 691 m_mnemonics = matches[2].str(); 692 } 693 matches.clear(); 694 if (s_regex.Execute(markup_out_string, &matches)) { 695 m_markup_opcode_name = matches[1].str(); 696 m_markup_mnemonics = matches[2].str(); 697 } 698 } 699 } 700 } 701 702 bool IsValid() const { return m_is_valid; } 703 704 bool UsingFileAddress() const { return m_using_file_addr; } 705 size_t GetByteSize() const { return m_opcode.GetByteSize(); } 706 707 /// Grants exclusive access to the disassembler and initializes it with the 708 /// given InstructionLLVMC and an optional ExecutionContext. 709 class DisassemblerScope { 710 std::shared_ptr<DisassemblerLLVMC> m_disasm; 711 712 public: 713 explicit DisassemblerScope( 714 InstructionLLVMC &i, 715 const lldb_private::ExecutionContext *exe_ctx = nullptr) 716 : m_disasm(i.m_disasm_wp.lock()) { 717 m_disasm->m_mutex.lock(); 718 m_disasm->m_inst = &i; 719 m_disasm->m_exe_ctx = exe_ctx; 720 } 721 ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } 722 723 /// Evaluates to true if this scope contains a valid disassembler. 724 operator bool() const { return static_cast<bool>(m_disasm); } 725 726 std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } 727 }; 728 729 static llvm::StringRef::const_iterator 730 ConsumeWhitespace(llvm::StringRef::const_iterator osi, 731 llvm::StringRef::const_iterator ose) { 732 while (osi != ose) { 733 switch (*osi) { 734 default: 735 return osi; 736 case ' ': 737 case '\t': 738 break; 739 } 740 ++osi; 741 } 742 743 return osi; 744 } 745 746 static std::pair<bool, llvm::StringRef::const_iterator> 747 ConsumeChar(llvm::StringRef::const_iterator osi, const char c, 748 llvm::StringRef::const_iterator ose) { 749 bool found = false; 750 751 osi = ConsumeWhitespace(osi, ose); 752 if (osi != ose && *osi == c) { 753 found = true; 754 ++osi; 755 } 756 757 return std::make_pair(found, osi); 758 } 759 760 static std::pair<Operand, llvm::StringRef::const_iterator> 761 ParseRegisterName(llvm::StringRef::const_iterator osi, 762 llvm::StringRef::const_iterator ose) { 763 Operand ret; 764 ret.m_type = Operand::Type::Register; 765 std::string str; 766 767 osi = ConsumeWhitespace(osi, ose); 768 769 while (osi != ose) { 770 if (*osi >= '0' && *osi <= '9') { 771 if (str.empty()) { 772 return std::make_pair(Operand(), osi); 773 } else { 774 str.push_back(*osi); 775 } 776 } else if (*osi >= 'a' && *osi <= 'z') { 777 str.push_back(*osi); 778 } else { 779 switch (*osi) { 780 default: 781 if (str.empty()) { 782 return std::make_pair(Operand(), osi); 783 } else { 784 ret.m_register = ConstString(str); 785 return std::make_pair(ret, osi); 786 } 787 case '%': 788 if (!str.empty()) { 789 return std::make_pair(Operand(), osi); 790 } 791 break; 792 } 793 } 794 ++osi; 795 } 796 797 ret.m_register = ConstString(str); 798 return std::make_pair(ret, osi); 799 } 800 801 static std::pair<Operand, llvm::StringRef::const_iterator> 802 ParseImmediate(llvm::StringRef::const_iterator osi, 803 llvm::StringRef::const_iterator ose) { 804 Operand ret; 805 ret.m_type = Operand::Type::Immediate; 806 std::string str; 807 bool is_hex = false; 808 809 osi = ConsumeWhitespace(osi, ose); 810 811 while (osi != ose) { 812 if (*osi >= '0' && *osi <= '9') { 813 str.push_back(*osi); 814 } else if (*osi >= 'a' && *osi <= 'f') { 815 if (is_hex) { 816 str.push_back(*osi); 817 } else { 818 return std::make_pair(Operand(), osi); 819 } 820 } else { 821 switch (*osi) { 822 default: 823 if (str.empty()) { 824 return std::make_pair(Operand(), osi); 825 } else { 826 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 827 return std::make_pair(ret, osi); 828 } 829 case 'x': 830 if (str == "0") { 831 is_hex = true; 832 str.push_back(*osi); 833 } else { 834 return std::make_pair(Operand(), osi); 835 } 836 break; 837 case '#': 838 case '$': 839 if (!str.empty()) { 840 return std::make_pair(Operand(), osi); 841 } 842 break; 843 case '-': 844 if (str.empty()) { 845 ret.m_negative = true; 846 } else { 847 return std::make_pair(Operand(), osi); 848 } 849 } 850 } 851 ++osi; 852 } 853 854 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 855 return std::make_pair(ret, osi); 856 } 857 858 // -0x5(%rax,%rax,2) 859 static std::pair<Operand, llvm::StringRef::const_iterator> 860 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, 861 llvm::StringRef::const_iterator ose) { 862 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 863 ParseImmediate(osi, ose); 864 if (offset_and_iterator.first.IsValid()) { 865 osi = offset_and_iterator.second; 866 } 867 868 bool found = false; 869 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 870 if (!found) { 871 return std::make_pair(Operand(), osi); 872 } 873 874 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 875 ParseRegisterName(osi, ose); 876 if (base_and_iterator.first.IsValid()) { 877 osi = base_and_iterator.second; 878 } else { 879 return std::make_pair(Operand(), osi); 880 } 881 882 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 883 if (!found) { 884 return std::make_pair(Operand(), osi); 885 } 886 887 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = 888 ParseRegisterName(osi, ose); 889 if (index_and_iterator.first.IsValid()) { 890 osi = index_and_iterator.second; 891 } else { 892 return std::make_pair(Operand(), osi); 893 } 894 895 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 896 if (!found) { 897 return std::make_pair(Operand(), osi); 898 } 899 900 std::pair<Operand, llvm::StringRef::const_iterator> 901 multiplier_and_iterator = ParseImmediate(osi, ose); 902 if (index_and_iterator.first.IsValid()) { 903 osi = index_and_iterator.second; 904 } else { 905 return std::make_pair(Operand(), osi); 906 } 907 908 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 909 if (!found) { 910 return std::make_pair(Operand(), osi); 911 } 912 913 Operand product; 914 product.m_type = Operand::Type::Product; 915 product.m_children.push_back(index_and_iterator.first); 916 product.m_children.push_back(multiplier_and_iterator.first); 917 918 Operand index; 919 index.m_type = Operand::Type::Sum; 920 index.m_children.push_back(base_and_iterator.first); 921 index.m_children.push_back(product); 922 923 if (offset_and_iterator.first.IsValid()) { 924 Operand offset; 925 offset.m_type = Operand::Type::Sum; 926 offset.m_children.push_back(offset_and_iterator.first); 927 offset.m_children.push_back(index); 928 929 Operand deref; 930 deref.m_type = Operand::Type::Dereference; 931 deref.m_children.push_back(offset); 932 return std::make_pair(deref, osi); 933 } else { 934 Operand deref; 935 deref.m_type = Operand::Type::Dereference; 936 deref.m_children.push_back(index); 937 return std::make_pair(deref, osi); 938 } 939 } 940 941 // -0x10(%rbp) 942 static std::pair<Operand, llvm::StringRef::const_iterator> 943 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, 944 llvm::StringRef::const_iterator ose) { 945 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 946 ParseImmediate(osi, ose); 947 if (offset_and_iterator.first.IsValid()) { 948 osi = offset_and_iterator.second; 949 } 950 951 bool found = false; 952 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 953 if (!found) { 954 return std::make_pair(Operand(), osi); 955 } 956 957 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 958 ParseRegisterName(osi, ose); 959 if (base_and_iterator.first.IsValid()) { 960 osi = base_and_iterator.second; 961 } else { 962 return std::make_pair(Operand(), osi); 963 } 964 965 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 966 if (!found) { 967 return std::make_pair(Operand(), osi); 968 } 969 970 if (offset_and_iterator.first.IsValid()) { 971 Operand offset; 972 offset.m_type = Operand::Type::Sum; 973 offset.m_children.push_back(offset_and_iterator.first); 974 offset.m_children.push_back(base_and_iterator.first); 975 976 Operand deref; 977 deref.m_type = Operand::Type::Dereference; 978 deref.m_children.push_back(offset); 979 return std::make_pair(deref, osi); 980 } else { 981 Operand deref; 982 deref.m_type = Operand::Type::Dereference; 983 deref.m_children.push_back(base_and_iterator.first); 984 return std::make_pair(deref, osi); 985 } 986 } 987 988 // [sp, #8]! 989 static std::pair<Operand, llvm::StringRef::const_iterator> 990 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, 991 llvm::StringRef::const_iterator ose) { 992 bool found = false; 993 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 994 if (!found) { 995 return std::make_pair(Operand(), osi); 996 } 997 998 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 999 ParseRegisterName(osi, ose); 1000 if (base_and_iterator.first.IsValid()) { 1001 osi = base_and_iterator.second; 1002 } else { 1003 return std::make_pair(Operand(), osi); 1004 } 1005 1006 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 1007 if (!found) { 1008 return std::make_pair(Operand(), osi); 1009 } 1010 1011 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 1012 ParseImmediate(osi, ose); 1013 if (offset_and_iterator.first.IsValid()) { 1014 osi = offset_and_iterator.second; 1015 } 1016 1017 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 1018 if (!found) { 1019 return std::make_pair(Operand(), osi); 1020 } 1021 1022 Operand offset; 1023 offset.m_type = Operand::Type::Sum; 1024 offset.m_children.push_back(offset_and_iterator.first); 1025 offset.m_children.push_back(base_and_iterator.first); 1026 1027 Operand deref; 1028 deref.m_type = Operand::Type::Dereference; 1029 deref.m_children.push_back(offset); 1030 return std::make_pair(deref, osi); 1031 } 1032 1033 // [sp] 1034 static std::pair<Operand, llvm::StringRef::const_iterator> 1035 ParseARMDerefAccess(llvm::StringRef::const_iterator osi, 1036 llvm::StringRef::const_iterator ose) { 1037 bool found = false; 1038 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 1039 if (!found) { 1040 return std::make_pair(Operand(), osi); 1041 } 1042 1043 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 1044 ParseRegisterName(osi, ose); 1045 if (base_and_iterator.first.IsValid()) { 1046 osi = base_and_iterator.second; 1047 } else { 1048 return std::make_pair(Operand(), osi); 1049 } 1050 1051 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 1052 if (!found) { 1053 return std::make_pair(Operand(), osi); 1054 } 1055 1056 Operand deref; 1057 deref.m_type = Operand::Type::Dereference; 1058 deref.m_children.push_back(base_and_iterator.first); 1059 return std::make_pair(deref, osi); 1060 } 1061 1062 static void DumpOperand(const Operand &op, Stream &s) { 1063 switch (op.m_type) { 1064 case Operand::Type::Dereference: 1065 s.PutCString("*"); 1066 DumpOperand(op.m_children[0], s); 1067 break; 1068 case Operand::Type::Immediate: 1069 if (op.m_negative) { 1070 s.PutCString("-"); 1071 } 1072 s.PutCString(llvm::to_string(op.m_immediate)); 1073 break; 1074 case Operand::Type::Invalid: 1075 s.PutCString("Invalid"); 1076 break; 1077 case Operand::Type::Product: 1078 s.PutCString("("); 1079 DumpOperand(op.m_children[0], s); 1080 s.PutCString("*"); 1081 DumpOperand(op.m_children[1], s); 1082 s.PutCString(")"); 1083 break; 1084 case Operand::Type::Register: 1085 s.PutCString(op.m_register.GetStringRef()); 1086 break; 1087 case Operand::Type::Sum: 1088 s.PutCString("("); 1089 DumpOperand(op.m_children[0], s); 1090 s.PutCString("+"); 1091 DumpOperand(op.m_children[1], s); 1092 s.PutCString(")"); 1093 break; 1094 } 1095 } 1096 1097 bool ParseOperands( 1098 llvm::SmallVectorImpl<Instruction::Operand> &operands) override { 1099 const char *operands_string = GetOperands(nullptr); 1100 1101 if (!operands_string) { 1102 return false; 1103 } 1104 1105 llvm::StringRef operands_ref(operands_string); 1106 1107 llvm::StringRef::const_iterator osi = operands_ref.begin(); 1108 llvm::StringRef::const_iterator ose = operands_ref.end(); 1109 1110 while (osi != ose) { 1111 Operand operand; 1112 llvm::StringRef::const_iterator iter; 1113 1114 if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose), 1115 operand.IsValid()) || 1116 (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose), 1117 operand.IsValid()) || 1118 (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose), 1119 operand.IsValid()) || 1120 (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose), 1121 operand.IsValid()) || 1122 (std::tie(operand, iter) = ParseRegisterName(osi, ose), 1123 operand.IsValid()) || 1124 (std::tie(operand, iter) = ParseImmediate(osi, ose), 1125 operand.IsValid())) { 1126 osi = iter; 1127 operands.push_back(operand); 1128 } else { 1129 return false; 1130 } 1131 1132 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = 1133 ConsumeChar(osi, ',', ose); 1134 if (found_and_iter.first) { 1135 osi = found_and_iter.second; 1136 } 1137 1138 osi = ConsumeWhitespace(osi, ose); 1139 } 1140 1141 DisassemblerSP disasm_sp = m_disasm_wp.lock(); 1142 1143 if (disasm_sp && operands.size() > 1) { 1144 // TODO tie this into the MC Disassembler's notion of clobbers. 1145 switch (disasm_sp->GetArchitecture().GetMachine()) { 1146 default: 1147 break; 1148 case llvm::Triple::x86: 1149 case llvm::Triple::x86_64: 1150 operands[operands.size() - 1].m_clobbered = true; 1151 break; 1152 case llvm::Triple::arm: 1153 operands[0].m_clobbered = true; 1154 break; 1155 } 1156 } 1157 1158 if (Log *log = GetLog(LLDBLog::Process | LLDBLog::Disassembler)) { 1159 StreamString ss; 1160 1161 ss.Printf("[%s] expands to %zu operands:\n", operands_string, 1162 operands.size()); 1163 for (const Operand &operand : operands) { 1164 ss.PutCString(" "); 1165 DumpOperand(operand, ss); 1166 ss.PutCString("\n"); 1167 } 1168 1169 log->PutString(ss.GetString()); 1170 } 1171 1172 return true; 1173 } 1174 1175 bool IsCall() override { 1176 VisitInstruction(); 1177 return m_is_call; 1178 } 1179 1180 protected: 1181 std::weak_ptr<DisassemblerLLVMC> m_disasm_wp; 1182 1183 bool m_is_valid = false; 1184 bool m_using_file_addr = false; 1185 bool m_has_visited_instruction = false; 1186 1187 // Be conservative. If we didn't understand the instruction, say it: 1188 // - Might branch 1189 // - Does not have a delay slot 1190 // - Is not a call 1191 // - Is not a load 1192 // - Is not an authenticated instruction 1193 bool m_does_branch = true; 1194 bool m_has_delay_slot = false; 1195 bool m_is_call = false; 1196 bool m_is_load = false; 1197 bool m_is_authenticated = false; 1198 1199 void VisitInstruction() { 1200 if (m_has_visited_instruction) 1201 return; 1202 1203 DisassemblerScope disasm(*this); 1204 if (!disasm) 1205 return; 1206 1207 DataExtractor data; 1208 if (!m_opcode.GetData(data)) 1209 return; 1210 1211 bool is_alternate_isa; 1212 lldb::addr_t pc = m_address.GetFileAddress(); 1213 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 1214 GetDisasmToUse(is_alternate_isa, disasm); 1215 const uint8_t *opcode_data = data.GetDataStart(); 1216 const size_t opcode_data_len = data.GetByteSize(); 1217 llvm::MCInst inst; 1218 uint64_t inst_size = 0; 1219 const bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, 1220 pc, inst, inst_size); 1221 if (!valid) 1222 return; 1223 1224 m_has_visited_instruction = true; 1225 m_does_branch = mc_disasm_ptr->CanBranch(inst); 1226 m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst); 1227 m_is_call = mc_disasm_ptr->IsCall(inst); 1228 m_is_load = mc_disasm_ptr->IsLoad(inst); 1229 m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst); 1230 } 1231 1232 private: 1233 DisassemblerLLVMC::MCDisasmInstance * 1234 GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { 1235 is_alternate_isa = false; 1236 if (disasm) { 1237 if (disasm->m_alternate_disasm_up) { 1238 const AddressClass address_class = GetAddressClass(); 1239 1240 if (address_class == AddressClass::eCodeAlternateISA) { 1241 is_alternate_isa = true; 1242 return disasm->m_alternate_disasm_up.get(); 1243 } 1244 } 1245 return disasm->m_disasm_up.get(); 1246 } 1247 return nullptr; 1248 } 1249 }; 1250 1251 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> 1252 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, 1253 const char *features_str, 1254 unsigned flavor, 1255 DisassemblerLLVMC &owner) { 1256 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; 1257 1258 std::string Status; 1259 const llvm::Target *curr_target = 1260 llvm::TargetRegistry::lookupTarget(triple, Status); 1261 if (!curr_target) 1262 return Instance(); 1263 1264 std::unique_ptr<llvm::MCInstrInfo> instr_info_up( 1265 curr_target->createMCInstrInfo()); 1266 if (!instr_info_up) 1267 return Instance(); 1268 1269 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( 1270 curr_target->createMCRegInfo(triple)); 1271 if (!reg_info_up) 1272 return Instance(); 1273 1274 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( 1275 curr_target->createMCSubtargetInfo(triple, cpu, features_str)); 1276 if (!subtarget_info_up) 1277 return Instance(); 1278 1279 llvm::MCTargetOptions MCOptions; 1280 std::unique_ptr<llvm::MCAsmInfo> asm_info_up( 1281 curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions)); 1282 if (!asm_info_up) 1283 return Instance(); 1284 1285 std::unique_ptr<llvm::MCContext> context_up( 1286 new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), 1287 reg_info_up.get(), subtarget_info_up.get())); 1288 if (!context_up) 1289 return Instance(); 1290 1291 std::unique_ptr<llvm::MCDisassembler> disasm_up( 1292 curr_target->createMCDisassembler(*subtarget_info_up, *context_up)); 1293 if (!disasm_up) 1294 return Instance(); 1295 1296 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( 1297 curr_target->createMCRelocationInfo(triple, *context_up)); 1298 if (!rel_info_up) 1299 return Instance(); 1300 1301 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( 1302 curr_target->createMCSymbolizer( 1303 triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner, 1304 context_up.get(), std::move(rel_info_up))); 1305 disasm_up->setSymbolizer(std::move(symbolizer_up)); 1306 1307 unsigned asm_printer_variant = 1308 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; 1309 1310 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( 1311 curr_target->createMCInstPrinter(llvm::Triple{triple}, 1312 asm_printer_variant, *asm_info_up, 1313 *instr_info_up, *reg_info_up)); 1314 if (!instr_printer_up) 1315 return Instance(); 1316 1317 instr_printer_up->setPrintBranchImmAsAddress(true); 1318 1319 // Not all targets may have registered createMCInstrAnalysis(). 1320 std::unique_ptr<llvm::MCInstrAnalysis> instr_analysis_up( 1321 curr_target->createMCInstrAnalysis(instr_info_up.get())); 1322 1323 return Instance(new MCDisasmInstance( 1324 std::move(instr_info_up), std::move(reg_info_up), 1325 std::move(subtarget_info_up), std::move(asm_info_up), 1326 std::move(context_up), std::move(disasm_up), std::move(instr_printer_up), 1327 std::move(instr_analysis_up))); 1328 } 1329 1330 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( 1331 std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 1332 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 1333 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 1334 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 1335 std::unique_ptr<llvm::MCContext> &&context_up, 1336 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 1337 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, 1338 std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up) 1339 : m_instr_info_up(std::move(instr_info_up)), 1340 m_reg_info_up(std::move(reg_info_up)), 1341 m_subtarget_info_up(std::move(subtarget_info_up)), 1342 m_asm_info_up(std::move(asm_info_up)), 1343 m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), 1344 m_instr_printer_up(std::move(instr_printer_up)), 1345 m_instr_analysis_up(std::move(instr_analysis_up)) { 1346 assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && 1347 m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); 1348 } 1349 1350 bool DisassemblerLLVMC::MCDisasmInstance::GetMCInst(const uint8_t *opcode_data, 1351 size_t opcode_data_len, 1352 lldb::addr_t pc, 1353 llvm::MCInst &mc_inst, 1354 uint64_t &size) const { 1355 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); 1356 llvm::MCDisassembler::DecodeStatus status; 1357 1358 status = m_disasm_up->getInstruction(mc_inst, size, data, pc, llvm::nulls()); 1359 if (status == llvm::MCDisassembler::Success) 1360 return true; 1361 else 1362 return false; 1363 } 1364 1365 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( 1366 llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string, 1367 std::string &comments_string) { 1368 llvm::raw_string_ostream inst_stream(inst_string); 1369 llvm::raw_string_ostream comments_stream(comments_string); 1370 1371 inst_stream.enable_colors(m_instr_printer_up->getUseColor()); 1372 m_instr_printer_up->setCommentStream(comments_stream); 1373 m_instr_printer_up->printInst(&mc_inst, pc, llvm::StringRef(), 1374 *m_subtarget_info_up, inst_stream); 1375 m_instr_printer_up->setCommentStream(llvm::nulls()); 1376 1377 static std::string g_newlines("\r\n"); 1378 1379 for (size_t newline_pos = 0; 1380 (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) != 1381 comments_string.npos; 1382 /**/) { 1383 comments_string.replace(comments_string.begin() + newline_pos, 1384 comments_string.begin() + newline_pos + 1, 1, ' '); 1385 } 1386 } 1387 1388 void DisassemblerLLVMC::MCDisasmInstance::SetStyle( 1389 bool use_hex_immed, HexImmediateStyle hex_style) { 1390 m_instr_printer_up->setPrintImmHex(use_hex_immed); 1391 switch (hex_style) { 1392 case eHexStyleC: 1393 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); 1394 break; 1395 case eHexStyleAsm: 1396 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); 1397 break; 1398 } 1399 } 1400 1401 void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color) { 1402 m_instr_printer_up->setUseColor(use_color); 1403 } 1404 1405 bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const { 1406 return m_instr_printer_up->getUseColor(); 1407 } 1408 1409 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( 1410 llvm::MCInst &mc_inst) const { 1411 if (m_instr_analysis_up) 1412 return m_instr_analysis_up->mayAffectControlFlow(mc_inst, *m_reg_info_up); 1413 return m_instr_info_up->get(mc_inst.getOpcode()) 1414 .mayAffectControlFlow(mc_inst, *m_reg_info_up); 1415 } 1416 1417 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( 1418 llvm::MCInst &mc_inst) const { 1419 return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot(); 1420 } 1421 1422 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { 1423 if (m_instr_analysis_up) 1424 return m_instr_analysis_up->isCall(mc_inst); 1425 return m_instr_info_up->get(mc_inst.getOpcode()).isCall(); 1426 } 1427 1428 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { 1429 return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad(); 1430 } 1431 1432 bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( 1433 llvm::MCInst &mc_inst) const { 1434 const auto &InstrDesc = m_instr_info_up->get(mc_inst.getOpcode()); 1435 1436 // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 1437 // == 'a' + 'c') as authenticated instructions for reporting purposes, in 1438 // addition to the standard authenticated instructions specified in ARMv8.3. 1439 bool IsBrkC47x = false; 1440 if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { 1441 const llvm::MCOperand &Op0 = mc_inst.getOperand(0); 1442 if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) 1443 IsBrkC47x = true; 1444 } 1445 1446 return InstrDesc.isAuthenticated() || IsBrkC47x; 1447 } 1448 1449 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, 1450 const char *flavor_string, 1451 const char *cpu_string, 1452 const char *features_string) 1453 : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), 1454 m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS), 1455 m_adrp_insn() { 1456 if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) { 1457 m_flavor.assign("default"); 1458 } 1459 1460 const bool cpu_or_features_overriden = cpu_string || features_string; 1461 unsigned flavor = ~0U; 1462 llvm::Triple triple = arch.GetTriple(); 1463 1464 // So far the only supported flavor is "intel" on x86. The base class will 1465 // set this correctly coming in. 1466 if (triple.getArch() == llvm::Triple::x86 || 1467 triple.getArch() == llvm::Triple::x86_64) { 1468 if (m_flavor == "intel") { 1469 flavor = 1; 1470 } else if (m_flavor == "att") { 1471 flavor = 0; 1472 } 1473 } 1474 1475 ArchSpec thumb_arch(arch); 1476 if (triple.getArch() == llvm::Triple::arm) { 1477 std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); 1478 // Replace "arm" with "thumb" so we get all thumb variants correct 1479 if (thumb_arch_name.size() > 3) { 1480 thumb_arch_name.erase(0, 3); 1481 thumb_arch_name.insert(0, "thumb"); 1482 } else { 1483 thumb_arch_name = "thumbv9.3a"; 1484 } 1485 thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); 1486 } 1487 1488 // If no sub architecture specified then use the most recent arm architecture 1489 // so the disassembler will return all instructions. Without it we will see a 1490 // lot of unknown opcodes if the code uses instructions which are not 1491 // available in the oldest arm version (which is used when no sub architecture 1492 // is specified). 1493 if (triple.getArch() == llvm::Triple::arm && 1494 triple.getSubArch() == llvm::Triple::NoSubArch) 1495 triple.setArchName("armv9.3a"); 1496 1497 std::string features_str = 1498 features_string ? std::string(features_string) : ""; 1499 const char *triple_str = triple.getTriple().c_str(); 1500 1501 // ARM Cortex M0-M7 devices only execute thumb instructions 1502 if (arch.IsAlwaysThumbInstructions()) { 1503 triple_str = thumb_arch.GetTriple().getTriple().c_str(); 1504 if (!features_string) 1505 features_str += "+fp-armv8,"; 1506 } 1507 1508 const char *cpu = cpu_string; 1509 1510 if (!cpu_or_features_overriden) { 1511 switch (arch.GetCore()) { 1512 case ArchSpec::eCore_mips32: 1513 case ArchSpec::eCore_mips32el: 1514 cpu = "mips32"; 1515 break; 1516 case ArchSpec::eCore_mips32r2: 1517 case ArchSpec::eCore_mips32r2el: 1518 cpu = "mips32r2"; 1519 break; 1520 case ArchSpec::eCore_mips32r3: 1521 case ArchSpec::eCore_mips32r3el: 1522 cpu = "mips32r3"; 1523 break; 1524 case ArchSpec::eCore_mips32r5: 1525 case ArchSpec::eCore_mips32r5el: 1526 cpu = "mips32r5"; 1527 break; 1528 case ArchSpec::eCore_mips32r6: 1529 case ArchSpec::eCore_mips32r6el: 1530 cpu = "mips32r6"; 1531 break; 1532 case ArchSpec::eCore_mips64: 1533 case ArchSpec::eCore_mips64el: 1534 cpu = "mips64"; 1535 break; 1536 case ArchSpec::eCore_mips64r2: 1537 case ArchSpec::eCore_mips64r2el: 1538 cpu = "mips64r2"; 1539 break; 1540 case ArchSpec::eCore_mips64r3: 1541 case ArchSpec::eCore_mips64r3el: 1542 cpu = "mips64r3"; 1543 break; 1544 case ArchSpec::eCore_mips64r5: 1545 case ArchSpec::eCore_mips64r5el: 1546 cpu = "mips64r5"; 1547 break; 1548 case ArchSpec::eCore_mips64r6: 1549 case ArchSpec::eCore_mips64r6el: 1550 cpu = "mips64r6"; 1551 break; 1552 default: 1553 cpu = ""; 1554 break; 1555 } 1556 } 1557 1558 if (arch.IsMIPS() && !cpu_or_features_overriden) { 1559 uint32_t arch_flags = arch.GetFlags(); 1560 if (arch_flags & ArchSpec::eMIPSAse_msa) 1561 features_str += "+msa,"; 1562 if (arch_flags & ArchSpec::eMIPSAse_dsp) 1563 features_str += "+dsp,"; 1564 if (arch_flags & ArchSpec::eMIPSAse_dspr2) 1565 features_str += "+dspr2,"; 1566 } 1567 1568 // If any AArch64 variant, enable latest ISA with all extensions unless the 1569 // CPU or features were overridden. 1570 if (triple.isAArch64() && !cpu_or_features_overriden) { 1571 features_str += "+all,"; 1572 if (triple.getVendor() == llvm::Triple::Apple) 1573 cpu = "apple-latest"; 1574 } 1575 1576 if (triple.isRISCV() && !cpu_or_features_overriden) { 1577 uint32_t arch_flags = arch.GetFlags(); 1578 if (arch_flags & ArchSpec::eRISCV_rvc) 1579 features_str += "+c,"; 1580 if (arch_flags & ArchSpec::eRISCV_rve) 1581 features_str += "+e,"; 1582 if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == 1583 ArchSpec::eRISCV_float_abi_single) 1584 features_str += "+f,"; 1585 if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == 1586 ArchSpec::eRISCV_float_abi_double) 1587 features_str += "+f,+d,"; 1588 if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == 1589 ArchSpec::eRISCV_float_abi_quad) 1590 features_str += "+f,+d,+q,"; 1591 // FIXME: how do we detect features such as `+a`, `+m`? 1592 // Turn them on by default now, since everyone seems to use them 1593 features_str += "+a,+m,"; 1594 } 1595 1596 // We use m_disasm_up.get() to tell whether we are valid or not, so if this 1597 // isn't good for some reason, we won't be valid and FindPlugin will fail and 1598 // we won't get used. 1599 m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(), 1600 flavor, *this); 1601 1602 llvm::Triple::ArchType llvm_arch = triple.getArch(); 1603 1604 // For arm CPUs that can execute arm or thumb instructions, also create a 1605 // thumb instruction disassembler. 1606 if (llvm_arch == llvm::Triple::arm) { 1607 std::string thumb_triple(thumb_arch.GetTriple().getTriple()); 1608 m_alternate_disasm_up = 1609 MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(), 1610 flavor, *this); 1611 if (!m_alternate_disasm_up) 1612 m_disasm_up.reset(); 1613 1614 } else if (arch.IsMIPS()) { 1615 /* Create alternate disassembler for MIPS16 and microMIPS */ 1616 uint32_t arch_flags = arch.GetFlags(); 1617 if (arch_flags & ArchSpec::eMIPSAse_mips16) 1618 features_str += "+mips16,"; 1619 else if (arch_flags & ArchSpec::eMIPSAse_micromips) 1620 features_str += "+micromips,"; 1621 1622 m_alternate_disasm_up = MCDisasmInstance::Create( 1623 triple_str, cpu, features_str.c_str(), flavor, *this); 1624 if (!m_alternate_disasm_up) 1625 m_disasm_up.reset(); 1626 } 1627 } 1628 1629 DisassemblerLLVMC::~DisassemblerLLVMC() = default; 1630 1631 lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, 1632 const char *flavor, 1633 const char *cpu, 1634 const char *features) { 1635 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { 1636 auto disasm_sp = 1637 std::make_shared<DisassemblerLLVMC>(arch, flavor, cpu, features); 1638 if (disasm_sp && disasm_sp->IsValid()) 1639 return disasm_sp; 1640 } 1641 return lldb::DisassemblerSP(); 1642 } 1643 1644 size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr, 1645 const DataExtractor &data, 1646 lldb::offset_t data_offset, 1647 size_t num_instructions, 1648 bool append, bool data_from_file) { 1649 if (!append) 1650 m_instruction_list.Clear(); 1651 1652 if (!IsValid()) 1653 return 0; 1654 1655 m_data_from_file = data_from_file; 1656 uint32_t data_cursor = data_offset; 1657 const size_t data_byte_size = data.GetByteSize(); 1658 uint32_t instructions_parsed = 0; 1659 Address inst_addr(base_addr); 1660 1661 while (data_cursor < data_byte_size && 1662 instructions_parsed < num_instructions) { 1663 1664 AddressClass address_class = AddressClass::eCode; 1665 1666 if (m_alternate_disasm_up) 1667 address_class = inst_addr.GetAddressClass(); 1668 1669 InstructionSP inst_sp( 1670 new InstructionLLVMC(*this, inst_addr, address_class)); 1671 1672 if (!inst_sp) 1673 break; 1674 1675 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor); 1676 1677 if (inst_size == 0) 1678 break; 1679 1680 m_instruction_list.Append(inst_sp); 1681 data_cursor += inst_size; 1682 inst_addr.Slide(inst_size); 1683 instructions_parsed++; 1684 } 1685 1686 return data_cursor - data_offset; 1687 } 1688 1689 void DisassemblerLLVMC::Initialize() { 1690 PluginManager::RegisterPlugin(GetPluginNameStatic(), 1691 "Disassembler that uses LLVM MC to disassemble " 1692 "i386, x86_64, ARM, and ARM64.", 1693 CreateInstance); 1694 1695 llvm::InitializeAllTargetInfos(); 1696 llvm::InitializeAllTargetMCs(); 1697 llvm::InitializeAllAsmParsers(); 1698 llvm::InitializeAllDisassemblers(); 1699 } 1700 1701 void DisassemblerLLVMC::Terminate() { 1702 PluginManager::UnregisterPlugin(CreateInstance); 1703 } 1704 1705 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, 1706 uint64_t offset, uint64_t size, 1707 int tag_type, void *tag_bug) { 1708 return static_cast<DisassemblerLLVMC *>(disassembler) 1709 ->OpInfo(pc, offset, size, tag_type, tag_bug); 1710 } 1711 1712 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, 1713 uint64_t value, 1714 uint64_t *type, uint64_t pc, 1715 const char **name) { 1716 return static_cast<DisassemblerLLVMC *>(disassembler) 1717 ->SymbolLookup(value, type, pc, name); 1718 } 1719 1720 bool DisassemblerLLVMC::FlavorValidForArchSpec( 1721 const lldb_private::ArchSpec &arch, const char *flavor) { 1722 llvm::Triple triple = arch.GetTriple(); 1723 if (flavor == nullptr || strcmp(flavor, "default") == 0) 1724 return true; 1725 1726 if (triple.getArch() == llvm::Triple::x86 || 1727 triple.getArch() == llvm::Triple::x86_64) { 1728 return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0; 1729 } else 1730 return false; 1731 } 1732 1733 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } 1734 1735 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, 1736 int tag_type, void *tag_bug) { 1737 switch (tag_type) { 1738 default: 1739 break; 1740 case 1: 1741 memset(tag_bug, 0, sizeof(::LLVMOpInfo1)); 1742 break; 1743 } 1744 return 0; 1745 } 1746 1747 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, 1748 uint64_t pc, const char **name) { 1749 if (*type_ptr) { 1750 if (m_exe_ctx && m_inst) { 1751 // std::string remove_this_prior_to_checkin; 1752 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; 1753 Address value_so_addr; 1754 Address pc_so_addr; 1755 if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || 1756 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || 1757 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { 1758 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { 1759 m_adrp_address = pc; 1760 m_adrp_insn = value; 1761 *name = nullptr; 1762 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1763 return nullptr; 1764 } 1765 // If this instruction is an ADD and 1766 // the previous instruction was an ADRP and 1767 // the ADRP's register and this ADD's register are the same, 1768 // then this is a pc-relative address calculation. 1769 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && 1770 m_adrp_insn && m_adrp_address == pc - 4 && 1771 (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) { 1772 uint32_t addxri_inst; 1773 uint64_t adrp_imm, addxri_imm; 1774 // Get immlo and immhi bits, OR them together to get the ADRP imm 1775 // value. 1776 adrp_imm = 1777 ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3); 1778 // if high bit of immhi after right-shifting set, sign extend 1779 if (adrp_imm & (1ULL << 20)) 1780 adrp_imm |= ~((1ULL << 21) - 1); 1781 1782 addxri_inst = value; 1783 addxri_imm = (addxri_inst >> 10) & 0xfff; 1784 // check if 'sh' bit is set, shift imm value up if so 1785 // (this would make no sense, ADRP already gave us this part) 1786 if ((addxri_inst >> (12 + 5 + 5)) & 1) 1787 addxri_imm <<= 12; 1788 value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + 1789 addxri_imm; 1790 } 1791 m_adrp_address = LLDB_INVALID_ADDRESS; 1792 m_adrp_insn.reset(); 1793 } 1794 1795 if (m_inst->UsingFileAddress()) { 1796 ModuleSP module_sp(m_inst->GetAddress().GetModule()); 1797 if (module_sp) { 1798 module_sp->ResolveFileAddress(value, value_so_addr); 1799 module_sp->ResolveFileAddress(pc, pc_so_addr); 1800 } 1801 } else if (target && target->HasLoadedSections()) { 1802 target->ResolveLoadAddress(value, value_so_addr); 1803 target->ResolveLoadAddress(pc, pc_so_addr); 1804 } 1805 1806 SymbolContext sym_ctx; 1807 const SymbolContextItem resolve_scope = 1808 eSymbolContextFunction | eSymbolContextSymbol; 1809 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { 1810 pc_so_addr.GetModule()->ResolveSymbolContextForAddress( 1811 pc_so_addr, resolve_scope, sym_ctx); 1812 } 1813 1814 if (value_so_addr.IsValid() && value_so_addr.GetSection()) { 1815 StreamString ss; 1816 1817 bool format_omitting_current_func_name = false; 1818 if (sym_ctx.symbol || sym_ctx.function) { 1819 AddressRange range; 1820 for (uint32_t idx = 0; 1821 sym_ctx.GetAddressRange(resolve_scope, idx, false, range); 1822 ++idx) { 1823 if (range.ContainsLoadAddress(value_so_addr, target)) { 1824 format_omitting_current_func_name = true; 1825 break; 1826 } 1827 } 1828 } 1829 1830 // If the "value" address (the target address we're symbolicating) is 1831 // inside the same SymbolContext as the current instruction pc 1832 // (pc_so_addr), don't print the full function name - just print it 1833 // with DumpStyleNoFunctionName style, e.g. "<+36>". 1834 if (format_omitting_current_func_name) { 1835 value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName, 1836 Address::DumpStyleSectionNameOffset); 1837 } else { 1838 value_so_addr.Dump( 1839 &ss, target, 1840 Address::DumpStyleResolvedDescriptionNoFunctionArguments, 1841 Address::DumpStyleSectionNameOffset); 1842 } 1843 1844 if (!ss.GetString().empty()) { 1845 // If Address::Dump returned a multi-line description, most commonly 1846 // seen when we have multiple levels of inlined functions at an 1847 // address, only show the first line. 1848 std::string str = std::string(ss.GetString()); 1849 size_t first_eol_char = str.find_first_of("\r\n"); 1850 if (first_eol_char != std::string::npos) { 1851 str.erase(first_eol_char); 1852 } 1853 m_inst->AppendComment(str); 1854 } 1855 } 1856 } 1857 } 1858 1859 // TODO: llvm-objdump sets the type_ptr to the 1860 // LLVMDisassembler_ReferenceType_Out_* values 1861 // based on where value_so_addr is pointing, with 1862 // Mach-O specific augmentations in MachODump.cpp. e.g. 1863 // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand 1864 // handles. 1865 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1866 *name = nullptr; 1867 return nullptr; 1868 } 1869