xref: /freebsd/contrib/llvm-project/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp (revision 069ac18495ad8fde2748bc94b0f80a50250bb01d)
1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DisassemblerLLVMC.h"
10 
11 #include "llvm-c/Disassembler.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/StringExtras.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstPrinter.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCSubtargetInfo.h"
24 #include "llvm/MC/MCTargetOptions.h"
25 #include "llvm/MC/TargetRegistry.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include "llvm/Support/ScopedPrinter.h"
28 #include "llvm/Support/TargetSelect.h"
29 #include "llvm/TargetParser/AArch64TargetParser.h"
30 
31 #include "lldb/Core/Address.h"
32 #include "lldb/Core/Module.h"
33 #include "lldb/Symbol/SymbolContext.h"
34 #include "lldb/Target/ExecutionContext.h"
35 #include "lldb/Target/Process.h"
36 #include "lldb/Target/RegisterContext.h"
37 #include "lldb/Target/SectionLoadList.h"
38 #include "lldb/Target/StackFrame.h"
39 #include "lldb/Target/Target.h"
40 #include "lldb/Utility/DataExtractor.h"
41 #include "lldb/Utility/LLDBLog.h"
42 #include "lldb/Utility/Log.h"
43 #include "lldb/Utility/RegularExpression.h"
44 #include "lldb/Utility/Stream.h"
45 #include <optional>
46 
47 using namespace lldb;
48 using namespace lldb_private;
49 
50 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)
51 
/// Bundles the LLVM MC-layer objects (instruction info, register info,
/// subtarget info, asm info, context, disassembler and instruction printer)
/// that are needed to decode raw bytes into an llvm::MCInst and to print it.
/// One instance corresponds to one triple / CPU / feature string / flavor
/// combination, as passed to Create().
class DisassemblerLLVMC::MCDisasmInstance {
public:
  /// Factory; the constructor is private, so this is the only way to obtain
  /// an instance.
  /// NOTE(review): presumably returns a null pointer when any of the MC
  /// objects cannot be created for the given triple -- confirm against the
  /// definition, which is outside this part of the file.
  static std::unique_ptr<MCDisasmInstance>
  Create(const char *triple, const char *cpu, const char *features_str,
         unsigned flavor, DisassemblerLLVMC &owner);

  ~MCDisasmInstance() = default;

  /// Decode one instruction from \p opcode_data (at most \p opcode_data_len
  /// bytes) located at address \p pc into \p mc_inst.
  /// Callers in this file treat a return value of 0 as "could not decode"
  /// and any other value as the instruction size in bytes.
  uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
                     lldb::addr_t pc, llvm::MCInst &mc_inst) const;
  /// Print \p mc_inst; the text is delivered through \p inst_string and
  /// \p comments_string, which callers read after the call.
  void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string,
                   std::string &comments_string);
  /// Select how immediates are printed (hex or not, and which hex style).
  void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
  // Predicates over an already decoded instruction.
  bool CanBranch(llvm::MCInst &mc_inst) const;
  bool HasDelaySlot(llvm::MCInst &mc_inst) const;
  bool IsCall(llvm::MCInst &mc_inst) const;
  bool IsLoad(llvm::MCInst &mc_inst) const;
  bool IsAuthenticated(llvm::MCInst &mc_inst) const;

private:
  MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
                   std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
                   std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
                   std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
                   std::unique_ptr<llvm::MCContext> &&context_up,
                   std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
                   std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up);

  // Members are destroyed in reverse declaration order.
  // NOTE(review): the later objects presumably refer to the earlier ones
  // (e.g. the disassembler to the context), so keep this order -- confirm
  // against Create() before rearranging.
  std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
  std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
  std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
  std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
  std::unique_ptr<llvm::MCContext> m_context_up;
  std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
  std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
};
88 
89 namespace x86 {
90 
/// The three values that together decide an instruction's control flow kind.
/// InstructionLengthDecode decodes an instruction and produces this struct.
///
/// primary_opcode
///    Primary opcode of the instruction.
///    For a one-byte opcode it is the first byte after the prefixes.
///    For a two-byte opcode (0F xx) it is the second byte, and for a
///    three-byte opcode (0F 38 xx / 0F 3A xx) it is the third byte.
///    For VEX/EVEX encoded instructions it is the byte following the prefix.
///
/// opcode_len
///    The length of opcode in bytes. Legacy encodings yield 1, 2, or 3.
///    The decoder stores 0 for the reserved 0F 39 / 0F 3B..3F escapes and
///    the opcode-map selector bits for VEX (C4) and EVEX encodings.
///
/// modrm
///    ModR/M byte of the instruction.
///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
///    may contain a register or specify an addressing mode, depending on MOD.
struct InstructionOpcodeAndModrm {
  uint8_t primary_opcode;
  uint8_t opcode_len;
  uint8_t modrm;
};
111 
112 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
113 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
114 /// instruction set.
115 ///
116 /// \param[in] opcode_and_modrm
117 ///    Contains primary_opcode byte, its length, and ModR/M byte.
118 ///    Refer to the struct InstructionOpcodeAndModrm for details.
119 ///
120 /// \return
121 ///   The control flow kind of the instruction or
122 ///   eInstructionControlFlowKindOther if the instruction doesn't affect
123 ///   the control flow of the program.
124 lldb::InstructionControlFlowKind
125 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
126   uint8_t opcode = opcode_and_modrm.primary_opcode;
127   uint8_t opcode_len = opcode_and_modrm.opcode_len;
128   uint8_t modrm = opcode_and_modrm.modrm;
129 
130   if (opcode_len > 2)
131     return lldb::eInstructionControlFlowKindOther;
132 
133   if (opcode >= 0x70 && opcode <= 0x7F) {
134     if (opcode_len == 1)
135       return lldb::eInstructionControlFlowKindCondJump;
136     else
137       return lldb::eInstructionControlFlowKindOther;
138   }
139 
140   if (opcode >= 0x80 && opcode <= 0x8F) {
141     if (opcode_len == 2)
142       return lldb::eInstructionControlFlowKindCondJump;
143     else
144       return lldb::eInstructionControlFlowKindOther;
145   }
146 
147   switch (opcode) {
148   case 0x9A:
149     if (opcode_len == 1)
150       return lldb::eInstructionControlFlowKindFarCall;
151     break;
152   case 0xFF:
153     if (opcode_len == 1) {
154       uint8_t modrm_reg = (modrm >> 3) & 7;
155       if (modrm_reg == 2)
156         return lldb::eInstructionControlFlowKindCall;
157       else if (modrm_reg == 3)
158         return lldb::eInstructionControlFlowKindFarCall;
159       else if (modrm_reg == 4)
160         return lldb::eInstructionControlFlowKindJump;
161       else if (modrm_reg == 5)
162         return lldb::eInstructionControlFlowKindFarJump;
163     }
164     break;
165   case 0xE8:
166     if (opcode_len == 1)
167       return lldb::eInstructionControlFlowKindCall;
168     break;
169   case 0xCD:
170   case 0xCC:
171   case 0xCE:
172   case 0xF1:
173     if (opcode_len == 1)
174       return lldb::eInstructionControlFlowKindFarCall;
175     break;
176   case 0xCF:
177     if (opcode_len == 1)
178       return lldb::eInstructionControlFlowKindFarReturn;
179     break;
180   case 0xE9:
181   case 0xEB:
182     if (opcode_len == 1)
183       return lldb::eInstructionControlFlowKindJump;
184     break;
185   case 0xEA:
186     if (opcode_len == 1)
187       return lldb::eInstructionControlFlowKindFarJump;
188     break;
189   case 0xE3:
190   case 0xE0:
191   case 0xE1:
192   case 0xE2:
193     if (opcode_len == 1)
194       return lldb::eInstructionControlFlowKindCondJump;
195     break;
196   case 0xC3:
197   case 0xC2:
198     if (opcode_len == 1)
199       return lldb::eInstructionControlFlowKindReturn;
200     break;
201   case 0xCB:
202   case 0xCA:
203     if (opcode_len == 1)
204       return lldb::eInstructionControlFlowKindFarReturn;
205     break;
206   case 0x05:
207   case 0x34:
208     if (opcode_len == 2)
209       return lldb::eInstructionControlFlowKindFarCall;
210     break;
211   case 0x35:
212   case 0x07:
213     if (opcode_len == 2)
214       return lldb::eInstructionControlFlowKindFarReturn;
215     break;
216   case 0x01:
217     if (opcode_len == 2) {
218       switch (modrm) {
219       case 0xc1:
220         return lldb::eInstructionControlFlowKindFarCall;
221       case 0xc2:
222       case 0xc3:
223         return lldb::eInstructionControlFlowKindFarReturn;
224       default:
225         break;
226       }
227     }
228     break;
229   default:
230     break;
231   }
232 
233   return lldb::eInstructionControlFlowKindOther;
234 }
235 
236 /// Decode an instruction into opcode, modrm and opcode_len.
237 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
238 /// Opcodes in x86 are generally the first byte of instruction, though two-byte
239 /// instructions and prefixes exist. ModR/M is the byte following the opcode
240 /// and adds additional information for how the instruction is executed.
241 ///
242 /// \param[in] inst_bytes
243 ///    Raw bytes of the instruction
244 ///
245 ///
246 /// \param[in] bytes_len
247 ///    The length of the inst_bytes array.
248 ///
249 /// \param[in] is_exec_mode_64b
250 ///    If true, the execution mode is 64 bit.
251 ///
252 /// \return
253 ///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
254 ///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
255 ///    for more details.
256 ///    Otherwise if the given instruction is invalid, returns std::nullopt.
257 std::optional<InstructionOpcodeAndModrm>
258 InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
259                         bool is_exec_mode_64b) {
260   int op_idx = 0;
261   bool prefix_done = false;
262   InstructionOpcodeAndModrm ret = {0, 0, 0};
263 
264   // In most cases, the primary_opcode is the first byte of the instruction
265   // but some instructions have a prefix to be skipped for these calculations.
266   // The following mapping is inspired from libipt's instruction decoding logic
267   // in `src/pt_ild.c`
268   while (!prefix_done) {
269     if (op_idx >= bytes_len)
270       return std::nullopt;
271 
272     ret.primary_opcode = inst_bytes[op_idx];
273     switch (ret.primary_opcode) {
274     // prefix_ignore
275     case 0x26:
276     case 0x2e:
277     case 0x36:
278     case 0x3e:
279     case 0x64:
280     case 0x65:
281     // prefix_osz, prefix_asz
282     case 0x66:
283     case 0x67:
284     // prefix_lock, prefix_f2, prefix_f3
285     case 0xf0:
286     case 0xf2:
287     case 0xf3:
288       op_idx++;
289       break;
290 
291     // prefix_rex
292     case 0x40:
293     case 0x41:
294     case 0x42:
295     case 0x43:
296     case 0x44:
297     case 0x45:
298     case 0x46:
299     case 0x47:
300     case 0x48:
301     case 0x49:
302     case 0x4a:
303     case 0x4b:
304     case 0x4c:
305     case 0x4d:
306     case 0x4e:
307     case 0x4f:
308       if (is_exec_mode_64b)
309         op_idx++;
310       else
311         prefix_done = true;
312       break;
313 
314     // prefix_vex_c4, c5
315     case 0xc5:
316       if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
317         prefix_done = true;
318         break;
319       }
320 
321       ret.opcode_len = 2;
322       ret.primary_opcode = inst_bytes[op_idx + 2];
323       ret.modrm = inst_bytes[op_idx + 3];
324       return ret;
325 
326     case 0xc4:
327       if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
328         prefix_done = true;
329         break;
330       }
331       ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
332       ret.primary_opcode = inst_bytes[op_idx + 3];
333       ret.modrm = inst_bytes[op_idx + 4];
334       return ret;
335 
336     // prefix_evex
337     case 0x62:
338       if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
339         prefix_done = true;
340         break;
341       }
342       ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
343       ret.primary_opcode = inst_bytes[op_idx + 4];
344       ret.modrm = inst_bytes[op_idx + 5];
345       return ret;
346 
347     default:
348       prefix_done = true;
349       break;
350     }
351   } // prefix done
352 
353   ret.primary_opcode = inst_bytes[op_idx];
354   ret.modrm = inst_bytes[op_idx + 1];
355   ret.opcode_len = 1;
356 
357   // If the first opcode is 0F, it's two- or three- byte opcodes.
358   if (ret.primary_opcode == 0x0F) {
359     ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
360 
361     if (ret.primary_opcode == 0x38) {
362       ret.opcode_len = 3;
363       ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
364       ret.modrm = inst_bytes[op_idx + 1];
365     } else if (ret.primary_opcode == 0x3A) {
366       ret.opcode_len = 3;
367       ret.primary_opcode = inst_bytes[++op_idx];
368       ret.modrm = inst_bytes[op_idx + 1];
369     } else if ((ret.primary_opcode & 0xf8) == 0x38) {
370       ret.opcode_len = 0;
371       ret.primary_opcode = inst_bytes[++op_idx];
372       ret.modrm = inst_bytes[op_idx + 1];
373     } else if (ret.primary_opcode == 0x0F) {
374       ret.opcode_len = 3;
375       // opcode is 0x0F, no needs to update
376       ret.modrm = inst_bytes[op_idx + 1];
377     } else {
378       ret.opcode_len = 2;
379       ret.modrm = inst_bytes[op_idx + 1];
380     }
381   }
382 
383   return ret;
384 }
385 
386 lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
387                                                     Opcode m_opcode) {
388   std::optional<InstructionOpcodeAndModrm> ret;
389 
390   if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
391     // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
392     return lldb::eInstructionControlFlowKindUnknown;
393   }
394 
395   // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
396   // These are the three values deciding instruction control flow kind.
397   ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
398                                 m_opcode.GetByteSize(), is_exec_mode_64b);
399   if (!ret)
400     return lldb::eInstructionControlFlowKindUnknown;
401   else
402     return MapOpcodeIntoControlFlowKind(*ret);
403 }
404 
405 } // namespace x86
406 
407 class InstructionLLVMC : public lldb_private::Instruction {
408 public:
  /// Construct an instruction located at \p address with address class
  /// \p addr_class, decoded on behalf of \p disasm.
  ///
  /// The disassembler is remembered in m_disasm_wp, obtained through
  /// shared_from_this(); \p disasm must therefore already be owned by a
  /// shared pointer when this constructor runs.
  InstructionLLVMC(DisassemblerLLVMC &disasm,
                   const lldb_private::Address &address,
                   AddressClass addr_class)
      : Instruction(address, addr_class),
        m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
            disasm.shared_from_this())) {}

  ~InstructionLLVMC() override = default;
417 
  /// Calls VisitInstruction() and then reports m_does_branch.
  /// NOTE(review): VisitInstruction() is defined outside this part of the
  /// file; presumably it fills in the cached flag -- confirm there.
  bool DoesBranch() override {
    VisitInstruction();
    return m_does_branch;
  }
422 
  /// Calls VisitInstruction() and then reports m_has_delay_slot.
  /// NOTE(review): VisitInstruction() is defined outside this part of the
  /// file; presumably it fills in the cached flag -- confirm there.
  bool HasDelaySlot() override {
    VisitInstruction();
    return m_has_delay_slot;
  }
427 
  /// Calls VisitInstruction() and then reports m_is_load.
  /// NOTE(review): VisitInstruction() is defined outside this part of the
  /// file; presumably it fills in the cached flag -- confirm there.
  bool IsLoad() override {
    VisitInstruction();
    return m_is_load;
  }
432 
  /// Calls VisitInstruction() and then reports m_is_authenticated.
  /// NOTE(review): VisitInstruction() is defined outside this part of the
  /// file; presumably it fills in the cached flag -- confirm there.
  bool IsAuthenticated() override {
    VisitInstruction();
    return m_is_authenticated;
  }
437 
  /// Convenience overload: opens a DisassemblerScope for the duration of the
  /// call and forwards to the two-argument overload.
  /// \p is_alternate_isa is passed through by reference to that overload.
  DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
    DisassemblerScope disasm(*this);
    return GetDisasmToUse(is_alternate_isa, disasm);
  }
442 
443   size_t Decode(const lldb_private::Disassembler &disassembler,
444                 const lldb_private::DataExtractor &data,
445                 lldb::offset_t data_offset) override {
446     // All we have to do is read the opcode which can be easy for some
447     // architectures
448     bool got_op = false;
449     DisassemblerScope disasm(*this);
450     if (disasm) {
451       const ArchSpec &arch = disasm->GetArchitecture();
452       const lldb::ByteOrder byte_order = data.GetByteOrder();
453 
454       const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
455       const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
456       if (min_op_byte_size == max_op_byte_size) {
457         // Fixed size instructions, just read that amount of data.
458         if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
459           return false;
460 
461         switch (min_op_byte_size) {
462         case 1:
463           m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);
464           got_op = true;
465           break;
466 
467         case 2:
468           m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);
469           got_op = true;
470           break;
471 
472         case 4:
473           m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
474           got_op = true;
475           break;
476 
477         case 8:
478           m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);
479           got_op = true;
480           break;
481 
482         default:
483           m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
484                                   min_op_byte_size);
485           got_op = true;
486           break;
487         }
488       }
489       if (!got_op) {
490         bool is_alternate_isa = false;
491         DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
492             GetDisasmToUse(is_alternate_isa, disasm);
493 
494         const llvm::Triple::ArchType machine = arch.GetMachine();
495         if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {
496           if (machine == llvm::Triple::thumb || is_alternate_isa) {
497             uint32_t thumb_opcode = data.GetU16(&data_offset);
498             if ((thumb_opcode & 0xe000) != 0xe000 ||
499                 ((thumb_opcode & 0x1800u) == 0)) {
500               m_opcode.SetOpcode16(thumb_opcode, byte_order);
501               m_is_valid = true;
502             } else {
503               thumb_opcode <<= 16;
504               thumb_opcode |= data.GetU16(&data_offset);
505               m_opcode.SetOpcode16_2(thumb_opcode, byte_order);
506               m_is_valid = true;
507             }
508           } else {
509             m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
510             m_is_valid = true;
511           }
512         } else {
513           // The opcode isn't evenly sized, so we need to actually use the llvm
514           // disassembler to parse it and get the size.
515           uint8_t *opcode_data =
516               const_cast<uint8_t *>(data.PeekData(data_offset, 1));
517           const size_t opcode_data_len = data.BytesLeft(data_offset);
518           const addr_t pc = m_address.GetFileAddress();
519           llvm::MCInst inst;
520 
521           const size_t inst_size =
522               mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
523           if (inst_size == 0)
524             m_opcode.Clear();
525           else {
526             m_opcode.SetOpcodeBytes(opcode_data, inst_size);
527             m_is_valid = true;
528           }
529         }
530       }
531       return m_opcode.GetByteSize();
532     }
533     return 0;
534   }
535 
536   void AppendComment(std::string &description) {
537     if (m_comment.empty())
538       m_comment.swap(description);
539     else {
540       m_comment.append(", ");
541       m_comment.append(description);
542     }
543   }
544 
545   lldb::InstructionControlFlowKind
546   GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {
547     DisassemblerScope disasm(*this, exe_ctx);
548     if (disasm){
549       if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)
550         return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode);
551       else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)
552         return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode);
553     }
554 
555     return eInstructionControlFlowKindUnknown;
556   }
557 
558   void CalculateMnemonicOperandsAndComment(
559       const lldb_private::ExecutionContext *exe_ctx) override {
560     DataExtractor data;
561     const AddressClass address_class = GetAddressClass();
562 
563     if (m_opcode.GetData(data)) {
564       std::string out_string;
565       std::string comment_string;
566 
567       DisassemblerScope disasm(*this, exe_ctx);
568       if (disasm) {
569         DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr;
570 
571         if (address_class == AddressClass::eCodeAlternateISA)
572           mc_disasm_ptr = disasm->m_alternate_disasm_up.get();
573         else
574           mc_disasm_ptr = disasm->m_disasm_up.get();
575 
576         lldb::addr_t pc = m_address.GetFileAddress();
577         m_using_file_addr = true;
578 
579         const bool data_from_file = disasm->m_data_from_file;
580         bool use_hex_immediates = true;
581         Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
582 
583         if (exe_ctx) {
584           Target *target = exe_ctx->GetTargetPtr();
585           if (target) {
586             use_hex_immediates = target->GetUseHexImmediates();
587             hex_style = target->GetHexImmediateStyle();
588 
589             if (!data_from_file) {
590               const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
591               if (load_addr != LLDB_INVALID_ADDRESS) {
592                 pc = load_addr;
593                 m_using_file_addr = false;
594               }
595             }
596           }
597         }
598 
599         const uint8_t *opcode_data = data.GetDataStart();
600         const size_t opcode_data_len = data.GetByteSize();
601         llvm::MCInst inst;
602         size_t inst_size =
603             mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
604 
605         if (inst_size > 0) {
606           mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
607           mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string);
608 
609           if (!comment_string.empty()) {
610             AppendComment(comment_string);
611           }
612         }
613 
614         if (inst_size == 0) {
615           m_comment.assign("unknown opcode");
616           inst_size = m_opcode.GetByteSize();
617           StreamString mnemonic_strm;
618           lldb::offset_t offset = 0;
619           lldb::ByteOrder byte_order = data.GetByteOrder();
620           switch (inst_size) {
621           case 1: {
622             const uint8_t uval8 = data.GetU8(&offset);
623             m_opcode.SetOpcode8(uval8, byte_order);
624             m_opcode_name.assign(".byte");
625             mnemonic_strm.Printf("0x%2.2x", uval8);
626           } break;
627           case 2: {
628             const uint16_t uval16 = data.GetU16(&offset);
629             m_opcode.SetOpcode16(uval16, byte_order);
630             m_opcode_name.assign(".short");
631             mnemonic_strm.Printf("0x%4.4x", uval16);
632           } break;
633           case 4: {
634             const uint32_t uval32 = data.GetU32(&offset);
635             m_opcode.SetOpcode32(uval32, byte_order);
636             m_opcode_name.assign(".long");
637             mnemonic_strm.Printf("0x%8.8x", uval32);
638           } break;
639           case 8: {
640             const uint64_t uval64 = data.GetU64(&offset);
641             m_opcode.SetOpcode64(uval64, byte_order);
642             m_opcode_name.assign(".quad");
643             mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
644           } break;
645           default:
646             if (inst_size == 0)
647               return;
648             else {
649               const uint8_t *bytes = data.PeekData(offset, inst_size);
650               if (bytes == nullptr)
651                 return;
652               m_opcode_name.assign(".byte");
653               m_opcode.SetOpcodeBytes(bytes, inst_size);
654               mnemonic_strm.Printf("0x%2.2x", bytes[0]);
655               for (uint32_t i = 1; i < inst_size; ++i)
656                 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
657             }
658             break;
659           }
660           m_mnemonics = std::string(mnemonic_strm.GetString());
661           return;
662         }
663 
664         static RegularExpression s_regex(
665             llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
666 
667         llvm::SmallVector<llvm::StringRef, 4> matches;
668         if (s_regex.Execute(out_string, &matches)) {
669           m_opcode_name = matches[1].str();
670           m_mnemonics = matches[2].str();
671         }
672       }
673     }
674   }
675 
  /// Reports m_is_valid, which Decode() sets to true on its ARM/Thumb and
  /// LLVM-disassembled paths when an opcode has been stored.
  bool IsValid() const { return m_is_valid; }
677 
  /// True if CalculateMnemonicOperandsAndComment() last disassembled this
  /// instruction at its file address rather than at a load address.
  bool UsingFileAddress() const { return m_using_file_addr; }
  /// Size in bytes of the opcode currently held in m_opcode.
  size_t GetByteSize() const { return m_opcode.GetByteSize(); }
680 
681   /// Grants exclusive access to the disassembler and initializes it with the
682   /// given InstructionLLVMC and an optional ExecutionContext.
683   class DisassemblerScope {
684     std::shared_ptr<DisassemblerLLVMC> m_disasm;
685 
686   public:
687     explicit DisassemblerScope(
688         InstructionLLVMC &i,
689         const lldb_private::ExecutionContext *exe_ctx = nullptr)
690         : m_disasm(i.m_disasm_wp.lock()) {
691       m_disasm->m_mutex.lock();
692       m_disasm->m_inst = &i;
693       m_disasm->m_exe_ctx = exe_ctx;
694     }
695     ~DisassemblerScope() { m_disasm->m_mutex.unlock(); }
696 
697     /// Evaluates to true if this scope contains a valid disassembler.
698     operator bool() const { return static_cast<bool>(m_disasm); }
699 
700     std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }
701   };
702 
703   static llvm::StringRef::const_iterator
704   ConsumeWhitespace(llvm::StringRef::const_iterator osi,
705                     llvm::StringRef::const_iterator ose) {
706     while (osi != ose) {
707       switch (*osi) {
708       default:
709         return osi;
710       case ' ':
711       case '\t':
712         break;
713       }
714       ++osi;
715     }
716 
717     return osi;
718   }
719 
720   static std::pair<bool, llvm::StringRef::const_iterator>
721   ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
722               llvm::StringRef::const_iterator ose) {
723     bool found = false;
724 
725     osi = ConsumeWhitespace(osi, ose);
726     if (osi != ose && *osi == c) {
727       found = true;
728       ++osi;
729     }
730 
731     return std::make_pair(found, osi);
732   }
733 
734   static std::pair<Operand, llvm::StringRef::const_iterator>
735   ParseRegisterName(llvm::StringRef::const_iterator osi,
736                     llvm::StringRef::const_iterator ose) {
737     Operand ret;
738     ret.m_type = Operand::Type::Register;
739     std::string str;
740 
741     osi = ConsumeWhitespace(osi, ose);
742 
743     while (osi != ose) {
744       if (*osi >= '0' && *osi <= '9') {
745         if (str.empty()) {
746           return std::make_pair(Operand(), osi);
747         } else {
748           str.push_back(*osi);
749         }
750       } else if (*osi >= 'a' && *osi <= 'z') {
751         str.push_back(*osi);
752       } else {
753         switch (*osi) {
754         default:
755           if (str.empty()) {
756             return std::make_pair(Operand(), osi);
757           } else {
758             ret.m_register = ConstString(str);
759             return std::make_pair(ret, osi);
760           }
761         case '%':
762           if (!str.empty()) {
763             return std::make_pair(Operand(), osi);
764           }
765           break;
766         }
767       }
768       ++osi;
769     }
770 
771     ret.m_register = ConstString(str);
772     return std::make_pair(ret, osi);
773   }
774 
775   static std::pair<Operand, llvm::StringRef::const_iterator>
776   ParseImmediate(llvm::StringRef::const_iterator osi,
777                  llvm::StringRef::const_iterator ose) {
778     Operand ret;
779     ret.m_type = Operand::Type::Immediate;
780     std::string str;
781     bool is_hex = false;
782 
783     osi = ConsumeWhitespace(osi, ose);
784 
785     while (osi != ose) {
786       if (*osi >= '0' && *osi <= '9') {
787         str.push_back(*osi);
788       } else if (*osi >= 'a' && *osi <= 'f') {
789         if (is_hex) {
790           str.push_back(*osi);
791         } else {
792           return std::make_pair(Operand(), osi);
793         }
794       } else {
795         switch (*osi) {
796         default:
797           if (str.empty()) {
798             return std::make_pair(Operand(), osi);
799           } else {
800             ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
801             return std::make_pair(ret, osi);
802           }
803         case 'x':
804           if (!str.compare("0")) {
805             is_hex = true;
806             str.push_back(*osi);
807           } else {
808             return std::make_pair(Operand(), osi);
809           }
810           break;
811         case '#':
812         case '$':
813           if (!str.empty()) {
814             return std::make_pair(Operand(), osi);
815           }
816           break;
817         case '-':
818           if (str.empty()) {
819             ret.m_negative = true;
820           } else {
821             return std::make_pair(Operand(), osi);
822           }
823         }
824       }
825       ++osi;
826     }
827 
828     ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
829     return std::make_pair(ret, osi);
830   }
831 
832   // -0x5(%rax,%rax,2)
833   static std::pair<Operand, llvm::StringRef::const_iterator>
834   ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
835                           llvm::StringRef::const_iterator ose) {
836     std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
837         ParseImmediate(osi, ose);
838     if (offset_and_iterator.first.IsValid()) {
839       osi = offset_and_iterator.second;
840     }
841 
842     bool found = false;
843     std::tie(found, osi) = ConsumeChar(osi, '(', ose);
844     if (!found) {
845       return std::make_pair(Operand(), osi);
846     }
847 
848     std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
849         ParseRegisterName(osi, ose);
850     if (base_and_iterator.first.IsValid()) {
851       osi = base_and_iterator.second;
852     } else {
853       return std::make_pair(Operand(), osi);
854     }
855 
856     std::tie(found, osi) = ConsumeChar(osi, ',', ose);
857     if (!found) {
858       return std::make_pair(Operand(), osi);
859     }
860 
861     std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
862         ParseRegisterName(osi, ose);
863     if (index_and_iterator.first.IsValid()) {
864       osi = index_and_iterator.second;
865     } else {
866       return std::make_pair(Operand(), osi);
867     }
868 
869     std::tie(found, osi) = ConsumeChar(osi, ',', ose);
870     if (!found) {
871       return std::make_pair(Operand(), osi);
872     }
873 
874     std::pair<Operand, llvm::StringRef::const_iterator>
875         multiplier_and_iterator = ParseImmediate(osi, ose);
876     if (index_and_iterator.first.IsValid()) {
877       osi = index_and_iterator.second;
878     } else {
879       return std::make_pair(Operand(), osi);
880     }
881 
882     std::tie(found, osi) = ConsumeChar(osi, ')', ose);
883     if (!found) {
884       return std::make_pair(Operand(), osi);
885     }
886 
887     Operand product;
888     product.m_type = Operand::Type::Product;
889     product.m_children.push_back(index_and_iterator.first);
890     product.m_children.push_back(multiplier_and_iterator.first);
891 
892     Operand index;
893     index.m_type = Operand::Type::Sum;
894     index.m_children.push_back(base_and_iterator.first);
895     index.m_children.push_back(product);
896 
897     if (offset_and_iterator.first.IsValid()) {
898       Operand offset;
899       offset.m_type = Operand::Type::Sum;
900       offset.m_children.push_back(offset_and_iterator.first);
901       offset.m_children.push_back(index);
902 
903       Operand deref;
904       deref.m_type = Operand::Type::Dereference;
905       deref.m_children.push_back(offset);
906       return std::make_pair(deref, osi);
907     } else {
908       Operand deref;
909       deref.m_type = Operand::Type::Dereference;
910       deref.m_children.push_back(index);
911       return std::make_pair(deref, osi);
912     }
913   }
914 
915   // -0x10(%rbp)
916   static std::pair<Operand, llvm::StringRef::const_iterator>
917   ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
918                         llvm::StringRef::const_iterator ose) {
919     std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
920         ParseImmediate(osi, ose);
921     if (offset_and_iterator.first.IsValid()) {
922       osi = offset_and_iterator.second;
923     }
924 
925     bool found = false;
926     std::tie(found, osi) = ConsumeChar(osi, '(', ose);
927     if (!found) {
928       return std::make_pair(Operand(), osi);
929     }
930 
931     std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
932         ParseRegisterName(osi, ose);
933     if (base_and_iterator.first.IsValid()) {
934       osi = base_and_iterator.second;
935     } else {
936       return std::make_pair(Operand(), osi);
937     }
938 
939     std::tie(found, osi) = ConsumeChar(osi, ')', ose);
940     if (!found) {
941       return std::make_pair(Operand(), osi);
942     }
943 
944     if (offset_and_iterator.first.IsValid()) {
945       Operand offset;
946       offset.m_type = Operand::Type::Sum;
947       offset.m_children.push_back(offset_and_iterator.first);
948       offset.m_children.push_back(base_and_iterator.first);
949 
950       Operand deref;
951       deref.m_type = Operand::Type::Dereference;
952       deref.m_children.push_back(offset);
953       return std::make_pair(deref, osi);
954     } else {
955       Operand deref;
956       deref.m_type = Operand::Type::Dereference;
957       deref.m_children.push_back(base_and_iterator.first);
958       return std::make_pair(deref, osi);
959     }
960   }
961 
962   // [sp, #8]!
963   static std::pair<Operand, llvm::StringRef::const_iterator>
964   ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
965                        llvm::StringRef::const_iterator ose) {
966     bool found = false;
967     std::tie(found, osi) = ConsumeChar(osi, '[', ose);
968     if (!found) {
969       return std::make_pair(Operand(), osi);
970     }
971 
972     std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
973         ParseRegisterName(osi, ose);
974     if (base_and_iterator.first.IsValid()) {
975       osi = base_and_iterator.second;
976     } else {
977       return std::make_pair(Operand(), osi);
978     }
979 
980     std::tie(found, osi) = ConsumeChar(osi, ',', ose);
981     if (!found) {
982       return std::make_pair(Operand(), osi);
983     }
984 
985     std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
986         ParseImmediate(osi, ose);
987     if (offset_and_iterator.first.IsValid()) {
988       osi = offset_and_iterator.second;
989     }
990 
991     std::tie(found, osi) = ConsumeChar(osi, ']', ose);
992     if (!found) {
993       return std::make_pair(Operand(), osi);
994     }
995 
996     Operand offset;
997     offset.m_type = Operand::Type::Sum;
998     offset.m_children.push_back(offset_and_iterator.first);
999     offset.m_children.push_back(base_and_iterator.first);
1000 
1001     Operand deref;
1002     deref.m_type = Operand::Type::Dereference;
1003     deref.m_children.push_back(offset);
1004     return std::make_pair(deref, osi);
1005   }
1006 
1007   // [sp]
1008   static std::pair<Operand, llvm::StringRef::const_iterator>
1009   ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
1010                       llvm::StringRef::const_iterator ose) {
1011     bool found = false;
1012     std::tie(found, osi) = ConsumeChar(osi, '[', ose);
1013     if (!found) {
1014       return std::make_pair(Operand(), osi);
1015     }
1016 
1017     std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
1018         ParseRegisterName(osi, ose);
1019     if (base_and_iterator.first.IsValid()) {
1020       osi = base_and_iterator.second;
1021     } else {
1022       return std::make_pair(Operand(), osi);
1023     }
1024 
1025     std::tie(found, osi) = ConsumeChar(osi, ']', ose);
1026     if (!found) {
1027       return std::make_pair(Operand(), osi);
1028     }
1029 
1030     Operand deref;
1031     deref.m_type = Operand::Type::Dereference;
1032     deref.m_children.push_back(base_and_iterator.first);
1033     return std::make_pair(deref, osi);
1034   }
1035 
1036   static void DumpOperand(const Operand &op, Stream &s) {
1037     switch (op.m_type) {
1038     case Operand::Type::Dereference:
1039       s.PutCString("*");
1040       DumpOperand(op.m_children[0], s);
1041       break;
1042     case Operand::Type::Immediate:
1043       if (op.m_negative) {
1044         s.PutCString("-");
1045       }
1046       s.PutCString(llvm::to_string(op.m_immediate));
1047       break;
1048     case Operand::Type::Invalid:
1049       s.PutCString("Invalid");
1050       break;
1051     case Operand::Type::Product:
1052       s.PutCString("(");
1053       DumpOperand(op.m_children[0], s);
1054       s.PutCString("*");
1055       DumpOperand(op.m_children[1], s);
1056       s.PutCString(")");
1057       break;
1058     case Operand::Type::Register:
1059       s.PutCString(op.m_register.GetStringRef());
1060       break;
1061     case Operand::Type::Sum:
1062       s.PutCString("(");
1063       DumpOperand(op.m_children[0], s);
1064       s.PutCString("+");
1065       DumpOperand(op.m_children[1], s);
1066       s.PutCString(")");
1067       break;
1068     }
1069   }
1070 
1071   bool ParseOperands(
1072       llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
1073     const char *operands_string = GetOperands(nullptr);
1074 
1075     if (!operands_string) {
1076       return false;
1077     }
1078 
1079     llvm::StringRef operands_ref(operands_string);
1080 
1081     llvm::StringRef::const_iterator osi = operands_ref.begin();
1082     llvm::StringRef::const_iterator ose = operands_ref.end();
1083 
1084     while (osi != ose) {
1085       Operand operand;
1086       llvm::StringRef::const_iterator iter;
1087 
1088       if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),
1089            operand.IsValid()) ||
1090           (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),
1091            operand.IsValid()) ||
1092           (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),
1093            operand.IsValid()) ||
1094           (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),
1095            operand.IsValid()) ||
1096           (std::tie(operand, iter) = ParseRegisterName(osi, ose),
1097            operand.IsValid()) ||
1098           (std::tie(operand, iter) = ParseImmediate(osi, ose),
1099            operand.IsValid())) {
1100         osi = iter;
1101         operands.push_back(operand);
1102       } else {
1103         return false;
1104       }
1105 
1106       std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
1107           ConsumeChar(osi, ',', ose);
1108       if (found_and_iter.first) {
1109         osi = found_and_iter.second;
1110       }
1111 
1112       osi = ConsumeWhitespace(osi, ose);
1113     }
1114 
1115     DisassemblerSP disasm_sp = m_disasm_wp.lock();
1116 
1117     if (disasm_sp && operands.size() > 1) {
1118       // TODO tie this into the MC Disassembler's notion of clobbers.
1119       switch (disasm_sp->GetArchitecture().GetMachine()) {
1120       default:
1121         break;
1122       case llvm::Triple::x86:
1123       case llvm::Triple::x86_64:
1124         operands[operands.size() - 1].m_clobbered = true;
1125         break;
1126       case llvm::Triple::arm:
1127         operands[0].m_clobbered = true;
1128         break;
1129       }
1130     }
1131 
1132     if (Log *log = GetLog(LLDBLog::Process)) {
1133       StreamString ss;
1134 
1135       ss.Printf("[%s] expands to %zu operands:\n", operands_string,
1136                 operands.size());
1137       for (const Operand &operand : operands) {
1138         ss.PutCString("  ");
1139         DumpOperand(operand, ss);
1140         ss.PutCString("\n");
1141       }
1142 
1143       log->PutString(ss.GetString());
1144     }
1145 
1146     return true;
1147   }
1148 
  // Reports whether this instruction is a call. The instruction is decoded on
  // first use via VisitInstruction(); if that decoding does not succeed the
  // conservative default of m_is_call is returned.
  bool IsCall() override {
    VisitInstruction();
    return m_is_call;
  }
1153 
protected:
  // Weak reference to the DisassemblerLLVMC associated with this instruction.
  // It is locked on demand (see ParseOperands) and may have expired.
  std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;

  // NOTE(review): m_is_valid and m_using_file_addr are not touched in this
  // part of the class; presumably they are maintained by the decode path and
  // exposed through accessors defined earlier - confirm there.
  bool m_is_valid = false;
  bool m_using_file_addr = false;
  // Set by VisitInstruction() once the opcode has been decoded successfully,
  // so the property flags below are computed at most once.
  bool m_has_visited_instruction = false;

  // Be conservative. If we didn't understand the instruction, say it:
  //   - Might branch
  //   - Does not have a delay slot
  //   - Is not a call
  //   - Is not a load
  //   - Is not an authenticated instruction
  bool m_does_branch = true;
  bool m_has_delay_slot = false;
  bool m_is_call = false;
  bool m_is_load = false;
  bool m_is_authenticated = false;
1172 
1173   void VisitInstruction() {
1174     if (m_has_visited_instruction)
1175       return;
1176 
1177     DisassemblerScope disasm(*this);
1178     if (!disasm)
1179       return;
1180 
1181     DataExtractor data;
1182     if (!m_opcode.GetData(data))
1183       return;
1184 
1185     bool is_alternate_isa;
1186     lldb::addr_t pc = m_address.GetFileAddress();
1187     DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
1188         GetDisasmToUse(is_alternate_isa, disasm);
1189     const uint8_t *opcode_data = data.GetDataStart();
1190     const size_t opcode_data_len = data.GetByteSize();
1191     llvm::MCInst inst;
1192     const size_t inst_size =
1193         mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
1194     if (inst_size == 0)
1195       return;
1196 
1197     m_has_visited_instruction = true;
1198     m_does_branch = mc_disasm_ptr->CanBranch(inst);
1199     m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
1200     m_is_call = mc_disasm_ptr->IsCall(inst);
1201     m_is_load = mc_disasm_ptr->IsLoad(inst);
1202     m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst);
1203   }
1204 
1205 private:
1206   DisassemblerLLVMC::MCDisasmInstance *
1207   GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {
1208     is_alternate_isa = false;
1209     if (disasm) {
1210       if (disasm->m_alternate_disasm_up) {
1211         const AddressClass address_class = GetAddressClass();
1212 
1213         if (address_class == AddressClass::eCodeAlternateISA) {
1214           is_alternate_isa = true;
1215           return disasm->m_alternate_disasm_up.get();
1216         }
1217       }
1218       return disasm->m_disasm_up.get();
1219     }
1220     return nullptr;
1221   }
1222 };
1223 
1224 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
1225 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
1226                                             const char *features_str,
1227                                             unsigned flavor,
1228                                             DisassemblerLLVMC &owner) {
1229   using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
1230 
1231   std::string Status;
1232   const llvm::Target *curr_target =
1233       llvm::TargetRegistry::lookupTarget(triple, Status);
1234   if (!curr_target)
1235     return Instance();
1236 
1237   std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
1238       curr_target->createMCInstrInfo());
1239   if (!instr_info_up)
1240     return Instance();
1241 
1242   std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
1243       curr_target->createMCRegInfo(triple));
1244   if (!reg_info_up)
1245     return Instance();
1246 
1247   std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
1248       curr_target->createMCSubtargetInfo(triple, cpu, features_str));
1249   if (!subtarget_info_up)
1250     return Instance();
1251 
1252   llvm::MCTargetOptions MCOptions;
1253   std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
1254       curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions));
1255   if (!asm_info_up)
1256     return Instance();
1257 
1258   std::unique_ptr<llvm::MCContext> context_up(
1259       new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(),
1260                           reg_info_up.get(), subtarget_info_up.get()));
1261   if (!context_up)
1262     return Instance();
1263 
1264   std::unique_ptr<llvm::MCDisassembler> disasm_up(
1265       curr_target->createMCDisassembler(*subtarget_info_up, *context_up));
1266   if (!disasm_up)
1267     return Instance();
1268 
1269   std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
1270       curr_target->createMCRelocationInfo(triple, *context_up));
1271   if (!rel_info_up)
1272     return Instance();
1273 
1274   std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
1275       curr_target->createMCSymbolizer(
1276           triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,
1277           context_up.get(), std::move(rel_info_up)));
1278   disasm_up->setSymbolizer(std::move(symbolizer_up));
1279 
1280   unsigned asm_printer_variant =
1281       flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
1282 
1283   std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
1284       curr_target->createMCInstPrinter(llvm::Triple{triple},
1285                                        asm_printer_variant, *asm_info_up,
1286                                        *instr_info_up, *reg_info_up));
1287   if (!instr_printer_up)
1288     return Instance();
1289 
1290   return Instance(
1291       new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up),
1292                            std::move(subtarget_info_up), std::move(asm_info_up),
1293                            std::move(context_up), std::move(disasm_up),
1294                            std::move(instr_printer_up)));
1295 }
1296 
// Takes ownership of the already-created LLVM MC components. Every component
// is required: the assert below checks that none of them is null after the
// move into the corresponding member.
DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
    std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
    std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
    std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
    std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
    std::unique_ptr<llvm::MCContext> &&context_up,
    std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
    std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up)
    : m_instr_info_up(std::move(instr_info_up)),
      m_reg_info_up(std::move(reg_info_up)),
      m_subtarget_info_up(std::move(subtarget_info_up)),
      m_asm_info_up(std::move(asm_info_up)),
      m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),
      m_instr_printer_up(std::move(instr_printer_up)) {
  assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&
         m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
}
1314 
1315 uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
1316     const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
1317     llvm::MCInst &mc_inst) const {
1318   llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
1319   llvm::MCDisassembler::DecodeStatus status;
1320 
1321   uint64_t new_inst_size;
1322   status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
1323                                        llvm::nulls());
1324   if (status == llvm::MCDisassembler::Success)
1325     return new_inst_size;
1326   else
1327     return 0;
1328 }
1329 
1330 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
1331     llvm::MCInst &mc_inst, std::string &inst_string,
1332     std::string &comments_string) {
1333   llvm::raw_string_ostream inst_stream(inst_string);
1334   llvm::raw_string_ostream comments_stream(comments_string);
1335 
1336   m_instr_printer_up->setCommentStream(comments_stream);
1337   m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(),
1338                                 *m_subtarget_info_up, inst_stream);
1339   m_instr_printer_up->setCommentStream(llvm::nulls());
1340   comments_stream.flush();
1341 
1342   static std::string g_newlines("\r\n");
1343 
1344   for (size_t newline_pos = 0;
1345        (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=
1346        comments_string.npos;
1347        /**/) {
1348     comments_string.replace(comments_string.begin() + newline_pos,
1349                             comments_string.begin() + newline_pos + 1, 1, ' ');
1350   }
1351 }
1352 
1353 void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
1354     bool use_hex_immed, HexImmediateStyle hex_style) {
1355   m_instr_printer_up->setPrintImmHex(use_hex_immed);
1356   switch (hex_style) {
1357   case eHexStyleC:
1358     m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
1359     break;
1360   case eHexStyleAsm:
1361     m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
1362     break;
1363   }
1364 }
1365 
1366 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1367     llvm::MCInst &mc_inst) const {
1368   return m_instr_info_up->get(mc_inst.getOpcode())
1369       .mayAffectControlFlow(mc_inst, *m_reg_info_up);
1370 }
1371 
1372 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1373     llvm::MCInst &mc_inst) const {
1374   return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();
1375 }
1376 
1377 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
1378   return m_instr_info_up->get(mc_inst.getOpcode()).isCall();
1379 }
1380 
1381 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const {
1382   return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad();
1383 }
1384 
1385 bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(
1386     llvm::MCInst &mc_inst) const {
1387   const auto &InstrDesc = m_instr_info_up->get(mc_inst.getOpcode());
1388 
1389   // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
1390   // == 'a' + 'c') as authenticated instructions for reporting purposes, in
1391   // addition to the standard authenticated instructions specified in ARMv8.3.
1392   bool IsBrkC47x = false;
1393   if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) {
1394     const llvm::MCOperand &Op0 = mc_inst.getOperand(0);
1395     if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474)
1396       IsBrkC47x = true;
1397   }
1398 
1399   return InstrDesc.isAuthenticated() || IsBrkC47x;
1400 }
1401 
1402 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
1403                                      const char *flavor_string)
1404     : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
1405       m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
1406       m_adrp_insn() {
1407   if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
1408     m_flavor.assign("default");
1409   }
1410 
1411   unsigned flavor = ~0U;
1412   llvm::Triple triple = arch.GetTriple();
1413 
1414   // So far the only supported flavor is "intel" on x86.  The base class will
1415   // set this correctly coming in.
1416   if (triple.getArch() == llvm::Triple::x86 ||
1417       triple.getArch() == llvm::Triple::x86_64) {
1418     if (m_flavor == "intel") {
1419       flavor = 1;
1420     } else if (m_flavor == "att") {
1421       flavor = 0;
1422     }
1423   }
1424 
1425   ArchSpec thumb_arch(arch);
1426   if (triple.getArch() == llvm::Triple::arm) {
1427     std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
1428     // Replace "arm" with "thumb" so we get all thumb variants correct
1429     if (thumb_arch_name.size() > 3) {
1430       thumb_arch_name.erase(0, 3);
1431       thumb_arch_name.insert(0, "thumb");
1432     } else {
1433       thumb_arch_name = "thumbv9.3a";
1434     }
1435     thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));
1436   }
1437 
1438   // If no sub architecture specified then use the most recent arm architecture
1439   // so the disassembler will return all instructions. Without it we will see a
1440   // lot of unknown opcodes if the code uses instructions which are not
1441   // available in the oldest arm version (which is used when no sub architecture
1442   // is specified).
1443   if (triple.getArch() == llvm::Triple::arm &&
1444       triple.getSubArch() == llvm::Triple::NoSubArch)
1445     triple.setArchName("armv9.3a");
1446 
1447   std::string features_str;
1448   const char *triple_str = triple.getTriple().c_str();
1449 
1450   // ARM Cortex M0-M7 devices only execute thumb instructions
1451   if (arch.IsAlwaysThumbInstructions()) {
1452     triple_str = thumb_arch.GetTriple().getTriple().c_str();
1453     features_str += "+fp-armv8,";
1454   }
1455 
1456   const char *cpu = "";
1457 
1458   switch (arch.GetCore()) {
1459   case ArchSpec::eCore_mips32:
1460   case ArchSpec::eCore_mips32el:
1461     cpu = "mips32";
1462     break;
1463   case ArchSpec::eCore_mips32r2:
1464   case ArchSpec::eCore_mips32r2el:
1465     cpu = "mips32r2";
1466     break;
1467   case ArchSpec::eCore_mips32r3:
1468   case ArchSpec::eCore_mips32r3el:
1469     cpu = "mips32r3";
1470     break;
1471   case ArchSpec::eCore_mips32r5:
1472   case ArchSpec::eCore_mips32r5el:
1473     cpu = "mips32r5";
1474     break;
1475   case ArchSpec::eCore_mips32r6:
1476   case ArchSpec::eCore_mips32r6el:
1477     cpu = "mips32r6";
1478     break;
1479   case ArchSpec::eCore_mips64:
1480   case ArchSpec::eCore_mips64el:
1481     cpu = "mips64";
1482     break;
1483   case ArchSpec::eCore_mips64r2:
1484   case ArchSpec::eCore_mips64r2el:
1485     cpu = "mips64r2";
1486     break;
1487   case ArchSpec::eCore_mips64r3:
1488   case ArchSpec::eCore_mips64r3el:
1489     cpu = "mips64r3";
1490     break;
1491   case ArchSpec::eCore_mips64r5:
1492   case ArchSpec::eCore_mips64r5el:
1493     cpu = "mips64r5";
1494     break;
1495   case ArchSpec::eCore_mips64r6:
1496   case ArchSpec::eCore_mips64r6el:
1497     cpu = "mips64r6";
1498     break;
1499   default:
1500     cpu = "";
1501     break;
1502   }
1503 
1504   if (arch.IsMIPS()) {
1505     uint32_t arch_flags = arch.GetFlags();
1506     if (arch_flags & ArchSpec::eMIPSAse_msa)
1507       features_str += "+msa,";
1508     if (arch_flags & ArchSpec::eMIPSAse_dsp)
1509       features_str += "+dsp,";
1510     if (arch_flags & ArchSpec::eMIPSAse_dspr2)
1511       features_str += "+dspr2,";
1512   }
1513 
1514   // If any AArch64 variant, enable latest ISA with all extensions.
1515   if (triple.isAArch64()) {
1516     features_str += "+all,";
1517 
1518     if (triple.getVendor() == llvm::Triple::Apple)
1519       cpu = "apple-latest";
1520   }
1521 
1522   if (triple.isRISCV()) {
1523     uint32_t arch_flags = arch.GetFlags();
1524     if (arch_flags & ArchSpec::eRISCV_rvc)
1525       features_str += "+c,";
1526     if (arch_flags & ArchSpec::eRISCV_rve)
1527       features_str += "+e,";
1528     if ((arch_flags & ArchSpec::eRISCV_float_abi_single) ==
1529         ArchSpec::eRISCV_float_abi_single)
1530       features_str += "+f,";
1531     if ((arch_flags & ArchSpec::eRISCV_float_abi_double) ==
1532         ArchSpec::eRISCV_float_abi_double)
1533       features_str += "+f,+d,";
1534     if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) ==
1535         ArchSpec::eRISCV_float_abi_quad)
1536       features_str += "+f,+d,+q,";
1537     // FIXME: how do we detect features such as `+a`, `+m`?
1538   }
1539 
1540   // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1541   // isn't good for some reason, we won't be valid and FindPlugin will fail and
1542   // we won't get used.
1543   m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),
1544                                          flavor, *this);
1545 
1546   llvm::Triple::ArchType llvm_arch = triple.getArch();
1547 
1548   // For arm CPUs that can execute arm or thumb instructions, also create a
1549   // thumb instruction disassembler.
1550   if (llvm_arch == llvm::Triple::arm) {
1551     std::string thumb_triple(thumb_arch.GetTriple().getTriple());
1552     m_alternate_disasm_up =
1553         MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(),
1554                                  flavor, *this);
1555     if (!m_alternate_disasm_up)
1556       m_disasm_up.reset();
1557 
1558   } else if (arch.IsMIPS()) {
1559     /* Create alternate disassembler for MIPS16 and microMIPS */
1560     uint32_t arch_flags = arch.GetFlags();
1561     if (arch_flags & ArchSpec::eMIPSAse_mips16)
1562       features_str += "+mips16,";
1563     else if (arch_flags & ArchSpec::eMIPSAse_micromips)
1564       features_str += "+micromips,";
1565 
1566     m_alternate_disasm_up = MCDisasmInstance::Create(
1567         triple_str, cpu, features_str.c_str(), flavor, *this);
1568     if (!m_alternate_disasm_up)
1569       m_disasm_up.reset();
1570   }
1571 }
1572 
// Defaulted: no explicit teardown is performed here beyond destroying the
// members.
DisassemblerLLVMC::~DisassemblerLLVMC() = default;
1574 
1575 lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,
1576                                                        const char *flavor) {
1577   if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {
1578     auto disasm_sp = std::make_shared<DisassemblerLLVMC>(arch, flavor);
1579     if (disasm_sp && disasm_sp->IsValid())
1580       return disasm_sp;
1581   }
1582   return lldb::DisassemblerSP();
1583 }
1584 
1585 size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,
1586                                              const DataExtractor &data,
1587                                              lldb::offset_t data_offset,
1588                                              size_t num_instructions,
1589                                              bool append, bool data_from_file) {
1590   if (!append)
1591     m_instruction_list.Clear();
1592 
1593   if (!IsValid())
1594     return 0;
1595 
1596   m_data_from_file = data_from_file;
1597   uint32_t data_cursor = data_offset;
1598   const size_t data_byte_size = data.GetByteSize();
1599   uint32_t instructions_parsed = 0;
1600   Address inst_addr(base_addr);
1601 
1602   while (data_cursor < data_byte_size &&
1603          instructions_parsed < num_instructions) {
1604 
1605     AddressClass address_class = AddressClass::eCode;
1606 
1607     if (m_alternate_disasm_up)
1608       address_class = inst_addr.GetAddressClass();
1609 
1610     InstructionSP inst_sp(
1611         new InstructionLLVMC(*this, inst_addr, address_class));
1612 
1613     if (!inst_sp)
1614       break;
1615 
1616     uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
1617 
1618     if (inst_size == 0)
1619       break;
1620 
1621     m_instruction_list.Append(inst_sp);
1622     data_cursor += inst_size;
1623     inst_addr.Slide(inst_size);
1624     instructions_parsed++;
1625   }
1626 
1627   return data_cursor - data_offset;
1628 }
1629 
// Registers this plugin (name, description and CreateInstance factory) with
// the PluginManager, then initializes LLVM's target infos, target MCs, asm
// parsers and disassemblers for all targets.
void DisassemblerLLVMC::Initialize() {
  PluginManager::RegisterPlugin(GetPluginNameStatic(),
                                "Disassembler that uses LLVM MC to disassemble "
                                "i386, x86_64, ARM, and ARM64.",
                                CreateInstance);

  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllAsmParsers();
  llvm::InitializeAllDisassemblers();
}
1641 
// Unregisters the plugin by the same CreateInstance factory that
// Initialize() registered.
void DisassemblerLLVMC::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
1645 
1646 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1647                                       uint64_t offset, uint64_t size,
1648                                       int tag_type, void *tag_bug) {
1649   return static_cast<DisassemblerLLVMC *>(disassembler)
1650       ->OpInfo(pc, offset, size, tag_type, tag_bug);
1651 }
1652 
1653 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1654                                                     uint64_t value,
1655                                                     uint64_t *type, uint64_t pc,
1656                                                     const char **name) {
1657   return static_cast<DisassemblerLLVMC *>(disassembler)
1658       ->SymbolLookup(value, type, pc, name);
1659 }
1660 
1661 bool DisassemblerLLVMC::FlavorValidForArchSpec(
1662     const lldb_private::ArchSpec &arch, const char *flavor) {
1663   llvm::Triple triple = arch.GetTriple();
1664   if (flavor == nullptr || strcmp(flavor, "default") == 0)
1665     return true;
1666 
1667   if (triple.getArch() == llvm::Triple::x86 ||
1668       triple.getArch() == llvm::Triple::x86_64) {
1669     return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0;
1670   } else
1671     return false;
1672 }
1673 
1674 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }
1675 
1676 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1677                               int tag_type, void *tag_bug) {
1678   switch (tag_type) {
1679   default:
1680     break;
1681   case 1:
1682     memset(tag_bug, 0, sizeof(::LLVMOpInfo1));
1683     break;
1684   }
1685   return 0;
1686 }
1687 
1688 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1689                                             uint64_t pc, const char **name) {
1690   if (*type_ptr) {
1691     if (m_exe_ctx && m_inst) {
1692       // std::string remove_this_prior_to_checkin;
1693       Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
1694       Address value_so_addr;
1695       Address pc_so_addr;
1696       if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
1697           target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
1698           target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
1699         if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
1700           m_adrp_address = pc;
1701           m_adrp_insn = value;
1702           *name = nullptr;
1703           *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1704           return nullptr;
1705         }
1706         // If this instruction is an ADD and
1707         // the previous instruction was an ADRP and
1708         // the ADRP's register and this ADD's register are the same,
1709         // then this is a pc-relative address calculation.
1710         if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
1711             m_adrp_insn && m_adrp_address == pc - 4 &&
1712             (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) {
1713           uint32_t addxri_inst;
1714           uint64_t adrp_imm, addxri_imm;
1715           // Get immlo and immhi bits, OR them together to get the ADRP imm
1716           // value.
1717           adrp_imm =
1718               ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3);
1719           // if high bit of immhi after right-shifting set, sign extend
1720           if (adrp_imm & (1ULL << 20))
1721             adrp_imm |= ~((1ULL << 21) - 1);
1722 
1723           addxri_inst = value;
1724           addxri_imm = (addxri_inst >> 10) & 0xfff;
1725           // check if 'sh' bit is set, shift imm value up if so
1726           // (this would make no sense, ADRP already gave us this part)
1727           if ((addxri_inst >> (12 + 5 + 5)) & 1)
1728             addxri_imm <<= 12;
1729           value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
1730                   addxri_imm;
1731         }
1732         m_adrp_address = LLDB_INVALID_ADDRESS;
1733         m_adrp_insn.reset();
1734       }
1735 
1736       if (m_inst->UsingFileAddress()) {
1737         ModuleSP module_sp(m_inst->GetAddress().GetModule());
1738         if (module_sp) {
1739           module_sp->ResolveFileAddress(value, value_so_addr);
1740           module_sp->ResolveFileAddress(pc, pc_so_addr);
1741         }
1742       } else if (target && !target->GetSectionLoadList().IsEmpty()) {
1743         target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
1744         target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);
1745       }
1746 
1747       SymbolContext sym_ctx;
1748       const SymbolContextItem resolve_scope =
1749           eSymbolContextFunction | eSymbolContextSymbol;
1750       if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1751         pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1752             pc_so_addr, resolve_scope, sym_ctx);
1753       }
1754 
1755       if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1756         StreamString ss;
1757 
1758         bool format_omitting_current_func_name = false;
1759         if (sym_ctx.symbol || sym_ctx.function) {
1760           AddressRange range;
1761           if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
1762               range.GetBaseAddress().IsValid() &&
1763               range.ContainsLoadAddress(value_so_addr, target)) {
1764             format_omitting_current_func_name = true;
1765           }
1766         }
1767 
1768         // If the "value" address (the target address we're symbolicating) is
1769         // inside the same SymbolContext as the current instruction pc
1770         // (pc_so_addr), don't print the full function name - just print it
1771         // with DumpStyleNoFunctionName style, e.g. "<+36>".
1772         if (format_omitting_current_func_name) {
1773           value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,
1774                              Address::DumpStyleSectionNameOffset);
1775         } else {
1776           value_so_addr.Dump(
1777               &ss, target,
1778               Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1779               Address::DumpStyleSectionNameOffset);
1780         }
1781 
1782         if (!ss.GetString().empty()) {
1783           // If Address::Dump returned a multi-line description, most commonly
1784           // seen when we have multiple levels of inlined functions at an
1785           // address, only show the first line.
1786           std::string str = std::string(ss.GetString());
1787           size_t first_eol_char = str.find_first_of("\r\n");
1788           if (first_eol_char != std::string::npos) {
1789             str.erase(first_eol_char);
1790           }
1791           m_inst->AppendComment(str);
1792         }
1793       }
1794     }
1795   }
1796 
1797   // TODO: llvm-objdump sets the type_ptr to the
1798   // LLVMDisassembler_ReferenceType_Out_* values
1799   // based on where value_so_addr is pointing, with
1800   // Mach-O specific augmentations in MachODump.cpp. e.g.
1801   // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1802   // handles.
1803   *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1804   *name = nullptr;
1805   return nullptr;
1806 }
1807