1 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AArch64ExternalSymbolizer.h" 10 #include "MCTargetDesc/AArch64AddressingModes.h" 11 #include "Utils/AArch64BaseInfo.h" 12 #include "llvm/MC/MCContext.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCInst.h" 15 #include "llvm/MC/MCRegisterInfo.h" 16 #include "llvm/Support/Format.h" 17 #include "llvm/Support/raw_ostream.h" 18 19 using namespace llvm; 20 21 #define DEBUG_TYPE "aarch64-disassembler" 22 23 static MCSymbolRefExpr::VariantKind 24 getVariant(uint64_t LLVMDisassembler_VariantKind) { 25 switch (LLVMDisassembler_VariantKind) { 26 case LLVMDisassembler_VariantKind_None: 27 return MCSymbolRefExpr::VK_None; 28 case LLVMDisassembler_VariantKind_ARM64_PAGE: 29 return MCSymbolRefExpr::VK_PAGE; 30 case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: 31 return MCSymbolRefExpr::VK_PAGEOFF; 32 case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: 33 return MCSymbolRefExpr::VK_GOTPAGE; 34 case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: 35 return MCSymbolRefExpr::VK_GOTPAGEOFF; 36 case LLVMDisassembler_VariantKind_ARM64_TLVP: 37 case LLVMDisassembler_VariantKind_ARM64_TLVOFF: 38 default: 39 llvm_unreachable("bad LLVMDisassembler_VariantKind"); 40 } 41 } 42 43 /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic 44 /// operand in place of the immediate Value in the MCInst. The immediate 45 /// Value has not had any PC adjustment made by the caller. If the instruction 46 /// is a branch that adds the PC to the immediate Value then isBranch is 47 /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any 48 /// symbolic information at the Address for this instrution. If that returns 49 /// non-zero then the symbolic information it returns is used to create an 50 /// MCExpr and that is added as an operand to the MCInst. If GetOpInfo() 51 /// returns zero and isBranch is Success then a symbol look up for 52 /// Address + Value is done and if a symbol is found an MCExpr is created with 53 /// that, else an MCExpr with Address + Value is created. If GetOpInfo() 54 /// returns zero and isBranch is Fail then the Opcode of the MCInst is 55 /// tested and for ADRP an other instructions that help to load of pointers 56 /// a symbol look up is done to see it is returns a specific reference type 57 /// to add to the comment stream. This function returns Success if it adds 58 /// an operand to the MCInst and Fail otherwise. 59 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( 60 MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, 61 bool IsBranch, uint64_t Offset, uint64_t InstSize) { 62 if (!SymbolLookUp) 63 return false; 64 // FIXME: This method shares a lot of code with 65 // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible 66 // refactor the MCExternalSymbolizer interface to allow more of this 67 // implementation to be shared. 68 // 69 struct LLVMOpInfo1 SymbolicOp; 70 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 71 SymbolicOp.Value = Value; 72 uint64_t ReferenceType; 73 const char *ReferenceName; 74 if (!GetOpInfo || 75 !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { 76 if (IsBranch) { 77 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 78 const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, 79 Address, &ReferenceName); 80 if (Name) { 81 SymbolicOp.AddSymbol.Name = Name; 82 SymbolicOp.AddSymbol.Present = true; 83 SymbolicOp.Value = 0; 84 } else { 85 SymbolicOp.Value = Address + Value; 86 } 87 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 88 CommentStream << "symbol stub for: " << ReferenceName; 89 else if (ReferenceType == 90 LLVMDisassembler_ReferenceType_Out_Objc_Message) 91 CommentStream << "Objc message: " << ReferenceName; 92 } else if (MI.getOpcode() == AArch64::ADRP) { 93 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; 94 // otool expects the fully encoded ADRP instruction to be passed in as 95 // the value here, so reconstruct it: 96 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); 97 uint32_t EncodedInst = 0x90000000; 98 EncodedInst |= (Value & 0x3) << 29; // immlo 99 EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi 100 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg 101 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, 102 &ReferenceName); 103 CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) + 104 Value * 0x1000); 105 } else if (MI.getOpcode() == AArch64::ADDXri || 106 MI.getOpcode() == AArch64::LDRXui || 107 MI.getOpcode() == AArch64::LDRXl || 108 MI.getOpcode() == AArch64::ADR) { 109 if (MI.getOpcode() == AArch64::ADDXri) 110 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; 111 else if (MI.getOpcode() == AArch64::LDRXui) 112 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; 113 if (MI.getOpcode() == AArch64::LDRXl) { 114 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; 115 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, 116 &ReferenceName); 117 } else if (MI.getOpcode() == AArch64::ADR) { 118 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; 119 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, 120 &ReferenceName); 121 } else { 122 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); 123 // otool expects the fully encoded ADD/LDR instruction to be passed in 124 // as the value here, so reconstruct it: 125 unsigned EncodedInst = 126 MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000; 127 EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] 128 EncodedInst |= 129 MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn 130 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd 131 132 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, 133 &ReferenceName); 134 } 135 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) 136 CommentStream << "literal pool symbol address: " << ReferenceName; 137 else if (ReferenceType == 138 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { 139 CommentStream << "literal pool for: \""; 140 CommentStream.write_escaped(ReferenceName); 141 CommentStream << "\""; 142 } else if (ReferenceType == 143 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) 144 CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; 145 else if (ReferenceType == 146 LLVMDisassembler_ReferenceType_Out_Objc_Message) 147 CommentStream << "Objc message: " << ReferenceName; 148 else if (ReferenceType == 149 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) 150 CommentStream << "Objc message ref: " << ReferenceName; 151 else if (ReferenceType == 152 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) 153 CommentStream << "Objc selector ref: " << ReferenceName; 154 else if (ReferenceType == 155 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) 156 CommentStream << "Objc class ref: " << ReferenceName; 157 // For these instructions, the SymbolLookUp() above is just to get the 158 // ReferenceType and ReferenceName. We want to make sure not to 159 // fall through so we don't build an MCExpr to leave the disassembly 160 // of the immediate values of these instructions to the InstPrinter. 161 return false; 162 } else { 163 return false; 164 } 165 } 166 167 const MCExpr *Add = nullptr; 168 if (SymbolicOp.AddSymbol.Present) { 169 if (SymbolicOp.AddSymbol.Name) { 170 StringRef Name(SymbolicOp.AddSymbol.Name); 171 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 172 MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); 173 if (Variant != MCSymbolRefExpr::VK_None) 174 Add = MCSymbolRefExpr::create(Sym, Variant, Ctx); 175 else 176 Add = MCSymbolRefExpr::create(Sym, Ctx); 177 } else { 178 Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx); 179 } 180 } 181 182 const MCExpr *Sub = nullptr; 183 if (SymbolicOp.SubtractSymbol.Present) { 184 if (SymbolicOp.SubtractSymbol.Name) { 185 StringRef Name(SymbolicOp.SubtractSymbol.Name); 186 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 187 Sub = MCSymbolRefExpr::create(Sym, Ctx); 188 } else { 189 Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx); 190 } 191 } 192 193 const MCExpr *Off = nullptr; 194 if (SymbolicOp.Value != 0) 195 Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); 196 197 const MCExpr *Expr; 198 if (Sub) { 199 const MCExpr *LHS; 200 if (Add) 201 LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); 202 else 203 LHS = MCUnaryExpr::createMinus(Sub, Ctx); 204 if (Off) 205 Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); 206 else 207 Expr = LHS; 208 } else if (Add) { 209 if (Off) 210 Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); 211 else 212 Expr = Add; 213 } else { 214 if (Off) 215 Expr = Off; 216 else 217 Expr = MCConstantExpr::create(0, Ctx); 218 } 219 220 MI.addOperand(MCOperand::createExpr(Expr)); 221 222 return true; 223 } 224