1 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AArch64ExternalSymbolizer.h" 10 #include "MCTargetDesc/AArch64AddressingModes.h" 11 #include "Utils/AArch64BaseInfo.h" 12 #include "llvm/MC/MCContext.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCInst.h" 15 #include "llvm/MC/MCRegisterInfo.h" 16 #include "llvm/Support/Format.h" 17 #include "llvm/Support/raw_ostream.h" 18 19 using namespace llvm; 20 21 #define DEBUG_TYPE "aarch64-disassembler" 22 23 static MCSymbolRefExpr::VariantKind 24 getVariant(uint64_t LLVMDisassembler_VariantKind) { 25 switch (LLVMDisassembler_VariantKind) { 26 case LLVMDisassembler_VariantKind_None: 27 return MCSymbolRefExpr::VK_None; 28 case LLVMDisassembler_VariantKind_ARM64_PAGE: 29 return MCSymbolRefExpr::VK_PAGE; 30 case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: 31 return MCSymbolRefExpr::VK_PAGEOFF; 32 case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: 33 return MCSymbolRefExpr::VK_GOTPAGE; 34 case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: 35 return MCSymbolRefExpr::VK_GOTPAGEOFF; 36 case LLVMDisassembler_VariantKind_ARM64_TLVP: 37 return MCSymbolRefExpr::VK_TLVPPAGE; 38 case LLVMDisassembler_VariantKind_ARM64_TLVOFF: 39 return MCSymbolRefExpr::VK_TLVPPAGEOFF; 40 default: 41 llvm_unreachable("bad LLVMDisassembler_VariantKind"); 42 } 43 } 44 45 /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic 46 /// operand in place of the immediate Value in the MCInst. The immediate 47 /// Value has not had any PC adjustment made by the caller. If the instruction 48 /// is a branch that adds the PC to the immediate Value then isBranch is 49 /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any 50 /// symbolic information at the Address for this instrution. If that returns 51 /// non-zero then the symbolic information it returns is used to create an 52 /// MCExpr and that is added as an operand to the MCInst. If GetOpInfo() 53 /// returns zero and isBranch is Success then a symbol look up for 54 /// Address + Value is done and if a symbol is found an MCExpr is created with 55 /// that, else an MCExpr with Address + Value is created. If GetOpInfo() 56 /// returns zero and isBranch is Fail then the Opcode of the MCInst is 57 /// tested and for ADRP an other instructions that help to load of pointers 58 /// a symbol look up is done to see it is returns a specific reference type 59 /// to add to the comment stream. This function returns Success if it adds 60 /// an operand to the MCInst and Fail otherwise. 61 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( 62 MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, 63 bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { 64 if (!SymbolLookUp) 65 return false; 66 // FIXME: This method shares a lot of code with 67 // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible 68 // refactor the MCExternalSymbolizer interface to allow more of this 69 // implementation to be shared. 70 // 71 struct LLVMOpInfo1 SymbolicOp; 72 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 73 SymbolicOp.Value = Value; 74 uint64_t ReferenceType; 75 const char *ReferenceName; 76 if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize, 77 1, &SymbolicOp)) { 78 if (IsBranch) { 79 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 80 const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, 81 Address, &ReferenceName); 82 if (Name) { 83 SymbolicOp.AddSymbol.Name = Name; 84 SymbolicOp.AddSymbol.Present = true; 85 SymbolicOp.Value = 0; 86 } else { 87 SymbolicOp.Value = Address + Value; 88 } 89 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 90 CommentStream << "symbol stub for: " << ReferenceName; 91 else if (ReferenceType == 92 LLVMDisassembler_ReferenceType_Out_Objc_Message) 93 CommentStream << "Objc message: " << ReferenceName; 94 } else if (MI.getOpcode() == AArch64::ADRP) { 95 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; 96 // otool expects the fully encoded ADRP instruction to be passed in as 97 // the value here, so reconstruct it: 98 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); 99 uint32_t EncodedInst = 0x90000000; 100 EncodedInst |= (Value & 0x3) << 29; // immlo 101 EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi 102 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg 103 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, 104 &ReferenceName); 105 CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) + 106 Value * 0x1000); 107 } else if (MI.getOpcode() == AArch64::ADDXri || 108 MI.getOpcode() == AArch64::LDRXui || 109 MI.getOpcode() == AArch64::LDRXl || 110 MI.getOpcode() == AArch64::ADR) { 111 if (MI.getOpcode() == AArch64::ADDXri) 112 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; 113 else if (MI.getOpcode() == AArch64::LDRXui) 114 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; 115 if (MI.getOpcode() == AArch64::LDRXl) { 116 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; 117 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, 118 &ReferenceName); 119 } else if (MI.getOpcode() == AArch64::ADR) { 120 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; 121 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, 122 &ReferenceName); 123 } else { 124 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); 125 // otool expects the fully encoded ADD/LDR instruction to be passed in 126 // as the value here, so reconstruct it: 127 unsigned EncodedInst = 128 MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000; 129 EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] 130 EncodedInst |= 131 MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn 132 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd 133 134 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, 135 &ReferenceName); 136 } 137 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) 138 CommentStream << "literal pool symbol address: " << ReferenceName; 139 else if (ReferenceType == 140 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { 141 CommentStream << "literal pool for: \""; 142 CommentStream.write_escaped(ReferenceName); 143 CommentStream << "\""; 144 } else if (ReferenceType == 145 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) 146 CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; 147 else if (ReferenceType == 148 LLVMDisassembler_ReferenceType_Out_Objc_Message) 149 CommentStream << "Objc message: " << ReferenceName; 150 else if (ReferenceType == 151 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) 152 CommentStream << "Objc message ref: " << ReferenceName; 153 else if (ReferenceType == 154 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) 155 CommentStream << "Objc selector ref: " << ReferenceName; 156 else if (ReferenceType == 157 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) 158 CommentStream << "Objc class ref: " << ReferenceName; 159 // For these instructions, the SymbolLookUp() above is just to get the 160 // ReferenceType and ReferenceName. We want to make sure not to 161 // fall through so we don't build an MCExpr to leave the disassembly 162 // of the immediate values of these instructions to the InstPrinter. 163 return false; 164 } else { 165 return false; 166 } 167 } 168 169 const MCExpr *Add = nullptr; 170 if (SymbolicOp.AddSymbol.Present) { 171 if (SymbolicOp.AddSymbol.Name) { 172 StringRef Name(SymbolicOp.AddSymbol.Name); 173 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 174 MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); 175 if (Variant != MCSymbolRefExpr::VK_None) 176 Add = MCSymbolRefExpr::create(Sym, Variant, Ctx); 177 else 178 Add = MCSymbolRefExpr::create(Sym, Ctx); 179 } else { 180 Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx); 181 } 182 } 183 184 const MCExpr *Sub = nullptr; 185 if (SymbolicOp.SubtractSymbol.Present) { 186 if (SymbolicOp.SubtractSymbol.Name) { 187 StringRef Name(SymbolicOp.SubtractSymbol.Name); 188 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 189 Sub = MCSymbolRefExpr::create(Sym, Ctx); 190 } else { 191 Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx); 192 } 193 } 194 195 const MCExpr *Off = nullptr; 196 if (SymbolicOp.Value != 0) 197 Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); 198 199 const MCExpr *Expr; 200 if (Sub) { 201 const MCExpr *LHS; 202 if (Add) 203 LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); 204 else 205 LHS = MCUnaryExpr::createMinus(Sub, Ctx); 206 if (Off) 207 Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); 208 else 209 Expr = LHS; 210 } else if (Add) { 211 if (Off) 212 Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); 213 else 214 Expr = Add; 215 } else { 216 if (Off) 217 Expr = Off; 218 else 219 Expr = MCConstantExpr::create(0, Ctx); 220 } 221 222 MI.addOperand(MCOperand::createExpr(Expr)); 223 224 return true; 225 } 226