1*0b57cec5SDimitry Andric //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric 9*0b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" 10*0b57cec5SDimitry Andric #include "llvm/MC/MCContext.h" 11*0b57cec5SDimitry Andric #include "llvm/MC/MCExpr.h" 12*0b57cec5SDimitry Andric #include "llvm/MC/MCInst.h" 13*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 14*0b57cec5SDimitry Andric #include <cstring> 15*0b57cec5SDimitry Andric 16*0b57cec5SDimitry Andric using namespace llvm; 17*0b57cec5SDimitry Andric 18*0b57cec5SDimitry Andric namespace llvm { 19*0b57cec5SDimitry Andric class Triple; 20*0b57cec5SDimitry Andric } 21*0b57cec5SDimitry Andric 22*0b57cec5SDimitry Andric // This function tries to add a symbolic operand in place of the immediate 23*0b57cec5SDimitry Andric // Value in the MCInst. The immediate Value has had any PC adjustment made by 24*0b57cec5SDimitry Andric // the caller. If the instruction is a branch instruction then IsBranch is true, 25*0b57cec5SDimitry Andric // else false. If the getOpInfo() function was set as part of the 26*0b57cec5SDimitry Andric // setupForSymbolicDisassembly() call then that function is called to get any 27*0b57cec5SDimitry Andric // symbolic information at the Address for this instruction. If that returns 28*0b57cec5SDimitry Andric // non-zero then the symbolic information it returns is used to create an MCExpr 29*0b57cec5SDimitry Andric // and that is added as an operand to the MCInst. If getOpInfo() returns zero 30*0b57cec5SDimitry Andric // and IsBranch is true then a symbol look up for Value is done and if a symbol 31*0b57cec5SDimitry Andric // is found an MCExpr is created with that, else an MCExpr with Value is 32*0b57cec5SDimitry Andric // created. This function returns true if it adds an operand to the MCInst and 33*0b57cec5SDimitry Andric // false otherwise. 34*0b57cec5SDimitry Andric bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, 35*0b57cec5SDimitry Andric raw_ostream &cStream, 36*0b57cec5SDimitry Andric int64_t Value, 37*0b57cec5SDimitry Andric uint64_t Address, 38*0b57cec5SDimitry Andric bool IsBranch, 39*0b57cec5SDimitry Andric uint64_t Offset, 40*0b57cec5SDimitry Andric uint64_t InstSize) { 41*0b57cec5SDimitry Andric struct LLVMOpInfo1 SymbolicOp; 42*0b57cec5SDimitry Andric std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 43*0b57cec5SDimitry Andric SymbolicOp.Value = Value; 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric if (!GetOpInfo || 46*0b57cec5SDimitry Andric !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { 47*0b57cec5SDimitry Andric // Clear SymbolicOp.Value from above and also all other fields. 48*0b57cec5SDimitry Andric std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric // At this point, GetOpInfo() did not find any relocation information about 51*0b57cec5SDimitry Andric // this operand and we are left to use the SymbolLookUp() call back to guess 52*0b57cec5SDimitry Andric // if the Value is the address of a symbol. In the case this is a branch 53*0b57cec5SDimitry Andric // that always makes sense to guess. But in the case of an immediate it is 54*0b57cec5SDimitry Andric // a bit more questionable if it is an address of a symbol or some other 55*0b57cec5SDimitry Andric // reference. So if the immediate Value comes from a width of 1 byte, 56*0b57cec5SDimitry Andric // InstSize, we will not guess it is an address of a symbol. Because in 57*0b57cec5SDimitry Andric // object files assembled starting at address 0 this usually leads to 58*0b57cec5SDimitry Andric // incorrect symbolication. 59*0b57cec5SDimitry Andric if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) 60*0b57cec5SDimitry Andric return false; 61*0b57cec5SDimitry Andric 62*0b57cec5SDimitry Andric uint64_t ReferenceType; 63*0b57cec5SDimitry Andric if (IsBranch) 64*0b57cec5SDimitry Andric ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 65*0b57cec5SDimitry Andric else 66*0b57cec5SDimitry Andric ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 67*0b57cec5SDimitry Andric const char *ReferenceName; 68*0b57cec5SDimitry Andric const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, 69*0b57cec5SDimitry Andric &ReferenceName); 70*0b57cec5SDimitry Andric if (Name) { 71*0b57cec5SDimitry Andric SymbolicOp.AddSymbol.Name = Name; 72*0b57cec5SDimitry Andric SymbolicOp.AddSymbol.Present = true; 73*0b57cec5SDimitry Andric // If Name is a C++ symbol name put the human readable name in a comment. 74*0b57cec5SDimitry Andric if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name) 75*0b57cec5SDimitry Andric cStream << ReferenceName; 76*0b57cec5SDimitry Andric } 77*0b57cec5SDimitry Andric // For branches always create an MCExpr so it gets printed as hex address. 78*0b57cec5SDimitry Andric else if (IsBranch) { 79*0b57cec5SDimitry Andric SymbolicOp.Value = Value; 80*0b57cec5SDimitry Andric } 81*0b57cec5SDimitry Andric if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 82*0b57cec5SDimitry Andric cStream << "symbol stub for: " << ReferenceName; 83*0b57cec5SDimitry Andric else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) 84*0b57cec5SDimitry Andric cStream << "Objc message: " << ReferenceName; 85*0b57cec5SDimitry Andric if (!Name && !IsBranch) 86*0b57cec5SDimitry Andric return false; 87*0b57cec5SDimitry Andric } 88*0b57cec5SDimitry Andric 89*0b57cec5SDimitry Andric const MCExpr *Add = nullptr; 90*0b57cec5SDimitry Andric if (SymbolicOp.AddSymbol.Present) { 91*0b57cec5SDimitry Andric if (SymbolicOp.AddSymbol.Name) { 92*0b57cec5SDimitry Andric StringRef Name(SymbolicOp.AddSymbol.Name); 93*0b57cec5SDimitry Andric MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 94*0b57cec5SDimitry Andric Add = MCSymbolRefExpr::create(Sym, Ctx); 95*0b57cec5SDimitry Andric } else { 96*0b57cec5SDimitry Andric Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx); 97*0b57cec5SDimitry Andric } 98*0b57cec5SDimitry Andric } 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric const MCExpr *Sub = nullptr; 101*0b57cec5SDimitry Andric if (SymbolicOp.SubtractSymbol.Present) { 102*0b57cec5SDimitry Andric if (SymbolicOp.SubtractSymbol.Name) { 103*0b57cec5SDimitry Andric StringRef Name(SymbolicOp.SubtractSymbol.Name); 104*0b57cec5SDimitry Andric MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 105*0b57cec5SDimitry Andric Sub = MCSymbolRefExpr::create(Sym, Ctx); 106*0b57cec5SDimitry Andric } else { 107*0b57cec5SDimitry Andric Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx); 108*0b57cec5SDimitry Andric } 109*0b57cec5SDimitry Andric } 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric const MCExpr *Off = nullptr; 112*0b57cec5SDimitry Andric if (SymbolicOp.Value != 0) 113*0b57cec5SDimitry Andric Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric const MCExpr *Expr; 116*0b57cec5SDimitry Andric if (Sub) { 117*0b57cec5SDimitry Andric const MCExpr *LHS; 118*0b57cec5SDimitry Andric if (Add) 119*0b57cec5SDimitry Andric LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); 120*0b57cec5SDimitry Andric else 121*0b57cec5SDimitry Andric LHS = MCUnaryExpr::createMinus(Sub, Ctx); 122*0b57cec5SDimitry Andric if (Off) 123*0b57cec5SDimitry Andric Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); 124*0b57cec5SDimitry Andric else 125*0b57cec5SDimitry Andric Expr = LHS; 126*0b57cec5SDimitry Andric } else if (Add) { 127*0b57cec5SDimitry Andric if (Off) 128*0b57cec5SDimitry Andric Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); 129*0b57cec5SDimitry Andric else 130*0b57cec5SDimitry Andric Expr = Add; 131*0b57cec5SDimitry Andric } else { 132*0b57cec5SDimitry Andric if (Off) 133*0b57cec5SDimitry Andric Expr = Off; 134*0b57cec5SDimitry Andric else 135*0b57cec5SDimitry Andric Expr = MCConstantExpr::create(0, Ctx); 136*0b57cec5SDimitry Andric } 137*0b57cec5SDimitry Andric 138*0b57cec5SDimitry Andric Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind); 139*0b57cec5SDimitry Andric if (!Expr) 140*0b57cec5SDimitry Andric return false; 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric MI.addOperand(MCOperand::createExpr(Expr)); 143*0b57cec5SDimitry Andric return true; 144*0b57cec5SDimitry Andric } 145*0b57cec5SDimitry Andric 146*0b57cec5SDimitry Andric // This function tries to add a comment as to what is being referenced by a load 147*0b57cec5SDimitry Andric // instruction with the base register that is the Pc. These can often be values 148*0b57cec5SDimitry Andric // in a literal pool near the Address of the instruction. The Address of the 149*0b57cec5SDimitry Andric // instruction and its immediate Value are used as a possible literal pool entry. 150*0b57cec5SDimitry Andric // The SymbolLookUp call back will return the name of a symbol referenced by the 151*0b57cec5SDimitry Andric // literal pool's entry if the referenced address is that of a symbol. Or it 152*0b57cec5SDimitry Andric // will return a pointer to a literal 'C' string if the referenced address of 153*0b57cec5SDimitry Andric // the literal pool's entry is an address into a section with C string literals. 154*0b57cec5SDimitry Andric // Or if the reference is to an Objective-C data structure it will return a 155*0b57cec5SDimitry Andric // specific reference type for it and a string. 156*0b57cec5SDimitry Andric void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, 157*0b57cec5SDimitry Andric int64_t Value, 158*0b57cec5SDimitry Andric uint64_t Address) { 159*0b57cec5SDimitry Andric if (SymbolLookUp) { 160*0b57cec5SDimitry Andric uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; 161*0b57cec5SDimitry Andric const char *ReferenceName; 162*0b57cec5SDimitry Andric (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); 163*0b57cec5SDimitry Andric if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) 164*0b57cec5SDimitry Andric cStream << "literal pool symbol address: " << ReferenceName; 165*0b57cec5SDimitry Andric else if(ReferenceType == 166*0b57cec5SDimitry Andric LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { 167*0b57cec5SDimitry Andric cStream << "literal pool for: \""; 168*0b57cec5SDimitry Andric cStream.write_escaped(ReferenceName); 169*0b57cec5SDimitry Andric cStream << "\""; 170*0b57cec5SDimitry Andric } 171*0b57cec5SDimitry Andric else if(ReferenceType == 172*0b57cec5SDimitry Andric LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) 173*0b57cec5SDimitry Andric cStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; 174*0b57cec5SDimitry Andric else if(ReferenceType == 175*0b57cec5SDimitry Andric LLVMDisassembler_ReferenceType_Out_Objc_Message) 176*0b57cec5SDimitry Andric cStream << "Objc message: " << ReferenceName; 177*0b57cec5SDimitry Andric else if(ReferenceType == 178*0b57cec5SDimitry Andric LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) 179*0b57cec5SDimitry Andric cStream << "Objc message ref: " << ReferenceName; 180*0b57cec5SDimitry Andric else if(ReferenceType == 181*0b57cec5SDimitry Andric LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) 182*0b57cec5SDimitry Andric cStream << "Objc selector ref: " << ReferenceName; 183*0b57cec5SDimitry Andric else if(ReferenceType == 184*0b57cec5SDimitry Andric LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) 185*0b57cec5SDimitry Andric cStream << "Objc class ref: " << ReferenceName; 186*0b57cec5SDimitry Andric } 187*0b57cec5SDimitry Andric } 188*0b57cec5SDimitry Andric 189*0b57cec5SDimitry Andric namespace llvm { 190*0b57cec5SDimitry Andric MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, 191*0b57cec5SDimitry Andric LLVMSymbolLookupCallback SymbolLookUp, 192*0b57cec5SDimitry Andric void *DisInfo, MCContext *Ctx, 193*0b57cec5SDimitry Andric std::unique_ptr<MCRelocationInfo> &&RelInfo) { 194*0b57cec5SDimitry Andric assert(Ctx && "No MCContext given for symbolic disassembly"); 195*0b57cec5SDimitry Andric 196*0b57cec5SDimitry Andric return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo, 197*0b57cec5SDimitry Andric SymbolLookUp, DisInfo); 198*0b57cec5SDimitry Andric } 199*0b57cec5SDimitry Andric } 200