LLVM API Documentation
00001 //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 00010 #include "llvm/MC/MCExternalSymbolizer.h" 00011 #include "llvm/MC/MCContext.h" 00012 #include "llvm/MC/MCExpr.h" 00013 #include "llvm/MC/MCInst.h" 00014 #include "llvm/Support/raw_ostream.h" 00015 #include <cstring> 00016 00017 using namespace llvm; 00018 00019 // This function tries to add a symbolic operand in place of the immediate 00020 // Value in the MCInst. The immediate Value has had any PC adjustment made by 00021 // the caller. If the instruction is a branch instruction then IsBranch is true, 00022 // else false. If the getOpInfo() function was set as part of the 00023 // setupForSymbolicDisassembly() call then that function is called to get any 00024 // symbolic information at the Address for this instruction. If that returns 00025 // non-zero then the symbolic information it returns is used to create an MCExpr 00026 // and that is added as an operand to the MCInst. If getOpInfo() returns zero 00027 // and IsBranch is true then a symbol look up for Value is done and if a symbol 00028 // is found an MCExpr is created with that, else an MCExpr with Value is 00029 // created. This function returns true if it adds an operand to the MCInst and 00030 // false otherwise. 00031 bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, 00032 raw_ostream &cStream, 00033 int64_t Value, 00034 uint64_t Address, 00035 bool IsBranch, 00036 uint64_t Offset, 00037 uint64_t InstSize) { 00038 struct LLVMOpInfo1 SymbolicOp; 00039 std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 00040 SymbolicOp.Value = Value; 00041 00042 if (!GetOpInfo || 00043 !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { 00044 // Clear SymbolicOp.Value from above and also all other fields. 00045 std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 00046 00047 // At this point, GetOpInfo() did not find any relocation information about 00048 // this operand and we are left to use the SymbolLookUp() call back to guess 00049 // if the Value is the address of a symbol. In the case this is a branch 00050 // that always makes sense to guess. But in the case of an immediate it is 00051 // a bit more questionable if it is an address of a symbol or some other 00052 // reference. So if the immediate Value comes from a width of 1 byte, 00053 // InstSize, we will not guess it is an address of a symbol. Because in 00054 // object files assembled starting at address 0 this usually leads to 00055 // incorrect symbolication. 00056 if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) 00057 return false; 00058 00059 uint64_t ReferenceType; 00060 if (IsBranch) 00061 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 00062 else 00063 ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 00064 const char *ReferenceName; 00065 const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, 00066 &ReferenceName); 00067 if (Name) { 00068 SymbolicOp.AddSymbol.Name = Name; 00069 SymbolicOp.AddSymbol.Present = true; 00070 // If Name is a C++ symbol name put the human readable name in a comment. 00071 if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name) 00072 cStream << ReferenceName; 00073 } 00074 // For branches always create an MCExpr so it gets printed as hex address. 00075 else if (IsBranch) { 00076 SymbolicOp.Value = Value; 00077 } 00078 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 00079 cStream << "symbol stub for: " << ReferenceName; 00080 else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) 00081 cStream << "Objc message: " << ReferenceName; 00082 if (!Name && !IsBranch) 00083 return false; 00084 } 00085 00086 const MCExpr *Add = nullptr; 00087 if (SymbolicOp.AddSymbol.Present) { 00088 if (SymbolicOp.AddSymbol.Name) { 00089 StringRef Name(SymbolicOp.AddSymbol.Name); 00090 MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); 00091 Add = MCSymbolRefExpr::Create(Sym, Ctx); 00092 } else { 00093 Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, Ctx); 00094 } 00095 } 00096 00097 const MCExpr *Sub = nullptr; 00098 if (SymbolicOp.SubtractSymbol.Present) { 00099 if (SymbolicOp.SubtractSymbol.Name) { 00100 StringRef Name(SymbolicOp.SubtractSymbol.Name); 00101 MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); 00102 Sub = MCSymbolRefExpr::Create(Sym, Ctx); 00103 } else { 00104 Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, Ctx); 00105 } 00106 } 00107 00108 const MCExpr *Off = nullptr; 00109 if (SymbolicOp.Value != 0) 00110 Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); 00111 00112 const MCExpr *Expr; 00113 if (Sub) { 00114 const MCExpr *LHS; 00115 if (Add) 00116 LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); 00117 else 00118 LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); 00119 if (Off) 00120 Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); 00121 else 00122 Expr = LHS; 00123 } else if (Add) { 00124 if (Off) 00125 Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); 00126 else 00127 Expr = Add; 00128 } else { 00129 if (Off) 00130 Expr = Off; 00131 else 00132 Expr = MCConstantExpr::Create(0, Ctx); 00133 } 00134 00135 Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind); 00136 if (!Expr) 00137 return false; 00138 00139 MI.addOperand(MCOperand::CreateExpr(Expr)); 00140 return true; 00141 } 00142 00143 // This function tries to add a comment as to what is being referenced by a load 00144 // instruction with the base register that is the Pc. These can often be values 00145 // in a literal pool near the Address of the instruction. The Address of the 00146 // instruction and its immediate Value are used as a possible literal pool entry. 00147 // The SymbolLookUp call back will return the name of a symbol referenced by the 00148 // literal pool's entry if the referenced address is that of a symbol. Or it 00149 // will return a pointer to a literal 'C' string if the referenced address of 00150 // the literal pool's entry is an address into a section with C string literals. 00151 // Or if the reference is to an Objective-C data structure it will return a 00152 // specific reference type for it and a string. 00153 void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, 00154 int64_t Value, 00155 uint64_t Address) { 00156 if (SymbolLookUp) { 00157 uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; 00158 const char *ReferenceName; 00159 (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); 00160 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) 00161 cStream << "literal pool symbol address: " << ReferenceName; 00162 else if(ReferenceType == 00163 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { 00164 cStream << "literal pool for: \""; 00165 cStream.write_escaped(ReferenceName); 00166 cStream << "\""; 00167 } 00168 else if(ReferenceType == 00169 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) 00170 cStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; 00171 else if(ReferenceType == 00172 LLVMDisassembler_ReferenceType_Out_Objc_Message) 00173 cStream << "Objc message: " << ReferenceName; 00174 else if(ReferenceType == 00175 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) 00176 cStream << "Objc message ref: " << ReferenceName; 00177 else if(ReferenceType == 00178 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) 00179 cStream << "Objc selector ref: " << ReferenceName; 00180 else if(ReferenceType == 00181 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) 00182 cStream << "Objc class ref: " << ReferenceName; 00183 } 00184 } 00185 00186 namespace llvm { 00187 MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, 00188 LLVMSymbolLookupCallback SymbolLookUp, 00189 void *DisInfo, 00190 MCContext *Ctx, 00191 MCRelocationInfo *RelInfo) { 00192 assert(Ctx && "No MCContext given for symbolic disassembly"); 00193 00194 return new MCExternalSymbolizer(*Ctx, 00195 std::unique_ptr<MCRelocationInfo>(RelInfo), 00196 GetOpInfo, SymbolLookUp, DisInfo); 00197 } 00198 }