LLVM API Documentation

MCExternalSymbolizer.cpp
Go to the documentation of this file.
00001 //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 
00010 #include "llvm/MC/MCExternalSymbolizer.h"
00011 #include "llvm/MC/MCContext.h"
00012 #include "llvm/MC/MCExpr.h"
00013 #include "llvm/MC/MCInst.h"
00014 #include "llvm/Support/raw_ostream.h"
00015 #include <cstring>
00016 
00017 using namespace llvm;
00018 
00019 // This function tries to add a symbolic operand in place of the immediate
00020 // Value in the MCInst. The immediate Value has had any PC adjustment made by
00021 // the caller. If the instruction is a branch instruction then IsBranch is true,
00022 // else false. If the getOpInfo() function was set as part of the
00023 // setupForSymbolicDisassembly() call then that function is called to get any
00024 // symbolic information at the Address for this instruction. If that returns
00025 // non-zero then the symbolic information it returns is used to create an MCExpr
00026 // and that is added as an operand to the MCInst. If getOpInfo() returns zero
00027 // and IsBranch is true then a symbol look up for Value is done and if a symbol
00028 // is found an MCExpr is created with that, else an MCExpr with Value is
00029 // created. This function returns true if it adds an operand to the MCInst and
00030 // false otherwise.
00031 bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
00032                                                     raw_ostream &cStream,
00033                                                     int64_t Value,
00034                                                     uint64_t Address,
00035                                                     bool IsBranch,
00036                                                     uint64_t Offset,
00037                                                     uint64_t InstSize) {
00038   struct LLVMOpInfo1 SymbolicOp;
00039   std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
00040   SymbolicOp.Value = Value;
00041 
00042   if (!GetOpInfo ||
00043       !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
00044     // Clear SymbolicOp.Value from above and also all other fields.
00045     std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
00046 
00047     // At this point, GetOpInfo() did not find any relocation information about
00048     // this operand and we are left to use the SymbolLookUp() call back to guess
00049     // if the Value is the address of a symbol.  In the case this is a branch
00050     // that always makes sense to guess.  But in the case of an immediate it is
00051     // a bit more questionable if it is an address of a symbol or some other
00052     // reference.  So if the immediate Value comes from a width of 1 byte,
00053     // InstSize, we will not guess it is an address of a symbol.  Because in
00054     // object files assembled starting at address 0 this usually leads to
00055     // incorrect symbolication.
00056     if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
00057       return false;
00058 
00059     uint64_t ReferenceType;
00060     if (IsBranch)
00061        ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
00062     else
00063        ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
00064     const char *ReferenceName;
00065     const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
00066                                     &ReferenceName);
00067     if (Name) {
00068       SymbolicOp.AddSymbol.Name = Name;
00069       SymbolicOp.AddSymbol.Present = true;
00070       // If Name is a C++ symbol name put the human readable name in a comment.
00071       if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name)
00072         cStream << ReferenceName;
00073     }
00074     // For branches always create an MCExpr so it gets printed as hex address.
00075     else if (IsBranch) {
00076       SymbolicOp.Value = Value;
00077     }
00078     if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
00079       cStream << "symbol stub for: " << ReferenceName;
00080     else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)
00081       cStream << "Objc message: " << ReferenceName;
00082     if (!Name && !IsBranch)
00083       return false;
00084   }
00085 
00086   const MCExpr *Add = nullptr;
00087   if (SymbolicOp.AddSymbol.Present) {
00088     if (SymbolicOp.AddSymbol.Name) {
00089       StringRef Name(SymbolicOp.AddSymbol.Name);
00090       MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
00091       Add = MCSymbolRefExpr::Create(Sym, Ctx);
00092     } else {
00093       Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, Ctx);
00094     }
00095   }
00096 
00097   const MCExpr *Sub = nullptr;
00098   if (SymbolicOp.SubtractSymbol.Present) {
00099       if (SymbolicOp.SubtractSymbol.Name) {
00100       StringRef Name(SymbolicOp.SubtractSymbol.Name);
00101       MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
00102       Sub = MCSymbolRefExpr::Create(Sym, Ctx);
00103     } else {
00104       Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
00105     }
00106   }
00107 
00108   const MCExpr *Off = nullptr;
00109   if (SymbolicOp.Value != 0)
00110     Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
00111 
00112   const MCExpr *Expr;
00113   if (Sub) {
00114     const MCExpr *LHS;
00115     if (Add)
00116       LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
00117     else
00118       LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
00119     if (Off)
00120       Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
00121     else
00122       Expr = LHS;
00123   } else if (Add) {
00124     if (Off)
00125       Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
00126     else
00127       Expr = Add;
00128   } else {
00129     if (Off)
00130       Expr = Off;
00131     else
00132       Expr = MCConstantExpr::Create(0, Ctx);
00133   }
00134 
00135   Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind);
00136   if (!Expr)
00137     return false;
00138 
00139   MI.addOperand(MCOperand::CreateExpr(Expr));
00140   return true;
00141 }
00142 
00143 // This function tries to add a comment as to what is being referenced by a load
00144 // instruction with the base register that is the Pc.  These can often be values
00145 // in a literal pool near the Address of the instruction. The Address of the
00146 // instruction and its immediate Value are used as a possible literal pool entry.
00147 // The SymbolLookUp call back will return the name of a symbol referenced by the
00148 // literal pool's entry if the referenced address is that of a symbol. Or it
00149 // will return a pointer to a literal 'C' string if the referenced address of
00150 // the literal pool's entry is an address into a section with C string literals.
00151 // Or if the reference is to an Objective-C data structure it will return a
00152 // specific reference type for it and a string.
00153 void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
00154                                                            int64_t Value,
00155                                                            uint64_t Address) {
00156   if (SymbolLookUp) {
00157     uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
00158     const char *ReferenceName;
00159     (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
00160     if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
00161       cStream << "literal pool symbol address: " << ReferenceName;
00162     else if(ReferenceType ==
00163             LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
00164       cStream << "literal pool for: \"";
00165       cStream.write_escaped(ReferenceName);
00166       cStream << "\"";
00167     }
00168     else if(ReferenceType ==
00169             LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
00170       cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
00171     else if(ReferenceType ==
00172             LLVMDisassembler_ReferenceType_Out_Objc_Message)
00173       cStream << "Objc message: " << ReferenceName;
00174     else if(ReferenceType ==
00175             LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
00176       cStream << "Objc message ref: " << ReferenceName;
00177     else if(ReferenceType ==
00178             LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
00179       cStream << "Objc selector ref: " << ReferenceName;
00180     else if(ReferenceType ==
00181             LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
00182       cStream << "Objc class ref: " << ReferenceName;
00183   }
00184 }
00185 
00186 namespace llvm {
00187 MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
00188                                  LLVMSymbolLookupCallback SymbolLookUp,
00189                                  void *DisInfo,
00190                                  MCContext *Ctx,
00191                                  MCRelocationInfo *RelInfo) {
00192   assert(Ctx && "No MCContext given for symbolic disassembly");
00193 
00194   return new MCExternalSymbolizer(*Ctx,
00195                                   std::unique_ptr<MCRelocationInfo>(RelInfo),
00196                                   GetOpInfo, SymbolLookUp, DisInfo);
00197 }
00198 }