LLVM API Documentation

AArch64ExternalSymbolizer.cpp
Go to the documentation of this file.
00001 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 
00010 #include "AArch64ExternalSymbolizer.h"
00011 #include "AArch64Subtarget.h"
00012 #include "MCTargetDesc/AArch64AddressingModes.h"
00013 #include "Utils/AArch64BaseInfo.h"
00014 #include "llvm/MC/MCContext.h"
00015 #include "llvm/MC/MCExpr.h"
00016 #include "llvm/MC/MCInst.h"
00017 #include "llvm/Support/Format.h"
00018 #include "llvm/Support/raw_ostream.h"
00019 
00020 using namespace llvm;
00021 
00022 #define DEBUG_TYPE "aarch64-disassembler"
00023 
00024 static MCSymbolRefExpr::VariantKind
00025 getVariant(uint64_t LLVMDisassembler_VariantKind) {
00026   switch (LLVMDisassembler_VariantKind) {
00027   case LLVMDisassembler_VariantKind_None:
00028     return MCSymbolRefExpr::VK_None;
00029   case LLVMDisassembler_VariantKind_ARM64_PAGE:
00030     return MCSymbolRefExpr::VK_PAGE;
00031   case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
00032     return MCSymbolRefExpr::VK_PAGEOFF;
00033   case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
00034     return MCSymbolRefExpr::VK_GOTPAGE;
00035   case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
00036     return MCSymbolRefExpr::VK_GOTPAGEOFF;
00037   case LLVMDisassembler_VariantKind_ARM64_TLVP:
00038   case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
00039   default:
00040     llvm_unreachable("bad LLVMDisassembler_VariantKind");
00041   }
00042 }
00043 
00044 /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
00045 /// operand in place of the immediate Value in the MCInst.  The immediate
00046 /// Value has not had any PC adjustment made by the caller. If the instruction
00047 /// is a branch that adds the PC to the immediate Value then isBranch is
00048 /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any
00049 /// symbolic information at the Address for this instrution.  If that returns
00050 /// non-zero then the symbolic information it returns is used to create an
00051 /// MCExpr and that is added as an operand to the MCInst.  If GetOpInfo()
00052 /// returns zero and isBranch is Success then a symbol look up for
00053 /// Address + Value is done and if a symbol is found an MCExpr is created with
00054 /// that, else an MCExpr with Address + Value is created.  If GetOpInfo()
00055 /// returns zero and isBranch is Fail then the the Opcode of the MCInst is
00056 /// tested and for ADRP an other instructions that help to load of pointers
00057 /// a symbol look up is done to see it is returns a specific reference type
00058 /// to add to the comment stream.  This function returns Success if it adds
00059 /// an operand to the MCInst and Fail otherwise.
00060 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
00061     MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
00062     bool IsBranch, uint64_t Offset, uint64_t InstSize) {
00063   // FIXME: This method shares a lot of code with
00064   //        MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
00065   //        refactor the MCExternalSymbolizer interface to allow more of this
00066   //        implementation to be shared.
00067   //
00068   struct LLVMOpInfo1 SymbolicOp;
00069   memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
00070   SymbolicOp.Value = Value;
00071   uint64_t ReferenceType;
00072   const char *ReferenceName;
00073   if (!GetOpInfo ||
00074       !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
00075     if (IsBranch) {
00076       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
00077       const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
00078                                       Address, &ReferenceName);
00079       if (Name) {
00080         SymbolicOp.AddSymbol.Name = Name;
00081         SymbolicOp.AddSymbol.Present = true;
00082         SymbolicOp.Value = 0;
00083       } else {
00084         SymbolicOp.Value = Address + Value;
00085       }
00086       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
00087         CommentStream << "symbol stub for: " << ReferenceName;
00088       else if (ReferenceType ==
00089                LLVMDisassembler_ReferenceType_Out_Objc_Message)
00090         CommentStream << "Objc message: " << ReferenceName;
00091     } else if (MI.getOpcode() == AArch64::ADRP) {
00092         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
00093         // otool expects the fully encoded ADRP instruction to be passed in as
00094         // the value here, so reconstruct it:
00095         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
00096         uint32_t EncodedInst = 0x90000000;
00097         EncodedInst |= (Value & 0x3) << 29; // immlo
00098         EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
00099         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
00100         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
00101                      &ReferenceName);
00102         CommentStream << format("0x%llx",
00103                                 0xfffffffffffff000LL & (Address + Value));
00104     } else if (MI.getOpcode() == AArch64::ADDXri ||
00105                MI.getOpcode() == AArch64::LDRXui ||
00106                MI.getOpcode() == AArch64::LDRXl ||
00107                MI.getOpcode() == AArch64::ADR) {
00108       if (MI.getOpcode() == AArch64::ADDXri)
00109         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
00110       else if (MI.getOpcode() == AArch64::LDRXui)
00111         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
00112       if (MI.getOpcode() == AArch64::LDRXl) {
00113         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
00114         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
00115                      &ReferenceName);
00116       } else if (MI.getOpcode() == AArch64::ADR) {
00117         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
00118         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
00119                             &ReferenceName);
00120       } else {
00121         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
00122         // otool expects the fully encoded ADD/LDR instruction to be passed in
00123         // as the value here, so reconstruct it:
00124         unsigned EncodedInst =
00125           MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
00126         EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
00127         EncodedInst |=
00128           MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
00129         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
00130 
00131         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
00132                      &ReferenceName);
00133       }
00134       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
00135         CommentStream << "literal pool symbol address: " << ReferenceName;
00136       else if (ReferenceType ==
00137                LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
00138         CommentStream << "literal pool for: \"" << ReferenceName << "\"";
00139       else if (ReferenceType ==
00140                LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
00141         CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
00142       else if (ReferenceType ==
00143                LLVMDisassembler_ReferenceType_Out_Objc_Message)
00144         CommentStream << "Objc message: " << ReferenceName;
00145       else if (ReferenceType ==
00146                LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
00147         CommentStream << "Objc message ref: " << ReferenceName;
00148       else if (ReferenceType ==
00149                LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
00150         CommentStream << "Objc selector ref: " << ReferenceName;
00151       else if (ReferenceType ==
00152                LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
00153         CommentStream << "Objc class ref: " << ReferenceName;
00154       // For these instructions, the SymbolLookUp() above is just to get the
00155       // ReferenceType and ReferenceName.  We want to make sure not to
00156       // fall through so we don't build an MCExpr to leave the disassembly
00157       // of the immediate values of these instructions to the InstPrinter.
00158       return false;
00159     } else {
00160       return false;
00161     }
00162   }
00163 
00164   const MCExpr *Add = nullptr;
00165   if (SymbolicOp.AddSymbol.Present) {
00166     if (SymbolicOp.AddSymbol.Name) {
00167       StringRef Name(SymbolicOp.AddSymbol.Name);
00168       MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
00169       MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
00170       if (Variant != MCSymbolRefExpr::VK_None)
00171         Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
00172       else
00173         Add = MCSymbolRefExpr::Create(Sym, Ctx);
00174     } else {
00175       Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx);
00176     }
00177   }
00178 
00179   const MCExpr *Sub = nullptr;
00180   if (SymbolicOp.SubtractSymbol.Present) {
00181     if (SymbolicOp.SubtractSymbol.Name) {
00182       StringRef Name(SymbolicOp.SubtractSymbol.Name);
00183       MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
00184       Sub = MCSymbolRefExpr::Create(Sym, Ctx);
00185     } else {
00186       Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
00187     }
00188   }
00189 
00190   const MCExpr *Off = nullptr;
00191   if (SymbolicOp.Value != 0)
00192     Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
00193 
00194   const MCExpr *Expr;
00195   if (Sub) {
00196     const MCExpr *LHS;
00197     if (Add)
00198       LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
00199     else
00200       LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
00201     if (Off)
00202       Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
00203     else
00204       Expr = LHS;
00205   } else if (Add) {
00206     if (Off)
00207       Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
00208     else
00209       Expr = Add;
00210   } else {
00211     if (Off)
00212       Expr = Off;
00213     else
00214       Expr = MCConstantExpr::Create(0, Ctx);
00215   }
00216 
00217   MI.addOperand(MCOperand::CreateExpr(Expr));
00218 
00219   return true;
00220 }