LLVM API Documentation

X86Disassembler.cpp
Go to the documentation of this file.
00001 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file is part of the X86 Disassembler.
00011 // It contains code to translate the data produced by the decoder into
00012 //  MCInsts.
00013 // Documentation for the disassembler can be found in X86Disassembler.h.
00014 //
00015 //===----------------------------------------------------------------------===//
00016 
00017 #include "X86Disassembler.h"
00018 #include "X86DisassemblerDecoder.h"
00019 #include "llvm/MC/MCContext.h"
00020 #include "llvm/MC/MCDisassembler.h"
00021 #include "llvm/MC/MCExpr.h"
00022 #include "llvm/MC/MCInst.h"
00023 #include "llvm/MC/MCInstrInfo.h"
00024 #include "llvm/MC/MCSubtargetInfo.h"
00025 #include "llvm/Support/Debug.h"
00026 #include "llvm/Support/MemoryObject.h"
00027 #include "llvm/Support/TargetRegistry.h"
00028 #include "llvm/Support/raw_ostream.h"
00029 
00030 using namespace llvm;
00031 using namespace llvm::X86Disassembler;
00032 
00033 #define DEBUG_TYPE "x86-disassembler"
00034 
00035 #define GET_REGINFO_ENUM
00036 #include "X86GenRegisterInfo.inc"
00037 #define GET_INSTRINFO_ENUM
00038 #include "X86GenInstrInfo.inc"
00039 #define GET_SUBTARGETINFO_ENUM
00040 #include "X86GenSubtargetInfo.inc"
00041 
00042 void llvm::X86Disassembler::Debug(const char *file, unsigned line,
00043                                   const char *s) {
00044   dbgs() << file << ":" << line << ": " << s;
00045 }
00046 
00047 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode,
00048                                                 const void *mii) {
00049   const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
00050   return MII->getName(Opcode);
00051 }
00052 
// debug(s) - Reports message s through Debug(), tagged with the file and line
//   of the call site, by way of the DEBUG() macro.
//
// The definition deliberately has no trailing semicolon: every call site
// writes `debug("...");` itself, so the macro expands to exactly one
// statement and stays safe inside an unbraced if/else.
#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s))
00054 
00055 namespace llvm {  
00056   
00057 // Fill-ins to make the compiler happy.  These constants are never actually
00058 //   assigned; they are just filler to make an automatically-generated switch
00059 //   statement work.
00060 namespace X86 {
00061   enum {
00062     BX_SI = 500,
00063     BX_DI = 501,
00064     BP_SI = 502,
00065     BP_DI = 503,
00066     sib   = 504,
00067     sib64 = 505
00068   };
00069 }
00070 
00071 extern Target TheX86_32Target, TheX86_64Target;
00072 
00073 }
00074 
00075 static bool translateInstruction(MCInst &target,
00076                                 InternalInstruction &source,
00077                                 const MCDisassembler *Dis);
00078 
00079 X86GenericDisassembler::X86GenericDisassembler(
00080                                          const MCSubtargetInfo &STI,
00081                                          MCContext &Ctx,
00082                                          std::unique_ptr<const MCInstrInfo> MII)
00083   : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
00084   switch (STI.getFeatureBits() &
00085           (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) {
00086   case X86::Mode16Bit:
00087     fMode = MODE_16BIT;
00088     break;
00089   case X86::Mode32Bit:
00090     fMode = MODE_32BIT;
00091     break;
00092   case X86::Mode64Bit:
00093     fMode = MODE_64BIT;
00094     break;
00095   default:
00096     llvm_unreachable("Invalid CPU mode");
00097   }
00098 }
00099 
00100 /// regionReader - a callback function that wraps the readByte method from
00101 ///   MemoryObject.
00102 ///
00103 /// @param arg      - The generic callback parameter.  In this case, this should
00104 ///                   be a pointer to a MemoryObject.
00105 /// @param byte     - A pointer to the byte to be read.
00106 /// @param address  - The address to be read.
00107 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
00108   const MemoryObject* region = static_cast<const MemoryObject*>(arg);
00109   return region->readByte(address, byte);
00110 }
00111 
00112 /// logger - a callback function that wraps the operator<< method from
00113 ///   raw_ostream.
00114 ///
00115 /// @param arg      - The generic callback parameter.  This should be a pointe
00116 ///                   to a raw_ostream.
00117 /// @param log      - A string to be logged.  logger() adds a newline.
00118 static void logger(void* arg, const char* log) {
00119   if (!arg)
00120     return;
00121   
00122   raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
00123   vStream << log << "\n";
00124 }  
00125   
00126 //
00127 // Public interface for the disassembler
00128 //
00129 
00130 MCDisassembler::DecodeStatus
00131 X86GenericDisassembler::getInstruction(MCInst &instr,
00132                                        uint64_t &size,
00133                                        const MemoryObject &region,
00134                                        uint64_t address,
00135                                        raw_ostream &vStream,
00136                                        raw_ostream &cStream) const {
00137   CommentStream = &cStream;
00138 
00139   InternalInstruction internalInstr;
00140 
00141   dlog_t loggerFn = logger;
00142   if (&vStream == &nulls())
00143     loggerFn = nullptr; // Disable logging completely if it's going to nulls().
00144   
00145   int ret = decodeInstruction(&internalInstr,
00146                               regionReader,
00147                               (const void*)&region,
00148                               loggerFn,
00149                               (void*)&vStream,
00150                               (const void*)MII.get(),
00151                               address,
00152                               fMode);
00153 
00154   if (ret) {
00155     size = internalInstr.readerCursor - address;
00156     return Fail;
00157   }
00158   else {
00159     size = internalInstr.length;
00160     return (!translateInstruction(instr, internalInstr, this)) ?
00161             Success : Fail;
00162   }
00163 }
00164 
00165 //
00166 // Private code that translates from struct InternalInstructions to MCInsts.
00167 //
00168 
00169 /// translateRegister - Translates an internal register to the appropriate LLVM
00170 ///   register, and appends it as an operand to an MCInst.
00171 ///
00172 /// @param mcInst     - The MCInst to append to.
00173 /// @param reg        - The Reg to append.
00174 static void translateRegister(MCInst &mcInst, Reg reg) {
00175 #define ENTRY(x) X86::x,
00176   uint8_t llvmRegnums[] = {
00177     ALL_REGS
00178     0
00179   };
00180 #undef ENTRY
00181 
00182   uint8_t llvmRegnum = llvmRegnums[reg];
00183   mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
00184 }
00185 
00186 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
00187 /// immediate Value in the MCInst. 
00188 ///
00189 /// @param Value      - The immediate Value, has had any PC adjustment made by
00190 ///                     the caller.
00191 /// @param isBranch   - If the instruction is a branch instruction
00192 /// @param Address    - The starting address of the instruction
00193 /// @param Offset     - The byte offset to this immediate in the instruction
00194 /// @param Width      - The byte width of this immediate in the instruction
00195 ///
00196 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
00197 /// called then that function is called to get any symbolic information for the
00198 /// immediate in the instruction using the Address, Offset and Width.  If that
00199 /// returns non-zero then the symbolic information it returns is used to create 
00200 /// an MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
00201 /// returns zero and isBranch is true then a symbol look up for immediate Value
00202 /// is done and if a symbol is found an MCExpr is created with that, else
00203 /// an MCExpr with the immediate Value is created.  This function returns true
00204 /// if it adds an operand to the MCInst and false otherwise.
00205 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
00206                                      uint64_t Address, uint64_t Offset,
00207                                      uint64_t Width, MCInst &MI, 
00208                                      const MCDisassembler *Dis) {  
00209   return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
00210                                        Offset, Width);
00211 }
00212 
00213 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
00214 /// referenced by a load instruction with the base register that is the rip.
00215 /// These can often be addresses in a literal pool.  The Address of the
00216 /// instruction and its immediate Value are used to determine the address
00217 /// being referenced in the literal pool entry.  The SymbolLookUp call back will
00218 /// return a pointer to a literal 'C' string if the referenced address is an 
00219 /// address into a section with 'C' string literals.
00220 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
00221                                             const void *Decoder) {
00222   const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
00223   Dis->tryAddingPcLoadReferenceComment(Value, Address);
00224 }
00225 
00226 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
00227   0,        // SEG_OVERRIDE_NONE
00228   X86::CS,
00229   X86::SS,
00230   X86::DS,
00231   X86::ES,
00232   X86::FS,
00233   X86::GS
00234 };
00235 
00236 /// translateSrcIndex   - Appends a source index operand to an MCInst.
00237 ///
00238 /// @param mcInst       - The MCInst to append to.
00239 /// @param insn         - The internal instruction.
00240 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
00241   unsigned baseRegNo;
00242 
00243   if (insn.mode == MODE_64BIT)
00244     baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI;
00245   else if (insn.mode == MODE_32BIT)
00246     baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI;
00247   else {
00248     assert(insn.mode == MODE_16BIT);
00249     baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI;
00250   }
00251   MCOperand baseReg = MCOperand::CreateReg(baseRegNo);
00252   mcInst.addOperand(baseReg);
00253 
00254   MCOperand segmentReg;
00255   segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
00256   mcInst.addOperand(segmentReg);
00257   return false;
00258 }
00259 
00260 /// translateDstIndex   - Appends a destination index operand to an MCInst.
00261 ///
00262 /// @param mcInst       - The MCInst to append to.
00263 /// @param insn         - The internal instruction.
00264 
00265 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
00266   unsigned baseRegNo;
00267 
00268   if (insn.mode == MODE_64BIT)
00269     baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI;
00270   else if (insn.mode == MODE_32BIT)
00271     baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI;
00272   else {
00273     assert(insn.mode == MODE_16BIT);
00274     baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI;
00275   }
00276   MCOperand baseReg = MCOperand::CreateReg(baseRegNo);
00277   mcInst.addOperand(baseReg);
00278   return false;
00279 }
00280 
00281 /// translateImmediate  - Appends an immediate operand to an MCInst.
00282 ///
00283 /// @param mcInst       - The MCInst to append to.
00284 /// @param immediate    - The immediate value to append.
00285 /// @param operand      - The operand, as stored in the descriptor table.
00286 /// @param insn         - The internal instruction.
00287 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
00288                                const OperandSpecifier &operand,
00289                                InternalInstruction &insn,
00290                                const MCDisassembler *Dis) {  
00291   // Sign-extend the immediate if necessary.
00292 
00293   OperandType type = (OperandType)operand.type;
00294 
00295   bool isBranch = false;
00296   uint64_t pcrel = 0;
00297   if (type == TYPE_RELv) {
00298     isBranch = true;
00299     pcrel = insn.startLocation +
00300             insn.immediateOffset + insn.immediateSize;
00301     switch (insn.displacementSize) {
00302     default:
00303       break;
00304     case 1:
00305       if(immediate & 0x80)
00306         immediate |= ~(0xffull);
00307       break;
00308     case 2:
00309       if(immediate & 0x8000)
00310         immediate |= ~(0xffffull);
00311       break;
00312     case 4:
00313       if(immediate & 0x80000000)
00314         immediate |= ~(0xffffffffull);
00315       break;
00316     case 8:
00317       break;
00318     }
00319   }
00320   // By default sign-extend all X86 immediates based on their encoding.
00321   else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
00322            type == TYPE_IMM64 || type == TYPE_IMMv) {
00323     uint32_t Opcode = mcInst.getOpcode();
00324     switch (operand.encoding) {
00325     default:
00326       break;
00327     case ENCODING_IB:
00328       // Special case those X86 instructions that use the imm8 as a set of
00329       // bits, bit count, etc. and are not sign-extend.
00330       if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
00331           Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
00332           Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
00333           Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
00334           Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
00335           Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
00336           Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
00337           Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
00338           Opcode != X86::VINSERTPSrr)
00339         if(immediate & 0x80)
00340           immediate |= ~(0xffull);
00341       break;
00342     case ENCODING_IW:
00343       if(immediate & 0x8000)
00344         immediate |= ~(0xffffull);
00345       break;
00346     case ENCODING_ID:
00347       if(immediate & 0x80000000)
00348         immediate |= ~(0xffffffffull);
00349       break;
00350     case ENCODING_IO:
00351       break;
00352     }
00353   }
00354 
00355   switch (type) {
00356   case TYPE_XMM32:
00357   case TYPE_XMM64:
00358   case TYPE_XMM128:
00359     mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
00360     return;
00361   case TYPE_XMM256:
00362     mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
00363     return;
00364   case TYPE_XMM512:
00365     mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4)));
00366     return;
00367   case TYPE_REL8:
00368     isBranch = true;
00369     pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
00370     if(immediate & 0x80)
00371       immediate |= ~(0xffull);
00372     break;
00373   case TYPE_REL32:
00374   case TYPE_REL64:
00375     isBranch = true;
00376     pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
00377     if(immediate & 0x80000000)
00378       immediate |= ~(0xffffffffull);
00379     break;
00380   default:
00381     // operand is 64 bits wide.  Do nothing.
00382     break;
00383   }
00384 
00385   if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
00386                                insn.immediateOffset, insn.immediateSize,
00387                                mcInst, Dis))
00388     mcInst.addOperand(MCOperand::CreateImm(immediate));
00389 
00390   if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
00391       type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
00392     MCOperand segmentReg;
00393     segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
00394     mcInst.addOperand(segmentReg);
00395   }
00396 }
00397 
/// translateRMRegister - Translates a register stored in the R/M field of the
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction to extract the R/M field
///                       from.
/// @return             - false on success; true otherwise.
static bool translateRMRegister(MCInst &mcInst,
                                InternalInstruction &insn) {
  // A register operand cannot be described through a SIB byte.
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    debug("A R/M register operand may not have a SIB byte");
    return true;
  }

  switch (insn.eaBase) {
  default:
    debug("Unexpected EA base register");
    return true;
  case EA_BASE_NONE:
    debug("EA_BASE_NONE for ModR/M base");
    return true;
  // Every EA_BASE_* value (i.e. a memory base) is rejected: the case labels
  // generated below all share the error path that follows them.
#define ENTRY(x) case EA_BASE_##x:
  ALL_EA_BASES
#undef ENTRY
    debug("A R/M register operand may not have a base; "
          "the operand must be a register.");
    return true;
  // Every EA_REG_* value maps directly onto the LLVM register of the same
  // name and is appended as a register operand.
#define ENTRY(x)                                                      \
  case EA_REG_##x:                                                    \
    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
  ALL_REGS
#undef ENTRY
  }

  return false;
}
00433 
/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
///   fields of an internal instruction (and possibly its SIB byte) to a memory
///   operand in LLVM's format, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.  For the gather opcodes listed in the body,
///                       insn.sibIndex is rewritten in place to the matching
///                       vector index register.
/// @param Dis          - The disassembler, used for the symbolic displacement
///                       lookup and the PC-relative load comment.
/// @return             - false on success; true otherwise.
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
                              const MCDisassembler *Dis) {
  // Addresses in an MCInst are represented as five operands:
  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
  //                                SIB base
  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
  //                                scale amount
  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
  //                                the index (which is multiplied by the
  //                                scale amount)
  //   4. displacement  (immediate) 0, or the displacement if there is one
  //   5. segmentreg    (register)  the register selected by the segment
  //                                override; register 0 when there is none

  MCOperand baseReg;
  MCOperand scaleAmount;
  MCOperand indexReg;
  MCOperand displacement;
  MCOperand segmentReg;
  // Stays 0 unless the operand turns out to be RIP-relative (see below).
  uint64_t pcrel = 0;

  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    // SIB form: base, index and scale all come from the SIB byte.
    if (insn.sibBase != SIB_BASE_NONE) {
      switch (insn.sibBase) {
      default:
        debug("Unexpected sibBase");
        return true;
#define ENTRY(x)                                          \
      case SIB_BASE_##x:                                  \
        baseReg = MCOperand::CreateReg(X86::x); break;
      ALL_SIB_BASES
#undef ENTRY
      }
    } else {
      // No base register.
      baseReg = MCOperand::CreateReg(0);
    }

    // Check whether we are handling VSIB addressing mode for GATHER.
    // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
    // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
    // I don't see a way to get the correct IndexReg in readSIB:
    //   We can tell whether it is VSIB or SIB after instruction ID is decoded,
    //   but instruction ID may not be decoded yet when calling readSIB.
    uint32_t Opcode = mcInst.getOpcode();
    // The three lists below classify gather opcodes by the width of the
    // vector register used as the index (XMM, YMM or ZMM).
    bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
                       Opcode == X86::VGATHERDPDYrm ||
                       Opcode == X86::VGATHERQPDrm ||
                       Opcode == X86::VGATHERDPSrm ||
                       Opcode == X86::VGATHERQPSrm ||
                       Opcode == X86::VPGATHERDQrm ||
                       Opcode == X86::VPGATHERDQYrm ||
                       Opcode == X86::VPGATHERQQrm ||
                       Opcode == X86::VPGATHERDDrm ||
                       Opcode == X86::VPGATHERQDrm);
    bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
                       Opcode == X86::VGATHERDPSYrm ||
                       Opcode == X86::VGATHERQPSYrm ||
                       Opcode == X86::VGATHERDPDZrm ||
                       Opcode == X86::VPGATHERDQZrm ||
                       Opcode == X86::VPGATHERQQYrm ||
                       Opcode == X86::VPGATHERDDYrm ||
                       Opcode == X86::VPGATHERQDYrm);
    bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm ||
                       Opcode == X86::VGATHERDPSZrm ||
                       Opcode == X86::VGATHERQPSZrm ||
                       Opcode == X86::VPGATHERQQZrm ||
                       Opcode == X86::VPGATHERDDZrm ||
                       Opcode == X86::VPGATHERQDZrm);
    if (IndexIs128 || IndexIs256 || IndexIs512) {
      // Position of the decoded index relative to the first general-purpose
      // index register of the active address size.  The value is only used
      // when sibIndex is not SIB_INDEX_NONE.
      unsigned IndexOffset = insn.sibIndex -
                         (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
      SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 :
                           IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
      // Re-base the index onto the vector register file.
      insn.sibIndex = (SIBIndex)(IndexBase +
                           (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
    }

    if (insn.sibIndex != SIB_INDEX_NONE) {
      switch (insn.sibIndex) {
      default:
        debug("Unexpected sibIndex");
        return true;
#define ENTRY(x)                                          \
      case SIB_INDEX_##x:                                 \
        indexReg = MCOperand::CreateReg(X86::x); break;
      EA_BASES_32BIT
      EA_BASES_64BIT
      REGS_XMM
      REGS_YMM
      REGS_ZMM
#undef ENTRY
      }
    } else {
      // No index register.
      indexReg = MCOperand::CreateReg(0);
    }

    scaleAmount = MCOperand::CreateImm(insn.sibScale);
  } else {
    // No SIB byte: the base comes straight from the decoded eaBase.
    switch (insn.eaBase) {
    case EA_BASE_NONE:
      if (insn.eaDisplacement == EA_DISP_NONE) {
        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
        return true;
      }
      if (insn.mode == MODE_64BIT){
        // In 64-bit mode a base-less operand is RIP-relative.  pcrel is the
        // address of the byte that follows the displacement field.
        pcrel = insn.startLocation +
                insn.displacementOffset + insn.displacementSize;
        tryAddingPcLoadReferenceComment(insn.startLocation +
                                        insn.displacementOffset,
                                        insn.displacement + pcrel, Dis);
        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
      }
      else
        baseReg = MCOperand::CreateReg(0);

      indexReg = MCOperand::CreateReg(0);
      break;
    // The next four forms each name both a base and an index register.
    case EA_BASE_BX_SI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BX_DI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    case EA_BASE_BP_SI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BP_DI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    default:
      // Single base register, no index.
      indexReg = MCOperand::CreateReg(0);
      switch (insn.eaBase) {
      default:
        debug("Unexpected eaBase");
        return true;
        // Here, we will use the fill-ins defined above.  However,
        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
        //   sib and sib64 were handled in the top-level if, so they're only
        //   placeholders to keep the compiler happy.
#define ENTRY(x)                                        \
      case EA_BASE_##x:                                 \
        baseReg = MCOperand::CreateReg(X86::x); break; 
      ALL_EA_BASES
#undef ENTRY
      // Any EA_REG_* value means the R/M field named a register, which is
      // not a valid memory operand: all of these labels share the error path.
#define ENTRY(x) case EA_REG_##x:
      ALL_REGS
#undef ENTRY
        debug("A R/M memory operand may not be a register; "
              "the base field must be a base.");
        return true;
      }
    }

    scaleAmount = MCOperand::CreateImm(1);
  }

  displacement = MCOperand::CreateImm(insn.displacement);

  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);

  // Append the five address operands in the order documented above.
  mcInst.addOperand(baseReg);
  mcInst.addOperand(scaleAmount);
  mcInst.addOperand(indexReg);
  // Prefer a symbolic displacement when the disassembler can supply one.  The
  // fallback is the raw displacement, without the pcrel adjustment.
  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
                               insn.startLocation, insn.displacementOffset,
                               insn.displacementSize, mcInst, Dis))
    mcInst.addOperand(displacement);
  mcInst.addOperand(segmentReg);
  return false;
}
00616 
00617 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
00618 ///   byte of an instruction to LLVM form, and appends it to an MCInst.
00619 ///
00620 /// @param mcInst       - The MCInst to append to.
00621 /// @param operand      - The operand, as stored in the descriptor table.
00622 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
00623 ///                       from.
00624 /// @return             - 0 on success; nonzero otherwise
00625 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
00626                         InternalInstruction &insn, const MCDisassembler *Dis) {  
00627   switch (operand.type) {
00628   default:
00629     debug("Unexpected type for a R/M operand");
00630     return true;
00631   case TYPE_R8:
00632   case TYPE_R16:
00633   case TYPE_R32:
00634   case TYPE_R64:
00635   case TYPE_Rv:
00636   case TYPE_MM:
00637   case TYPE_MM32:
00638   case TYPE_MM64:
00639   case TYPE_XMM:
00640   case TYPE_XMM32:
00641   case TYPE_XMM64:
00642   case TYPE_XMM128:
00643   case TYPE_XMM256:
00644   case TYPE_XMM512:
00645   case TYPE_VK1:
00646   case TYPE_VK8:
00647   case TYPE_VK16:
00648   case TYPE_DEBUGREG:
00649   case TYPE_CONTROLREG:
00650     return translateRMRegister(mcInst, insn);
00651   case TYPE_M:
00652   case TYPE_M8:
00653   case TYPE_M16:
00654   case TYPE_M32:
00655   case TYPE_M64:
00656   case TYPE_M128:
00657   case TYPE_M256:
00658   case TYPE_M512:
00659   case TYPE_Mv:
00660   case TYPE_M32FP:
00661   case TYPE_M64FP:
00662   case TYPE_M80FP:
00663   case TYPE_M16INT:
00664   case TYPE_M32INT:
00665   case TYPE_M64INT:
00666   case TYPE_M1616:
00667   case TYPE_M1632:
00668   case TYPE_M1664:
00669   case TYPE_LEA:
00670     return translateRMMemory(mcInst, insn, Dis);
00671   }
00672 }
00673   
00674 /// translateFPRegister - Translates a stack position on the FPU stack to its
00675 ///   LLVM form, and appends it to an MCInst.
00676 ///
00677 /// @param mcInst       - The MCInst to append to.
00678 /// @param stackPos     - The stack position to translate.
00679 static void translateFPRegister(MCInst &mcInst,
00680                                 uint8_t stackPos) {
00681   mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
00682 }
00683 
00684 /// translateMaskRegister - Translates a 3-bit mask register number to
00685 ///   LLVM form, and appends it to an MCInst.
00686 ///
00687 /// @param mcInst       - The MCInst to append to.
00688 /// @param maskRegNum   - Number of mask register from 0 to 7.
00689 /// @return             - false on success; true otherwise.
00690 static bool translateMaskRegister(MCInst &mcInst,
00691                                 uint8_t maskRegNum) {
00692   if (maskRegNum >= 8) {
00693     debug("Invalid mask register number");
00694     return true;
00695   }
00696 
00697   mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum));
00698   return false;
00699 }
00700 
00701 /// translateOperand - Translates an operand stored in an internal instruction 
00702 ///   to LLVM's format and appends it to an MCInst.
00703 ///
00704 /// @param mcInst       - The MCInst to append to.
00705 /// @param operand      - The operand, as stored in the descriptor table.
00706 /// @param insn         - The internal instruction.
00707 /// @return             - false on success; true otherwise.
00708 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
00709                              InternalInstruction &insn,
00710                              const MCDisassembler *Dis) {  
00711   switch (operand.encoding) {
00712   default:
00713     debug("Unhandled operand encoding during translation");
00714     return true;
00715   case ENCODING_REG:
00716     translateRegister(mcInst, insn.reg);
00717     return false;
00718   case ENCODING_WRITEMASK:
00719     return translateMaskRegister(mcInst, insn.writemask);
00720   CASE_ENCODING_RM:
00721     return translateRM(mcInst, operand, insn, Dis);
00722   case ENCODING_CB:
00723   case ENCODING_CW:
00724   case ENCODING_CD:
00725   case ENCODING_CP:
00726   case ENCODING_CO:
00727   case ENCODING_CT:
00728     debug("Translation of code offsets isn't supported.");
00729     return true;
00730   case ENCODING_IB:
00731   case ENCODING_IW:
00732   case ENCODING_ID:
00733   case ENCODING_IO:
00734   case ENCODING_Iv:
00735   case ENCODING_Ia:
00736     translateImmediate(mcInst,
00737                        insn.immediates[insn.numImmediatesTranslated++],
00738                        operand,
00739                        insn,
00740                        Dis);
00741     return false;
00742   case ENCODING_SI:
00743     return translateSrcIndex(mcInst, insn);
00744   case ENCODING_DI:
00745     return translateDstIndex(mcInst, insn);
00746   case ENCODING_RB:
00747   case ENCODING_RW:
00748   case ENCODING_RD:
00749   case ENCODING_RO:
00750   case ENCODING_Rv:
00751     translateRegister(mcInst, insn.opcodeRegister);
00752     return false;
00753   case ENCODING_FP:
00754     translateFPRegister(mcInst, insn.modRM & 7);
00755     return false;
00756   case ENCODING_VVVV:
00757     translateRegister(mcInst, insn.vvvv);
00758     return false;
00759   case ENCODING_DUP:
00760     return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
00761                             insn, Dis);
00762   }
00763 }
00764   
00765 /// translateInstruction - Translates an internal instruction and all its
00766 ///   operands to an MCInst.
00767 ///
00768 /// @param mcInst       - The MCInst to populate with the instruction's data.
00769 /// @param insn         - The internal instruction.
00770 /// @return             - false on success; true otherwise.
00771 static bool translateInstruction(MCInst &mcInst,
00772                                 InternalInstruction &insn,
00773                                 const MCDisassembler *Dis) {  
00774   if (!insn.spec) {
00775     debug("Instruction has no specification");
00776     return true;
00777   }
00778   
00779   mcInst.setOpcode(insn.instructionID);
00780   // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
00781   // prefix bytes should be disassembled as xrelease and xacquire then set the
00782   // opcode to those instead of the rep and repne opcodes.
00783   if (insn.xAcquireRelease) {
00784     if(mcInst.getOpcode() == X86::REP_PREFIX)
00785       mcInst.setOpcode(X86::XRELEASE_PREFIX);
00786     else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
00787       mcInst.setOpcode(X86::XACQUIRE_PREFIX);
00788   }
00789   
00790   insn.numImmediatesTranslated = 0;
00791   
00792   for (const auto &Op : insn.operands) {
00793     if (Op.encoding != ENCODING_NONE) {
00794       if (translateOperand(mcInst, Op, insn, Dis)) {
00795         return true;
00796       }
00797     }
00798   }
00799   
00800   return false;
00801 }
00802 
00803 static MCDisassembler *createX86Disassembler(const Target &T,
00804                                              const MCSubtargetInfo &STI,
00805                                              MCContext &Ctx) {
00806   std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
00807   return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));
00808 }
00809 
00810 extern "C" void LLVMInitializeX86Disassembler() { 
00811   // Register the disassembler.
00812   TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 
00813                                          createX86Disassembler);
00814   TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
00815                                          createX86Disassembler);
00816 }