LLVM API Documentation
00001 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file is part of the X86 Disassembler. 00011 // It contains code to translate the data produced by the decoder into 00012 // MCInsts. 00013 // Documentation for the disassembler can be found in X86Disassembler.h. 00014 // 00015 //===----------------------------------------------------------------------===// 00016 00017 #include "X86Disassembler.h" 00018 #include "X86DisassemblerDecoder.h" 00019 #include "llvm/MC/MCContext.h" 00020 #include "llvm/MC/MCDisassembler.h" 00021 #include "llvm/MC/MCExpr.h" 00022 #include "llvm/MC/MCInst.h" 00023 #include "llvm/MC/MCInstrInfo.h" 00024 #include "llvm/MC/MCSubtargetInfo.h" 00025 #include "llvm/Support/Debug.h" 00026 #include "llvm/Support/MemoryObject.h" 00027 #include "llvm/Support/TargetRegistry.h" 00028 #include "llvm/Support/raw_ostream.h" 00029 00030 using namespace llvm; 00031 using namespace llvm::X86Disassembler; 00032 00033 #define DEBUG_TYPE "x86-disassembler" 00034 00035 #define GET_REGINFO_ENUM 00036 #include "X86GenRegisterInfo.inc" 00037 #define GET_INSTRINFO_ENUM 00038 #include "X86GenInstrInfo.inc" 00039 #define GET_SUBTARGETINFO_ENUM 00040 #include "X86GenSubtargetInfo.inc" 00041 00042 void llvm::X86Disassembler::Debug(const char *file, unsigned line, 00043 const char *s) { 00044 dbgs() << file << ":" << line << ": " << s; 00045 } 00046 00047 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, 00048 const void *mii) { 00049 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 00050 return MII->getName(Opcode); 00051 } 00052 00053 #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); 00054 00055 namespace llvm { 00056 00057 // Fill-ins to make the compiler happy. These constants are never actually 00058 // assigned; they are just filler to make an automatically-generated switch 00059 // statement work. 00060 namespace X86 { 00061 enum { 00062 BX_SI = 500, 00063 BX_DI = 501, 00064 BP_SI = 502, 00065 BP_DI = 503, 00066 sib = 504, 00067 sib64 = 505 00068 }; 00069 } 00070 00071 extern Target TheX86_32Target, TheX86_64Target; 00072 00073 } 00074 00075 static bool translateInstruction(MCInst &target, 00076 InternalInstruction &source, 00077 const MCDisassembler *Dis); 00078 00079 X86GenericDisassembler::X86GenericDisassembler( 00080 const MCSubtargetInfo &STI, 00081 MCContext &Ctx, 00082 std::unique_ptr<const MCInstrInfo> MII) 00083 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 00084 switch (STI.getFeatureBits() & 00085 (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { 00086 case X86::Mode16Bit: 00087 fMode = MODE_16BIT; 00088 break; 00089 case X86::Mode32Bit: 00090 fMode = MODE_32BIT; 00091 break; 00092 case X86::Mode64Bit: 00093 fMode = MODE_64BIT; 00094 break; 00095 default: 00096 llvm_unreachable("Invalid CPU mode"); 00097 } 00098 } 00099 00100 /// regionReader - a callback function that wraps the readByte method from 00101 /// MemoryObject. 00102 /// 00103 /// @param arg - The generic callback parameter. In this case, this should 00104 /// be a pointer to a MemoryObject. 00105 /// @param byte - A pointer to the byte to be read. 00106 /// @param address - The address to be read. 00107 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { 00108 const MemoryObject* region = static_cast<const MemoryObject*>(arg); 00109 return region->readByte(address, byte); 00110 } 00111 00112 /// logger - a callback function that wraps the operator<< method from 00113 /// raw_ostream. 00114 /// 00115 /// @param arg - The generic callback parameter. This should be a pointe 00116 /// to a raw_ostream. 00117 /// @param log - A string to be logged. logger() adds a newline. 00118 static void logger(void* arg, const char* log) { 00119 if (!arg) 00120 return; 00121 00122 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 00123 vStream << log << "\n"; 00124 } 00125 00126 // 00127 // Public interface for the disassembler 00128 // 00129 00130 MCDisassembler::DecodeStatus 00131 X86GenericDisassembler::getInstruction(MCInst &instr, 00132 uint64_t &size, 00133 const MemoryObject ®ion, 00134 uint64_t address, 00135 raw_ostream &vStream, 00136 raw_ostream &cStream) const { 00137 CommentStream = &cStream; 00138 00139 InternalInstruction internalInstr; 00140 00141 dlog_t loggerFn = logger; 00142 if (&vStream == &nulls()) 00143 loggerFn = nullptr; // Disable logging completely if it's going to nulls(). 00144 00145 int ret = decodeInstruction(&internalInstr, 00146 regionReader, 00147 (const void*)®ion, 00148 loggerFn, 00149 (void*)&vStream, 00150 (const void*)MII.get(), 00151 address, 00152 fMode); 00153 00154 if (ret) { 00155 size = internalInstr.readerCursor - address; 00156 return Fail; 00157 } 00158 else { 00159 size = internalInstr.length; 00160 return (!translateInstruction(instr, internalInstr, this)) ? 00161 Success : Fail; 00162 } 00163 } 00164 00165 // 00166 // Private code that translates from struct InternalInstructions to MCInsts. 00167 // 00168 00169 /// translateRegister - Translates an internal register to the appropriate LLVM 00170 /// register, and appends it as an operand to an MCInst. 00171 /// 00172 /// @param mcInst - The MCInst to append to. 00173 /// @param reg - The Reg to append. 00174 static void translateRegister(MCInst &mcInst, Reg reg) { 00175 #define ENTRY(x) X86::x, 00176 uint8_t llvmRegnums[] = { 00177 ALL_REGS 00178 0 00179 }; 00180 #undef ENTRY 00181 00182 uint8_t llvmRegnum = llvmRegnums[reg]; 00183 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 00184 } 00185 00186 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 00187 /// immediate Value in the MCInst. 00188 /// 00189 /// @param Value - The immediate Value, has had any PC adjustment made by 00190 /// the caller. 00191 /// @param isBranch - If the instruction is a branch instruction 00192 /// @param Address - The starting address of the instruction 00193 /// @param Offset - The byte offset to this immediate in the instruction 00194 /// @param Width - The byte width of this immediate in the instruction 00195 /// 00196 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 00197 /// called then that function is called to get any symbolic information for the 00198 /// immediate in the instruction using the Address, Offset and Width. If that 00199 /// returns non-zero then the symbolic information it returns is used to create 00200 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 00201 /// returns zero and isBranch is true then a symbol look up for immediate Value 00202 /// is done and if a symbol is found an MCExpr is created with that, else 00203 /// an MCExpr with the immediate Value is created. This function returns true 00204 /// if it adds an operand to the MCInst and false otherwise. 00205 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 00206 uint64_t Address, uint64_t Offset, 00207 uint64_t Width, MCInst &MI, 00208 const MCDisassembler *Dis) { 00209 return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, 00210 Offset, Width); 00211 } 00212 00213 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 00214 /// referenced by a load instruction with the base register that is the rip. 00215 /// These can often be addresses in a literal pool. The Address of the 00216 /// instruction and its immediate Value are used to determine the address 00217 /// being referenced in the literal pool entry. The SymbolLookUp call back will 00218 /// return a pointer to a literal 'C' string if the referenced address is an 00219 /// address into a section with 'C' string literals. 00220 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 00221 const void *Decoder) { 00222 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 00223 Dis->tryAddingPcLoadReferenceComment(Value, Address); 00224 } 00225 00226 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 00227 0, // SEG_OVERRIDE_NONE 00228 X86::CS, 00229 X86::SS, 00230 X86::DS, 00231 X86::ES, 00232 X86::FS, 00233 X86::GS 00234 }; 00235 00236 /// translateSrcIndex - Appends a source index operand to an MCInst. 00237 /// 00238 /// @param mcInst - The MCInst to append to. 00239 /// @param insn - The internal instruction. 00240 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 00241 unsigned baseRegNo; 00242 00243 if (insn.mode == MODE_64BIT) 00244 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; 00245 else if (insn.mode == MODE_32BIT) 00246 baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; 00247 else { 00248 assert(insn.mode == MODE_16BIT); 00249 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; 00250 } 00251 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 00252 mcInst.addOperand(baseReg); 00253 00254 MCOperand segmentReg; 00255 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 00256 mcInst.addOperand(segmentReg); 00257 return false; 00258 } 00259 00260 /// translateDstIndex - Appends a destination index operand to an MCInst. 00261 /// 00262 /// @param mcInst - The MCInst to append to. 00263 /// @param insn - The internal instruction. 00264 00265 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 00266 unsigned baseRegNo; 00267 00268 if (insn.mode == MODE_64BIT) 00269 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; 00270 else if (insn.mode == MODE_32BIT) 00271 baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; 00272 else { 00273 assert(insn.mode == MODE_16BIT); 00274 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; 00275 } 00276 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 00277 mcInst.addOperand(baseReg); 00278 return false; 00279 } 00280 00281 /// translateImmediate - Appends an immediate operand to an MCInst. 00282 /// 00283 /// @param mcInst - The MCInst to append to. 00284 /// @param immediate - The immediate value to append. 00285 /// @param operand - The operand, as stored in the descriptor table. 00286 /// @param insn - The internal instruction. 00287 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 00288 const OperandSpecifier &operand, 00289 InternalInstruction &insn, 00290 const MCDisassembler *Dis) { 00291 // Sign-extend the immediate if necessary. 00292 00293 OperandType type = (OperandType)operand.type; 00294 00295 bool isBranch = false; 00296 uint64_t pcrel = 0; 00297 if (type == TYPE_RELv) { 00298 isBranch = true; 00299 pcrel = insn.startLocation + 00300 insn.immediateOffset + insn.immediateSize; 00301 switch (insn.displacementSize) { 00302 default: 00303 break; 00304 case 1: 00305 if(immediate & 0x80) 00306 immediate |= ~(0xffull); 00307 break; 00308 case 2: 00309 if(immediate & 0x8000) 00310 immediate |= ~(0xffffull); 00311 break; 00312 case 4: 00313 if(immediate & 0x80000000) 00314 immediate |= ~(0xffffffffull); 00315 break; 00316 case 8: 00317 break; 00318 } 00319 } 00320 // By default sign-extend all X86 immediates based on their encoding. 00321 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 00322 type == TYPE_IMM64 || type == TYPE_IMMv) { 00323 uint32_t Opcode = mcInst.getOpcode(); 00324 switch (operand.encoding) { 00325 default: 00326 break; 00327 case ENCODING_IB: 00328 // Special case those X86 instructions that use the imm8 as a set of 00329 // bits, bit count, etc. and are not sign-extend. 00330 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 00331 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 00332 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 00333 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 00334 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 00335 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 00336 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 00337 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 00338 Opcode != X86::VINSERTPSrr) 00339 if(immediate & 0x80) 00340 immediate |= ~(0xffull); 00341 break; 00342 case ENCODING_IW: 00343 if(immediate & 0x8000) 00344 immediate |= ~(0xffffull); 00345 break; 00346 case ENCODING_ID: 00347 if(immediate & 0x80000000) 00348 immediate |= ~(0xffffffffull); 00349 break; 00350 case ENCODING_IO: 00351 break; 00352 } 00353 } 00354 00355 switch (type) { 00356 case TYPE_XMM32: 00357 case TYPE_XMM64: 00358 case TYPE_XMM128: 00359 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 00360 return; 00361 case TYPE_XMM256: 00362 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 00363 return; 00364 case TYPE_XMM512: 00365 mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4))); 00366 return; 00367 case TYPE_REL8: 00368 isBranch = true; 00369 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 00370 if(immediate & 0x80) 00371 immediate |= ~(0xffull); 00372 break; 00373 case TYPE_REL32: 00374 case TYPE_REL64: 00375 isBranch = true; 00376 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 00377 if(immediate & 0x80000000) 00378 immediate |= ~(0xffffffffull); 00379 break; 00380 default: 00381 // operand is 64 bits wide. Do nothing. 00382 break; 00383 } 00384 00385 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 00386 insn.immediateOffset, insn.immediateSize, 00387 mcInst, Dis)) 00388 mcInst.addOperand(MCOperand::CreateImm(immediate)); 00389 00390 if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 || 00391 type == TYPE_MOFFS32 || type == TYPE_MOFFS64) { 00392 MCOperand segmentReg; 00393 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 00394 mcInst.addOperand(segmentReg); 00395 } 00396 } 00397 00398 /// translateRMRegister - Translates a register stored in the R/M field of the 00399 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 00400 /// @param mcInst - The MCInst to append to. 00401 /// @param insn - The internal instruction to extract the R/M field 00402 /// from. 00403 /// @return - 0 on success; -1 otherwise 00404 static bool translateRMRegister(MCInst &mcInst, 00405 InternalInstruction &insn) { 00406 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 00407 debug("A R/M register operand may not have a SIB byte"); 00408 return true; 00409 } 00410 00411 switch (insn.eaBase) { 00412 default: 00413 debug("Unexpected EA base register"); 00414 return true; 00415 case EA_BASE_NONE: 00416 debug("EA_BASE_NONE for ModR/M base"); 00417 return true; 00418 #define ENTRY(x) case EA_BASE_##x: 00419 ALL_EA_BASES 00420 #undef ENTRY 00421 debug("A R/M register operand may not have a base; " 00422 "the operand must be a register."); 00423 return true; 00424 #define ENTRY(x) \ 00425 case EA_REG_##x: \ 00426 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 00427 ALL_REGS 00428 #undef ENTRY 00429 } 00430 00431 return false; 00432 } 00433 00434 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 00435 /// fields of an internal instruction (and possibly its SIB byte) to a memory 00436 /// operand in LLVM's format, and appends it to an MCInst. 00437 /// 00438 /// @param mcInst - The MCInst to append to. 00439 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 00440 /// from. 00441 /// @return - 0 on success; nonzero otherwise 00442 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 00443 const MCDisassembler *Dis) { 00444 // Addresses in an MCInst are represented as five operands: 00445 // 1. basereg (register) The R/M base, or (if there is a SIB) the 00446 // SIB base 00447 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 00448 // scale amount 00449 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 00450 // the index (which is multiplied by the 00451 // scale amount) 00452 // 4. displacement (immediate) 0, or the displacement if there is one 00453 // 5. segmentreg (register) x86_registerNONE for now, but could be set 00454 // if we have segment overrides 00455 00456 MCOperand baseReg; 00457 MCOperand scaleAmount; 00458 MCOperand indexReg; 00459 MCOperand displacement; 00460 MCOperand segmentReg; 00461 uint64_t pcrel = 0; 00462 00463 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 00464 if (insn.sibBase != SIB_BASE_NONE) { 00465 switch (insn.sibBase) { 00466 default: 00467 debug("Unexpected sibBase"); 00468 return true; 00469 #define ENTRY(x) \ 00470 case SIB_BASE_##x: \ 00471 baseReg = MCOperand::CreateReg(X86::x); break; 00472 ALL_SIB_BASES 00473 #undef ENTRY 00474 } 00475 } else { 00476 baseReg = MCOperand::CreateReg(0); 00477 } 00478 00479 // Check whether we are handling VSIB addressing mode for GATHER. 00480 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 00481 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 00482 // I don't see a way to get the correct IndexReg in readSIB: 00483 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 00484 // but instruction ID may not be decoded yet when calling readSIB. 00485 uint32_t Opcode = mcInst.getOpcode(); 00486 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 00487 Opcode == X86::VGATHERDPDYrm || 00488 Opcode == X86::VGATHERQPDrm || 00489 Opcode == X86::VGATHERDPSrm || 00490 Opcode == X86::VGATHERQPSrm || 00491 Opcode == X86::VPGATHERDQrm || 00492 Opcode == X86::VPGATHERDQYrm || 00493 Opcode == X86::VPGATHERQQrm || 00494 Opcode == X86::VPGATHERDDrm || 00495 Opcode == X86::VPGATHERQDrm); 00496 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 00497 Opcode == X86::VGATHERDPSYrm || 00498 Opcode == X86::VGATHERQPSYrm || 00499 Opcode == X86::VGATHERDPDZrm || 00500 Opcode == X86::VPGATHERDQZrm || 00501 Opcode == X86::VPGATHERQQYrm || 00502 Opcode == X86::VPGATHERDDYrm || 00503 Opcode == X86::VPGATHERQDYrm); 00504 bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || 00505 Opcode == X86::VGATHERDPSZrm || 00506 Opcode == X86::VGATHERQPSZrm || 00507 Opcode == X86::VPGATHERQQZrm || 00508 Opcode == X86::VPGATHERDDZrm || 00509 Opcode == X86::VPGATHERQDZrm); 00510 if (IndexIs128 || IndexIs256 || IndexIs512) { 00511 unsigned IndexOffset = insn.sibIndex - 00512 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 00513 SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : 00514 IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 00515 insn.sibIndex = (SIBIndex)(IndexBase + 00516 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 00517 } 00518 00519 if (insn.sibIndex != SIB_INDEX_NONE) { 00520 switch (insn.sibIndex) { 00521 default: 00522 debug("Unexpected sibIndex"); 00523 return true; 00524 #define ENTRY(x) \ 00525 case SIB_INDEX_##x: \ 00526 indexReg = MCOperand::CreateReg(X86::x); break; 00527 EA_BASES_32BIT 00528 EA_BASES_64BIT 00529 REGS_XMM 00530 REGS_YMM 00531 REGS_ZMM 00532 #undef ENTRY 00533 } 00534 } else { 00535 indexReg = MCOperand::CreateReg(0); 00536 } 00537 00538 scaleAmount = MCOperand::CreateImm(insn.sibScale); 00539 } else { 00540 switch (insn.eaBase) { 00541 case EA_BASE_NONE: 00542 if (insn.eaDisplacement == EA_DISP_NONE) { 00543 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 00544 return true; 00545 } 00546 if (insn.mode == MODE_64BIT){ 00547 pcrel = insn.startLocation + 00548 insn.displacementOffset + insn.displacementSize; 00549 tryAddingPcLoadReferenceComment(insn.startLocation + 00550 insn.displacementOffset, 00551 insn.displacement + pcrel, Dis); 00552 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 00553 } 00554 else 00555 baseReg = MCOperand::CreateReg(0); 00556 00557 indexReg = MCOperand::CreateReg(0); 00558 break; 00559 case EA_BASE_BX_SI: 00560 baseReg = MCOperand::CreateReg(X86::BX); 00561 indexReg = MCOperand::CreateReg(X86::SI); 00562 break; 00563 case EA_BASE_BX_DI: 00564 baseReg = MCOperand::CreateReg(X86::BX); 00565 indexReg = MCOperand::CreateReg(X86::DI); 00566 break; 00567 case EA_BASE_BP_SI: 00568 baseReg = MCOperand::CreateReg(X86::BP); 00569 indexReg = MCOperand::CreateReg(X86::SI); 00570 break; 00571 case EA_BASE_BP_DI: 00572 baseReg = MCOperand::CreateReg(X86::BP); 00573 indexReg = MCOperand::CreateReg(X86::DI); 00574 break; 00575 default: 00576 indexReg = MCOperand::CreateReg(0); 00577 switch (insn.eaBase) { 00578 default: 00579 debug("Unexpected eaBase"); 00580 return true; 00581 // Here, we will use the fill-ins defined above. However, 00582 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 00583 // sib and sib64 were handled in the top-level if, so they're only 00584 // placeholders to keep the compiler happy. 00585 #define ENTRY(x) \ 00586 case EA_BASE_##x: \ 00587 baseReg = MCOperand::CreateReg(X86::x); break; 00588 ALL_EA_BASES 00589 #undef ENTRY 00590 #define ENTRY(x) case EA_REG_##x: 00591 ALL_REGS 00592 #undef ENTRY 00593 debug("A R/M memory operand may not be a register; " 00594 "the base field must be a base."); 00595 return true; 00596 } 00597 } 00598 00599 scaleAmount = MCOperand::CreateImm(1); 00600 } 00601 00602 displacement = MCOperand::CreateImm(insn.displacement); 00603 00604 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 00605 00606 mcInst.addOperand(baseReg); 00607 mcInst.addOperand(scaleAmount); 00608 mcInst.addOperand(indexReg); 00609 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 00610 insn.startLocation, insn.displacementOffset, 00611 insn.displacementSize, mcInst, Dis)) 00612 mcInst.addOperand(displacement); 00613 mcInst.addOperand(segmentReg); 00614 return false; 00615 } 00616 00617 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 00618 /// byte of an instruction to LLVM form, and appends it to an MCInst. 00619 /// 00620 /// @param mcInst - The MCInst to append to. 00621 /// @param operand - The operand, as stored in the descriptor table. 00622 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 00623 /// from. 00624 /// @return - 0 on success; nonzero otherwise 00625 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 00626 InternalInstruction &insn, const MCDisassembler *Dis) { 00627 switch (operand.type) { 00628 default: 00629 debug("Unexpected type for a R/M operand"); 00630 return true; 00631 case TYPE_R8: 00632 case TYPE_R16: 00633 case TYPE_R32: 00634 case TYPE_R64: 00635 case TYPE_Rv: 00636 case TYPE_MM: 00637 case TYPE_MM32: 00638 case TYPE_MM64: 00639 case TYPE_XMM: 00640 case TYPE_XMM32: 00641 case TYPE_XMM64: 00642 case TYPE_XMM128: 00643 case TYPE_XMM256: 00644 case TYPE_XMM512: 00645 case TYPE_VK1: 00646 case TYPE_VK8: 00647 case TYPE_VK16: 00648 case TYPE_DEBUGREG: 00649 case TYPE_CONTROLREG: 00650 return translateRMRegister(mcInst, insn); 00651 case TYPE_M: 00652 case TYPE_M8: 00653 case TYPE_M16: 00654 case TYPE_M32: 00655 case TYPE_M64: 00656 case TYPE_M128: 00657 case TYPE_M256: 00658 case TYPE_M512: 00659 case TYPE_Mv: 00660 case TYPE_M32FP: 00661 case TYPE_M64FP: 00662 case TYPE_M80FP: 00663 case TYPE_M16INT: 00664 case TYPE_M32INT: 00665 case TYPE_M64INT: 00666 case TYPE_M1616: 00667 case TYPE_M1632: 00668 case TYPE_M1664: 00669 case TYPE_LEA: 00670 return translateRMMemory(mcInst, insn, Dis); 00671 } 00672 } 00673 00674 /// translateFPRegister - Translates a stack position on the FPU stack to its 00675 /// LLVM form, and appends it to an MCInst. 00676 /// 00677 /// @param mcInst - The MCInst to append to. 00678 /// @param stackPos - The stack position to translate. 00679 static void translateFPRegister(MCInst &mcInst, 00680 uint8_t stackPos) { 00681 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 00682 } 00683 00684 /// translateMaskRegister - Translates a 3-bit mask register number to 00685 /// LLVM form, and appends it to an MCInst. 00686 /// 00687 /// @param mcInst - The MCInst to append to. 00688 /// @param maskRegNum - Number of mask register from 0 to 7. 00689 /// @return - false on success; true otherwise. 00690 static bool translateMaskRegister(MCInst &mcInst, 00691 uint8_t maskRegNum) { 00692 if (maskRegNum >= 8) { 00693 debug("Invalid mask register number"); 00694 return true; 00695 } 00696 00697 mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum)); 00698 return false; 00699 } 00700 00701 /// translateOperand - Translates an operand stored in an internal instruction 00702 /// to LLVM's format and appends it to an MCInst. 00703 /// 00704 /// @param mcInst - The MCInst to append to. 00705 /// @param operand - The operand, as stored in the descriptor table. 00706 /// @param insn - The internal instruction. 00707 /// @return - false on success; true otherwise. 00708 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 00709 InternalInstruction &insn, 00710 const MCDisassembler *Dis) { 00711 switch (operand.encoding) { 00712 default: 00713 debug("Unhandled operand encoding during translation"); 00714 return true; 00715 case ENCODING_REG: 00716 translateRegister(mcInst, insn.reg); 00717 return false; 00718 case ENCODING_WRITEMASK: 00719 return translateMaskRegister(mcInst, insn.writemask); 00720 CASE_ENCODING_RM: 00721 return translateRM(mcInst, operand, insn, Dis); 00722 case ENCODING_CB: 00723 case ENCODING_CW: 00724 case ENCODING_CD: 00725 case ENCODING_CP: 00726 case ENCODING_CO: 00727 case ENCODING_CT: 00728 debug("Translation of code offsets isn't supported."); 00729 return true; 00730 case ENCODING_IB: 00731 case ENCODING_IW: 00732 case ENCODING_ID: 00733 case ENCODING_IO: 00734 case ENCODING_Iv: 00735 case ENCODING_Ia: 00736 translateImmediate(mcInst, 00737 insn.immediates[insn.numImmediatesTranslated++], 00738 operand, 00739 insn, 00740 Dis); 00741 return false; 00742 case ENCODING_SI: 00743 return translateSrcIndex(mcInst, insn); 00744 case ENCODING_DI: 00745 return translateDstIndex(mcInst, insn); 00746 case ENCODING_RB: 00747 case ENCODING_RW: 00748 case ENCODING_RD: 00749 case ENCODING_RO: 00750 case ENCODING_Rv: 00751 translateRegister(mcInst, insn.opcodeRegister); 00752 return false; 00753 case ENCODING_FP: 00754 translateFPRegister(mcInst, insn.modRM & 7); 00755 return false; 00756 case ENCODING_VVVV: 00757 translateRegister(mcInst, insn.vvvv); 00758 return false; 00759 case ENCODING_DUP: 00760 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 00761 insn, Dis); 00762 } 00763 } 00764 00765 /// translateInstruction - Translates an internal instruction and all its 00766 /// operands to an MCInst. 00767 /// 00768 /// @param mcInst - The MCInst to populate with the instruction's data. 00769 /// @param insn - The internal instruction. 00770 /// @return - false on success; true otherwise. 00771 static bool translateInstruction(MCInst &mcInst, 00772 InternalInstruction &insn, 00773 const MCDisassembler *Dis) { 00774 if (!insn.spec) { 00775 debug("Instruction has no specification"); 00776 return true; 00777 } 00778 00779 mcInst.setOpcode(insn.instructionID); 00780 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 00781 // prefix bytes should be disassembled as xrelease and xacquire then set the 00782 // opcode to those instead of the rep and repne opcodes. 00783 if (insn.xAcquireRelease) { 00784 if(mcInst.getOpcode() == X86::REP_PREFIX) 00785 mcInst.setOpcode(X86::XRELEASE_PREFIX); 00786 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 00787 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 00788 } 00789 00790 insn.numImmediatesTranslated = 0; 00791 00792 for (const auto &Op : insn.operands) { 00793 if (Op.encoding != ENCODING_NONE) { 00794 if (translateOperand(mcInst, Op, insn, Dis)) { 00795 return true; 00796 } 00797 } 00798 } 00799 00800 return false; 00801 } 00802 00803 static MCDisassembler *createX86Disassembler(const Target &T, 00804 const MCSubtargetInfo &STI, 00805 MCContext &Ctx) { 00806 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 00807 return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); 00808 } 00809 00810 extern "C" void LLVMInitializeX86Disassembler() { 00811 // Register the disassembler. 00812 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 00813 createX86Disassembler); 00814 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 00815 createX86Disassembler); 00816 }