LLVM API Documentation
00001 //===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file is part of the X86 Disassembler. 00011 // It contains the implementation of the instruction decoder. 00012 // Documentation for the disassembler can be found in X86Disassembler.h. 00013 // 00014 //===----------------------------------------------------------------------===// 00015 00016 #include <stdarg.h> /* for va_*() */ 00017 #include <stdio.h> /* for vsnprintf() */ 00018 #include <stdlib.h> /* for exit() */ 00019 #include <string.h> /* for memset() */ 00020 00021 #include "X86DisassemblerDecoder.h" 00022 00023 using namespace llvm::X86Disassembler; 00024 00025 /// Specifies whether a ModR/M byte is needed and (if so) which 00026 /// instruction each possible value of the ModR/M byte corresponds to. Once 00027 /// this information is known, we have narrowed down to a single instruction. 00028 struct ModRMDecision { 00029 uint8_t modrm_type; 00030 uint16_t instructionIDs; 00031 }; 00032 00033 /// Specifies which set of ModR/M->instruction tables to look at 00034 /// given a particular opcode. 00035 struct OpcodeDecision { 00036 ModRMDecision modRMDecisions[256]; 00037 }; 00038 00039 /// Specifies which opcode->instruction tables to look at given 00040 /// a particular context (set of attributes). Since there are many possible 00041 /// contexts, the decoder first uses CONTEXTS_SYM to determine which context 00042 /// applies given a specific set of attributes. Hence there are only IC_max 00043 /// entries in this table, rather than 2^(ATTR_max). 00044 struct ContextDecision { 00045 OpcodeDecision opcodeDecisions[IC_max]; 00046 }; 00047 00048 #include "X86GenDisassemblerTables.inc" 00049 00050 #ifndef NDEBUG 00051 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0) 00052 #else 00053 #define debug(s) do { } while (0) 00054 #endif 00055 00056 00057 /* 00058 * contextForAttrs - Client for the instruction context table. Takes a set of 00059 * attributes and returns the appropriate decode context. 00060 * 00061 * @param attrMask - Attributes, from the enumeration attributeBits. 00062 * @return - The InstructionContext to use when looking up an 00063 * an instruction with these attributes. 00064 */ 00065 static InstructionContext contextForAttrs(uint16_t attrMask) { 00066 return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]); 00067 } 00068 00069 /* 00070 * modRMRequired - Reads the appropriate instruction table to determine whether 00071 * the ModR/M byte is required to decode a particular instruction. 00072 * 00073 * @param type - The opcode type (i.e., how many bytes it has). 00074 * @param insnContext - The context for the instruction, as returned by 00075 * contextForAttrs. 00076 * @param opcode - The last byte of the instruction's opcode, not counting 00077 * ModR/M extensions and escapes. 00078 * @return - true if the ModR/M byte is required, false otherwise. 00079 */ 00080 static int modRMRequired(OpcodeType type, 00081 InstructionContext insnContext, 00082 uint16_t opcode) { 00083 const struct ContextDecision* decision = nullptr; 00084 00085 switch (type) { 00086 case ONEBYTE: 00087 decision = &ONEBYTE_SYM; 00088 break; 00089 case TWOBYTE: 00090 decision = &TWOBYTE_SYM; 00091 break; 00092 case THREEBYTE_38: 00093 decision = &THREEBYTE38_SYM; 00094 break; 00095 case THREEBYTE_3A: 00096 decision = &THREEBYTE3A_SYM; 00097 break; 00098 case XOP8_MAP: 00099 decision = &XOP8_MAP_SYM; 00100 break; 00101 case XOP9_MAP: 00102 decision = &XOP9_MAP_SYM; 00103 break; 00104 case XOPA_MAP: 00105 decision = &XOPA_MAP_SYM; 00106 break; 00107 } 00108 00109 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 00110 modrm_type != MODRM_ONEENTRY; 00111 } 00112 00113 /* 00114 * decode - Reads the appropriate instruction table to obtain the unique ID of 00115 * an instruction. 00116 * 00117 * @param type - See modRMRequired(). 00118 * @param insnContext - See modRMRequired(). 00119 * @param opcode - See modRMRequired(). 00120 * @param modRM - The ModR/M byte if required, or any value if not. 00121 * @return - The UID of the instruction, or 0 on failure. 00122 */ 00123 static InstrUID decode(OpcodeType type, 00124 InstructionContext insnContext, 00125 uint8_t opcode, 00126 uint8_t modRM) { 00127 const struct ModRMDecision* dec = nullptr; 00128 00129 switch (type) { 00130 case ONEBYTE: 00131 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00132 break; 00133 case TWOBYTE: 00134 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00135 break; 00136 case THREEBYTE_38: 00137 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00138 break; 00139 case THREEBYTE_3A: 00140 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00141 break; 00142 case XOP8_MAP: 00143 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00144 break; 00145 case XOP9_MAP: 00146 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00147 break; 00148 case XOPA_MAP: 00149 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00150 break; 00151 } 00152 00153 switch (dec->modrm_type) { 00154 default: 00155 debug("Corrupt table! Unknown modrm_type"); 00156 return 0; 00157 case MODRM_ONEENTRY: 00158 return modRMTable[dec->instructionIDs]; 00159 case MODRM_SPLITRM: 00160 if (modFromModRM(modRM) == 0x3) 00161 return modRMTable[dec->instructionIDs+1]; 00162 return modRMTable[dec->instructionIDs]; 00163 case MODRM_SPLITREG: 00164 if (modFromModRM(modRM) == 0x3) 00165 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 00166 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 00167 case MODRM_SPLITMISC: 00168 if (modFromModRM(modRM) == 0x3) 00169 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 00170 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 00171 case MODRM_FULL: 00172 return modRMTable[dec->instructionIDs+modRM]; 00173 } 00174 } 00175 00176 /* 00177 * specifierForUID - Given a UID, returns the name and operand specification for 00178 * that instruction. 00179 * 00180 * @param uid - The unique ID for the instruction. This should be returned by 00181 * decode(); specifierForUID will not check bounds. 00182 * @return - A pointer to the specification for that instruction. 00183 */ 00184 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 00185 return &INSTRUCTIONS_SYM[uid]; 00186 } 00187 00188 /* 00189 * consumeByte - Uses the reader function provided by the user to consume one 00190 * byte from the instruction's memory and advance the cursor. 00191 * 00192 * @param insn - The instruction with the reader function to use. The cursor 00193 * for this instruction is advanced. 00194 * @param byte - A pointer to a pre-allocated memory buffer to be populated 00195 * with the data read. 00196 * @return - 0 if the read was successful; nonzero otherwise. 00197 */ 00198 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 00199 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 00200 00201 if (!ret) 00202 ++(insn->readerCursor); 00203 00204 return ret; 00205 } 00206 00207 /* 00208 * lookAtByte - Like consumeByte, but does not advance the cursor. 00209 * 00210 * @param insn - See consumeByte(). 00211 * @param byte - See consumeByte(). 00212 * @return - See consumeByte(). 00213 */ 00214 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 00215 return insn->reader(insn->readerArg, byte, insn->readerCursor); 00216 } 00217 00218 static void unconsumeByte(struct InternalInstruction* insn) { 00219 insn->readerCursor--; 00220 } 00221 00222 #define CONSUME_FUNC(name, type) \ 00223 static int name(struct InternalInstruction* insn, type* ptr) { \ 00224 type combined = 0; \ 00225 unsigned offset; \ 00226 for (offset = 0; offset < sizeof(type); ++offset) { \ 00227 uint8_t byte; \ 00228 int ret = insn->reader(insn->readerArg, \ 00229 &byte, \ 00230 insn->readerCursor + offset); \ 00231 if (ret) \ 00232 return ret; \ 00233 combined = combined | ((uint64_t)byte << (offset * 8)); \ 00234 } \ 00235 *ptr = combined; \ 00236 insn->readerCursor += sizeof(type); \ 00237 return 0; \ 00238 } 00239 00240 /* 00241 * consume* - Use the reader function provided by the user to consume data 00242 * values of various sizes from the instruction's memory and advance the 00243 * cursor appropriately. These readers perform endian conversion. 00244 * 00245 * @param insn - See consumeByte(). 00246 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 00247 * be populated with the data read. 00248 * @return - See consumeByte(). 00249 */ 00250 CONSUME_FUNC(consumeInt8, int8_t) 00251 CONSUME_FUNC(consumeInt16, int16_t) 00252 CONSUME_FUNC(consumeInt32, int32_t) 00253 CONSUME_FUNC(consumeUInt16, uint16_t) 00254 CONSUME_FUNC(consumeUInt32, uint32_t) 00255 CONSUME_FUNC(consumeUInt64, uint64_t) 00256 00257 /* 00258 * dbgprintf - Uses the logging function provided by the user to log a single 00259 * message, typically without a carriage-return. 00260 * 00261 * @param insn - The instruction containing the logging function. 00262 * @param format - See printf(). 00263 * @param ... - See printf(). 00264 */ 00265 static void dbgprintf(struct InternalInstruction* insn, 00266 const char* format, 00267 ...) { 00268 char buffer[256]; 00269 va_list ap; 00270 00271 if (!insn->dlog) 00272 return; 00273 00274 va_start(ap, format); 00275 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 00276 va_end(ap); 00277 00278 insn->dlog(insn->dlogArg, buffer); 00279 00280 return; 00281 } 00282 00283 /* 00284 * setPrefixPresent - Marks that a particular prefix is present at a particular 00285 * location. 00286 * 00287 * @param insn - The instruction to be marked as having the prefix. 00288 * @param prefix - The prefix that is present. 00289 * @param location - The location where the prefix is located (in the address 00290 * space of the instruction's reader). 00291 */ 00292 static void setPrefixPresent(struct InternalInstruction* insn, 00293 uint8_t prefix, 00294 uint64_t location) 00295 { 00296 insn->prefixPresent[prefix] = 1; 00297 insn->prefixLocations[prefix] = location; 00298 } 00299 00300 /* 00301 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 00302 * present at a given location. 00303 * 00304 * @param insn - The instruction to be queried. 00305 * @param prefix - The prefix. 00306 * @param location - The location to query. 00307 * @return - Whether the prefix is at that location. 00308 */ 00309 static bool isPrefixAtLocation(struct InternalInstruction* insn, 00310 uint8_t prefix, 00311 uint64_t location) 00312 { 00313 if (insn->prefixPresent[prefix] == 1 && 00314 insn->prefixLocations[prefix] == location) 00315 return true; 00316 else 00317 return false; 00318 } 00319 00320 /* 00321 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 00322 * instruction as having them. Also sets the instruction's default operand, 00323 * address, and other relevant data sizes to report operands correctly. 00324 * 00325 * @param insn - The instruction whose prefixes are to be read. 00326 * @return - 0 if the instruction could be read until the end of the prefix 00327 * bytes, and no prefixes conflicted; nonzero otherwise. 00328 */ 00329 static int readPrefixes(struct InternalInstruction* insn) { 00330 bool isPrefix = true; 00331 bool prefixGroups[4] = { false }; 00332 uint64_t prefixLocation; 00333 uint8_t byte = 0; 00334 uint8_t nextByte; 00335 00336 bool hasAdSize = false; 00337 bool hasOpSize = false; 00338 00339 dbgprintf(insn, "readPrefixes()"); 00340 00341 while (isPrefix) { 00342 prefixLocation = insn->readerCursor; 00343 00344 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ 00345 if (consumeByte(insn, &byte)) 00346 break; 00347 00348 /* 00349 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 00350 * break and let it be disassembled as a normal "instruction". 00351 */ 00352 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) 00353 break; 00354 00355 if (insn->readerCursor - 1 == insn->startLocation 00356 && (byte == 0xf2 || byte == 0xf3) 00357 && !lookAtByte(insn, &nextByte)) 00358 { 00359 /* 00360 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 00361 * met: 00362 * - it is followed by a LOCK (0xf0) prefix 00363 * - it is followed by an xchg instruction 00364 * then it should be disassembled as a xacquire/xrelease not repne/rep. 00365 */ 00366 if ((byte == 0xf2 || byte == 0xf3) && 00367 ((nextByte == 0xf0) | 00368 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) 00369 insn->xAcquireRelease = true; 00370 /* 00371 * Also if the byte is 0xf3, and the following condition is met: 00372 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 00373 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 00374 * then it should be disassembled as an xrelease not rep. 00375 */ 00376 if (byte == 0xf3 && 00377 (nextByte == 0x88 || nextByte == 0x89 || 00378 nextByte == 0xc6 || nextByte == 0xc7)) 00379 insn->xAcquireRelease = true; 00380 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 00381 if (consumeByte(insn, &nextByte)) 00382 return -1; 00383 if (lookAtByte(insn, &nextByte)) 00384 return -1; 00385 unconsumeByte(insn); 00386 } 00387 if (nextByte != 0x0f && nextByte != 0x90) 00388 break; 00389 } 00390 00391 switch (byte) { 00392 case 0xf0: /* LOCK */ 00393 case 0xf2: /* REPNE/REPNZ */ 00394 case 0xf3: /* REP or REPE/REPZ */ 00395 if (prefixGroups[0]) 00396 dbgprintf(insn, "Redundant Group 1 prefix"); 00397 prefixGroups[0] = true; 00398 setPrefixPresent(insn, byte, prefixLocation); 00399 break; 00400 case 0x2e: /* CS segment override -OR- Branch not taken */ 00401 case 0x36: /* SS segment override -OR- Branch taken */ 00402 case 0x3e: /* DS segment override */ 00403 case 0x26: /* ES segment override */ 00404 case 0x64: /* FS segment override */ 00405 case 0x65: /* GS segment override */ 00406 switch (byte) { 00407 case 0x2e: 00408 insn->segmentOverride = SEG_OVERRIDE_CS; 00409 break; 00410 case 0x36: 00411 insn->segmentOverride = SEG_OVERRIDE_SS; 00412 break; 00413 case 0x3e: 00414 insn->segmentOverride = SEG_OVERRIDE_DS; 00415 break; 00416 case 0x26: 00417 insn->segmentOverride = SEG_OVERRIDE_ES; 00418 break; 00419 case 0x64: 00420 insn->segmentOverride = SEG_OVERRIDE_FS; 00421 break; 00422 case 0x65: 00423 insn->segmentOverride = SEG_OVERRIDE_GS; 00424 break; 00425 default: 00426 debug("Unhandled override"); 00427 return -1; 00428 } 00429 if (prefixGroups[1]) 00430 dbgprintf(insn, "Redundant Group 2 prefix"); 00431 prefixGroups[1] = true; 00432 setPrefixPresent(insn, byte, prefixLocation); 00433 break; 00434 case 0x66: /* Operand-size override */ 00435 if (prefixGroups[2]) 00436 dbgprintf(insn, "Redundant Group 3 prefix"); 00437 prefixGroups[2] = true; 00438 hasOpSize = true; 00439 setPrefixPresent(insn, byte, prefixLocation); 00440 break; 00441 case 0x67: /* Address-size override */ 00442 if (prefixGroups[3]) 00443 dbgprintf(insn, "Redundant Group 4 prefix"); 00444 prefixGroups[3] = true; 00445 hasAdSize = true; 00446 setPrefixPresent(insn, byte, prefixLocation); 00447 break; 00448 default: /* Not a prefix byte */ 00449 isPrefix = false; 00450 break; 00451 } 00452 00453 if (isPrefix) 00454 dbgprintf(insn, "Found prefix 0x%hhx", byte); 00455 } 00456 00457 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 00458 00459 if (byte == 0x62) { 00460 uint8_t byte1, byte2; 00461 00462 if (consumeByte(insn, &byte1)) { 00463 dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); 00464 return -1; 00465 } 00466 00467 if (lookAtByte(insn, &byte2)) { 00468 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 00469 return -1; 00470 } 00471 00472 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && 00473 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { 00474 insn->vectorExtensionType = TYPE_EVEX; 00475 } 00476 else { 00477 unconsumeByte(insn); /* unconsume byte1 */ 00478 unconsumeByte(insn); /* unconsume byte */ 00479 insn->necessaryPrefixLocation = insn->readerCursor - 2; 00480 } 00481 00482 if (insn->vectorExtensionType == TYPE_EVEX) { 00483 insn->vectorExtensionPrefix[0] = byte; 00484 insn->vectorExtensionPrefix[1] = byte1; 00485 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { 00486 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 00487 return -1; 00488 } 00489 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { 00490 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); 00491 return -1; 00492 } 00493 00494 /* We simulate the REX prefix for simplicity's sake */ 00495 if (insn->mode == MODE_64BIT) { 00496 insn->rexPrefix = 0x40 00497 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) 00498 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) 00499 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) 00500 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 00501 } 00502 00503 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 00504 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 00505 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); 00506 } 00507 } 00508 else if (byte == 0xc4) { 00509 uint8_t byte1; 00510 00511 if (lookAtByte(insn, &byte1)) { 00512 dbgprintf(insn, "Couldn't read second byte of VEX"); 00513 return -1; 00514 } 00515 00516 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 00517 insn->vectorExtensionType = TYPE_VEX_3B; 00518 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00519 } 00520 else { 00521 unconsumeByte(insn); 00522 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00523 } 00524 00525 if (insn->vectorExtensionType == TYPE_VEX_3B) { 00526 insn->vectorExtensionPrefix[0] = byte; 00527 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 00528 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 00529 00530 /* We simulate the REX prefix for simplicity's sake */ 00531 00532 if (insn->mode == MODE_64BIT) { 00533 insn->rexPrefix = 0x40 00534 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) 00535 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) 00536 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) 00537 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 00538 } 00539 00540 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 00541 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 00542 insn->vectorExtensionPrefix[2]); 00543 } 00544 } 00545 else if (byte == 0xc5) { 00546 uint8_t byte1; 00547 00548 if (lookAtByte(insn, &byte1)) { 00549 dbgprintf(insn, "Couldn't read second byte of VEX"); 00550 return -1; 00551 } 00552 00553 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 00554 insn->vectorExtensionType = TYPE_VEX_2B; 00555 } 00556 else { 00557 unconsumeByte(insn); 00558 } 00559 00560 if (insn->vectorExtensionType == TYPE_VEX_2B) { 00561 insn->vectorExtensionPrefix[0] = byte; 00562 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 00563 00564 if (insn->mode == MODE_64BIT) { 00565 insn->rexPrefix = 0x40 00566 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 00567 } 00568 00569 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) 00570 { 00571 default: 00572 break; 00573 case VEX_PREFIX_66: 00574 hasOpSize = true; 00575 break; 00576 } 00577 00578 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", 00579 insn->vectorExtensionPrefix[0], 00580 insn->vectorExtensionPrefix[1]); 00581 } 00582 } 00583 else if (byte == 0x8f) { 00584 uint8_t byte1; 00585 00586 if (lookAtByte(insn, &byte1)) { 00587 dbgprintf(insn, "Couldn't read second byte of XOP"); 00588 return -1; 00589 } 00590 00591 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ 00592 insn->vectorExtensionType = TYPE_XOP; 00593 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00594 } 00595 else { 00596 unconsumeByte(insn); 00597 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00598 } 00599 00600 if (insn->vectorExtensionType == TYPE_XOP) { 00601 insn->vectorExtensionPrefix[0] = byte; 00602 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 00603 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 00604 00605 /* We simulate the REX prefix for simplicity's sake */ 00606 00607 if (insn->mode == MODE_64BIT) { 00608 insn->rexPrefix = 0x40 00609 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) 00610 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) 00611 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) 00612 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 00613 } 00614 00615 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) 00616 { 00617 default: 00618 break; 00619 case VEX_PREFIX_66: 00620 hasOpSize = true; 00621 break; 00622 } 00623 00624 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 00625 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 00626 insn->vectorExtensionPrefix[2]); 00627 } 00628 } 00629 else { 00630 if (insn->mode == MODE_64BIT) { 00631 if ((byte & 0xf0) == 0x40) { 00632 uint8_t opcodeByte; 00633 00634 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 00635 dbgprintf(insn, "Redundant REX prefix"); 00636 return -1; 00637 } 00638 00639 insn->rexPrefix = byte; 00640 insn->necessaryPrefixLocation = insn->readerCursor - 2; 00641 00642 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 00643 } else { 00644 unconsumeByte(insn); 00645 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00646 } 00647 } else { 00648 unconsumeByte(insn); 00649 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00650 } 00651 } 00652 00653 if (insn->mode == MODE_16BIT) { 00654 insn->registerSize = (hasOpSize ? 4 : 2); 00655 insn->addressSize = (hasAdSize ? 4 : 2); 00656 insn->displacementSize = (hasAdSize ? 4 : 2); 00657 insn->immediateSize = (hasOpSize ? 4 : 2); 00658 } else if (insn->mode == MODE_32BIT) { 00659 insn->registerSize = (hasOpSize ? 2 : 4); 00660 insn->addressSize = (hasAdSize ? 2 : 4); 00661 insn->displacementSize = (hasAdSize ? 2 : 4); 00662 insn->immediateSize = (hasOpSize ? 2 : 4); 00663 } else if (insn->mode == MODE_64BIT) { 00664 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 00665 insn->registerSize = 8; 00666 insn->addressSize = (hasAdSize ? 4 : 8); 00667 insn->displacementSize = 4; 00668 insn->immediateSize = 4; 00669 } else if (insn->rexPrefix) { 00670 insn->registerSize = (hasOpSize ? 2 : 4); 00671 insn->addressSize = (hasAdSize ? 4 : 8); 00672 insn->displacementSize = (hasOpSize ? 2 : 4); 00673 insn->immediateSize = (hasOpSize ? 2 : 4); 00674 } else { 00675 insn->registerSize = (hasOpSize ? 2 : 4); 00676 insn->addressSize = (hasAdSize ? 4 : 8); 00677 insn->displacementSize = (hasOpSize ? 2 : 4); 00678 insn->immediateSize = (hasOpSize ? 2 : 4); 00679 } 00680 } 00681 00682 return 0; 00683 } 00684 00685 /* 00686 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 00687 * extended or escape opcodes). 00688 * 00689 * @param insn - The instruction whose opcode is to be read. 00690 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 00691 */ 00692 static int readOpcode(struct InternalInstruction* insn) { 00693 /* Determine the length of the primary opcode */ 00694 00695 uint8_t current; 00696 00697 dbgprintf(insn, "readOpcode()"); 00698 00699 insn->opcodeType = ONEBYTE; 00700 00701 if (insn->vectorExtensionType == TYPE_EVEX) 00702 { 00703 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 00704 default: 00705 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", 00706 mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); 00707 return -1; 00708 case VEX_LOB_0F: 00709 insn->opcodeType = TWOBYTE; 00710 return consumeByte(insn, &insn->opcode); 00711 case VEX_LOB_0F38: 00712 insn->opcodeType = THREEBYTE_38; 00713 return consumeByte(insn, &insn->opcode); 00714 case VEX_LOB_0F3A: 00715 insn->opcodeType = THREEBYTE_3A; 00716 return consumeByte(insn, &insn->opcode); 00717 } 00718 } 00719 else if (insn->vectorExtensionType == TYPE_VEX_3B) { 00720 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 00721 default: 00722 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 00723 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 00724 return -1; 00725 case VEX_LOB_0F: 00726 insn->opcodeType = TWOBYTE; 00727 return consumeByte(insn, &insn->opcode); 00728 case VEX_LOB_0F38: 00729 insn->opcodeType = THREEBYTE_38; 00730 return consumeByte(insn, &insn->opcode); 00731 case VEX_LOB_0F3A: 00732 insn->opcodeType = THREEBYTE_3A; 00733 return consumeByte(insn, &insn->opcode); 00734 } 00735 } 00736 else if (insn->vectorExtensionType == TYPE_VEX_2B) { 00737 insn->opcodeType = TWOBYTE; 00738 return consumeByte(insn, &insn->opcode); 00739 } 00740 else if (insn->vectorExtensionType == TYPE_XOP) { 00741 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 00742 default: 00743 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 00744 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 00745 return -1; 00746 case XOP_MAP_SELECT_8: 00747 insn->opcodeType = XOP8_MAP; 00748 return consumeByte(insn, &insn->opcode); 00749 case XOP_MAP_SELECT_9: 00750 insn->opcodeType = XOP9_MAP; 00751 return consumeByte(insn, &insn->opcode); 00752 case XOP_MAP_SELECT_A: 00753 insn->opcodeType = XOPA_MAP; 00754 return consumeByte(insn, &insn->opcode); 00755 } 00756 } 00757 00758 if (consumeByte(insn, ¤t)) 00759 return -1; 00760 00761 if (current == 0x0f) { 00762 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 00763 00764 if (consumeByte(insn, ¤t)) 00765 return -1; 00766 00767 if (current == 0x38) { 00768 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 00769 00770 if (consumeByte(insn, ¤t)) 00771 return -1; 00772 00773 insn->opcodeType = THREEBYTE_38; 00774 } else if (current == 0x3a) { 00775 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 00776 00777 if (consumeByte(insn, ¤t)) 00778 return -1; 00779 00780 insn->opcodeType = THREEBYTE_3A; 00781 } else { 00782 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 00783 00784 insn->opcodeType = TWOBYTE; 00785 } 00786 } 00787 00788 /* 00789 * At this point we have consumed the full opcode. 00790 * Anything we consume from here on must be unconsumed. 00791 */ 00792 00793 insn->opcode = current; 00794 00795 return 0; 00796 } 00797 00798 static int readModRM(struct InternalInstruction* insn); 00799 00800 /* 00801 * getIDWithAttrMask - Determines the ID of an instruction, consuming 00802 * the ModR/M byte as appropriate for extended and escape opcodes, 00803 * and using a supplied attribute mask. 00804 * 00805 * @param instructionID - A pointer whose target is filled in with the ID of the 00806 * instruction. 00807 * @param insn - The instruction whose ID is to be determined. 00808 * @param attrMask - The attribute mask to search. 00809 * @return - 0 if the ModR/M could be read when needed or was not 00810 * needed; nonzero otherwise. 00811 */ 00812 static int getIDWithAttrMask(uint16_t* instructionID, 00813 struct InternalInstruction* insn, 00814 uint16_t attrMask) { 00815 bool hasModRMExtension; 00816 00817 InstructionContext instructionClass = contextForAttrs(attrMask); 00818 00819 hasModRMExtension = modRMRequired(insn->opcodeType, 00820 instructionClass, 00821 insn->opcode); 00822 00823 if (hasModRMExtension) { 00824 if (readModRM(insn)) 00825 return -1; 00826 00827 *instructionID = decode(insn->opcodeType, 00828 instructionClass, 00829 insn->opcode, 00830 insn->modRM); 00831 } else { 00832 *instructionID = decode(insn->opcodeType, 00833 instructionClass, 00834 insn->opcode, 00835 0); 00836 } 00837 00838 return 0; 00839 } 00840 00841 /* 00842 * is16BitEquivalent - Determines whether two instruction names refer to 00843 * equivalent instructions but one is 16-bit whereas the other is not. 00844 * 00845 * @param orig - The instruction that is not 16-bit 00846 * @param equiv - The instruction that is 16-bit 00847 */ 00848 static bool is16BitEquivalent(const char* orig, const char* equiv) { 00849 off_t i; 00850 00851 for (i = 0;; i++) { 00852 if (orig[i] == '\0' && equiv[i] == '\0') 00853 return true; 00854 if (orig[i] == '\0' || equiv[i] == '\0') 00855 return false; 00856 if (orig[i] != equiv[i]) { 00857 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 00858 continue; 00859 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 00860 continue; 00861 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 00862 continue; 00863 return false; 00864 } 00865 } 00866 } 00867 00868 /* 00869 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 00870 * appropriate for extended and escape opcodes. Determines the attributes and 00871 * context for the instruction before doing so. 00872 * 00873 * @param insn - The instruction whose ID is to be determined. 00874 * @return - 0 if the ModR/M could be read when needed or was not needed; 00875 * nonzero otherwise. 00876 */ 00877 static int getID(struct InternalInstruction* insn, const void *miiArg) { 00878 uint16_t attrMask; 00879 uint16_t instructionID; 00880 00881 dbgprintf(insn, "getID()"); 00882 00883 attrMask = ATTR_NONE; 00884 00885 if (insn->mode == MODE_64BIT) 00886 attrMask |= ATTR_64BIT; 00887 00888 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 00889 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX; 00890 00891 if (insn->vectorExtensionType == TYPE_EVEX) { 00892 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { 00893 case VEX_PREFIX_66: 00894 attrMask |= ATTR_OPSIZE; 00895 break; 00896 case VEX_PREFIX_F3: 00897 attrMask |= ATTR_XS; 00898 break; 00899 case VEX_PREFIX_F2: 00900 attrMask |= ATTR_XD; 00901 break; 00902 } 00903 00904 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) 00905 attrMask |= ATTR_EVEXKZ; 00906 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) 00907 attrMask |= ATTR_EVEXB; 00908 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) 00909 attrMask |= ATTR_EVEXK; 00910 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) 00911 attrMask |= ATTR_EVEXL; 00912 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 00913 attrMask |= ATTR_EVEXL2; 00914 } 00915 else if (insn->vectorExtensionType == TYPE_VEX_3B) { 00916 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { 00917 case VEX_PREFIX_66: 00918 attrMask |= ATTR_OPSIZE; 00919 break; 00920 case VEX_PREFIX_F3: 00921 attrMask |= ATTR_XS; 00922 break; 00923 case VEX_PREFIX_F2: 00924 attrMask |= ATTR_XD; 00925 break; 00926 } 00927 00928 if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) 00929 attrMask |= ATTR_VEXL; 00930 } 00931 else if (insn->vectorExtensionType == TYPE_VEX_2B) { 00932 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 00933 case VEX_PREFIX_66: 00934 attrMask |= ATTR_OPSIZE; 00935 break; 00936 case VEX_PREFIX_F3: 00937 attrMask |= ATTR_XS; 00938 break; 00939 case VEX_PREFIX_F2: 00940 attrMask |= ATTR_XD; 00941 break; 00942 } 00943 00944 if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) 00945 attrMask |= ATTR_VEXL; 00946 } 00947 else if (insn->vectorExtensionType == TYPE_XOP) { 00948 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 00949 case VEX_PREFIX_66: 00950 attrMask |= ATTR_OPSIZE; 00951 break; 00952 case VEX_PREFIX_F3: 00953 attrMask |= ATTR_XS; 00954 break; 00955 case VEX_PREFIX_F2: 00956 attrMask |= ATTR_XD; 00957 break; 00958 } 00959 00960 if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) 00961 attrMask |= ATTR_VEXL; 00962 } 00963 else { 00964 return -1; 00965 } 00966 } 00967 else { 00968 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 00969 attrMask |= ATTR_OPSIZE; 00970 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 00971 attrMask |= ATTR_ADSIZE; 00972 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 00973 attrMask |= ATTR_XS; 00974 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 00975 attrMask |= ATTR_XD; 00976 } 00977 00978 if (insn->rexPrefix & 0x08) 00979 attrMask |= ATTR_REXW; 00980 00981 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 00982 return -1; 00983 00984 /* 00985 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning 00986 * of the AdSize prefix is inverted w.r.t. 32-bit mode. 00987 */ 00988 if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) { 00989 const struct InstructionSpecifier *spec; 00990 spec = specifierForUID(instructionID); 00991 00992 /* 00993 * Check for Ii8PCRel instructions. We could alternatively do a 00994 * string-compare on the names, but this is probably cheaper. 00995 */ 00996 if (x86OperandSets[spec->operands][0].type == TYPE_REL8) { 00997 attrMask ^= ATTR_ADSIZE; 00998 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 00999 return -1; 01000 } 01001 } 01002 01003 /* The following clauses compensate for limitations of the tables. */ 01004 01005 if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && 01006 !(attrMask & ATTR_OPSIZE)) { 01007 /* 01008 * The instruction tables make no distinction between instructions that 01009 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 01010 * particular spot (i.e., many MMX operations). In general we're 01011 * conservative, but in the specific case where OpSize is present but not 01012 * in the right place we check if there's a 16-bit operation. 01013 */ 01014 01015 const struct InstructionSpecifier *spec; 01016 uint16_t instructionIDWithOpsize; 01017 const char *specName, *specWithOpSizeName; 01018 01019 spec = specifierForUID(instructionID); 01020 01021 if (getIDWithAttrMask(&instructionIDWithOpsize, 01022 insn, 01023 attrMask | ATTR_OPSIZE)) { 01024 /* 01025 * ModRM required with OpSize but not present; give up and return version 01026 * without OpSize set 01027 */ 01028 01029 insn->instructionID = instructionID; 01030 insn->spec = spec; 01031 return 0; 01032 } 01033 01034 specName = GetInstrName(instructionID, miiArg); 01035 specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); 01036 01037 if (is16BitEquivalent(specName, specWithOpSizeName) && 01038 (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { 01039 insn->instructionID = instructionIDWithOpsize; 01040 insn->spec = specifierForUID(instructionIDWithOpsize); 01041 } else { 01042 insn->instructionID = instructionID; 01043 insn->spec = spec; 01044 } 01045 return 0; 01046 } 01047 01048 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 01049 insn->rexPrefix & 0x01) { 01050 /* 01051 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 01052 * it should decode as XCHG %r8, %eax. 01053 */ 01054 01055 const struct InstructionSpecifier *spec; 01056 uint16_t instructionIDWithNewOpcode; 01057 const struct InstructionSpecifier *specWithNewOpcode; 01058 01059 spec = specifierForUID(instructionID); 01060 01061 /* Borrow opcode from one of the other XCHGar opcodes */ 01062 insn->opcode = 0x91; 01063 01064 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 01065 insn, 01066 attrMask)) { 01067 insn->opcode = 0x90; 01068 01069 insn->instructionID = instructionID; 01070 insn->spec = spec; 01071 return 0; 01072 } 01073 01074 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 01075 01076 /* Change back */ 01077 insn->opcode = 0x90; 01078 01079 insn->instructionID = instructionIDWithNewOpcode; 01080 insn->spec = specWithNewOpcode; 01081 01082 return 0; 01083 } 01084 01085 insn->instructionID = instructionID; 01086 insn->spec = specifierForUID(insn->instructionID); 01087 01088 return 0; 01089 } 01090 01091 /* 01092 * readSIB - Consumes the SIB byte to determine addressing information for an 01093 * instruction. 01094 * 01095 * @param insn - The instruction whose SIB byte is to be read. 01096 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 01097 */ 01098 static int readSIB(struct InternalInstruction* insn) { 01099 SIBIndex sibIndexBase = SIB_INDEX_NONE; 01100 SIBBase sibBaseBase = SIB_BASE_NONE; 01101 uint8_t index, base; 01102 01103 dbgprintf(insn, "readSIB()"); 01104 01105 if (insn->consumedSIB) 01106 return 0; 01107 01108 insn->consumedSIB = true; 01109 01110 switch (insn->addressSize) { 01111 case 2: 01112 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 01113 return -1; 01114 case 4: 01115 sibIndexBase = SIB_INDEX_EAX; 01116 sibBaseBase = SIB_BASE_EAX; 01117 break; 01118 case 8: 01119 sibIndexBase = SIB_INDEX_RAX; 01120 sibBaseBase = SIB_BASE_RAX; 01121 break; 01122 } 01123 01124 if (consumeByte(insn, &insn->sib)) 01125 return -1; 01126 01127 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 01128 if (insn->vectorExtensionType == TYPE_EVEX) 01129 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; 01130 01131 switch (index) { 01132 case 0x4: 01133 insn->sibIndex = SIB_INDEX_NONE; 01134 break; 01135 default: 01136 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 01137 if (insn->sibIndex == SIB_INDEX_sib || 01138 insn->sibIndex == SIB_INDEX_sib64) 01139 insn->sibIndex = SIB_INDEX_NONE; 01140 break; 01141 } 01142 01143 switch (scaleFromSIB(insn->sib)) { 01144 case 0: 01145 insn->sibScale = 1; 01146 break; 01147 case 1: 01148 insn->sibScale = 2; 01149 break; 01150 case 2: 01151 insn->sibScale = 4; 01152 break; 01153 case 3: 01154 insn->sibScale = 8; 01155 break; 01156 } 01157 01158 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 01159 01160 switch (base) { 01161 case 0x5: 01162 case 0xd: 01163 switch (modFromModRM(insn->modRM)) { 01164 case 0x0: 01165 insn->eaDisplacement = EA_DISP_32; 01166 insn->sibBase = SIB_BASE_NONE; 01167 break; 01168 case 0x1: 01169 insn->eaDisplacement = EA_DISP_8; 01170 insn->sibBase = (SIBBase)(sibBaseBase + base); 01171 break; 01172 case 0x2: 01173 insn->eaDisplacement = EA_DISP_32; 01174 insn->sibBase = (SIBBase)(sibBaseBase + base); 01175 break; 01176 case 0x3: 01177 debug("Cannot have Mod = 0b11 and a SIB byte"); 01178 return -1; 01179 } 01180 break; 01181 default: 01182 insn->sibBase = (SIBBase)(sibBaseBase + base); 01183 break; 01184 } 01185 01186 return 0; 01187 } 01188 01189 /* 01190 * readDisplacement - Consumes the displacement of an instruction. 01191 * 01192 * @param insn - The instruction whose displacement is to be read. 01193 * @return - 0 if the displacement byte was successfully read; nonzero 01194 * otherwise. 01195 */ 01196 static int readDisplacement(struct InternalInstruction* insn) { 01197 int8_t d8; 01198 int16_t d16; 01199 int32_t d32; 01200 01201 dbgprintf(insn, "readDisplacement()"); 01202 01203 if (insn->consumedDisplacement) 01204 return 0; 01205 01206 insn->consumedDisplacement = true; 01207 insn->displacementOffset = insn->readerCursor - insn->startLocation; 01208 01209 switch (insn->eaDisplacement) { 01210 case EA_DISP_NONE: 01211 insn->consumedDisplacement = false; 01212 break; 01213 case EA_DISP_8: 01214 if (consumeInt8(insn, &d8)) 01215 return -1; 01216 insn->displacement = d8; 01217 break; 01218 case EA_DISP_16: 01219 if (consumeInt16(insn, &d16)) 01220 return -1; 01221 insn->displacement = d16; 01222 break; 01223 case EA_DISP_32: 01224 if (consumeInt32(insn, &d32)) 01225 return -1; 01226 insn->displacement = d32; 01227 break; 01228 } 01229 01230 insn->consumedDisplacement = true; 01231 return 0; 01232 } 01233 01234 /* 01235 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 01236 * displacement) for an instruction and interprets it. 01237 * 01238 * @param insn - The instruction whose addressing information is to be read. 01239 * @return - 0 if the information was successfully read; nonzero otherwise. 01240 */ 01241 static int readModRM(struct InternalInstruction* insn) { 01242 uint8_t mod, rm, reg; 01243 01244 dbgprintf(insn, "readModRM()"); 01245 01246 if (insn->consumedModRM) 01247 return 0; 01248 01249 if (consumeByte(insn, &insn->modRM)) 01250 return -1; 01251 insn->consumedModRM = true; 01252 01253 mod = modFromModRM(insn->modRM); 01254 rm = rmFromModRM(insn->modRM); 01255 reg = regFromModRM(insn->modRM); 01256 01257 /* 01258 * This goes by insn->registerSize to pick the correct register, which messes 01259 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 01260 * fixupReg(). 01261 */ 01262 switch (insn->registerSize) { 01263 case 2: 01264 insn->regBase = MODRM_REG_AX; 01265 insn->eaRegBase = EA_REG_AX; 01266 break; 01267 case 4: 01268 insn->regBase = MODRM_REG_EAX; 01269 insn->eaRegBase = EA_REG_EAX; 01270 break; 01271 case 8: 01272 insn->regBase = MODRM_REG_RAX; 01273 insn->eaRegBase = EA_REG_RAX; 01274 break; 01275 } 01276 01277 reg |= rFromREX(insn->rexPrefix) << 3; 01278 rm |= bFromREX(insn->rexPrefix) << 3; 01279 if (insn->vectorExtensionType == TYPE_EVEX) { 01280 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 01281 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 01282 } 01283 01284 insn->reg = (Reg)(insn->regBase + reg); 01285 01286 switch (insn->addressSize) { 01287 case 2: 01288 insn->eaBaseBase = EA_BASE_BX_SI; 01289 01290 switch (mod) { 01291 case 0x0: 01292 if (rm == 0x6) { 01293 insn->eaBase = EA_BASE_NONE; 01294 insn->eaDisplacement = EA_DISP_16; 01295 if (readDisplacement(insn)) 01296 return -1; 01297 } else { 01298 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01299 insn->eaDisplacement = EA_DISP_NONE; 01300 } 01301 break; 01302 case 0x1: 01303 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01304 insn->eaDisplacement = EA_DISP_8; 01305 insn->displacementSize = 1; 01306 if (readDisplacement(insn)) 01307 return -1; 01308 break; 01309 case 0x2: 01310 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01311 insn->eaDisplacement = EA_DISP_16; 01312 if (readDisplacement(insn)) 01313 return -1; 01314 break; 01315 case 0x3: 01316 insn->eaBase = (EABase)(insn->eaRegBase + rm); 01317 if (readDisplacement(insn)) 01318 return -1; 01319 break; 01320 } 01321 break; 01322 case 4: 01323 case 8: 01324 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 01325 01326 switch (mod) { 01327 case 0x0: 01328 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 01329 switch (rm) { 01330 case 0x14: 01331 case 0x4: 01332 case 0xc: /* in case REXW.b is set */ 01333 insn->eaBase = (insn->addressSize == 4 ? 01334 EA_BASE_sib : EA_BASE_sib64); 01335 if (readSIB(insn) || readDisplacement(insn)) 01336 return -1; 01337 break; 01338 case 0x5: 01339 insn->eaBase = EA_BASE_NONE; 01340 insn->eaDisplacement = EA_DISP_32; 01341 if (readDisplacement(insn)) 01342 return -1; 01343 break; 01344 default: 01345 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01346 break; 01347 } 01348 break; 01349 case 0x1: 01350 insn->displacementSize = 1; 01351 /* FALLTHROUGH */ 01352 case 0x2: 01353 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 01354 switch (rm) { 01355 case 0x14: 01356 case 0x4: 01357 case 0xc: /* in case REXW.b is set */ 01358 insn->eaBase = EA_BASE_sib; 01359 if (readSIB(insn) || readDisplacement(insn)) 01360 return -1; 01361 break; 01362 default: 01363 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01364 if (readDisplacement(insn)) 01365 return -1; 01366 break; 01367 } 01368 break; 01369 case 0x3: 01370 insn->eaDisplacement = EA_DISP_NONE; 01371 insn->eaBase = (EABase)(insn->eaRegBase + rm); 01372 break; 01373 } 01374 break; 01375 } /* switch (insn->addressSize) */ 01376 01377 return 0; 01378 } 01379 01380 #define GENERIC_FIXUP_FUNC(name, base, prefix) \ 01381 static uint8_t name(struct InternalInstruction *insn, \ 01382 OperandType type, \ 01383 uint8_t index, \ 01384 uint8_t *valid) { \ 01385 *valid = 1; \ 01386 switch (type) { \ 01387 default: \ 01388 debug("Unhandled register type"); \ 01389 *valid = 0; \ 01390 return 0; \ 01391 case TYPE_Rv: \ 01392 return base + index; \ 01393 case TYPE_R8: \ 01394 if (insn->rexPrefix && \ 01395 index >= 4 && index <= 7) { \ 01396 return prefix##_SPL + (index - 4); \ 01397 } else { \ 01398 return prefix##_AL + index; \ 01399 } \ 01400 case TYPE_R16: \ 01401 return prefix##_AX + index; \ 01402 case TYPE_R32: \ 01403 return prefix##_EAX + index; \ 01404 case TYPE_R64: \ 01405 return prefix##_RAX + index; \ 01406 case TYPE_XMM512: \ 01407 return prefix##_ZMM0 + index; \ 01408 case TYPE_XMM256: \ 01409 return prefix##_YMM0 + index; \ 01410 case TYPE_XMM128: \ 01411 case TYPE_XMM64: \ 01412 case TYPE_XMM32: \ 01413 case TYPE_XMM: \ 01414 return prefix##_XMM0 + index; \ 01415 case TYPE_VK1: \ 01416 case TYPE_VK8: \ 01417 case TYPE_VK16: \ 01418 return prefix##_K0 + index; \ 01419 case TYPE_MM64: \ 01420 case TYPE_MM32: \ 01421 case TYPE_MM: \ 01422 if (index > 7) \ 01423 *valid = 0; \ 01424 return prefix##_MM0 + index; \ 01425 case TYPE_SEGMENTREG: \ 01426 if (index > 5) \ 01427 *valid = 0; \ 01428 return prefix##_ES + index; \ 01429 case TYPE_DEBUGREG: \ 01430 if (index > 7) \ 01431 *valid = 0; \ 01432 return prefix##_DR0 + index; \ 01433 case TYPE_CONTROLREG: \ 01434 if (index > 8) \ 01435 *valid = 0; \ 01436 return prefix##_CR0 + index; \ 01437 } \ 01438 } 01439 01440 /* 01441 * fixup*Value - Consults an operand type to determine the meaning of the 01442 * reg or R/M field. If the operand is an XMM operand, for example, an 01443 * operand would be XMM0 instead of AX, which readModRM() would otherwise 01444 * misinterpret it as. 01445 * 01446 * @param insn - The instruction containing the operand. 01447 * @param type - The operand type. 01448 * @param index - The existing value of the field as reported by readModRM(). 01449 * @param valid - The address of a uint8_t. The target is set to 1 if the 01450 * field is valid for the register class; 0 if not. 01451 * @return - The proper value. 01452 */ 01453 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 01454 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 01455 01456 /* 01457 * fixupReg - Consults an operand specifier to determine which of the 01458 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 01459 * 01460 * @param insn - See fixup*Value(). 01461 * @param op - The operand specifier. 01462 * @return - 0 if fixup was successful; -1 if the register returned was 01463 * invalid for its class. 01464 */ 01465 static int fixupReg(struct InternalInstruction *insn, 01466 const struct OperandSpecifier *op) { 01467 uint8_t valid; 01468 01469 dbgprintf(insn, "fixupReg()"); 01470 01471 switch ((OperandEncoding)op->encoding) { 01472 default: 01473 debug("Expected a REG or R/M encoding in fixupReg"); 01474 return -1; 01475 case ENCODING_VVVV: 01476 insn->vvvv = (Reg)fixupRegValue(insn, 01477 (OperandType)op->type, 01478 insn->vvvv, 01479 &valid); 01480 if (!valid) 01481 return -1; 01482 break; 01483 case ENCODING_REG: 01484 insn->reg = (Reg)fixupRegValue(insn, 01485 (OperandType)op->type, 01486 insn->reg - insn->regBase, 01487 &valid); 01488 if (!valid) 01489 return -1; 01490 break; 01491 CASE_ENCODING_RM: 01492 if (insn->eaBase >= insn->eaRegBase) { 01493 insn->eaBase = (EABase)fixupRMValue(insn, 01494 (OperandType)op->type, 01495 insn->eaBase - insn->eaRegBase, 01496 &valid); 01497 if (!valid) 01498 return -1; 01499 } 01500 break; 01501 } 01502 01503 return 0; 01504 } 01505 01506 /* 01507 * readOpcodeRegister - Reads an operand from the opcode field of an 01508 * instruction and interprets it appropriately given the operand width. 01509 * Handles AddRegFrm instructions. 01510 * 01511 * @param insn - the instruction whose opcode field is to be read. 01512 * @param size - The width (in bytes) of the register being specified. 01513 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 01514 * RAX. 01515 * @return - 0 on success; nonzero otherwise. 01516 */ 01517 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 01518 dbgprintf(insn, "readOpcodeRegister()"); 01519 01520 if (size == 0) 01521 size = insn->registerSize; 01522 01523 switch (size) { 01524 case 1: 01525 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 01526 | (insn->opcode & 7))); 01527 if (insn->rexPrefix && 01528 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 01529 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 01530 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 01531 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 01532 } 01533 01534 break; 01535 case 2: 01536 insn->opcodeRegister = (Reg)(MODRM_REG_AX 01537 + ((bFromREX(insn->rexPrefix) << 3) 01538 | (insn->opcode & 7))); 01539 break; 01540 case 4: 01541 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 01542 + ((bFromREX(insn->rexPrefix) << 3) 01543 | (insn->opcode & 7))); 01544 break; 01545 case 8: 01546 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 01547 + ((bFromREX(insn->rexPrefix) << 3) 01548 | (insn->opcode & 7))); 01549 break; 01550 } 01551 01552 return 0; 01553 } 01554 01555 /* 01556 * readImmediate - Consumes an immediate operand from an instruction, given the 01557 * desired operand size. 01558 * 01559 * @param insn - The instruction whose operand is to be read. 01560 * @param size - The width (in bytes) of the operand. 01561 * @return - 0 if the immediate was successfully consumed; nonzero 01562 * otherwise. 01563 */ 01564 static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 01565 uint8_t imm8; 01566 uint16_t imm16; 01567 uint32_t imm32; 01568 uint64_t imm64; 01569 01570 dbgprintf(insn, "readImmediate()"); 01571 01572 if (insn->numImmediatesConsumed == 2) { 01573 debug("Already consumed two immediates"); 01574 return -1; 01575 } 01576 01577 if (size == 0) 01578 size = insn->immediateSize; 01579 else 01580 insn->immediateSize = size; 01581 insn->immediateOffset = insn->readerCursor - insn->startLocation; 01582 01583 switch (size) { 01584 case 1: 01585 if (consumeByte(insn, &imm8)) 01586 return -1; 01587 insn->immediates[insn->numImmediatesConsumed] = imm8; 01588 break; 01589 case 2: 01590 if (consumeUInt16(insn, &imm16)) 01591 return -1; 01592 insn->immediates[insn->numImmediatesConsumed] = imm16; 01593 break; 01594 case 4: 01595 if (consumeUInt32(insn, &imm32)) 01596 return -1; 01597 insn->immediates[insn->numImmediatesConsumed] = imm32; 01598 break; 01599 case 8: 01600 if (consumeUInt64(insn, &imm64)) 01601 return -1; 01602 insn->immediates[insn->numImmediatesConsumed] = imm64; 01603 break; 01604 } 01605 01606 insn->numImmediatesConsumed++; 01607 01608 return 0; 01609 } 01610 01611 /* 01612 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 01613 * 01614 * @param insn - The instruction whose operand is to be read. 01615 * @return - 0 if the vvvv was successfully consumed; nonzero 01616 * otherwise. 01617 */ 01618 static int readVVVV(struct InternalInstruction* insn) { 01619 dbgprintf(insn, "readVVVV()"); 01620 01621 int vvvv; 01622 if (insn->vectorExtensionType == TYPE_EVEX) 01623 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 01624 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 01625 else if (insn->vectorExtensionType == TYPE_VEX_3B) 01626 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 01627 else if (insn->vectorExtensionType == TYPE_VEX_2B) 01628 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 01629 else if (insn->vectorExtensionType == TYPE_XOP) 01630 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 01631 else 01632 return -1; 01633 01634 if (insn->mode != MODE_64BIT) 01635 vvvv &= 0x7; 01636 01637 insn->vvvv = static_cast<Reg>(vvvv); 01638 return 0; 01639 } 01640 01641 /* 01642 * readMaskRegister - Reads an mask register from the opcode field of an 01643 * instruction. 01644 * 01645 * @param insn - The instruction whose opcode field is to be read. 01646 * @return - 0 on success; nonzero otherwise. 01647 */ 01648 static int readMaskRegister(struct InternalInstruction* insn) { 01649 dbgprintf(insn, "readMaskRegister()"); 01650 01651 if (insn->vectorExtensionType != TYPE_EVEX) 01652 return -1; 01653 01654 insn->writemask = 01655 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); 01656 return 0; 01657 } 01658 01659 /* 01660 * readOperands - Consults the specifier for an instruction and consumes all 01661 * operands for that instruction, interpreting them as it goes. 01662 * 01663 * @param insn - The instruction whose operands are to be read and interpreted. 01664 * @return - 0 if all operands could be read; nonzero otherwise. 01665 */ 01666 static int readOperands(struct InternalInstruction* insn) { 01667 int hasVVVV, needVVVV; 01668 int sawRegImm = 0; 01669 01670 dbgprintf(insn, "readOperands()"); 01671 01672 /* If non-zero vvvv specified, need to make sure one of the operands 01673 uses it. */ 01674 hasVVVV = !readVVVV(insn); 01675 needVVVV = hasVVVV && (insn->vvvv != 0); 01676 01677 for (const auto &Op : x86OperandSets[insn->spec->operands]) { 01678 switch (Op.encoding) { 01679 case ENCODING_NONE: 01680 case ENCODING_SI: 01681 case ENCODING_DI: 01682 break; 01683 case ENCODING_REG: 01684 CASE_ENCODING_RM: 01685 if (readModRM(insn)) 01686 return -1; 01687 if (fixupReg(insn, &Op)) 01688 return -1; 01689 // Apply the AVX512 compressed displacement scaling factor. 01690 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 01691 insn->displacement *= 1 << (Op.encoding - ENCODING_RM); 01692 break; 01693 case ENCODING_CB: 01694 case ENCODING_CW: 01695 case ENCODING_CD: 01696 case ENCODING_CP: 01697 case ENCODING_CO: 01698 case ENCODING_CT: 01699 dbgprintf(insn, "We currently don't hande code-offset encodings"); 01700 return -1; 01701 case ENCODING_IB: 01702 if (sawRegImm) { 01703 /* Saw a register immediate so don't read again and instead split the 01704 previous immediate. FIXME: This is a hack. */ 01705 insn->immediates[insn->numImmediatesConsumed] = 01706 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 01707 ++insn->numImmediatesConsumed; 01708 break; 01709 } 01710 if (readImmediate(insn, 1)) 01711 return -1; 01712 if (Op.type == TYPE_IMM3 && 01713 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 01714 return -1; 01715 if (Op.type == TYPE_IMM5 && 01716 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 01717 return -1; 01718 if (Op.type == TYPE_XMM128 || 01719 Op.type == TYPE_XMM256) 01720 sawRegImm = 1; 01721 break; 01722 case ENCODING_IW: 01723 if (readImmediate(insn, 2)) 01724 return -1; 01725 break; 01726 case ENCODING_ID: 01727 if (readImmediate(insn, 4)) 01728 return -1; 01729 break; 01730 case ENCODING_IO: 01731 if (readImmediate(insn, 8)) 01732 return -1; 01733 break; 01734 case ENCODING_Iv: 01735 if (readImmediate(insn, insn->immediateSize)) 01736 return -1; 01737 break; 01738 case ENCODING_Ia: 01739 if (readImmediate(insn, insn->addressSize)) 01740 return -1; 01741 break; 01742 case ENCODING_RB: 01743 if (readOpcodeRegister(insn, 1)) 01744 return -1; 01745 break; 01746 case ENCODING_RW: 01747 if (readOpcodeRegister(insn, 2)) 01748 return -1; 01749 break; 01750 case ENCODING_RD: 01751 if (readOpcodeRegister(insn, 4)) 01752 return -1; 01753 break; 01754 case ENCODING_RO: 01755 if (readOpcodeRegister(insn, 8)) 01756 return -1; 01757 break; 01758 case ENCODING_Rv: 01759 if (readOpcodeRegister(insn, 0)) 01760 return -1; 01761 break; 01762 case ENCODING_FP: 01763 break; 01764 case ENCODING_VVVV: 01765 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 01766 if (!hasVVVV) 01767 return -1; 01768 if (fixupReg(insn, &Op)) 01769 return -1; 01770 break; 01771 case ENCODING_WRITEMASK: 01772 if (readMaskRegister(insn)) 01773 return -1; 01774 break; 01775 case ENCODING_DUP: 01776 break; 01777 default: 01778 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 01779 return -1; 01780 } 01781 } 01782 01783 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 01784 if (needVVVV) return -1; 01785 01786 return 0; 01787 } 01788 01789 /* 01790 * decodeInstruction - Reads and interprets a full instruction provided by the 01791 * user. 01792 * 01793 * @param insn - A pointer to the instruction to be populated. Must be 01794 * pre-allocated. 01795 * @param reader - The function to be used to read the instruction's bytes. 01796 * @param readerArg - A generic argument to be passed to the reader to store 01797 * any internal state. 01798 * @param logger - If non-NULL, the function to be used to write log messages 01799 * and warnings. 01800 * @param loggerArg - A generic argument to be passed to the logger to store 01801 * any internal state. 01802 * @param startLoc - The address (in the reader's address space) of the first 01803 * byte in the instruction. 01804 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 01805 * decode the instruction in. 01806 * @return - 0 if the instruction's memory could be read; nonzero if 01807 * not. 01808 */ 01809 int llvm::X86Disassembler::decodeInstruction( 01810 struct InternalInstruction *insn, byteReader_t reader, 01811 const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, 01812 uint64_t startLoc, DisassemblerMode mode) { 01813 memset(insn, 0, sizeof(struct InternalInstruction)); 01814 01815 insn->reader = reader; 01816 insn->readerArg = readerArg; 01817 insn->dlog = logger; 01818 insn->dlogArg = loggerArg; 01819 insn->startLocation = startLoc; 01820 insn->readerCursor = startLoc; 01821 insn->mode = mode; 01822 insn->numImmediatesConsumed = 0; 01823 01824 if (readPrefixes(insn) || 01825 readOpcode(insn) || 01826 getID(insn, miiArg) || 01827 insn->instructionID == 0 || 01828 readOperands(insn)) 01829 return -1; 01830 01831 insn->operands = x86OperandSets[insn->spec->operands]; 01832 01833 insn->length = insn->readerCursor - insn->startLocation; 01834 01835 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 01836 startLoc, insn->readerCursor, insn->length); 01837 01838 if (insn->length > 15) 01839 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 01840 01841 return 0; 01842 }