Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
insn.c
Go to the documentation of this file.
1 /*
2  * x86 instruction analysis
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright (C) IBM Corporation, 2002, 2004, 2009
19  */
20 
21 #ifdef __KERNEL__
22 #include <linux/string.h>
23 #else
24 #include <string.h>
25 #endif
26 #include <asm/inat.h>
27 #include <asm/insn.h>
28 
/*
 * Verify that sizeof(t) bytes, starting n bytes past the current decode
 * position, still lie within MAX_INSN_SIZE bytes of the instruction start.
 */
#define validate_next(t, insn, n)	\
	((insn)->next_byte + sizeof(t) + (n) - (insn)->kaddr <= MAX_INSN_SIZE)

/*
 * Fetch a value of type t at the decode position and step past it.
 * The instruction stream gives no alignment guarantee for multi-byte
 * fields, so the bytes are copied out with memcpy() instead of being read
 * through a casted pointer.
 */
#define __get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); \
	   (insn)->next_byte += sizeof(t); r; })

/* Read a value of type t located n bytes ahead without consuming it. */
#define __peek_nbyte_next(t, insn, n)	\
	({ t r; memcpy(&r, (insn)->next_byte + (n), sizeof(t)); r; })

/*
 * Bounds-checked variants.  They jump to a local "err_out" label, which the
 * enclosing function must provide, when the access would cross the
 * MAX_INSN_SIZE limit.
 */
#define get_next(t, insn)	\
	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })

#define peek_nbyte_next(t, insn, n)	\
	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })

#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
46 
53 void insn_init(struct insn *insn, const void *kaddr, int x86_64)
54 {
55  memset(insn, 0, sizeof(*insn));
56  insn->kaddr = kaddr;
57  insn->next_byte = kaddr;
58  insn->x86_64 = x86_64 ? 1 : 0;
59  insn->opnd_bytes = 4;
60  if (x86_64)
61  insn->addr_bytes = 8;
62  else
63  insn->addr_bytes = 4;
64 }
65 
75 {
76  struct insn_field *prefixes = &insn->prefixes;
78  insn_byte_t b, lb;
79  int i, nb;
80 
81  if (prefixes->got)
82  return;
83 
84  nb = 0;
85  lb = 0;
86  b = peek_next(insn_byte_t, insn);
87  attr = inat_get_opcode_attribute(b);
88  while (inat_is_legacy_prefix(attr)) {
89  /* Skip if same prefix */
90  for (i = 0; i < nb; i++)
91  if (prefixes->bytes[i] == b)
92  goto found;
93  if (nb == 4)
94  /* Invalid instruction */
95  break;
96  prefixes->bytes[nb++] = b;
97  if (inat_is_address_size_prefix(attr)) {
98  /* address size switches 2/4 or 4/8 */
99  if (insn->x86_64)
100  insn->addr_bytes ^= 12;
101  else
102  insn->addr_bytes ^= 6;
103  } else if (inat_is_operand_size_prefix(attr)) {
104  /* oprand size switches 2/4 */
105  insn->opnd_bytes ^= 6;
106  }
107 found:
108  prefixes->nbytes++;
109  insn->next_byte++;
110  lb = b;
111  b = peek_next(insn_byte_t, insn);
112  attr = inat_get_opcode_attribute(b);
113  }
114  /* Set the last prefix */
115  if (lb && lb != insn->prefixes.bytes[3]) {
116  if (unlikely(insn->prefixes.bytes[3])) {
117  /* Swap the last prefix */
118  b = insn->prefixes.bytes[3];
119  for (i = 0; i < nb; i++)
120  if (prefixes->bytes[i] == lb)
121  prefixes->bytes[i] = b;
122  }
123  insn->prefixes.bytes[3] = lb;
124  }
125 
126  /* Decode REX prefix */
127  if (insn->x86_64) {
128  b = peek_next(insn_byte_t, insn);
129  attr = inat_get_opcode_attribute(b);
130  if (inat_is_rex_prefix(attr)) {
131  insn->rex_prefix.value = b;
132  insn->rex_prefix.nbytes = 1;
133  insn->next_byte++;
134  if (X86_REX_W(b))
135  /* REX.W overrides opnd_size */
136  insn->opnd_bytes = 8;
137  }
138  }
139  insn->rex_prefix.got = 1;
140 
141  /* Decode VEX prefix */
142  b = peek_next(insn_byte_t, insn);
143  attr = inat_get_opcode_attribute(b);
144  if (inat_is_vex_prefix(attr)) {
145  insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
146  if (!insn->x86_64) {
147  /*
148  * In 32-bits mode, if the [7:6] bits (mod bits of
149  * ModRM) on the second byte are not 11b, it is
150  * LDS or LES.
151  */
152  if (X86_MODRM_MOD(b2) != 3)
153  goto vex_end;
154  }
155  insn->vex_prefix.bytes[0] = b;
156  insn->vex_prefix.bytes[1] = b2;
157  if (inat_is_vex3_prefix(attr)) {
158  b2 = peek_nbyte_next(insn_byte_t, insn, 2);
159  insn->vex_prefix.bytes[2] = b2;
160  insn->vex_prefix.nbytes = 3;
161  insn->next_byte += 3;
162  if (insn->x86_64 && X86_VEX_W(b2))
163  /* VEX.W overrides opnd_size */
164  insn->opnd_bytes = 8;
165  } else {
166  insn->vex_prefix.nbytes = 2;
167  insn->next_byte += 2;
168  }
169  }
170 vex_end:
171  insn->vex_prefix.got = 1;
172 
173  prefixes->got = 1;
174 
175 err_out:
176  return;
177 }
178 
189 void insn_get_opcode(struct insn *insn)
190 {
191  struct insn_field *opcode = &insn->opcode;
192  insn_byte_t op;
193  int pfx_id;
194  if (opcode->got)
195  return;
196  if (!insn->prefixes.got)
197  insn_get_prefixes(insn);
198 
199  /* Get first opcode */
200  op = get_next(insn_byte_t, insn);
201  opcode->bytes[0] = op;
202  opcode->nbytes = 1;
203 
204  /* Check if there is VEX prefix or not */
205  if (insn_is_avx(insn)) {
206  insn_byte_t m, p;
207  m = insn_vex_m_bits(insn);
208  p = insn_vex_p_bits(insn);
209  insn->attr = inat_get_avx_attribute(op, m, p);
210  if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
211  insn->attr = 0; /* This instruction is bad */
212  goto end; /* VEX has only 1 byte for opcode */
213  }
214 
215  insn->attr = inat_get_opcode_attribute(op);
216  while (inat_is_escape(insn->attr)) {
217  /* Get escaped opcode */
218  op = get_next(insn_byte_t, insn);
219  opcode->bytes[opcode->nbytes++] = op;
220  pfx_id = insn_last_prefix_id(insn);
221  insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
222  }
223  if (inat_must_vex(insn->attr))
224  insn->attr = 0; /* This instruction is bad */
225 end:
226  opcode->got = 1;
227 
228 err_out:
229  return;
230 }
231 
240 void insn_get_modrm(struct insn *insn)
241 {
242  struct insn_field *modrm = &insn->modrm;
243  insn_byte_t pfx_id, mod;
244  if (modrm->got)
245  return;
246  if (!insn->opcode.got)
247  insn_get_opcode(insn);
248 
249  if (inat_has_modrm(insn->attr)) {
250  mod = get_next(insn_byte_t, insn);
251  modrm->value = mod;
252  modrm->nbytes = 1;
253  if (inat_is_group(insn->attr)) {
254  pfx_id = insn_last_prefix_id(insn);
255  insn->attr = inat_get_group_attribute(mod, pfx_id,
256  insn->attr);
257  if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
258  insn->attr = 0; /* This is bad */
259  }
260  }
261 
262  if (insn->x86_64 && inat_is_force64(insn->attr))
263  insn->opnd_bytes = 8;
264  modrm->got = 1;
265 
266 err_out:
267  return;
268 }
269 
270 
279 {
280  struct insn_field *modrm = &insn->modrm;
281 
282  if (!insn->x86_64)
283  return 0;
284  if (!modrm->got)
285  insn_get_modrm(insn);
286  /*
287  * For rip-relative instructions, the mod field (top 2 bits)
288  * is zero and the r/m field (bottom 3 bits) is 0x5.
289  */
290  return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
291 }
292 
300 void insn_get_sib(struct insn *insn)
301 {
302  insn_byte_t modrm;
303 
304  if (insn->sib.got)
305  return;
306  if (!insn->modrm.got)
307  insn_get_modrm(insn);
308  if (insn->modrm.nbytes) {
309  modrm = (insn_byte_t)insn->modrm.value;
310  if (insn->addr_bytes != 2 &&
311  X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
312  insn->sib.value = get_next(insn_byte_t, insn);
313  insn->sib.nbytes = 1;
314  }
315  }
316  insn->sib.got = 1;
317 
318 err_out:
319  return;
320 }
321 
322 
332 {
333  insn_byte_t mod, rm, base;
334 
335  if (insn->displacement.got)
336  return;
337  if (!insn->sib.got)
338  insn_get_sib(insn);
339  if (insn->modrm.nbytes) {
340  /*
341  * Interpreting the modrm byte:
342  * mod = 00 - no displacement fields (exceptions below)
343  * mod = 01 - 1-byte displacement field
344  * mod = 10 - displacement field is 4 bytes, or 2 bytes if
345  * address size = 2 (0x67 prefix in 32-bit mode)
346  * mod = 11 - no memory operand
347  *
348  * If address size = 2...
349  * mod = 00, r/m = 110 - displacement field is 2 bytes
350  *
351  * If address size != 2...
352  * mod != 11, r/m = 100 - SIB byte exists
353  * mod = 00, SIB base = 101 - displacement field is 4 bytes
354  * mod = 00, r/m = 101 - rip-relative addressing, displacement
355  * field is 4 bytes
356  */
357  mod = X86_MODRM_MOD(insn->modrm.value);
358  rm = X86_MODRM_RM(insn->modrm.value);
359  base = X86_SIB_BASE(insn->sib.value);
360  if (mod == 3)
361  goto out;
362  if (mod == 1) {
363  insn->displacement.value = get_next(char, insn);
364  insn->displacement.nbytes = 1;
365  } else if (insn->addr_bytes == 2) {
366  if ((mod == 0 && rm == 6) || mod == 2) {
367  insn->displacement.value =
368  get_next(short, insn);
369  insn->displacement.nbytes = 2;
370  }
371  } else {
372  if ((mod == 0 && rm == 5) || mod == 2 ||
373  (mod == 0 && base == 5)) {
374  insn->displacement.value = get_next(int, insn);
375  insn->displacement.nbytes = 4;
376  }
377  }
378  }
379 out:
380  insn->displacement.got = 1;
381 
382 err_out:
383  return;
384 }
385 
386 /* Decode moffset16/32/64. Return 0 if failed */
387 static int __get_moffset(struct insn *insn)
388 {
389  switch (insn->addr_bytes) {
390  case 2:
391  insn->moffset1.value = get_next(short, insn);
392  insn->moffset1.nbytes = 2;
393  break;
394  case 4:
395  insn->moffset1.value = get_next(int, insn);
396  insn->moffset1.nbytes = 4;
397  break;
398  case 8:
399  insn->moffset1.value = get_next(int, insn);
400  insn->moffset1.nbytes = 4;
401  insn->moffset2.value = get_next(int, insn);
402  insn->moffset2.nbytes = 4;
403  break;
404  default: /* opnd_bytes must be modified manually */
405  goto err_out;
406  }
407  insn->moffset1.got = insn->moffset2.got = 1;
408 
409  return 1;
410 
411 err_out:
412  return 0;
413 }
414 
415 /* Decode imm v32(Iz). Return 0 if failed */
416 static int __get_immv32(struct insn *insn)
417 {
418  switch (insn->opnd_bytes) {
419  case 2:
420  insn->immediate.value = get_next(short, insn);
421  insn->immediate.nbytes = 2;
422  break;
423  case 4:
424  case 8:
425  insn->immediate.value = get_next(int, insn);
426  insn->immediate.nbytes = 4;
427  break;
428  default: /* opnd_bytes must be modified manually */
429  goto err_out;
430  }
431 
432  return 1;
433 
434 err_out:
435  return 0;
436 }
437 
438 /* Decode imm v64(Iv/Ov), Return 0 if failed */
439 static int __get_immv(struct insn *insn)
440 {
441  switch (insn->opnd_bytes) {
442  case 2:
443  insn->immediate1.value = get_next(short, insn);
444  insn->immediate1.nbytes = 2;
445  break;
446  case 4:
447  insn->immediate1.value = get_next(int, insn);
448  insn->immediate1.nbytes = 4;
449  break;
450  case 8:
451  insn->immediate1.value = get_next(int, insn);
452  insn->immediate1.nbytes = 4;
453  insn->immediate2.value = get_next(int, insn);
454  insn->immediate2.nbytes = 4;
455  break;
456  default: /* opnd_bytes must be modified manually */
457  goto err_out;
458  }
459  insn->immediate1.got = insn->immediate2.got = 1;
460 
461  return 1;
462 err_out:
463  return 0;
464 }
465 
466 /* Decode ptr16:16/32(Ap) */
467 static int __get_immptr(struct insn *insn)
468 {
469  switch (insn->opnd_bytes) {
470  case 2:
471  insn->immediate1.value = get_next(short, insn);
472  insn->immediate1.nbytes = 2;
473  break;
474  case 4:
475  insn->immediate1.value = get_next(int, insn);
476  insn->immediate1.nbytes = 4;
477  break;
478  case 8:
479  /* ptr16:64 is not exist (no segment) */
480  return 0;
481  default: /* opnd_bytes must be modified manually */
482  goto err_out;
483  }
484  insn->immediate2.value = get_next(unsigned short, insn);
485  insn->immediate2.nbytes = 2;
486  insn->immediate1.got = insn->immediate2.got = 1;
487 
488  return 1;
489 err_out:
490  return 0;
491 }
492 
502 void insn_get_immediate(struct insn *insn)
503 {
504  if (insn->immediate.got)
505  return;
506  if (!insn->displacement.got)
507  insn_get_displacement(insn);
508 
509  if (inat_has_moffset(insn->attr)) {
510  if (!__get_moffset(insn))
511  goto err_out;
512  goto done;
513  }
514 
515  if (!inat_has_immediate(insn->attr))
516  /* no immediates */
517  goto done;
518 
519  switch (inat_immediate_size(insn->attr)) {
520  case INAT_IMM_BYTE:
521  insn->immediate.value = get_next(char, insn);
522  insn->immediate.nbytes = 1;
523  break;
524  case INAT_IMM_WORD:
525  insn->immediate.value = get_next(short, insn);
526  insn->immediate.nbytes = 2;
527  break;
528  case INAT_IMM_DWORD:
529  insn->immediate.value = get_next(int, insn);
530  insn->immediate.nbytes = 4;
531  break;
532  case INAT_IMM_QWORD:
533  insn->immediate1.value = get_next(int, insn);
534  insn->immediate1.nbytes = 4;
535  insn->immediate2.value = get_next(int, insn);
536  insn->immediate2.nbytes = 4;
537  break;
538  case INAT_IMM_PTR:
539  if (!__get_immptr(insn))
540  goto err_out;
541  break;
542  case INAT_IMM_VWORD32:
543  if (!__get_immv32(insn))
544  goto err_out;
545  break;
546  case INAT_IMM_VWORD:
547  if (!__get_immv(insn))
548  goto err_out;
549  break;
550  default:
551  /* Here, insn must have an immediate, but failed */
552  goto err_out;
553  }
554  if (inat_has_second_immediate(insn->attr)) {
555  insn->immediate2.value = get_next(char, insn);
556  insn->immediate2.nbytes = 1;
557  }
558 done:
559  insn->immediate.got = 1;
560 
561 err_out:
562  return;
563 }
564 
572 void insn_get_length(struct insn *insn)
573 {
574  if (insn->length)
575  return;
576  if (!insn->immediate.got)
577  insn_get_immediate(insn);
578  insn->length = (unsigned char)((unsigned long)insn->next_byte
579  - (unsigned long)insn->kaddr);
580 }