Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
xz_dec_bcj.c
Go to the documentation of this file.
1 /*
2  * Branch/Call/Jump (BCJ) filter decoders
3  *
4  * Authors: Lasse Collin <[email protected]>
5  * Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 #include "xz_private.h"
12 
13 /*
14  * The rest of the file is inside this ifdef. It makes things a little more
15  * convenient when building without support for any BCJ filters.
16  */
17 #ifdef XZ_DEC_BCJ
18 
19 struct xz_dec_bcj {
20  /* Type of the BCJ filter being used */
21  enum {
22  BCJ_X86 = 4, /* x86 or x86-64 */
23  BCJ_POWERPC = 5, /* Big endian only */
24  BCJ_IA64 = 6, /* Big or little endian */
25  BCJ_ARM = 7, /* Little endian only */
26  BCJ_ARMTHUMB = 8, /* Little endian only */
27  BCJ_SPARC = 9 /* Big or little endian */
28  } type;
29 
30  /*
31  * Return value of the next filter in the chain. We need to preserve
32  * this information across calls, because we must not call the next
33  * filter anymore once it has returned XZ_STREAM_END.
34  */
35  enum xz_ret ret;
36 
37  /* True if we are operating in single-call mode. */
38  bool single_call;
39 
40  /*
41  * Absolute position relative to the beginning of the uncompressed
42  * data (in a single .xz Block). We care only about the lowest 32
43  * bits so this doesn't need to be uint64_t even with big files.
44  */
45  uint32_t pos;
46 
47  /* x86 filter state */
48  uint32_t x86_prev_mask;
49 
50  /* Temporary space to hold the variables from struct xz_buf */
51  uint8_t *out;
52  size_t out_pos;
53  size_t out_size;
54 
55  struct {
56  /* Amount of already filtered data in the beginning of buf */
57  size_t filtered;
58 
59  /* Total amount of data currently stored in buf */
60  size_t size;
61 
62  /*
63  * Buffer to hold a mix of filtered and unfiltered data. This
64  * needs to be big enough to hold Alignment + 2 * Look-ahead:
65  *
66  * Type Alignment Look-ahead
67  * x86 1 4
68  * PowerPC 4 0
69  * IA-64 16 0
70  * ARM 4 0
71  * ARM-Thumb 2 2
72  * SPARC 4 0
73  */
74  uint8_t buf[16];
75  } temp;
76 };
77 
78 #ifdef XZ_DEC_X86
79 /*
80  * This is used to test the most significant byte of a memory address
81  * in an x86 instruction.
82  */
83 static inline int bcj_x86_test_msbyte(uint8_t b)
84 {
85  return b == 0x00 || b == 0xFF;
86 }
87 
88 static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
89 {
90  static const bool mask_to_allowed_status[8]
91  = { true, true, true, false, true, false, false, false };
92 
93  static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
94 
95  size_t i;
96  size_t prev_pos = (size_t)-1;
97  uint32_t prev_mask = s->x86_prev_mask;
98  uint32_t src;
99  uint32_t dest;
100  uint32_t j;
101  uint8_t b;
102 
103  if (size <= 4)
104  return 0;
105 
106  size -= 4;
107  for (i = 0; i < size; ++i) {
108  if ((buf[i] & 0xFE) != 0xE8)
109  continue;
110 
111  prev_pos = i - prev_pos;
112  if (prev_pos > 3) {
113  prev_mask = 0;
114  } else {
115  prev_mask = (prev_mask << (prev_pos - 1)) & 7;
116  if (prev_mask != 0) {
117  b = buf[i + 4 - mask_to_bit_num[prev_mask]];
118  if (!mask_to_allowed_status[prev_mask]
119  || bcj_x86_test_msbyte(b)) {
120  prev_pos = i;
121  prev_mask = (prev_mask << 1) | 1;
122  continue;
123  }
124  }
125  }
126 
127  prev_pos = i;
128 
129  if (bcj_x86_test_msbyte(buf[i + 4])) {
130  src = get_unaligned_le32(buf + i + 1);
131  while (true) {
132  dest = src - (s->pos + (uint32_t)i + 5);
133  if (prev_mask == 0)
134  break;
135 
136  j = mask_to_bit_num[prev_mask] * 8;
137  b = (uint8_t)(dest >> (24 - j));
138  if (!bcj_x86_test_msbyte(b))
139  break;
140 
141  src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
142  }
143 
144  dest &= 0x01FFFFFF;
145  dest |= (uint32_t)0 - (dest & 0x01000000);
146  put_unaligned_le32(dest, buf + i + 1);
147  i += 4;
148  } else {
149  prev_mask = (prev_mask << 1) | 1;
150  }
151  }
152 
153  prev_pos = i - prev_pos;
154  s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
155  return i;
156 }
157 #endif
158 
159 #ifdef XZ_DEC_POWERPC
160 static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
161 {
162  size_t i;
163  uint32_t instr;
164 
165  for (i = 0; i + 4 <= size; i += 4) {
166  instr = get_unaligned_be32(buf + i);
167  if ((instr & 0xFC000003) == 0x48000001) {
168  instr &= 0x03FFFFFC;
169  instr -= s->pos + (uint32_t)i;
170  instr &= 0x03FFFFFC;
171  instr |= 0x48000001;
172  put_unaligned_be32(instr, buf + i);
173  }
174  }
175 
176  return i;
177 }
178 #endif
179 
180 #ifdef XZ_DEC_IA64
181 static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
182 {
183  static const uint8_t branch_table[32] = {
184  0, 0, 0, 0, 0, 0, 0, 0,
185  0, 0, 0, 0, 0, 0, 0, 0,
186  4, 4, 6, 6, 0, 0, 7, 7,
187  4, 4, 0, 0, 4, 4, 0, 0
188  };
189 
190  /*
191  * The local variables take a little bit stack space, but it's less
192  * than what LZMA2 decoder takes, so it doesn't make sense to reduce
193  * stack usage here without doing that for the LZMA2 decoder too.
194  */
195 
196  /* Loop counters */
197  size_t i;
198  size_t j;
199 
200  /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
201  uint32_t slot;
202 
203  /* Bitwise offset of the instruction indicated by slot */
204  uint32_t bit_pos;
205 
206  /* bit_pos split into byte and bit parts */
207  uint32_t byte_pos;
208  uint32_t bit_res;
209 
210  /* Address part of an instruction */
211  uint32_t addr;
212 
213  /* Mask used to detect which instructions to convert */
214  uint32_t mask;
215 
216  /* 41-bit instruction stored somewhere in the lowest 48 bits */
217  uint64_t instr;
218 
219  /* Instruction normalized with bit_res for easier manipulation */
220  uint64_t norm;
221 
222  for (i = 0; i + 16 <= size; i += 16) {
223  mask = branch_table[buf[i] & 0x1F];
224  for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
225  if (((mask >> slot) & 1) == 0)
226  continue;
227 
228  byte_pos = bit_pos >> 3;
229  bit_res = bit_pos & 7;
230  instr = 0;
231  for (j = 0; j < 6; ++j)
232  instr |= (uint64_t)(buf[i + j + byte_pos])
233  << (8 * j);
234 
235  norm = instr >> bit_res;
236 
237  if (((norm >> 37) & 0x0F) == 0x05
238  && ((norm >> 9) & 0x07) == 0) {
239  addr = (norm >> 13) & 0x0FFFFF;
240  addr |= ((uint32_t)(norm >> 36) & 1) << 20;
241  addr <<= 4;
242  addr -= s->pos + (uint32_t)i;
243  addr >>= 4;
244 
245  norm &= ~((uint64_t)0x8FFFFF << 13);
246  norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
247  norm |= (uint64_t)(addr & 0x100000)
248  << (36 - 20);
249 
250  instr &= (1 << bit_res) - 1;
251  instr |= norm << bit_res;
252 
253  for (j = 0; j < 6; j++)
254  buf[i + j + byte_pos]
255  = (uint8_t)(instr >> (8 * j));
256  }
257  }
258  }
259 
260  return i;
261 }
262 #endif
263 
264 #ifdef XZ_DEC_ARM
265 static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
266 {
267  size_t i;
268  uint32_t addr;
269 
270  for (i = 0; i + 4 <= size; i += 4) {
271  if (buf[i + 3] == 0xEB) {
272  addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
273  | ((uint32_t)buf[i + 2] << 16);
274  addr <<= 2;
275  addr -= s->pos + (uint32_t)i + 8;
276  addr >>= 2;
277  buf[i] = (uint8_t)addr;
278  buf[i + 1] = (uint8_t)(addr >> 8);
279  buf[i + 2] = (uint8_t)(addr >> 16);
280  }
281  }
282 
283  return i;
284 }
285 #endif
286 
287 #ifdef XZ_DEC_ARMTHUMB
288 static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
289 {
290  size_t i;
291  uint32_t addr;
292 
293  for (i = 0; i + 4 <= size; i += 2) {
294  if ((buf[i + 1] & 0xF8) == 0xF0
295  && (buf[i + 3] & 0xF8) == 0xF8) {
296  addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
297  | ((uint32_t)buf[i] << 11)
298  | (((uint32_t)buf[i + 3] & 0x07) << 8)
299  | (uint32_t)buf[i + 2];
300  addr <<= 1;
301  addr -= s->pos + (uint32_t)i + 4;
302  addr >>= 1;
303  buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
304  buf[i] = (uint8_t)(addr >> 11);
305  buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
306  buf[i + 2] = (uint8_t)addr;
307  i += 2;
308  }
309  }
310 
311  return i;
312 }
313 #endif
314 
315 #ifdef XZ_DEC_SPARC
316 static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
317 {
318  size_t i;
319  uint32_t instr;
320 
321  for (i = 0; i + 4 <= size; i += 4) {
322  instr = get_unaligned_be32(buf + i);
323  if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
324  instr <<= 2;
325  instr -= s->pos + (uint32_t)i;
326  instr >>= 2;
327  instr = ((uint32_t)0x40000000 - (instr & 0x400000))
328  | 0x40000000 | (instr & 0x3FFFFF);
329  put_unaligned_be32(instr, buf + i);
330  }
331  }
332 
333  return i;
334 }
335 #endif
336 
337 /*
338  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
339  * of data that got filtered.
340  *
341  * NOTE: This is implemented as a switch statement to avoid using function
342  * pointers, which could be problematic in the kernel boot code, which must
343  * avoid pointers to static data (at least on x86).
344  */
345 static void bcj_apply(struct xz_dec_bcj *s,
346  uint8_t *buf, size_t *pos, size_t size)
347 {
348  size_t filtered;
349 
350  buf += *pos;
351  size -= *pos;
352 
353  switch (s->type) {
354 #ifdef XZ_DEC_X86
355  case BCJ_X86:
356  filtered = bcj_x86(s, buf, size);
357  break;
358 #endif
359 #ifdef XZ_DEC_POWERPC
360  case BCJ_POWERPC:
361  filtered = bcj_powerpc(s, buf, size);
362  break;
363 #endif
364 #ifdef XZ_DEC_IA64
365  case BCJ_IA64:
366  filtered = bcj_ia64(s, buf, size);
367  break;
368 #endif
369 #ifdef XZ_DEC_ARM
370  case BCJ_ARM:
371  filtered = bcj_arm(s, buf, size);
372  break;
373 #endif
374 #ifdef XZ_DEC_ARMTHUMB
375  case BCJ_ARMTHUMB:
376  filtered = bcj_armthumb(s, buf, size);
377  break;
378 #endif
379 #ifdef XZ_DEC_SPARC
380  case BCJ_SPARC:
381  filtered = bcj_sparc(s, buf, size);
382  break;
383 #endif
384  default:
385  /* Never reached but silence compiler warnings. */
386  filtered = 0;
387  break;
388  }
389 
390  *pos += filtered;
391  s->pos += filtered;
392 }
393 
394 /*
395  * Flush pending filtered data from temp to the output buffer.
396  * Move the remaining mixture of possibly filtered and unfiltered
397  * data to the beginning of temp.
398  */
399 static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
400 {
401  size_t copy_size;
402 
403  copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
404  memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
405  b->out_pos += copy_size;
406 
407  s->temp.filtered -= copy_size;
408  s->temp.size -= copy_size;
409  memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
410 }
411 
412 /*
413  * The BCJ filter functions are primitive in sense that they process the
414  * data in chunks of 1-16 bytes. To hide this issue, this function does
415  * some buffering.
416  */
417 XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
418  struct xz_dec_lzma2 *lzma2,
419  struct xz_buf *b)
420 {
421  size_t out_start;
422 
423  /*
424  * Flush pending already filtered data to the output buffer. Return
425  * immediatelly if we couldn't flush everything, or if the next
426  * filter in the chain had already returned XZ_STREAM_END.
427  */
428  if (s->temp.filtered > 0) {
429  bcj_flush(s, b);
430  if (s->temp.filtered > 0)
431  return XZ_OK;
432 
433  if (s->ret == XZ_STREAM_END)
434  return XZ_STREAM_END;
435  }
436 
437  /*
438  * If we have more output space than what is currently pending in
439  * temp, copy the unfiltered data from temp to the output buffer
440  * and try to fill the output buffer by decoding more data from the
441  * next filter in the chain. Apply the BCJ filter on the new data
442  * in the output buffer. If everything cannot be filtered, copy it
443  * to temp and rewind the output buffer position accordingly.
444  *
445  * This needs to be always run when temp.size == 0 to handle a special
446  * case where the output buffer is full and the next filter has no
447  * more output coming but hasn't returned XZ_STREAM_END yet.
448  */
449  if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
450  out_start = b->out_pos;
451  memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
452  b->out_pos += s->temp.size;
453 
454  s->ret = xz_dec_lzma2_run(lzma2, b);
455  if (s->ret != XZ_STREAM_END
456  && (s->ret != XZ_OK || s->single_call))
457  return s->ret;
458 
459  bcj_apply(s, b->out, &out_start, b->out_pos);
460 
461  /*
462  * As an exception, if the next filter returned XZ_STREAM_END,
463  * we can do that too, since the last few bytes that remain
464  * unfiltered are meant to remain unfiltered.
465  */
466  if (s->ret == XZ_STREAM_END)
467  return XZ_STREAM_END;
468 
469  s->temp.size = b->out_pos - out_start;
470  b->out_pos -= s->temp.size;
471  memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
472 
473  /*
474  * If there wasn't enough input to the next filter to fill
475  * the output buffer with unfiltered data, there's no point
476  * to try decoding more data to temp.
477  */
478  if (b->out_pos + s->temp.size < b->out_size)
479  return XZ_OK;
480  }
481 
482  /*
483  * We have unfiltered data in temp. If the output buffer isn't full
484  * yet, try to fill the temp buffer by decoding more data from the
485  * next filter. Apply the BCJ filter on temp. Then we hopefully can
486  * fill the actual output buffer by copying filtered data from temp.
487  * A mix of filtered and unfiltered data may be left in temp; it will
488  * be taken care on the next call to this function.
489  */
490  if (b->out_pos < b->out_size) {
491  /* Make b->out{,_pos,_size} temporarily point to s->temp. */
492  s->out = b->out;
493  s->out_pos = b->out_pos;
494  s->out_size = b->out_size;
495  b->out = s->temp.buf;
496  b->out_pos = s->temp.size;
497  b->out_size = sizeof(s->temp.buf);
498 
499  s->ret = xz_dec_lzma2_run(lzma2, b);
500 
501  s->temp.size = b->out_pos;
502  b->out = s->out;
503  b->out_pos = s->out_pos;
504  b->out_size = s->out_size;
505 
506  if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
507  return s->ret;
508 
509  bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
510 
511  /*
512  * If the next filter returned XZ_STREAM_END, we mark that
513  * everything is filtered, since the last unfiltered bytes
514  * of the stream are meant to be left as is.
515  */
516  if (s->ret == XZ_STREAM_END)
517  s->temp.filtered = s->temp.size;
518 
519  bcj_flush(s, b);
520  if (s->temp.filtered > 0)
521  return XZ_OK;
522  }
523 
524  return s->ret;
525 }
526 
527 XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
528 {
529  struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
530  if (s != NULL)
531  s->single_call = single_call;
532 
533  return s;
534 }
535 
536 XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
537 {
538  switch (id) {
539 #ifdef XZ_DEC_X86
540  case BCJ_X86:
541 #endif
542 #ifdef XZ_DEC_POWERPC
543  case BCJ_POWERPC:
544 #endif
545 #ifdef XZ_DEC_IA64
546  case BCJ_IA64:
547 #endif
548 #ifdef XZ_DEC_ARM
549  case BCJ_ARM:
550 #endif
551 #ifdef XZ_DEC_ARMTHUMB
552  case BCJ_ARMTHUMB:
553 #endif
554 #ifdef XZ_DEC_SPARC
555  case BCJ_SPARC:
556 #endif
557  break;
558 
559  default:
560  /* Unsupported Filter ID */
561  return XZ_OPTIONS_ERROR;
562  }
563 
564  s->type = id;
565  s->ret = XZ_OK;
566  s->pos = 0;
567  s->x86_prev_mask = 0;
568  s->temp.filtered = 0;
569  s->temp.size = 0;
570 
571  return XZ_OK;
572 }
573 
574 #endif