Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
decompress_bunzip2.c
Go to the documentation of this file.
1 /* Small bzip2 deflate implementation, by Rob Landley ([email protected]).
2 
3  Based on bzip2 decompression code by Julian R Seward ([email protected]),
4  which also acknowledges contributions by Mike Burrows, David Wheeler,
5  Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
6  Robert Sedgewick, and Jon L. Bentley.
7 
8  This code is licensed under the LGPLv2:
9  LGPL (http://www.gnu.org/copyleft/lgpl.html
10 */
11 
12 /*
13  Size and speed optimizations by Manuel Novoa III ([email protected]).
14 
15  More efficient reading of Huffman codes, a streamlined read_bunzip()
16  function, and various other tweaks. In (limited) tests, approximately
17  20% faster than bzcat on x86 and about 10% faster on arm.
18 
19  Note that about 2/3 of the time is spent in read_unzip() reversing
20  the Burrows-Wheeler transformation. Much of that time is delay
21  resulting from cache misses.
22 
23  I would ask that anyone benefiting from this work, especially those
24  using it in commercial products, consider making a donation to my local
25  non-profit hospice organization in the name of the woman I loved, who
26  passed away Feb. 12, 2003.
27 
28  In memory of Toni W. Hagan
29 
30  Hospice of Acadiana, Inc.
31  2600 Johnston St., Suite 200
32  Lafayette, LA 70503-3240
33 
34  Phone (337) 232-1234 or 1-800-738-2226
35  Fax (337) 232-1297
36 
37  http://www.hospiceacadiana.com/
38 
39  Manuel
40  */
41 
42 /*
43  Made it fit for running in Linux Kernel by Alain Knaff ([email protected])
44 */
45 
46 
47 #ifdef STATIC
48 #define PREBOOT
49 #else
51 #endif /* STATIC */
52 
53 #include <linux/decompress/mm.h>
54 
55 #ifndef INT_MAX
56 #define INT_MAX 0x7fffffff
57 #endif
58 
59 /* Constants for Huffman coding */
60 #define MAX_GROUPS 6
61 #define GROUP_SIZE 50 /* 64 would have been more efficient */
62 #define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
63 #define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
64 #define SYMBOL_RUNA 0
65 #define SYMBOL_RUNB 1
66 
67 /* Status return values */
68 #define RETVAL_OK 0
69 #define RETVAL_LAST_BLOCK (-1)
70 #define RETVAL_NOT_BZIP_DATA (-2)
71 #define RETVAL_UNEXPECTED_INPUT_EOF (-3)
72 #define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
73 #define RETVAL_DATA_ERROR (-5)
74 #define RETVAL_OUT_OF_MEMORY (-6)
75 #define RETVAL_OBSOLETE_INPUT (-7)
76 
77 /* Other housekeeping constants */
78 #define BZIP2_IOBUF_SIZE 4096
79 
80 /* This is what we know about each Huffman coding group */
81 struct group_data {
82  /* We have an extra slot at the end of limit[] for a sentinal value. */
86  int minLen, maxLen;
87 };
88 
89 /* Structure holding all the housekeeping data, including IO buffers and
90  memory that persists between calls to bunzip */
91 struct bunzip_data {
92  /* State for interrupting output loop */
94  /* I/O tracking data (file handles, buffers, positions, etc.) */
95  int (*fill)(void*, unsigned int);
96  int inbufCount, inbufPos /*, outbufPos*/;
97  unsigned char *inbuf /*,*outbuf*/;
98  unsigned int inbufBitCount, inbufBits;
99  /* The CRC values stored in the block header and calculated from the
100  data */
101  unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
102  /* Intermediate buffer and its size (in bytes) */
103  unsigned int *dbuf, dbufSize;
104  /* These things are a bit too big to go on the stack */
105  unsigned char selectors[32768]; /* nSelectors = 15 bits */
106  struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
107  int io_error; /* non-zero if we have IO error */
108  int byteCount[256];
109  unsigned char symToByte[256], mtfSymbol[256];
110 };
111 
112 
113 /* Return the next nnn bits of input. All reads from the compressed input
114  are done through this function. All reads are big endian */
115 static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
116 {
117  unsigned int bits = 0;
118 
119  /* If we need to get more data from the byte buffer, do so.
120  (Loop getting one byte at a time to enforce endianness and avoid
121  unaligned access.) */
122  while (bd->inbufBitCount < bits_wanted) {
123  /* If we need to read more data from file into byte buffer, do
124  so */
125  if (bd->inbufPos == bd->inbufCount) {
126  if (bd->io_error)
127  return 0;
128  bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
129  if (bd->inbufCount <= 0) {
131  return 0;
132  }
133  bd->inbufPos = 0;
134  }
135  /* Avoid 32-bit overflow (dump bit buffer to top of output) */
136  if (bd->inbufBitCount >= 24) {
137  bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
138  bits_wanted -= bd->inbufBitCount;
139  bits <<= bits_wanted;
140  bd->inbufBitCount = 0;
141  }
142  /* Grab next 8 bits of input from buffer. */
143  bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
144  bd->inbufBitCount += 8;
145  }
146  /* Calculate result */
147  bd->inbufBitCount -= bits_wanted;
148  bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
149 
150  return bits;
151 }
152 
153 /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
154 
155 static int INIT get_next_block(struct bunzip_data *bd)
156 {
157  struct group_data *hufGroup = NULL;
158  int *base = NULL;
159  int *limit = NULL;
160  int dbufCount, nextSym, dbufSize, groupCount, selector,
161  i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
162  unsigned char uc, *symToByte, *mtfSymbol, *selectors;
163  unsigned int *dbuf, origPtr;
164 
165  dbuf = bd->dbuf;
166  dbufSize = bd->dbufSize;
167  selectors = bd->selectors;
168  byteCount = bd->byteCount;
169  symToByte = bd->symToByte;
170  mtfSymbol = bd->mtfSymbol;
171 
172  /* Read in header signature and CRC, then validate signature.
173  (last block signature means CRC is for whole file, return now) */
174  i = get_bits(bd, 24);
175  j = get_bits(bd, 24);
176  bd->headerCRC = get_bits(bd, 32);
177  if ((i == 0x177245) && (j == 0x385090))
178  return RETVAL_LAST_BLOCK;
179  if ((i != 0x314159) || (j != 0x265359))
180  return RETVAL_NOT_BZIP_DATA;
181  /* We can add support for blockRandomised if anybody complains.
182  There was some code for this in busybox 1.0.0-pre3, but nobody ever
183  noticed that it didn't actually work. */
184  if (get_bits(bd, 1))
185  return RETVAL_OBSOLETE_INPUT;
186  origPtr = get_bits(bd, 24);
187  if (origPtr > dbufSize)
188  return RETVAL_DATA_ERROR;
189  /* mapping table: if some byte values are never used (encoding things
190  like ascii text), the compression code removes the gaps to have fewer
191  symbols to deal with, and writes a sparse bitfield indicating which
192  values were present. We make a translation table to convert the
193  symbols back to the corresponding bytes. */
194  t = get_bits(bd, 16);
195  symTotal = 0;
196  for (i = 0; i < 16; i++) {
197  if (t&(1 << (15-i))) {
198  k = get_bits(bd, 16);
199  for (j = 0; j < 16; j++)
200  if (k&(1 << (15-j)))
201  symToByte[symTotal++] = (16*i)+j;
202  }
203  }
204  /* How many different Huffman coding groups does this block use? */
205  groupCount = get_bits(bd, 3);
206  if (groupCount < 2 || groupCount > MAX_GROUPS)
207  return RETVAL_DATA_ERROR;
208  /* nSelectors: Every GROUP_SIZE many symbols we select a new
209  Huffman coding group. Read in the group selector list,
210  which is stored as MTF encoded bit runs. (MTF = Move To
211  Front, as each value is used it's moved to the start of the
212  list.) */
213  nSelectors = get_bits(bd, 15);
214  if (!nSelectors)
215  return RETVAL_DATA_ERROR;
216  for (i = 0; i < groupCount; i++)
217  mtfSymbol[i] = i;
218  for (i = 0; i < nSelectors; i++) {
219  /* Get next value */
220  for (j = 0; get_bits(bd, 1); j++)
221  if (j >= groupCount)
222  return RETVAL_DATA_ERROR;
223  /* Decode MTF to get the next selector */
224  uc = mtfSymbol[j];
225  for (; j; j--)
226  mtfSymbol[j] = mtfSymbol[j-1];
227  mtfSymbol[0] = selectors[i] = uc;
228  }
229  /* Read the Huffman coding tables for each group, which code
230  for symTotal literal symbols, plus two run symbols (RUNA,
231  RUNB) */
232  symCount = symTotal+2;
233  for (j = 0; j < groupCount; j++) {
234  unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
235  int minLen, maxLen, pp;
236  /* Read Huffman code lengths for each symbol. They're
237  stored in a way similar to mtf; record a starting
238  value for the first symbol, and an offset from the
239  previous value for everys symbol after that.
240  (Subtracting 1 before the loop and then adding it
241  back at the end is an optimization that makes the
242  test inside the loop simpler: symbol length 0
243  becomes negative, so an unsigned inequality catches
244  it.) */
245  t = get_bits(bd, 5)-1;
246  for (i = 0; i < symCount; i++) {
247  for (;;) {
248  if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
249  return RETVAL_DATA_ERROR;
250 
251  /* If first bit is 0, stop. Else
252  second bit indicates whether to
253  increment or decrement the value.
254  Optimization: grab 2 bits and unget
255  the second if the first was 0. */
256 
257  k = get_bits(bd, 2);
258  if (k < 2) {
259  bd->inbufBitCount++;
260  break;
261  }
262  /* Add one if second bit 1, else
263  * subtract 1. Avoids if/else */
264  t += (((k+1)&2)-1);
265  }
266  /* Correct for the initial -1, to get the
267  * final symbol length */
268  length[i] = t+1;
269  }
270  /* Find largest and smallest lengths in this group */
271  minLen = maxLen = length[0];
272 
273  for (i = 1; i < symCount; i++) {
274  if (length[i] > maxLen)
275  maxLen = length[i];
276  else if (length[i] < minLen)
277  minLen = length[i];
278  }
279 
280  /* Calculate permute[], base[], and limit[] tables from
281  * length[].
282  *
283  * permute[] is the lookup table for converting
284  * Huffman coded symbols into decoded symbols. base[]
285  * is the amount to subtract from the value of a
286  * Huffman symbol of a given length when using
287  * permute[].
288  *
289  * limit[] indicates the largest numerical value a
290  * symbol with a given number of bits can have. This
291  * is how the Huffman codes can vary in length: each
292  * code with a value > limit[length] needs another
293  * bit.
294  */
295  hufGroup = bd->groups+j;
296  hufGroup->minLen = minLen;
297  hufGroup->maxLen = maxLen;
298  /* Note that minLen can't be smaller than 1, so we
299  adjust the base and limit array pointers so we're
300  not always wasting the first entry. We do this
301  again when using them (during symbol decoding).*/
302  base = hufGroup->base-1;
303  limit = hufGroup->limit-1;
304  /* Calculate permute[]. Concurrently, initialize
305  * temp[] and limit[]. */
306  pp = 0;
307  for (i = minLen; i <= maxLen; i++) {
308  temp[i] = limit[i] = 0;
309  for (t = 0; t < symCount; t++)
310  if (length[t] == i)
311  hufGroup->permute[pp++] = t;
312  }
313  /* Count symbols coded for at each bit length */
314  for (i = 0; i < symCount; i++)
315  temp[length[i]]++;
316  /* Calculate limit[] (the largest symbol-coding value
317  *at each bit length, which is (previous limit <<
318  *1)+symbols at this level), and base[] (number of
319  *symbols to ignore at each bit length, which is limit
320  *minus the cumulative count of symbols coded for
321  *already). */
322  pp = t = 0;
323  for (i = minLen; i < maxLen; i++) {
324  pp += temp[i];
325  /* We read the largest possible symbol size
326  and then unget bits after determining how
327  many we need, and those extra bits could be
328  set to anything. (They're noise from
329  future symbols.) At each level we're
330  really only interested in the first few
331  bits, so here we set all the trailing
332  to-be-ignored bits to 1 so they don't
333  affect the value > limit[length]
334  comparison. */
335  limit[i] = (pp << (maxLen - i)) - 1;
336  pp <<= 1;
337  base[i+1] = pp-(t += temp[i]);
338  }
339  limit[maxLen+1] = INT_MAX; /* Sentinal value for
340  * reading next sym. */
341  limit[maxLen] = pp+temp[maxLen]-1;
342  base[minLen] = 0;
343  }
344  /* We've finished reading and digesting the block header. Now
345  read this block's Huffman coded symbols from the file and
346  undo the Huffman coding and run length encoding, saving the
347  result into dbuf[dbufCount++] = uc */
348 
349  /* Initialize symbol occurrence counters and symbol Move To
350  * Front table */
351  for (i = 0; i < 256; i++) {
352  byteCount[i] = 0;
353  mtfSymbol[i] = (unsigned char)i;
354  }
355  /* Loop through compressed symbols. */
356  runPos = dbufCount = symCount = selector = 0;
357  for (;;) {
358  /* Determine which Huffman coding group to use. */
359  if (!(symCount--)) {
360  symCount = GROUP_SIZE-1;
361  if (selector >= nSelectors)
362  return RETVAL_DATA_ERROR;
363  hufGroup = bd->groups+selectors[selector++];
364  base = hufGroup->base-1;
365  limit = hufGroup->limit-1;
366  }
367  /* Read next Huffman-coded symbol. */
368  /* Note: It is far cheaper to read maxLen bits and
369  back up than it is to read minLen bits and then an
370  additional bit at a time, testing as we go.
371  Because there is a trailing last block (with file
372  CRC), there is no danger of the overread causing an
373  unexpected EOF for a valid compressed file. As a
374  further optimization, we do the read inline
375  (falling back to a call to get_bits if the buffer
376  runs dry). The following (up to got_huff_bits:) is
377  equivalent to j = get_bits(bd, hufGroup->maxLen);
378  */
379  while (bd->inbufBitCount < hufGroup->maxLen) {
380  if (bd->inbufPos == bd->inbufCount) {
381  j = get_bits(bd, hufGroup->maxLen);
382  goto got_huff_bits;
383  }
384  bd->inbufBits =
385  (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
386  bd->inbufBitCount += 8;
387  };
388  bd->inbufBitCount -= hufGroup->maxLen;
389  j = (bd->inbufBits >> bd->inbufBitCount)&
390  ((1 << hufGroup->maxLen)-1);
391 got_huff_bits:
392  /* Figure how how many bits are in next symbol and
393  * unget extras */
394  i = hufGroup->minLen;
395  while (j > limit[i])
396  ++i;
397  bd->inbufBitCount += (hufGroup->maxLen - i);
398  /* Huffman decode value to get nextSym (with bounds checking) */
399  if ((i > hufGroup->maxLen)
400  || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
401  >= MAX_SYMBOLS))
402  return RETVAL_DATA_ERROR;
403  nextSym = hufGroup->permute[j];
404  /* We have now decoded the symbol, which indicates
405  either a new literal byte, or a repeated run of the
406  most recent literal byte. First, check if nextSym
407  indicates a repeated run, and if so loop collecting
408  how many times to repeat the last literal. */
409  if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
410  /* If this is the start of a new run, zero out
411  * counter */
412  if (!runPos) {
413  runPos = 1;
414  t = 0;
415  }
416  /* Neat trick that saves 1 symbol: instead of
417  or-ing 0 or 1 at each bit position, add 1
418  or 2 instead. For example, 1011 is 1 << 0
419  + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1
420  + 1 << 2. You can make any bit pattern
421  that way using 1 less symbol than the basic
422  or 0/1 method (except all bits 0, which
423  would use no symbols, but a run of length 0
424  doesn't mean anything in this context).
425  Thus space is saved. */
426  t += (runPos << nextSym);
427  /* +runPos if RUNA; +2*runPos if RUNB */
428 
429  runPos <<= 1;
430  continue;
431  }
432  /* When we hit the first non-run symbol after a run,
433  we now know how many times to repeat the last
434  literal, so append that many copies to our buffer
435  of decoded symbols (dbuf) now. (The last literal
436  used is the one at the head of the mtfSymbol
437  array.) */
438  if (runPos) {
439  runPos = 0;
440  if (dbufCount+t >= dbufSize)
441  return RETVAL_DATA_ERROR;
442 
443  uc = symToByte[mtfSymbol[0]];
444  byteCount[uc] += t;
445  while (t--)
446  dbuf[dbufCount++] = uc;
447  }
448  /* Is this the terminating symbol? */
449  if (nextSym > symTotal)
450  break;
451  /* At this point, nextSym indicates a new literal
452  character. Subtract one to get the position in the
453  MTF array at which this literal is currently to be
454  found. (Note that the result can't be -1 or 0,
455  because 0 and 1 are RUNA and RUNB. But another
456  instance of the first symbol in the mtf array,
457  position 0, would have been handled as part of a
458  run above. Therefore 1 unused mtf position minus 2
459  non-literal nextSym values equals -1.) */
460  if (dbufCount >= dbufSize)
461  return RETVAL_DATA_ERROR;
462  i = nextSym - 1;
463  uc = mtfSymbol[i];
464  /* Adjust the MTF array. Since we typically expect to
465  *move only a small number of symbols, and are bound
466  *by 256 in any case, using memmove here would
467  *typically be bigger and slower due to function call
468  *overhead and other assorted setup costs. */
469  do {
470  mtfSymbol[i] = mtfSymbol[i-1];
471  } while (--i);
472  mtfSymbol[0] = uc;
473  uc = symToByte[uc];
474  /* We have our literal byte. Save it into dbuf. */
475  byteCount[uc]++;
476  dbuf[dbufCount++] = (unsigned int)uc;
477  }
478  /* At this point, we've read all the Huffman-coded symbols
479  (and repeated runs) for this block from the input stream,
480  and decoded them into the intermediate buffer. There are
481  dbufCount many decoded bytes in dbuf[]. Now undo the
482  Burrows-Wheeler transform on dbuf. See
483  http://dogma.net/markn/articles/bwt/bwt.htm
484  */
485  /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
486  j = 0;
487  for (i = 0; i < 256; i++) {
488  k = j+byteCount[i];
489  byteCount[i] = j;
490  j = k;
491  }
492  /* Figure out what order dbuf would be in if we sorted it. */
493  for (i = 0; i < dbufCount; i++) {
494  uc = (unsigned char)(dbuf[i] & 0xff);
495  dbuf[byteCount[uc]] |= (i << 8);
496  byteCount[uc]++;
497  }
498  /* Decode first byte by hand to initialize "previous" byte.
499  Note that it doesn't get output, and if the first three
500  characters are identical it doesn't qualify as a run (hence
501  writeRunCountdown = 5). */
502  if (dbufCount) {
503  if (origPtr >= dbufCount)
504  return RETVAL_DATA_ERROR;
505  bd->writePos = dbuf[origPtr];
506  bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
507  bd->writePos >>= 8;
508  bd->writeRunCountdown = 5;
509  }
510  bd->writeCount = dbufCount;
511 
512  return RETVAL_OK;
513 }
514 
515 /* Undo burrows-wheeler transform on intermediate buffer to produce output.
516  If start_bunzip was initialized with out_fd =-1, then up to len bytes of
517  data are written to outbuf. Return value is number of bytes written or
518  error (all errors are negative numbers). If out_fd!=-1, outbuf and len
519  are ignored, data is written to out_fd and return is RETVAL_OK or error.
520 */
521 
522 static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
523 {
524  const unsigned int *dbuf;
525  int pos, xcurrent, previous, gotcount;
526 
527  /* If last read was short due to end of file, return last block now */
528  if (bd->writeCount < 0)
529  return bd->writeCount;
530 
531  gotcount = 0;
532  dbuf = bd->dbuf;
533  pos = bd->writePos;
534  xcurrent = bd->writeCurrent;
535 
536  /* We will always have pending decoded data to write into the output
537  buffer unless this is the very first call (in which case we haven't
538  Huffman-decoded a block into the intermediate buffer yet). */
539 
540  if (bd->writeCopies) {
541  /* Inside the loop, writeCopies means extra copies (beyond 1) */
542  --bd->writeCopies;
543  /* Loop outputting bytes */
544  for (;;) {
545  /* If the output buffer is full, snapshot
546  * state and return */
547  if (gotcount >= len) {
548  bd->writePos = pos;
549  bd->writeCurrent = xcurrent;
550  bd->writeCopies++;
551  return len;
552  }
553  /* Write next byte into output buffer, updating CRC */
554  outbuf[gotcount++] = xcurrent;
555  bd->writeCRC = (((bd->writeCRC) << 8)
556  ^bd->crc32Table[((bd->writeCRC) >> 24)
557  ^xcurrent]);
558  /* Loop now if we're outputting multiple
559  * copies of this byte */
560  if (bd->writeCopies) {
561  --bd->writeCopies;
562  continue;
563  }
564 decode_next_byte:
565  if (!bd->writeCount--)
566  break;
567  /* Follow sequence vector to undo
568  * Burrows-Wheeler transform */
569  previous = xcurrent;
570  pos = dbuf[pos];
571  xcurrent = pos&0xff;
572  pos >>= 8;
573  /* After 3 consecutive copies of the same
574  byte, the 4th is a repeat count. We count
575  down from 4 instead *of counting up because
576  testing for non-zero is faster */
577  if (--bd->writeRunCountdown) {
578  if (xcurrent != previous)
579  bd->writeRunCountdown = 4;
580  } else {
581  /* We have a repeated run, this byte
582  * indicates the count */
583  bd->writeCopies = xcurrent;
584  xcurrent = previous;
585  bd->writeRunCountdown = 5;
586  /* Sometimes there are just 3 bytes
587  * (run length 0) */
588  if (!bd->writeCopies)
589  goto decode_next_byte;
590  /* Subtract the 1 copy we'd output
591  * anyway to get extras */
592  --bd->writeCopies;
593  }
594  }
595  /* Decompression of this block completed successfully */
596  bd->writeCRC = ~bd->writeCRC;
597  bd->totalCRC = ((bd->totalCRC << 1) |
598  (bd->totalCRC >> 31)) ^ bd->writeCRC;
599  /* If this block had a CRC error, force file level CRC error. */
600  if (bd->writeCRC != bd->headerCRC) {
601  bd->totalCRC = bd->headerCRC+1;
602  return RETVAL_LAST_BLOCK;
603  }
604  }
605 
606  /* Refill the intermediate buffer by Huffman-decoding next
607  * block of input */
608  /* (previous is just a convenient unused temp variable here) */
609  previous = get_next_block(bd);
610  if (previous) {
611  bd->writeCount = previous;
612  return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
613  }
614  bd->writeCRC = 0xffffffffUL;
615  pos = bd->writePos;
616  xcurrent = bd->writeCurrent;
617  goto decode_next_byte;
618 }
619 
620 static int INIT nofill(void *buf, unsigned int len)
621 {
622  return -1;
623 }
624 
625 /* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
626  a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
627  ignored, and data is read from file handle into temporary buffer. */
628 static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
629  int (*fill)(void*, unsigned int))
630 {
631  struct bunzip_data *bd;
632  unsigned int i, j, c;
633  const unsigned int BZh0 =
634  (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
635  +(((unsigned int)'h') << 8)+(unsigned int)'0';
636 
637  /* Figure out how much data to allocate */
638  i = sizeof(struct bunzip_data);
639 
640  /* Allocate bunzip_data. Most fields initialize to zero. */
641  bd = *bdp = malloc(i);
642  if (!bd)
643  return RETVAL_OUT_OF_MEMORY;
644  memset(bd, 0, sizeof(struct bunzip_data));
645  /* Setup input buffer */
646  bd->inbuf = inbuf;
647  bd->inbufCount = len;
648  if (fill != NULL)
649  bd->fill = fill;
650  else
651  bd->fill = nofill;
652 
653  /* Init the CRC32 table (big endian) */
654  for (i = 0; i < 256; i++) {
655  c = i << 24;
656  for (j = 8; j; j--)
657  c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
658  bd->crc32Table[i] = c;
659  }
660 
661  /* Ensure that file starts with "BZh['1'-'9']." */
662  i = get_bits(bd, 32);
663  if (((unsigned int)(i-BZh0-1)) >= 9)
664  return RETVAL_NOT_BZIP_DATA;
665 
666  /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
667  uncompressed data. Allocate intermediate buffer for block. */
668  bd->dbufSize = 100000*(i-BZh0);
669 
670  bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
671  if (!bd->dbuf)
672  return RETVAL_OUT_OF_MEMORY;
673  return RETVAL_OK;
674 }
675 
676 /* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
677  not end of file.) */
678 STATIC int INIT bunzip2(unsigned char *buf, int len,
679  int(*fill)(void*, unsigned int),
680  int(*flush)(void*, unsigned int),
681  unsigned char *outbuf,
682  int *pos,
683  void(*error)(char *x))
684 {
685  struct bunzip_data *bd;
686  int i = -1;
687  unsigned char *inbuf;
688 
689  if (flush)
690  outbuf = malloc(BZIP2_IOBUF_SIZE);
691 
692  if (!outbuf) {
693  error("Could not allocate output buffer");
694  return RETVAL_OUT_OF_MEMORY;
695  }
696  if (buf)
697  inbuf = buf;
698  else
699  inbuf = malloc(BZIP2_IOBUF_SIZE);
700  if (!inbuf) {
701  error("Could not allocate input buffer");
703  goto exit_0;
704  }
705  i = start_bunzip(&bd, inbuf, len, fill);
706  if (!i) {
707  for (;;) {
708  i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
709  if (i <= 0)
710  break;
711  if (!flush)
712  outbuf += i;
713  else
714  if (i != flush(outbuf, i)) {
716  break;
717  }
718  }
719  }
720  /* Check CRC and release memory */
721  if (i == RETVAL_LAST_BLOCK) {
722  if (bd->headerCRC != bd->totalCRC)
723  error("Data integrity error when decompressing.");
724  else
725  i = RETVAL_OK;
726  } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
727  error("Compressed file ends unexpectedly");
728  }
729  if (!bd)
730  goto exit_1;
731  if (bd->dbuf)
732  large_free(bd->dbuf);
733  if (pos)
734  *pos = bd->inbufPos;
735  free(bd);
736 exit_1:
737  if (!buf)
738  free(inbuf);
739 exit_0:
740  if (flush)
741  free(outbuf);
742  return i;
743 }
744 
745 #ifdef PREBOOT
746 STATIC int INIT decompress(unsigned char *buf, int len,
747  int(*fill)(void*, unsigned int),
748  int(*flush)(void*, unsigned int),
749  unsigned char *outbuf,
750  int *pos,
751  void(*error)(char *x))
752 {
753  return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
754 }
755 #endif