/* ----------
 * pg_lzcompress.c -
 *
 *        This is an implementation of LZ compression for PostgreSQL.
 *        It uses a simple history table and generates 2-3 byte tags
 *        capable of backward copy information for 3-273 bytes with
 *        a max offset of 4095.
 *
 *        Entry routines:
 *
 *            bool
 *            pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 *                          const PGLZ_Strategy *strategy);
 *
 *                source is the input data to be compressed.
 *
 *                slen is the length of the input data.
 *
 *                dest is the output area for the compressed result.
 *                    It must be at least as big as PGLZ_MAX_OUTPUT(slen).
 *
 *                strategy is a pointer to some information controlling
 *                    the compression algorithm. If NULL, the compiled-in
 *                    default strategy is used.
 *
 *                The return value is TRUE if compression succeeded,
 *                FALSE if not; in the latter case the contents of dest
 *                are undefined.
 *
 *            void
 *            pglz_decompress(const PGLZ_Header *source, char *dest)
 *
 *                source is the compressed input.
 *
 *                dest is the area where the uncompressed data will be
 *                    written to. It is the caller's responsibility to
 *                    provide enough space. The required amount can be
 *                    obtained with the macro PGLZ_RAW_SIZE(source).
 *
 *                    The data is written to dest exactly as it was handed
 *                    to pglz_compress(). No terminating zero byte is added.
 *
 *        The decompression algorithm and internal data format:
 *
 *            PGLZ_Header is defined as
 *
 *                typedef struct PGLZ_Header {
 *                    int32        vl_len_;
 *                    int32        rawsize;
 *                }
 *
 *            The header is followed by the compressed data itself.
 *
 *            The data representation is easiest explained by describing
 *            the process of decompression.
 *
 *            If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data
 *            is stored uncompressed as plain bytes. Thus, the decompressor
 *            simply copies rawsize bytes from the location after the
 *            header to the destination.
 *
 *            Otherwise the first byte after the header tells what to do
 *            the next 8 times. We call this the control byte.
 *
 *            An unset bit in the control byte means that one uncompressed
 *            byte follows, which is copied from input to output.
 *
 *            A set bit in the control byte means that a tag of 2-3 bytes
 *            follows. A tag contains information to copy some bytes that
 *            are already in the output buffer to the current location in
 *            the output. Let's call the three tag bytes T1, T2 and T3. The
 *            position of the data to copy is coded as an offset from the
 *            actual output position.
 *
 *            The offset is in the upper nibble of T1 and in T2.
 *            The length is in the lower nibble of T1.
 *
 *            So the 16 bits of a 2 byte tag are coded as
 *
 *                7---T1--0  7---T2--0
 *                OOOO LLLL  OOOO OOOO
 *
 *            This limits the offset to 1-4095 (12 bits) and the length
 *            to 3-18 (4 bits) because 3 is always added to it. Emitting
 *            a 2 byte tag for a length of 2 would only save one control
 *            bit, but it would cost us one byte in the possible length
 *            of a tag.
 *
 *            In the actual implementation, the 2 byte tag's length is
 *            limited to 3-17, because the value 0xF in the length nibble
 *            has special meaning. It means that the following byte (T3)
 *            has to be added to the length value of 18. That makes the
 *            total limits 1-4095 for the offset and 3-273 for the length.
 *
 *            Now that we have successfully decoded a tag, we simply copy
 *            the output that occurred <offset> bytes back to the current
 *            output location for the specified <length>. Thus, a
 *            sequence of 200 spaces (think about bpchar fields) could be
 *            coded in 4 bytes: one literal space and a three byte tag to
 *            copy 199 bytes with a -1 offset. Wow - that's a compression
 *            rate of 98%! Well, the implementation needs to save the
 *            original data size too, so we need another 4 bytes for it
 *            and end up with a total compression rate of 96%, which is
 *            still worth a Wow.
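 *
 *            To make that concrete (a worked example only, not an extra
 *            format rule): for those 200 spaces the compressed data
 *            section is the control byte 0x02 (bit 0 clear for the
 *            literal, bit 1 set for the tag) followed by the four data
 *            bytes just described - the literal space 0x20 and the tag
 *            0x0F 0x01 0xB5.  Decoding the tag gives length nibble 0xF,
 *            so T3 applies, offset (0x0 << 8) | 0x01 = 1, and length
 *            18 + 0xB5 = 18 + 181 = 199.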
 *
 *        The compression algorithm
 *
 *            The following description uses the numbers of the default
 *            strategy.
 *
 *            The compressor works best for attributes of a size between
 *            1K and 1M. For smaller items there's not that much chance of
 *            redundancy in the character sequence (except for large areas
 *            of identical bytes like trailing spaces) and for bigger ones
 *            our 4K maximum look-back distance is too small.
 *
 *            The compressor creates a table for 8192 lists of positions.
 *            For each input position (except the last 3), a hash key is
 *            built from the 4 next input bytes and the position remembered
 *            in the appropriate list. Thus, the table points to linked
 *            lists of strings that are likely to match in at least their
 *            first 4 characters. This is done on the fly while the input
 *            is compressed into the output area.  Table entries are only
 *            kept for the last 4096 input positions, since we cannot use
 *            back-pointers larger than that anyway.
 *
 *            For each byte in the input, its hash key (built from this
 *            byte and the next 3) is used to find the appropriate list
 *            in the table. The lists remember the positions of all bytes
 *            that had the same hash key in the past, in increasing backward
 *            offset order. Now for all entries in the used lists, the
 *            match length is computed by comparing the characters from the
 *            entry's position with the characters from the actual input
 *            position.
 *
 *            The compressor starts with a so-called "good_match" of 128.
 *            It is a "prefer speed against compression ratio" optimizer.
 *            So if the first entry looked at already has 128 or more
 *            matching characters, the lookup stops and that position is
 *            used for the next tag in the output.
 *
 *            For each subsequent entry in the history list, the "good_match"
 *            is lowered by 10%. So the compressor will be happier with
 *            short matches the farther it has to go back in the history.
 *            Another "speed against ratio" preference characteristic of
 *            the algorithm.
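 *
 *            For concreteness (just an illustration of the default
 *            numbers, using the same integer arithmetic as the code):
 *            the threshold decays 128, 116, 105, 95, 86, ... as one more
 *            history entry after another is examined.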
 *
 *            Thus there are 3 stop conditions for the lookup of matches:
 *
 *                - a match >= good_match is found
 *                - there are no more history entries to look at
 *                - the next history entry is already too far back
 *                  to be coded into a tag.
 *
 *            Finally the match algorithm checks that at least a match
 *            of 3 or more bytes has been found, because that is the
 *            smallest amount of copy information to code into a tag.
 *            If so, a tag is emitted and all the input bytes covered by
 *            it are just scanned for the history adds; otherwise a
 *            literal character is emitted and only its history entry
 *            added.
 *
 *        Acknowledgements:
 *
 *            Many thanks to Adisak Pochanayon, whose article about SLZ
 *            inspired me to write the PostgreSQL compression this way.
 *
 *            Jan Wieck
 *
 * Copyright (c) 1999-2013, PostgreSQL Global Development Group
 *
 * src/backend/utils/adt/pg_lzcompress.c
 * ----------
 */
#include "postgres.h"

#include <limits.h>

#include "utils/pg_lzcompress.h"


/* ----------
 * Local definitions
 * ----------
 */
#define PGLZ_HISTORY_LISTS        8192    /* must be power of 2 */
#define PGLZ_HISTORY_MASK        (PGLZ_HISTORY_LISTS - 1)
#define PGLZ_HISTORY_SIZE        4096
#define PGLZ_MAX_MATCH            273


/* ----------
 * PGLZ_HistEntry -
 *
 *        Linked list for the backward history lookup
 *
 * All the entries sharing a hash key are linked in a doubly linked list.
 * This makes it easy to remove an entry when it's time to recycle it
 * (because it's more than 4K positions old).
 * ----------
 */
typedef struct PGLZ_HistEntry
{
    struct PGLZ_HistEntry *next;    /* links for my hash key's list */
    struct PGLZ_HistEntry *prev;
    int            hindex;            /* my current hash key */
    const char *pos;                /* my input position */
} PGLZ_HistEntry;


/* ----------
 * The provided standard strategies
 * ----------
 */
static const PGLZ_Strategy strategy_default_data = {
    32,                            /* Data chunks less than 32 bytes are not
                                 * compressed */
    INT_MAX,                    /* No upper limit on what we'll try to
                                 * compress */
    25,                            /* Require 25% compression rate, or not worth
                                 * it */
    1024,                        /* Give up if no compression in the first 1KB */
    128,                        /* Stop history lookup if a match of 128 bytes
                                 * is found */
    10                            /* Lower good match size by 10% at every loop
                                 * iteration */
};
const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data;


static const PGLZ_Strategy strategy_always_data = {
    0,                            /* Chunks of any size are compressed */
    INT_MAX,
    0,                            /* It's enough to save one single byte */
    INT_MAX,                    /* Never give up early */
    128,                        /* Stop history lookup if a match of 128 bytes
                                 * is found */
    6                            /* Look harder for a good match */
};
const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
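

/* ----------
 * Example (illustration only; nothing below is referenced elsewhere in this
 * file): a caller-defined strategy and a compress/decompress round trip.
 * The initializers pair up, in order, with min_input_size, max_input_size,
 * min_comp_rate, first_success_by, match_size_good and match_size_drop
 * (the fields pglz_compress() reads below); out-of-range values are clamped
 * there anyway.  Buffer sizing follows the rules given in the file header
 * (PGLZ_MAX_OUTPUT for the compressed buffer, PGLZ_RAW_SIZE for the raw
 * one); palloc() stands for whatever allocation the caller actually uses,
 * and src/slen/cbuf/raw are made-up names.
 *
 *        static const PGLZ_Strategy my_strategy = {
 *            256, 1024 * 1024, 10, 4096, 64, 20
 *        };
 *
 *        PGLZ_Header *cbuf = palloc(PGLZ_MAX_OUTPUT(slen));
 *
 *        if (pglz_compress(src, slen, cbuf, &my_strategy))
 *        {
 *            char       *raw = palloc(PGLZ_RAW_SIZE(cbuf));
 *
 *            pglz_decompress(cbuf, raw);
 *        }
 *        else
 *        {
 *            ... store the data uncompressed instead ...
 *        }
 * ----------
 */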


/* ----------
 * Statically allocated work arrays for history
 * ----------
 */
static PGLZ_HistEntry *hist_start[PGLZ_HISTORY_LISTS];
static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];


/* ----------
 * pglz_hist_idx -
 *
 *        Computes the history table slot for the lookup by the next 4
 *        characters in the input.
 *
 * NB: because we use the next 4 characters, we are not guaranteed to
 * find 3-character matches; they very possibly will be in the wrong
 * hash list.  This seems an acceptable tradeoff for spreading out the
 * hash keys more.
 * ----------
 */
#define pglz_hist_idx(_s,_e) (                                            \
            ((((_e) - (_s)) < 4) ? (int) (_s)[0] :                        \
             (((_s)[0] << 9) ^ ((_s)[1] << 6) ^                            \
              ((_s)[2] << 3) ^ (_s)[3])) & (PGLZ_HISTORY_MASK)            \
        )


/* ----------
 * pglz_hist_add -
 *
 *        Adds a new entry to the history table.
 *
 * If _recycle is true, then we are recycling a previously used entry,
 * and must first delink it from its old hashcode's linked list.
 *
 * NOTE: beware of multiple evaluations of the macro's arguments, and note
 * that _hn and _recycle are modified in the macro.
 * ----------
 */
#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e)                        \
do {                                                                    \
            int __hindex = pglz_hist_idx((_s),(_e));                    \
            PGLZ_HistEntry **__myhsp = &(_hs)[__hindex];                \
            PGLZ_HistEntry *__myhe = &(_he)[_hn];                        \
            if (_recycle) {                                                \
                if (__myhe->prev == NULL)                                \
                    (_hs)[__myhe->hindex] = __myhe->next;                \
                else                                                    \
                    __myhe->prev->next = __myhe->next;                    \
                if (__myhe->next != NULL)                                \
                    __myhe->next->prev = __myhe->prev;                    \
            }                                                            \
            __myhe->next = *__myhsp;                                    \
            __myhe->prev = NULL;                                        \
            __myhe->hindex = __hindex;                                    \
            __myhe->pos  = (_s);                                        \
            if (*__myhsp != NULL)                                        \
                (*__myhsp)->prev = __myhe;                                \
            *__myhsp = __myhe;                                            \
            if (++(_hn) >= PGLZ_HISTORY_SIZE) {                            \
                (_hn) = 0;                                                \
                (_recycle) = true;                                        \
            }                                                            \
} while (0)


/* ----------
 * pglz_out_ctrl -
 *
 *        Outputs the last control byte and allocates a new one if needed.
 * ----------
 */
#define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf)                        \
do {                                                                    \
    if ((__ctrl & 0xff) == 0)                                            \
    {                                                                    \
        *(__ctrlp) = __ctrlb;                                            \
        __ctrlp = (__buf)++;                                            \
        __ctrlb = 0;                                                    \
        __ctrl = 1;                                                        \
    }                                                                    \
} while (0)


/* ----------
 * pglz_out_literal -
 *
 *        Outputs a literal byte to the destination buffer including the
 *        appropriate control bit.
 * ----------
 */
#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte)                \
do {                                                                    \
    pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);                            \
    *(_buf)++ = (unsigned char)(_byte);                                    \
    _ctrl <<= 1;                                                        \
} while (0)


/* ----------
 * pglz_out_tag -
 *
 *        Outputs a backward reference tag of 2-3 bytes (depending on
 *        the length) to the destination buffer including the
 *        appropriate control bit.
 * ----------
 */
#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off)                \
do {                                                                    \
    pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);                            \
    _ctrlb |= _ctrl;                                                    \
    _ctrl <<= 1;                                                        \
    if (_len > 17)                                                        \
    {                                                                    \
        (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f);    \
        (_buf)[1] = (unsigned char)(((_off) & 0xff));                    \
        (_buf)[2] = (unsigned char)((_len) - 18);                        \
        (_buf) += 3;                                                    \
    } else {                                                            \
        (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \
        (_buf)[1] = (unsigned char)((_off) & 0xff);                        \
        (_buf) += 2;                                                    \
    }                                                                    \
} while (0)
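

/*
 * For illustration only: with _len = 4 and _off = 18 the macro above emits
 * the two bytes 0x01 0x12 (length nibble 4 - 3 = 1, offset 0x012), while
 * _len = 20 and _off = 4095 emits the three bytes 0xff 0xff 0x02
 * (length nibble 0xf, offset 0xfff, extension byte 20 - 18 = 2).
 */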


/* ----------
 * pglz_find_match -
 *
 *        Lookup the history table to see if the actual input stream
 *        matches another sequence of characters, starting somewhere
 *        earlier in the input buffer.
 * ----------
 */
static inline int
pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
                int *lenp, int *offp, int good_match, int good_drop)
{
    PGLZ_HistEntry *hent;
    int32        len = 0;
    int32        off = 0;

    /*
     * Traverse the linked history list until a good enough match is found.
     */
    hent = hstart[pglz_hist_idx(input, end)];
    while (hent)
    {
        const char *ip = input;
        const char *hp = hent->pos;
        int32        thisoff;
        int32        thislen;

        /*
         * Stop if the offset does not fit into our tag anymore.
         */
        thisoff = ip - hp;
        if (thisoff >= 0x0fff)
            break;

        /*
         * Determine length of match. A better match must be larger than the
         * best so far. And if we already have a match of 16 or more bytes,
         * it's worth the call overhead to use memcmp() to check if this
         * match is equal for the same size. After that we must fall back to
         * character by character comparison to know the exact position where
         * the diff occurred.
         */
        thislen = 0;
        if (len >= 16)
        {
            if (memcmp(ip, hp, len) == 0)
            {
                thislen = len;
                ip += len;
                hp += len;
                while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
                {
                    thislen++;
                    ip++;
                    hp++;
                }
            }
        }
        else
        {
            while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
            {
                thislen++;
                ip++;
                hp++;
            }
        }

        /*
         * Remember this match as the best (if it is)
         */
        if (thislen > len)
        {
            len = thislen;
            off = thisoff;
        }

        /*
         * Advance to the next history entry
         */
        hent = hent->next;

        /*
         * Be happy with lesser good matches the more entries we visited. But
         * no point in doing the calculation if we're at the end of the list.
         */
        if (hent)
        {
            if (len >= good_match)
                break;
            good_match -= (good_match * good_drop) / 100;
        }
    }

    /*
     * Return match information only if it results at least in one byte
     * reduction.
     */
    if (len > 2)
    {
        *lenp = len;
        *offp = off;
        return 1;
    }

    return 0;
}


/* ----------
 * pglz_compress -
 *
 *        Compresses source into dest using strategy.
 * ----------
 */
bool
pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
              const PGLZ_Strategy *strategy)
{
    unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
    unsigned char *bstart = bp;
    int            hist_next = 0;
    bool        hist_recycle = false;
    const char *dp = source;
    const char *dend = source + slen;
    unsigned char ctrl_dummy = 0;
    unsigned char *ctrlp = &ctrl_dummy;
    unsigned char ctrlb = 0;
    unsigned char ctrl = 0;
    bool        found_match = false;
    int32        match_len;
    int32        match_off;
    int32        good_match;
    int32        good_drop;
    int32        result_size;
    int32        result_max;
    int32        need_rate;

    /*
     * Our fallback strategy is the default.
     */
    if (strategy == NULL)
        strategy = PGLZ_strategy_default;

    /*
     * If the strategy forbids compression (at all, or because the source
     * chunk size is out of range), fail.
     */
    if (strategy->match_size_good <= 0 ||
        slen < strategy->min_input_size ||
        slen > strategy->max_input_size)
        return false;

    /*
     * Save the original source size in the header.
     */
    dest->rawsize = slen;

    /*
     * Limit the match parameters to the supported range.
     */
    good_match = strategy->match_size_good;
    if (good_match > PGLZ_MAX_MATCH)
        good_match = PGLZ_MAX_MATCH;
    else if (good_match < 17)
        good_match = 17;

    good_drop = strategy->match_size_drop;
    if (good_drop < 0)
        good_drop = 0;
    else if (good_drop > 100)
        good_drop = 100;

    need_rate = strategy->min_comp_rate;
    if (need_rate < 0)
        need_rate = 0;
    else if (need_rate > 99)
        need_rate = 99;

    /*
     * Compute the maximum result size allowed by the strategy, namely the
     * input size minus the minimum wanted compression rate.  This had better
     * be <= slen, else we might overrun the provided output buffer.
     */
    if (slen > (INT_MAX / 100))
    {
        /* Approximate to avoid overflow */
        result_max = (slen / 100) * (100 - need_rate);
    }
    else
        result_max = (slen * (100 - need_rate)) / 100;
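
    /*
     * Illustration only: with the default strategy's 25% min_comp_rate and
     * slen = 1000 this makes result_max = 750, so any result that does not
     * shrink the input by at least a quarter causes failure below.
     */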

    /*
     * Initialize the history lists to empty.  We do not need to zero the
     * hist_entries[] array; its entries are initialized as they are used.
     */
    memset(hist_start, 0, sizeof(hist_start));

    /*
     * Compress the source directly into the output buffer.
     */
    while (dp < dend)
    {
        /*
         * If we already exceeded the maximum result size, fail.
         *
         * We check once per loop; since the loop body could emit as many as
         * 4 bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had
         * better allow 4 slop bytes.
         */
        if (bp - bstart >= result_max)
            return false;

        /*
         * If we've emitted more than first_success_by bytes without finding
         * anything compressible at all, fail.  This lets us fall out
         * reasonably quickly when looking at incompressible input (such as
         * pre-compressed data).
         */
        if (!found_match && bp - bstart >= strategy->first_success_by)
            return false;

        /*
         * Try to find a match in the history
         */
        if (pglz_find_match(hist_start, dp, dend, &match_len,
                            &match_off, good_match, good_drop))
        {
            /*
             * Create the tag and add history entries for all matched
             * characters.
             */
            pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
            while (match_len--)
            {
                pglz_hist_add(hist_start, hist_entries,
                              hist_next, hist_recycle,
                              dp, dend);
                dp++;            /* Do not do this ++ in the line above! */
                /* The macro would do it four times - Jan.    */
            }
            found_match = true;
        }
        else
        {
            /*
             * No match found. Copy one literal byte.
             */
            pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
            pglz_hist_add(hist_start, hist_entries,
                          hist_next, hist_recycle,
                          dp, dend);
            dp++;                /* Do not do this ++ in the line above! */
            /* The macro would do it four times - Jan.    */
        }
    }

    /*
     * Write out the last control byte and check that we haven't overrun the
     * output size allowed by the strategy.
     */
    *ctrlp = ctrlb;
    result_size = bp - bstart;
    if (result_size >= result_max)
        return false;

    /*
     * Success - need only fill in the actual length of the compressed datum.
     */
    SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));

    return true;
}


/* ----------
 * pglz_decompress -
 *
 *        Decompresses source into dest.
 * ----------
 */
void
pglz_decompress(const PGLZ_Header *source, char *dest)
{
    const unsigned char *sp;
    const unsigned char *srcend;
    unsigned char *dp;
    unsigned char *destend;

    sp = ((const unsigned char *) source) + sizeof(PGLZ_Header);
    srcend = ((const unsigned char *) source) + VARSIZE(source);
    dp = (unsigned char *) dest;
    destend = dp + source->rawsize;

    while (sp < srcend && dp < destend)
    {
        /*
         * Read one control byte and process the next 8 items (or as many as
         * remain in the compressed input).
         */
        unsigned char ctrl = *sp++;
        int            ctrlc;

        for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++)
        {
            if (ctrl & 1)
            {
                /*
                 * A set control bit means a tag follows.  Its first byte
                 * contains the upper 4 bits of the offset and the match
                 * length minus 3; the next byte contains the lower 8 bits
                 * of the offset.  If the length is coded as 18, another
                 * extension tag byte tells how much longer the match
                 * really was (0-255).
                 */
                int32        len;
                int32        off;

                len = (sp[0] & 0x0f) + 3;
                off = ((sp[0] & 0xf0) << 4) | sp[1];
                sp += 2;
                if (len == 18)
                    len += *sp++;

                /*
                 * Check for output buffer overrun, to ensure we don't
                 * clobber memory in case of corrupt input.  Note: we must
                 * advance dp here to ensure the error is detected below the
                 * loop.  We don't simply put the elog inside the loop since
                 * that will probably interfere with optimization.
                 */
                if (dp + len > destend)
                {
                    dp += len;
                    break;
                }

                /*
                 * Now we copy the bytes specified by the tag from OUTPUT to
                 * OUTPUT.  It is dangerous and platform dependent to use
                 * memcpy() here, because the copied areas could overlap
                 * extremely!
                 */
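                /*
                 * For example, a tag with off = 1 and len = 199 (the
                 * 200-spaces case from the file header) replicates the byte
                 * just written 199 times; most of the bytes memcpy() would
                 * read do not exist yet when the copy starts.
                 */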
                while (len--)
                {
                    *dp = dp[-off];
                    dp++;
                }
            }
            else
            {
                /*
                 * An unset control bit means LITERAL BYTE.  So we just copy
                 * one from INPUT to OUTPUT.
                 */
                if (dp >= destend)    /* check for buffer overrun */
                    break;            /* do not clobber memory */

                *dp++ = *sp++;
            }

            /*
             * Advance the control bit
             */
            ctrl >>= 1;
        }
    }

    /*
     * Check we decompressed the right amount.
     */
    if (dp != destend || sp != srcend)
        elog(ERROR, "compressed data is corrupt");

    /*
     * That's it.
     */
}