Header And Logo

PostgreSQL
| The world's most advanced open source database.

pg_lzcompress.c

Go to the documentation of this file.
00001 /* ----------
00002  * pg_lzcompress.c -
00003  *
00004  *      This is an implementation of LZ compression for PostgreSQL.
00005  *      It uses a simple history table and generates 2-3 byte tags
00006  *      capable of backward copy information for 3-273 bytes with
00007  *      a max offset of 4095.
00008  *
00009  *      Entry routines:
00010  *
00011  *          bool
00012  *          pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
00013  *                        const PGLZ_Strategy *strategy);
00014  *
00015  *              source is the input data to be compressed.
00016  *
00017  *              slen is the length of the input data.
00018  *
00019  *              dest is the output area for the compressed result.
00020  *                  It must be at least as big as PGLZ_MAX_OUTPUT(slen).
00021  *
00022  *              strategy is a pointer to some information controlling
00023  *                  the compression algorithm. If NULL, the compiled
00024  *                  in default strategy is used.
00025  *
00026  *              The return value is TRUE if compression succeeded,
00027  *              FALSE if not; in the latter case the contents of dest
00028  *              are undefined.
00029  *
00030  *          void
00031  *          pglz_decompress(const PGLZ_Header *source, char *dest)
00032  *
00033  *              source is the compressed input.
00034  *
00035  *              dest is the area where the uncompressed data will be
00036  *                  written to. It is the callers responsibility to
00037  *                  provide enough space. The required amount can be
00038  *                  obtained with the macro PGLZ_RAW_SIZE(source).
00039  *
00040  *                  The data is written to buff exactly as it was handed
00041  *                  to pglz_compress(). No terminating zero byte is added.
00042  *
00043  *      The decompression algorithm and internal data format:
00044  *
00045  *          PGLZ_Header is defined as
00046  *
00047  *              typedef struct PGLZ_Header {
00048  *                  int32       vl_len_;
00049  *                  int32       rawsize;
00050  *              }
00051  *
00052  *          The header is followed by the compressed data itself.
00053  *
00054  *          The data representation is easiest explained by describing
00055  *          the process of decompression.
00056  *
00057  *          If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data
00058  *          is stored uncompressed as plain bytes. Thus, the decompressor
00059  *          simply copies rawsize bytes from the location after the
00060  *          header to the destination.
00061  *
00062  *          Otherwise the first byte after the header tells what to do
00063  *          the next 8 times. We call this the control byte.
00064  *
00065  *          An unset bit in the control byte means, that one uncompressed
00066  *          byte follows, which is copied from input to output.
00067  *
00068  *          A set bit in the control byte means, that a tag of 2-3 bytes
00069  *          follows. A tag contains information to copy some bytes, that
00070  *          are already in the output buffer, to the current location in
00071  *          the output. Let's call the three tag bytes T1, T2 and T3. The
00072  *          position of the data to copy is coded as an offset from the
00073  *          actual output position.
00074  *
00075  *          The offset is in the upper nibble of T1 and in T2.
00076  *          The length is in the lower nibble of T1.
00077  *
00078  *          So the 16 bits of a 2 byte tag are coded as
00079  *
00080  *              7---T1--0  7---T2--0
00081  *              OOOO LLLL  OOOO OOOO
00082  *
00083  *          This limits the offset to 1-4095 (12 bits) and the length
00084  *          to 3-18 (4 bits) because 3 is always added to it. To emit
00085  *          a tag of 2 bytes with a length of 2 only saves one control
00086  *          bit. But we lose one byte in the possible length of a tag.
00087  *
00088  *          In the actual implementation, the 2 byte tag's length is
00089  *          limited to 3-17, because the value 0xF in the length nibble
00090  *          has special meaning. It means, that the next following
00091  *          byte (T3) has to be added to the length value of 18. That
00092  *          makes total limits of 1-4095 for offset and 3-273 for length.
00093  *
00094  *          Now that we have successfully decoded a tag. We simply copy
00095  *          the output that occurred <offset> bytes back to the current
00096  *          output location in the specified <length>. Thus, a
00097  *          sequence of 200 spaces (think about bpchar fields) could be
00098  *          coded in 4 bytes. One literal space and a three byte tag to
00099  *          copy 199 bytes with a -1 offset. Whow - that's a compression
00100  *          rate of 98%! Well, the implementation needs to save the
00101  *          original data size too, so we need another 4 bytes for it
00102  *          and end up with a total compression rate of 96%, what's still
00103  *          worth a Whow.
00104  *
00105  *      The compression algorithm
00106  *
00107  *          The following uses numbers used in the default strategy.
00108  *
00109  *          The compressor works best for attributes of a size between
00110  *          1K and 1M. For smaller items there's not that much chance of
00111  *          redundancy in the character sequence (except for large areas
00112  *          of identical bytes like trailing spaces) and for bigger ones
00113  *          our 4K maximum look-back distance is too small.
00114  *
00115  *          The compressor creates a table for 8192 lists of positions.
00116  *          For each input position (except the last 3), a hash key is
00117  *          built from the 4 next input bytes and the position remembered
00118  *          in the appropriate list. Thus, the table points to linked
00119  *          lists of likely to be at least in the first 4 characters
00120  *          matching strings. This is done on the fly while the input
00121  *          is compressed into the output area.  Table entries are only
00122  *          kept for the last 4096 input positions, since we cannot use
00123  *          back-pointers larger than that anyway.
00124  *
00125  *          For each byte in the input, it's hash key (built from this
00126  *          byte and the next 3) is used to find the appropriate list
00127  *          in the table. The lists remember the positions of all bytes
00128  *          that had the same hash key in the past in increasing backward
00129  *          offset order. Now for all entries in the used lists, the
00130  *          match length is computed by comparing the characters from the
00131  *          entries position with the characters from the actual input
00132  *          position.
00133  *
00134  *          The compressor starts with a so called "good_match" of 128.
00135  *          It is a "prefer speed against compression ratio" optimizer.
00136  *          So if the first entry looked at already has 128 or more
00137  *          matching characters, the lookup stops and that position is
00138  *          used for the next tag in the output.
00139  *
00140  *          For each subsequent entry in the history list, the "good_match"
00141  *          is lowered by 10%. So the compressor will be more happy with
00142  *          short matches the farer it has to go back in the history.
00143  *          Another "speed against ratio" preference characteristic of
00144  *          the algorithm.
00145  *
00146  *          Thus there are 3 stop conditions for the lookup of matches:
00147  *
00148  *              - a match >= good_match is found
00149  *              - there are no more history entries to look at
00150  *              - the next history entry is already too far back
00151  *                to be coded into a tag.
00152  *
00153  *          Finally the match algorithm checks that at least a match
00154  *          of 3 or more bytes has been found, because thats the smallest
00155  *          amount of copy information to code into a tag. If so, a tag
00156  *          is omitted and all the input bytes covered by that are just
00157  *          scanned for the history add's, otherwise a literal character
00158  *          is omitted and only his history entry added.
00159  *
00160  *      Acknowledgements:
00161  *
00162  *          Many thanks to Adisak Pochanayon, who's article about SLZ
00163  *          inspired me to write the PostgreSQL compression this way.
00164  *
00165  *          Jan Wieck
00166  *
00167  * Copyright (c) 1999-2013, PostgreSQL Global Development Group
00168  *
00169  * src/backend/utils/adt/pg_lzcompress.c
00170  * ----------
00171  */
00172 #include "postgres.h"
00173 
00174 #include <limits.h>
00175 
00176 #include "utils/pg_lzcompress.h"
00177 
00178 
00179 /* ----------
00180  * Local definitions
00181  * ----------
00182  */
00183 #define PGLZ_HISTORY_LISTS      8192    /* must be power of 2 */
00184 #define PGLZ_HISTORY_MASK       (PGLZ_HISTORY_LISTS - 1)
00185 #define PGLZ_HISTORY_SIZE       4096
00186 #define PGLZ_MAX_MATCH          273
00187 
00188 
00189 /* ----------
00190  * PGLZ_HistEntry -
00191  *
00192  *      Linked list for the backward history lookup
00193  *
00194  * All the entries sharing a hash key are linked in a doubly linked list.
00195  * This makes it easy to remove an entry when it's time to recycle it
00196  * (because it's more than 4K positions old).
00197  * ----------
00198  */
00199 typedef struct PGLZ_HistEntry
00200 {
00201     struct PGLZ_HistEntry *next;    /* links for my hash key's list */
00202     struct PGLZ_HistEntry *prev;
00203     int         hindex;         /* my current hash key */
00204     const char *pos;            /* my input position */
00205 } PGLZ_HistEntry;
00206 
00207 
00208 /* ----------
00209  * The provided standard strategies
00210  * ----------
00211  */
00212 static const PGLZ_Strategy strategy_default_data = {
00213     32,                         /* Data chunks less than 32 bytes are not
00214                                  * compressed */
00215     INT_MAX,                    /* No upper limit on what we'll try to
00216                                  * compress */
00217     25,                         /* Require 25% compression rate, or not worth
00218                                  * it */
00219     1024,                       /* Give up if no compression in the first 1KB */
00220     128,                        /* Stop history lookup if a match of 128 bytes
00221                                  * is found */
00222     10                          /* Lower good match size by 10% at every loop
00223                                  * iteration */
00224 };
00225 const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data;
00226 
00227 
00228 static const PGLZ_Strategy strategy_always_data = {
00229     0,                          /* Chunks of any size are compressed */
00230     INT_MAX,
00231     0,                          /* It's enough to save one single byte */
00232     INT_MAX,                    /* Never give up early */
00233     128,                        /* Stop history lookup if a match of 128 bytes
00234                                  * is found */
00235     6                           /* Look harder for a good match */
00236 };
00237 const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
00238 
00239 
00240 /* ----------
00241  * Statically allocated work arrays for history
00242  * ----------
00243  */
00244 static PGLZ_HistEntry *hist_start[PGLZ_HISTORY_LISTS];
00245 static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
00246 
00247 
00248 /* ----------
00249  * pglz_hist_idx -
00250  *
00251  *      Computes the history table slot for the lookup by the next 4
00252  *      characters in the input.
00253  *
00254  * NB: because we use the next 4 characters, we are not guaranteed to
00255  * find 3-character matches; they very possibly will be in the wrong
00256  * hash list.  This seems an acceptable tradeoff for spreading out the
00257  * hash keys more.
00258  * ----------
00259  */
00260 #define pglz_hist_idx(_s,_e) (                                              \
00261             ((((_e) - (_s)) < 4) ? (int) (_s)[0] :                          \
00262              (((_s)[0] << 9) ^ ((_s)[1] << 6) ^                             \
00263               ((_s)[2] << 3) ^ (_s)[3])) & (PGLZ_HISTORY_MASK)              \
00264         )
00265 
00266 
00267 /* ----------
00268  * pglz_hist_add -
00269  *
00270  *      Adds a new entry to the history table.
00271  *
00272  * If _recycle is true, then we are recycling a previously used entry,
00273  * and must first delink it from its old hashcode's linked list.
00274  *
00275  * NOTE: beware of multiple evaluations of macro's arguments, and note that
00276  * _hn and _recycle are modified in the macro.
00277  * ----------
00278  */
00279 #define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e) \
00280 do {                                    \
00281             int __hindex = pglz_hist_idx((_s),(_e));                        \
00282             PGLZ_HistEntry **__myhsp = &(_hs)[__hindex];                    \
00283             PGLZ_HistEntry *__myhe = &(_he)[_hn];                           \
00284             if (_recycle) {                                                 \
00285                 if (__myhe->prev == NULL)                                   \
00286                     (_hs)[__myhe->hindex] = __myhe->next;                   \
00287                 else                                                        \
00288                     __myhe->prev->next = __myhe->next;                      \
00289                 if (__myhe->next != NULL)                                   \
00290                     __myhe->next->prev = __myhe->prev;                      \
00291             }                                                               \
00292             __myhe->next = *__myhsp;                                        \
00293             __myhe->prev = NULL;                                            \
00294             __myhe->hindex = __hindex;                                      \
00295             __myhe->pos  = (_s);                                            \
00296             if (*__myhsp != NULL)                                           \
00297                 (*__myhsp)->prev = __myhe;                                  \
00298             *__myhsp = __myhe;                                              \
00299             if (++(_hn) >= PGLZ_HISTORY_SIZE) {                             \
00300                 (_hn) = 0;                                                  \
00301                 (_recycle) = true;                                          \
00302             }                                                               \
00303 } while (0)
00304 
00305 
00306 /* ----------
00307  * pglz_out_ctrl -
00308  *
00309  *      Outputs the last and allocates a new control byte if needed.
00310  * ----------
00311  */
00312 #define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \
00313 do { \
00314     if ((__ctrl & 0xff) == 0)                                               \
00315     {                                                                       \
00316         *(__ctrlp) = __ctrlb;                                               \
00317         __ctrlp = (__buf)++;                                                \
00318         __ctrlb = 0;                                                        \
00319         __ctrl = 1;                                                         \
00320     }                                                                       \
00321 } while (0)
00322 
00323 
00324 /* ----------
00325  * pglz_out_literal -
00326  *
00327  *      Outputs a literal byte to the destination buffer including the
00328  *      appropriate control bit.
00329  * ----------
00330  */
00331 #define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \
00332 do { \
00333     pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);                                \
00334     *(_buf)++ = (unsigned char)(_byte);                                     \
00335     _ctrl <<= 1;                                                            \
00336 } while (0)
00337 
00338 
00339 /* ----------
00340  * pglz_out_tag -
00341  *
00342  *      Outputs a backward reference tag of 2-4 bytes (depending on
00343  *      offset and length) to the destination buffer including the
00344  *      appropriate control bit.
00345  * ----------
00346  */
00347 #define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \
00348 do { \
00349     pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);                                \
00350     _ctrlb |= _ctrl;                                                        \
00351     _ctrl <<= 1;                                                            \
00352     if (_len > 17)                                                          \
00353     {                                                                       \
00354         (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f);        \
00355         (_buf)[1] = (unsigned char)(((_off) & 0xff));                       \
00356         (_buf)[2] = (unsigned char)((_len) - 18);                           \
00357         (_buf) += 3;                                                        \
00358     } else {                                                                \
00359         (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \
00360         (_buf)[1] = (unsigned char)((_off) & 0xff);                         \
00361         (_buf) += 2;                                                        \
00362     }                                                                       \
00363 } while (0)
00364 
00365 
00366 /* ----------
00367  * pglz_find_match -
00368  *
00369  *      Lookup the history table if the actual input stream matches
00370  *      another sequence of characters, starting somewhere earlier
00371  *      in the input buffer.
00372  * ----------
00373  */
00374 static inline int
00375 pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
00376                 int *lenp, int *offp, int good_match, int good_drop)
00377 {
00378     PGLZ_HistEntry *hent;
00379     int32       len = 0;
00380     int32       off = 0;
00381 
00382     /*
00383      * Traverse the linked history list until a good enough match is found.
00384      */
00385     hent = hstart[pglz_hist_idx(input, end)];
00386     while (hent)
00387     {
00388         const char *ip = input;
00389         const char *hp = hent->pos;
00390         int32       thisoff;
00391         int32       thislen;
00392 
00393         /*
00394          * Stop if the offset does not fit into our tag anymore.
00395          */
00396         thisoff = ip - hp;
00397         if (thisoff >= 0x0fff)
00398             break;
00399 
00400         /*
00401          * Determine length of match. A better match must be larger than the
00402          * best so far. And if we already have a match of 16 or more bytes,
00403          * it's worth the call overhead to use memcmp() to check if this match
00404          * is equal for the same size. After that we must fallback to
00405          * character by character comparison to know the exact position where
00406          * the diff occurred.
00407          */
00408         thislen = 0;
00409         if (len >= 16)
00410         {
00411             if (memcmp(ip, hp, len) == 0)
00412             {
00413                 thislen = len;
00414                 ip += len;
00415                 hp += len;
00416                 while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
00417                 {
00418                     thislen++;
00419                     ip++;
00420                     hp++;
00421                 }
00422             }
00423         }
00424         else
00425         {
00426             while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
00427             {
00428                 thislen++;
00429                 ip++;
00430                 hp++;
00431             }
00432         }
00433 
00434         /*
00435          * Remember this match as the best (if it is)
00436          */
00437         if (thislen > len)
00438         {
00439             len = thislen;
00440             off = thisoff;
00441         }
00442 
00443         /*
00444          * Advance to the next history entry
00445          */
00446         hent = hent->next;
00447 
00448         /*
00449          * Be happy with lesser good matches the more entries we visited. But
00450          * no point in doing calculation if we're at end of list.
00451          */
00452         if (hent)
00453         {
00454             if (len >= good_match)
00455                 break;
00456             good_match -= (good_match * good_drop) / 100;
00457         }
00458     }
00459 
00460     /*
00461      * Return match information only if it results at least in one byte
00462      * reduction.
00463      */
00464     if (len > 2)
00465     {
00466         *lenp = len;
00467         *offp = off;
00468         return 1;
00469     }
00470 
00471     return 0;
00472 }
00473 
00474 
00475 /* ----------
00476  * pglz_compress -
00477  *
00478  *      Compresses source into dest using strategy.
00479  * ----------
00480  */
00481 bool
00482 pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
00483               const PGLZ_Strategy *strategy)
00484 {
00485     unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
00486     unsigned char *bstart = bp;
00487     int         hist_next = 0;
00488     bool        hist_recycle = false;
00489     const char *dp = source;
00490     const char *dend = source + slen;
00491     unsigned char ctrl_dummy = 0;
00492     unsigned char *ctrlp = &ctrl_dummy;
00493     unsigned char ctrlb = 0;
00494     unsigned char ctrl = 0;
00495     bool        found_match = false;
00496     int32       match_len;
00497     int32       match_off;
00498     int32       good_match;
00499     int32       good_drop;
00500     int32       result_size;
00501     int32       result_max;
00502     int32       need_rate;
00503 
00504     /*
00505      * Our fallback strategy is the default.
00506      */
00507     if (strategy == NULL)
00508         strategy = PGLZ_strategy_default;
00509 
00510     /*
00511      * If the strategy forbids compression (at all or if source chunk size out
00512      * of range), fail.
00513      */
00514     if (strategy->match_size_good <= 0 ||
00515         slen < strategy->min_input_size ||
00516         slen > strategy->max_input_size)
00517         return false;
00518 
00519     /*
00520      * Save the original source size in the header.
00521      */
00522     dest->rawsize = slen;
00523 
00524     /*
00525      * Limit the match parameters to the supported range.
00526      */
00527     good_match = strategy->match_size_good;
00528     if (good_match > PGLZ_MAX_MATCH)
00529         good_match = PGLZ_MAX_MATCH;
00530     else if (good_match < 17)
00531         good_match = 17;
00532 
00533     good_drop = strategy->match_size_drop;
00534     if (good_drop < 0)
00535         good_drop = 0;
00536     else if (good_drop > 100)
00537         good_drop = 100;
00538 
00539     need_rate = strategy->min_comp_rate;
00540     if (need_rate < 0)
00541         need_rate = 0;
00542     else if (need_rate > 99)
00543         need_rate = 99;
00544 
00545     /*
00546      * Compute the maximum result size allowed by the strategy, namely the
00547      * input size minus the minimum wanted compression rate.  This had better
00548      * be <= slen, else we might overrun the provided output buffer.
00549      */
00550     if (slen > (INT_MAX / 100))
00551     {
00552         /* Approximate to avoid overflow */
00553         result_max = (slen / 100) * (100 - need_rate);
00554     }
00555     else
00556         result_max = (slen * (100 - need_rate)) / 100;
00557 
00558     /*
00559      * Initialize the history lists to empty.  We do not need to zero the
00560      * hist_entries[] array; its entries are initialized as they are used.
00561      */
00562     memset(hist_start, 0, sizeof(hist_start));
00563 
00564     /*
00565      * Compress the source directly into the output buffer.
00566      */
00567     while (dp < dend)
00568     {
00569         /*
00570          * If we already exceeded the maximum result size, fail.
00571          *
00572          * We check once per loop; since the loop body could emit as many as 4
00573          * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
00574          * allow 4 slop bytes.
00575          */
00576         if (bp - bstart >= result_max)
00577             return false;
00578 
00579         /*
00580          * If we've emitted more than first_success_by bytes without finding
00581          * anything compressible at all, fail.  This lets us fall out
00582          * reasonably quickly when looking at incompressible input (such as
00583          * pre-compressed data).
00584          */
00585         if (!found_match && bp - bstart >= strategy->first_success_by)
00586             return false;
00587 
00588         /*
00589          * Try to find a match in the history
00590          */
00591         if (pglz_find_match(hist_start, dp, dend, &match_len,
00592                             &match_off, good_match, good_drop))
00593         {
00594             /*
00595              * Create the tag and add history entries for all matched
00596              * characters.
00597              */
00598             pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
00599             while (match_len--)
00600             {
00601                 pglz_hist_add(hist_start, hist_entries,
00602                               hist_next, hist_recycle,
00603                               dp, dend);
00604                 dp++;           /* Do not do this ++ in the line above! */
00605                 /* The macro would do it four times - Jan.  */
00606             }
00607             found_match = true;
00608         }
00609         else
00610         {
00611             /*
00612              * No match found. Copy one literal byte.
00613              */
00614             pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
00615             pglz_hist_add(hist_start, hist_entries,
00616                           hist_next, hist_recycle,
00617                           dp, dend);
00618             dp++;               /* Do not do this ++ in the line above! */
00619             /* The macro would do it four times - Jan.  */
00620         }
00621     }
00622 
00623     /*
00624      * Write out the last control byte and check that we haven't overrun the
00625      * output size allowed by the strategy.
00626      */
00627     *ctrlp = ctrlb;
00628     result_size = bp - bstart;
00629     if (result_size >= result_max)
00630         return false;
00631 
00632     /*
00633      * Success - need only fill in the actual length of the compressed datum.
00634      */
00635     SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));
00636 
00637     return true;
00638 }
00639 
00640 
00641 /* ----------
00642  * pglz_decompress -
00643  *
00644  *      Decompresses source into dest.
00645  * ----------
00646  */
00647 void
00648 pglz_decompress(const PGLZ_Header *source, char *dest)
00649 {
00650     const unsigned char *sp;
00651     const unsigned char *srcend;
00652     unsigned char *dp;
00653     unsigned char *destend;
00654 
00655     sp = ((const unsigned char *) source) + sizeof(PGLZ_Header);
00656     srcend = ((const unsigned char *) source) + VARSIZE(source);
00657     dp = (unsigned char *) dest;
00658     destend = dp + source->rawsize;
00659 
00660     while (sp < srcend && dp < destend)
00661     {
00662         /*
00663          * Read one control byte and process the next 8 items (or as many as
00664          * remain in the compressed input).
00665          */
00666         unsigned char ctrl = *sp++;
00667         int         ctrlc;
00668 
00669         for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++)
00670         {
00671             if (ctrl & 1)
00672             {
00673                 /*
00674                  * Otherwise it contains the match length minus 3 and the
00675                  * upper 4 bits of the offset. The next following byte
00676                  * contains the lower 8 bits of the offset. If the length is
00677                  * coded as 18, another extension tag byte tells how much
00678                  * longer the match really was (0-255).
00679                  */
00680                 int32       len;
00681                 int32       off;
00682 
00683                 len = (sp[0] & 0x0f) + 3;
00684                 off = ((sp[0] & 0xf0) << 4) | sp[1];
00685                 sp += 2;
00686                 if (len == 18)
00687                     len += *sp++;
00688 
00689                 /*
00690                  * Check for output buffer overrun, to ensure we don't clobber
00691                  * memory in case of corrupt input.  Note: we must advance dp
00692                  * here to ensure the error is detected below the loop.  We
00693                  * don't simply put the elog inside the loop since that will
00694                  * probably interfere with optimization.
00695                  */
00696                 if (dp + len > destend)
00697                 {
00698                     dp += len;
00699                     break;
00700                 }
00701 
00702                 /*
00703                  * Now we copy the bytes specified by the tag from OUTPUT to
00704                  * OUTPUT. It is dangerous and platform dependent to use
00705                  * memcpy() here, because the copied areas could overlap
00706                  * extremely!
00707                  */
00708                 while (len--)
00709                 {
00710                     *dp = dp[-off];
00711                     dp++;
00712                 }
00713             }
00714             else
00715             {
00716                 /*
00717                  * An unset control bit means LITERAL BYTE. So we just copy
00718                  * one from INPUT to OUTPUT.
00719                  */
00720                 if (dp >= destend)      /* check for buffer overrun */
00721                     break;      /* do not clobber memory */
00722 
00723                 *dp++ = *sp++;
00724             }
00725 
00726             /*
00727              * Advance the control bit
00728              */
00729             ctrl >>= 1;
00730         }
00731     }
00732 
00733     /*
00734      * Check we decompressed the right amount.
00735      */
00736     if (dp != destend || sp != srcend)
00737         elog(ERROR, "compressed data is corrupt");
00738 
00739     /*
00740      * That's it.
00741      */
00742 }