Planeshift

xdelta3-hash.h

Go to the documentation of this file.
00001 /* xdelta 3 - delta compression tools and library
00002  * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007.  Joshua P. MacDonald
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  */
00018 
00019 #ifndef _XDELTA3_HASH_H_
00020 #define _XDELTA3_HASH_H_
00021 
00022 #if XD3_DEBUG
00023 #define SMALL_HASH_DEBUG1(s,inp)                                  \
00024   usize_t debug_state;                                            \
00025   usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash,      \
00026               xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look))
00027 #define SMALL_HASH_DEBUG2(s,inp)                                  \
00028   XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
00029               xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look)))
00030 #else
00031 #define SMALL_HASH_DEBUG1(s,inp)
00032 #define SMALL_HASH_DEBUG2(s,inp)
00033 #endif /* XD3_DEBUG */
00034 
00035 /* This is a good hash multiplier for 32-bit LCGs: see "linear
00036  * congruential generators of different sizes and good lattice
00037  * structure" */
00038 static const uint32_t hash_multiplier = 1597334677U;
00039 
00040 /***********************************************************************
00041  Permute stuff
00042  ***********************************************************************/
00043 
00044 #if HASH_PERMUTE == 0
00045 #define PERMUTE(x) (x)
00046 #else
00047 #define PERMUTE(x) (__single_hash[(uint32_t)x])
00048 
00049 static const uint16_t __single_hash[256] =
00050 {
00051   /* Random numbers generated using SLIB's pseudo-random number generator.
00052    * This hashes the input alphabet. */
00053   0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
00054   0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
00055   0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
00056   0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
00057   0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
00058   0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
00059   0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
00060   0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
00061   0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
00062   0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
00063   0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
00064   0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
00065   0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
00066   0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
00067   0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
00068   0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
00069   0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
00070   0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
00071   0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
00072   0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
00073   0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
00074   0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
00075   0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
00076   0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
00077   0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
00078   0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
00079   0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
00080   0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
00081   0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
00082   0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
00083   0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
00084   0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
00085 };
00086 #endif
00087 
00088 /* Update the checksum state. */
00089 #if ADLER_LARGE_CKSUM
00090 inline uint32_t
00091 xd3_large_cksum_update (uint32_t cksum,
00092                         const uint8_t *base,
00093                         usize_t look) {
00094   uint32_t old_c = PERMUTE(base[0]);
00095   uint32_t new_c = PERMUTE(base[look]);
00096   uint32_t low   = ((cksum & 0xffff) - old_c + new_c) & 0xffff;
00097   uint32_t high  = ((cksum >> 16) - (old_c * look) + low) & 0xffff;
00098   return (high << 16) | low;
00099 }
00100 #else
00101 // TODO: revisit this topic
00102 #endif
00103 
00104 /* Note: small cksum is hard-coded for 4 bytes */
00105 #if UNALIGNED_OK
00106 static inline uint32_t
00107 xd3_scksum (uint32_t *state,
00108             const uint8_t *base,
00109             const usize_t look)
00110 {
00111   (*state) = *(uint32_t*)base;
00112   return (*state) * hash_multiplier;
00113 }
00114 static inline uint32_t
00115 xd3_small_cksum_update (uint32_t *state,
00116                         const uint8_t *base,
00117                         usize_t look)
00118 {
00119   (*state) = *(uint32_t*)(base+1);
00120   return (*state) * hash_multiplier;
00121 }
00122 #else
00123 static inline uint32_t
00124 xd3_scksum (uint32_t *state,
00125             const uint8_t *base,
00126             const usize_t look)
00127 {
00128   (*state) = (base[0] << 24 |
00129               base[1] << 16 |
00130               base[2] << 8 |
00131               base[3]);
00132   return (*state) * hash_multiplier;
00133 }
00134 static inline uint32_t
00135 xd3_small_cksum_update (uint32_t *state,
00136                         const uint8_t *base,
00137                         const usize_t look)
00138 {
00139   (*state) <<= 8;
00140   (*state) |= base[4];
00141   return (*state) * hash_multiplier;
00142 }
00143 #endif
00144 
00145 /***********************************************************************
00146  Ctable stuff
00147  ***********************************************************************/
00148 
00149 static inline usize_t
00150 xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum)
00151 {
00152   return (cksum >> cfg->shift) ^ (cksum & cfg->mask);
00153 }
00154 
00155 /***********************************************************************
00156  Cksum function
00157  ***********************************************************************/
00158 
00159 #if ADLER_LARGE_CKSUM
00160 static inline uint32_t
00161 xd3_lcksum (const uint8_t *seg, const usize_t ln)
00162 {
00163   usize_t i = 0;
00164   uint32_t low  = 0;
00165   uint32_t high = 0;
00166 
00167   for (; i < ln; i += 1)
00168     {
00169       low  += PERMUTE(*seg++);
00170       high += low;
00171     }
00172 
00173   return ((high & 0xffff) << 16) | (low & 0xffff);
00174 }
00175 #else
00176 static inline uint32_t
00177 xd3_lcksum (const uint8_t *seg, const usize_t ln)
00178 {
00179   usize_t i, j;
00180   uint32_t h = 0;
00181   for (i = 0, j = ln - 1; i < ln; ++i, --j) {
00182     h += PERMUTE(seg[i]) * hash_multiplier_powers[j];
00183   }
00184   return h;
00185 }
00186 #endif
00187 
00188 #if XD3_ENCODER
00189 static usize_t
00190 xd3_size_log2 (usize_t slots)
00191 {
00192   int bits = 28; /* This should not be an unreasonable limit. */
00193   int i;
00194 
00195   for (i = 3; i <= bits; i += 1)
00196     {
00197       if (slots < (1U << i))
00198         {
00199           /* TODO: this is compaction=1 in checksum_test.cc and maybe should
00200            * not be fixed at -1. */
00201           bits = i - 1; 
00202           break;
00203         }
00204     }
00205 
00206   return bits;
00207 }
00208 
00209 static void
00210 xd3_size_hashtable (xd3_stream    *stream,
00211                     usize_t        slots,
00212                     xd3_hash_cfg  *cfg)
00213 {
00214   int bits = xd3_size_log2 (slots);
00215 
00216   /* TODO: there's a 32-bit assumption here */
00217   cfg->size  = (1 << bits);
00218   cfg->mask  = (cfg->size - 1);
00219   cfg->shift = 32 - bits;
00220 }
00221 #endif
00222 
00223 #endif