00001 /*- 00002 * See the file LICENSE for redistribution information. 00003 * 00004 * Copyright (c) 1996-2005 00005 * Sleepycat Software. All rights reserved. 00006 */ 00007 /* 00008 * Copyright (c) 1990, 1993 00009 * Margo Seltzer. All rights reserved. 00010 */ 00011 /* 00012 * Copyright (c) 1990, 1993 00013 * The Regents of the University of California. All rights reserved. 00014 * 00015 * This code is derived from software contributed to Berkeley by 00016 * Margo Seltzer. 00017 * 00018 * Redistribution and use in source and binary forms, with or without 00019 * modification, are permitted provided that the following conditions 00020 * are met: 00021 * 1. Redistributions of source code must retain the above copyright 00022 * notice, this list of conditions and the following disclaimer. 00023 * 2. Redistributions in binary form must reproduce the above copyright 00024 * notice, this list of conditions and the following disclaimer in the 00025 * documentation and/or other materials provided with the distribution. 00026 * 3. Neither the name of the University nor the names of its contributors 00027 * may be used to endorse or promote products derived from this software 00028 * without specific prior written permission. 00029 * 00030 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 00031 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00032 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00033 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 00034 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00035 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00036 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00037 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00038 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00039 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00040 * SUCH DAMAGE. 00041 * 00042 * $Id: hash_func.c,v 12.1 2005/06/16 20:22:52 bostic Exp $ 00043 */ 00044 00045 #include "db_config.h" 00046 00047 #ifndef NO_SYSTEM_INCLUDES 00048 #include <sys/types.h> 00049 #endif 00050 00051 #include "db_int.h" 00052 #include "dbinc/db_page.h" 00053 #include "dbinc/hash.h" 00054 00055 /* 00056 * __ham_func2 -- 00057 * Phong Vo's linear congruential hash. 00058 * 00059 * PUBLIC: u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t)); 00060 */ 00061 #define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) 00062 00063 u_int32_t 00064 __ham_func2(dbp, key, len) 00065 DB *dbp; 00066 const void *key; 00067 u_int32_t len; 00068 { 00069 const u_int8_t *e, *k; 00070 u_int32_t h; 00071 u_int8_t c; 00072 00073 if (dbp != NULL) 00074 COMPQUIET(dbp, NULL); 00075 00076 k = key; 00077 e = k + len; 00078 for (h = 0; k != e;) { 00079 c = *k++; 00080 if (!c && k > e) 00081 break; 00082 DCHARHASH(h, c); 00083 } 00084 return (h); 00085 } 00086 00087 /* 00088 * __ham_func3 -- 00089 * Ozan Yigit's original sdbm hash. 00090 * 00091 * Ugly, but fast. Break the string up into 8 byte units. On the first time 00092 * through the loop get the "leftover bytes" (strlen % 8). On every other 00093 * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this 00094 * saves us 7 cmp & branch instructions. 00095 * 00096 * PUBLIC: u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t)); 00097 */ 00098 u_int32_t 00099 __ham_func3(dbp, key, len) 00100 DB *dbp; 00101 const void *key; 00102 u_int32_t len; 00103 { 00104 const u_int8_t *k; 00105 u_int32_t n, loop; 00106 00107 if (dbp != NULL) 00108 COMPQUIET(dbp, NULL); 00109 00110 if (len == 0) 00111 return (0); 00112 00113 #define HASHC n = *k++ + 65599 * n 00114 n = 0; 00115 k = key; 00116 00117 loop = (len + 8 - 1) >> 3; 00118 switch (len & (8 - 1)) { 00119 case 0: 00120 do { 00121 HASHC; 00122 case 7: 00123 HASHC; 00124 case 6: 00125 HASHC; 00126 case 5: 00127 HASHC; 00128 case 4: 00129 HASHC; 00130 case 3: 00131 HASHC; 00132 case 2: 00133 HASHC; 00134 case 1: 00135 HASHC; 00136 } while (--loop); 00137 } 00138 return (n); 00139 } 00140 00141 /* 00142 * __ham_func4 -- 00143 * Chris Torek's hash function. Although this function performs only 00144 * slightly worse than __ham_func5 on strings, it performs horribly on 00145 * numbers. 00146 * 00147 * PUBLIC: u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t)); 00148 */ 00149 u_int32_t 00150 __ham_func4(dbp, key, len) 00151 DB *dbp; 00152 const void *key; 00153 u_int32_t len; 00154 { 00155 const u_int8_t *k; 00156 u_int32_t h, loop; 00157 00158 if (dbp != NULL) 00159 COMPQUIET(dbp, NULL); 00160 00161 if (len == 0) 00162 return (0); 00163 00164 #define HASH4a h = (h << 5) - h + *k++; 00165 #define HASH4b h = (h << 5) + h + *k++; 00166 #define HASH4 HASH4b 00167 h = 0; 00168 k = key; 00169 00170 loop = (len + 8 - 1) >> 3; 00171 switch (len & (8 - 1)) { 00172 case 0: 00173 do { 00174 HASH4; 00175 case 7: 00176 HASH4; 00177 case 6: 00178 HASH4; 00179 case 5: 00180 HASH4; 00181 case 4: 00182 HASH4; 00183 case 3: 00184 HASH4; 00185 case 2: 00186 HASH4; 00187 case 1: 00188 HASH4; 00189 } while (--loop); 00190 } 00191 return (h); 00192 } 00193 00194 /* 00195 * Fowler/Noll/Vo hash 00196 * 00197 * The basis of the hash algorithm was taken from an idea sent by email to the 00198 * IEEE Posix P1003.2 mailing list from Phong Vo ([email protected]) and 00199 * Glenn Fowler ([email protected]). Landon Curt Noll ([email protected]) 00200 * later improved on their algorithm. 00201 * 00202 * The magic is in the interesting relationship between the special prime 00203 * 16777619 (2^24 + 403) and 2^32 and 2^8. 00204 * 00205 * This hash produces the fewest collisions of any function that we've seen so 00206 * far, and works well on both numbers and strings. 00207 * 00208 * PUBLIC: u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t)); 00209 */ 00210 u_int32_t 00211 __ham_func5(dbp, key, len) 00212 DB *dbp; 00213 const void *key; 00214 u_int32_t len; 00215 { 00216 const u_int8_t *k, *e; 00217 u_int32_t h; 00218 00219 if (dbp != NULL) 00220 COMPQUIET(dbp, NULL); 00221 00222 k = key; 00223 e = k + len; 00224 for (h = 0; k < e; ++k) { 00225 h *= 16777619; 00226 h ^= *k; 00227 } 00228 return (h); 00229 } 00230 00231 /* 00232 * __ham_test -- 00233 * 00234 * PUBLIC: u_int32_t __ham_test __P((DB *, const void *, u_int32_t)); 00235 */ 00236 u_int32_t 00237 __ham_test(dbp, key, len) 00238 DB *dbp; 00239 const void *key; 00240 u_int32_t len; 00241 { 00242 COMPQUIET(dbp, NULL); 00243 COMPQUIET(len, 0); 00244 return ((u_int32_t)*(char *)key); 00245 }