Header And Logo

PostgreSQL
| The world's most advanced open source database.

hashutil.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * hashutil.c
00004  *    Utility code for Postgres hash implementation.
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/access/hash/hashutil.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include "access/hash.h"
00018 #include "access/reloptions.h"
00019 #include "access/relscan.h"
00020 #include "utils/lsyscache.h"
00021 #include "utils/rel.h"
00022 
00023 
00024 /*
00025  * _hash_checkqual -- does the index tuple satisfy the scan conditions?
00026  */
00027 bool
00028 _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
00029 {
00030     /*
00031      * Currently, we can't check any of the scan conditions since we do not
00032      * have the original index entry value to supply to the sk_func. Always
00033      * return true; we expect that hashgettuple already set the recheck flag
00034      * to make the main indexscan code do it.
00035      */
00036 #ifdef NOT_USED
00037     TupleDesc   tupdesc = RelationGetDescr(scan->indexRelation);
00038     ScanKey     key = scan->keyData;
00039     int         scanKeySize = scan->numberOfKeys;
00040 
00041     while (scanKeySize > 0)
00042     {
00043         Datum       datum;
00044         bool        isNull;
00045         Datum       test;
00046 
00047         datum = index_getattr(itup,
00048                               key->sk_attno,
00049                               tupdesc,
00050                               &isNull);
00051 
00052         /* assume sk_func is strict */
00053         if (isNull)
00054             return false;
00055         if (key->sk_flags & SK_ISNULL)
00056             return false;
00057 
00058         test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
00059                                  datum, key->sk_argument);
00060 
00061         if (!DatumGetBool(test))
00062             return false;
00063 
00064         key++;
00065         scanKeySize--;
00066     }
00067 #endif
00068 
00069     return true;
00070 }
00071 
00072 /*
00073  * _hash_datum2hashkey -- given a Datum, call the index's hash procedure
00074  *
00075  * The Datum is assumed to be of the index's column type, so we can use the
00076  * "primary" hash procedure that's tracked for us by the generic index code.
00077  */
00078 uint32
00079 _hash_datum2hashkey(Relation rel, Datum key)
00080 {
00081     FmgrInfo   *procinfo;
00082     Oid         collation;
00083 
00084     /* XXX assumes index has only one attribute */
00085     procinfo = index_getprocinfo(rel, 1, HASHPROC);
00086     collation = rel->rd_indcollation[0];
00087 
00088     return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
00089 }
00090 
00091 /*
00092  * _hash_datum2hashkey_type -- given a Datum of a specified type,
00093  *          hash it in a fashion compatible with this index
00094  *
00095  * This is much more expensive than _hash_datum2hashkey, so use it only in
00096  * cross-type situations.
00097  */
00098 uint32
00099 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
00100 {
00101     RegProcedure hash_proc;
00102     Oid         collation;
00103 
00104     /* XXX assumes index has only one attribute */
00105     hash_proc = get_opfamily_proc(rel->rd_opfamily[0],
00106                                   keytype,
00107                                   keytype,
00108                                   HASHPROC);
00109     if (!RegProcedureIsValid(hash_proc))
00110         elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
00111              HASHPROC, keytype, keytype,
00112              RelationGetRelationName(rel));
00113     collation = rel->rd_indcollation[0];
00114 
00115     return DatumGetUInt32(OidFunctionCall1Coll(hash_proc, collation, key));
00116 }
00117 
00118 /*
00119  * _hash_hashkey2bucket -- determine which bucket the hashkey maps to.
00120  */
00121 Bucket
00122 _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
00123                      uint32 highmask, uint32 lowmask)
00124 {
00125     Bucket      bucket;
00126 
00127     bucket = hashkey & highmask;
00128     if (bucket > maxbucket)
00129         bucket = bucket & lowmask;
00130 
00131     return bucket;
00132 }
00133 
00134 /*
00135  * _hash_log2 -- returns ceil(lg2(num))
00136  */
00137 uint32
00138 _hash_log2(uint32 num)
00139 {
00140     uint32      i,
00141                 limit;
00142 
00143     limit = 1;
00144     for (i = 0; limit < num; limit <<= 1, i++)
00145         ;
00146     return i;
00147 }
00148 
00149 /*
00150  * _hash_checkpage -- sanity checks on the format of all hash pages
00151  *
00152  * If flags is not zero, it is a bitwise OR of the acceptable values of
00153  * hasho_flag.
00154  */
00155 void
00156 _hash_checkpage(Relation rel, Buffer buf, int flags)
00157 {
00158     Page        page = BufferGetPage(buf);
00159 
00160     /*
00161      * ReadBuffer verifies that every newly-read page passes
00162      * PageHeaderIsValid, which means it either contains a reasonably sane
00163      * page header or is all-zero.  We have to defend against the all-zero
00164      * case, however.
00165      */
00166     if (PageIsNew(page))
00167         ereport(ERROR,
00168                 (errcode(ERRCODE_INDEX_CORRUPTED),
00169              errmsg("index \"%s\" contains unexpected zero page at block %u",
00170                     RelationGetRelationName(rel),
00171                     BufferGetBlockNumber(buf)),
00172                  errhint("Please REINDEX it.")));
00173 
00174     /*
00175      * Additionally check that the special area looks sane.
00176      */
00177     if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
00178         ereport(ERROR,
00179                 (errcode(ERRCODE_INDEX_CORRUPTED),
00180                  errmsg("index \"%s\" contains corrupted page at block %u",
00181                         RelationGetRelationName(rel),
00182                         BufferGetBlockNumber(buf)),
00183                  errhint("Please REINDEX it.")));
00184 
00185     if (flags)
00186     {
00187         HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
00188 
00189         if ((opaque->hasho_flag & flags) == 0)
00190             ereport(ERROR,
00191                     (errcode(ERRCODE_INDEX_CORRUPTED),
00192                    errmsg("index \"%s\" contains corrupted page at block %u",
00193                           RelationGetRelationName(rel),
00194                           BufferGetBlockNumber(buf)),
00195                      errhint("Please REINDEX it.")));
00196     }
00197 
00198     /*
00199      * When checking the metapage, also verify magic number and version.
00200      */
00201     if (flags == LH_META_PAGE)
00202     {
00203         HashMetaPage metap = HashPageGetMeta(page);
00204 
00205         if (metap->hashm_magic != HASH_MAGIC)
00206             ereport(ERROR,
00207                     (errcode(ERRCODE_INDEX_CORRUPTED),
00208                      errmsg("index \"%s\" is not a hash index",
00209                             RelationGetRelationName(rel))));
00210 
00211         if (metap->hashm_version != HASH_VERSION)
00212             ereport(ERROR,
00213                     (errcode(ERRCODE_INDEX_CORRUPTED),
00214                      errmsg("index \"%s\" has wrong hash version",
00215                             RelationGetRelationName(rel)),
00216                      errhint("Please REINDEX it.")));
00217     }
00218 }
00219 
00220 Datum
00221 hashoptions(PG_FUNCTION_ARGS)
00222 {
00223     Datum       reloptions = PG_GETARG_DATUM(0);
00224     bool        validate = PG_GETARG_BOOL(1);
00225     bytea      *result;
00226 
00227     result = default_reloptions(reloptions, validate, RELOPT_KIND_HASH);
00228 
00229     if (result)
00230         PG_RETURN_BYTEA_P(result);
00231     PG_RETURN_NULL();
00232 }
00233 
00234 /*
00235  * _hash_get_indextuple_hashkey - get the hash index tuple's hash key value
00236  */
00237 uint32
00238 _hash_get_indextuple_hashkey(IndexTuple itup)
00239 {
00240     char       *attp;
00241 
00242     /*
00243      * We assume the hash key is the first attribute and can't be null, so
00244      * this can be done crudely but very very cheaply ...
00245      */
00246     attp = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
00247     return *((uint32 *) attp);
00248 }
00249 
00250 /*
00251  * _hash_form_tuple - form an index tuple containing hash code only
00252  */
00253 IndexTuple
00254 _hash_form_tuple(Relation index, Datum *values, bool *isnull)
00255 {
00256     IndexTuple  itup;
00257     uint32      hashkey;
00258     Datum       hashkeydatum;
00259     TupleDesc   hashdesc;
00260 
00261     if (isnull[0])
00262         hashkeydatum = (Datum) 0;
00263     else
00264     {
00265         hashkey = _hash_datum2hashkey(index, values[0]);
00266         hashkeydatum = UInt32GetDatum(hashkey);
00267     }
00268     hashdesc = RelationGetDescr(index);
00269     Assert(hashdesc->natts == 1);
00270     itup = index_form_tuple(hashdesc, &hashkeydatum, isnull);
00271     return itup;
00272 }
00273 
00274 /*
00275  * _hash_binsearch - Return the offset number in the page where the
00276  *                   specified hash value should be sought or inserted.
00277  *
00278  * We use binary search, relying on the assumption that the existing entries
00279  * are ordered by hash key.
00280  *
00281  * Returns the offset of the first index entry having hashkey >= hash_value,
00282  * or the page's max offset plus one if hash_value is greater than all
00283  * existing hash keys in the page.  This is the appropriate place to start
00284  * a search, or to insert a new item.
00285  */
00286 OffsetNumber
00287 _hash_binsearch(Page page, uint32 hash_value)
00288 {
00289     OffsetNumber upper;
00290     OffsetNumber lower;
00291 
00292     /* Loop invariant: lower <= desired place <= upper */
00293     upper = PageGetMaxOffsetNumber(page) + 1;
00294     lower = FirstOffsetNumber;
00295 
00296     while (upper > lower)
00297     {
00298         OffsetNumber off;
00299         IndexTuple  itup;
00300         uint32      hashkey;
00301 
00302         off = (upper + lower) / 2;
00303         Assert(OffsetNumberIsValid(off));
00304 
00305         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
00306         hashkey = _hash_get_indextuple_hashkey(itup);
00307         if (hashkey < hash_value)
00308             lower = off + 1;
00309         else
00310             upper = off;
00311     }
00312 
00313     return lower;
00314 }
00315 
00316 /*
00317  * _hash_binsearch_last
00318  *
00319  * Same as above, except that if there are multiple matching items in the
00320  * page, we return the offset of the last one instead of the first one,
00321  * and the possible range of outputs is 0..maxoffset not 1..maxoffset+1.
00322  * This is handy for starting a new page in a backwards scan.
00323  */
00324 OffsetNumber
00325 _hash_binsearch_last(Page page, uint32 hash_value)
00326 {
00327     OffsetNumber upper;
00328     OffsetNumber lower;
00329 
00330     /* Loop invariant: lower <= desired place <= upper */
00331     upper = PageGetMaxOffsetNumber(page);
00332     lower = FirstOffsetNumber - 1;
00333 
00334     while (upper > lower)
00335     {
00336         IndexTuple  itup;
00337         OffsetNumber off;
00338         uint32      hashkey;
00339 
00340         off = (upper + lower + 1) / 2;
00341         Assert(OffsetNumberIsValid(off));
00342 
00343         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
00344         hashkey = _hash_get_indextuple_hashkey(itup);
00345         if (hashkey > hash_value)
00346             upper = off - 1;
00347         else
00348             lower = off;
00349     }
00350 
00351     return lower;
00352 }