Header And Logo

PostgreSQL
| The world's most advanced open source database.

hash.h

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * hash.h
00004  *    header file for postgres hash access method implementation
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * src/include/access/hash.h
00011  *
00012  * NOTES
00013  *      modeled after Margo Seltzer's hash implementation for unix.
00014  *
00015  *-------------------------------------------------------------------------
00016  */
00017 #ifndef HASH_H
00018 #define HASH_H
00019 
00020 #include "access/genam.h"
00021 #include "access/itup.h"
00022 #include "access/sdir.h"
00023 #include "access/xlog.h"
00024 #include "fmgr.h"
00025 #include "storage/bufmgr.h"
00026 #include "storage/lock.h"
00027 #include "utils/relcache.h"
00028 
00029 /*
00030  * Mapping from hash bucket number to physical block number of bucket's
00031  * starting page.  Beware of multiple evaluations of argument!
00032  */
00033 typedef uint32 Bucket;
00034 
00035 #define BUCKET_TO_BLKNO(metap,B) \
00036         ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
00037 
00038 /*
00039  * Special space for hash index pages.
00040  *
00041  * hasho_flag tells us which type of page we're looking at.  For
00042  * example, knowing overflow pages from bucket pages is necessary
00043  * information when you're deleting tuples from a page. If all the
00044  * tuples are deleted from an overflow page, the overflow is made
00045  * available to other buckets by calling _hash_freeovflpage(). If all
00046  * the tuples are deleted from a bucket page, no additional action is
00047  * necessary.
00048  */
00049 #define LH_UNUSED_PAGE          (0)
00050 #define LH_OVERFLOW_PAGE        (1 << 0)
00051 #define LH_BUCKET_PAGE          (1 << 1)
00052 #define LH_BITMAP_PAGE          (1 << 2)
00053 #define LH_META_PAGE            (1 << 3)
00054 
00055 typedef struct HashPageOpaqueData
00056 {
00057     BlockNumber hasho_prevblkno;    /* previous ovfl (or bucket) blkno */
00058     BlockNumber hasho_nextblkno;    /* next ovfl blkno */
00059     Bucket      hasho_bucket;   /* bucket number this pg belongs to */
00060     uint16      hasho_flag;     /* page type code, see above */
00061     uint16      hasho_page_id;  /* for identification of hash indexes */
00062 } HashPageOpaqueData;
00063 
00064 typedef HashPageOpaqueData *HashPageOpaque;
00065 
00066 /*
00067  * The page ID is for the convenience of pg_filedump and similar utilities,
00068  * which otherwise would have a hard time telling pages of different index
00069  * types apart.  It should be the last 2 bytes on the page.  This is more or
00070  * less "free" due to alignment considerations.
00071  */
00072 #define HASHO_PAGE_ID       0xFF80
00073 
00074 /*
00075  *  HashScanOpaqueData is private state for a hash index scan.
00076  */
00077 typedef struct HashScanOpaqueData
00078 {
00079     /* Hash value of the scan key, ie, the hash key we seek */
00080     uint32      hashso_sk_hash;
00081 
00082     /*
00083      * By definition, a hash scan should be examining only one bucket. We
00084      * record the bucket number here as soon as it is known.
00085      */
00086     Bucket      hashso_bucket;
00087     bool        hashso_bucket_valid;
00088 
00089     /*
00090      * If we have a share lock on the bucket, we record it here.  When
00091      * hashso_bucket_blkno is zero, we have no such lock.
00092      */
00093     BlockNumber hashso_bucket_blkno;
00094 
00095     /*
00096      * We also want to remember which buffer we're currently examining in the
00097      * scan. We keep the buffer pinned (but not locked) across hashgettuple
00098      * calls, in order to avoid doing a ReadBuffer() for every tuple in the
00099      * index.
00100      */
00101     Buffer      hashso_curbuf;
00102 
00103     /* Current position of the scan, as an index TID */
00104     ItemPointerData hashso_curpos;
00105 
00106     /* Current position of the scan, as a heap TID */
00107     ItemPointerData hashso_heappos;
00108 } HashScanOpaqueData;
00109 
00110 typedef HashScanOpaqueData *HashScanOpaque;
00111 
00112 /*
00113  * Definitions for metapage.
00114  */
00115 
00116 #define HASH_METAPAGE   0       /* metapage is always block 0 */
00117 
00118 #define HASH_MAGIC      0x6440640
00119 #define HASH_VERSION    2       /* 2 signifies only hash key value is stored */
00120 
00121 /*
00122  * Spares[] holds the number of overflow pages currently allocated at or
00123  * before a certain splitpoint. For example, if spares[3] = 7 then there are
00124  * 7 ovflpages before splitpoint 3 (compare BUCKET_TO_BLKNO macro).  The
00125  * value in spares[ovflpoint] increases as overflow pages are added at the
00126  * end of the index.  Once ovflpoint increases (ie, we have actually allocated
00127  * the bucket pages belonging to that splitpoint) the number of spares at the
00128  * prior splitpoint cannot change anymore.
00129  *
00130  * ovflpages that have been recycled for reuse can be found by looking at
00131  * bitmaps that are stored within ovflpages dedicated for the purpose.
00132  * The blknos of these bitmap pages are kept in bitmaps[]; nmaps is the
00133  * number of currently existing bitmaps.
00134  *
00135  * The limitation on the size of spares[] comes from the fact that there's
00136  * no point in having more than 2^32 buckets with only uint32 hashcodes.
00137  * There is no particular upper limit on the size of mapp[], other than
00138  * needing to fit into the metapage.  (With 8K block size, 128 bitmaps
00139  * limit us to 64 Gb of overflow space...)
00140  */
00141 #define HASH_MAX_SPLITPOINTS        32
00142 #define HASH_MAX_BITMAPS            128
00143 
00144 typedef struct HashMetaPageData
00145 {
00146     uint32      hashm_magic;    /* magic no. for hash tables */
00147     uint32      hashm_version;  /* version ID */
00148     double      hashm_ntuples;  /* number of tuples stored in the table */
00149     uint16      hashm_ffactor;  /* target fill factor (tuples/bucket) */
00150     uint16      hashm_bsize;    /* index page size (bytes) */
00151     uint16      hashm_bmsize;   /* bitmap array size (bytes) - must be a power
00152                                  * of 2 */
00153     uint16      hashm_bmshift;  /* log2(bitmap array size in BITS) */
00154     uint32      hashm_maxbucket;    /* ID of maximum bucket in use */
00155     uint32      hashm_highmask; /* mask to modulo into entire table */
00156     uint32      hashm_lowmask;  /* mask to modulo into lower half of table */
00157     uint32      hashm_ovflpoint;/* splitpoint from which ovflpgs being
00158                                  * allocated */
00159     uint32      hashm_firstfree;    /* lowest-number free ovflpage (bit#) */
00160     uint32      hashm_nmaps;    /* number of bitmap pages */
00161     RegProcedure hashm_procid;  /* hash procedure id from pg_proc */
00162     uint32      hashm_spares[HASH_MAX_SPLITPOINTS];     /* spare pages before
00163                                                          * each splitpoint */
00164     BlockNumber hashm_mapp[HASH_MAX_BITMAPS];   /* blknos of ovfl bitmaps */
00165 } HashMetaPageData;
00166 
00167 typedef HashMetaPageData *HashMetaPage;
00168 
00169 /*
00170  * Maximum size of a hash index item (it's okay to have only one per page)
00171  */
00172 #define HashMaxItemSize(page) \
00173     MAXALIGN_DOWN(PageGetPageSize(page) - \
00174                   SizeOfPageHeaderData - \
00175                   sizeof(ItemIdData) - \
00176                   MAXALIGN(sizeof(HashPageOpaqueData)))
00177 
00178 #define HASH_MIN_FILLFACTOR         10
00179 #define HASH_DEFAULT_FILLFACTOR     75
00180 
00181 /*
00182  * Constants
00183  */
00184 #define BYTE_TO_BIT             3       /* 2^3 bits/byte */
00185 #define ALL_SET                 ((uint32) ~0)
00186 
00187 /*
00188  * Bitmap pages do not contain tuples.  They do contain the standard
00189  * page headers and trailers; however, everything in between is a
00190  * giant bit array.  The number of bits that fit on a page obviously
00191  * depends on the page size and the header/trailer overhead.  We require
00192  * the number of bits per page to be a power of 2.
00193  */
00194 #define BMPGSZ_BYTE(metap)      ((metap)->hashm_bmsize)
00195 #define BMPGSZ_BIT(metap)       ((metap)->hashm_bmsize << BYTE_TO_BIT)
00196 #define BMPG_SHIFT(metap)       ((metap)->hashm_bmshift)
00197 #define BMPG_MASK(metap)        (BMPGSZ_BIT(metap) - 1)
00198 
00199 #define HashPageGetBitmap(page) \
00200     ((uint32 *) PageGetContents(page))
00201 
00202 #define HashGetMaxBitmapSize(page) \
00203     (PageGetPageSize((Page) page) - \
00204      (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData))))
00205 
00206 #define HashPageGetMeta(page) \
00207     ((HashMetaPage) PageGetContents(page))
00208 
00209 /*
00210  * The number of bits in an ovflpage bitmap word.
00211  */
00212 #define BITS_PER_MAP    32      /* Number of bits in uint32 */
00213 
00214 /* Given the address of the beginning of a bit map, clear/set the nth bit */
00215 #define CLRBIT(A, N)    ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
00216 #define SETBIT(A, N)    ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
00217 #define ISSET(A, N)     ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
00218 
00219 /*
00220  * page-level and high-level locking modes (see README)
00221  */
00222 #define HASH_READ       BUFFER_LOCK_SHARE
00223 #define HASH_WRITE      BUFFER_LOCK_EXCLUSIVE
00224 #define HASH_NOLOCK     (-1)
00225 
00226 #define HASH_SHARE      ShareLock
00227 #define HASH_EXCLUSIVE  ExclusiveLock
00228 
00229 /*
00230  *  Strategy number. There's only one valid strategy for hashing: equality.
00231  */
00232 #define HTEqualStrategyNumber           1
00233 #define HTMaxStrategyNumber             1
00234 
00235 /*
00236  *  When a new operator class is declared, we require that the user supply
00237  *  us with an amproc procudure for hashing a key of the new type.
00238  *  Since we only have one such proc in amproc, it's number 1.
00239  */
00240 #define HASHPROC        1
00241 
00242 
00243 /* public routines */
00244 
00245 extern Datum hashbuild(PG_FUNCTION_ARGS);
00246 extern Datum hashbuildempty(PG_FUNCTION_ARGS);
00247 extern Datum hashinsert(PG_FUNCTION_ARGS);
00248 extern Datum hashbeginscan(PG_FUNCTION_ARGS);
00249 extern Datum hashgettuple(PG_FUNCTION_ARGS);
00250 extern Datum hashgetbitmap(PG_FUNCTION_ARGS);
00251 extern Datum hashrescan(PG_FUNCTION_ARGS);
00252 extern Datum hashendscan(PG_FUNCTION_ARGS);
00253 extern Datum hashmarkpos(PG_FUNCTION_ARGS);
00254 extern Datum hashrestrpos(PG_FUNCTION_ARGS);
00255 extern Datum hashbulkdelete(PG_FUNCTION_ARGS);
00256 extern Datum hashvacuumcleanup(PG_FUNCTION_ARGS);
00257 extern Datum hashoptions(PG_FUNCTION_ARGS);
00258 
00259 /*
00260  * Datatype-specific hash functions in hashfunc.c.
00261  *
00262  * These support both hash indexes and hash joins.
00263  *
00264  * NOTE: some of these are also used by catcache operations, without
00265  * any direct connection to hash indexes.  Also, the common hash_any
00266  * routine is also used by dynahash tables.
00267  */
00268 extern Datum hashchar(PG_FUNCTION_ARGS);
00269 extern Datum hashint2(PG_FUNCTION_ARGS);
00270 extern Datum hashint4(PG_FUNCTION_ARGS);
00271 extern Datum hashint8(PG_FUNCTION_ARGS);
00272 extern Datum hashoid(PG_FUNCTION_ARGS);
00273 extern Datum hashenum(PG_FUNCTION_ARGS);
00274 extern Datum hashfloat4(PG_FUNCTION_ARGS);
00275 extern Datum hashfloat8(PG_FUNCTION_ARGS);
00276 extern Datum hashoidvector(PG_FUNCTION_ARGS);
00277 extern Datum hashint2vector(PG_FUNCTION_ARGS);
00278 extern Datum hashname(PG_FUNCTION_ARGS);
00279 extern Datum hashtext(PG_FUNCTION_ARGS);
00280 extern Datum hashvarlena(PG_FUNCTION_ARGS);
00281 extern Datum hash_any(register const unsigned char *k, register int keylen);
00282 extern Datum hash_uint32(uint32 k);
00283 
00284 /* private routines */
00285 
00286 /* hashinsert.c */
00287 extern void _hash_doinsert(Relation rel, IndexTuple itup);
00288 extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
00289                Size itemsize, IndexTuple itup);
00290 
00291 /* hashovfl.c */
00292 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
00293 extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
00294                    BufferAccessStrategy bstrategy);
00295 extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
00296                  BlockNumber blkno, ForkNumber forkNum);
00297 extern void _hash_squeezebucket(Relation rel,
00298                     Bucket bucket, BlockNumber bucket_blkno,
00299                     BufferAccessStrategy bstrategy);
00300 
00301 /* hashpage.c */
00302 extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
00303 extern bool _hash_try_getlock(Relation rel, BlockNumber whichlock, int access);
00304 extern void _hash_droplock(Relation rel, BlockNumber whichlock, int access);
00305 extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
00306              int access, int flags);
00307 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
00308 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
00309                 ForkNumber forkNum);
00310 extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
00311                            int access, int flags,
00312                            BufferAccessStrategy bstrategy);
00313 extern void _hash_relbuf(Relation rel, Buffer buf);
00314 extern void _hash_dropbuf(Relation rel, Buffer buf);
00315 extern void _hash_wrtbuf(Relation rel, Buffer buf);
00316 extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
00317                    int to_access);
00318 extern uint32 _hash_metapinit(Relation rel, double num_tuples,
00319                 ForkNumber forkNum);
00320 extern void _hash_pageinit(Page page, Size size);
00321 extern void _hash_expandtable(Relation rel, Buffer metabuf);
00322 
00323 /* hashscan.c */
00324 extern void _hash_regscan(IndexScanDesc scan);
00325 extern void _hash_dropscan(IndexScanDesc scan);
00326 extern bool _hash_has_active_scan(Relation rel, Bucket bucket);
00327 extern void ReleaseResources_hash(void);
00328 
00329 /* hashsearch.c */
00330 extern bool _hash_next(IndexScanDesc scan, ScanDirection dir);
00331 extern bool _hash_first(IndexScanDesc scan, ScanDirection dir);
00332 extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
00333 
00334 /* hashsort.c */
00335 typedef struct HSpool HSpool;   /* opaque struct in hashsort.c */
00336 
00337 extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets);
00338 extern void _h_spooldestroy(HSpool *hspool);
00339 extern void _h_spool(IndexTuple itup, HSpool *hspool);
00340 extern void _h_indexbuild(HSpool *hspool);
00341 
00342 /* hashutil.c */
00343 extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
00344 extern uint32 _hash_datum2hashkey(Relation rel, Datum key);
00345 extern uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype);
00346 extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
00347                      uint32 highmask, uint32 lowmask);
00348 extern uint32 _hash_log2(uint32 num);
00349 extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
00350 extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup);
00351 extern IndexTuple _hash_form_tuple(Relation index,
00352                  Datum *values, bool *isnull);
00353 extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
00354 extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
00355 
00356 /* hash.c */
00357 extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
00358 extern void hash_desc(StringInfo buf, uint8 xl_info, char *rec);
00359 
00360 #endif   /* HASH_H */