PostgreSQL Source Code: src/backend/access/hash/hashsort.c Source File

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * hashsort.c
00004  *      Sort tuples for insertion into a new hash index.
00005  *
00006  * When building a very large hash index, we pre-sort the tuples by bucket
00007  * number to improve locality of access to the index, and thereby avoid
00008  * thrashing.  We use tuplesort.c to sort the given index tuples into order.
00009  *
00010  * Note: if the number of rows in the table has been underestimated,
00011  * bucket splits may occur during the index build.  In that case we'd
00012  * be inserting into two or more buckets for each possible masked-off
00013  * hash code value.  That's no big problem though, since we'll still have
00014  * plenty of locality of access.
00015  *
00016  *
00017  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00018  * Portions Copyright (c) 1994, Regents of the University of California
00019  *
00020  * IDENTIFICATION
00021  *    src/backend/access/hash/hashsort.c
00022  *
00023  *-------------------------------------------------------------------------
00024  */
00025 
00026 #include "postgres.h"
00027 
00028 #include "access/hash.h"
00029 #include "miscadmin.h"
00030 #include "utils/tuplesort.h"
00031 
00032 
00033 /*
00034  * Status record for spooling/sorting phase.
00035  */
00036 struct HSpool
00037 {
00038     Tuplesortstate *sortstate;  /* state data for tuplesort.c */
00039     Relation    index;
00040 };
00041 
00042 
00043 /*
00044  * create and initialize a spool structure
00045  */
00046 HSpool *
00047 _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
00048 {
00049     HSpool     *hspool = (HSpool *) palloc0(sizeof(HSpool));
00050     uint32      hash_mask;
00051 
00052     hspool->index = index;
00053 
00054     /*
00055      * Determine the bitmask for hash code values.  Since there are currently
00056      * num_buckets buckets in the index, the appropriate mask can be computed
00057      * as follows.
00058      *
00059      * Note: at present, the passed-in num_buckets is always a power of 2, so
00060      * we could just compute num_buckets - 1.  We prefer not to assume that
00061      * here, though.
00062      */
00063     hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
00064 
00065     /*
00066      * We size the sort area as maintenance_work_mem rather than work_mem to
00067      * speed index creation.  This should be OK since a single backend can't
00068      * run multiple index creations in parallel.
00069      */
00070     hspool->sortstate = tuplesort_begin_index_hash(heap,
00071                                                    index,
00072                                                    hash_mask,
00073                                                    maintenance_work_mem,
00074                                                    false);
00075 
00076     return hspool;
00077 }
00078 
00079 /*
00080  * clean up a spool structure and its substructures.
00081  */
00082 void
00083 _h_spooldestroy(HSpool *hspool)
00084 {
00085     tuplesort_end(hspool->sortstate);
00086     pfree(hspool);
00087 }
00088 
00089 /*
00090  * spool an index entry into the sort file.
00091  */
00092 void
00093 _h_spool(IndexTuple itup, HSpool *hspool)
00094 {
00095     tuplesort_putindextuple(hspool->sortstate, itup);
00096 }
00097 
00098 /*
00099  * given a spool loaded by successive calls to _h_spool,
00100  * create an entire index.
00101  */
00102 void
00103 _h_indexbuild(HSpool *hspool)
00104 {
00105     IndexTuple  itup;
00106     bool        should_free;
00107 
00108     tuplesort_performsort(hspool->sortstate);
00109 
00110     while ((itup = tuplesort_getindextuple(hspool->sortstate,
00111                                            true, &should_free)) != NULL)
00112     {
00113         _hash_doinsert(hspool->index, itup);
00114         if (should_free)
00115             pfree(itup);
00116     }
00117 }
Header And Logo

hashsort.c