00001 /*------------------------------------------------------------------------- 00002 * 00003 * hashsort.c 00004 * Sort tuples for insertion into a new hash index. 00005 * 00006 * When building a very large hash index, we pre-sort the tuples by bucket 00007 * number to improve locality of access to the index, and thereby avoid 00008 * thrashing. We use tuplesort.c to sort the given index tuples into order. 00009 * 00010 * Note: if the number of rows in the table has been underestimated, 00011 * bucket splits may occur during the index build. In that case we'd 00012 * be inserting into two or more buckets for each possible masked-off 00013 * hash code value. That's no big problem though, since we'll still have 00014 * plenty of locality of access. 00015 * 00016 * 00017 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00018 * Portions Copyright (c) 1994, Regents of the University of California 00019 * 00020 * IDENTIFICATION 00021 * src/backend/access/hash/hashsort.c 00022 * 00023 *------------------------------------------------------------------------- 00024 */ 00025 00026 #include "postgres.h" 00027 00028 #include "access/hash.h" 00029 #include "miscadmin.h" 00030 #include "utils/tuplesort.h" 00031 00032 00033 /* 00034 * Status record for spooling/sorting phase. 00035 */ 00036 struct HSpool 00037 { 00038 Tuplesortstate *sortstate; /* state data for tuplesort.c */ 00039 Relation index; 00040 }; 00041 00042 00043 /* 00044 * create and initialize a spool structure 00045 */ 00046 HSpool * 00047 _h_spoolinit(Relation heap, Relation index, uint32 num_buckets) 00048 { 00049 HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool)); 00050 uint32 hash_mask; 00051 00052 hspool->index = index; 00053 00054 /* 00055 * Determine the bitmask for hash code values. Since there are currently 00056 * num_buckets buckets in the index, the appropriate mask can be computed 00057 * as follows. 00058 * 00059 * Note: at present, the passed-in num_buckets is always a power of 2, so 00060 * we could just compute num_buckets - 1. We prefer not to assume that 00061 * here, though. 00062 */ 00063 hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1; 00064 00065 /* 00066 * We size the sort area as maintenance_work_mem rather than work_mem to 00067 * speed index creation. This should be OK since a single backend can't 00068 * run multiple index creations in parallel. 00069 */ 00070 hspool->sortstate = tuplesort_begin_index_hash(heap, 00071 index, 00072 hash_mask, 00073 maintenance_work_mem, 00074 false); 00075 00076 return hspool; 00077 } 00078 00079 /* 00080 * clean up a spool structure and its substructures. 00081 */ 00082 void 00083 _h_spooldestroy(HSpool *hspool) 00084 { 00085 tuplesort_end(hspool->sortstate); 00086 pfree(hspool); 00087 } 00088 00089 /* 00090 * spool an index entry into the sort file. 00091 */ 00092 void 00093 _h_spool(IndexTuple itup, HSpool *hspool) 00094 { 00095 tuplesort_putindextuple(hspool->sortstate, itup); 00096 } 00097 00098 /* 00099 * given a spool loaded by successive calls to _h_spool, 00100 * create an entire index. 00101 */ 00102 void 00103 _h_indexbuild(HSpool *hspool) 00104 { 00105 IndexTuple itup; 00106 bool should_free; 00107 00108 tuplesort_performsort(hspool->sortstate); 00109 00110 while ((itup = tuplesort_getindextuple(hspool->sortstate, 00111 true, &should_free)) != NULL) 00112 { 00113 _hash_doinsert(hspool->index, itup); 00114 if (should_free) 00115 pfree(itup); 00116 } 00117 }