Header And Logo

PostgreSQL
| The world's most advanced open source database.

ginbulk.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * ginbulk.c
00004  *    routines for fast build of inverted index
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *          src/backend/access/gin/ginbulk.c
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include "access/gin_private.h"
00018 #include "utils/datum.h"
00019 #include "utils/memutils.h"
00020 
00021 
00022 #define DEF_NENTRY  2048        /* GinEntryAccumulator allocation quantum */
00023 #define DEF_NPTR    5           /* ItemPointer initial allocation quantum */
00024 
00025 
00026 /* Combiner function for rbtree.c */
00027 static void
00028 ginCombineData(RBNode *existing, const RBNode *newdata, void *arg)
00029 {
00030     GinEntryAccumulator *eo = (GinEntryAccumulator *) existing;
00031     const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata;
00032     BuildAccumulator *accum = (BuildAccumulator *) arg;
00033 
00034     /*
00035      * Note this code assumes that newdata contains only one itempointer.
00036      */
00037     if (eo->count >= eo->maxcount)
00038     {
00039         accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
00040         eo->maxcount *= 2;
00041         eo->list = (ItemPointerData *)
00042             repalloc(eo->list, sizeof(ItemPointerData) * eo->maxcount);
00043         accum->allocatedMemory += GetMemoryChunkSpace(eo->list);
00044     }
00045 
00046     /* If item pointers are not ordered, they will need to be sorted later */
00047     if (eo->shouldSort == FALSE)
00048     {
00049         int         res;
00050 
00051         res = ginCompareItemPointers(eo->list + eo->count - 1, en->list);
00052         Assert(res != 0);
00053 
00054         if (res > 0)
00055             eo->shouldSort = TRUE;
00056     }
00057 
00058     eo->list[eo->count] = en->list[0];
00059     eo->count++;
00060 }
00061 
00062 /* Comparator function for rbtree.c */
00063 static int
00064 cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg)
00065 {
00066     const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a;
00067     const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b;
00068     BuildAccumulator *accum = (BuildAccumulator *) arg;
00069 
00070     return ginCompareAttEntries(accum->ginstate,
00071                                 ea->attnum, ea->key, ea->category,
00072                                 eb->attnum, eb->key, eb->category);
00073 }
00074 
00075 /* Allocator function for rbtree.c */
00076 static RBNode *
00077 ginAllocEntryAccumulator(void *arg)
00078 {
00079     BuildAccumulator *accum = (BuildAccumulator *) arg;
00080     GinEntryAccumulator *ea;
00081 
00082     /*
00083      * Allocate memory by rather big chunks to decrease overhead.  We have no
00084      * need to reclaim RBNodes individually, so this costs nothing.
00085      */
00086     if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY)
00087     {
00088         accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY);
00089         accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
00090         accum->eas_used = 0;
00091     }
00092 
00093     /* Allocate new RBNode from current chunk */
00094     ea = accum->entryallocator + accum->eas_used;
00095     accum->eas_used++;
00096 
00097     return (RBNode *) ea;
00098 }
00099 
00100 void
00101 ginInitBA(BuildAccumulator *accum)
00102 {
00103     /* accum->ginstate is intentionally not set here */
00104     accum->allocatedMemory = 0;
00105     accum->entryallocator = NULL;
00106     accum->eas_used = 0;
00107     accum->tree = rb_create(sizeof(GinEntryAccumulator),
00108                             cmpEntryAccumulator,
00109                             ginCombineData,
00110                             ginAllocEntryAccumulator,
00111                             NULL,       /* no freefunc needed */
00112                             (void *) accum);
00113 }
00114 
00115 /*
00116  * This is basically the same as datumCopy(), but extended to count
00117  * palloc'd space in accum->allocatedMemory.
00118  */
00119 static Datum
00120 getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
00121 {
00122     Form_pg_attribute att = accum->ginstate->origTupdesc->attrs[attnum - 1];
00123     Datum       res;
00124 
00125     if (att->attbyval)
00126         res = value;
00127     else
00128     {
00129         res = datumCopy(value, false, att->attlen);
00130         accum->allocatedMemory += GetMemoryChunkSpace(DatumGetPointer(res));
00131     }
00132     return res;
00133 }
00134 
00135 /*
00136  * Find/store one entry from indexed value.
00137  */
00138 static void
00139 ginInsertBAEntry(BuildAccumulator *accum,
00140                  ItemPointer heapptr, OffsetNumber attnum,
00141                  Datum key, GinNullCategory category)
00142 {
00143     GinEntryAccumulator eatmp;
00144     GinEntryAccumulator *ea;
00145     bool        isNew;
00146 
00147     /*
00148      * For the moment, fill only the fields of eatmp that will be looked at by
00149      * cmpEntryAccumulator or ginCombineData.
00150      */
00151     eatmp.attnum = attnum;
00152     eatmp.key = key;
00153     eatmp.category = category;
00154     /* temporarily set up single-entry itempointer list */
00155     eatmp.list = heapptr;
00156 
00157     ea = (GinEntryAccumulator *) rb_insert(accum->tree, (RBNode *) &eatmp,
00158                                            &isNew);
00159 
00160     if (isNew)
00161     {
00162         /*
00163          * Finish initializing new tree entry, including making permanent
00164          * copies of the datum (if it's not null) and itempointer.
00165          */
00166         if (category == GIN_CAT_NORM_KEY)
00167             ea->key = getDatumCopy(accum, attnum, key);
00168         ea->maxcount = DEF_NPTR;
00169         ea->count = 1;
00170         ea->shouldSort = FALSE;
00171         ea->list =
00172             (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
00173         ea->list[0] = *heapptr;
00174         accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
00175     }
00176     else
00177     {
00178         /*
00179          * ginCombineData did everything needed.
00180          */
00181     }
00182 }
00183 
00184 /*
00185  * Insert the entries for one heap pointer.
00186  *
00187  * Since the entries are being inserted into a balanced binary tree, you
00188  * might think that the order of insertion wouldn't be critical, but it turns
00189  * out that inserting the entries in sorted order results in a lot of
00190  * rebalancing operations and is slow.  To prevent this, we attempt to insert
00191  * the nodes in an order that will produce a nearly-balanced tree if the input
00192  * is in fact sorted.
00193  *
00194  * We do this as follows.  First, we imagine that we have an array whose size
00195  * is the smallest power of two greater than or equal to the actual array
00196  * size.  Second, we insert the middle entry of our virtual array into the
00197  * tree; then, we insert the middles of each half of our virtual array, then
00198  * middles of quarters, etc.
00199  */
00200 void
00201 ginInsertBAEntries(BuildAccumulator *accum,
00202                    ItemPointer heapptr, OffsetNumber attnum,
00203                    Datum *entries, GinNullCategory *categories,
00204                    int32 nentries)
00205 {
00206     uint32      step = nentries;
00207 
00208     if (nentries <= 0)
00209         return;
00210 
00211     Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
00212 
00213     /*
00214      * step will contain largest power of 2 and <= nentries
00215      */
00216     step |= (step >> 1);
00217     step |= (step >> 2);
00218     step |= (step >> 4);
00219     step |= (step >> 8);
00220     step |= (step >> 16);
00221     step >>= 1;
00222     step++;
00223 
00224     while (step > 0)
00225     {
00226         int         i;
00227 
00228         for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ )
00229             ginInsertBAEntry(accum, heapptr, attnum,
00230                              entries[i], categories[i]);
00231 
00232         step >>= 1;             /* /2 */
00233     }
00234 }
00235 
00236 static int
00237 qsortCompareItemPointers(const void *a, const void *b)
00238 {
00239     int         res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b);
00240 
00241     /* Assert that there are no equal item pointers being sorted */
00242     Assert(res != 0);
00243     return res;
00244 }
00245 
00246 /* Prepare to read out the rbtree contents using ginGetBAEntry */
00247 void
00248 ginBeginBAScan(BuildAccumulator *accum)
00249 {
00250     rb_begin_iterate(accum->tree, LeftRightWalk);
00251 }
00252 
00253 /*
00254  * Get the next entry in sequence from the BuildAccumulator's rbtree.
00255  * This consists of a single key datum and a list (array) of one or more
00256  * heap TIDs in which that key is found.  The list is guaranteed sorted.
00257  */
00258 ItemPointerData *
00259 ginGetBAEntry(BuildAccumulator *accum,
00260               OffsetNumber *attnum, Datum *key, GinNullCategory *category,
00261               uint32 *n)
00262 {
00263     GinEntryAccumulator *entry;
00264     ItemPointerData *list;
00265 
00266     entry = (GinEntryAccumulator *) rb_iterate(accum->tree);
00267 
00268     if (entry == NULL)
00269         return NULL;            /* no more entries */
00270 
00271     *attnum = entry->attnum;
00272     *key = entry->key;
00273     *category = entry->category;
00274     list = entry->list;
00275     *n = entry->count;
00276 
00277     Assert(list != NULL && entry->count > 0);
00278 
00279     if (entry->shouldSort && entry->count > 1)
00280         qsort(list, entry->count, sizeof(ItemPointerData),
00281               qsortCompareItemPointers);
00282 
00283     return list;
00284 }