Header And Logo

PostgreSQL
| The world's most advanced open source database.

ginutil.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * ginutil.c
00004  *    utilities routines for the postgres inverted index access method.
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *          src/backend/access/gin/ginutil.c
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include "access/gin_private.h"
00018 #include "access/reloptions.h"
00019 #include "catalog/pg_collation.h"
00020 #include "catalog/pg_type.h"
00021 #include "miscadmin.h"
00022 #include "storage/indexfsm.h"
00023 #include "storage/lmgr.h"
00024 
00025 
00026 /*
00027  * initGinState: fill in an empty GinState struct to describe the index
00028  *
00029  * Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
00030  */
00031 void
00032 initGinState(GinState *state, Relation index)
00033 {
00034     TupleDesc   origTupdesc = RelationGetDescr(index);
00035     int         i;
00036 
00037     MemSet(state, 0, sizeof(GinState));
00038 
00039     state->index = index;
00040     state->oneCol = (origTupdesc->natts == 1) ? true : false;
00041     state->origTupdesc = origTupdesc;
00042 
00043     for (i = 0; i < origTupdesc->natts; i++)
00044     {
00045         if (state->oneCol)
00046             state->tupdesc[i] = state->origTupdesc;
00047         else
00048         {
00049             state->tupdesc[i] = CreateTemplateTupleDesc(2, false);
00050 
00051             TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
00052                                INT2OID, -1, 0);
00053             TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
00054                                origTupdesc->attrs[i]->atttypid,
00055                                origTupdesc->attrs[i]->atttypmod,
00056                                origTupdesc->attrs[i]->attndims);
00057             TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
00058                                         origTupdesc->attrs[i]->attcollation);
00059         }
00060 
00061         fmgr_info_copy(&(state->compareFn[i]),
00062                        index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
00063                        CurrentMemoryContext);
00064         fmgr_info_copy(&(state->extractValueFn[i]),
00065                        index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
00066                        CurrentMemoryContext);
00067         fmgr_info_copy(&(state->extractQueryFn[i]),
00068                        index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
00069                        CurrentMemoryContext);
00070         fmgr_info_copy(&(state->consistentFn[i]),
00071                        index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
00072                        CurrentMemoryContext);
00073 
00074         /*
00075          * Check opclass capability to do partial match.
00076          */
00077         if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
00078         {
00079             fmgr_info_copy(&(state->comparePartialFn[i]),
00080                    index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
00081                            CurrentMemoryContext);
00082             state->canPartialMatch[i] = true;
00083         }
00084         else
00085         {
00086             state->canPartialMatch[i] = false;
00087         }
00088 
00089         /*
00090          * If the index column has a specified collation, we should honor that
00091          * while doing comparisons.  However, we may have a collatable storage
00092          * type for a noncollatable indexed data type (for instance, hstore
00093          * uses text index entries).  If there's no index collation then
00094          * specify default collation in case the support functions need
00095          * collation.  This is harmless if the support functions don't care
00096          * about collation, so we just do it unconditionally.  (We could
00097          * alternatively call get_typcollation, but that seems like expensive
00098          * overkill --- there aren't going to be any cases where a GIN storage
00099          * type has a nondefault collation.)
00100          */
00101         if (OidIsValid(index->rd_indcollation[i]))
00102             state->supportCollation[i] = index->rd_indcollation[i];
00103         else
00104             state->supportCollation[i] = DEFAULT_COLLATION_OID;
00105     }
00106 }
00107 
00108 /*
00109  * Extract attribute (column) number of stored entry from GIN tuple
00110  */
00111 OffsetNumber
00112 gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
00113 {
00114     OffsetNumber colN;
00115 
00116     if (ginstate->oneCol)
00117     {
00118         /* column number is not stored explicitly */
00119         colN = FirstOffsetNumber;
00120     }
00121     else
00122     {
00123         Datum       res;
00124         bool        isnull;
00125 
00126         /*
00127          * First attribute is always int16, so we can safely use any tuple
00128          * descriptor to obtain first attribute of tuple
00129          */
00130         res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
00131                             &isnull);
00132         Assert(!isnull);
00133 
00134         colN = DatumGetUInt16(res);
00135         Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts);
00136     }
00137 
00138     return colN;
00139 }
00140 
00141 /*
00142  * Extract stored datum (and possible null category) from GIN tuple
00143  */
00144 Datum
00145 gintuple_get_key(GinState *ginstate, IndexTuple tuple,
00146                  GinNullCategory *category)
00147 {
00148     Datum       res;
00149     bool        isnull;
00150 
00151     if (ginstate->oneCol)
00152     {
00153         /*
00154          * Single column index doesn't store attribute numbers in tuples
00155          */
00156         res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc,
00157                             &isnull);
00158     }
00159     else
00160     {
00161         /*
00162          * Since the datum type depends on which index column it's from, we
00163          * must be careful to use the right tuple descriptor here.
00164          */
00165         OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple);
00166 
00167         res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber),
00168                             ginstate->tupdesc[colN - 1],
00169                             &isnull);
00170     }
00171 
00172     if (isnull)
00173         *category = GinGetNullCategory(tuple, ginstate);
00174     else
00175         *category = GIN_CAT_NORM_KEY;
00176 
00177     return res;
00178 }
00179 
00180 /*
00181  * Allocate a new page (either by recycling, or by extending the index file)
00182  * The returned buffer is already pinned and exclusive-locked
00183  * Caller is responsible for initializing the page by calling GinInitBuffer
00184  */
00185 Buffer
00186 GinNewBuffer(Relation index)
00187 {
00188     Buffer      buffer;
00189     bool        needLock;
00190 
00191     /* First, try to get a page from FSM */
00192     for (;;)
00193     {
00194         BlockNumber blkno = GetFreeIndexPage(index);
00195 
00196         if (blkno == InvalidBlockNumber)
00197             break;
00198 
00199         buffer = ReadBuffer(index, blkno);
00200 
00201         /*
00202          * We have to guard against the possibility that someone else already
00203          * recycled this page; the buffer may be locked if so.
00204          */
00205         if (ConditionalLockBuffer(buffer))
00206         {
00207             Page        page = BufferGetPage(buffer);
00208 
00209             if (PageIsNew(page))
00210                 return buffer;  /* OK to use, if never initialized */
00211 
00212             if (GinPageIsDeleted(page))
00213                 return buffer;  /* OK to use */
00214 
00215             LockBuffer(buffer, GIN_UNLOCK);
00216         }
00217 
00218         /* Can't use it, so release buffer and try again */
00219         ReleaseBuffer(buffer);
00220     }
00221 
00222     /* Must extend the file */
00223     needLock = !RELATION_IS_LOCAL(index);
00224     if (needLock)
00225         LockRelationForExtension(index, ExclusiveLock);
00226 
00227     buffer = ReadBuffer(index, P_NEW);
00228     LockBuffer(buffer, GIN_EXCLUSIVE);
00229 
00230     if (needLock)
00231         UnlockRelationForExtension(index, ExclusiveLock);
00232 
00233     return buffer;
00234 }
00235 
00236 void
00237 GinInitPage(Page page, uint32 f, Size pageSize)
00238 {
00239     GinPageOpaque opaque;
00240 
00241     PageInit(page, pageSize, sizeof(GinPageOpaqueData));
00242 
00243     opaque = GinPageGetOpaque(page);
00244     memset(opaque, 0, sizeof(GinPageOpaqueData));
00245     opaque->flags = f;
00246     opaque->rightlink = InvalidBlockNumber;
00247 }
00248 
00249 void
00250 GinInitBuffer(Buffer b, uint32 f)
00251 {
00252     GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
00253 }
00254 
00255 void
00256 GinInitMetabuffer(Buffer b)
00257 {
00258     GinMetaPageData *metadata;
00259     Page        page = BufferGetPage(b);
00260 
00261     GinInitPage(page, GIN_META, BufferGetPageSize(b));
00262 
00263     metadata = GinPageGetMeta(page);
00264 
00265     metadata->head = metadata->tail = InvalidBlockNumber;
00266     metadata->tailFreeSize = 0;
00267     metadata->nPendingPages = 0;
00268     metadata->nPendingHeapTuples = 0;
00269     metadata->nTotalPages = 0;
00270     metadata->nEntryPages = 0;
00271     metadata->nDataPages = 0;
00272     metadata->nEntries = 0;
00273     metadata->ginVersion = GIN_CURRENT_VERSION;
00274 }
00275 
00276 /*
00277  * Compare two keys of the same index column
00278  */
00279 int
00280 ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
00281                   Datum a, GinNullCategory categorya,
00282                   Datum b, GinNullCategory categoryb)
00283 {
00284     /* if not of same null category, sort by that first */
00285     if (categorya != categoryb)
00286         return (categorya < categoryb) ? -1 : 1;
00287 
00288     /* all null items in same category are equal */
00289     if (categorya != GIN_CAT_NORM_KEY)
00290         return 0;
00291 
00292     /* both not null, so safe to call the compareFn */
00293     return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1],
00294                                       ginstate->supportCollation[attnum - 1],
00295                                            a, b));
00296 }
00297 
00298 /*
00299  * Compare two keys of possibly different index columns
00300  */
00301 int
00302 ginCompareAttEntries(GinState *ginstate,
00303                      OffsetNumber attnuma, Datum a, GinNullCategory categorya,
00304                      OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
00305 {
00306     /* attribute number is the first sort key */
00307     if (attnuma != attnumb)
00308         return (attnuma < attnumb) ? -1 : 1;
00309 
00310     return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb);
00311 }
00312 
00313 
00314 /*
00315  * Support for sorting key datums in ginExtractEntries
00316  *
00317  * Note: we only have to worry about null and not-null keys here;
00318  * ginExtractEntries never generates more than one placeholder null,
00319  * so it doesn't have to sort those.
00320  */
00321 typedef struct
00322 {
00323     Datum       datum;
00324     bool        isnull;
00325 } keyEntryData;
00326 
00327 typedef struct
00328 {
00329     FmgrInfo   *cmpDatumFunc;
00330     Oid         collation;
00331     bool        haveDups;
00332 } cmpEntriesArg;
00333 
00334 static int
00335 cmpEntries(const void *a, const void *b, void *arg)
00336 {
00337     const keyEntryData *aa = (const keyEntryData *) a;
00338     const keyEntryData *bb = (const keyEntryData *) b;
00339     cmpEntriesArg *data = (cmpEntriesArg *) arg;
00340     int         res;
00341 
00342     if (aa->isnull)
00343     {
00344         if (bb->isnull)
00345             res = 0;            /* NULL "=" NULL */
00346         else
00347             res = 1;            /* NULL ">" not-NULL */
00348     }
00349     else if (bb->isnull)
00350         res = -1;               /* not-NULL "<" NULL */
00351     else
00352         res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
00353                                               data->collation,
00354                                               aa->datum, bb->datum));
00355 
00356     /*
00357      * Detect if we have any duplicates.  If there are equal keys, qsort must
00358      * compare them at some point, else it wouldn't know whether one should go
00359      * before or after the other.
00360      */
00361     if (res == 0)
00362         data->haveDups = true;
00363 
00364     return res;
00365 }
00366 
00367 
00368 /*
00369  * Extract the index key values from an indexable item
00370  *
00371  * The resulting key values are sorted, and any duplicates are removed.
00372  * This avoids generating redundant index entries.
00373  */
00374 Datum *
00375 ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
00376                   Datum value, bool isNull,
00377                   int32 *nentries, GinNullCategory **categories)
00378 {
00379     Datum      *entries;
00380     bool       *nullFlags;
00381     int32       i;
00382 
00383     /*
00384      * We don't call the extractValueFn on a null item.  Instead generate a
00385      * placeholder.
00386      */
00387     if (isNull)
00388     {
00389         *nentries = 1;
00390         entries = (Datum *) palloc(sizeof(Datum));
00391         entries[0] = (Datum) 0;
00392         *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
00393         (*categories)[0] = GIN_CAT_NULL_ITEM;
00394         return entries;
00395     }
00396 
00397     /* OK, call the opclass's extractValueFn */
00398     nullFlags = NULL;           /* in case extractValue doesn't set it */
00399     entries = (Datum *)
00400         DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
00401                                       ginstate->supportCollation[attnum - 1],
00402                                           value,
00403                                           PointerGetDatum(nentries),
00404                                           PointerGetDatum(&nullFlags)));
00405 
00406     /*
00407      * Generate a placeholder if the item contained no keys.
00408      */
00409     if (entries == NULL || *nentries <= 0)
00410     {
00411         *nentries = 1;
00412         entries = (Datum *) palloc(sizeof(Datum));
00413         entries[0] = (Datum) 0;
00414         *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
00415         (*categories)[0] = GIN_CAT_EMPTY_ITEM;
00416         return entries;
00417     }
00418 
00419     /*
00420      * If the extractValueFn didn't create a nullFlags array, create one,
00421      * assuming that everything's non-null.  Otherwise, run through the array
00422      * and make sure each value is exactly 0 or 1; this ensures binary
00423      * compatibility with the GinNullCategory representation.
00424      */
00425     if (nullFlags == NULL)
00426         nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
00427     else
00428     {
00429         for (i = 0; i < *nentries; i++)
00430             nullFlags[i] = (nullFlags[i] ? true : false);
00431     }
00432     /* now we can use the nullFlags as category codes */
00433     *categories = (GinNullCategory *) nullFlags;
00434 
00435     /*
00436      * If there's more than one key, sort and unique-ify.
00437      *
00438      * XXX Using qsort here is notationally painful, and the overhead is
00439      * pretty bad too.  For small numbers of keys it'd likely be better to use
00440      * a simple insertion sort.
00441      */
00442     if (*nentries > 1)
00443     {
00444         keyEntryData *keydata;
00445         cmpEntriesArg arg;
00446 
00447         keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData));
00448         for (i = 0; i < *nentries; i++)
00449         {
00450             keydata[i].datum = entries[i];
00451             keydata[i].isnull = nullFlags[i];
00452         }
00453 
00454         arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
00455         arg.collation = ginstate->supportCollation[attnum - 1];
00456         arg.haveDups = false;
00457         qsort_arg(keydata, *nentries, sizeof(keyEntryData),
00458                   cmpEntries, (void *) &arg);
00459 
00460         if (arg.haveDups)
00461         {
00462             /* there are duplicates, must get rid of 'em */
00463             int32       j;
00464 
00465             entries[0] = keydata[0].datum;
00466             nullFlags[0] = keydata[0].isnull;
00467             j = 1;
00468             for (i = 1; i < *nentries; i++)
00469             {
00470                 if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
00471                 {
00472                     entries[j] = keydata[i].datum;
00473                     nullFlags[j] = keydata[i].isnull;
00474                     j++;
00475                 }
00476             }
00477             *nentries = j;
00478         }
00479         else
00480         {
00481             /* easy, no duplicates */
00482             for (i = 0; i < *nentries; i++)
00483             {
00484                 entries[i] = keydata[i].datum;
00485                 nullFlags[i] = keydata[i].isnull;
00486             }
00487         }
00488 
00489         pfree(keydata);
00490     }
00491 
00492     return entries;
00493 }
00494 
00495 Datum
00496 ginoptions(PG_FUNCTION_ARGS)
00497 {
00498     Datum       reloptions = PG_GETARG_DATUM(0);
00499     bool        validate = PG_GETARG_BOOL(1);
00500     relopt_value *options;
00501     GinOptions *rdopts;
00502     int         numoptions;
00503     static const relopt_parse_elt tab[] = {
00504         {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
00505     };
00506 
00507     options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
00508                               &numoptions);
00509 
00510     /* if none set, we're done */
00511     if (numoptions == 0)
00512         PG_RETURN_NULL();
00513 
00514     rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
00515 
00516     fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
00517                    validate, tab, lengthof(tab));
00518 
00519     pfree(options);
00520 
00521     PG_RETURN_BYTEA_P(rdopts);
00522 }
00523 
00524 /*
00525  * Fetch index's statistical data into *stats
00526  *
00527  * Note: in the result, nPendingPages can be trusted to be up-to-date,
00528  * as can ginVersion; but the other fields are as of the last VACUUM.
00529  */
00530 void
00531 ginGetStats(Relation index, GinStatsData *stats)
00532 {
00533     Buffer      metabuffer;
00534     Page        metapage;
00535     GinMetaPageData *metadata;
00536 
00537     metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
00538     LockBuffer(metabuffer, GIN_SHARE);
00539     metapage = BufferGetPage(metabuffer);
00540     metadata = GinPageGetMeta(metapage);
00541 
00542     stats->nPendingPages = metadata->nPendingPages;
00543     stats->nTotalPages = metadata->nTotalPages;
00544     stats->nEntryPages = metadata->nEntryPages;
00545     stats->nDataPages = metadata->nDataPages;
00546     stats->nEntries = metadata->nEntries;
00547     stats->ginVersion = metadata->ginVersion;
00548 
00549     UnlockReleaseBuffer(metabuffer);
00550 }
00551 
00552 /*
00553  * Write the given statistics to the index's metapage
00554  *
00555  * Note: nPendingPages and ginVersion are *not* copied over
00556  */
00557 void
00558 ginUpdateStats(Relation index, const GinStatsData *stats)
00559 {
00560     Buffer      metabuffer;
00561     Page        metapage;
00562     GinMetaPageData *metadata;
00563 
00564     metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
00565     LockBuffer(metabuffer, GIN_EXCLUSIVE);
00566     metapage = BufferGetPage(metabuffer);
00567     metadata = GinPageGetMeta(metapage);
00568 
00569     START_CRIT_SECTION();
00570 
00571     metadata->nTotalPages = stats->nTotalPages;
00572     metadata->nEntryPages = stats->nEntryPages;
00573     metadata->nDataPages = stats->nDataPages;
00574     metadata->nEntries = stats->nEntries;
00575 
00576     MarkBufferDirty(metabuffer);
00577 
00578     if (RelationNeedsWAL(index))
00579     {
00580         XLogRecPtr  recptr;
00581         ginxlogUpdateMeta data;
00582         XLogRecData rdata;
00583 
00584         data.node = index->rd_node;
00585         data.ntuples = 0;
00586         data.newRightlink = data.prevTail = InvalidBlockNumber;
00587         memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
00588 
00589         rdata.buffer = InvalidBuffer;
00590         rdata.data = (char *) &data;
00591         rdata.len = sizeof(ginxlogUpdateMeta);
00592         rdata.next = NULL;
00593 
00594         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
00595         PageSetLSN(metapage, recptr);
00596     }
00597 
00598     UnlockReleaseBuffer(metabuffer);
00599 
00600     END_CRIT_SECTION();
00601 }