Header And Logo

PostgreSQL
| The world's most advanced open source database.

gistutil.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * gistutil.c
00004  *    utilities routines for the postgres GiST index access method.
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *          src/backend/access/gist/gistutil.c
00012  *-------------------------------------------------------------------------
00013  */
00014 #include "postgres.h"
00015 
00016 #include <math.h>
00017 
00018 #include "access/gist_private.h"
00019 #include "access/reloptions.h"
00020 #include "storage/indexfsm.h"
00021 #include "storage/lmgr.h"
00022 #include "utils/builtins.h"
00023 
00024 
00025 /*
00026  * Write itup vector to page, has no control of free space.
00027  */
00028 void
00029 gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
00030 {
00031     OffsetNumber l = InvalidOffsetNumber;
00032     int         i;
00033 
00034     if (off == InvalidOffsetNumber)
00035         off = (PageIsEmpty(page)) ? FirstOffsetNumber :
00036             OffsetNumberNext(PageGetMaxOffsetNumber(page));
00037 
00038     for (i = 0; i < len; i++)
00039     {
00040         Size        sz = IndexTupleSize(itup[i]);
00041 
00042         l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
00043         if (l == InvalidOffsetNumber)
00044             elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
00045                  i, len, (int) sz);
00046         off++;
00047     }
00048 }
00049 
00050 /*
00051  * Check space for itup vector on page
00052  */
00053 bool
00054 gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
00055 {
00056     unsigned int size = freespace,
00057                 deleted = 0;
00058     int         i;
00059 
00060     for (i = 0; i < len; i++)
00061         size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
00062 
00063     if (todelete != InvalidOffsetNumber)
00064     {
00065         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
00066 
00067         deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
00068     }
00069 
00070     return (PageGetFreeSpace(page) + deleted < size);
00071 }
00072 
00073 bool
00074 gistfitpage(IndexTuple *itvec, int len)
00075 {
00076     int         i;
00077     Size        size = 0;
00078 
00079     for (i = 0; i < len; i++)
00080         size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
00081 
00082     /* TODO: Consider fillfactor */
00083     return (size <= GiSTPageSize);
00084 }
00085 
00086 /*
00087  * Read buffer into itup vector
00088  */
00089 IndexTuple *
00090 gistextractpage(Page page, int *len /* out */ )
00091 {
00092     OffsetNumber i,
00093                 maxoff;
00094     IndexTuple *itvec;
00095 
00096     maxoff = PageGetMaxOffsetNumber(page);
00097     *len = maxoff;
00098     itvec = palloc(sizeof(IndexTuple) * maxoff);
00099     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
00100         itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
00101 
00102     return itvec;
00103 }
00104 
00105 /*
00106  * join two vectors into one
00107  */
00108 IndexTuple *
00109 gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
00110 {
00111     itvec = (IndexTuple *) repalloc((void *) itvec, sizeof(IndexTuple) * ((*len) + addlen));
00112     memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
00113     *len += addlen;
00114     return itvec;
00115 }
00116 
00117 /*
00118  * make plain IndexTupleVector
00119  */
00120 
00121 IndexTupleData *
00122 gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
00123 {
00124     char       *ptr,
00125                *ret;
00126     int         i;
00127 
00128     *memlen = 0;
00129 
00130     for (i = 0; i < veclen; i++)
00131         *memlen += IndexTupleSize(vec[i]);
00132 
00133     ptr = ret = palloc(*memlen);
00134 
00135     for (i = 0; i < veclen; i++)
00136     {
00137         memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
00138         ptr += IndexTupleSize(vec[i]);
00139     }
00140 
00141     return (IndexTupleData *) ret;
00142 }
00143 
00144 /*
00145  * Make unions of keys in IndexTuple vector (one union datum per index column).
00146  * Union Datums are returned into the attr/isnull arrays.
00147  * Resulting Datums aren't compressed.
00148  */
00149 void
00150 gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
00151                    Datum *attr, bool *isnull)
00152 {
00153     int         i;
00154     GistEntryVector *evec;
00155     int         attrsize;
00156 
00157     evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
00158 
00159     for (i = 0; i < giststate->tupdesc->natts; i++)
00160     {
00161         int         j;
00162 
00163         /* Collect non-null datums for this column */
00164         evec->n = 0;
00165         for (j = 0; j < len; j++)
00166         {
00167             Datum       datum;
00168             bool        IsNull;
00169 
00170             datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
00171             if (IsNull)
00172                 continue;
00173 
00174             gistdentryinit(giststate, i,
00175                            evec->vector + evec->n,
00176                            datum,
00177                            NULL, NULL, (OffsetNumber) 0,
00178                            FALSE, IsNull);
00179             evec->n++;
00180         }
00181 
00182         /* If this column was all NULLs, the union is NULL */
00183         if (evec->n == 0)
00184         {
00185             attr[i] = (Datum) 0;
00186             isnull[i] = TRUE;
00187         }
00188         else
00189         {
00190             if (evec->n == 1)
00191             {
00192                 /* unionFn may expect at least two inputs */
00193                 evec->n = 2;
00194                 evec->vector[1] = evec->vector[0];
00195             }
00196 
00197             /* Make union and store in attr array */
00198             attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
00199                                         giststate->supportCollation[i],
00200                                         PointerGetDatum(evec),
00201                                         PointerGetDatum(&attrsize));
00202 
00203             isnull[i] = FALSE;
00204         }
00205     }
00206 }
00207 
00208 /*
00209  * Return an IndexTuple containing the result of applying the "union"
00210  * method to the specified IndexTuple vector.
00211  */
00212 IndexTuple
00213 gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
00214 {
00215     Datum       attr[INDEX_MAX_KEYS];
00216     bool        isnull[INDEX_MAX_KEYS];
00217 
00218     gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
00219 
00220     return gistFormTuple(giststate, r, attr, isnull, false);
00221 }
00222 
00223 /*
00224  * makes union of two key
00225  */
00226 void
00227 gistMakeUnionKey(GISTSTATE *giststate, int attno,
00228                  GISTENTRY *entry1, bool isnull1,
00229                  GISTENTRY *entry2, bool isnull2,
00230                  Datum *dst, bool *dstisnull)
00231 {
00232     /* we need a GistEntryVector with room for exactly 2 elements */
00233     union
00234     {
00235         GistEntryVector gev;
00236         char        padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
00237     }           storage;
00238     GistEntryVector *evec = &storage.gev;
00239     int         dstsize;
00240 
00241     evec->n = 2;
00242 
00243     if (isnull1 && isnull2)
00244     {
00245         *dstisnull = TRUE;
00246         *dst = (Datum) 0;
00247     }
00248     else
00249     {
00250         if (isnull1 == FALSE && isnull2 == FALSE)
00251         {
00252             evec->vector[0] = *entry1;
00253             evec->vector[1] = *entry2;
00254         }
00255         else if (isnull1 == FALSE)
00256         {
00257             evec->vector[0] = *entry1;
00258             evec->vector[1] = *entry1;
00259         }
00260         else
00261         {
00262             evec->vector[0] = *entry2;
00263             evec->vector[1] = *entry2;
00264         }
00265 
00266         *dstisnull = FALSE;
00267         *dst = FunctionCall2Coll(&giststate->unionFn[attno],
00268                                  giststate->supportCollation[attno],
00269                                  PointerGetDatum(evec),
00270                                  PointerGetDatum(&dstsize));
00271     }
00272 }
00273 
00274 bool
00275 gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
00276 {
00277     bool        result;
00278 
00279     FunctionCall3Coll(&giststate->equalFn[attno],
00280                       giststate->supportCollation[attno],
00281                       a, b,
00282                       PointerGetDatum(&result));
00283     return result;
00284 }
00285 
00286 /*
00287  * Decompress all keys in tuple
00288  */
00289 void
00290 gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
00291                   OffsetNumber o, GISTENTRY *attdata, bool *isnull)
00292 {
00293     int         i;
00294 
00295     for (i = 0; i < r->rd_att->natts; i++)
00296     {
00297         Datum       datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
00298 
00299         gistdentryinit(giststate, i, &attdata[i],
00300                        datum, r, p, o,
00301                        FALSE, isnull[i]);
00302     }
00303 }
00304 
00305 /*
00306  * Forms union of oldtup and addtup, if union == oldtup then return NULL
00307  */
00308 IndexTuple
00309 gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
00310 {
00311     bool        neednew = FALSE;
00312     GISTENTRY   oldentries[INDEX_MAX_KEYS],
00313                 addentries[INDEX_MAX_KEYS];
00314     bool        oldisnull[INDEX_MAX_KEYS],
00315                 addisnull[INDEX_MAX_KEYS];
00316     Datum       attr[INDEX_MAX_KEYS];
00317     bool        isnull[INDEX_MAX_KEYS];
00318     IndexTuple  newtup = NULL;
00319     int         i;
00320 
00321     gistDeCompressAtt(giststate, r, oldtup, NULL,
00322                       (OffsetNumber) 0, oldentries, oldisnull);
00323 
00324     gistDeCompressAtt(giststate, r, addtup, NULL,
00325                       (OffsetNumber) 0, addentries, addisnull);
00326 
00327     for (i = 0; i < r->rd_att->natts; i++)
00328     {
00329         gistMakeUnionKey(giststate, i,
00330                          oldentries + i, oldisnull[i],
00331                          addentries + i, addisnull[i],
00332                          attr + i, isnull + i);
00333 
00334         if (neednew)
00335             /* we already need new key, so we can skip check */
00336             continue;
00337 
00338         if (isnull[i])
00339             /* union of key may be NULL if and only if both keys are NULL */
00340             continue;
00341 
00342         if (!addisnull[i])
00343         {
00344             if (oldisnull[i] ||
00345                 !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
00346                 neednew = true;
00347         }
00348     }
00349 
00350     if (neednew)
00351     {
00352         /* need to update key */
00353         newtup = gistFormTuple(giststate, r, attr, isnull, false);
00354         newtup->t_tid = oldtup->t_tid;
00355     }
00356 
00357     return newtup;
00358 }
00359 
00360 /*
00361  * Search an upper index page for the entry with lowest penalty for insertion
00362  * of the new index key contained in "it".
00363  *
00364  * Returns the index of the page entry to insert into.
00365  */
00366 OffsetNumber
00367 gistchoose(Relation r, Page p, IndexTuple it,   /* it has compressed entry */
00368            GISTSTATE *giststate)
00369 {
00370     OffsetNumber result;
00371     OffsetNumber maxoff;
00372     OffsetNumber i;
00373     float       best_penalty[INDEX_MAX_KEYS];
00374     GISTENTRY   entry,
00375                 identry[INDEX_MAX_KEYS];
00376     bool        isnull[INDEX_MAX_KEYS];
00377     int         keep_current_best;
00378 
00379     Assert(!GistPageIsLeaf(p));
00380 
00381     gistDeCompressAtt(giststate, r,
00382                       it, NULL, (OffsetNumber) 0,
00383                       identry, isnull);
00384 
00385     /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
00386     result = FirstOffsetNumber;
00387 
00388     /*
00389      * The index may have multiple columns, and there's a penalty value for
00390      * each column.  The penalty associated with a column that appears earlier
00391      * in the index definition is strictly more important than the penalty of
00392      * a column that appears later in the index definition.
00393      *
00394      * best_penalty[j] is the best penalty we have seen so far for column j,
00395      * or -1 when we haven't yet examined column j.  Array entries to the
00396      * right of the first -1 are undefined.
00397      */
00398     best_penalty[0] = -1;
00399 
00400     /*
00401      * If we find a tuple that's exactly as good as the currently best one, we
00402      * could use either one.  When inserting a lot of tuples with the same or
00403      * similar keys, it's preferable to descend down the same path when
00404      * possible, as that's more cache-friendly.  On the other hand, if all
00405      * inserts land on the same leaf page after a split, we're never going to
00406      * insert anything to the other half of the split, and will end up using
00407      * only 50% of the available space.  Distributing the inserts evenly would
00408      * lead to better space usage, but that hurts cache-locality during
00409      * insertion.  To get the best of both worlds, when we find a tuple that's
00410      * exactly as good as the previous best, choose randomly whether to stick
00411      * to the old best, or use the new one.  Once we decide to stick to the
00412      * old best, we keep sticking to it for any subsequent equally good tuples
00413      * we might find.  This favors tuples with low offsets, but still allows
00414      * some inserts to go to other equally-good subtrees.
00415      *
00416      * keep_current_best is -1 if we haven't yet had to make a random choice
00417      * whether to keep the current best tuple.  If we have done so, and
00418      * decided to keep it, keep_current_best is 1; if we've decided to
00419      * replace, keep_current_best is 0.  (This state will be reset to -1 as
00420      * soon as we've made the replacement, but sometimes we make the choice in
00421      * advance of actually finding a replacement best tuple.)
00422      */
00423     keep_current_best = -1;
00424 
00425     /*
00426      * Loop over tuples on page.
00427      */
00428     maxoff = PageGetMaxOffsetNumber(p);
00429     Assert(maxoff >= FirstOffsetNumber);
00430 
00431     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
00432     {
00433         IndexTuple  itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
00434         bool        zero_penalty;
00435         int         j;
00436 
00437         zero_penalty = true;
00438 
00439         /* Loop over index attributes. */
00440         for (j = 0; j < r->rd_att->natts; j++)
00441         {
00442             Datum       datum;
00443             float       usize;
00444             bool        IsNull;
00445 
00446             /* Compute penalty for this column. */
00447             datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
00448             gistdentryinit(giststate, j, &entry, datum, r, p, i,
00449                            FALSE, IsNull);
00450             usize = gistpenalty(giststate, j, &entry, IsNull,
00451                                 &identry[j], isnull[j]);
00452             if (usize > 0)
00453                 zero_penalty = false;
00454 
00455             if (best_penalty[j] < 0 || usize < best_penalty[j])
00456             {
00457                 /*
00458                  * New best penalty for column.  Tentatively select this tuple
00459                  * as the target, and record the best penalty.  Then reset the
00460                  * next column's penalty to "unknown" (and indirectly, the
00461                  * same for all the ones to its right).  This will force us to
00462                  * adopt this tuple's penalty values as the best for all the
00463                  * remaining columns during subsequent loop iterations.
00464                  */
00465                 result = i;
00466                 best_penalty[j] = usize;
00467 
00468                 if (j < r->rd_att->natts - 1)
00469                     best_penalty[j + 1] = -1;
00470 
00471                 /* we have new best, so reset keep-it decision */
00472                 keep_current_best = -1;
00473             }
00474             else if (best_penalty[j] == usize)
00475             {
00476                 /*
00477                  * The current tuple is exactly as good for this column as the
00478                  * best tuple seen so far.  The next iteration of this loop
00479                  * will compare the next column.
00480                  */
00481             }
00482             else
00483             {
00484                 /*
00485                  * The current tuple is worse for this column than the best
00486                  * tuple seen so far.  Skip the remaining columns and move on
00487                  * to the next tuple, if any.
00488                  */
00489                 zero_penalty = false;   /* so outer loop won't exit */
00490                 break;
00491             }
00492         }
00493 
00494         /*
00495          * If we looped past the last column, and did not update "result",
00496          * then this tuple is exactly as good as the prior best tuple.
00497          */
00498         if (j == r->rd_att->natts && result != i)
00499         {
00500             if (keep_current_best == -1)
00501             {
00502                 /* we didn't make the random choice yet for this old best */
00503                 keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
00504             }
00505             if (keep_current_best == 0)
00506             {
00507                 /* we choose to use the new tuple */
00508                 result = i;
00509                 /* choose again if there are even more exactly-as-good ones */
00510                 keep_current_best = -1;
00511             }
00512         }
00513 
00514         /*
00515          * If we find a tuple with zero penalty for all columns, and we've
00516          * decided we don't want to search for another tuple with equal
00517          * penalty, there's no need to examine remaining tuples; just break
00518          * out of the loop and return it.
00519          */
00520         if (zero_penalty)
00521         {
00522             if (keep_current_best == -1)
00523             {
00524                 /* we didn't make the random choice yet for this old best */
00525                 keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
00526             }
00527             if (keep_current_best == 1)
00528                 break;
00529         }
00530     }
00531 
00532     return result;
00533 }
00534 
00535 /*
00536  * initialize a GiST entry with a decompressed version of key
00537  */
00538 void
00539 gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
00540                Datum k, Relation r, Page pg, OffsetNumber o,
00541                bool l, bool isNull)
00542 {
00543     if (!isNull)
00544     {
00545         GISTENTRY  *dep;
00546 
00547         gistentryinit(*e, k, r, pg, o, l);
00548         dep = (GISTENTRY *)
00549             DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey],
00550                                            giststate->supportCollation[nkey],
00551                                               PointerGetDatum(e)));
00552         /* decompressFn may just return the given pointer */
00553         if (dep != e)
00554             gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
00555                           dep->leafkey);
00556     }
00557     else
00558         gistentryinit(*e, (Datum) 0, r, pg, o, l);
00559 }
00560 
00561 
00562 /*
00563  * initialize a GiST entry with a compressed version of key
00564  */
00565 void
00566 gistcentryinit(GISTSTATE *giststate, int nkey,
00567                GISTENTRY *e, Datum k, Relation r,
00568                Page pg, OffsetNumber o, bool l, bool isNull)
00569 {
00570     if (!isNull)
00571     {
00572         GISTENTRY  *cep;
00573 
00574         gistentryinit(*e, k, r, pg, o, l);
00575         cep = (GISTENTRY *)
00576             DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[nkey],
00577                                            giststate->supportCollation[nkey],
00578                                               PointerGetDatum(e)));
00579         /* compressFn may just return the given pointer */
00580         if (cep != e)
00581             gistentryinit(*e, cep->key, cep->rel, cep->page, cep->offset,
00582                           cep->leafkey);
00583     }
00584     else
00585         gistentryinit(*e, (Datum) 0, r, pg, o, l);
00586 }
00587 
00588 IndexTuple
00589 gistFormTuple(GISTSTATE *giststate, Relation r,
00590               Datum attdata[], bool isnull[], bool newValues)
00591 {
00592     GISTENTRY   centry[INDEX_MAX_KEYS];
00593     Datum       compatt[INDEX_MAX_KEYS];
00594     int         i;
00595     IndexTuple  res;
00596 
00597     for (i = 0; i < r->rd_att->natts; i++)
00598     {
00599         if (isnull[i])
00600             compatt[i] = (Datum) 0;
00601         else
00602         {
00603             gistcentryinit(giststate, i, &centry[i], attdata[i],
00604                            r, NULL, (OffsetNumber) 0,
00605                            newValues,
00606                            FALSE);
00607             compatt[i] = centry[i].key;
00608         }
00609     }
00610 
00611     res = index_form_tuple(giststate->tupdesc, compatt, isnull);
00612 
00613     /*
00614      * The offset number on tuples on internal pages is unused. For historical
00615      * reasons, it is set 0xffff.
00616      */
00617     ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
00618     return res;
00619 }
00620 
00621 float
00622 gistpenalty(GISTSTATE *giststate, int attno,
00623             GISTENTRY *orig, bool isNullOrig,
00624             GISTENTRY *add, bool isNullAdd)
00625 {
00626     float       penalty = 0.0;
00627 
00628     if (giststate->penaltyFn[attno].fn_strict == FALSE ||
00629         (isNullOrig == FALSE && isNullAdd == FALSE))
00630     {
00631         FunctionCall3Coll(&giststate->penaltyFn[attno],
00632                           giststate->supportCollation[attno],
00633                           PointerGetDatum(orig),
00634                           PointerGetDatum(add),
00635                           PointerGetDatum(&penalty));
00636         /* disallow negative or NaN penalty */
00637         if (isnan(penalty) || penalty < 0.0)
00638             penalty = 0.0;
00639     }
00640     else if (isNullOrig && isNullAdd)
00641         penalty = 0.0;
00642     else
00643     {
00644         /* try to prevent mixing null and non-null values */
00645         penalty = get_float4_infinity();
00646     }
00647 
00648     return penalty;
00649 }
00650 
00651 /*
00652  * Initialize a new index page
00653  */
00654 void
00655 GISTInitBuffer(Buffer b, uint32 f)
00656 {
00657     GISTPageOpaque opaque;
00658     Page        page;
00659     Size        pageSize;
00660 
00661     pageSize = BufferGetPageSize(b);
00662     page = BufferGetPage(b);
00663     PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
00664 
00665     opaque = GistPageGetOpaque(page);
00666     /* page was already zeroed by PageInit, so this is not needed: */
00667     /* memset(&(opaque->nsn), 0, sizeof(GistNSN)); */
00668     opaque->rightlink = InvalidBlockNumber;
00669     opaque->flags = f;
00670     opaque->gist_page_id = GIST_PAGE_ID;
00671 }
00672 
00673 /*
00674  * Verify that a freshly-read page looks sane.
00675  */
00676 void
00677 gistcheckpage(Relation rel, Buffer buf)
00678 {
00679     Page        page = BufferGetPage(buf);
00680 
00681     /*
00682      * ReadBuffer verifies that every newly-read page passes
00683      * PageHeaderIsValid, which means it either contains a reasonably sane
00684      * page header or is all-zero.  We have to defend against the all-zero
00685      * case, however.
00686      */
00687     if (PageIsNew(page))
00688         ereport(ERROR,
00689                 (errcode(ERRCODE_INDEX_CORRUPTED),
00690              errmsg("index \"%s\" contains unexpected zero page at block %u",
00691                     RelationGetRelationName(rel),
00692                     BufferGetBlockNumber(buf)),
00693                  errhint("Please REINDEX it.")));
00694 
00695     /*
00696      * Additionally check that the special area looks sane.
00697      */
00698     if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
00699         ereport(ERROR,
00700                 (errcode(ERRCODE_INDEX_CORRUPTED),
00701                  errmsg("index \"%s\" contains corrupted page at block %u",
00702                         RelationGetRelationName(rel),
00703                         BufferGetBlockNumber(buf)),
00704                  errhint("Please REINDEX it.")));
00705 }
00706 
00707 
00708 /*
00709  * Allocate a new page (either by recycling, or by extending the index file)
00710  *
00711  * The returned buffer is already pinned and exclusive-locked
00712  *
00713  * Caller is responsible for initializing the page by calling GISTInitBuffer
00714  */
00715 Buffer
00716 gistNewBuffer(Relation r)
00717 {
00718     Buffer      buffer;
00719     bool        needLock;
00720 
00721     /* First, try to get a page from FSM */
00722     for (;;)
00723     {
00724         BlockNumber blkno = GetFreeIndexPage(r);
00725 
00726         if (blkno == InvalidBlockNumber)
00727             break;              /* nothing left in FSM */
00728 
00729         buffer = ReadBuffer(r, blkno);
00730 
00731         /*
00732          * We have to guard against the possibility that someone else already
00733          * recycled this page; the buffer may be locked if so.
00734          */
00735         if (ConditionalLockBuffer(buffer))
00736         {
00737             Page        page = BufferGetPage(buffer);
00738 
00739             if (PageIsNew(page))
00740                 return buffer;  /* OK to use, if never initialized */
00741 
00742             gistcheckpage(r, buffer);
00743 
00744             if (GistPageIsDeleted(page))
00745                 return buffer;  /* OK to use */
00746 
00747             LockBuffer(buffer, GIST_UNLOCK);
00748         }
00749 
00750         /* Can't use it, so release buffer and try again */
00751         ReleaseBuffer(buffer);
00752     }
00753 
00754     /* Must extend the file */
00755     needLock = !RELATION_IS_LOCAL(r);
00756 
00757     if (needLock)
00758         LockRelationForExtension(r, ExclusiveLock);
00759 
00760     buffer = ReadBuffer(r, P_NEW);
00761     LockBuffer(buffer, GIST_EXCLUSIVE);
00762 
00763     if (needLock)
00764         UnlockRelationForExtension(r, ExclusiveLock);
00765 
00766     return buffer;
00767 }
00768 
00769 Datum
00770 gistoptions(PG_FUNCTION_ARGS)
00771 {
00772     Datum       reloptions = PG_GETARG_DATUM(0);
00773     bool        validate = PG_GETARG_BOOL(1);
00774     relopt_value *options;
00775     GiSTOptions *rdopts;
00776     int         numoptions;
00777     static const relopt_parse_elt tab[] = {
00778         {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
00779         {"buffering", RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)}
00780     };
00781 
00782     options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST,
00783                               &numoptions);
00784 
00785     /* if none set, we're done */
00786     if (numoptions == 0)
00787         PG_RETURN_NULL();
00788 
00789     rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions);
00790 
00791     fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions,
00792                    validate, tab, lengthof(tab));
00793 
00794     pfree(options);
00795 
00796     PG_RETURN_BYTEA_P(rdopts);
00797 
00798 }
00799 
00800 /*
00801  * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
00802  * to detect concurrent page splits anyway. This function provides a fake
00803  * sequence of LSNs for that purpose.
00804  */
00805 XLogRecPtr
00806 gistGetFakeLSN(Relation rel)
00807 {
00808     static XLogRecPtr counter = 1;
00809 
00810     if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
00811     {
00812         /*
00813          * Temporary relations are only accessible in our session, so a
00814          * simple backend-local counter will do.
00815          */
00816         return counter++;
00817     }
00818     else
00819     {
00820         /*
00821          * Unlogged relations are accessible from other backends, and survive
00822          * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
00823          */
00824         Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
00825         return GetFakeLSNForUnloggedRel();
00826     }
00827 }