PostgreSQL Source Code: src/backend/access/gist/gistsplit.c Source File

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * gistsplit.c
00004  *    Multi-column page splitting algorithm
00005  *
00006  * This file is concerned with making good page-split decisions in multi-column
00007  * GiST indexes.  The opclass-specific picksplit functions can only be expected
00008  * to produce answers based on a single column.  We first run the picksplit
00009  * function for column 1; then, if there are more columns, we check if any of
00010  * the tuples are "don't cares" so far as the column 1 split is concerned
00011  * (that is, they could go to either side for no additional penalty).  If so,
00012  * we try to redistribute those tuples on the basis of the next column.
00013  * Repeat till we're out of columns.
00014  *
00015  * gistSplitByKey() is the entry point to this file.
00016  *
00017  *
00018  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00019  * Portions Copyright (c) 1994, Regents of the University of California
00020  *
00021  * IDENTIFICATION
00022  *    src/backend/access/gist/gistsplit.c
00023  *
00024  *-------------------------------------------------------------------------
00025  */
00026 #include "postgres.h"
00027 
00028 #include "access/gist_private.h"
00029 #include "utils/rel.h"
00030 
00031 typedef struct
00032 {
00033     OffsetNumber *entries;
00034     int         len;
00035     Datum      *attr;
00036     bool       *isnull;
00037     bool       *dontcare;
00038 } GistSplitUnion;
00039 
00040 
00041 /*
00042  * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
00043  * ignoring any tuples that are marked in gsvp->dontcare[].  Subroutine for
00044  * gistunionsubkey.
00045  */
00046 static void
00047 gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
00048                    GistSplitUnion *gsvp)
00049 {
00050     IndexTuple *cleanedItVec;
00051     int         i,
00052                 cleanedLen = 0;
00053 
00054     cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
00055 
00056     for (i = 0; i < gsvp->len; i++)
00057     {
00058         if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
00059             continue;
00060 
00061         cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
00062     }
00063 
00064     gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
00065                        gsvp->attr, gsvp->isnull);
00066 
00067     pfree(cleanedItVec);
00068 }
00069 
00070 /*
00071  * Recompute unions of left- and right-side subkeys after a page split,
00072  * ignoring any tuples that are marked in spl->spl_dontcare[].
00073  *
00074  * Note: we always recompute union keys for all index columns.  In some cases
00075  * this might represent duplicate work for the leftmost column(s), but it's
00076  * not safe to assume that "zero penalty to move a tuple" means "the union
00077  * key doesn't change at all".  Penalty functions aren't 100% accurate.
00078  */
00079 static void
00080 gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
00081 {
00082     GistSplitUnion gsvp;
00083 
00084     gsvp.dontcare = spl->spl_dontcare;
00085 
00086     gsvp.entries = spl->splitVector.spl_left;
00087     gsvp.len = spl->splitVector.spl_nleft;
00088     gsvp.attr = spl->spl_lattr;
00089     gsvp.isnull = spl->spl_lisnull;
00090 
00091     gistunionsubkeyvec(giststate, itvec, &gsvp);
00092 
00093     gsvp.entries = spl->splitVector.spl_right;
00094     gsvp.len = spl->splitVector.spl_nright;
00095     gsvp.attr = spl->spl_rattr;
00096     gsvp.isnull = spl->spl_risnull;
00097 
00098     gistunionsubkeyvec(giststate, itvec, &gsvp);
00099 }
00100 
00101 /*
00102  * Find tuples that are "don't cares", that is could be moved to the other
00103  * side of the split with zero penalty, so far as the attno column is
00104  * concerned.
00105  *
00106  * Don't-care tuples are marked by setting the corresponding entry in
00107  * spl->spl_dontcare[] to "true".  Caller must have initialized that array
00108  * to zeroes.
00109  *
00110  * Returns number of don't-cares found.
00111  */
00112 static int
00113 findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
00114               GistSplitVector *spl, int attno)
00115 {
00116     int         i;
00117     GISTENTRY   entry;
00118     int         NumDontCare = 0;
00119 
00120     /*
00121      * First, search the left-side tuples to see if any have zero penalty to
00122      * be added to the right-side union key.
00123      *
00124      * attno column is known all-not-null (see gistSplitByKey), so we need not
00125      * check for nulls
00126      */
00127     gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
00128                   (OffsetNumber) 0, FALSE);
00129     for (i = 0; i < spl->splitVector.spl_nleft; i++)
00130     {
00131         int         j = spl->splitVector.spl_left[i];
00132         float       penalty = gistpenalty(giststate, attno, &entry, false,
00133                                           &valvec[j], false);
00134 
00135         if (penalty == 0.0)
00136         {
00137             spl->spl_dontcare[j] = true;
00138             NumDontCare++;
00139         }
00140     }
00141 
00142     /* And conversely for the right-side tuples */
00143     gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
00144                   (OffsetNumber) 0, FALSE);
00145     for (i = 0; i < spl->splitVector.spl_nright; i++)
00146     {
00147         int         j = spl->splitVector.spl_right[i];
00148         float       penalty = gistpenalty(giststate, attno, &entry, false,
00149                                           &valvec[j], false);
00150 
00151         if (penalty == 0.0)
00152         {
00153             spl->spl_dontcare[j] = true;
00154             NumDontCare++;
00155         }
00156     }
00157 
00158     return NumDontCare;
00159 }
00160 
00161 /*
00162  * Remove tuples that are marked don't-cares from the tuple index array a[]
00163  * of length *len.  This is applied separately to the spl_left and spl_right
00164  * arrays.
00165  */
00166 static void
00167 removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
00168 {
00169     int         origlen,
00170                 newlen,
00171                 i;
00172     OffsetNumber *curwpos;
00173 
00174     origlen = newlen = *len;
00175     curwpos = a;
00176     for (i = 0; i < origlen; i++)
00177     {
00178         OffsetNumber ai = a[i];
00179 
00180         if (dontcare[ai] == FALSE)
00181         {
00182             /* re-emit item into a[] */
00183             *curwpos = ai;
00184             curwpos++;
00185         }
00186         else
00187             newlen--;
00188     }
00189 
00190     *len = newlen;
00191 }
00192 
00193 /*
00194  * Place a single don't-care tuple into either the left or right side of the
00195  * split, according to which has least penalty for merging the tuple into
00196  * the previously-computed union keys.  We need consider only columns starting
00197  * at attno.
00198  */
00199 static void
00200 placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
00201          IndexTuple itup, OffsetNumber off, int attno)
00202 {
00203     GISTENTRY   identry[INDEX_MAX_KEYS];
00204     bool        isnull[INDEX_MAX_KEYS];
00205     bool        toLeft = true;
00206 
00207     gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
00208                       identry, isnull);
00209 
00210     for (; attno < giststate->tupdesc->natts; attno++)
00211     {
00212         float       lpenalty,
00213                     rpenalty;
00214         GISTENTRY   entry;
00215 
00216         gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
00217         lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
00218                                identry + attno, isnull[attno]);
00219         gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
00220         rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
00221                                identry + attno, isnull[attno]);
00222 
00223         if (lpenalty != rpenalty)
00224         {
00225             if (lpenalty > rpenalty)
00226                 toLeft = false;
00227             break;
00228         }
00229     }
00230 
00231     if (toLeft)
00232         v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
00233     else
00234         v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
00235 }
00236 
00237 #define SWAPVAR( s, d, t ) \
00238 do {    \
00239     (t) = (s); \
00240     (s) = (d); \
00241     (d) = (t); \
00242 } while(0)
00243 
00244 /*
00245  * Clean up when we did a secondary split but the user-defined PickSplit
00246  * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
00247  * true).
00248  *
00249  * We consider whether to swap the left and right outputs of the secondary
00250  * split; this can be worthwhile if the penalty for merging those tuples into
00251  * the previously chosen sets is less that way.
00252  *
00253  * In any case we must update the union datums for the current column by
00254  * adding in the previous union keys (oldL/oldR), since the user-defined
00255  * PickSplit method didn't do so.
00256  */
00257 static void
00258 supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
00259                       GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
00260 {
00261     bool        leaveOnLeft = true,
00262                 tmpBool;
00263     GISTENTRY   entryL,
00264                 entryR,
00265                 entrySL,
00266                 entrySR;
00267 
00268     gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
00269     gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
00270     gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
00271     gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
00272 
00273     if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
00274     {
00275         float       penalty1,
00276                     penalty2;
00277 
00278         penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
00279             gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
00280         penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
00281             gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
00282 
00283         if (penalty1 > penalty2)
00284             leaveOnLeft = false;
00285     }
00286     else
00287     {
00288         GISTENTRY  *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
00289         float       penalty1,
00290                     penalty2;
00291 
00292         /*
00293          * There is only one previously defined union, so we just choose swap
00294          * or not by lowest penalty for that side.  We can only get here if a
00295          * secondary split happened to have all NULLs in its column in the
00296          * tuples that the outer recursion level had assigned to one side.
00297          * (Note that the null checks in gistSplitByKey don't prevent the
00298          * case, because they'll only be checking tuples that were considered
00299          * don't-cares at the outer recursion level, not the tuples that went
00300          * into determining the passed-down left and right union keys.)
00301          */
00302         penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
00303         penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
00304 
00305         if (penalty1 < penalty2)
00306             leaveOnLeft = (sv->spl_ldatum_exists) ? true : false;
00307         else
00308             leaveOnLeft = (sv->spl_rdatum_exists) ? true : false;
00309     }
00310 
00311     if (leaveOnLeft == false)
00312     {
00313         /*
00314          * swap left and right
00315          */
00316         OffsetNumber *off,
00317                     noff;
00318         Datum       datum;
00319 
00320         SWAPVAR(sv->spl_left, sv->spl_right, off);
00321         SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
00322         SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
00323         gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
00324         gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
00325     }
00326 
00327     if (sv->spl_ldatum_exists)
00328         gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
00329                          &sv->spl_ldatum, &tmpBool);
00330 
00331     if (sv->spl_rdatum_exists)
00332         gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
00333                          &sv->spl_rdatum, &tmpBool);
00334 
00335     sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
00336 }
00337 
00338 /*
00339  * Trivial picksplit implementation. Function called only
00340  * if user-defined picksplit puts all keys on the same side of the split.
00341  * That is a bug of user-defined picksplit but we don't want to fail.
00342  */
00343 static void
00344 genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
00345 {
00346     OffsetNumber i,
00347                 maxoff;
00348     int         nbytes;
00349     GistEntryVector *evec;
00350 
00351     maxoff = entryvec->n - 1;
00352 
00353     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00354 
00355     v->spl_left = (OffsetNumber *) palloc(nbytes);
00356     v->spl_right = (OffsetNumber *) palloc(nbytes);
00357     v->spl_nleft = v->spl_nright = 0;
00358 
00359     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
00360     {
00361         if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
00362         {
00363             v->spl_left[v->spl_nleft] = i;
00364             v->spl_nleft++;
00365         }
00366         else
00367         {
00368             v->spl_right[v->spl_nright] = i;
00369             v->spl_nright++;
00370         }
00371     }
00372 
00373     /*
00374      * Form union datums for each side
00375      */
00376     evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
00377 
00378     evec->n = v->spl_nleft;
00379     memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
00380            sizeof(GISTENTRY) * evec->n);
00381     v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
00382                                       giststate->supportCollation[attno],
00383                                       PointerGetDatum(evec),
00384                                       PointerGetDatum(&nbytes));
00385 
00386     evec->n = v->spl_nright;
00387     memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
00388            sizeof(GISTENTRY) * evec->n);
00389     v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
00390                                       giststate->supportCollation[attno],
00391                                       PointerGetDatum(evec),
00392                                       PointerGetDatum(&nbytes));
00393 }
00394 
00395 /*
00396  * Calls user picksplit method for attno column to split tuples into
00397  * two vectors.
00398  *
00399  * Returns FALSE if split is complete (there are no more index columns, or
00400  * there is no need to consider them because split is optimal already).
00401  *
00402  * Returns TRUE and v->spl_dontcare = NULL if the picksplit result is
00403  * degenerate (all tuples seem to be don't-cares), so we should just
00404  * disregard this column and split on the next column(s) instead.
00405  *
00406  * Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples
00407  * that could be relocated based on the next column(s).  The don't-care
00408  * tuples have been removed from the split and must be reinserted by caller.
00409  * There is at least one non-don't-care tuple on each side of the split,
00410  * and union keys for all columns are updated to include just those tuples.
00411  *
00412  * A TRUE result implies there is at least one more index column.
00413  */
00414 static bool
00415 gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
00416                   IndexTuple *itup, int len, GISTSTATE *giststate)
00417 {
00418     GIST_SPLITVEC *sv = &v->splitVector;
00419 
00420     /*
00421      * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
00422      * case we are doing a secondary split (see comments in gist.h).
00423      */
00424     sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
00425     sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
00426     sv->spl_ldatum = v->spl_lattr[attno];
00427     sv->spl_rdatum = v->spl_rattr[attno];
00428 
00429     /*
00430      * Let the opclass-specific PickSplit method do its thing.  Note that at
00431      * this point we know there are no null keys in the entryvec.
00432      */
00433     FunctionCall2Coll(&giststate->picksplitFn[attno],
00434                       giststate->supportCollation[attno],
00435                       PointerGetDatum(entryvec),
00436                       PointerGetDatum(sv));
00437 
00438     if (sv->spl_nleft == 0 || sv->spl_nright == 0)
00439     {
00440         /*
00441          * User-defined picksplit failed to create an actual split, ie it put
00442          * everything on the same side.  Complain but cope.
00443          */
00444         ereport(DEBUG1,
00445                 (errcode(ERRCODE_INTERNAL_ERROR),
00446               errmsg("picksplit method for column %d of index \"%s\" failed",
00447                      attno + 1, RelationGetRelationName(r)),
00448                  errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
00449 
00450         /*
00451          * Reinit GIST_SPLITVEC. Although these fields are not used by
00452          * genericPickSplit(), set them up for further processing
00453          */
00454         sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
00455         sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
00456         sv->spl_ldatum = v->spl_lattr[attno];
00457         sv->spl_rdatum = v->spl_rattr[attno];
00458 
00459         /* Do a generic split */
00460         genericPickSplit(giststate, entryvec, sv, attno);
00461     }
00462     else
00463     {
00464         /* hack for compatibility with old picksplit API */
00465         if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
00466             sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
00467         if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
00468             sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
00469     }
00470 
00471     /* Clean up if PickSplit didn't take care of a secondary split */
00472     if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
00473         supportSecondarySplit(r, giststate, attno, sv,
00474                               v->spl_lattr[attno], v->spl_rattr[attno]);
00475 
00476     /* emit union datums computed by PickSplit back to v arrays */
00477     v->spl_lattr[attno] = sv->spl_ldatum;
00478     v->spl_rattr[attno] = sv->spl_rdatum;
00479     v->spl_lisnull[attno] = false;
00480     v->spl_risnull[attno] = false;
00481 
00482     /*
00483      * If index columns remain, then consider whether we can improve the split
00484      * by using them.
00485      */
00486     v->spl_dontcare = NULL;
00487 
00488     if (attno + 1 < giststate->tupdesc->natts)
00489     {
00490         int         NumDontCare;
00491 
00492         /*
00493          * Make a quick check to see if left and right union keys are equal;
00494          * if so, the split is certainly degenerate, so tell caller to
00495          * re-split with the next column.
00496          */
00497         if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
00498             return true;
00499 
00500         /*
00501          * Locate don't-care tuples, if any.  If there are none, the split is
00502          * optimal, so just fall out and return false.
00503          */
00504         v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
00505 
00506         NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
00507 
00508         if (NumDontCare > 0)
00509         {
00510             /*
00511              * Remove don't-cares from spl_left[] and spl_right[].
00512              */
00513             removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
00514             removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
00515 
00516             /*
00517              * If all tuples on either side were don't-cares, the split is
00518              * degenerate, and we're best off to ignore it and split on the
00519              * next column.  (We used to try to press on with a secondary
00520              * split by forcing a random tuple on each side to be treated as
00521              * non-don't-care, but it seems unlikely that that technique
00522              * really gives a better result.  Note that we don't want to try a
00523              * secondary split with empty left or right primary split sides,
00524              * because then there is no union key on that side for the
00525              * PickSplit function to try to expand, so it can have no good
00526              * figure of merit for what it's doing.  Also note that this check
00527              * ensures we can't produce a bogus one-side-only split in the
00528              * NumDontCare == 1 special case below.)
00529              */
00530             if (sv->spl_nleft == 0 || sv->spl_nright == 0)
00531             {
00532                 v->spl_dontcare = NULL;
00533                 return true;
00534             }
00535 
00536             /*
00537              * Recompute union keys, considering only non-don't-care tuples.
00538              * NOTE: this will set union keys for remaining index columns,
00539              * which will cause later calls of gistUserPicksplit to pass those
00540              * values down to user-defined PickSplit methods with
00541              * spl_ldatum_exists/spl_rdatum_exists set true.
00542              */
00543             gistunionsubkey(giststate, itup, v);
00544 
00545             if (NumDontCare == 1)
00546             {
00547                 /*
00548                  * If there's only one don't-care tuple then we can't do a
00549                  * PickSplit on it, so just choose whether to send it left or
00550                  * right by comparing penalties.  We needed the
00551                  * gistunionsubkey step anyway so that we have appropriate
00552                  * union keys for figuring the penalties.
00553                  */
00554                 OffsetNumber toMove;
00555 
00556                 /* find it ... */
00557                 for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
00558                 {
00559                     if (v->spl_dontcare[toMove])
00560                         break;
00561                 }
00562                 Assert(toMove < entryvec->n);
00563 
00564                 /* ... and assign it to cheaper side */
00565                 placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
00566 
00567                 /*
00568                  * At this point the union keys are wrong, but we don't care
00569                  * because we're done splitting.  The outermost recursion
00570                  * level of gistSplitByKey will fix things before returning.
00571                  */
00572             }
00573             else
00574                 return true;
00575         }
00576     }
00577 
00578     return false;
00579 }
00580 
00581 /*
00582  * simply split page in half
00583  */
00584 static void
00585 gistSplitHalf(GIST_SPLITVEC *v, int len)
00586 {
00587     int         i;
00588 
00589     v->spl_nright = v->spl_nleft = 0;
00590     v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
00591     v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
00592     for (i = 1; i <= len; i++)
00593         if (i < len / 2)
00594             v->spl_right[v->spl_nright++] = i;
00595         else
00596             v->spl_left[v->spl_nleft++] = i;
00597 
00598     /* we need not compute union keys, caller took care of it */
00599 }
00600 
00601 /*
00602  * gistSplitByKey: main entry point for page-splitting algorithm
00603  *
00604  * r: index relation
00605  * page: page being split
00606  * itup: array of IndexTuples to be processed
00607  * len: number of IndexTuples to be processed (must be at least 2)
00608  * giststate: additional info about index
00609  * v: working state and output area
00610  * attno: column we are working on (zero-based index)
00611  *
00612  * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
00613  * to all-TRUE.  On return, spl_left/spl_nleft contain indexes of tuples
00614  * to go left, spl_right/spl_nright contain indexes of tuples to go right,
00615  * spl_lattr/spl_lisnull contain left-side union key values, and
00616  * spl_rattr/spl_risnull contain right-side union key values.  Other fields
00617  * in this struct are workspace for this file.
00618  *
00619  * Outside caller must pass zero for attno.  The function may internally
00620  * recurse to the next column by passing attno+1.
00621  */
00622 void
00623 gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
00624                GISTSTATE *giststate, GistSplitVector *v, int attno)
00625 {
00626     GistEntryVector *entryvec;
00627     OffsetNumber *offNullTuples;
00628     int         nOffNullTuples = 0;
00629     int         i;
00630 
00631     /* generate the item array, and identify tuples with null keys */
00632     /* note that entryvec->vector[0] goes unused in this code */
00633     entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
00634     entryvec->n = len + 1;
00635     offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
00636 
00637     for (i = 1; i <= len; i++)
00638     {
00639         Datum       datum;
00640         bool        IsNull;
00641 
00642         datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc,
00643                               &IsNull);
00644         gistdentryinit(giststate, attno, &(entryvec->vector[i]),
00645                        datum, r, page, i,
00646                        FALSE, IsNull);
00647         if (IsNull)
00648             offNullTuples[nOffNullTuples++] = i;
00649     }
00650 
00651     if (nOffNullTuples == len)
00652     {
00653         /*
00654          * Corner case: All keys in attno column are null, so just transfer
00655          * our attention to the next column.  If there's no next column, just
00656          * split page in half.
00657          */
00658         v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
00659 
00660         if (attno + 1 < giststate->tupdesc->natts)
00661             gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
00662         else
00663             gistSplitHalf(&v->splitVector, len);
00664     }
00665     else if (nOffNullTuples > 0)
00666     {
00667         int         j = 0;
00668 
00669         /*
00670          * We don't want to mix NULL and not-NULL keys on one page, so split
00671          * nulls to right page and not-nulls to left.
00672          */
00673         v->splitVector.spl_right = offNullTuples;
00674         v->splitVector.spl_nright = nOffNullTuples;
00675         v->spl_risnull[attno] = TRUE;
00676 
00677         v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
00678         v->splitVector.spl_nleft = 0;
00679         for (i = 1; i <= len; i++)
00680             if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
00681                 j++;
00682             else
00683                 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
00684 
00685         /* Compute union keys, unless outer recursion level will handle it */
00686         if (attno == 0 && giststate->tupdesc->natts == 1)
00687         {
00688             v->spl_dontcare = NULL;
00689             gistunionsubkey(giststate, itup, v);
00690         }
00691     }
00692     else
00693     {
00694         /*
00695          * All keys are not-null, so apply user-defined PickSplit method
00696          */
00697         if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
00698         {
00699             /*
00700              * Splitting on attno column is not optimal, so consider
00701              * redistributing don't-care tuples according to the next column
00702              */
00703             Assert(attno + 1 < giststate->tupdesc->natts);
00704 
00705             if (v->spl_dontcare == NULL)
00706             {
00707                 /*
00708                  * This split was actually degenerate, so ignore it altogether
00709                  * and just split according to the next column.
00710                  */
00711                 gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
00712             }
00713             else
00714             {
00715                 /*
00716                  * Form an array of just the don't-care tuples to pass to a
00717                  * recursive invocation of this function for the next column.
00718                  */
00719                 IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
00720                 OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
00721                 int         newlen = 0;
00722                 GIST_SPLITVEC backupSplit;
00723 
00724                 for (i = 0; i < len; i++)
00725                 {
00726                     if (v->spl_dontcare[i + 1])
00727                     {
00728                         newitup[newlen] = itup[i];
00729                         map[newlen] = i + 1;
00730                         newlen++;
00731                     }
00732                 }
00733 
00734                 Assert(newlen > 0);
00735 
00736                 /*
00737                  * Make a backup copy of v->splitVector, since the recursive
00738                  * call will overwrite that with its own result.
00739                  */
00740                 backupSplit = v->splitVector;
00741                 backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
00742                 memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
00743                 backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
00744                 memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
00745 
00746                 /* Recursively decide how to split the don't-care tuples */
00747                 gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
00748 
00749                 /* Merge result of subsplit with non-don't-care tuples */
00750                 for (i = 0; i < v->splitVector.spl_nleft; i++)
00751                     backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
00752                 for (i = 0; i < v->splitVector.spl_nright; i++)
00753                     backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
00754 
00755                 v->splitVector = backupSplit;
00756             }
00757         }
00758     }
00759 
00760     /*
00761      * If we're handling a multicolumn index, at the end of the recursion
00762      * recompute the left and right union datums for all index columns.  This
00763      * makes sure we hand back correct union datums in all corner cases,
00764      * including when we haven't processed all columns to start with, or when
00765      * a secondary split moved "don't care" tuples from one side to the other
00766      * (we really shouldn't assume that that didn't change the union datums).
00767      *
00768      * Note: when we're in an internal recursion (attno > 0), we do not worry
00769      * about whether the union datums we return with are sensible, since
00770      * calling levels won't care.  Also, in a single-column index, we expect
00771      * that PickSplit (or the special cases above) produced correct union
00772      * datums.
00773      */
00774     if (attno == 0 && giststate->tupdesc->natts > 1)
00775     {
00776         v->spl_dontcare = NULL;
00777         gistunionsubkey(giststate, itup, v);
00778     }
00779 }
Header And Logo

gistsplit.c