Header And Logo

PostgreSQL
| The world's most advanced open source database.

_int_gist.c

Go to the documentation of this file.
00001 /*
00002  * contrib/intarray/_int_gist.c
00003  */
00004 #include "postgres.h"
00005 
00006 #include "access/gist.h"
00007 #include "access/skey.h"
00008 
00009 #include "_int.h"
00010 
00011 #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
00012 
00013 /*
00014 ** GiST support methods
00015 */
00016 PG_FUNCTION_INFO_V1(g_int_consistent);
00017 PG_FUNCTION_INFO_V1(g_int_compress);
00018 PG_FUNCTION_INFO_V1(g_int_decompress);
00019 PG_FUNCTION_INFO_V1(g_int_penalty);
00020 PG_FUNCTION_INFO_V1(g_int_picksplit);
00021 PG_FUNCTION_INFO_V1(g_int_union);
00022 PG_FUNCTION_INFO_V1(g_int_same);
00023 
00024 Datum       g_int_consistent(PG_FUNCTION_ARGS);
00025 Datum       g_int_compress(PG_FUNCTION_ARGS);
00026 Datum       g_int_decompress(PG_FUNCTION_ARGS);
00027 Datum       g_int_penalty(PG_FUNCTION_ARGS);
00028 Datum       g_int_picksplit(PG_FUNCTION_ARGS);
00029 Datum       g_int_union(PG_FUNCTION_ARGS);
00030 Datum       g_int_same(PG_FUNCTION_ARGS);
00031 
00032 
00033 /*
00034 ** The GiST Consistent method for _intments
00035 ** Should return false if for all data items x below entry,
00036 ** the predicate x op query == FALSE, where op is the oper
00037 ** corresponding to strategy in the pg_amop table.
00038 */
00039 Datum
00040 g_int_consistent(PG_FUNCTION_ARGS)
00041 {
00042     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00043     ArrayType  *query = PG_GETARG_ARRAYTYPE_P_COPY(1);
00044     StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00045 
00046     /* Oid      subtype = PG_GETARG_OID(3); */
00047     bool       *recheck = (bool *) PG_GETARG_POINTER(4);
00048     bool        retval;
00049 
00050     /* this is exact except for RTSameStrategyNumber */
00051     *recheck = (strategy == RTSameStrategyNumber);
00052 
00053     if (strategy == BooleanSearchStrategy)
00054     {
00055         retval = execconsistent((QUERYTYPE *) query,
00056                                 (ArrayType *) DatumGetPointer(entry->key),
00057                                 GIST_LEAF(entry));
00058 
00059         pfree(query);
00060         PG_RETURN_BOOL(retval);
00061     }
00062 
00063     /* sort query for fast search, key is already sorted */
00064     CHECKARRVALID(query);
00065     PREPAREARR(query);
00066 
00067     switch (strategy)
00068     {
00069         case RTOverlapStrategyNumber:
00070             retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
00071                                        query);
00072             break;
00073         case RTSameStrategyNumber:
00074             if (GIST_LEAF(entry))
00075                 DirectFunctionCall3(g_int_same,
00076                                     entry->key,
00077                                     PointerGetDatum(query),
00078                                     PointerGetDatum(&retval));
00079             else
00080                 retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
00081                                             query);
00082             break;
00083         case RTContainsStrategyNumber:
00084         case RTOldContainsStrategyNumber:
00085             retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
00086                                         query);
00087             break;
00088         case RTContainedByStrategyNumber:
00089         case RTOldContainedByStrategyNumber:
00090             if (GIST_LEAF(entry))
00091                 retval = inner_int_contains(query,
00092                                   (ArrayType *) DatumGetPointer(entry->key));
00093             else
00094                 retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
00095                                            query);
00096             break;
00097         default:
00098             retval = FALSE;
00099     }
00100     pfree(query);
00101     PG_RETURN_BOOL(retval);
00102 }
00103 
00104 Datum
00105 g_int_union(PG_FUNCTION_ARGS)
00106 {
00107     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00108     int        *size = (int *) PG_GETARG_POINTER(1);
00109     int32       i,
00110                *ptr;
00111     ArrayType  *res;
00112     int         totlen = 0;
00113 
00114     for (i = 0; i < entryvec->n; i++)
00115     {
00116         ArrayType  *ent = GETENTRY(entryvec, i);
00117 
00118         CHECKARRVALID(ent);
00119         totlen += ARRNELEMS(ent);
00120     }
00121 
00122     res = new_intArrayType(totlen);
00123     ptr = ARRPTR(res);
00124 
00125     for (i = 0; i < entryvec->n; i++)
00126     {
00127         ArrayType  *ent = GETENTRY(entryvec, i);
00128         int         nel;
00129 
00130         nel = ARRNELEMS(ent);
00131         memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));
00132         ptr += nel;
00133     }
00134 
00135     QSORT(res, 1);
00136     res = _int_unique(res);
00137     *size = VARSIZE(res);
00138     PG_RETURN_POINTER(res);
00139 }
00140 
00141 /*
00142 ** GiST Compress and Decompress methods
00143 */
00144 Datum
00145 g_int_compress(PG_FUNCTION_ARGS)
00146 {
00147     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00148     GISTENTRY  *retval;
00149     ArrayType  *r;
00150     int         len;
00151     int        *dr;
00152     int         i,
00153                 min,
00154                 cand;
00155 
00156     if (entry->leafkey)
00157     {
00158         r = DatumGetArrayTypePCopy(entry->key);
00159         CHECKARRVALID(r);
00160         PREPAREARR(r);
00161 
00162         if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)
00163             elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
00164                  2 * MAXNUMRANGE - 1, ARRNELEMS(r));
00165 
00166         retval = palloc(sizeof(GISTENTRY));
00167         gistentryinit(*retval, PointerGetDatum(r),
00168                       entry->rel, entry->page, entry->offset, FALSE);
00169 
00170         PG_RETURN_POINTER(retval);
00171     }
00172 
00173     /*
00174      * leaf entries never compress one more time, only when entry->leafkey
00175      * ==true, so now we work only with internal keys
00176      */
00177 
00178     r = DatumGetArrayTypeP(entry->key);
00179     CHECKARRVALID(r);
00180     if (ARRISEMPTY(r))
00181     {
00182         if (r != (ArrayType *) DatumGetPointer(entry->key))
00183             pfree(r);
00184         PG_RETURN_POINTER(entry);
00185     }
00186 
00187     if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
00188     {                           /* compress */
00189         if (r == (ArrayType *) DatumGetPointer(entry->key))
00190             r = DatumGetArrayTypePCopy(entry->key);
00191         r = resize_intArrayType(r, 2 * (len));
00192 
00193         dr = ARRPTR(r);
00194 
00195         for (i = len - 1; i >= 0; i--)
00196             dr[2 * i] = dr[2 * i + 1] = dr[i];
00197 
00198         len *= 2;
00199         cand = 1;
00200         while (len > MAXNUMRANGE * 2)
00201         {
00202             min = 0x7fffffff;
00203             for (i = 2; i < len; i += 2)
00204                 if (min > (dr[i] - dr[i - 1]))
00205                 {
00206                     min = (dr[i] - dr[i - 1]);
00207                     cand = i;
00208                 }
00209             memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
00210             len -= 2;
00211         }
00212         r = resize_intArrayType(r, len);
00213         retval = palloc(sizeof(GISTENTRY));
00214         gistentryinit(*retval, PointerGetDatum(r),
00215                       entry->rel, entry->page, entry->offset, FALSE);
00216         PG_RETURN_POINTER(retval);
00217     }
00218     else
00219         PG_RETURN_POINTER(entry);
00220 }
00221 
00222 Datum
00223 g_int_decompress(PG_FUNCTION_ARGS)
00224 {
00225     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00226     GISTENTRY  *retval;
00227     ArrayType  *r;
00228     int        *dr,
00229                 lenr;
00230     ArrayType  *in;
00231     int         lenin;
00232     int        *din;
00233     int         i,
00234                 j;
00235 
00236     in = DatumGetArrayTypeP(entry->key);
00237 
00238     CHECKARRVALID(in);
00239     if (ARRISEMPTY(in))
00240     {
00241         if (in != (ArrayType *) DatumGetPointer(entry->key))
00242         {
00243             retval = palloc(sizeof(GISTENTRY));
00244             gistentryinit(*retval, PointerGetDatum(in),
00245                           entry->rel, entry->page, entry->offset, FALSE);
00246             PG_RETURN_POINTER(retval);
00247         }
00248 
00249         PG_RETURN_POINTER(entry);
00250     }
00251 
00252     lenin = ARRNELEMS(in);
00253 
00254     if (lenin < 2 * MAXNUMRANGE)
00255     {                           /* not compressed value */
00256         if (in != (ArrayType *) DatumGetPointer(entry->key))
00257         {
00258             retval = palloc(sizeof(GISTENTRY));
00259             gistentryinit(*retval, PointerGetDatum(in),
00260                           entry->rel, entry->page, entry->offset, FALSE);
00261 
00262             PG_RETURN_POINTER(retval);
00263         }
00264         PG_RETURN_POINTER(entry);
00265     }
00266 
00267     din = ARRPTR(in);
00268     lenr = internal_size(din, lenin);
00269 
00270     r = new_intArrayType(lenr);
00271     dr = ARRPTR(r);
00272 
00273     for (i = 0; i < lenin; i += 2)
00274         for (j = din[i]; j <= din[i + 1]; j++)
00275             if ((!i) || *(dr - 1) != j)
00276                 *dr++ = j;
00277 
00278     if (in != (ArrayType *) DatumGetPointer(entry->key))
00279         pfree(in);
00280     retval = palloc(sizeof(GISTENTRY));
00281     gistentryinit(*retval, PointerGetDatum(r),
00282                   entry->rel, entry->page, entry->offset, FALSE);
00283 
00284     PG_RETURN_POINTER(retval);
00285 }
00286 
00287 /*
00288 ** The GiST Penalty method for _intments
00289 */
00290 Datum
00291 g_int_penalty(PG_FUNCTION_ARGS)
00292 {
00293     GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
00294     GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
00295     float      *result = (float *) PG_GETARG_POINTER(2);
00296     ArrayType  *ud;
00297     float       tmp1,
00298                 tmp2;
00299 
00300     ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
00301                          (ArrayType *) DatumGetPointer(newentry->key));
00302     rt__int_size(ud, &tmp1);
00303     rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
00304     *result = tmp1 - tmp2;
00305     pfree(ud);
00306 
00307     PG_RETURN_POINTER(result);
00308 }
00309 
00310 
00311 
00312 Datum
00313 g_int_same(PG_FUNCTION_ARGS)
00314 {
00315     ArrayType  *a = PG_GETARG_ARRAYTYPE_P(0);
00316     ArrayType  *b = PG_GETARG_ARRAYTYPE_P(1);
00317     bool       *result = (bool *) PG_GETARG_POINTER(2);
00318     int32       n = ARRNELEMS(a);
00319     int32      *da,
00320                *db;
00321 
00322     CHECKARRVALID(a);
00323     CHECKARRVALID(b);
00324 
00325     if (n != ARRNELEMS(b))
00326     {
00327         *result = false;
00328         PG_RETURN_POINTER(result);
00329     }
00330     *result = TRUE;
00331     da = ARRPTR(a);
00332     db = ARRPTR(b);
00333     while (n--)
00334     {
00335         if (*da++ != *db++)
00336         {
00337             *result = FALSE;
00338             break;
00339         }
00340     }
00341 
00342     PG_RETURN_POINTER(result);
00343 }
00344 
00345 /*****************************************************************
00346 ** Common GiST Method
00347 *****************************************************************/
00348 
00349 typedef struct
00350 {
00351     OffsetNumber pos;
00352     float       cost;
00353 } SPLITCOST;
00354 
00355 static int
00356 comparecost(const void *a, const void *b)
00357 {
00358     if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
00359         return 0;
00360     else
00361         return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
00362 }
00363 
00364 /*
00365 ** The GiST PickSplit method for _intments
00366 ** We use Guttman's poly time split algorithm
00367 */
00368 Datum
00369 g_int_picksplit(PG_FUNCTION_ARGS)
00370 {
00371     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00372     GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
00373     OffsetNumber i,
00374                 j;
00375     ArrayType  *datum_alpha,
00376                *datum_beta;
00377     ArrayType  *datum_l,
00378                *datum_r;
00379     ArrayType  *union_d,
00380                *union_dl,
00381                *union_dr;
00382     ArrayType  *inter_d;
00383     bool        firsttime;
00384     float       size_alpha,
00385                 size_beta,
00386                 size_union,
00387                 size_inter;
00388     float       size_waste,
00389                 waste;
00390     float       size_l,
00391                 size_r;
00392     int         nbytes;
00393     OffsetNumber seed_1 = 0,
00394                 seed_2 = 0;
00395     OffsetNumber *left,
00396                *right;
00397     OffsetNumber maxoff;
00398     SPLITCOST  *costvector;
00399 
00400 #ifdef GIST_DEBUG
00401     elog(DEBUG3, "--------picksplit %d", entryvec->n);
00402 #endif
00403 
00404     maxoff = entryvec->n - 2;
00405     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00406     v->spl_left = (OffsetNumber *) palloc(nbytes);
00407     v->spl_right = (OffsetNumber *) palloc(nbytes);
00408 
00409     firsttime = true;
00410     waste = 0.0;
00411     for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
00412     {
00413         datum_alpha = GETENTRY(entryvec, i);
00414         for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
00415         {
00416             datum_beta = GETENTRY(entryvec, j);
00417 
00418             /* compute the wasted space by unioning these guys */
00419             /* size_waste = size_union - size_inter; */
00420             union_d = inner_int_union(datum_alpha, datum_beta);
00421             rt__int_size(union_d, &size_union);
00422             inter_d = inner_int_inter(datum_alpha, datum_beta);
00423             rt__int_size(inter_d, &size_inter);
00424             size_waste = size_union - size_inter;
00425 
00426             pfree(union_d);
00427 
00428             if (inter_d != (ArrayType *) NULL)
00429                 pfree(inter_d);
00430 
00431             /*
00432              * are these a more promising split that what we've already seen?
00433              */
00434 
00435             if (size_waste > waste || firsttime)
00436             {
00437                 waste = size_waste;
00438                 seed_1 = i;
00439                 seed_2 = j;
00440                 firsttime = false;
00441             }
00442         }
00443     }
00444 
00445     left = v->spl_left;
00446     v->spl_nleft = 0;
00447     right = v->spl_right;
00448     v->spl_nright = 0;
00449     if (seed_1 == 0 || seed_2 == 0)
00450     {
00451         seed_1 = 1;
00452         seed_2 = 2;
00453     }
00454 
00455     datum_alpha = GETENTRY(entryvec, seed_1);
00456     datum_l = copy_intArrayType(datum_alpha);
00457     rt__int_size(datum_l, &size_l);
00458     datum_beta = GETENTRY(entryvec, seed_2);
00459     datum_r = copy_intArrayType(datum_beta);
00460     rt__int_size(datum_r, &size_r);
00461 
00462     maxoff = OffsetNumberNext(maxoff);
00463 
00464     /*
00465      * sort entries
00466      */
00467     costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
00468     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
00469     {
00470         costvector[i - 1].pos = i;
00471         datum_alpha = GETENTRY(entryvec, i);
00472         union_d = inner_int_union(datum_l, datum_alpha);
00473         rt__int_size(union_d, &size_alpha);
00474         pfree(union_d);
00475         union_d = inner_int_union(datum_r, datum_alpha);
00476         rt__int_size(union_d, &size_beta);
00477         pfree(union_d);
00478         costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
00479     }
00480     qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
00481 
00482     /*
00483      * Now split up the regions between the two seeds.  An important property
00484      * of this split algorithm is that the split vector v has the indices of
00485      * items to be split in order in its left and right vectors.  We exploit
00486      * this property by doing a merge in the code that actually splits the
00487      * page.
00488      *
00489      * For efficiency, we also place the new index tuple in this loop. This is
00490      * handled at the very end, when we have placed all the existing tuples
00491      * and i == maxoff + 1.
00492      */
00493 
00494 
00495     for (j = 0; j < maxoff; j++)
00496     {
00497         i = costvector[j].pos;
00498 
00499         /*
00500          * If we've already decided where to place this item, just put it on
00501          * the right list.  Otherwise, we need to figure out which page needs
00502          * the least enlargement in order to store the item.
00503          */
00504 
00505         if (i == seed_1)
00506         {
00507             *left++ = i;
00508             v->spl_nleft++;
00509             continue;
00510         }
00511         else if (i == seed_2)
00512         {
00513             *right++ = i;
00514             v->spl_nright++;
00515             continue;
00516         }
00517 
00518         /* okay, which page needs least enlargement? */
00519         datum_alpha = GETENTRY(entryvec, i);
00520         union_dl = inner_int_union(datum_l, datum_alpha);
00521         union_dr = inner_int_union(datum_r, datum_alpha);
00522         rt__int_size(union_dl, &size_alpha);
00523         rt__int_size(union_dr, &size_beta);
00524 
00525         /* pick which page to add it to */
00526         if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
00527         {
00528             if (datum_l)
00529                 pfree(datum_l);
00530             if (union_dr)
00531                 pfree(union_dr);
00532             datum_l = union_dl;
00533             size_l = size_alpha;
00534             *left++ = i;
00535             v->spl_nleft++;
00536         }
00537         else
00538         {
00539             if (datum_r)
00540                 pfree(datum_r);
00541             if (union_dl)
00542                 pfree(union_dl);
00543             datum_r = union_dr;
00544             size_r = size_beta;
00545             *right++ = i;
00546             v->spl_nright++;
00547         }
00548     }
00549     pfree(costvector);
00550     *right = *left = FirstOffsetNumber;
00551 
00552     v->spl_ldatum = PointerGetDatum(datum_l);
00553     v->spl_rdatum = PointerGetDatum(datum_r);
00554 
00555     PG_RETURN_POINTER(v);
00556 }