Header And Logo

PostgreSQL
| The world's most advanced open source database.

_intbig_gist.c

Go to the documentation of this file.
00001 /*
00002  * contrib/intarray/_intbig_gist.c
00003  */
00004 #include "postgres.h"
00005 
00006 #include "access/gist.h"
00007 #include "access/skey.h"
00008 
00009 #include "_int.h"
00010 
00011 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
00012 /*
00013 ** _intbig methods
00014 */
00015 PG_FUNCTION_INFO_V1(g_intbig_consistent);
00016 PG_FUNCTION_INFO_V1(g_intbig_compress);
00017 PG_FUNCTION_INFO_V1(g_intbig_decompress);
00018 PG_FUNCTION_INFO_V1(g_intbig_penalty);
00019 PG_FUNCTION_INFO_V1(g_intbig_picksplit);
00020 PG_FUNCTION_INFO_V1(g_intbig_union);
00021 PG_FUNCTION_INFO_V1(g_intbig_same);
00022 
00023 Datum       g_intbig_consistent(PG_FUNCTION_ARGS);
00024 Datum       g_intbig_compress(PG_FUNCTION_ARGS);
00025 Datum       g_intbig_decompress(PG_FUNCTION_ARGS);
00026 Datum       g_intbig_penalty(PG_FUNCTION_ARGS);
00027 Datum       g_intbig_picksplit(PG_FUNCTION_ARGS);
00028 Datum       g_intbig_union(PG_FUNCTION_ARGS);
00029 Datum       g_intbig_same(PG_FUNCTION_ARGS);
00030 
00031 /* Number of one-bits in an unsigned byte */
00032 static const uint8 number_of_ones[256] = {
00033     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00034     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00035     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00036     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00037     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00038     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00039     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00040     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00041     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00042     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00043     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00044     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00045     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00046     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00047     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00048     4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
00049 };
00050 
00051 PG_FUNCTION_INFO_V1(_intbig_in);
00052 Datum       _intbig_in(PG_FUNCTION_ARGS);
00053 
00054 PG_FUNCTION_INFO_V1(_intbig_out);
00055 Datum       _intbig_out(PG_FUNCTION_ARGS);
00056 
00057 
00058 Datum
00059 _intbig_in(PG_FUNCTION_ARGS)
00060 {
00061     ereport(ERROR,
00062             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00063              errmsg("_intbig_in() not implemented")));
00064     PG_RETURN_DATUM(0);
00065 }
00066 
00067 Datum
00068 _intbig_out(PG_FUNCTION_ARGS)
00069 {
00070     ereport(ERROR,
00071             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00072              errmsg("_intbig_out() not implemented")));
00073     PG_RETURN_DATUM(0);
00074 }
00075 
00076 
00077 /*********************************************************************
00078 ** intbig functions
00079 *********************************************************************/
00080 static bool
00081 _intbig_overlap(GISTTYPE *a, ArrayType *b)
00082 {
00083     int         num = ARRNELEMS(b);
00084     int32      *ptr = ARRPTR(b);
00085 
00086     CHECKARRVALID(b);
00087 
00088     while (num--)
00089     {
00090         if (GETBIT(GETSIGN(a), HASHVAL(*ptr)))
00091             return true;
00092         ptr++;
00093     }
00094 
00095     return false;
00096 }
00097 
00098 static bool
00099 _intbig_contains(GISTTYPE *a, ArrayType *b)
00100 {
00101     int         num = ARRNELEMS(b);
00102     int32      *ptr = ARRPTR(b);
00103 
00104     CHECKARRVALID(b);
00105 
00106     while (num--)
00107     {
00108         if (!GETBIT(GETSIGN(a), HASHVAL(*ptr)))
00109             return false;
00110         ptr++;
00111     }
00112 
00113     return true;
00114 }
00115 
00116 Datum
00117 g_intbig_same(PG_FUNCTION_ARGS)
00118 {
00119     GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
00120     GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
00121     bool       *result = (bool *) PG_GETARG_POINTER(2);
00122 
00123     if (ISALLTRUE(a) && ISALLTRUE(b))
00124         *result = true;
00125     else if (ISALLTRUE(a))
00126         *result = false;
00127     else if (ISALLTRUE(b))
00128         *result = false;
00129     else
00130     {
00131         int32       i;
00132         BITVECP     sa = GETSIGN(a),
00133                     sb = GETSIGN(b);
00134 
00135         *result = true;
00136         LOOPBYTE
00137         {
00138             if (sa[i] != sb[i])
00139             {
00140                 *result = false;
00141                 break;
00142             }
00143         }
00144     }
00145     PG_RETURN_POINTER(result);
00146 }
00147 
00148 Datum
00149 g_intbig_compress(PG_FUNCTION_ARGS)
00150 {
00151     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00152 
00153     if (entry->leafkey)
00154     {
00155         GISTENTRY  *retval;
00156         ArrayType  *in = DatumGetArrayTypeP(entry->key);
00157         int32      *ptr;
00158         int         num;
00159         GISTTYPE   *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
00160 
00161         CHECKARRVALID(in);
00162         if (ARRISEMPTY(in))
00163         {
00164             ptr = NULL;
00165             num = 0;
00166         }
00167         else
00168         {
00169             ptr = ARRPTR(in);
00170             num = ARRNELEMS(in);
00171         }
00172         SET_VARSIZE(res, CALCGTSIZE(0));
00173 
00174         while (num--)
00175         {
00176             HASH(GETSIGN(res), *ptr);
00177             ptr++;
00178         }
00179 
00180         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00181         gistentryinit(*retval, PointerGetDatum(res),
00182                       entry->rel, entry->page,
00183                       entry->offset, FALSE);
00184 
00185         if (in != DatumGetArrayTypeP(entry->key))
00186             pfree(in);
00187 
00188         PG_RETURN_POINTER(retval);
00189     }
00190     else if (!ISALLTRUE(DatumGetPointer(entry->key)))
00191     {
00192         GISTENTRY  *retval;
00193         int         i;
00194         BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
00195         GISTTYPE   *res;
00196 
00197         LOOPBYTE
00198         {
00199             if ((sign[i] & 0xff) != 0xff)
00200                 PG_RETURN_POINTER(entry);
00201         }
00202 
00203         res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
00204         SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
00205         res->flag = ALLISTRUE;
00206 
00207         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00208         gistentryinit(*retval, PointerGetDatum(res),
00209                       entry->rel, entry->page,
00210                       entry->offset, FALSE);
00211 
00212         PG_RETURN_POINTER(retval);
00213     }
00214 
00215     PG_RETURN_POINTER(entry);
00216 }
00217 
00218 
00219 static int32
00220 sizebitvec(BITVECP sign)
00221 {
00222     int32       size = 0,
00223                 i;
00224 
00225     LOOPBYTE
00226         size += number_of_ones[(unsigned char) sign[i]];
00227     return size;
00228 }
00229 
00230 static int
00231 hemdistsign(BITVECP a, BITVECP b)
00232 {
00233     int         i,
00234                 diff,
00235                 dist = 0;
00236 
00237     LOOPBYTE
00238     {
00239         diff = (unsigned char) (a[i] ^ b[i]);
00240         dist += number_of_ones[diff];
00241     }
00242     return dist;
00243 }
00244 
00245 static int
00246 hemdist(GISTTYPE *a, GISTTYPE *b)
00247 {
00248     if (ISALLTRUE(a))
00249     {
00250         if (ISALLTRUE(b))
00251             return 0;
00252         else
00253             return SIGLENBIT - sizebitvec(GETSIGN(b));
00254     }
00255     else if (ISALLTRUE(b))
00256         return SIGLENBIT - sizebitvec(GETSIGN(a));
00257 
00258     return hemdistsign(GETSIGN(a), GETSIGN(b));
00259 }
00260 
00261 Datum
00262 g_intbig_decompress(PG_FUNCTION_ARGS)
00263 {
00264     PG_RETURN_DATUM(PG_GETARG_DATUM(0));
00265 }
00266 
00267 static int32
00268 unionkey(BITVECP sbase, GISTTYPE *add)
00269 {
00270     int32       i;
00271     BITVECP     sadd = GETSIGN(add);
00272 
00273     if (ISALLTRUE(add))
00274         return 1;
00275     LOOPBYTE
00276         sbase[i] |= sadd[i];
00277     return 0;
00278 }
00279 
00280 Datum
00281 g_intbig_union(PG_FUNCTION_ARGS)
00282 {
00283     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00284     int        *size = (int *) PG_GETARG_POINTER(1);
00285     BITVEC      base;
00286     int32       i,
00287                 len;
00288     int32       flag = 0;
00289     GISTTYPE   *result;
00290 
00291     MemSet((void *) base, 0, sizeof(BITVEC));
00292     for (i = 0; i < entryvec->n; i++)
00293     {
00294         if (unionkey(base, GETENTRY(entryvec, i)))
00295         {
00296             flag = ALLISTRUE;
00297             break;
00298         }
00299     }
00300 
00301     len = CALCGTSIZE(flag);
00302     result = (GISTTYPE *) palloc(len);
00303     SET_VARSIZE(result, len);
00304     result->flag = flag;
00305     if (!ISALLTRUE(result))
00306         memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
00307     *size = len;
00308 
00309     PG_RETURN_POINTER(result);
00310 }
00311 
00312 Datum
00313 g_intbig_penalty(PG_FUNCTION_ARGS)
00314 {
00315     GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
00316     GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
00317     float      *penalty = (float *) PG_GETARG_POINTER(2);
00318     GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
00319     GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
00320 
00321     *penalty = hemdist(origval, newval);
00322     PG_RETURN_POINTER(penalty);
00323 }
00324 
00325 
00326 typedef struct
00327 {
00328     OffsetNumber pos;
00329     int32       cost;
00330 } SPLITCOST;
00331 
00332 static int
00333 comparecost(const void *a, const void *b)
00334 {
00335     return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
00336 }
00337 
00338 
00339 Datum
00340 g_intbig_picksplit(PG_FUNCTION_ARGS)
00341 {
00342     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00343     GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
00344     OffsetNumber k,
00345                 j;
00346     GISTTYPE   *datum_l,
00347                *datum_r;
00348     BITVECP     union_l,
00349                 union_r;
00350     int32       size_alpha,
00351                 size_beta;
00352     int32       size_waste,
00353                 waste = -1;
00354     int32       nbytes;
00355     OffsetNumber seed_1 = 0,
00356                 seed_2 = 0;
00357     OffsetNumber *left,
00358                *right;
00359     OffsetNumber maxoff;
00360     BITVECP     ptr;
00361     int         i;
00362     SPLITCOST  *costvector;
00363     GISTTYPE   *_k,
00364                *_j;
00365 
00366     maxoff = entryvec->n - 2;
00367     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00368     v->spl_left = (OffsetNumber *) palloc(nbytes);
00369     v->spl_right = (OffsetNumber *) palloc(nbytes);
00370 
00371     for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
00372     {
00373         _k = GETENTRY(entryvec, k);
00374         for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
00375         {
00376             size_waste = hemdist(_k, GETENTRY(entryvec, j));
00377             if (size_waste > waste)
00378             {
00379                 waste = size_waste;
00380                 seed_1 = k;
00381                 seed_2 = j;
00382             }
00383         }
00384     }
00385 
00386     left = v->spl_left;
00387     v->spl_nleft = 0;
00388     right = v->spl_right;
00389     v->spl_nright = 0;
00390 
00391     if (seed_1 == 0 || seed_2 == 0)
00392     {
00393         seed_1 = 1;
00394         seed_2 = 2;
00395     }
00396 
00397     /* form initial .. */
00398     if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
00399     {
00400         datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
00401         SET_VARSIZE(datum_l, GTHDRSIZE);
00402         datum_l->flag = ALLISTRUE;
00403     }
00404     else
00405     {
00406         datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00407         SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
00408         datum_l->flag = 0;
00409         memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC));
00410     }
00411     if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
00412     {
00413         datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
00414         SET_VARSIZE(datum_r, GTHDRSIZE);
00415         datum_r->flag = ALLISTRUE;
00416     }
00417     else
00418     {
00419         datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00420         SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
00421         datum_r->flag = 0;
00422         memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
00423     }
00424 
00425     maxoff = OffsetNumberNext(maxoff);
00426     /* sort before ... */
00427     costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
00428     for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
00429     {
00430         costvector[j - 1].pos = j;
00431         _j = GETENTRY(entryvec, j);
00432         size_alpha = hemdist(datum_l, _j);
00433         size_beta = hemdist(datum_r, _j);
00434         costvector[j - 1].cost = Abs(size_alpha - size_beta);
00435     }
00436     qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
00437 
00438     union_l = GETSIGN(datum_l);
00439     union_r = GETSIGN(datum_r);
00440 
00441     for (k = 0; k < maxoff; k++)
00442     {
00443         j = costvector[k].pos;
00444         if (j == seed_1)
00445         {
00446             *left++ = j;
00447             v->spl_nleft++;
00448             continue;
00449         }
00450         else if (j == seed_2)
00451         {
00452             *right++ = j;
00453             v->spl_nright++;
00454             continue;
00455         }
00456         _j = GETENTRY(entryvec, j);
00457         size_alpha = hemdist(datum_l, _j);
00458         size_beta = hemdist(datum_r, _j);
00459 
00460         if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
00461         {
00462             if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
00463             {
00464                 if (!ISALLTRUE(datum_l))
00465                     MemSet((void *) union_l, 0xff, sizeof(BITVEC));
00466             }
00467             else
00468             {
00469                 ptr = GETSIGN(_j);
00470                 LOOPBYTE
00471                     union_l[i] |= ptr[i];
00472             }
00473             *left++ = j;
00474             v->spl_nleft++;
00475         }
00476         else
00477         {
00478             if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
00479             {
00480                 if (!ISALLTRUE(datum_r))
00481                     MemSet((void *) union_r, 0xff, sizeof(BITVEC));
00482             }
00483             else
00484             {
00485                 ptr = GETSIGN(_j);
00486                 LOOPBYTE
00487                     union_r[i] |= ptr[i];
00488             }
00489             *right++ = j;
00490             v->spl_nright++;
00491         }
00492     }
00493 
00494     *right = *left = FirstOffsetNumber;
00495     pfree(costvector);
00496 
00497     v->spl_ldatum = PointerGetDatum(datum_l);
00498     v->spl_rdatum = PointerGetDatum(datum_r);
00499 
00500     PG_RETURN_POINTER(v);
00501 }
00502 
00503 Datum
00504 g_intbig_consistent(PG_FUNCTION_ARGS)
00505 {
00506     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00507     ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
00508     StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00509 
00510     /* Oid      subtype = PG_GETARG_OID(3); */
00511     bool       *recheck = (bool *) PG_GETARG_POINTER(4);
00512     bool        retval;
00513 
00514     /* All cases served by this function are inexact */
00515     *recheck = true;
00516 
00517     if (ISALLTRUE(DatumGetPointer(entry->key)))
00518         PG_RETURN_BOOL(true);
00519 
00520     if (strategy == BooleanSearchStrategy)
00521     {
00522         retval = signconsistent((QUERYTYPE *) query,
00523                                 GETSIGN(DatumGetPointer(entry->key)),
00524                                 false);
00525         PG_FREE_IF_COPY(query, 1);
00526         PG_RETURN_BOOL(retval);
00527     }
00528 
00529     CHECKARRVALID(query);
00530 
00531     switch (strategy)
00532     {
00533         case RTOverlapStrategyNumber:
00534             retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key), query);
00535             break;
00536         case RTSameStrategyNumber:
00537             if (GIST_LEAF(entry))
00538             {
00539                 int         i,
00540                             num = ARRNELEMS(query);
00541                 int32      *ptr = ARRPTR(query);
00542                 BITVEC      qp;
00543                 BITVECP     dq,
00544                             de;
00545 
00546                 memset(qp, 0, sizeof(BITVEC));
00547 
00548                 while (num--)
00549                 {
00550                     HASH(qp, *ptr);
00551                     ptr++;
00552                 }
00553 
00554                 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
00555                 dq = qp;
00556                 retval = true;
00557                 LOOPBYTE
00558                 {
00559                     if (de[i] != dq[i])
00560                     {
00561                         retval = false;
00562                         break;
00563                     }
00564                 }
00565 
00566             }
00567             else
00568                 retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), query);
00569             break;
00570         case RTContainsStrategyNumber:
00571         case RTOldContainsStrategyNumber:
00572             retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), query);
00573             break;
00574         case RTContainedByStrategyNumber:
00575         case RTOldContainedByStrategyNumber:
00576             if (GIST_LEAF(entry))
00577             {
00578                 int         i,
00579                             num = ARRNELEMS(query);
00580                 int32      *ptr = ARRPTR(query);
00581                 BITVEC      qp;
00582                 BITVECP     dq,
00583                             de;
00584 
00585                 memset(qp, 0, sizeof(BITVEC));
00586 
00587                 while (num--)
00588                 {
00589                     HASH(qp, *ptr);
00590                     ptr++;
00591                 }
00592 
00593                 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
00594                 dq = qp;
00595                 retval = true;
00596                 LOOPBYTE
00597                 {
00598                     if (de[i] & ~dq[i])
00599                     {
00600                         retval = false;
00601                         break;
00602                     }
00603                 }
00604             }
00605             else
00606                 retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key), query);
00607             break;
00608         default:
00609             retval = FALSE;
00610     }
00611     PG_FREE_IF_COPY(query, 1);
00612     PG_RETURN_BOOL(retval);
00613 }