Header And Logo

PostgreSQL
| The world's most advanced open source database.

hstore_gist.c

Go to the documentation of this file.
00001 /*
00002  * contrib/hstore/hstore_gist.c
00003  */
00004 #include "postgres.h"
00005 
00006 #include "access/gist.h"
00007 #include "access/skey.h"
00008 #include "catalog/pg_type.h"
00009 
00010 #include "crc32.h"
00011 #include "hstore.h"
00012 
00013 /* bigint defines */
00014 #define BITBYTE 8
00015 #define SIGLENINT  4            /* >122 => key will toast, so very slow!!! */
00016 #define SIGLEN  ( sizeof(int)*SIGLENINT )
00017 #define SIGLENBIT (SIGLEN*BITBYTE)
00018 
00019 typedef char BITVEC[SIGLEN];
00020 typedef char *BITVECP;
00021 
00022 #define SIGPTR(x)  ( (BITVECP) ARR_DATA_PTR(x) )
00023 
00024 
00025 #define LOOPBYTE \
00026             for(i=0;i<SIGLEN;i++)
00027 
00028 #define LOOPBIT \
00029             for(i=0;i<SIGLENBIT;i++)
00030 
00031 /* beware of multiple evaluation of arguments to these macros! */
00032 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
00033 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
00034 #define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
00035 #define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
00036 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
00037 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
00038 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
00039 
00040 typedef struct
00041 {
00042     int32       vl_len_;        /* varlena header (do not touch directly!) */
00043     int32       flag;
00044     char        data[1];
00045 } GISTTYPE;
00046 
00047 #define ALLISTRUE       0x04
00048 
00049 #define ISALLTRUE(x)    ( ((GISTTYPE*)x)->flag & ALLISTRUE )
00050 
00051 #define GTHDRSIZE       (VARHDRSZ + sizeof(int32))
00052 #define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
00053 
00054 #define GETSIGN(x)      ( (BITVECP)( (char*)x+GTHDRSIZE ) )
00055 
00056 #define SUMBIT(val) (       \
00057     GETBITBYTE((val),0) + \
00058     GETBITBYTE((val),1) + \
00059     GETBITBYTE((val),2) + \
00060     GETBITBYTE((val),3) + \
00061     GETBITBYTE((val),4) + \
00062     GETBITBYTE((val),5) + \
00063     GETBITBYTE((val),6) + \
00064     GETBITBYTE((val),7)   \
00065 )
00066 
00067 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
00068 
00069 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
00070 
00071 PG_FUNCTION_INFO_V1(ghstore_in);
00072 Datum       ghstore_in(PG_FUNCTION_ARGS);
00073 
00074 PG_FUNCTION_INFO_V1(ghstore_out);
00075 Datum       ghstore_out(PG_FUNCTION_ARGS);
00076 
00077 
00078 Datum
00079 ghstore_in(PG_FUNCTION_ARGS)
00080 {
00081     elog(ERROR, "Not implemented");
00082     PG_RETURN_DATUM(0);
00083 }
00084 
00085 Datum
00086 ghstore_out(PG_FUNCTION_ARGS)
00087 {
00088     elog(ERROR, "Not implemented");
00089     PG_RETURN_DATUM(0);
00090 }
00091 
00092 PG_FUNCTION_INFO_V1(ghstore_consistent);
00093 PG_FUNCTION_INFO_V1(ghstore_compress);
00094 PG_FUNCTION_INFO_V1(ghstore_decompress);
00095 PG_FUNCTION_INFO_V1(ghstore_penalty);
00096 PG_FUNCTION_INFO_V1(ghstore_picksplit);
00097 PG_FUNCTION_INFO_V1(ghstore_union);
00098 PG_FUNCTION_INFO_V1(ghstore_same);
00099 
00100 Datum       ghstore_consistent(PG_FUNCTION_ARGS);
00101 Datum       ghstore_compress(PG_FUNCTION_ARGS);
00102 Datum       ghstore_decompress(PG_FUNCTION_ARGS);
00103 Datum       ghstore_penalty(PG_FUNCTION_ARGS);
00104 Datum       ghstore_picksplit(PG_FUNCTION_ARGS);
00105 Datum       ghstore_union(PG_FUNCTION_ARGS);
00106 Datum       ghstore_same(PG_FUNCTION_ARGS);
00107 
00108 Datum
00109 ghstore_compress(PG_FUNCTION_ARGS)
00110 {
00111     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00112     GISTENTRY  *retval = entry;
00113 
00114     if (entry->leafkey)
00115     {
00116         GISTTYPE   *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
00117         HStore     *val = DatumGetHStoreP(entry->key);
00118         HEntry     *hsent = ARRPTR(val);
00119         char       *ptr = STRPTR(val);
00120         int         count = HS_COUNT(val);
00121         int         i;
00122 
00123         SET_VARSIZE(res, CALCGTSIZE(0));
00124 
00125         for (i = 0; i < count; ++i)
00126         {
00127             int         h;
00128 
00129             h = crc32_sz((char *) HS_KEY(hsent, ptr, i), HS_KEYLEN(hsent, i));
00130             HASH(GETSIGN(res), h);
00131             if (!HS_VALISNULL(hsent, i))
00132             {
00133                 h = crc32_sz((char *) HS_VAL(hsent, ptr, i), HS_VALLEN(hsent, i));
00134                 HASH(GETSIGN(res), h);
00135             }
00136         }
00137 
00138         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00139         gistentryinit(*retval, PointerGetDatum(res),
00140                       entry->rel, entry->page,
00141                       entry->offset,
00142                       FALSE);
00143     }
00144     else if (!ISALLTRUE(DatumGetPointer(entry->key)))
00145     {
00146         int32       i;
00147         GISTTYPE   *res;
00148         BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
00149 
00150         LOOPBYTE
00151         {
00152             if ((sign[i] & 0xff) != 0xff)
00153                 PG_RETURN_POINTER(retval);
00154         }
00155 
00156         res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
00157         SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
00158         res->flag = ALLISTRUE;
00159 
00160         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00161         gistentryinit(*retval, PointerGetDatum(res),
00162                       entry->rel, entry->page,
00163                       entry->offset,
00164                       FALSE);
00165     }
00166 
00167     PG_RETURN_POINTER(retval);
00168 }
00169 
00170 /*
00171  * Since type ghstore isn't toastable (and doesn't need to be),
00172  * this function can be a no-op.
00173  */
00174 Datum
00175 ghstore_decompress(PG_FUNCTION_ARGS)
00176 {
00177     PG_RETURN_POINTER(PG_GETARG_POINTER(0));
00178 }
00179 
00180 Datum
00181 ghstore_same(PG_FUNCTION_ARGS)
00182 {
00183     GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
00184     GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
00185     bool       *result = (bool *) PG_GETARG_POINTER(2);
00186 
00187     if (ISALLTRUE(a) && ISALLTRUE(b))
00188         *result = true;
00189     else if (ISALLTRUE(a))
00190         *result = false;
00191     else if (ISALLTRUE(b))
00192         *result = false;
00193     else
00194     {
00195         int32       i;
00196         BITVECP     sa = GETSIGN(a),
00197                     sb = GETSIGN(b);
00198 
00199         *result = true;
00200         LOOPBYTE
00201         {
00202             if (sa[i] != sb[i])
00203             {
00204                 *result = false;
00205                 break;
00206             }
00207         }
00208     }
00209     PG_RETURN_POINTER(result);
00210 }
00211 
00212 static int32
00213 sizebitvec(BITVECP sign)
00214 {
00215     int32       size = 0,
00216                 i;
00217 
00218     LOOPBYTE
00219     {
00220         size += SUMBIT(sign);
00221         sign = (BITVECP) (((char *) sign) + 1);
00222     }
00223     return size;
00224 }
00225 
00226 static int
00227 hemdistsign(BITVECP a, BITVECP b)
00228 {
00229     int         i,
00230                 dist = 0;
00231 
00232     LOOPBIT
00233     {
00234         if (GETBIT(a, i) != GETBIT(b, i))
00235             dist++;
00236     }
00237     return dist;
00238 }
00239 
00240 static int
00241 hemdist(GISTTYPE *a, GISTTYPE *b)
00242 {
00243     if (ISALLTRUE(a))
00244     {
00245         if (ISALLTRUE(b))
00246             return 0;
00247         else
00248             return SIGLENBIT - sizebitvec(GETSIGN(b));
00249     }
00250     else if (ISALLTRUE(b))
00251         return SIGLENBIT - sizebitvec(GETSIGN(a));
00252 
00253     return hemdistsign(GETSIGN(a), GETSIGN(b));
00254 }
00255 
00256 static int32
00257 unionkey(BITVECP sbase, GISTTYPE *add)
00258 {
00259     int32       i;
00260     BITVECP     sadd = GETSIGN(add);
00261 
00262     if (ISALLTRUE(add))
00263         return 1;
00264     LOOPBYTE
00265         sbase[i] |= sadd[i];
00266     return 0;
00267 }
00268 
00269 Datum
00270 ghstore_union(PG_FUNCTION_ARGS)
00271 {
00272     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00273     int32       len = entryvec->n;
00274 
00275     int        *size = (int *) PG_GETARG_POINTER(1);
00276     BITVEC      base;
00277     int32       i;
00278     int32       flag = 0;
00279     GISTTYPE   *result;
00280 
00281     MemSet((void *) base, 0, sizeof(BITVEC));
00282     for (i = 0; i < len; i++)
00283     {
00284         if (unionkey(base, GETENTRY(entryvec, i)))
00285         {
00286             flag = ALLISTRUE;
00287             break;
00288         }
00289     }
00290 
00291     len = CALCGTSIZE(flag);
00292     result = (GISTTYPE *) palloc(len);
00293     SET_VARSIZE(result, len);
00294     result->flag = flag;
00295     if (!ISALLTRUE(result))
00296         memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
00297     *size = len;
00298 
00299     PG_RETURN_POINTER(result);
00300 }
00301 
00302 Datum
00303 ghstore_penalty(PG_FUNCTION_ARGS)
00304 {
00305     GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
00306     GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
00307     float      *penalty = (float *) PG_GETARG_POINTER(2);
00308     GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
00309     GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
00310 
00311     *penalty = hemdist(origval, newval);
00312     PG_RETURN_POINTER(penalty);
00313 }
00314 
00315 
00316 typedef struct
00317 {
00318     OffsetNumber pos;
00319     int32       cost;
00320 } SPLITCOST;
00321 
00322 static int
00323 comparecost(const void *a, const void *b)
00324 {
00325     return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
00326 }
00327 
00328 
00329 Datum
00330 ghstore_picksplit(PG_FUNCTION_ARGS)
00331 {
00332     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00333     OffsetNumber maxoff = entryvec->n - 2;
00334 
00335     GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
00336     OffsetNumber k,
00337                 j;
00338     GISTTYPE   *datum_l,
00339                *datum_r;
00340     BITVECP     union_l,
00341                 union_r;
00342     int32       size_alpha,
00343                 size_beta;
00344     int32       size_waste,
00345                 waste = -1;
00346     int32       nbytes;
00347     OffsetNumber seed_1 = 0,
00348                 seed_2 = 0;
00349     OffsetNumber *left,
00350                *right;
00351     BITVECP     ptr;
00352     int         i;
00353     SPLITCOST  *costvector;
00354     GISTTYPE   *_k,
00355                *_j;
00356 
00357     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00358     v->spl_left = (OffsetNumber *) palloc(nbytes);
00359     v->spl_right = (OffsetNumber *) palloc(nbytes);
00360 
00361     for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
00362     {
00363         _k = GETENTRY(entryvec, k);
00364         for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
00365         {
00366             size_waste = hemdist(_k, GETENTRY(entryvec, j));
00367             if (size_waste > waste)
00368             {
00369                 waste = size_waste;
00370                 seed_1 = k;
00371                 seed_2 = j;
00372             }
00373         }
00374     }
00375 
00376     left = v->spl_left;
00377     v->spl_nleft = 0;
00378     right = v->spl_right;
00379     v->spl_nright = 0;
00380 
00381     if (seed_1 == 0 || seed_2 == 0)
00382     {
00383         seed_1 = 1;
00384         seed_2 = 2;
00385     }
00386 
00387     /* form initial .. */
00388     if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
00389     {
00390         datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
00391         SET_VARSIZE(datum_l, GTHDRSIZE);
00392         datum_l->flag = ALLISTRUE;
00393     }
00394     else
00395     {
00396         datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00397         SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
00398         datum_l->flag = 0;
00399         memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
00400             ;
00401     }
00402     if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
00403     {
00404         datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
00405         SET_VARSIZE(datum_r, GTHDRSIZE);
00406         datum_r->flag = ALLISTRUE;
00407     }
00408     else
00409     {
00410         datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00411         SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
00412         datum_r->flag = 0;
00413         memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
00414     }
00415 
00416     maxoff = OffsetNumberNext(maxoff);
00417     /* sort before ... */
00418     costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
00419     for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
00420     {
00421         costvector[j - 1].pos = j;
00422         _j = GETENTRY(entryvec, j);
00423         size_alpha = hemdist(datum_l, _j);
00424         size_beta = hemdist(datum_r, _j);
00425         costvector[j - 1].cost = abs(size_alpha - size_beta);
00426     }
00427     qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
00428 
00429     union_l = GETSIGN(datum_l);
00430     union_r = GETSIGN(datum_r);
00431 
00432     for (k = 0; k < maxoff; k++)
00433     {
00434         j = costvector[k].pos;
00435         if (j == seed_1)
00436         {
00437             *left++ = j;
00438             v->spl_nleft++;
00439             continue;
00440         }
00441         else if (j == seed_2)
00442         {
00443             *right++ = j;
00444             v->spl_nright++;
00445             continue;
00446         }
00447         _j = GETENTRY(entryvec, j);
00448         size_alpha = hemdist(datum_l, _j);
00449         size_beta = hemdist(datum_r, _j);
00450 
00451         if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
00452         {
00453             if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
00454             {
00455                 if (!ISALLTRUE(datum_l))
00456                     MemSet((void *) union_l, 0xff, sizeof(BITVEC));
00457             }
00458             else
00459             {
00460                 ptr = GETSIGN(_j);
00461                 LOOPBYTE
00462                     union_l[i] |= ptr[i];
00463             }
00464             *left++ = j;
00465             v->spl_nleft++;
00466         }
00467         else
00468         {
00469             if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
00470             {
00471                 if (!ISALLTRUE(datum_r))
00472                     MemSet((void *) union_r, 0xff, sizeof(BITVEC));
00473             }
00474             else
00475             {
00476                 ptr = GETSIGN(_j);
00477                 LOOPBYTE
00478                     union_r[i] |= ptr[i];
00479             }
00480             *right++ = j;
00481             v->spl_nright++;
00482         }
00483     }
00484 
00485     *right = *left = FirstOffsetNumber;
00486 
00487     v->spl_ldatum = PointerGetDatum(datum_l);
00488     v->spl_rdatum = PointerGetDatum(datum_r);
00489 
00490     PG_RETURN_POINTER(v);
00491 }
00492 
00493 
00494 Datum
00495 ghstore_consistent(PG_FUNCTION_ARGS)
00496 {
00497     GISTTYPE   *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
00498     StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00499 
00500     /* Oid      subtype = PG_GETARG_OID(3); */
00501     bool       *recheck = (bool *) PG_GETARG_POINTER(4);
00502     bool        res = true;
00503     BITVECP     sign;
00504 
00505     /* All cases served by this function are inexact */
00506     *recheck = true;
00507 
00508     if (ISALLTRUE(entry))
00509         PG_RETURN_BOOL(true);
00510 
00511     sign = GETSIGN(entry);
00512 
00513     if (strategy == HStoreContainsStrategyNumber ||
00514         strategy == HStoreOldContainsStrategyNumber)
00515     {
00516         HStore     *query = PG_GETARG_HS(1);
00517         HEntry     *qe = ARRPTR(query);
00518         char       *qv = STRPTR(query);
00519         int         count = HS_COUNT(query);
00520         int         i;
00521 
00522         for (i = 0; res && i < count; ++i)
00523         {
00524             int         crc = crc32_sz((char *) HS_KEY(qe, qv, i), HS_KEYLEN(qe, i));
00525 
00526             if (GETBIT(sign, HASHVAL(crc)))
00527             {
00528                 if (!HS_VALISNULL(qe, i))
00529                 {
00530                     crc = crc32_sz((char *) HS_VAL(qe, qv, i), HS_VALLEN(qe, i));
00531                     if (!GETBIT(sign, HASHVAL(crc)))
00532                         res = false;
00533                 }
00534             }
00535             else
00536                 res = false;
00537         }
00538     }
00539     else if (strategy == HStoreExistsStrategyNumber)
00540     {
00541         text       *query = PG_GETARG_TEXT_PP(1);
00542         int         crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
00543 
00544         res = (GETBIT(sign, HASHVAL(crc))) ? true : false;
00545     }
00546     else if (strategy == HStoreExistsAllStrategyNumber)
00547     {
00548         ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
00549         Datum      *key_datums;
00550         bool       *key_nulls;
00551         int         key_count;
00552         int         i;
00553 
00554         deconstruct_array(query,
00555                           TEXTOID, -1, false, 'i',
00556                           &key_datums, &key_nulls, &key_count);
00557 
00558         for (i = 0; res && i < key_count; ++i)
00559         {
00560             int         crc;
00561 
00562             if (key_nulls[i])
00563                 continue;
00564             crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
00565             if (!(GETBIT(sign, HASHVAL(crc))))
00566                 res = FALSE;
00567         }
00568     }
00569     else if (strategy == HStoreExistsAnyStrategyNumber)
00570     {
00571         ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
00572         Datum      *key_datums;
00573         bool       *key_nulls;
00574         int         key_count;
00575         int         i;
00576 
00577         deconstruct_array(query,
00578                           TEXTOID, -1, false, 'i',
00579                           &key_datums, &key_nulls, &key_count);
00580 
00581         res = FALSE;
00582 
00583         for (i = 0; !res && i < key_count; ++i)
00584         {
00585             int         crc;
00586 
00587             if (key_nulls[i])
00588                 continue;
00589             crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
00590             if (GETBIT(sign, HASHVAL(crc)))
00591                 res = TRUE;
00592         }
00593     }
00594     else
00595         elog(ERROR, "Unsupported strategy number: %d", strategy);
00596 
00597     PG_RETURN_BOOL(res);
00598 }