Header And Logo

PostgreSQL
| The world's most advanced open source database.

_ltree_gist.c

Go to the documentation of this file.
00001 /*
00002  * contrib/ltree/_ltree_gist.c
00003  *
00004  *
00005  * GiST support for ltree[]
00006  * Teodor Sigaev <[email protected]>
00007  */
00008 #include "postgres.h"
00009 
00010 #include "access/gist.h"
00011 #include "access/skey.h"
00012 #include "crc32.h"
00013 #include "ltree.h"
00014 
00015 
00016 PG_FUNCTION_INFO_V1(_ltree_compress);
00017 Datum       _ltree_compress(PG_FUNCTION_ARGS);
00018 
00019 PG_FUNCTION_INFO_V1(_ltree_same);
00020 Datum       _ltree_same(PG_FUNCTION_ARGS);
00021 
00022 PG_FUNCTION_INFO_V1(_ltree_union);
00023 Datum       _ltree_union(PG_FUNCTION_ARGS);
00024 
00025 PG_FUNCTION_INFO_V1(_ltree_penalty);
00026 Datum       _ltree_penalty(PG_FUNCTION_ARGS);
00027 
00028 PG_FUNCTION_INFO_V1(_ltree_picksplit);
00029 Datum       _ltree_picksplit(PG_FUNCTION_ARGS);
00030 
00031 PG_FUNCTION_INFO_V1(_ltree_consistent);
00032 Datum       _ltree_consistent(PG_FUNCTION_ARGS);
00033 
00034 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
00035 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
00036 
00037 /* Number of one-bits in an unsigned byte */
00038 static const uint8 number_of_ones[256] = {
00039     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00040     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00041     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00042     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00043     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00044     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00045     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00046     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00047     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00048     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00049     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00050     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00051     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00052     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00053     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00054     4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
00055 };
00056 
00057 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
00058 
00059 
00060 static void
00061 hashing(BITVECP sign, ltree *t)
00062 {
00063     int         tlen = t->numlevel;
00064     ltree_level *cur = LTREE_FIRST(t);
00065     int         hash;
00066 
00067     while (tlen > 0)
00068     {
00069         hash = ltree_crc32_sz(cur->name, cur->len);
00070         AHASH(sign, hash);
00071         cur = LEVEL_NEXT(cur);
00072         tlen--;
00073     }
00074 }
00075 
00076 Datum
00077 _ltree_compress(PG_FUNCTION_ARGS)
00078 {
00079     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00080     GISTENTRY  *retval = entry;
00081 
00082     if (entry->leafkey)
00083     {                           /* ltree */
00084         ltree_gist *key;
00085         ArrayType  *val = DatumGetArrayTypeP(entry->key);
00086         int32       len = LTG_HDRSIZE + ASIGLEN;
00087         int         num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
00088         ltree      *item = (ltree *) ARR_DATA_PTR(val);
00089 
00090         if (ARR_NDIM(val) > 1)
00091             ereport(ERROR,
00092                     (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
00093                      errmsg("array must be one-dimensional")));
00094         if (array_contains_nulls(val))
00095             ereport(ERROR,
00096                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
00097                      errmsg("array must not contain nulls")));
00098 
00099         key = (ltree_gist *) palloc(len);
00100         SET_VARSIZE(key, len);
00101         key->flag = 0;
00102 
00103         MemSet(LTG_SIGN(key), 0, ASIGLEN);
00104         while (num > 0)
00105         {
00106             hashing(LTG_SIGN(key), item);
00107             num--;
00108             item = NEXTVAL(item);
00109         }
00110 
00111         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00112         gistentryinit(*retval, PointerGetDatum(key),
00113                       entry->rel, entry->page,
00114                       entry->offset, FALSE);
00115     }
00116     else if (!LTG_ISALLTRUE(entry->key))
00117     {
00118         int32       i,
00119                     len;
00120         ltree_gist *key;
00121 
00122         BITVECP     sign = LTG_SIGN(DatumGetPointer(entry->key));
00123 
00124         ALOOPBYTE
00125         {
00126             if ((sign[i] & 0xff) != 0xff)
00127                 PG_RETURN_POINTER(retval);
00128         }
00129         len = LTG_HDRSIZE;
00130         key = (ltree_gist *) palloc(len);
00131         SET_VARSIZE(key, len);
00132         key->flag = LTG_ALLTRUE;
00133 
00134         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00135         gistentryinit(*retval, PointerGetDatum(key),
00136                       entry->rel, entry->page,
00137                       entry->offset, FALSE);
00138     }
00139     PG_RETURN_POINTER(retval);
00140 }
00141 
00142 Datum
00143 _ltree_same(PG_FUNCTION_ARGS)
00144 {
00145     ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
00146     ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
00147     bool       *result = (bool *) PG_GETARG_POINTER(2);
00148 
00149     if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
00150         *result = true;
00151     else if (LTG_ISALLTRUE(a))
00152         *result = false;
00153     else if (LTG_ISALLTRUE(b))
00154         *result = false;
00155     else
00156     {
00157         int32       i;
00158         BITVECP     sa = LTG_SIGN(a),
00159                     sb = LTG_SIGN(b);
00160 
00161         *result = true;
00162         ALOOPBYTE
00163         {
00164             if (sa[i] != sb[i])
00165             {
00166                 *result = false;
00167                 break;
00168             }
00169         }
00170     }
00171     PG_RETURN_POINTER(result);
00172 }
00173 
00174 static int32
00175 unionkey(BITVECP sbase, ltree_gist *add)
00176 {
00177     int32       i;
00178     BITVECP     sadd = LTG_SIGN(add);
00179 
00180     if (LTG_ISALLTRUE(add))
00181         return 1;
00182 
00183     ALOOPBYTE
00184         sbase[i] |= sadd[i];
00185     return 0;
00186 }
00187 
00188 Datum
00189 _ltree_union(PG_FUNCTION_ARGS)
00190 {
00191     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00192     int        *size = (int *) PG_GETARG_POINTER(1);
00193     ABITVEC     base;
00194     int32       i,
00195                 len;
00196     int32       flag = 0;
00197     ltree_gist *result;
00198 
00199     MemSet((void *) base, 0, sizeof(ABITVEC));
00200     for (i = 0; i < entryvec->n; i++)
00201     {
00202         if (unionkey(base, GETENTRY(entryvec, i)))
00203         {
00204             flag = LTG_ALLTRUE;
00205             break;
00206         }
00207     }
00208 
00209     len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
00210     result = (ltree_gist *) palloc(len);
00211     SET_VARSIZE(result, len);
00212     result->flag = flag;
00213     if (!LTG_ISALLTRUE(result))
00214         memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
00215     *size = len;
00216 
00217     PG_RETURN_POINTER(result);
00218 }
00219 
00220 static int32
00221 sizebitvec(BITVECP sign)
00222 {
00223     int32       size = 0,
00224                 i;
00225 
00226     ALOOPBYTE
00227         size += number_of_ones[(unsigned char) sign[i]];
00228     return size;
00229 }
00230 
00231 static int
00232 hemdistsign(BITVECP a, BITVECP b)
00233 {
00234     int         i,
00235                 diff,
00236                 dist = 0;
00237 
00238     ALOOPBYTE
00239     {
00240         diff = (unsigned char) (a[i] ^ b[i]);
00241         dist += number_of_ones[diff];
00242     }
00243     return dist;
00244 }
00245 
00246 static int
00247 hemdist(ltree_gist *a, ltree_gist *b)
00248 {
00249     if (LTG_ISALLTRUE(a))
00250     {
00251         if (LTG_ISALLTRUE(b))
00252             return 0;
00253         else
00254             return ASIGLENBIT - sizebitvec(LTG_SIGN(b));
00255     }
00256     else if (LTG_ISALLTRUE(b))
00257         return ASIGLENBIT - sizebitvec(LTG_SIGN(a));
00258 
00259     return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));
00260 }
00261 
00262 
00263 Datum
00264 _ltree_penalty(PG_FUNCTION_ARGS)
00265 {
00266     ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
00267     ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
00268     float      *penalty = (float *) PG_GETARG_POINTER(2);
00269 
00270     *penalty = hemdist(origval, newval);
00271     PG_RETURN_POINTER(penalty);
00272 }
00273 
00274 typedef struct
00275 {
00276     OffsetNumber pos;
00277     int32       cost;
00278 } SPLITCOST;
00279 
00280 static int
00281 comparecost(const void *a, const void *b)
00282 {
00283     return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
00284 }
00285 
00286 Datum
00287 _ltree_picksplit(PG_FUNCTION_ARGS)
00288 {
00289     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00290     GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
00291     OffsetNumber k,
00292                 j;
00293     ltree_gist *datum_l,
00294                *datum_r;
00295     BITVECP     union_l,
00296                 union_r;
00297     int32       size_alpha,
00298                 size_beta;
00299     int32       size_waste,
00300                 waste = -1;
00301     int32       nbytes;
00302     OffsetNumber seed_1 = 0,
00303                 seed_2 = 0;
00304     OffsetNumber *left,
00305                *right;
00306     OffsetNumber maxoff;
00307     BITVECP     ptr;
00308     int         i;
00309     SPLITCOST  *costvector;
00310     ltree_gist *_k,
00311                *_j;
00312 
00313     maxoff = entryvec->n - 2;
00314     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00315     v->spl_left = (OffsetNumber *) palloc(nbytes);
00316     v->spl_right = (OffsetNumber *) palloc(nbytes);
00317 
00318     for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
00319     {
00320         _k = GETENTRY(entryvec, k);
00321         for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
00322         {
00323             size_waste = hemdist(_k, GETENTRY(entryvec, j));
00324             if (size_waste > waste)
00325             {
00326                 waste = size_waste;
00327                 seed_1 = k;
00328                 seed_2 = j;
00329             }
00330         }
00331     }
00332 
00333     left = v->spl_left;
00334     v->spl_nleft = 0;
00335     right = v->spl_right;
00336     v->spl_nright = 0;
00337 
00338     if (seed_1 == 0 || seed_2 == 0)
00339     {
00340         seed_1 = 1;
00341         seed_2 = 2;
00342     }
00343 
00344     /* form initial .. */
00345     if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
00346     {
00347         datum_l = (ltree_gist *) palloc(LTG_HDRSIZE);
00348         SET_VARSIZE(datum_l, LTG_HDRSIZE);
00349         datum_l->flag = LTG_ALLTRUE;
00350     }
00351     else
00352     {
00353         datum_l = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
00354         SET_VARSIZE(datum_l, LTG_HDRSIZE + ASIGLEN);
00355         datum_l->flag = 0;
00356         memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
00357     }
00358     if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
00359     {
00360         datum_r = (ltree_gist *) palloc(LTG_HDRSIZE);
00361         SET_VARSIZE(datum_r, LTG_HDRSIZE);
00362         datum_r->flag = LTG_ALLTRUE;
00363     }
00364     else
00365     {
00366         datum_r = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
00367         SET_VARSIZE(datum_r, LTG_HDRSIZE + ASIGLEN);
00368         datum_r->flag = 0;
00369         memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
00370     }
00371 
00372     maxoff = OffsetNumberNext(maxoff);
00373     /* sort before ... */
00374     costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
00375     for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
00376     {
00377         costvector[j - 1].pos = j;
00378         _j = GETENTRY(entryvec, j);
00379         size_alpha = hemdist(datum_l, _j);
00380         size_beta = hemdist(datum_r, _j);
00381         costvector[j - 1].cost = Abs(size_alpha - size_beta);
00382     }
00383     qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
00384 
00385     union_l = LTG_SIGN(datum_l);
00386     union_r = LTG_SIGN(datum_r);
00387 
00388     for (k = 0; k < maxoff; k++)
00389     {
00390         j = costvector[k].pos;
00391         if (j == seed_1)
00392         {
00393             *left++ = j;
00394             v->spl_nleft++;
00395             continue;
00396         }
00397         else if (j == seed_2)
00398         {
00399             *right++ = j;
00400             v->spl_nright++;
00401             continue;
00402         }
00403         _j = GETENTRY(entryvec, j);
00404         size_alpha = hemdist(datum_l, _j);
00405         size_beta = hemdist(datum_r, _j);
00406 
00407         if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
00408         {
00409             if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
00410             {
00411                 if (!LTG_ISALLTRUE(datum_l))
00412                     MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
00413             }
00414             else
00415             {
00416                 ptr = LTG_SIGN(_j);
00417                 ALOOPBYTE
00418                     union_l[i] |= ptr[i];
00419             }
00420             *left++ = j;
00421             v->spl_nleft++;
00422         }
00423         else
00424         {
00425             if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
00426             {
00427                 if (!LTG_ISALLTRUE(datum_r))
00428                     MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
00429             }
00430             else
00431             {
00432                 ptr = LTG_SIGN(_j);
00433                 ALOOPBYTE
00434                     union_r[i] |= ptr[i];
00435             }
00436             *right++ = j;
00437             v->spl_nright++;
00438         }
00439     }
00440 
00441     *right = *left = FirstOffsetNumber;
00442 
00443     v->spl_ldatum = PointerGetDatum(datum_l);
00444     v->spl_rdatum = PointerGetDatum(datum_r);
00445 
00446     PG_RETURN_POINTER(v);
00447 }
00448 
00449 static bool
00450 gist_te(ltree_gist *key, ltree *query)
00451 {
00452     ltree_level *curq = LTREE_FIRST(query);
00453     BITVECP     sign = LTG_SIGN(key);
00454     int         qlen = query->numlevel;
00455     unsigned int hv;
00456 
00457     if (LTG_ISALLTRUE(key))
00458         return true;
00459 
00460     while (qlen > 0)
00461     {
00462         hv = ltree_crc32_sz(curq->name, curq->len);
00463         if (!GETBIT(sign, AHASHVAL(hv)))
00464             return false;
00465         curq = LEVEL_NEXT(curq);
00466         qlen--;
00467     }
00468 
00469     return true;
00470 }
00471 
00472 static bool
00473 checkcondition_bit(void *checkval, ITEM *val)
00474 {
00475     return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(checkval, AHASHVAL(val->val)) : true;
00476 }
00477 
00478 static bool
00479 gist_qtxt(ltree_gist *key, ltxtquery *query)
00480 {
00481     if (LTG_ISALLTRUE(key))
00482         return true;
00483 
00484     return ltree_execute(
00485                          GETQUERY(query),
00486                          (void *) LTG_SIGN(key), false,
00487                          checkcondition_bit
00488         );
00489 }
00490 
00491 static bool
00492 gist_qe(ltree_gist *key, lquery *query)
00493 {
00494     lquery_level *curq = LQUERY_FIRST(query);
00495     BITVECP     sign = LTG_SIGN(key);
00496     int         qlen = query->numlevel;
00497 
00498     if (LTG_ISALLTRUE(key))
00499         return true;
00500 
00501     while (qlen > 0)
00502     {
00503         if (curq->numvar && LQL_CANLOOKSIGN(curq))
00504         {
00505             bool        isexist = false;
00506             int         vlen = curq->numvar;
00507             lquery_variant *curv = LQL_FIRST(curq);
00508 
00509             while (vlen > 0)
00510             {
00511                 if (GETBIT(sign, AHASHVAL(curv->val)))
00512                 {
00513                     isexist = true;
00514                     break;
00515                 }
00516                 curv = LVAR_NEXT(curv);
00517                 vlen--;
00518             }
00519             if (!isexist)
00520                 return false;
00521         }
00522 
00523         curq = LQL_NEXT(curq);
00524         qlen--;
00525     }
00526 
00527     return true;
00528 }
00529 
00530 static bool
00531 _arrq_cons(ltree_gist *key, ArrayType *_query)
00532 {
00533     lquery     *query = (lquery *) ARR_DATA_PTR(_query);
00534     int         num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
00535 
00536     if (ARR_NDIM(_query) > 1)
00537         ereport(ERROR,
00538                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
00539                  errmsg("array must be one-dimensional")));
00540     if (array_contains_nulls(_query))
00541         ereport(ERROR,
00542                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
00543                  errmsg("array must not contain nulls")));
00544 
00545     while (num > 0)
00546     {
00547         if (gist_qe(key, query))
00548             return true;
00549         num--;
00550         query = (lquery *) NEXTVAL(query);
00551     }
00552     return false;
00553 }
00554 
00555 Datum
00556 _ltree_consistent(PG_FUNCTION_ARGS)
00557 {
00558     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00559     char       *query = (char *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
00560     StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00561 
00562     /* Oid      subtype = PG_GETARG_OID(3); */
00563     bool       *recheck = (bool *) PG_GETARG_POINTER(4);
00564     ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
00565     bool        res = false;
00566 
00567     /* All cases served by this function are inexact */
00568     *recheck = true;
00569 
00570     switch (strategy)
00571     {
00572         case 10:
00573         case 11:
00574             res = gist_te(key, (ltree *) query);
00575             break;
00576         case 12:
00577         case 13:
00578             res = gist_qe(key, (lquery *) query);
00579             break;
00580         case 14:
00581         case 15:
00582             res = gist_qtxt(key, (ltxtquery *) query);
00583             break;
00584         case 16:
00585         case 17:
00586             res = _arrq_cons(key, (ArrayType *) query);
00587             break;
00588         default:
00589             /* internal error */
00590             elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
00591     }
00592     PG_FREE_IF_COPY(query, 1);
00593     PG_RETURN_BOOL(res);
00594 }