Header And Logo

PostgreSQL
| The world's most advanced open source database.

btree_utils_var.c

Go to the documentation of this file.
00001 /*
00002  * contrib/btree_gist/btree_utils_var.c
00003  */
00004 #include "postgres.h"
00005 
00006 #include "btree_gist.h"
00007 
00008 #include <math.h>
00009 #include <limits.h>
00010 #include <float.h>
00011 
00012 #include "btree_utils_var.h"
00013 #include "utils/pg_locale.h"
00014 #include "utils/builtins.h"
00015 #include "utils/rel.h"
00016 
00017 /* used for key sorting */
00018 typedef struct
00019 {
00020     int         i;
00021     GBT_VARKEY *t;
00022 } Vsrt;
00023 
00024 typedef struct
00025 {
00026     const gbtree_vinfo *tinfo;
00027     Oid         collation;
00028 } gbt_vsrt_arg;
00029 
00030 
00031 PG_FUNCTION_INFO_V1(gbt_var_decompress);
00032 Datum       gbt_var_decompress(PG_FUNCTION_ARGS);
00033 
00034 
00035 Datum
00036 gbt_var_decompress(PG_FUNCTION_ARGS)
00037 {
00038     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00039     GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
00040 
00041     if (key != (GBT_VARKEY *) DatumGetPointer(entry->key))
00042     {
00043         GISTENTRY  *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00044 
00045         gistentryinit(*retval, PointerGetDatum(key),
00046                       entry->rel, entry->page,
00047                       entry->offset, FALSE);
00048 
00049         PG_RETURN_POINTER(retval);
00050     }
00051 
00052     PG_RETURN_POINTER(entry);
00053 }
00054 
00055 /* Returns a better readable representaion of variable key ( sets pointer ) */
00056 GBT_VARKEY_R
00057 gbt_var_key_readable(const GBT_VARKEY *k)
00058 {
00059 
00060     GBT_VARKEY_R r;
00061 
00062     r.lower = (bytea *) &(((char *) k)[VARHDRSZ]);
00063     if (VARSIZE(k) > (VARHDRSZ + (VARSIZE(r.lower))))
00064         r.upper = (bytea *) &(((char *) k)[VARHDRSZ + INTALIGN(VARSIZE(r.lower))]);
00065     else
00066         r.upper = r.lower;
00067     return r;
00068 }
00069 
00070 
00071 GBT_VARKEY *
00072 gbt_var_key_copy(const GBT_VARKEY_R *u, bool force_node)
00073 {
00074     GBT_VARKEY *r = NULL;
00075 
00076     if (u->lower == u->upper && !force_node)
00077     {                           /* leaf key mode */
00078         r = (GBT_VARKEY *) palloc(VARSIZE(u->lower) + VARHDRSZ);
00079         memcpy(VARDATA(r), u->lower, VARSIZE(u->lower));
00080         SET_VARSIZE(r, VARSIZE(u->lower) + VARHDRSZ);
00081     }
00082     else
00083     {                           /* node key mode  */
00084         r = (GBT_VARKEY *) palloc(INTALIGN(VARSIZE(u->lower)) + VARSIZE(u->upper) + VARHDRSZ);
00085         memcpy(VARDATA(r), u->lower, VARSIZE(u->lower));
00086         memcpy(VARDATA(r) + INTALIGN(VARSIZE(u->lower)), u->upper, VARSIZE(u->upper));
00087         SET_VARSIZE(r, INTALIGN(VARSIZE(u->lower)) + VARSIZE(u->upper) + VARHDRSZ);
00088     }
00089     return r;
00090 }
00091 
00092 
00093 static GBT_VARKEY *
00094 gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo)
00095 {
00096     GBT_VARKEY *out = leaf;
00097 
00098     if (tinfo->f_l2n)
00099         out = (*tinfo->f_l2n) (leaf);
00100 
00101     return out;
00102 }
00103 
00104 
00105 /*
00106  * returns the common prefix length of a node key
00107 */
00108 static int32
00109 gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
00110 {
00111     GBT_VARKEY_R r = gbt_var_key_readable(node);
00112     int32       i = 0;
00113     int32       l = 0;
00114     int32       t1len = VARSIZE(r.lower) - VARHDRSZ;
00115     int32       t2len = VARSIZE(r.upper) - VARHDRSZ;
00116     int32       ml = Min(t1len, t2len);
00117     char       *p1 = VARDATA(r.lower);
00118     char       *p2 = VARDATA(r.upper);
00119 
00120     if (ml == 0)
00121         return 0;
00122 
00123     while (i < ml)
00124     {
00125         if (tinfo->eml > 1 && l == 0)
00126         {
00127             if ((l = pg_mblen(p1)) != pg_mblen(p2))
00128             {
00129                 return i;
00130             }
00131         }
00132         if (*p1 != *p2)
00133         {
00134             if (tinfo->eml > 1)
00135             {
00136                 return (i - l + 1);
00137             }
00138             else
00139             {
00140                 return i;
00141             }
00142         }
00143 
00144         p1++;
00145         p2++;
00146         l--;
00147         i++;
00148     }
00149     return (ml);                /* lower == upper */
00150 }
00151 
00152 
00153 /*
00154  * returns true, if query matches prefix ( common prefix )
00155  */
00156 static bool
00157 gbt_bytea_pf_match(const bytea *pf, const bytea *query, const gbtree_vinfo *tinfo)
00158 {
00159     bool        out = FALSE;
00160     int32       qlen = VARSIZE(query) - VARHDRSZ;
00161     int32       nlen = VARSIZE(pf) - VARHDRSZ;
00162 
00163     if (nlen <= qlen)
00164     {
00165         char       *q = VARDATA(query);
00166         char       *n = VARDATA(pf);
00167 
00168         out = (memcmp(q, n, nlen) == 0);
00169     }
00170 
00171     return out;
00172 }
00173 
00174 
00175 /*
00176  * returns true, if query matches node using common prefix
00177  */
00178 static bool
00179 gbt_var_node_pf_match(const GBT_VARKEY_R *node, const bytea *query, const gbtree_vinfo *tinfo)
00180 {
00181     return (tinfo->trnc && (
00182                             gbt_bytea_pf_match(node->lower, query, tinfo) ||
00183                             gbt_bytea_pf_match(node->upper, query, tinfo)
00184                             ));
00185 }
00186 
00187 
00188 /*
00189 *  truncates / compresses the node key
00190 *  cpf_length .. common prefix length
00191 */
00192 static GBT_VARKEY *
00193 gbt_var_node_truncate(const GBT_VARKEY *node, int32 cpf_length, const gbtree_vinfo *tinfo)
00194 {
00195     GBT_VARKEY *out = NULL;
00196     GBT_VARKEY_R r = gbt_var_key_readable(node);
00197     int32       len1 = VARSIZE(r.lower) - VARHDRSZ;
00198     int32       len2 = VARSIZE(r.upper) - VARHDRSZ;
00199     int32       si;
00200     char       *out2;
00201 
00202     len1 = Min(len1, (cpf_length + 1));
00203     len2 = Min(len2, (cpf_length + 1));
00204 
00205     si = 2 * VARHDRSZ + INTALIGN(len1 + VARHDRSZ) + len2;
00206     out = (GBT_VARKEY *) palloc(si);
00207     SET_VARSIZE(out, si);
00208 
00209     memcpy(VARDATA(out), r.lower, len1 + VARHDRSZ);
00210     SET_VARSIZE(VARDATA(out), len1 + VARHDRSZ);
00211 
00212     out2 = VARDATA(out) + INTALIGN(len1 + VARHDRSZ);
00213     memcpy(out2, r.upper, len2 + VARHDRSZ);
00214     SET_VARSIZE(out2, len2 + VARHDRSZ);
00215 
00216     return out;
00217 }
00218 
00219 
00220 
00221 void
00222 gbt_var_bin_union(Datum *u, GBT_VARKEY *e, Oid collation,
00223                   const gbtree_vinfo *tinfo)
00224 {
00225     GBT_VARKEY_R eo = gbt_var_key_readable(e);
00226     GBT_VARKEY_R nr;
00227 
00228     if (eo.lower == eo.upper)   /* leaf */
00229     {
00230         GBT_VARKEY *tmp;
00231 
00232         tmp = gbt_var_leaf2node(e, tinfo);
00233         if (tmp != e)
00234             eo = gbt_var_key_readable(tmp);
00235     }
00236 
00237     if (DatumGetPointer(*u))
00238     {
00239         GBT_VARKEY_R ro = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(*u));
00240         bool        update = false;
00241 
00242         nr.lower = ro.lower;
00243         nr.upper = ro.upper;
00244 
00245         if ((*tinfo->f_cmp) (ro.lower, eo.lower, collation) > 0)
00246         {
00247             nr.lower = eo.lower;
00248             update = true;
00249         }
00250 
00251         if ((*tinfo->f_cmp) (ro.upper, eo.upper, collation) < 0)
00252         {
00253             nr.upper = eo.upper;
00254             update = true;
00255         }
00256 
00257         if (update)
00258             *u = PointerGetDatum(gbt_var_key_copy(&nr, TRUE));
00259     }
00260     else
00261     {
00262         nr.lower = eo.lower;
00263         nr.upper = eo.upper;
00264         *u = PointerGetDatum(gbt_var_key_copy(&nr, TRUE));
00265     }
00266 }
00267 
00268 
00269 
00270 GISTENTRY *
00271 gbt_var_compress(GISTENTRY *entry, const gbtree_vinfo *tinfo)
00272 {
00273 
00274     GISTENTRY  *retval;
00275 
00276     if (entry->leafkey)
00277     {
00278         GBT_VARKEY *r = NULL;
00279         bytea      *leaf = (bytea *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
00280         GBT_VARKEY_R u;
00281 
00282         u.lower = u.upper = leaf;
00283         r = gbt_var_key_copy(&u, FALSE);
00284 
00285         retval = palloc(sizeof(GISTENTRY));
00286         gistentryinit(*retval, PointerGetDatum(r),
00287                       entry->rel, entry->page,
00288                       entry->offset, TRUE);
00289     }
00290     else
00291         retval = entry;
00292 
00293     return (retval);
00294 }
00295 
00296 
00297 
00298 GBT_VARKEY *
00299 gbt_var_union(const GistEntryVector *entryvec, int32 *size, Oid collation,
00300               const gbtree_vinfo *tinfo)
00301 {
00302 
00303     int         i = 0,
00304                 numranges = entryvec->n;
00305     GBT_VARKEY *cur;
00306     Datum       out;
00307     GBT_VARKEY_R rk;
00308 
00309     *size = sizeof(GBT_VARKEY);
00310 
00311     cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[0].key);
00312     rk = gbt_var_key_readable(cur);
00313     out = PointerGetDatum(gbt_var_key_copy(&rk, TRUE));
00314 
00315     for (i = 1; i < numranges; i++)
00316     {
00317         cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[i].key);
00318         gbt_var_bin_union(&out, cur, collation, tinfo);
00319     }
00320 
00321 
00322     /* Truncate (=compress) key */
00323     if (tinfo->trnc)
00324     {
00325         int32       plen;
00326         GBT_VARKEY *trc = NULL;
00327 
00328         plen = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(out), tinfo);
00329         trc = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(out), plen + 1, tinfo);
00330 
00331         out = PointerGetDatum(trc);
00332     }
00333 
00334     return ((GBT_VARKEY *) DatumGetPointer(out));
00335 }
00336 
00337 
00338 bool
00339 gbt_var_same(Datum d1, Datum d2, Oid collation,
00340              const gbtree_vinfo *tinfo)
00341 {
00342     bool        result;
00343     GBT_VARKEY *t1 = (GBT_VARKEY *) DatumGetPointer(d1);
00344     GBT_VARKEY *t2 = (GBT_VARKEY *) DatumGetPointer(d2);
00345     GBT_VARKEY_R r1,
00346                 r2;
00347 
00348     r1 = gbt_var_key_readable(t1);
00349     r2 = gbt_var_key_readable(t2);
00350 
00351     if (t1 && t2)
00352         result = ((*tinfo->f_cmp) (r1.lower, r2.lower, collation) == 0 &&
00353                   (*tinfo->f_cmp) (r1.upper, r2.upper, collation) == 0);
00354     else
00355         result = (t1 == NULL && t2 == NULL);
00356 
00357     return result;
00358 }
00359 
00360 
00361 float *
00362 gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
00363                 Oid collation, const gbtree_vinfo *tinfo)
00364 {
00365     GBT_VARKEY *orge = (GBT_VARKEY *) DatumGetPointer(o->key);
00366     GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key);
00367     GBT_VARKEY_R ok,
00368                 nk;
00369 
00370     *res = 0.0;
00371 
00372     nk = gbt_var_key_readable(newe);
00373     if (nk.lower == nk.upper)   /* leaf */
00374     {
00375         GBT_VARKEY *tmp;
00376 
00377         tmp = gbt_var_leaf2node(newe, tinfo);
00378         if (tmp != newe)
00379             nk = gbt_var_key_readable(tmp);
00380     }
00381     ok = gbt_var_key_readable(orge);
00382 
00383     if ((VARSIZE(ok.lower) - VARHDRSZ) == 0 && (VARSIZE(ok.upper) - VARHDRSZ) == 0)
00384         *res = 0.0;
00385     else if (!(((*tinfo->f_cmp) (nk.lower, ok.lower, collation) >= 0 ||
00386                 gbt_bytea_pf_match(ok.lower, nk.lower, tinfo)) &&
00387                ((*tinfo->f_cmp) (nk.upper, ok.upper, collation) <= 0 ||
00388                 gbt_bytea_pf_match(ok.upper, nk.upper, tinfo))))
00389     {
00390         Datum       d = PointerGetDatum(0);
00391         double      dres;
00392         int32       ol,
00393                     ul;
00394 
00395         gbt_var_bin_union(&d, orge, collation, tinfo);
00396         ol = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);
00397         gbt_var_bin_union(&d, newe, collation, tinfo);
00398         ul = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);
00399 
00400         if (ul < ol)
00401         {
00402             dres = (ol - ul);   /* reduction of common prefix len */
00403         }
00404         else
00405         {
00406             GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d));
00407             unsigned char tmp[4];
00408 
00409             tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul]));
00410             tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul]));
00411             tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul]));
00412             tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul]));
00413             dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]);
00414             dres /= 256.0;
00415         }
00416 
00417         *res += FLT_MIN;
00418         *res += (float) (dres / ((double) (ol + 1)));
00419         *res *= (FLT_MAX / (o->rel->rd_att->natts + 1));
00420     }
00421 
00422     return res;
00423 }
00424 
00425 
00426 static int
00427 gbt_vsrt_cmp(const void *a, const void *b, void *arg)
00428 {
00429     GBT_VARKEY_R ar = gbt_var_key_readable(((const Vsrt *) a)->t);
00430     GBT_VARKEY_R br = gbt_var_key_readable(((const Vsrt *) b)->t);
00431     const gbt_vsrt_arg *varg = (const gbt_vsrt_arg *) arg;
00432     int         res;
00433 
00434     res = (*varg->tinfo->f_cmp) (ar.lower, br.lower, varg->collation);
00435     if (res == 0)
00436         return (*varg->tinfo->f_cmp) (ar.upper, br.upper, varg->collation);
00437 
00438     return res;
00439 }
00440 
00441 GIST_SPLITVEC *
00442 gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,
00443                   Oid collation, const gbtree_vinfo *tinfo)
00444 {
00445     OffsetNumber i,
00446                 maxoff = entryvec->n - 1;
00447     Vsrt       *arr;
00448     int         svcntr = 0,
00449                 nbytes;
00450     char       *cur;
00451     GBT_VARKEY **sv = NULL;
00452     gbt_vsrt_arg varg;
00453 
00454     arr = (Vsrt *) palloc((maxoff + 1) * sizeof(Vsrt));
00455     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00456     v->spl_left = (OffsetNumber *) palloc(nbytes);
00457     v->spl_right = (OffsetNumber *) palloc(nbytes);
00458     v->spl_ldatum = PointerGetDatum(0);
00459     v->spl_rdatum = PointerGetDatum(0);
00460     v->spl_nleft = 0;
00461     v->spl_nright = 0;
00462 
00463     sv = palloc(sizeof(bytea *) * (maxoff + 1));
00464 
00465     /* Sort entries */
00466 
00467     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
00468     {
00469         GBT_VARKEY_R ro;
00470 
00471         cur = (char *) DatumGetPointer(entryvec->vector[i].key);
00472         ro = gbt_var_key_readable((GBT_VARKEY *) cur);
00473         if (ro.lower == ro.upper)       /* leaf */
00474         {
00475             sv[svcntr] = gbt_var_leaf2node((GBT_VARKEY *) cur, tinfo);
00476             arr[i].t = sv[svcntr];
00477             if (sv[svcntr] != (GBT_VARKEY *) cur)
00478                 svcntr++;
00479         }
00480         else
00481             arr[i].t = (GBT_VARKEY *) cur;
00482         arr[i].i = i;
00483     }
00484 
00485     /* sort */
00486     varg.tinfo = tinfo;
00487     varg.collation = collation;
00488     qsort_arg((void *) &arr[FirstOffsetNumber],
00489               maxoff - FirstOffsetNumber + 1,
00490               sizeof(Vsrt),
00491               gbt_vsrt_cmp,
00492               (void *) &varg);
00493 
00494     /* We do simply create two parts */
00495 
00496     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
00497     {
00498         if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
00499         {
00500             gbt_var_bin_union(&v->spl_ldatum, arr[i].t, collation, tinfo);
00501             v->spl_left[v->spl_nleft] = arr[i].i;
00502             v->spl_nleft++;
00503         }
00504         else
00505         {
00506             gbt_var_bin_union(&v->spl_rdatum, arr[i].t, collation, tinfo);
00507             v->spl_right[v->spl_nright] = arr[i].i;
00508             v->spl_nright++;
00509         }
00510     }
00511 
00512     /* Truncate (=compress) key */
00513     if (tinfo->trnc)
00514     {
00515         int32       ll = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), tinfo);
00516         int32       lr = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), tinfo);
00517         GBT_VARKEY *dl;
00518         GBT_VARKEY *dr;
00519 
00520         ll = Max(ll, lr);
00521         ll++;
00522 
00523         dl = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), ll, tinfo);
00524         dr = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), ll, tinfo);
00525         v->spl_ldatum = PointerGetDatum(dl);
00526         v->spl_rdatum = PointerGetDatum(dr);
00527     }
00528 
00529     return v;
00530 }
00531 
00532 
00533 /*
00534  * The GiST consistent method
00535  */
00536 bool
00537 gbt_var_consistent(GBT_VARKEY_R *key,
00538                    const void *query,
00539                    StrategyNumber strategy,
00540                    Oid collation,
00541                    bool is_leaf,
00542                    const gbtree_vinfo *tinfo)
00543 {
00544     bool        retval = FALSE;
00545 
00546     switch (strategy)
00547     {
00548         case BTLessEqualStrategyNumber:
00549             if (is_leaf)
00550                 retval = (*tinfo->f_ge) (query, key->lower, collation);
00551             else
00552                 retval = (*tinfo->f_cmp) (query, key->lower, collation) >= 0
00553                     || gbt_var_node_pf_match(key, query, tinfo);
00554             break;
00555         case BTLessStrategyNumber:
00556             if (is_leaf)
00557                 retval = (*tinfo->f_gt) (query, key->lower, collation);
00558             else
00559                 retval = (*tinfo->f_cmp) (query, key->lower, collation) >= 0
00560                     || gbt_var_node_pf_match(key, query, tinfo);
00561             break;
00562         case BTEqualStrategyNumber:
00563             if (is_leaf)
00564                 retval = (*tinfo->f_eq) (query, key->lower, collation);
00565             else
00566                 retval =
00567                     ((*tinfo->f_cmp) (key->lower, query, collation) <= 0 &&
00568                      (*tinfo->f_cmp) (query, key->upper, collation) <= 0) ||
00569                     gbt_var_node_pf_match(key, query, tinfo);
00570             break;
00571         case BTGreaterStrategyNumber:
00572             if (is_leaf)
00573                 retval = (*tinfo->f_lt) (query, key->upper, collation);
00574             else
00575                 retval = (*tinfo->f_cmp) (query, key->upper, collation) <= 0
00576                     || gbt_var_node_pf_match(key, query, tinfo);
00577             break;
00578         case BTGreaterEqualStrategyNumber:
00579             if (is_leaf)
00580                 retval = (*tinfo->f_le) (query, key->upper, collation);
00581             else
00582                 retval = (*tinfo->f_cmp) (query, key->upper, collation) <= 0
00583                     || gbt_var_node_pf_match(key, query, tinfo);
00584             break;
00585         case BtreeGistNotEqualStrategyNumber:
00586             retval = !((*tinfo->f_eq) (query, key->lower, collation) &&
00587                        (*tinfo->f_eq) (query, key->upper, collation));
00588             break;
00589         default:
00590             retval = FALSE;
00591     }
00592 
00593     return retval;
00594 }