Header And Logo

PostgreSQL
| The world's most advanced open source database.

ltree_op.c

Go to the documentation of this file.
00001 /*
00002  * op function for ltree
00003  * Teodor Sigaev <[email protected]>
00004  * contrib/ltree/ltree_op.c
00005  */
00006 #include "postgres.h"
00007 
00008 #include <ctype.h>
00009 
00010 #include "access/htup_details.h"
00011 #include "catalog/pg_statistic.h"
00012 #include "utils/builtins.h"
00013 #include "utils/lsyscache.h"
00014 #include "utils/selfuncs.h"
00015 #include "ltree.h"
00016 
00017 PG_MODULE_MAGIC;
00018 
00019 /* compare functions */
00020 PG_FUNCTION_INFO_V1(ltree_cmp);
00021 PG_FUNCTION_INFO_V1(ltree_lt);
00022 PG_FUNCTION_INFO_V1(ltree_le);
00023 PG_FUNCTION_INFO_V1(ltree_eq);
00024 PG_FUNCTION_INFO_V1(ltree_ne);
00025 PG_FUNCTION_INFO_V1(ltree_ge);
00026 PG_FUNCTION_INFO_V1(ltree_gt);
00027 PG_FUNCTION_INFO_V1(nlevel);
00028 PG_FUNCTION_INFO_V1(ltree_isparent);
00029 PG_FUNCTION_INFO_V1(ltree_risparent);
00030 PG_FUNCTION_INFO_V1(subltree);
00031 PG_FUNCTION_INFO_V1(subpath);
00032 PG_FUNCTION_INFO_V1(ltree_index);
00033 PG_FUNCTION_INFO_V1(ltree_addltree);
00034 PG_FUNCTION_INFO_V1(ltree_addtext);
00035 PG_FUNCTION_INFO_V1(ltree_textadd);
00036 PG_FUNCTION_INFO_V1(lca);
00037 PG_FUNCTION_INFO_V1(ltree2text);
00038 PG_FUNCTION_INFO_V1(text2ltree);
00039 PG_FUNCTION_INFO_V1(ltreeparentsel);
00040 
00041 Datum       ltree_cmp(PG_FUNCTION_ARGS);
00042 Datum       ltree_lt(PG_FUNCTION_ARGS);
00043 Datum       ltree_le(PG_FUNCTION_ARGS);
00044 Datum       ltree_eq(PG_FUNCTION_ARGS);
00045 Datum       ltree_ne(PG_FUNCTION_ARGS);
00046 Datum       ltree_ge(PG_FUNCTION_ARGS);
00047 Datum       ltree_gt(PG_FUNCTION_ARGS);
00048 Datum       nlevel(PG_FUNCTION_ARGS);
00049 Datum       subltree(PG_FUNCTION_ARGS);
00050 Datum       subpath(PG_FUNCTION_ARGS);
00051 Datum       ltree_index(PG_FUNCTION_ARGS);
00052 Datum       ltree_addltree(PG_FUNCTION_ARGS);
00053 Datum       ltree_addtext(PG_FUNCTION_ARGS);
00054 Datum       ltree_textadd(PG_FUNCTION_ARGS);
00055 Datum       lca(PG_FUNCTION_ARGS);
00056 Datum       ltree2text(PG_FUNCTION_ARGS);
00057 Datum       text2ltree(PG_FUNCTION_ARGS);
00058 Datum       ltreeparentsel(PG_FUNCTION_ARGS);
00059 
00060 int
00061 ltree_compare(const ltree *a, const ltree *b)
00062 {
00063     ltree_level *al = LTREE_FIRST(a);
00064     ltree_level *bl = LTREE_FIRST(b);
00065     int         an = a->numlevel;
00066     int         bn = b->numlevel;
00067     int         res = 0;
00068 
00069     while (an > 0 && bn > 0)
00070     {
00071         if ((res = memcmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
00072         {
00073             if (al->len != bl->len)
00074                 return (al->len - bl->len) * 10 * (an + 1);
00075         }
00076         else
00077             return res * 10 * (an + 1);
00078 
00079         an--;
00080         bn--;
00081         al = LEVEL_NEXT(al);
00082         bl = LEVEL_NEXT(bl);
00083     }
00084 
00085     return (a->numlevel - b->numlevel) * 10 * (an + 1);
00086 }
00087 
00088 #define RUNCMP                      \
00089 ltree *a    = PG_GETARG_LTREE(0);           \
00090 ltree *b    = PG_GETARG_LTREE(1);           \
00091 int res = ltree_compare(a,b);               \
00092 PG_FREE_IF_COPY(a,0);                   \
00093 PG_FREE_IF_COPY(b,1);                   \
00094 
00095 Datum
00096 ltree_cmp(PG_FUNCTION_ARGS)
00097 {
00098     RUNCMP
00099         PG_RETURN_INT32(res);
00100 }
00101 
00102 Datum
00103 ltree_lt(PG_FUNCTION_ARGS)
00104 {
00105     RUNCMP
00106         PG_RETURN_BOOL((res < 0) ? true : false);
00107 }
00108 
00109 Datum
00110 ltree_le(PG_FUNCTION_ARGS)
00111 {
00112     RUNCMP
00113         PG_RETURN_BOOL((res <= 0) ? true : false);
00114 }
00115 
00116 Datum
00117 ltree_eq(PG_FUNCTION_ARGS)
00118 {
00119     RUNCMP
00120         PG_RETURN_BOOL((res == 0) ? true : false);
00121 }
00122 
00123 Datum
00124 ltree_ge(PG_FUNCTION_ARGS)
00125 {
00126     RUNCMP
00127         PG_RETURN_BOOL((res >= 0) ? true : false);
00128 }
00129 
00130 Datum
00131 ltree_gt(PG_FUNCTION_ARGS)
00132 {
00133     RUNCMP
00134         PG_RETURN_BOOL((res > 0) ? true : false);
00135 }
00136 
00137 Datum
00138 ltree_ne(PG_FUNCTION_ARGS)
00139 {
00140     RUNCMP
00141         PG_RETURN_BOOL((res != 0) ? true : false);
00142 }
00143 
00144 Datum
00145 nlevel(PG_FUNCTION_ARGS)
00146 {
00147     ltree      *a = PG_GETARG_LTREE(0);
00148     int         res = a->numlevel;
00149 
00150     PG_FREE_IF_COPY(a, 0);
00151     PG_RETURN_INT32(res);
00152 }
00153 
00154 bool
00155 inner_isparent(const ltree *c, const ltree *p)
00156 {
00157     ltree_level *cl = LTREE_FIRST(c);
00158     ltree_level *pl = LTREE_FIRST(p);
00159     int         pn = p->numlevel;
00160 
00161     if (pn > c->numlevel)
00162         return false;
00163 
00164     while (pn > 0)
00165     {
00166         if (cl->len != pl->len)
00167             return false;
00168         if (memcmp(cl->name, pl->name, cl->len))
00169             return false;
00170 
00171         pn--;
00172         cl = LEVEL_NEXT(cl);
00173         pl = LEVEL_NEXT(pl);
00174     }
00175     return true;
00176 }
00177 
00178 Datum
00179 ltree_isparent(PG_FUNCTION_ARGS)
00180 {
00181     ltree      *c = PG_GETARG_LTREE(1);
00182     ltree      *p = PG_GETARG_LTREE(0);
00183     bool        res = inner_isparent(c, p);
00184 
00185     PG_FREE_IF_COPY(c, 1);
00186     PG_FREE_IF_COPY(p, 0);
00187     PG_RETURN_BOOL(res);
00188 }
00189 
00190 Datum
00191 ltree_risparent(PG_FUNCTION_ARGS)
00192 {
00193     ltree      *c = PG_GETARG_LTREE(0);
00194     ltree      *p = PG_GETARG_LTREE(1);
00195     bool        res = inner_isparent(c, p);
00196 
00197     PG_FREE_IF_COPY(c, 0);
00198     PG_FREE_IF_COPY(p, 1);
00199     PG_RETURN_BOOL(res);
00200 }
00201 
00202 
00203 static ltree *
00204 inner_subltree(ltree *t, int32 startpos, int32 endpos)
00205 {
00206     char       *start = NULL,
00207                *end = NULL;
00208     ltree_level *ptr = LTREE_FIRST(t);
00209     ltree      *res;
00210     int         i;
00211 
00212     if (startpos < 0 || endpos < 0 || startpos >= t->numlevel || startpos > endpos)
00213         ereport(ERROR,
00214                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00215                  errmsg("invalid positions")));
00216 
00217     if (endpos > t->numlevel)
00218         endpos = t->numlevel;
00219 
00220     start = end = (char *) ptr;
00221     for (i = 0; i < endpos; i++)
00222     {
00223         if (i == startpos)
00224             start = (char *) ptr;
00225         if (i == endpos - 1)
00226         {
00227             end = (char *) LEVEL_NEXT(ptr);
00228             break;
00229         }
00230         ptr = LEVEL_NEXT(ptr);
00231     }
00232 
00233     res = (ltree *) palloc(LTREE_HDRSIZE + (end - start));
00234     SET_VARSIZE(res, LTREE_HDRSIZE + (end - start));
00235     res->numlevel = endpos - startpos;
00236 
00237     memcpy(LTREE_FIRST(res), start, end - start);
00238 
00239     return res;
00240 }
00241 
00242 Datum
00243 subltree(PG_FUNCTION_ARGS)
00244 {
00245     ltree      *t = PG_GETARG_LTREE(0);
00246     ltree      *res = inner_subltree(t, PG_GETARG_INT32(1), PG_GETARG_INT32(2));
00247 
00248     PG_FREE_IF_COPY(t, 0);
00249     PG_RETURN_POINTER(res);
00250 }
00251 
00252 Datum
00253 subpath(PG_FUNCTION_ARGS)
00254 {
00255     ltree      *t = PG_GETARG_LTREE(0);
00256     int32       start = PG_GETARG_INT32(1);
00257     int32       len = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
00258     int32       end;
00259     ltree      *res;
00260 
00261     end = start + len;
00262 
00263     if (start < 0)
00264     {
00265         start = t->numlevel + start;
00266         end = start + len;
00267     }
00268     if (start < 0)
00269     {                           /* start > t->numlevel */
00270         start = t->numlevel + start;
00271         end = start + len;
00272     }
00273 
00274     if (len < 0)
00275         end = t->numlevel + len;
00276     else if (len == 0)
00277         end = (fcinfo->nargs == 3) ? start : 0xffff;
00278 
00279     res = inner_subltree(t, start, end);
00280 
00281     PG_FREE_IF_COPY(t, 0);
00282     PG_RETURN_POINTER(res);
00283 }
00284 
00285 static ltree *
00286 ltree_concat(ltree *a, ltree *b)
00287 {
00288     ltree      *r;
00289 
00290     r = (ltree *) palloc(VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
00291     SET_VARSIZE(r, VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
00292     r->numlevel = a->numlevel + b->numlevel;
00293 
00294     memcpy(LTREE_FIRST(r), LTREE_FIRST(a), VARSIZE(a) - LTREE_HDRSIZE);
00295     memcpy(((char *) LTREE_FIRST(r)) + VARSIZE(a) - LTREE_HDRSIZE,
00296            LTREE_FIRST(b),
00297            VARSIZE(b) - LTREE_HDRSIZE);
00298     return r;
00299 }
00300 
00301 Datum
00302 ltree_addltree(PG_FUNCTION_ARGS)
00303 {
00304     ltree      *a = PG_GETARG_LTREE(0);
00305     ltree      *b = PG_GETARG_LTREE(1);
00306     ltree      *r;
00307 
00308     r = ltree_concat(a, b);
00309     PG_FREE_IF_COPY(a, 0);
00310     PG_FREE_IF_COPY(b, 1);
00311     PG_RETURN_POINTER(r);
00312 }
00313 
00314 Datum
00315 ltree_addtext(PG_FUNCTION_ARGS)
00316 {
00317     ltree      *a = PG_GETARG_LTREE(0);
00318     text       *b = PG_GETARG_TEXT_PP(1);
00319     char       *s;
00320     ltree      *r,
00321                *tmp;
00322 
00323     s = text_to_cstring(b);
00324 
00325     tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
00326                                                         PointerGetDatum(s)));
00327 
00328     pfree(s);
00329 
00330     r = ltree_concat(a, tmp);
00331 
00332     pfree(tmp);
00333 
00334     PG_FREE_IF_COPY(a, 0);
00335     PG_FREE_IF_COPY(b, 1);
00336     PG_RETURN_POINTER(r);
00337 }
00338 
00339 Datum
00340 ltree_index(PG_FUNCTION_ARGS)
00341 {
00342     ltree      *a = PG_GETARG_LTREE(0);
00343     ltree      *b = PG_GETARG_LTREE(1);
00344     int         start = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
00345     int         i,
00346                 j;
00347     ltree_level *startptr,
00348                *aptr,
00349                *bptr;
00350     bool        found = false;
00351 
00352     if (start < 0)
00353     {
00354         if (-start >= a->numlevel)
00355             start = 0;
00356         else
00357             start = (int) (a->numlevel) + start;
00358     }
00359 
00360     if (a->numlevel - start < b->numlevel || a->numlevel == 0 || b->numlevel == 0)
00361     {
00362         PG_FREE_IF_COPY(a, 0);
00363         PG_FREE_IF_COPY(b, 1);
00364         PG_RETURN_INT32(-1);
00365     }
00366 
00367     startptr = LTREE_FIRST(a);
00368     for (i = 0; i <= a->numlevel - b->numlevel; i++)
00369     {
00370         if (i >= start)
00371         {
00372             aptr = startptr;
00373             bptr = LTREE_FIRST(b);
00374             for (j = 0; j < b->numlevel; j++)
00375             {
00376                 if (!(aptr->len == bptr->len && memcmp(aptr->name, bptr->name, aptr->len) == 0))
00377                     break;
00378                 aptr = LEVEL_NEXT(aptr);
00379                 bptr = LEVEL_NEXT(bptr);
00380             }
00381 
00382             if (j == b->numlevel)
00383             {
00384                 found = true;
00385                 break;
00386             }
00387         }
00388         startptr = LEVEL_NEXT(startptr);
00389     }
00390 
00391     if (!found)
00392         i = -1;
00393 
00394     PG_FREE_IF_COPY(a, 0);
00395     PG_FREE_IF_COPY(b, 1);
00396     PG_RETURN_INT32(i);
00397 }
00398 
00399 Datum
00400 ltree_textadd(PG_FUNCTION_ARGS)
00401 {
00402     ltree      *a = PG_GETARG_LTREE(1);
00403     text       *b = PG_GETARG_TEXT_PP(0);
00404     char       *s;
00405     ltree      *r,
00406                *tmp;
00407 
00408     s = text_to_cstring(b);
00409 
00410     tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
00411                                                         PointerGetDatum(s)));
00412 
00413     pfree(s);
00414 
00415     r = ltree_concat(tmp, a);
00416 
00417     pfree(tmp);
00418 
00419     PG_FREE_IF_COPY(a, 1);
00420     PG_FREE_IF_COPY(b, 0);
00421     PG_RETURN_POINTER(r);
00422 }
00423 
00424 ltree *
00425 lca_inner(ltree **a, int len)
00426 {
00427     int         tmp,
00428                 num = ((*a)->numlevel) ? (*a)->numlevel - 1 : 0;
00429     ltree     **ptr = a + 1;
00430     int         i,
00431                 reslen = LTREE_HDRSIZE;
00432     ltree_level *l1,
00433                *l2;
00434     ltree      *res;
00435 
00436 
00437     if ((*a)->numlevel == 0)
00438         return NULL;
00439 
00440     while (ptr - a < len)
00441     {
00442         if ((*ptr)->numlevel == 0)
00443             return NULL;
00444         else if ((*ptr)->numlevel == 1)
00445             num = 0;
00446         else
00447         {
00448             l1 = LTREE_FIRST(*a);
00449             l2 = LTREE_FIRST(*ptr);
00450             tmp = num;
00451             num = 0;
00452             for (i = 0; i < Min(tmp, (*ptr)->numlevel - 1); i++)
00453             {
00454                 if (l1->len == l2->len && memcmp(l1->name, l2->name, l1->len) == 0)
00455                     num = i + 1;
00456                 else
00457                     break;
00458                 l1 = LEVEL_NEXT(l1);
00459                 l2 = LEVEL_NEXT(l2);
00460             }
00461         }
00462         ptr++;
00463     }
00464 
00465     l1 = LTREE_FIRST(*a);
00466     for (i = 0; i < num; i++)
00467     {
00468         reslen += MAXALIGN(l1->len + LEVEL_HDRSIZE);
00469         l1 = LEVEL_NEXT(l1);
00470     }
00471 
00472     res = (ltree *) palloc(reslen);
00473     SET_VARSIZE(res, reslen);
00474     res->numlevel = num;
00475 
00476     l1 = LTREE_FIRST(*a);
00477     l2 = LTREE_FIRST(res);
00478 
00479     for (i = 0; i < num; i++)
00480     {
00481         memcpy(l2, l1, MAXALIGN(l1->len + LEVEL_HDRSIZE));
00482         l1 = LEVEL_NEXT(l1);
00483         l2 = LEVEL_NEXT(l2);
00484     }
00485 
00486     return res;
00487 }
00488 
00489 Datum
00490 lca(PG_FUNCTION_ARGS)
00491 {
00492     int         i;
00493     ltree     **a,
00494                *res;
00495 
00496     a = (ltree **) palloc(sizeof(ltree *) * fcinfo->nargs);
00497     for (i = 0; i < fcinfo->nargs; i++)
00498         a[i] = PG_GETARG_LTREE(i);
00499     res = lca_inner(a, (int) fcinfo->nargs);
00500     for (i = 0; i < fcinfo->nargs; i++)
00501         PG_FREE_IF_COPY(a[i], i);
00502     pfree(a);
00503 
00504     if (res)
00505         PG_RETURN_POINTER(res);
00506     else
00507         PG_RETURN_NULL();
00508 }
00509 
00510 Datum
00511 text2ltree(PG_FUNCTION_ARGS)
00512 {
00513     text       *in = PG_GETARG_TEXT_PP(0);
00514     char       *s;
00515     ltree      *out;
00516 
00517     s = text_to_cstring(in);
00518 
00519     out = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
00520                                                         PointerGetDatum(s)));
00521     pfree(s);
00522     PG_FREE_IF_COPY(in, 0);
00523     PG_RETURN_POINTER(out);
00524 }
00525 
00526 
00527 Datum
00528 ltree2text(PG_FUNCTION_ARGS)
00529 {
00530     ltree      *in = PG_GETARG_LTREE(0);
00531     char       *ptr;
00532     int         i;
00533     ltree_level *curlevel;
00534     text       *out;
00535 
00536     out = (text *) palloc(VARSIZE(in) + VARHDRSZ);
00537     ptr = VARDATA(out);
00538     curlevel = LTREE_FIRST(in);
00539     for (i = 0; i < in->numlevel; i++)
00540     {
00541         if (i != 0)
00542         {
00543             *ptr = '.';
00544             ptr++;
00545         }
00546         memcpy(ptr, curlevel->name, curlevel->len);
00547         ptr += curlevel->len;
00548         curlevel = LEVEL_NEXT(curlevel);
00549     }
00550 
00551     SET_VARSIZE(out, ptr - ((char *) out));
00552     PG_FREE_IF_COPY(in, 0);
00553 
00554     PG_RETURN_POINTER(out);
00555 }
00556 
00557 
00558 #define DEFAULT_PARENT_SEL 0.001
00559 
00560 /*
00561  *  ltreeparentsel - Selectivity of parent relationship for ltree data types.
00562  */
00563 Datum
00564 ltreeparentsel(PG_FUNCTION_ARGS)
00565 {
00566     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
00567     Oid         operator = PG_GETARG_OID(1);
00568     List       *args = (List *) PG_GETARG_POINTER(2);
00569     int         varRelid = PG_GETARG_INT32(3);
00570     VariableStatData vardata;
00571     Node       *other;
00572     bool        varonleft;
00573     double      selec;
00574 
00575     /*
00576      * If expression is not variable <@ something or something <@ variable,
00577      * then punt and return a default estimate.
00578      */
00579     if (!get_restriction_variable(root, args, varRelid,
00580                                   &vardata, &other, &varonleft))
00581         PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);
00582 
00583     /*
00584      * If the something is a NULL constant, assume operator is strict and
00585      * return zero, ie, operator will never return TRUE.
00586      */
00587     if (IsA(other, Const) &&
00588         ((Const *) other)->constisnull)
00589     {
00590         ReleaseVariableStats(vardata);
00591         PG_RETURN_FLOAT8(0.0);
00592     }
00593 
00594     if (IsA(other, Const))
00595     {
00596         /* Variable is being compared to a known non-null constant */
00597         Datum       constval = ((Const *) other)->constvalue;
00598         FmgrInfo    contproc;
00599         double      mcvsum;
00600         double      mcvsel;
00601         double      nullfrac;
00602         int         hist_size;
00603 
00604         fmgr_info(get_opcode(operator), &contproc);
00605 
00606         /*
00607          * Is the constant "<@" to any of the column's most common values?
00608          */
00609         mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
00610                                  &mcvsum);
00611 
00612         /*
00613          * If the histogram is large enough, see what fraction of it the
00614          * constant is "<@" to, and assume that's representative of the
00615          * non-MCV population.  Otherwise use the default selectivity for the
00616          * non-MCV population.
00617          */
00618         selec = histogram_selectivity(&vardata, &contproc,
00619                                       constval, varonleft,
00620                                       10, 1, &hist_size);
00621         if (selec < 0)
00622         {
00623             /* Nope, fall back on default */
00624             selec = DEFAULT_PARENT_SEL;
00625         }
00626         else if (hist_size < 100)
00627         {
00628             /*
00629              * For histogram sizes from 10 to 100, we combine the histogram
00630              * and default selectivities, putting increasingly more trust in
00631              * the histogram for larger sizes.
00632              */
00633             double      hist_weight = hist_size / 100.0;
00634 
00635             selec = selec * hist_weight +
00636                 DEFAULT_PARENT_SEL * (1.0 - hist_weight);
00637         }
00638 
00639         /* In any case, don't believe extremely small or large estimates. */
00640         if (selec < 0.0001)
00641             selec = 0.0001;
00642         else if (selec > 0.9999)
00643             selec = 0.9999;
00644 
00645         if (HeapTupleIsValid(vardata.statsTuple))
00646             nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
00647         else
00648             nullfrac = 0.0;
00649 
00650         /*
00651          * Now merge the results from the MCV and histogram calculations,
00652          * realizing that the histogram covers only the non-null values that
00653          * are not listed in MCV.
00654          */
00655         selec *= 1.0 - nullfrac - mcvsum;
00656         selec += mcvsel;
00657     }
00658     else
00659         selec = DEFAULT_PARENT_SEL;
00660 
00661     ReleaseVariableStats(vardata);
00662 
00663     /* result should be in range, but make sure... */
00664     CLAMP_PROBABILITY(selec);
00665 
00666     PG_RETURN_FLOAT8((float8) selec);
00667 }