Header And Logo

PostgreSQL
| The world's most advanced open source database.

tsvector_op.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * tsvector_op.c
00004  *    operations over tsvector
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/utils/adt/tsvector_op.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include "catalog/namespace.h"
00018 #include "catalog/pg_type.h"
00019 #include "commands/trigger.h"
00020 #include "executor/spi.h"
00021 #include "funcapi.h"
00022 #include "mb/pg_wchar.h"
00023 #include "miscadmin.h"
00024 #include "tsearch/ts_utils.h"
00025 #include "utils/builtins.h"
00026 #include "utils/lsyscache.h"
00027 #include "utils/rel.h"
00028 
00029 
00030 typedef struct
00031 {
00032     WordEntry  *arrb;
00033     WordEntry  *arre;
00034     char       *values;
00035     char       *operand;
00036 } CHKVAL;
00037 
00038 
00039 typedef struct StatEntry
00040 {
00041     uint32      ndoc;           /* zero indicates that we already was here
00042                                  * while walking throug the tree */
00043     uint32      nentry;
00044     struct StatEntry *left;
00045     struct StatEntry *right;
00046     uint32      lenlexeme;
00047     char        lexeme[1];
00048 } StatEntry;
00049 
00050 #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
00051 
00052 typedef struct
00053 {
00054     int32       weight;
00055 
00056     uint32      maxdepth;
00057 
00058     StatEntry **stack;
00059     uint32      stackpos;
00060 
00061     StatEntry  *root;
00062 } TSVectorStat;
00063 
00064 #define STATHDRSIZE (offsetof(TSVectorStat, data))
00065 
00066 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
00067 
00068 
00069 /*
00070  * Check if datatype is the specified type or equivalent to it.
00071  *
00072  * Note: we could just do getBaseType() unconditionally, but since that's
00073  * a relatively expensive catalog lookup that most users won't need, we
00074  * try the straight comparison first.
00075  */
00076 static bool
00077 is_expected_type(Oid typid, Oid expected_type)
00078 {
00079     if (typid == expected_type)
00080         return true;
00081     typid = getBaseType(typid);
00082     if (typid == expected_type)
00083         return true;
00084     return false;
00085 }
00086 
00087 /* Check if datatype is TEXT or binary-equivalent to it */
00088 static bool
00089 is_text_type(Oid typid)
00090 {
00091     /* varchar(n) and char(n) are binary-compatible with text */
00092     if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
00093         return true;
00094     /* Allow domains over these types, too */
00095     typid = getBaseType(typid);
00096     if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
00097         return true;
00098     return false;
00099 }
00100 
00101 
00102 /*
00103  * Order: haspos, len, word, for all positions (pos, weight)
00104  */
00105 static int
00106 silly_cmp_tsvector(const TSVector a, const TSVector b)
00107 {
00108     if (VARSIZE(a) < VARSIZE(b))
00109         return -1;
00110     else if (VARSIZE(a) > VARSIZE(b))
00111         return 1;
00112     else if (a->size < b->size)
00113         return -1;
00114     else if (a->size > b->size)
00115         return 1;
00116     else
00117     {
00118         WordEntry  *aptr = ARRPTR(a);
00119         WordEntry  *bptr = ARRPTR(b);
00120         int         i = 0;
00121         int         res;
00122 
00123 
00124         for (i = 0; i < a->size; i++)
00125         {
00126             if (aptr->haspos != bptr->haspos)
00127             {
00128                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
00129             }
00130             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
00131             {
00132                 return res;
00133             }
00134             else if (aptr->haspos)
00135             {
00136                 WordEntryPos *ap = POSDATAPTR(a, aptr);
00137                 WordEntryPos *bp = POSDATAPTR(b, bptr);
00138                 int         j;
00139 
00140                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
00141                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
00142 
00143                 for (j = 0; j < POSDATALEN(a, aptr); j++)
00144                 {
00145                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
00146                     {
00147                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
00148                     }
00149                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
00150                     {
00151                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
00152                     }
00153                     ap++, bp++;
00154                 }
00155             }
00156 
00157             aptr++;
00158             bptr++;
00159         }
00160     }
00161 
00162     return 0;
00163 }
00164 
00165 #define TSVECTORCMPFUNC( type, action, ret )            \
00166 Datum                                                   \
00167 tsvector_##type(PG_FUNCTION_ARGS)                       \
00168 {                                                       \
00169     TSVector    a = PG_GETARG_TSVECTOR(0);              \
00170     TSVector    b = PG_GETARG_TSVECTOR(1);              \
00171     int         res = silly_cmp_tsvector(a, b);         \
00172     PG_FREE_IF_COPY(a,0);                               \
00173     PG_FREE_IF_COPY(b,1);                               \
00174     PG_RETURN_##ret( res action 0 );                    \
00175 }   \
00176 /* keep compiler quiet - no extra ; */                  \
00177 extern int no_such_variable
00178 
00179 TSVECTORCMPFUNC(lt, <, BOOL);
00180 TSVECTORCMPFUNC(le, <=, BOOL);
00181 TSVECTORCMPFUNC(eq, ==, BOOL);
00182 TSVECTORCMPFUNC(ge, >=, BOOL);
00183 TSVECTORCMPFUNC(gt, >, BOOL);
00184 TSVECTORCMPFUNC(ne, !=, BOOL);
00185 TSVECTORCMPFUNC(cmp, +, INT32);
00186 
00187 Datum
00188 tsvector_strip(PG_FUNCTION_ARGS)
00189 {
00190     TSVector    in = PG_GETARG_TSVECTOR(0);
00191     TSVector    out;
00192     int         i,
00193                 len = 0;
00194     WordEntry  *arrin = ARRPTR(in),
00195                *arrout;
00196     char       *cur;
00197 
00198     for (i = 0; i < in->size; i++)
00199         len += arrin[i].len;
00200 
00201     len = CALCDATASIZE(in->size, len);
00202     out = (TSVector) palloc0(len);
00203     SET_VARSIZE(out, len);
00204     out->size = in->size;
00205     arrout = ARRPTR(out);
00206     cur = STRPTR(out);
00207     for (i = 0; i < in->size; i++)
00208     {
00209         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
00210         arrout[i].haspos = 0;
00211         arrout[i].len = arrin[i].len;
00212         arrout[i].pos = cur - STRPTR(out);
00213         cur += arrout[i].len;
00214     }
00215 
00216     PG_FREE_IF_COPY(in, 0);
00217     PG_RETURN_POINTER(out);
00218 }
00219 
00220 Datum
00221 tsvector_length(PG_FUNCTION_ARGS)
00222 {
00223     TSVector    in = PG_GETARG_TSVECTOR(0);
00224     int32       ret = in->size;
00225 
00226     PG_FREE_IF_COPY(in, 0);
00227     PG_RETURN_INT32(ret);
00228 }
00229 
00230 Datum
00231 tsvector_setweight(PG_FUNCTION_ARGS)
00232 {
00233     TSVector    in = PG_GETARG_TSVECTOR(0);
00234     char        cw = PG_GETARG_CHAR(1);
00235     TSVector    out;
00236     int         i,
00237                 j;
00238     WordEntry  *entry;
00239     WordEntryPos *p;
00240     int         w = 0;
00241 
00242     switch (cw)
00243     {
00244         case 'A':
00245         case 'a':
00246             w = 3;
00247             break;
00248         case 'B':
00249         case 'b':
00250             w = 2;
00251             break;
00252         case 'C':
00253         case 'c':
00254             w = 1;
00255             break;
00256         case 'D':
00257         case 'd':
00258             w = 0;
00259             break;
00260         default:
00261             /* internal error */
00262             elog(ERROR, "unrecognized weight: %d", cw);
00263     }
00264 
00265     out = (TSVector) palloc(VARSIZE(in));
00266     memcpy(out, in, VARSIZE(in));
00267     entry = ARRPTR(out);
00268     i = out->size;
00269     while (i--)
00270     {
00271         if ((j = POSDATALEN(out, entry)) != 0)
00272         {
00273             p = POSDATAPTR(out, entry);
00274             while (j--)
00275             {
00276                 WEP_SETWEIGHT(*p, w);
00277                 p++;
00278             }
00279         }
00280         entry++;
00281     }
00282 
00283     PG_FREE_IF_COPY(in, 0);
00284     PG_RETURN_POINTER(out);
00285 }
00286 
00287 #define compareEntry(pa, a, pb, b) \
00288     tsCompareString((pa) + (a)->pos, (a)->len,  \
00289                     (pb) + (b)->pos, (b)->len,  \
00290                     false)
00291 
00292 /*
00293  * Add positions from src to dest after offsetting them by maxpos.
00294  * Return the number added (might be less than expected due to overflow)
00295  */
00296 static int32
00297 add_pos(TSVector src, WordEntry *srcptr,
00298         TSVector dest, WordEntry *destptr,
00299         int32 maxpos)
00300 {
00301     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
00302     int         i;
00303     uint16      slen = POSDATALEN(src, srcptr),
00304                 startlen;
00305     WordEntryPos *spos = POSDATAPTR(src, srcptr),
00306                *dpos = POSDATAPTR(dest, destptr);
00307 
00308     if (!destptr->haspos)
00309         *clen = 0;
00310 
00311     startlen = *clen;
00312     for (i = 0;
00313          i < slen && *clen < MAXNUMPOS &&
00314          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
00315          i++)
00316     {
00317         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
00318         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
00319         (*clen)++;
00320     }
00321 
00322     if (*clen != startlen)
00323         destptr->haspos = 1;
00324     return *clen - startlen;
00325 }
00326 
00327 
00328 Datum
00329 tsvector_concat(PG_FUNCTION_ARGS)
00330 {
00331     TSVector    in1 = PG_GETARG_TSVECTOR(0);
00332     TSVector    in2 = PG_GETARG_TSVECTOR(1);
00333     TSVector    out;
00334     WordEntry  *ptr;
00335     WordEntry  *ptr1,
00336                *ptr2;
00337     WordEntryPos *p;
00338     int         maxpos = 0,
00339                 i,
00340                 j,
00341                 i1,
00342                 i2,
00343                 dataoff,
00344                 output_bytes,
00345                 output_size;
00346     char       *data,
00347                *data1,
00348                *data2;
00349 
00350     /* Get max position in in1; we'll need this to offset in2's positions */
00351     ptr = ARRPTR(in1);
00352     i = in1->size;
00353     while (i--)
00354     {
00355         if ((j = POSDATALEN(in1, ptr)) != 0)
00356         {
00357             p = POSDATAPTR(in1, ptr);
00358             while (j--)
00359             {
00360                 if (WEP_GETPOS(*p) > maxpos)
00361                     maxpos = WEP_GETPOS(*p);
00362                 p++;
00363             }
00364         }
00365         ptr++;
00366     }
00367 
00368     ptr1 = ARRPTR(in1);
00369     ptr2 = ARRPTR(in2);
00370     data1 = STRPTR(in1);
00371     data2 = STRPTR(in2);
00372     i1 = in1->size;
00373     i2 = in2->size;
00374 
00375     /*
00376      * Conservative estimate of space needed.  We might need all the data in
00377      * both inputs, and conceivably add a pad byte before position data for
00378      * each item where there was none before.
00379      */
00380     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
00381 
00382     out = (TSVector) palloc0(output_bytes);
00383     SET_VARSIZE(out, output_bytes);
00384 
00385     /*
00386      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
00387      * collapse out any unused space at the end.
00388      */
00389     out->size = in1->size + in2->size;
00390 
00391     ptr = ARRPTR(out);
00392     data = STRPTR(out);
00393     dataoff = 0;
00394     while (i1 && i2)
00395     {
00396         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
00397 
00398         if (cmp < 0)
00399         {                       /* in1 first */
00400             ptr->haspos = ptr1->haspos;
00401             ptr->len = ptr1->len;
00402             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
00403             ptr->pos = dataoff;
00404             dataoff += ptr1->len;
00405             if (ptr->haspos)
00406             {
00407                 dataoff = SHORTALIGN(dataoff);
00408                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
00409                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
00410             }
00411 
00412             ptr++;
00413             ptr1++;
00414             i1--;
00415         }
00416         else if (cmp > 0)
00417         {                       /* in2 first */
00418             ptr->haspos = ptr2->haspos;
00419             ptr->len = ptr2->len;
00420             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
00421             ptr->pos = dataoff;
00422             dataoff += ptr2->len;
00423             if (ptr->haspos)
00424             {
00425                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
00426 
00427                 if (addlen == 0)
00428                     ptr->haspos = 0;
00429                 else
00430                 {
00431                     dataoff = SHORTALIGN(dataoff);
00432                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
00433                 }
00434             }
00435 
00436             ptr++;
00437             ptr2++;
00438             i2--;
00439         }
00440         else
00441         {
00442             ptr->haspos = ptr1->haspos | ptr2->haspos;
00443             ptr->len = ptr1->len;
00444             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
00445             ptr->pos = dataoff;
00446             dataoff += ptr1->len;
00447             if (ptr->haspos)
00448             {
00449                 if (ptr1->haspos)
00450                 {
00451                     dataoff = SHORTALIGN(dataoff);
00452                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
00453                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
00454                     if (ptr2->haspos)
00455                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
00456                 }
00457                 else    /* must have ptr2->haspos */
00458                 {
00459                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
00460 
00461                     if (addlen == 0)
00462                         ptr->haspos = 0;
00463                     else
00464                     {
00465                         dataoff = SHORTALIGN(dataoff);
00466                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
00467                     }
00468                 }
00469             }
00470 
00471             ptr++;
00472             ptr1++;
00473             ptr2++;
00474             i1--;
00475             i2--;
00476         }
00477     }
00478 
00479     while (i1)
00480     {
00481         ptr->haspos = ptr1->haspos;
00482         ptr->len = ptr1->len;
00483         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
00484         ptr->pos = dataoff;
00485         dataoff += ptr1->len;
00486         if (ptr->haspos)
00487         {
00488             dataoff = SHORTALIGN(dataoff);
00489             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
00490             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
00491         }
00492 
00493         ptr++;
00494         ptr1++;
00495         i1--;
00496     }
00497 
00498     while (i2)
00499     {
00500         ptr->haspos = ptr2->haspos;
00501         ptr->len = ptr2->len;
00502         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
00503         ptr->pos = dataoff;
00504         dataoff += ptr2->len;
00505         if (ptr->haspos)
00506         {
00507             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
00508 
00509             if (addlen == 0)
00510                 ptr->haspos = 0;
00511             else
00512             {
00513                 dataoff = SHORTALIGN(dataoff);
00514                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
00515             }
00516         }
00517 
00518         ptr++;
00519         ptr2++;
00520         i2--;
00521     }
00522 
00523     /*
00524      * Instead of checking each offset individually, we check for overflow of
00525      * pos fields once at the end.
00526      */
00527     if (dataoff > MAXSTRPOS)
00528         ereport(ERROR,
00529                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00530                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
00531 
00532     /*
00533      * Adjust sizes (asserting that we didn't overrun the original estimates)
00534      * and collapse out any unused array entries.
00535      */
00536     output_size = ptr - ARRPTR(out);
00537     Assert(output_size <= out->size);
00538     out->size = output_size;
00539     if (data != STRPTR(out))
00540         memmove(STRPTR(out), data, dataoff);
00541     output_bytes = CALCDATASIZE(out->size, dataoff);
00542     Assert(output_bytes <= VARSIZE(out));
00543     SET_VARSIZE(out, output_bytes);
00544 
00545     PG_FREE_IF_COPY(in1, 0);
00546     PG_FREE_IF_COPY(in2, 1);
00547     PG_RETURN_POINTER(out);
00548 }
00549 
/*
 * Compare two length-counted byte strings by tsvector rules.
 *
 * With prefix = false this is a bytewise three-way comparison in which, when
 * the common leading bytes are equal, the shorter string sorts first.
 *
 * With prefix = true the result is zero iff a is a prefix of b (an empty a is
 * a prefix of anything); otherwise the sign says on which side the mismatch
 * falls.
 */
int32
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
{
    int         cmp;

    if (lena == 0)
    {
        /* empty string is prefix of anything, and equal only to empty */
        return (prefix || lenb <= 0) ? 0 : -1;
    }

    if (lenb == 0)
        return (lena > 0) ? 1 : 0;

    cmp = memcmp(a, b, Min(lena, lenb));
    if (cmp != 0 || lena == lenb)
        return cmp;

    /* Common leading bytes are equal but the lengths differ */
    if (lena > lenb)
        return 1;               /* a is longer, so not a prefix of b */

    return prefix ? 0 : -1;
}
00588 
00589 /*
00590  * check weight info
00591  */
00592 static bool
00593 checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
00594 {
00595     WordEntryPosVector *posvec;
00596     WordEntryPos *ptr;
00597     uint16      len;
00598 
00599     posvec = (WordEntryPosVector *)
00600         (chkval->values + SHORTALIGN(val->pos + val->len));
00601 
00602     len = posvec->npos;
00603     ptr = posvec->pos;
00604 
00605     while (len--)
00606     {
00607         if (item->weight & (1 << WEP_GETWEIGHT(*ptr)))
00608             return true;
00609         ptr++;
00610     }
00611     return false;
00612 }
00613 
00614 /*
00615  * is there value 'val' in array or not ?
00616  */
00617 static bool
00618 checkcondition_str(void *checkval, QueryOperand *val)
00619 {
00620     CHKVAL     *chkval = (CHKVAL *) checkval;
00621     WordEntry  *StopLow = chkval->arrb;
00622     WordEntry  *StopHigh = chkval->arre;
00623     WordEntry  *StopMiddle = StopHigh;
00624     int         difference = -1;
00625     bool        res = false;
00626 
00627     /* Loop invariant: StopLow <= val < StopHigh */
00628     while (StopLow < StopHigh)
00629     {
00630         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
00631         difference = tsCompareString(chkval->operand + val->distance, val->length,
00632                            chkval->values + StopMiddle->pos, StopMiddle->len,
00633                                      false);
00634 
00635         if (difference == 0)
00636         {
00637             res = (val->weight && StopMiddle->haspos) ?
00638                 checkclass_str(chkval, StopMiddle, val) : true;
00639             break;
00640         }
00641         else if (difference > 0)
00642             StopLow = StopMiddle + 1;
00643         else
00644             StopHigh = StopMiddle;
00645     }
00646 
00647     if (!res && val->prefix)
00648     {
00649         /*
00650          * there was a failed exact search, so we should scan further to find
00651          * a prefix match.
00652          */
00653         if (StopLow >= StopHigh)
00654             StopMiddle = StopHigh;
00655 
00656         while (res == false && StopMiddle < chkval->arre &&
00657                tsCompareString(chkval->operand + val->distance, val->length,
00658                            chkval->values + StopMiddle->pos, StopMiddle->len,
00659                                true) == 0)
00660         {
00661             res = (val->weight && StopMiddle->haspos) ?
00662                 checkclass_str(chkval, StopMiddle, val) : true;
00663 
00664             StopMiddle++;
00665         }
00666     }
00667 
00668     return res;
00669 }
00670 
00671 /*
00672  * Evaluate tsquery boolean expression.
00673  *
00674  * chkcond is a callback function used to evaluate each VAL node in the query.
00675  * checkval can be used to pass information to the callback. TS_execute doesn't
00676  * do anything with it.
00677  * if calcnot is false, NOT expressions are always evaluated to be true. This
00678  * is used in ranking.
00679  */
00680 bool
00681 TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
00682            bool (*chkcond) (void *checkval, QueryOperand *val))
00683 {
00684     /* since this function recurses, it could be driven to stack overflow */
00685     check_stack_depth();
00686 
00687     if (curitem->type == QI_VAL)
00688         return chkcond(checkval, (QueryOperand *) curitem);
00689 
00690     switch (curitem->qoperator.oper)
00691     {
00692         case OP_NOT:
00693             if (calcnot)
00694                 return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
00695             else
00696                 return true;
00697 
00698         case OP_AND:
00699             if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
00700                 return TS_execute(curitem + 1, checkval, calcnot, chkcond);
00701             else
00702                 return false;
00703 
00704         case OP_OR:
00705             if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
00706                 return true;
00707             else
00708                 return TS_execute(curitem + 1, checkval, calcnot, chkcond);
00709 
00710         default:
00711             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
00712     }
00713 
00714     /* not reachable, but keep compiler quiet */
00715     return false;
00716 }
00717 
00718 /*
00719  * Detect whether a tsquery boolean expression requires any positive matches
00720  * to values shown in the tsquery.
00721  *
00722  * This is needed to know whether a GIN index search requires full index scan.
00723  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
00724  * entries for x; but 'x | !y' could match rows containing neither x nor y.
00725  */
00726 bool
00727 tsquery_requires_match(QueryItem *curitem)
00728 {
00729     /* since this function recurses, it could be driven to stack overflow */
00730     check_stack_depth();
00731 
00732     if (curitem->type == QI_VAL)
00733         return true;
00734 
00735     switch (curitem->qoperator.oper)
00736     {
00737         case OP_NOT:
00738 
00739             /*
00740              * Assume there are no required matches underneath a NOT.  For
00741              * some cases with nested NOTs, we could prove there's a required
00742              * match, but it seems unlikely to be worth the trouble.
00743              */
00744             return false;
00745 
00746         case OP_AND:
00747             /* If either side requires a match, we're good */
00748             if (tsquery_requires_match(curitem + curitem->qoperator.left))
00749                 return true;
00750             else
00751                 return tsquery_requires_match(curitem + 1);
00752 
00753         case OP_OR:
00754             /* Both sides must require a match */
00755             if (tsquery_requires_match(curitem + curitem->qoperator.left))
00756                 return tsquery_requires_match(curitem + 1);
00757             else
00758                 return false;
00759 
00760         default:
00761             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
00762     }
00763 
00764     /* not reachable, but keep compiler quiet */
00765     return false;
00766 }
00767 
00768 /*
00769  * boolean operations
00770  */
00771 Datum
00772 ts_match_qv(PG_FUNCTION_ARGS)
00773 {
00774     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
00775                                         PG_GETARG_DATUM(1),
00776                                         PG_GETARG_DATUM(0)));
00777 }
00778 
00779 Datum
00780 ts_match_vq(PG_FUNCTION_ARGS)
00781 {
00782     TSVector    val = PG_GETARG_TSVECTOR(0);
00783     TSQuery     query = PG_GETARG_TSQUERY(1);
00784     CHKVAL      chkval;
00785     bool        result;
00786 
00787     if (!val->size || !query->size)
00788     {
00789         PG_FREE_IF_COPY(val, 0);
00790         PG_FREE_IF_COPY(query, 1);
00791         PG_RETURN_BOOL(false);
00792     }
00793 
00794     chkval.arrb = ARRPTR(val);
00795     chkval.arre = chkval.arrb + val->size;
00796     chkval.values = STRPTR(val);
00797     chkval.operand = GETOPERAND(query);
00798     result = TS_execute(
00799                         GETQUERY(query),
00800                         &chkval,
00801                         true,
00802                         checkcondition_str
00803         );
00804 
00805     PG_FREE_IF_COPY(val, 0);
00806     PG_FREE_IF_COPY(query, 1);
00807     PG_RETURN_BOOL(result);
00808 }
00809 
00810 Datum
00811 ts_match_tt(PG_FUNCTION_ARGS)
00812 {
00813     TSVector    vector;
00814     TSQuery     query;
00815     bool        res;
00816 
00817     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
00818                                                   PG_GETARG_DATUM(0)));
00819     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
00820                                                 PG_GETARG_DATUM(1)));
00821 
00822     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
00823                                            TSVectorGetDatum(vector),
00824                                            TSQueryGetDatum(query)));
00825 
00826     pfree(vector);
00827     pfree(query);
00828 
00829     PG_RETURN_BOOL(res);
00830 }
00831 
00832 Datum
00833 ts_match_tq(PG_FUNCTION_ARGS)
00834 {
00835     TSVector    vector;
00836     TSQuery     query = PG_GETARG_TSQUERY(1);
00837     bool        res;
00838 
00839     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
00840                                                   PG_GETARG_DATUM(0)));
00841 
00842     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
00843                                            TSVectorGetDatum(vector),
00844                                            TSQueryGetDatum(query)));
00845 
00846     pfree(vector);
00847     PG_FREE_IF_COPY(query, 1);
00848 
00849     PG_RETURN_BOOL(res);
00850 }
00851 
00852 /*
00853  * ts_stat statistic function support
00854  */
00855 
00856 
00857 /*
00858  * Returns the number of positions in value 'wptr' within tsvector 'txt',
00859  * that have a weight equal to one of the weights in 'weight' bitmask.
00860  */
00861 static int
00862 check_weight(TSVector txt, WordEntry *wptr, int8 weight)
00863 {
00864     int         len = POSDATALEN(txt, wptr);
00865     int         num = 0;
00866     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
00867 
00868     while (len--)
00869     {
00870         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
00871             num++;
00872         ptr++;
00873     }
00874     return num;
00875 }
00876 
00877 #define compareStatWord(a,e,t)                          \
00878     tsCompareString((a)->lexeme, (a)->lenlexeme,        \
00879                     STRPTR(t) + (e)->pos, (e)->len,     \
00880                     false)
00881 
00882 static void
00883 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
00884 {
00885     WordEntry  *we = ARRPTR(txt) + off;
00886     StatEntry  *node = stat->root,
00887                *pnode = NULL;
00888     int         n,
00889                 res = 0;
00890     uint32      depth = 1;
00891 
00892     if (stat->weight == 0)
00893         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
00894     else
00895         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
00896 
00897     if (n == 0)
00898         return;                 /* nothing to insert */
00899 
00900     while (node)
00901     {
00902         res = compareStatWord(node, we, txt);
00903 
00904         if (res == 0)
00905         {
00906             break;
00907         }
00908         else
00909         {
00910             pnode = node;
00911             node = (res < 0) ? node->left : node->right;
00912         }
00913         depth++;
00914     }
00915 
00916     if (depth > stat->maxdepth)
00917         stat->maxdepth = depth;
00918 
00919     if (node == NULL)
00920     {
00921         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
00922         node->left = node->right = NULL;
00923         node->ndoc = 1;
00924         node->nentry = n;
00925         node->lenlexeme = we->len;
00926         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
00927 
00928         if (pnode == NULL)
00929         {
00930             stat->root = node;
00931         }
00932         else
00933         {
00934             if (res < 0)
00935                 pnode->left = node;
00936             else
00937                 pnode->right = node;
00938         }
00939 
00940     }
00941     else
00942     {
00943         node->ndoc++;
00944         node->nentry += n;
00945     }
00946 }
00947 
00948 static void
00949 chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
00950                     uint32 low, uint32 high, uint32 offset)
00951 {
00952     uint32      pos;
00953     uint32      middle = (low + high) >> 1;
00954 
00955     pos = (low + middle) >> 1;
00956     if (low != middle && pos >= offset && pos - offset < txt->size)
00957         insertStatEntry(persistentContext, stat, txt, pos - offset);
00958     pos = (high + middle + 1) >> 1;
00959     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
00960         insertStatEntry(persistentContext, stat, txt, pos - offset);
00961 
00962     if (low != middle)
00963         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
00964     if (high != middle + 1)
00965         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
00966 }
00967 
00968 /*
00969  * This is written like a custom aggregate function, because the
00970  * original plan was to do just that. Unfortunately, an aggregate function
00971  * can't return a set, so that plan was abandoned. If that limitation is
00972  * lifted in the future, ts_stat could be a real aggregate function so that
00973  * you could use it like this:
00974  *
00975  *   SELECT ts_stat(vector_column) FROM vector_table;
00976  *
00977  *  where vector_column is a tsvector-type column in vector_table.
00978  */
00979 
00980 static TSVectorStat *
00981 ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
00982 {
00983     TSVector    txt = DatumGetTSVector(data);
00984     uint32      i,
00985                 nbit = 0,
00986                 offset;
00987 
00988     if (stat == NULL)
00989     {                           /* Init in first */
00990         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
00991         stat->maxdepth = 1;
00992     }
00993 
00994     /* simple check of correctness */
00995     if (txt == NULL || txt->size == 0)
00996     {
00997         if (txt && txt != (TSVector) DatumGetPointer(data))
00998             pfree(txt);
00999         return stat;
01000     }
01001 
01002     i = txt->size - 1;
01003     for (; i > 0; i >>= 1)
01004         nbit++;
01005 
01006     nbit = 1 << nbit;
01007     offset = (nbit - txt->size) / 2;
01008 
01009     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
01010     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
01011 
01012     return stat;
01013 }
01014 
01015 static void
01016 ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
01017                    TSVectorStat *stat)
01018 {
01019     TupleDesc   tupdesc;
01020     MemoryContext oldcontext;
01021     StatEntry  *node;
01022 
01023     funcctx->user_fctx = (void *) stat;
01024 
01025     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
01026 
01027     stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
01028     stat->stackpos = 0;
01029 
01030     node = stat->root;
01031     /* find leftmost value */
01032     if (node == NULL)
01033         stat->stack[stat->stackpos] = NULL;
01034     else
01035         for (;;)
01036         {
01037             stat->stack[stat->stackpos] = node;
01038             if (node->left)
01039             {
01040                 stat->stackpos++;
01041                 node = node->left;
01042             }
01043             else
01044                 break;
01045         }
01046     Assert(stat->stackpos <= stat->maxdepth);
01047 
01048     tupdesc = CreateTemplateTupleDesc(3, false);
01049     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
01050                        TEXTOID, -1, 0);
01051     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
01052                        INT4OID, -1, 0);
01053     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
01054                        INT4OID, -1, 0);
01055     funcctx->tuple_desc = BlessTupleDesc(tupdesc);
01056     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
01057 
01058     MemoryContextSwitchTo(oldcontext);
01059 }
01060 
01061 static StatEntry *
01062 walkStatEntryTree(TSVectorStat *stat)
01063 {
01064     StatEntry  *node = stat->stack[stat->stackpos];
01065 
01066     if (node == NULL)
01067         return NULL;
01068 
01069     if (node->ndoc != 0)
01070     {
01071         /* return entry itself: we already was at left sublink */
01072         return node;
01073     }
01074     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
01075     {
01076         /* go on right sublink */
01077         stat->stackpos++;
01078         node = node->right;
01079 
01080         /* find most-left value */
01081         for (;;)
01082         {
01083             stat->stack[stat->stackpos] = node;
01084             if (node->left)
01085             {
01086                 stat->stackpos++;
01087                 node = node->left;
01088             }
01089             else
01090                 break;
01091         }
01092         Assert(stat->stackpos <= stat->maxdepth);
01093     }
01094     else
01095     {
01096         /* we already return all left subtree, itself and  right subtree */
01097         if (stat->stackpos == 0)
01098             return NULL;
01099 
01100         stat->stackpos--;
01101         return walkStatEntryTree(stat);
01102     }
01103 
01104     return node;
01105 }
01106 
01107 static Datum
01108 ts_process_call(FuncCallContext *funcctx)
01109 {
01110     TSVectorStat *st;
01111     StatEntry  *entry;
01112 
01113     st = (TSVectorStat *) funcctx->user_fctx;
01114 
01115     entry = walkStatEntryTree(st);
01116 
01117     if (entry != NULL)
01118     {
01119         Datum       result;
01120         char       *values[3];
01121         char        ndoc[16];
01122         char        nentry[16];
01123         HeapTuple   tuple;
01124 
01125         values[0] = palloc(entry->lenlexeme + 1);
01126         memcpy(values[0], entry->lexeme, entry->lenlexeme);
01127         (values[0])[entry->lenlexeme] = '\0';
01128         sprintf(ndoc, "%d", entry->ndoc);
01129         values[1] = ndoc;
01130         sprintf(nentry, "%d", entry->nentry);
01131         values[2] = nentry;
01132 
01133         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
01134         result = HeapTupleGetDatum(tuple);
01135 
01136         pfree(values[0]);
01137 
01138         /* mark entry as already visited */
01139         entry->ndoc = 0;
01140 
01141         return result;
01142     }
01143 
01144     return (Datum) 0;
01145 }
01146 
01147 static TSVectorStat *
01148 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
01149 {
01150     char       *query = text_to_cstring(txt);
01151     int         i;
01152     TSVectorStat *stat;
01153     bool        isnull;
01154     Portal      portal;
01155     SPIPlanPtr  plan;
01156 
01157     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
01158         /* internal error */
01159         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
01160 
01161     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
01162         /* internal error */
01163         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
01164 
01165     SPI_cursor_fetch(portal, true, 100);
01166 
01167     if (SPI_tuptable == NULL ||
01168         SPI_tuptable->tupdesc->natts != 1 ||
01169         !is_expected_type(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
01170                           TSVECTOROID))
01171         ereport(ERROR,
01172                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01173                  errmsg("ts_stat query must return one tsvector column")));
01174 
01175     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
01176     stat->maxdepth = 1;
01177 
01178     if (ws)
01179     {
01180         char       *buf;
01181 
01182         buf = VARDATA(ws);
01183         while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ)
01184         {
01185             if (pg_mblen(buf) == 1)
01186             {
01187                 switch (*buf)
01188                 {
01189                     case 'A':
01190                     case 'a':
01191                         stat->weight |= 1 << 3;
01192                         break;
01193                     case 'B':
01194                     case 'b':
01195                         stat->weight |= 1 << 2;
01196                         break;
01197                     case 'C':
01198                     case 'c':
01199                         stat->weight |= 1 << 1;
01200                         break;
01201                     case 'D':
01202                     case 'd':
01203                         stat->weight |= 1;
01204                         break;
01205                     default:
01206                         stat->weight |= 0;
01207                 }
01208             }
01209             buf += pg_mblen(buf);
01210         }
01211     }
01212 
01213     while (SPI_processed > 0)
01214     {
01215         for (i = 0; i < SPI_processed; i++)
01216         {
01217             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
01218 
01219             if (!isnull)
01220                 stat = ts_accum(persistentContext, stat, data);
01221         }
01222 
01223         SPI_freetuptable(SPI_tuptable);
01224         SPI_cursor_fetch(portal, true, 100);
01225     }
01226 
01227     SPI_freetuptable(SPI_tuptable);
01228     SPI_cursor_close(portal);
01229     SPI_freeplan(plan);
01230     pfree(query);
01231 
01232     return stat;
01233 }
01234 
01235 Datum
01236 ts_stat1(PG_FUNCTION_ARGS)
01237 {
01238     FuncCallContext *funcctx;
01239     Datum       result;
01240 
01241     if (SRF_IS_FIRSTCALL())
01242     {
01243         TSVectorStat *stat;
01244         text       *txt = PG_GETARG_TEXT_P(0);
01245 
01246         funcctx = SRF_FIRSTCALL_INIT();
01247         SPI_connect();
01248         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
01249         PG_FREE_IF_COPY(txt, 0);
01250         ts_setup_firstcall(fcinfo, funcctx, stat);
01251         SPI_finish();
01252     }
01253 
01254     funcctx = SRF_PERCALL_SETUP();
01255     if ((result = ts_process_call(funcctx)) != (Datum) 0)
01256         SRF_RETURN_NEXT(funcctx, result);
01257     SRF_RETURN_DONE(funcctx);
01258 }
01259 
01260 Datum
01261 ts_stat2(PG_FUNCTION_ARGS)
01262 {
01263     FuncCallContext *funcctx;
01264     Datum       result;
01265 
01266     if (SRF_IS_FIRSTCALL())
01267     {
01268         TSVectorStat *stat;
01269         text       *txt = PG_GETARG_TEXT_P(0);
01270         text       *ws = PG_GETARG_TEXT_P(1);
01271 
01272         funcctx = SRF_FIRSTCALL_INIT();
01273         SPI_connect();
01274         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
01275         PG_FREE_IF_COPY(txt, 0);
01276         PG_FREE_IF_COPY(ws, 1);
01277         ts_setup_firstcall(fcinfo, funcctx, stat);
01278         SPI_finish();
01279     }
01280 
01281     funcctx = SRF_PERCALL_SETUP();
01282     if ((result = ts_process_call(funcctx)) != (Datum) 0)
01283         SRF_RETURN_NEXT(funcctx, result);
01284     SRF_RETURN_DONE(funcctx);
01285 }
01286 
01287 
01288 /*
01289  * Triggers for automatic update of a tsvector column from text column(s)
01290  *
01291  * Trigger arguments are either
01292  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)
01293  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
01294  * ie, tsconfig can either be specified by name, or indirectly as the
01295  * contents of a regconfig field in the row.  If the name is used, it must
01296  * be explicitly schema-qualified.
01297  */
01298 Datum
01299 tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
01300 {
01301     return tsvector_update_trigger(fcinfo, false);
01302 }
01303 
01304 Datum
01305 tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
01306 {
01307     return tsvector_update_trigger(fcinfo, true);
01308 }
01309 
01310 static Datum
01311 tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
01312 {
01313     TriggerData *trigdata;
01314     Trigger    *trigger;
01315     Relation    rel;
01316     HeapTuple   rettuple = NULL;
01317     int         tsvector_attr_num,
01318                 i;
01319     ParsedText  prs;
01320     Datum       datum;
01321     bool        isnull;
01322     text       *txt;
01323     Oid         cfgId;
01324 
01325     /* Check call context */
01326     if (!CALLED_AS_TRIGGER(fcinfo))     /* internal error */
01327         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
01328 
01329     trigdata = (TriggerData *) fcinfo->context;
01330     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
01331         elog(ERROR, "tsvector_update_trigger: must be fired for row");
01332     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
01333         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
01334 
01335     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
01336         rettuple = trigdata->tg_trigtuple;
01337     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
01338         rettuple = trigdata->tg_newtuple;
01339     else
01340         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
01341 
01342     trigger = trigdata->tg_trigger;
01343     rel = trigdata->tg_relation;
01344 
01345     if (trigger->tgnargs < 3)
01346         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
01347 
01348     /* Find the target tsvector column */
01349     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
01350     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
01351         ereport(ERROR,
01352                 (errcode(ERRCODE_UNDEFINED_COLUMN),
01353                  errmsg("tsvector column \"%s\" does not exist",
01354                         trigger->tgargs[0])));
01355     if (!is_expected_type(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
01356                           TSVECTOROID))
01357         ereport(ERROR,
01358                 (errcode(ERRCODE_DATATYPE_MISMATCH),
01359                  errmsg("column \"%s\" is not of tsvector type",
01360                         trigger->tgargs[0])));
01361 
01362     /* Find the configuration to use */
01363     if (config_column)
01364     {
01365         int         config_attr_num;
01366 
01367         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
01368         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
01369             ereport(ERROR,
01370                     (errcode(ERRCODE_UNDEFINED_COLUMN),
01371                      errmsg("configuration column \"%s\" does not exist",
01372                             trigger->tgargs[1])));
01373         if (!is_expected_type(SPI_gettypeid(rel->rd_att, config_attr_num),
01374                               REGCONFIGOID))
01375             ereport(ERROR,
01376                     (errcode(ERRCODE_DATATYPE_MISMATCH),
01377                      errmsg("column \"%s\" is not of regconfig type",
01378                             trigger->tgargs[1])));
01379 
01380         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
01381         if (isnull)
01382             ereport(ERROR,
01383                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
01384                      errmsg("configuration column \"%s\" must not be null",
01385                             trigger->tgargs[1])));
01386         cfgId = DatumGetObjectId(datum);
01387     }
01388     else
01389     {
01390         List       *names;
01391 
01392         names = stringToQualifiedNameList(trigger->tgargs[1]);
01393         /* require a schema so that results are not search path dependent */
01394         if (list_length(names) < 2)
01395             ereport(ERROR,
01396                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01397                      errmsg("text search configuration name \"%s\" must be schema-qualified",
01398                             trigger->tgargs[1])));
01399         cfgId = get_ts_config_oid(names, false);
01400     }
01401 
01402     /* initialize parse state */
01403     prs.lenwords = 32;
01404     prs.curwords = 0;
01405     prs.pos = 0;
01406     prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
01407 
01408     /* find all words in indexable column(s) */
01409     for (i = 2; i < trigger->tgnargs; i++)
01410     {
01411         int         numattr;
01412 
01413         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
01414         if (numattr == SPI_ERROR_NOATTRIBUTE)
01415             ereport(ERROR,
01416                     (errcode(ERRCODE_UNDEFINED_COLUMN),
01417                      errmsg("column \"%s\" does not exist",
01418                             trigger->tgargs[i])));
01419         if (!is_text_type(SPI_gettypeid(rel->rd_att, numattr)))
01420             ereport(ERROR,
01421                     (errcode(ERRCODE_DATATYPE_MISMATCH),
01422                      errmsg("column \"%s\" is not of a character type",
01423                             trigger->tgargs[i])));
01424 
01425         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
01426         if (isnull)
01427             continue;
01428 
01429         txt = DatumGetTextP(datum);
01430 
01431         parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
01432 
01433         if (txt != (text *) DatumGetPointer(datum))
01434             pfree(txt);
01435     }
01436 
01437     /* make tsvector value */
01438     if (prs.curwords)
01439     {
01440         datum = PointerGetDatum(make_tsvector(&prs));
01441         rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
01442                                    &datum, NULL);
01443         pfree(DatumGetPointer(datum));
01444     }
01445     else
01446     {
01447         TSVector    out = palloc(CALCDATASIZE(0, 0));
01448 
01449         SET_VARSIZE(out, CALCDATASIZE(0, 0));
01450         out->size = 0;
01451         datum = PointerGetDatum(out);
01452         rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
01453                                    &datum, NULL);
01454         pfree(prs.words);
01455     }
01456 
01457     if (rettuple == NULL)       /* internal error */
01458         elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple",
01459              SPI_result);
01460 
01461     return PointerGetDatum(rettuple);
01462 }