00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "postgres.h"
00016
00017 #include "catalog/namespace.h"
00018 #include "catalog/pg_type.h"
00019 #include "commands/trigger.h"
00020 #include "executor/spi.h"
00021 #include "funcapi.h"
00022 #include "mb/pg_wchar.h"
00023 #include "miscadmin.h"
00024 #include "tsearch/ts_utils.h"
00025 #include "utils/builtins.h"
00026 #include "utils/lsyscache.h"
00027 #include "utils/rel.h"
00028
00029
00030 typedef struct
00031 {
00032 WordEntry *arrb;
00033 WordEntry *arre;
00034 char *values;
00035 char *operand;
00036 } CHKVAL;
00037
00038
00039 typedef struct StatEntry
00040 {
00041 uint32 ndoc;
00042
00043 uint32 nentry;
00044 struct StatEntry *left;
00045 struct StatEntry *right;
00046 uint32 lenlexeme;
00047 char lexeme[1];
00048 } StatEntry;
00049
00050 #define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
00051
00052 typedef struct
00053 {
00054 int32 weight;
00055
00056 uint32 maxdepth;
00057
00058 StatEntry **stack;
00059 uint32 stackpos;
00060
00061 StatEntry *root;
00062 } TSVectorStat;
00063
00064 #define STATHDRSIZE (offsetof(TSVectorStat, data))
00065
00066 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 static bool
00077 is_expected_type(Oid typid, Oid expected_type)
00078 {
00079 if (typid == expected_type)
00080 return true;
00081 typid = getBaseType(typid);
00082 if (typid == expected_type)
00083 return true;
00084 return false;
00085 }
00086
00087
00088 static bool
00089 is_text_type(Oid typid)
00090 {
00091
00092 if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
00093 return true;
00094
00095 typid = getBaseType(typid);
00096 if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
00097 return true;
00098 return false;
00099 }
00100
00101
00102
00103
00104
00105 static int
00106 silly_cmp_tsvector(const TSVector a, const TSVector b)
00107 {
00108 if (VARSIZE(a) < VARSIZE(b))
00109 return -1;
00110 else if (VARSIZE(a) > VARSIZE(b))
00111 return 1;
00112 else if (a->size < b->size)
00113 return -1;
00114 else if (a->size > b->size)
00115 return 1;
00116 else
00117 {
00118 WordEntry *aptr = ARRPTR(a);
00119 WordEntry *bptr = ARRPTR(b);
00120 int i = 0;
00121 int res;
00122
00123
00124 for (i = 0; i < a->size; i++)
00125 {
00126 if (aptr->haspos != bptr->haspos)
00127 {
00128 return (aptr->haspos > bptr->haspos) ? -1 : 1;
00129 }
00130 else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
00131 {
00132 return res;
00133 }
00134 else if (aptr->haspos)
00135 {
00136 WordEntryPos *ap = POSDATAPTR(a, aptr);
00137 WordEntryPos *bp = POSDATAPTR(b, bptr);
00138 int j;
00139
00140 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
00141 return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
00142
00143 for (j = 0; j < POSDATALEN(a, aptr); j++)
00144 {
00145 if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
00146 {
00147 return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
00148 }
00149 else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
00150 {
00151 return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
00152 }
00153 ap++, bp++;
00154 }
00155 }
00156
00157 aptr++;
00158 bptr++;
00159 }
00160 }
00161
00162 return 0;
00163 }
00164
00165 #define TSVECTORCMPFUNC( type, action, ret ) \
00166 Datum \
00167 tsvector_##type(PG_FUNCTION_ARGS) \
00168 { \
00169 TSVector a = PG_GETARG_TSVECTOR(0); \
00170 TSVector b = PG_GETARG_TSVECTOR(1); \
00171 int res = silly_cmp_tsvector(a, b); \
00172 PG_FREE_IF_COPY(a,0); \
00173 PG_FREE_IF_COPY(b,1); \
00174 PG_RETURN_##ret( res action 0 ); \
00175 } \
00176 \
00177 extern int no_such_variable
00178
00179 TSVECTORCMPFUNC(lt, <, BOOL);
00180 TSVECTORCMPFUNC(le, <=, BOOL);
00181 TSVECTORCMPFUNC(eq, ==, BOOL);
00182 TSVECTORCMPFUNC(ge, >=, BOOL);
00183 TSVECTORCMPFUNC(gt, >, BOOL);
00184 TSVECTORCMPFUNC(ne, !=, BOOL);
00185 TSVECTORCMPFUNC(cmp, +, INT32);
00186
00187 Datum
00188 tsvector_strip(PG_FUNCTION_ARGS)
00189 {
00190 TSVector in = PG_GETARG_TSVECTOR(0);
00191 TSVector out;
00192 int i,
00193 len = 0;
00194 WordEntry *arrin = ARRPTR(in),
00195 *arrout;
00196 char *cur;
00197
00198 for (i = 0; i < in->size; i++)
00199 len += arrin[i].len;
00200
00201 len = CALCDATASIZE(in->size, len);
00202 out = (TSVector) palloc0(len);
00203 SET_VARSIZE(out, len);
00204 out->size = in->size;
00205 arrout = ARRPTR(out);
00206 cur = STRPTR(out);
00207 for (i = 0; i < in->size; i++)
00208 {
00209 memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
00210 arrout[i].haspos = 0;
00211 arrout[i].len = arrin[i].len;
00212 arrout[i].pos = cur - STRPTR(out);
00213 cur += arrout[i].len;
00214 }
00215
00216 PG_FREE_IF_COPY(in, 0);
00217 PG_RETURN_POINTER(out);
00218 }
00219
00220 Datum
00221 tsvector_length(PG_FUNCTION_ARGS)
00222 {
00223 TSVector in = PG_GETARG_TSVECTOR(0);
00224 int32 ret = in->size;
00225
00226 PG_FREE_IF_COPY(in, 0);
00227 PG_RETURN_INT32(ret);
00228 }
00229
00230 Datum
00231 tsvector_setweight(PG_FUNCTION_ARGS)
00232 {
00233 TSVector in = PG_GETARG_TSVECTOR(0);
00234 char cw = PG_GETARG_CHAR(1);
00235 TSVector out;
00236 int i,
00237 j;
00238 WordEntry *entry;
00239 WordEntryPos *p;
00240 int w = 0;
00241
00242 switch (cw)
00243 {
00244 case 'A':
00245 case 'a':
00246 w = 3;
00247 break;
00248 case 'B':
00249 case 'b':
00250 w = 2;
00251 break;
00252 case 'C':
00253 case 'c':
00254 w = 1;
00255 break;
00256 case 'D':
00257 case 'd':
00258 w = 0;
00259 break;
00260 default:
00261
00262 elog(ERROR, "unrecognized weight: %d", cw);
00263 }
00264
00265 out = (TSVector) palloc(VARSIZE(in));
00266 memcpy(out, in, VARSIZE(in));
00267 entry = ARRPTR(out);
00268 i = out->size;
00269 while (i--)
00270 {
00271 if ((j = POSDATALEN(out, entry)) != 0)
00272 {
00273 p = POSDATAPTR(out, entry);
00274 while (j--)
00275 {
00276 WEP_SETWEIGHT(*p, w);
00277 p++;
00278 }
00279 }
00280 entry++;
00281 }
00282
00283 PG_FREE_IF_COPY(in, 0);
00284 PG_RETURN_POINTER(out);
00285 }
00286
/*
 * Compare the lexemes of two WordEntrys (non-prefix comparison).  pa and pb
 * are the base addresses that the entries' pos offsets are relative to.
 */
#define compareEntry(pa, a, pb, b)					\
	tsCompareString((pa) + (a)->pos, (a)->len,		\
					(pb) + (b)->pos, (b)->len,		\
					false)
00291
00292
00293
00294
00295
00296 static int32
00297 add_pos(TSVector src, WordEntry *srcptr,
00298 TSVector dest, WordEntry *destptr,
00299 int32 maxpos)
00300 {
00301 uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
00302 int i;
00303 uint16 slen = POSDATALEN(src, srcptr),
00304 startlen;
00305 WordEntryPos *spos = POSDATAPTR(src, srcptr),
00306 *dpos = POSDATAPTR(dest, destptr);
00307
00308 if (!destptr->haspos)
00309 *clen = 0;
00310
00311 startlen = *clen;
00312 for (i = 0;
00313 i < slen && *clen < MAXNUMPOS &&
00314 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
00315 i++)
00316 {
00317 WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
00318 WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
00319 (*clen)++;
00320 }
00321
00322 if (*clen != startlen)
00323 destptr->haspos = 1;
00324 return *clen - startlen;
00325 }
00326
00327
00328 Datum
00329 tsvector_concat(PG_FUNCTION_ARGS)
00330 {
00331 TSVector in1 = PG_GETARG_TSVECTOR(0);
00332 TSVector in2 = PG_GETARG_TSVECTOR(1);
00333 TSVector out;
00334 WordEntry *ptr;
00335 WordEntry *ptr1,
00336 *ptr2;
00337 WordEntryPos *p;
00338 int maxpos = 0,
00339 i,
00340 j,
00341 i1,
00342 i2,
00343 dataoff,
00344 output_bytes,
00345 output_size;
00346 char *data,
00347 *data1,
00348 *data2;
00349
00350
00351 ptr = ARRPTR(in1);
00352 i = in1->size;
00353 while (i--)
00354 {
00355 if ((j = POSDATALEN(in1, ptr)) != 0)
00356 {
00357 p = POSDATAPTR(in1, ptr);
00358 while (j--)
00359 {
00360 if (WEP_GETPOS(*p) > maxpos)
00361 maxpos = WEP_GETPOS(*p);
00362 p++;
00363 }
00364 }
00365 ptr++;
00366 }
00367
00368 ptr1 = ARRPTR(in1);
00369 ptr2 = ARRPTR(in2);
00370 data1 = STRPTR(in1);
00371 data2 = STRPTR(in2);
00372 i1 = in1->size;
00373 i2 = in2->size;
00374
00375
00376
00377
00378
00379
00380 output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
00381
00382 out = (TSVector) palloc0(output_bytes);
00383 SET_VARSIZE(out, output_bytes);
00384
00385
00386
00387
00388
00389 out->size = in1->size + in2->size;
00390
00391 ptr = ARRPTR(out);
00392 data = STRPTR(out);
00393 dataoff = 0;
00394 while (i1 && i2)
00395 {
00396 int cmp = compareEntry(data1, ptr1, data2, ptr2);
00397
00398 if (cmp < 0)
00399 {
00400 ptr->haspos = ptr1->haspos;
00401 ptr->len = ptr1->len;
00402 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
00403 ptr->pos = dataoff;
00404 dataoff += ptr1->len;
00405 if (ptr->haspos)
00406 {
00407 dataoff = SHORTALIGN(dataoff);
00408 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
00409 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
00410 }
00411
00412 ptr++;
00413 ptr1++;
00414 i1--;
00415 }
00416 else if (cmp > 0)
00417 {
00418 ptr->haspos = ptr2->haspos;
00419 ptr->len = ptr2->len;
00420 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
00421 ptr->pos = dataoff;
00422 dataoff += ptr2->len;
00423 if (ptr->haspos)
00424 {
00425 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
00426
00427 if (addlen == 0)
00428 ptr->haspos = 0;
00429 else
00430 {
00431 dataoff = SHORTALIGN(dataoff);
00432 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
00433 }
00434 }
00435
00436 ptr++;
00437 ptr2++;
00438 i2--;
00439 }
00440 else
00441 {
00442 ptr->haspos = ptr1->haspos | ptr2->haspos;
00443 ptr->len = ptr1->len;
00444 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
00445 ptr->pos = dataoff;
00446 dataoff += ptr1->len;
00447 if (ptr->haspos)
00448 {
00449 if (ptr1->haspos)
00450 {
00451 dataoff = SHORTALIGN(dataoff);
00452 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
00453 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
00454 if (ptr2->haspos)
00455 dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
00456 }
00457 else
00458 {
00459 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
00460
00461 if (addlen == 0)
00462 ptr->haspos = 0;
00463 else
00464 {
00465 dataoff = SHORTALIGN(dataoff);
00466 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
00467 }
00468 }
00469 }
00470
00471 ptr++;
00472 ptr1++;
00473 ptr2++;
00474 i1--;
00475 i2--;
00476 }
00477 }
00478
00479 while (i1)
00480 {
00481 ptr->haspos = ptr1->haspos;
00482 ptr->len = ptr1->len;
00483 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
00484 ptr->pos = dataoff;
00485 dataoff += ptr1->len;
00486 if (ptr->haspos)
00487 {
00488 dataoff = SHORTALIGN(dataoff);
00489 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
00490 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
00491 }
00492
00493 ptr++;
00494 ptr1++;
00495 i1--;
00496 }
00497
00498 while (i2)
00499 {
00500 ptr->haspos = ptr2->haspos;
00501 ptr->len = ptr2->len;
00502 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
00503 ptr->pos = dataoff;
00504 dataoff += ptr2->len;
00505 if (ptr->haspos)
00506 {
00507 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
00508
00509 if (addlen == 0)
00510 ptr->haspos = 0;
00511 else
00512 {
00513 dataoff = SHORTALIGN(dataoff);
00514 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
00515 }
00516 }
00517
00518 ptr++;
00519 ptr2++;
00520 i2--;
00521 }
00522
00523
00524
00525
00526
00527 if (dataoff > MAXSTRPOS)
00528 ereport(ERROR,
00529 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00530 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
00531
00532
00533
00534
00535
00536 output_size = ptr - ARRPTR(out);
00537 Assert(output_size <= out->size);
00538 out->size = output_size;
00539 if (data != STRPTR(out))
00540 memmove(STRPTR(out), data, dataoff);
00541 output_bytes = CALCDATASIZE(out->size, dataoff);
00542 Assert(output_bytes <= VARSIZE(out));
00543 SET_VARSIZE(out, output_bytes);
00544
00545 PG_FREE_IF_COPY(in1, 0);
00546 PG_FREE_IF_COPY(in2, 1);
00547 PG_RETURN_POINTER(out);
00548 }
00549
00550
00551
00552
00553
00554
00555 int32
00556 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
00557 {
00558 int cmp;
00559
00560 if (lena == 0)
00561 {
00562 if (prefix)
00563 cmp = 0;
00564 else
00565 cmp = (lenb > 0) ? -1 : 0;
00566 }
00567 else if (lenb == 0)
00568 {
00569 cmp = (lena > 0) ? 1 : 0;
00570 }
00571 else
00572 {
00573 cmp = memcmp(a, b, Min(lena, lenb));
00574
00575 if (prefix)
00576 {
00577 if (cmp == 0 && lena > lenb)
00578 cmp = 1;
00579 }
00580 else if (cmp == 0 && lena != lenb)
00581 {
00582 cmp = (lena < lenb) ? -1 : 1;
00583 }
00584 }
00585
00586 return cmp;
00587 }
00588
00589
00590
00591
00592 static bool
00593 checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
00594 {
00595 WordEntryPosVector *posvec;
00596 WordEntryPos *ptr;
00597 uint16 len;
00598
00599 posvec = (WordEntryPosVector *)
00600 (chkval->values + SHORTALIGN(val->pos + val->len));
00601
00602 len = posvec->npos;
00603 ptr = posvec->pos;
00604
00605 while (len--)
00606 {
00607 if (item->weight & (1 << WEP_GETWEIGHT(*ptr)))
00608 return true;
00609 ptr++;
00610 }
00611 return false;
00612 }
00613
00614
00615
00616
00617 static bool
00618 checkcondition_str(void *checkval, QueryOperand *val)
00619 {
00620 CHKVAL *chkval = (CHKVAL *) checkval;
00621 WordEntry *StopLow = chkval->arrb;
00622 WordEntry *StopHigh = chkval->arre;
00623 WordEntry *StopMiddle = StopHigh;
00624 int difference = -1;
00625 bool res = false;
00626
00627
00628 while (StopLow < StopHigh)
00629 {
00630 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
00631 difference = tsCompareString(chkval->operand + val->distance, val->length,
00632 chkval->values + StopMiddle->pos, StopMiddle->len,
00633 false);
00634
00635 if (difference == 0)
00636 {
00637 res = (val->weight && StopMiddle->haspos) ?
00638 checkclass_str(chkval, StopMiddle, val) : true;
00639 break;
00640 }
00641 else if (difference > 0)
00642 StopLow = StopMiddle + 1;
00643 else
00644 StopHigh = StopMiddle;
00645 }
00646
00647 if (!res && val->prefix)
00648 {
00649
00650
00651
00652
00653 if (StopLow >= StopHigh)
00654 StopMiddle = StopHigh;
00655
00656 while (res == false && StopMiddle < chkval->arre &&
00657 tsCompareString(chkval->operand + val->distance, val->length,
00658 chkval->values + StopMiddle->pos, StopMiddle->len,
00659 true) == 0)
00660 {
00661 res = (val->weight && StopMiddle->haspos) ?
00662 checkclass_str(chkval, StopMiddle, val) : true;
00663
00664 StopMiddle++;
00665 }
00666 }
00667
00668 return res;
00669 }
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680 bool
00681 TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
00682 bool (*chkcond) (void *checkval, QueryOperand *val))
00683 {
00684
00685 check_stack_depth();
00686
00687 if (curitem->type == QI_VAL)
00688 return chkcond(checkval, (QueryOperand *) curitem);
00689
00690 switch (curitem->qoperator.oper)
00691 {
00692 case OP_NOT:
00693 if (calcnot)
00694 return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
00695 else
00696 return true;
00697
00698 case OP_AND:
00699 if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
00700 return TS_execute(curitem + 1, checkval, calcnot, chkcond);
00701 else
00702 return false;
00703
00704 case OP_OR:
00705 if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
00706 return true;
00707 else
00708 return TS_execute(curitem + 1, checkval, calcnot, chkcond);
00709
00710 default:
00711 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
00712 }
00713
00714
00715 return false;
00716 }
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726 bool
00727 tsquery_requires_match(QueryItem *curitem)
00728 {
00729
00730 check_stack_depth();
00731
00732 if (curitem->type == QI_VAL)
00733 return true;
00734
00735 switch (curitem->qoperator.oper)
00736 {
00737 case OP_NOT:
00738
00739
00740
00741
00742
00743
00744 return false;
00745
00746 case OP_AND:
00747
00748 if (tsquery_requires_match(curitem + curitem->qoperator.left))
00749 return true;
00750 else
00751 return tsquery_requires_match(curitem + 1);
00752
00753 case OP_OR:
00754
00755 if (tsquery_requires_match(curitem + curitem->qoperator.left))
00756 return tsquery_requires_match(curitem + 1);
00757 else
00758 return false;
00759
00760 default:
00761 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
00762 }
00763
00764
00765 return false;
00766 }
00767
00768
00769
00770
00771 Datum
00772 ts_match_qv(PG_FUNCTION_ARGS)
00773 {
00774 PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
00775 PG_GETARG_DATUM(1),
00776 PG_GETARG_DATUM(0)));
00777 }
00778
00779 Datum
00780 ts_match_vq(PG_FUNCTION_ARGS)
00781 {
00782 TSVector val = PG_GETARG_TSVECTOR(0);
00783 TSQuery query = PG_GETARG_TSQUERY(1);
00784 CHKVAL chkval;
00785 bool result;
00786
00787 if (!val->size || !query->size)
00788 {
00789 PG_FREE_IF_COPY(val, 0);
00790 PG_FREE_IF_COPY(query, 1);
00791 PG_RETURN_BOOL(false);
00792 }
00793
00794 chkval.arrb = ARRPTR(val);
00795 chkval.arre = chkval.arrb + val->size;
00796 chkval.values = STRPTR(val);
00797 chkval.operand = GETOPERAND(query);
00798 result = TS_execute(
00799 GETQUERY(query),
00800 &chkval,
00801 true,
00802 checkcondition_str
00803 );
00804
00805 PG_FREE_IF_COPY(val, 0);
00806 PG_FREE_IF_COPY(query, 1);
00807 PG_RETURN_BOOL(result);
00808 }
00809
00810 Datum
00811 ts_match_tt(PG_FUNCTION_ARGS)
00812 {
00813 TSVector vector;
00814 TSQuery query;
00815 bool res;
00816
00817 vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
00818 PG_GETARG_DATUM(0)));
00819 query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
00820 PG_GETARG_DATUM(1)));
00821
00822 res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
00823 TSVectorGetDatum(vector),
00824 TSQueryGetDatum(query)));
00825
00826 pfree(vector);
00827 pfree(query);
00828
00829 PG_RETURN_BOOL(res);
00830 }
00831
00832 Datum
00833 ts_match_tq(PG_FUNCTION_ARGS)
00834 {
00835 TSVector vector;
00836 TSQuery query = PG_GETARG_TSQUERY(1);
00837 bool res;
00838
00839 vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
00840 PG_GETARG_DATUM(0)));
00841
00842 res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
00843 TSVectorGetDatum(vector),
00844 TSQueryGetDatum(query)));
00845
00846 pfree(vector);
00847 PG_FREE_IF_COPY(query, 1);
00848
00849 PG_RETURN_BOOL(res);
00850 }
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861 static int
00862 check_weight(TSVector txt, WordEntry *wptr, int8 weight)
00863 {
00864 int len = POSDATALEN(txt, wptr);
00865 int num = 0;
00866 WordEntryPos *ptr = POSDATAPTR(txt, wptr);
00867
00868 while (len--)
00869 {
00870 if (weight & (1 << WEP_GETWEIGHT(*ptr)))
00871 num++;
00872 ptr++;
00873 }
00874 return num;
00875 }
00876
00877 #define compareStatWord(a,e,t) \
00878 tsCompareString((a)->lexeme, (a)->lenlexeme, \
00879 STRPTR(t) + (e)->pos, (e)->len, \
00880 false)
00881
00882 static void
00883 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
00884 {
00885 WordEntry *we = ARRPTR(txt) + off;
00886 StatEntry *node = stat->root,
00887 *pnode = NULL;
00888 int n,
00889 res = 0;
00890 uint32 depth = 1;
00891
00892 if (stat->weight == 0)
00893 n = (we->haspos) ? POSDATALEN(txt, we) : 1;
00894 else
00895 n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
00896
00897 if (n == 0)
00898 return;
00899
00900 while (node)
00901 {
00902 res = compareStatWord(node, we, txt);
00903
00904 if (res == 0)
00905 {
00906 break;
00907 }
00908 else
00909 {
00910 pnode = node;
00911 node = (res < 0) ? node->left : node->right;
00912 }
00913 depth++;
00914 }
00915
00916 if (depth > stat->maxdepth)
00917 stat->maxdepth = depth;
00918
00919 if (node == NULL)
00920 {
00921 node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
00922 node->left = node->right = NULL;
00923 node->ndoc = 1;
00924 node->nentry = n;
00925 node->lenlexeme = we->len;
00926 memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
00927
00928 if (pnode == NULL)
00929 {
00930 stat->root = node;
00931 }
00932 else
00933 {
00934 if (res < 0)
00935 pnode->left = node;
00936 else
00937 pnode->right = node;
00938 }
00939
00940 }
00941 else
00942 {
00943 node->ndoc++;
00944 node->nentry += n;
00945 }
00946 }
00947
00948 static void
00949 chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
00950 uint32 low, uint32 high, uint32 offset)
00951 {
00952 uint32 pos;
00953 uint32 middle = (low + high) >> 1;
00954
00955 pos = (low + middle) >> 1;
00956 if (low != middle && pos >= offset && pos - offset < txt->size)
00957 insertStatEntry(persistentContext, stat, txt, pos - offset);
00958 pos = (high + middle + 1) >> 1;
00959 if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
00960 insertStatEntry(persistentContext, stat, txt, pos - offset);
00961
00962 if (low != middle)
00963 chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
00964 if (high != middle + 1)
00965 chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
00966 }
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980 static TSVectorStat *
00981 ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
00982 {
00983 TSVector txt = DatumGetTSVector(data);
00984 uint32 i,
00985 nbit = 0,
00986 offset;
00987
00988 if (stat == NULL)
00989 {
00990 stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
00991 stat->maxdepth = 1;
00992 }
00993
00994
00995 if (txt == NULL || txt->size == 0)
00996 {
00997 if (txt && txt != (TSVector) DatumGetPointer(data))
00998 pfree(txt);
00999 return stat;
01000 }
01001
01002 i = txt->size - 1;
01003 for (; i > 0; i >>= 1)
01004 nbit++;
01005
01006 nbit = 1 << nbit;
01007 offset = (nbit - txt->size) / 2;
01008
01009 insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
01010 chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
01011
01012 return stat;
01013 }
01014
01015 static void
01016 ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
01017 TSVectorStat *stat)
01018 {
01019 TupleDesc tupdesc;
01020 MemoryContext oldcontext;
01021 StatEntry *node;
01022
01023 funcctx->user_fctx = (void *) stat;
01024
01025 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
01026
01027 stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
01028 stat->stackpos = 0;
01029
01030 node = stat->root;
01031
01032 if (node == NULL)
01033 stat->stack[stat->stackpos] = NULL;
01034 else
01035 for (;;)
01036 {
01037 stat->stack[stat->stackpos] = node;
01038 if (node->left)
01039 {
01040 stat->stackpos++;
01041 node = node->left;
01042 }
01043 else
01044 break;
01045 }
01046 Assert(stat->stackpos <= stat->maxdepth);
01047
01048 tupdesc = CreateTemplateTupleDesc(3, false);
01049 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
01050 TEXTOID, -1, 0);
01051 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
01052 INT4OID, -1, 0);
01053 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
01054 INT4OID, -1, 0);
01055 funcctx->tuple_desc = BlessTupleDesc(tupdesc);
01056 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
01057
01058 MemoryContextSwitchTo(oldcontext);
01059 }
01060
01061 static StatEntry *
01062 walkStatEntryTree(TSVectorStat *stat)
01063 {
01064 StatEntry *node = stat->stack[stat->stackpos];
01065
01066 if (node == NULL)
01067 return NULL;
01068
01069 if (node->ndoc != 0)
01070 {
01071
01072 return node;
01073 }
01074 else if (node->right && node->right != stat->stack[stat->stackpos + 1])
01075 {
01076
01077 stat->stackpos++;
01078 node = node->right;
01079
01080
01081 for (;;)
01082 {
01083 stat->stack[stat->stackpos] = node;
01084 if (node->left)
01085 {
01086 stat->stackpos++;
01087 node = node->left;
01088 }
01089 else
01090 break;
01091 }
01092 Assert(stat->stackpos <= stat->maxdepth);
01093 }
01094 else
01095 {
01096
01097 if (stat->stackpos == 0)
01098 return NULL;
01099
01100 stat->stackpos--;
01101 return walkStatEntryTree(stat);
01102 }
01103
01104 return node;
01105 }
01106
01107 static Datum
01108 ts_process_call(FuncCallContext *funcctx)
01109 {
01110 TSVectorStat *st;
01111 StatEntry *entry;
01112
01113 st = (TSVectorStat *) funcctx->user_fctx;
01114
01115 entry = walkStatEntryTree(st);
01116
01117 if (entry != NULL)
01118 {
01119 Datum result;
01120 char *values[3];
01121 char ndoc[16];
01122 char nentry[16];
01123 HeapTuple tuple;
01124
01125 values[0] = palloc(entry->lenlexeme + 1);
01126 memcpy(values[0], entry->lexeme, entry->lenlexeme);
01127 (values[0])[entry->lenlexeme] = '\0';
01128 sprintf(ndoc, "%d", entry->ndoc);
01129 values[1] = ndoc;
01130 sprintf(nentry, "%d", entry->nentry);
01131 values[2] = nentry;
01132
01133 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
01134 result = HeapTupleGetDatum(tuple);
01135
01136 pfree(values[0]);
01137
01138
01139 entry->ndoc = 0;
01140
01141 return result;
01142 }
01143
01144 return (Datum) 0;
01145 }
01146
01147 static TSVectorStat *
01148 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
01149 {
01150 char *query = text_to_cstring(txt);
01151 int i;
01152 TSVectorStat *stat;
01153 bool isnull;
01154 Portal portal;
01155 SPIPlanPtr plan;
01156
01157 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
01158
01159 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
01160
01161 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
01162
01163 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
01164
01165 SPI_cursor_fetch(portal, true, 100);
01166
01167 if (SPI_tuptable == NULL ||
01168 SPI_tuptable->tupdesc->natts != 1 ||
01169 !is_expected_type(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
01170 TSVECTOROID))
01171 ereport(ERROR,
01172 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01173 errmsg("ts_stat query must return one tsvector column")));
01174
01175 stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
01176 stat->maxdepth = 1;
01177
01178 if (ws)
01179 {
01180 char *buf;
01181
01182 buf = VARDATA(ws);
01183 while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ)
01184 {
01185 if (pg_mblen(buf) == 1)
01186 {
01187 switch (*buf)
01188 {
01189 case 'A':
01190 case 'a':
01191 stat->weight |= 1 << 3;
01192 break;
01193 case 'B':
01194 case 'b':
01195 stat->weight |= 1 << 2;
01196 break;
01197 case 'C':
01198 case 'c':
01199 stat->weight |= 1 << 1;
01200 break;
01201 case 'D':
01202 case 'd':
01203 stat->weight |= 1;
01204 break;
01205 default:
01206 stat->weight |= 0;
01207 }
01208 }
01209 buf += pg_mblen(buf);
01210 }
01211 }
01212
01213 while (SPI_processed > 0)
01214 {
01215 for (i = 0; i < SPI_processed; i++)
01216 {
01217 Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
01218
01219 if (!isnull)
01220 stat = ts_accum(persistentContext, stat, data);
01221 }
01222
01223 SPI_freetuptable(SPI_tuptable);
01224 SPI_cursor_fetch(portal, true, 100);
01225 }
01226
01227 SPI_freetuptable(SPI_tuptable);
01228 SPI_cursor_close(portal);
01229 SPI_freeplan(plan);
01230 pfree(query);
01231
01232 return stat;
01233 }
01234
01235 Datum
01236 ts_stat1(PG_FUNCTION_ARGS)
01237 {
01238 FuncCallContext *funcctx;
01239 Datum result;
01240
01241 if (SRF_IS_FIRSTCALL())
01242 {
01243 TSVectorStat *stat;
01244 text *txt = PG_GETARG_TEXT_P(0);
01245
01246 funcctx = SRF_FIRSTCALL_INIT();
01247 SPI_connect();
01248 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
01249 PG_FREE_IF_COPY(txt, 0);
01250 ts_setup_firstcall(fcinfo, funcctx, stat);
01251 SPI_finish();
01252 }
01253
01254 funcctx = SRF_PERCALL_SETUP();
01255 if ((result = ts_process_call(funcctx)) != (Datum) 0)
01256 SRF_RETURN_NEXT(funcctx, result);
01257 SRF_RETURN_DONE(funcctx);
01258 }
01259
01260 Datum
01261 ts_stat2(PG_FUNCTION_ARGS)
01262 {
01263 FuncCallContext *funcctx;
01264 Datum result;
01265
01266 if (SRF_IS_FIRSTCALL())
01267 {
01268 TSVectorStat *stat;
01269 text *txt = PG_GETARG_TEXT_P(0);
01270 text *ws = PG_GETARG_TEXT_P(1);
01271
01272 funcctx = SRF_FIRSTCALL_INIT();
01273 SPI_connect();
01274 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
01275 PG_FREE_IF_COPY(txt, 0);
01276 PG_FREE_IF_COPY(ws, 1);
01277 ts_setup_firstcall(fcinfo, funcctx, stat);
01278 SPI_finish();
01279 }
01280
01281 funcctx = SRF_PERCALL_SETUP();
01282 if ((result = ts_process_call(funcctx)) != (Datum) 0)
01283 SRF_RETURN_NEXT(funcctx, result);
01284 SRF_RETURN_DONE(funcctx);
01285 }
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298 Datum
01299 tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
01300 {
01301 return tsvector_update_trigger(fcinfo, false);
01302 }
01303
01304 Datum
01305 tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
01306 {
01307 return tsvector_update_trigger(fcinfo, true);
01308 }
01309
01310 static Datum
01311 tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
01312 {
01313 TriggerData *trigdata;
01314 Trigger *trigger;
01315 Relation rel;
01316 HeapTuple rettuple = NULL;
01317 int tsvector_attr_num,
01318 i;
01319 ParsedText prs;
01320 Datum datum;
01321 bool isnull;
01322 text *txt;
01323 Oid cfgId;
01324
01325
01326 if (!CALLED_AS_TRIGGER(fcinfo))
01327 elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
01328
01329 trigdata = (TriggerData *) fcinfo->context;
01330 if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
01331 elog(ERROR, "tsvector_update_trigger: must be fired for row");
01332 if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
01333 elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
01334
01335 if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
01336 rettuple = trigdata->tg_trigtuple;
01337 else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
01338 rettuple = trigdata->tg_newtuple;
01339 else
01340 elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
01341
01342 trigger = trigdata->tg_trigger;
01343 rel = trigdata->tg_relation;
01344
01345 if (trigger->tgnargs < 3)
01346 elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
01347
01348
01349 tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
01350 if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
01351 ereport(ERROR,
01352 (errcode(ERRCODE_UNDEFINED_COLUMN),
01353 errmsg("tsvector column \"%s\" does not exist",
01354 trigger->tgargs[0])));
01355 if (!is_expected_type(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
01356 TSVECTOROID))
01357 ereport(ERROR,
01358 (errcode(ERRCODE_DATATYPE_MISMATCH),
01359 errmsg("column \"%s\" is not of tsvector type",
01360 trigger->tgargs[0])));
01361
01362
01363 if (config_column)
01364 {
01365 int config_attr_num;
01366
01367 config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
01368 if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
01369 ereport(ERROR,
01370 (errcode(ERRCODE_UNDEFINED_COLUMN),
01371 errmsg("configuration column \"%s\" does not exist",
01372 trigger->tgargs[1])));
01373 if (!is_expected_type(SPI_gettypeid(rel->rd_att, config_attr_num),
01374 REGCONFIGOID))
01375 ereport(ERROR,
01376 (errcode(ERRCODE_DATATYPE_MISMATCH),
01377 errmsg("column \"%s\" is not of regconfig type",
01378 trigger->tgargs[1])));
01379
01380 datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
01381 if (isnull)
01382 ereport(ERROR,
01383 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
01384 errmsg("configuration column \"%s\" must not be null",
01385 trigger->tgargs[1])));
01386 cfgId = DatumGetObjectId(datum);
01387 }
01388 else
01389 {
01390 List *names;
01391
01392 names = stringToQualifiedNameList(trigger->tgargs[1]);
01393
01394 if (list_length(names) < 2)
01395 ereport(ERROR,
01396 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01397 errmsg("text search configuration name \"%s\" must be schema-qualified",
01398 trigger->tgargs[1])));
01399 cfgId = get_ts_config_oid(names, false);
01400 }
01401
01402
01403 prs.lenwords = 32;
01404 prs.curwords = 0;
01405 prs.pos = 0;
01406 prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
01407
01408
01409 for (i = 2; i < trigger->tgnargs; i++)
01410 {
01411 int numattr;
01412
01413 numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
01414 if (numattr == SPI_ERROR_NOATTRIBUTE)
01415 ereport(ERROR,
01416 (errcode(ERRCODE_UNDEFINED_COLUMN),
01417 errmsg("column \"%s\" does not exist",
01418 trigger->tgargs[i])));
01419 if (!is_text_type(SPI_gettypeid(rel->rd_att, numattr)))
01420 ereport(ERROR,
01421 (errcode(ERRCODE_DATATYPE_MISMATCH),
01422 errmsg("column \"%s\" is not of a character type",
01423 trigger->tgargs[i])));
01424
01425 datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
01426 if (isnull)
01427 continue;
01428
01429 txt = DatumGetTextP(datum);
01430
01431 parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
01432
01433 if (txt != (text *) DatumGetPointer(datum))
01434 pfree(txt);
01435 }
01436
01437
01438 if (prs.curwords)
01439 {
01440 datum = PointerGetDatum(make_tsvector(&prs));
01441 rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
01442 &datum, NULL);
01443 pfree(DatumGetPointer(datum));
01444 }
01445 else
01446 {
01447 TSVector out = palloc(CALCDATASIZE(0, 0));
01448
01449 SET_VARSIZE(out, CALCDATASIZE(0, 0));
01450 out->size = 0;
01451 datum = PointerGetDatum(out);
01452 rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
01453 &datum, NULL);
01454 pfree(prs.words);
01455 }
01456
01457 if (rettuple == NULL)
01458 elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple",
01459 SPI_result);
01460
01461 return PointerGetDatum(rettuple);
01462 }