00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "postgres.h"
00015
00016 #include "catalog/namespace.h"
00017 #include "commands/defrem.h"
00018 #include "tsearch/ts_cache.h"
00019 #include "tsearch/ts_locale.h"
00020 #include "tsearch/ts_utils.h"
00021 #include "utils/builtins.h"
00022
00023
00024
00025
00026
/*
 * Flag placed in TSLexeme.flags by addWrd() for a substitute word that was
 * prefixed with '*' in the thesaurus file.  compileTheSubstitute() copies
 * such a word unchanged instead of passing it through the subdictionary,
 * and clears the flag in the compiled result.
 */
00027 #define DT_USEASIS 0x1000
00028
/*
 * One occurrence of a sample lexeme inside a thesaurus rule.
 */
00029 typedef struct LexemeInfo
00030 {
00031 uint16 idsubst; /* index of the rule in DictThesaurus.subst[] */
00032 uint16 posinsubst; /* position of the word within the rule's sample phrase */
00033 uint16 tnvariant; /* number of lexemes in the subdictionary variant this came from */
00034 struct LexemeInfo *nextentry; /* next occurrence of the same lexeme (chain built by compileTheLexeme) */
00035 struct LexemeInfo *nextvariant; /* next candidate rule (chain built by findVariant at lexize time) */
00036 } LexemeInfo;
00037
/*
 * A sample lexeme together with the list of places where it occurs.
 * After compileTheLexeme() a NULL lexeme stands for a stop word ("?" in the
 * thesaurus file).
 */
00038 typedef struct
00039 {
00040 char *lexeme; /* palloc'd, NUL-terminated text; NULL for a stop word after compilation */
00041 LexemeInfo *entries; /* head of the nextentry chain for this lexeme */
00042 } TheLexeme;
00043
/*
 * The right-hand side (substitute phrase) of one thesaurus rule.
 */
00044 typedef struct
00045 {
00046 uint16 lastlexeme; /* position of the last sample word (number of sample words - 1) */
00047 uint16 reslen; /* number of lexemes in res[], set by compileTheSubstitute */
00048 TSLexeme *res; /* substitute lexemes */
00049 } TheSubstitute;
00050
/*
 * Private state of one thesaurus dictionary, built by thesaurus_init().
 */
00051 typedef struct
00052 {
00053
/* subdictionary used to normalize words, both at load time and at lexize time */
00054 Oid subdictOid;
00055 TSDictionaryCacheEntry *subdict;
00056
00057
/*
 * Sample lexemes.  After compileTheLexeme() the array is sorted with
 * cmpTheLexeme so that findTheLexeme() can bsearch it.
 */
00058 TheLexeme *wrds;
00059 int nwrds; /* number of used elements in wrds[] */
00060 int ntwrds; /* number of allocated elements in wrds[] */
00061
00062
00063
00064
/* substitute phrases, indexed by LexemeInfo.idsubst */
00065 TheSubstitute *subst;
00066 int nsubst; /* allocated size while reading; number of rules afterwards */
00067 } DictThesaurus;
00068
00069
00070 static void
00071 newLexeme(DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 posinsubst)
00072 {
00073 TheLexeme *ptr;
00074
00075 if (d->nwrds >= d->ntwrds)
00076 {
00077 if (d->ntwrds == 0)
00078 {
00079 d->ntwrds = 16;
00080 d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
00081 }
00082 else
00083 {
00084 d->ntwrds *= 2;
00085 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
00086 }
00087 }
00088
00089 ptr = d->wrds + d->nwrds;
00090 d->nwrds++;
00091
00092 ptr->lexeme = palloc(e - b + 1);
00093
00094 memcpy(ptr->lexeme, b, e - b);
00095 ptr->lexeme[e - b] = '\0';
00096
00097 ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
00098
00099 ptr->entries->nextentry = NULL;
00100 ptr->entries->idsubst = idsubst;
00101 ptr->entries->posinsubst = posinsubst;
00102 }
00103
00104 static void
00105 addWrd(DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
00106 {
00107 static int nres = 0;
00108 static int ntres = 0;
00109 TheSubstitute *ptr;
00110
00111 if (nwrd == 0)
00112 {
00113 nres = ntres = 0;
00114
00115 if (idsubst >= d->nsubst)
00116 {
00117 if (d->nsubst == 0)
00118 {
00119 d->nsubst = 16;
00120 d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
00121 }
00122 else
00123 {
00124 d->nsubst *= 2;
00125 d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
00126 }
00127 }
00128 }
00129
00130 ptr = d->subst + idsubst;
00131
00132 ptr->lastlexeme = posinsubst - 1;
00133
00134 if (nres + 1 >= ntres)
00135 {
00136 if (ntres == 0)
00137 {
00138 ntres = 2;
00139 ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
00140 }
00141 else
00142 {
00143 ntres *= 2;
00144 ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
00145 }
00146
00147 }
00148
00149 ptr->res[nres].lexeme = palloc(e - b + 1);
00150 memcpy(ptr->res[nres].lexeme, b, e - b);
00151 ptr->res[nres].lexeme[e - b] = '\0';
00152
00153 ptr->res[nres].nvariant = nwrd;
00154 if (useasis)
00155 ptr->res[nres].flags = DT_USEASIS;
00156 else
00157 ptr->res[nres].flags = 0;
00158
00159 ptr->res[++nres].lexeme = NULL;
00160 }
00161
/* States of the line parser in thesaurusRead() */
00162 #define TR_WAITLEX 1 /* before ':' - between sample words */
00163 #define TR_INLEX 2 /* before ':' - inside a sample word */
00164 #define TR_WAITSUBS 3 /* after ':' - between substitute words */
00165 #define TR_INSUBS 4 /* after ':' - inside a substitute word */
00166
00167 static void
00168 thesaurusRead(char *filename, DictThesaurus *d)
00169 {
00170 tsearch_readline_state trst;
00171 uint16 idsubst = 0;
00172 bool useasis = false;
00173 char *line;
00174
00175 filename = get_tsearch_config_filename(filename, "ths");
00176 if (!tsearch_readline_begin(&trst, filename))
00177 ereport(ERROR,
00178 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00179 errmsg("could not open thesaurus file \"%s\": %m",
00180 filename)));
00181
00182 while ((line = tsearch_readline(&trst)) != NULL)
00183 {
00184 char *ptr;
00185 int state = TR_WAITLEX;
00186 char *beginwrd = NULL;
00187 uint16 posinsubst = 0;
00188 uint16 nwrd = 0;
00189
00190 ptr = line;
00191
00192
00193 while (*ptr && t_isspace(ptr))
00194 ptr += pg_mblen(ptr);
00195
00196 if (t_iseq(ptr, '#') || *ptr == '\0' ||
00197 t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
00198 {
00199 pfree(line);
00200 continue;
00201 }
00202
00203 while (*ptr)
00204 {
00205 if (state == TR_WAITLEX)
00206 {
00207 if (t_iseq(ptr, ':'))
00208 {
00209 if (posinsubst == 0)
00210 ereport(ERROR,
00211 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00212 errmsg("unexpected delimiter")));
00213 state = TR_WAITSUBS;
00214 }
00215 else if (!t_isspace(ptr))
00216 {
00217 beginwrd = ptr;
00218 state = TR_INLEX;
00219 }
00220 }
00221 else if (state == TR_INLEX)
00222 {
00223 if (t_iseq(ptr, ':'))
00224 {
00225 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
00226 state = TR_WAITSUBS;
00227 }
00228 else if (t_isspace(ptr))
00229 {
00230 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
00231 state = TR_WAITLEX;
00232 }
00233 }
00234 else if (state == TR_WAITSUBS)
00235 {
00236 if (t_iseq(ptr, '*'))
00237 {
00238 useasis = true;
00239 state = TR_INSUBS;
00240 beginwrd = ptr + pg_mblen(ptr);
00241 }
00242 else if (t_iseq(ptr, '\\'))
00243 {
00244 useasis = false;
00245 state = TR_INSUBS;
00246 beginwrd = ptr + pg_mblen(ptr);
00247 }
00248 else if (!t_isspace(ptr))
00249 {
00250 useasis = false;
00251 beginwrd = ptr;
00252 state = TR_INSUBS;
00253 }
00254 }
00255 else if (state == TR_INSUBS)
00256 {
00257 if (t_isspace(ptr))
00258 {
00259 if (ptr == beginwrd)
00260 ereport(ERROR,
00261 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00262 errmsg("unexpected end of line or lexeme")));
00263 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
00264 state = TR_WAITSUBS;
00265 }
00266 }
00267 else
00268 elog(ERROR, "unrecognized thesaurus state: %d", state);
00269
00270 ptr += pg_mblen(ptr);
00271 }
00272
00273 if (state == TR_INSUBS)
00274 {
00275 if (ptr == beginwrd)
00276 ereport(ERROR,
00277 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00278 errmsg("unexpected end of line or lexeme")));
00279 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
00280 }
00281
00282 idsubst++;
00283
00284 if (!(nwrd && posinsubst))
00285 ereport(ERROR,
00286 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00287 errmsg("unexpected end of line")));
00288
00289 pfree(line);
00290 }
00291
00292 d->nsubst = idsubst;
00293
00294 tsearch_readline_end(&trst);
00295 }
00296
00297 static TheLexeme *
00298 addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
00299 {
00300 if (*nnw >= *tnm)
00301 {
00302 *tnm *= 2;
00303 newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
00304 }
00305
00306 newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
00307
00308 if (lexeme && lexeme->lexeme)
00309 {
00310 newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
00311 newwrds[*nnw].entries->tnvariant = tnvariant;
00312 }
00313 else
00314 {
00315 newwrds[*nnw].lexeme = NULL;
00316 newwrds[*nnw].entries->tnvariant = 1;
00317 }
00318
00319 newwrds[*nnw].entries->idsubst = src->idsubst;
00320 newwrds[*nnw].entries->posinsubst = src->posinsubst;
00321
00322 newwrds[*nnw].entries->nextentry = NULL;
00323
00324 (*nnw)++;
00325 return newwrds;
00326 }
00327
00328 static int
00329 cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
00330 {
00331 if (a == NULL || b == NULL)
00332 return 0;
00333
00334 if (a->idsubst == b->idsubst)
00335 {
00336 if (a->posinsubst == b->posinsubst)
00337 {
00338 if (a->tnvariant == b->tnvariant)
00339 return 0;
00340
00341 return (a->tnvariant > b->tnvariant) ? 1 : -1;
00342 }
00343
00344 return (a->posinsubst > b->posinsubst) ? 1 : -1;
00345 }
00346
00347 return (a->idsubst > b->idsubst) ? 1 : -1;
00348 }
00349
00350 static int
00351 cmpLexeme(const TheLexeme *a, const TheLexeme *b)
00352 {
00353 if (a->lexeme == NULL)
00354 {
00355 if (b->lexeme == NULL)
00356 return 0;
00357 else
00358 return 1;
00359 }
00360 else if (b->lexeme == NULL)
00361 return -1;
00362
00363 return strcmp(a->lexeme, b->lexeme);
00364 }
00365
00366 static int
00367 cmpLexemeQ(const void *a, const void *b)
00368 {
00369 return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
00370 }
00371
00372 static int
00373 cmpTheLexeme(const void *a, const void *b)
00374 {
00375 const TheLexeme *la = (const TheLexeme *) a;
00376 const TheLexeme *lb = (const TheLexeme *) b;
00377 int res;
00378
00379 if ((res = cmpLexeme(la, lb)) != 0)
00380 return res;
00381
00382 return -cmpLexemeInfo(la->entries, lb->entries);
00383 }
00384
/*
 * compileTheLexeme
 *      Replace the raw sample words in d->wrds by the lexemes the
 *      subdictionary produces for them, then sort the result and merge
 *      duplicates.
 *
 * A sample word "?" becomes a stop-word placeholder (NULL lexeme).  Any
 * other sample word must be recognized by the subdictionary and must not be
 * a stop word, otherwise an error is raised.  On return d->wrds is sorted
 * with cmpTheLexeme, contains each lexeme text once, and each element's
 * entries list chains all of its occurrences through nextentry.
 */
00385 static void
00386 compileTheLexeme(DictThesaurus *d)
00387 {
00388 int i,
00389 nnw = 0,
00390 tnm = 16;
00391 TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
00392 *ptrwrds;
00393
00394 for (i = 0; i < d->nwrds; i++)
00395 {
00396 TSLexeme *ptr;
00397
/* "?" in a sample phrase stands for any stop word */
00398 if (strcmp(d->wrds[i].lexeme, "?") == 0)
00399 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
00400 else
00401 {
/* normalize the sample word with the subdictionary */
00402 ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
00403 PointerGetDatum(d->subdict->dictData),
00404 PointerGetDatum(d->wrds[i].lexeme),
00405 Int32GetDatum(strlen(d->wrds[i].lexeme)),
00406 PointerGetDatum(NULL)));
00407
00408 if (!ptr)
00409 ereport(ERROR,
00410 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00411 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
00412 d->wrds[i].lexeme,
00413 d->wrds[i].entries->idsubst + 1)));
00414 else if (!(ptr->lexeme))
00415 ereport(ERROR,
00416 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00417 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
00418 d->wrds[i].lexeme,
00419 d->wrds[i].entries->idsubst + 1),
00420 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
00421 else
00422 {
/* process the result one variant (run of equal nvariant) at a time */
00423 while (ptr->lexeme)
00424 {
00425 TSLexeme *remptr = ptr + 1;
00426 int tnvar = 1;
00427 int curvar = ptr->nvariant;
00428
00429
/* count the lexemes belonging to the current variant */
00430 while (remptr->lexeme)
00431 {
00432 if (remptr->nvariant != (remptr - 1)->nvariant)
00433 break;
00434 tnvar++;
00435 remptr++;
00436 }
00437
/* store every lexeme of the variant, tagged with the variant's size */
00438 remptr = ptr;
00439 while (remptr->lexeme && remptr->nvariant == curvar)
00440 {
00441 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
00442 remptr++;
00443 }
00444
00445 ptr = remptr;
00446 }
00447 }
00448 }
00449
/* the raw word and its entry have been copied; release them */
00450 pfree(d->wrds[i].lexeme);
00451 pfree(d->wrds[i].entries);
00452 }
00453
/* switch over to the compiled array */
00454 if (d->wrds)
00455 pfree(d->wrds);
00456 d->wrds = newwrds;
00457 d->nwrds = nnw;
00458 d->ntwrds = tnm;
00459
00460 if (d->nwrds > 1)
00461 {
00462 qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
00463
00464
/*
 * Merge adjacent elements with equal lexeme text, compacting the array in
 * place: newwrds is the last kept element, ptrwrds the element examined.
 */
00465 newwrds = d->wrds;
00466 ptrwrds = d->wrds + 1;
00467 while (ptrwrds - d->wrds < d->nwrds)
00468 {
00469 if (cmpLexeme(ptrwrds, newwrds) == 0)
00470 {
/*
 * Same text: push a distinct entry onto the head of the kept element's
 * list, or drop it if it equals the current head.  Because equal texts
 * were sorted in descending entry order, pushing at the head yields a
 * list in ascending order.
 */
00471 if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
00472 {
00473 ptrwrds->entries->nextentry = newwrds->entries;
00474 newwrds->entries = ptrwrds->entries;
00475 }
00476 else
00477 pfree(ptrwrds->entries);
00478
00479 if (ptrwrds->lexeme)
00480 pfree(ptrwrds->lexeme);
00481 }
00482 else
00483 {
/* new text: keep it as the next compacted element */
00484 newwrds++;
00485 *newwrds = *ptrwrds;
00486 }
00487
00488 ptrwrds++;
00489 }
00490
/* shrink the array to the number of distinct lexemes */
00491 d->nwrds = newwrds - d->wrds + 1;
00492 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
00493 }
00494 }
00495
00496 static void
00497 compileTheSubstitute(DictThesaurus *d)
00498 {
00499 int i;
00500
00501 for (i = 0; i < d->nsubst; i++)
00502 {
00503 TSLexeme *rem = d->subst[i].res,
00504 *outptr,
00505 *inptr;
00506 int n = 2;
00507
00508 outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
00509 outptr->lexeme = NULL;
00510 inptr = rem;
00511
00512 while (inptr && inptr->lexeme)
00513 {
00514 TSLexeme *lexized,
00515 tmplex[2];
00516
00517 if (inptr->flags & DT_USEASIS)
00518 {
00519 tmplex[0] = *inptr;
00520 tmplex[0].flags = 0;
00521 tmplex[1].lexeme = NULL;
00522 lexized = tmplex;
00523 }
00524 else
00525 {
00526 lexized = (TSLexeme *) DatumGetPointer(
00527 FunctionCall4(
00528 &(d->subdict->lexize),
00529 PointerGetDatum(d->subdict->dictData),
00530 PointerGetDatum(inptr->lexeme),
00531 Int32GetDatum(strlen(inptr->lexeme)),
00532 PointerGetDatum(NULL)
00533 )
00534 );
00535 }
00536
00537 if (lexized && lexized->lexeme)
00538 {
00539 int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
00540
00541 while (lexized->lexeme)
00542 {
00543 if (outptr - d->subst[i].res + 1 >= n)
00544 {
00545 int diff = outptr - d->subst[i].res;
00546
00547 n *= 2;
00548 d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
00549 outptr = d->subst[i].res + diff;
00550 }
00551
00552 *outptr = *lexized;
00553 outptr->lexeme = pstrdup(lexized->lexeme);
00554
00555 outptr++;
00556 lexized++;
00557 }
00558
00559 if (toset > 0)
00560 d->subst[i].res[toset].flags |= TSL_ADDPOS;
00561 }
00562 else if (lexized)
00563 {
00564 ereport(ERROR,
00565 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00566 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
00567 inptr->lexeme, i + 1)));
00568 }
00569 else
00570 {
00571 ereport(ERROR,
00572 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00573 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
00574 inptr->lexeme, i + 1)));
00575 }
00576
00577 if (inptr->lexeme)
00578 pfree(inptr->lexeme);
00579 inptr++;
00580 }
00581
00582 if (outptr == d->subst[i].res)
00583 ereport(ERROR,
00584 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00585 errmsg("thesaurus substitute phrase is empty (rule %d)",
00586 i + 1)));
00587
00588 d->subst[i].reslen = outptr - d->subst[i].res;
00589
00590 pfree(rem);
00591 }
00592 }
00593
00594 Datum
00595 thesaurus_init(PG_FUNCTION_ARGS)
00596 {
00597 List *dictoptions = (List *) PG_GETARG_POINTER(0);
00598 DictThesaurus *d;
00599 char *subdictname = NULL;
00600 bool fileloaded = false;
00601 ListCell *l;
00602
00603 d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
00604
00605 foreach(l, dictoptions)
00606 {
00607 DefElem *defel = (DefElem *) lfirst(l);
00608
00609 if (pg_strcasecmp("DictFile", defel->defname) == 0)
00610 {
00611 if (fileloaded)
00612 ereport(ERROR,
00613 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00614 errmsg("multiple DictFile parameters")));
00615 thesaurusRead(defGetString(defel), d);
00616 fileloaded = true;
00617 }
00618 else if (pg_strcasecmp("Dictionary", defel->defname) == 0)
00619 {
00620 if (subdictname)
00621 ereport(ERROR,
00622 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00623 errmsg("multiple Dictionary parameters")));
00624 subdictname = pstrdup(defGetString(defel));
00625 }
00626 else
00627 {
00628 ereport(ERROR,
00629 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00630 errmsg("unrecognized Thesaurus parameter: \"%s\"",
00631 defel->defname)));
00632 }
00633 }
00634
00635 if (!fileloaded)
00636 ereport(ERROR,
00637 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00638 errmsg("missing DictFile parameter")));
00639 if (!subdictname)
00640 ereport(ERROR,
00641 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00642 errmsg("missing Dictionary parameter")));
00643
00644 d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdictname), false);
00645 d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
00646
00647 compileTheLexeme(d);
00648 compileTheSubstitute(d);
00649
00650 PG_RETURN_POINTER(d);
00651 }
00652
00653 static LexemeInfo *
00654 findTheLexeme(DictThesaurus *d, char *lexeme)
00655 {
00656 TheLexeme key,
00657 *res;
00658
00659 if (d->nwrds == 0)
00660 return NULL;
00661
00662 key.lexeme = lexeme;
00663 key.entries = NULL;
00664
00665 res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
00666
00667 if (res == NULL)
00668 return NULL;
00669 return res->entries;
00670 }
00671
00672 static bool
00673 matchIdSubst(LexemeInfo *stored, uint16 idsubst)
00674 {
00675 bool res = true;
00676
00677 if (stored)
00678 {
00679 res = false;
00680
00681 for (; stored; stored = stored->nextvariant)
00682 if (stored->idsubst == idsubst)
00683 {
00684 res = true;
00685 break;
00686 }
00687 }
00688
00689 return res;
00690 }
00691
/*
 * findVariant
 *      Find the rules in which all newn lexemes of one subdictionary
 *      variant occur at position curpos, and add them to the chain "in".
 *
 * newin[0..newn-1] are the entries lists (linked through nextentry) of the
 * variant's lexemes; the array elements are advanced as the lists are
 * walked.  A rule qualifies when every list has an entry with that idsubst
 * whose posinsubst equals curpos and whose tnvariant equals newn, the rule
 * passes matchIdSubst(stored, ...) and it is not already in "in".  A
 * qualifying entry is pushed on the head of "in" through nextvariant.
 * Returns the (possibly extended) chain once any of the lists is exhausted.
 */
00692 static LexemeInfo *
00693 findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
00694 {
00695 for (;;)
00696 {
00697 int i;
/* ptr is the current candidate; its idsubst is the rule being tried */
00698 LexemeInfo *ptr = newin[0];
00699
00700 for (i = 0; i < newn; i++)
00701 {
/* skip entries of list i that belong to earlier rules */
00702 while (newin[i] && newin[i]->idsubst < ptr->idsubst)
00703 newin[i] = newin[i]->nextentry;
00704
00705 if (newin[i] == NULL)
00706 return in;
00707
/* list i has no entry for the candidate rule: retry with a later rule */
00708 if (newin[i]->idsubst > ptr->idsubst)
00709 {
00710 ptr = newin[i];
/* i = -1 so that the loop increment restarts the scan at list 0 */
00711 i = -1;
00712 continue;
00713 }
00714
/* within the candidate rule, look for an entry at curpos with the right variant size */
00715 while (newin[i]->idsubst == ptr->idsubst)
00716 {
00717 if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
00718 {
00719 ptr = newin[i];
00720 break;
00721 }
00722
00723 newin[i] = newin[i]->nextentry;
00724 if (newin[i] == NULL)
00725 return in;
00726 }
00727
/* ran past the candidate rule without a suitable entry: restart with the new rule */
00728 if (newin[i]->idsubst != ptr->idsubst)
00729 {
00730 ptr = newin[i];
00731 i = -1;
00732 continue;
00733 }
00734 }
00735
/* i == newn means every list agreed on the candidate rule */
00736 if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
00737 {
00738
/* found: push the entry on the head of the result chain */
00739 ptr->nextvariant = in;
00740 in = ptr;
00741 }
00742
00743
/* step every list forward and look for further rules */
00744 for (i = 0; i < newn; i++)
00745 newin[i] = newin[i]->nextentry;
00746 }
00747 }
00748
00749 static TSLexeme *
00750 copyTSLexeme(TheSubstitute *ts)
00751 {
00752 TSLexeme *res;
00753 uint16 i;
00754
00755 res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
00756 for (i = 0; i < ts->reslen; i++)
00757 {
00758 res[i] = ts->res[i];
00759 res[i].lexeme = pstrdup(ts->res[i].lexeme);
00760 }
00761
00762 res[ts->reslen].lexeme = NULL;
00763
00764 return res;
00765 }
00766
00767 static TSLexeme *
00768 checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
00769 {
00770 *moreres = false;
00771 while (info)
00772 {
00773 Assert(info->idsubst < d->nsubst);
00774 if (info->nextvariant)
00775 *moreres = true;
00776 if (d->subst[info->idsubst].lastlexeme == curpos)
00777 return copyTSLexeme(d->subst + info->idsubst);
00778 info = info->nextvariant;
00779 }
00780
00781 return NULL;
00782 }
00783
00784 Datum
00785 thesaurus_lexize(PG_FUNCTION_ARGS)
00786 {
00787 DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
00788 DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
00789 TSLexeme *res = NULL;
00790 LexemeInfo *stored,
00791 *info = NULL;
00792 uint16 curpos = 0;
00793 bool moreres = false;
00794
00795 if (PG_NARGS() != 4 || dstate == NULL)
00796 elog(ERROR, "forbidden call of thesaurus or nested call");
00797
00798 if (dstate->isend)
00799 PG_RETURN_POINTER(NULL);
00800 stored = (LexemeInfo *) dstate->private_state;
00801
00802 if (stored)
00803 curpos = stored->posinsubst + 1;
00804
00805 if (!d->subdict->isvalid)
00806 d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
00807
00808 res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
00809 PointerGetDatum(d->subdict->dictData),
00810 PG_GETARG_DATUM(1),
00811 PG_GETARG_DATUM(2),
00812 PointerGetDatum(NULL)));
00813
00814 if (res && res->lexeme)
00815 {
00816 TSLexeme *ptr = res,
00817 *basevar;
00818
00819 while (ptr->lexeme)
00820 {
00821 uint16 nv = ptr->nvariant;
00822 uint16 i,
00823 nlex = 0;
00824 LexemeInfo **infos;
00825
00826 basevar = ptr;
00827 while (ptr->lexeme && nv == ptr->nvariant)
00828 {
00829 nlex++;
00830 ptr++;
00831 }
00832
00833 infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
00834 for (i = 0; i < nlex; i++)
00835 if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
00836 break;
00837
00838 if (i < nlex)
00839 {
00840
00841 pfree(infos);
00842 continue;
00843 }
00844
00845 info = findVariant(info, stored, curpos, infos, nlex);
00846 }
00847 }
00848 else if (res)
00849 {
00850 LexemeInfo *infos = findTheLexeme(d, NULL);
00851
00852 info = findVariant(NULL, stored, curpos, &infos, 1);
00853 }
00854 else
00855 {
00856 info = NULL;
00857 }
00858
00859 dstate->private_state = (void *) info;
00860
00861 if (!info)
00862 {
00863 dstate->getnext = false;
00864 PG_RETURN_POINTER(NULL);
00865 }
00866
00867 if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
00868 {
00869 dstate->getnext = moreres;
00870 PG_RETURN_POINTER(res);
00871 }
00872
00873 dstate->getnext = true;
00874
00875 PG_RETURN_POINTER(NULL);
00876 }