#include "postgres.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
Go to the source code of this file.
Data Structures | |
struct | LexemeInfo |
struct | TheLexeme |
struct | TheSubstitute |
struct | DictThesaurus |
Defines | |
#define | DT_USEASIS 0x1000 |
#define | TR_WAITLEX 1 |
#define | TR_INLEX 2 |
#define | TR_WAITSUBS 3 |
#define | TR_INSUBS 4 |
Typedefs | |
typedef struct LexemeInfo | LexemeInfo |
Functions | |
static void | newLexeme (DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 posinsubst) |
static void | addWrd (DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis) |
static void | thesaurusRead (char *filename, DictThesaurus *d) |
static TheLexeme * | addCompiledLexeme (TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant) |
static int | cmpLexemeInfo (LexemeInfo *a, LexemeInfo *b) |
static int | cmpLexeme (const TheLexeme *a, const TheLexeme *b) |
static int | cmpLexemeQ (const void *a, const void *b) |
static int | cmpTheLexeme (const void *a, const void *b) |
static void | compileTheLexeme (DictThesaurus *d) |
static void | compileTheSubstitute (DictThesaurus *d) |
Datum | thesaurus_init (PG_FUNCTION_ARGS) |
static LexemeInfo * | findTheLexeme (DictThesaurus *d, char *lexeme) |
static bool | matchIdSubst (LexemeInfo *stored, uint16 idsubst) |
static LexemeInfo * | findVariant (LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn) |
static TSLexeme * | copyTSLexeme (TheSubstitute *ts) |
static TSLexeme * | checkMatch (DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres) |
Datum | thesaurus_lexize (PG_FUNCTION_ARGS) |
#define DT_USEASIS 0x1000 |
Definition at line 27 of file dict_thesaurus.c.
Referenced by compileTheSubstitute().
#define TR_INLEX 2 |
Definition at line 163 of file dict_thesaurus.c.
Referenced by thesaurusRead().
#define TR_INSUBS 4 |
Definition at line 165 of file dict_thesaurus.c.
Referenced by thesaurusRead().
#define TR_WAITLEX 1 |
Definition at line 162 of file dict_thesaurus.c.
Referenced by thesaurusRead().
#define TR_WAITSUBS 3 |
Definition at line 164 of file dict_thesaurus.c.
Referenced by thesaurusRead().
typedef struct LexemeInfo LexemeInfo |
static TheLexeme* addCompiledLexeme | ( | TheLexeme * | newwrds, | |
int * | nnw, | |||
int * | tnm, | |||
TSLexeme * | lexeme, | |||
LexemeInfo * | src, | |||
uint16 | tnvariant | |||
) | [static] |
Definition at line 298 of file dict_thesaurus.c.
References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, TSLexeme::lexeme, LexemeInfo::nextentry, palloc(), LexemeInfo::posinsubst, pstrdup(), repalloc(), and LexemeInfo::tnvariant.
Referenced by compileTheLexeme().
{
	TheLexeme  *slot;
	LexemeInfo *entry;

	/*
	 * Append one compiled lexeme to the growing array "newwrds".
	 *
	 * *nnw is the number of slots in use and *tnm the number allocated; both
	 * are updated here.  The (possibly relocated) array is returned, so the
	 * caller must always continue with the returned pointer.
	 */

	/* Array full: double its capacity. */
	if (*nnw >= *tnm)
	{
		*tnm *= 2;
		newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
	}

	slot = &newwrds[*nnw];
	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
	slot->entries = entry;

	if (lexeme != NULL && lexeme->lexeme != NULL)
	{
		/* Real word: keep a private copy and the caller's variant size. */
		slot->lexeme = pstrdup(lexeme->lexeme);
		entry->tnvariant = tnvariant;
	}
	else
	{
		/* No lexeme text given: store a NULL lexeme with variant size 1. */
		slot->lexeme = NULL;
		entry->tnvariant = 1;
	}

	/* Rule number and position are inherited from the source entry. */
	entry->idsubst = src->idsubst;
	entry->posinsubst = src->posinsubst;
	entry->nextentry = NULL;

	(*nnw)++;

	return newwrds;
}
static void addWrd | ( | DictThesaurus * | d, | |
char * | b, | |||
char * | e, | |||
uint16 | idsubst, | |||
uint16 | nwrd, | |||
uint16 | posinsubst, | |||
bool | useasis | |||
) | [static] |
Definition at line 105 of file dict_thesaurus.c.
References TSLexeme::flags, TheSubstitute::lastlexeme, TSLexeme::lexeme, DictThesaurus::nsubst, TSLexeme::nvariant, palloc(), repalloc(), TheSubstitute::res, and DictThesaurus::subst.
Referenced by thesaurusRead().
{
	/*
	 * Append one word to the substitute phrase of rule "idsubst".
	 *
	 * The two statics track the result array of the phrase currently being
	 * built; they are reset whenever the first word (nwrd == 0) of a phrase
	 * arrives, so calls for one phrase must not be interleaved with another.
	 */
	static int	filled = 0;		/* result slots already written */
	static int	capacity = 0;	/* result slots allocated */
	TheSubstitute *rule;
	TSLexeme   *slot;

	if (nwrd == 0)
	{
		/* First word of a new phrase: start a fresh result array. */
		filled = capacity = 0;

		/* Make sure d->subst has room for this rule. */
		if (idsubst >= d->nsubst)
		{
			if (d->nsubst != 0)
			{
				d->nsubst *= 2;
				d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
			}
			else
			{
				d->nsubst = 16;
				d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
			}
		}
	}

	rule = &d->subst[idsubst];
	rule->lastlexeme = posinsubst - 1;

	/* Keep room for the new word plus the terminating entry. */
	if (filled + 1 >= capacity)
	{
		if (capacity != 0)
		{
			capacity *= 2;
			rule->res = (TSLexeme *) repalloc(rule->res, sizeof(TSLexeme) * capacity);
		}
		else
		{
			capacity = 2;
			rule->res = (TSLexeme *) palloc(sizeof(TSLexeme) * capacity);
		}
	}

	/* Copy the word [b, e) as a NUL-terminated string. */
	slot = &rule->res[filled];
	slot->lexeme = palloc(e - b + 1);
	memcpy(slot->lexeme, b, e - b);
	slot->lexeme[e - b] = '\0';

	slot->nvariant = nwrd;
	slot->flags = useasis ? DT_USEASIS : 0;

	/* Terminate the array with a NULL lexeme. */
	filled++;
	rule->res[filled].lexeme = NULL;
}
static TSLexeme* checkMatch | ( | DictThesaurus * | d, | |
LexemeInfo * | info, | |||
uint16 | curpos, | |||
bool * | moreres | |||
) | [static] |
Definition at line 768 of file dict_thesaurus.c.
References Assert, copyTSLexeme(), LexemeInfo::idsubst, TheSubstitute::lastlexeme, LexemeInfo::nextvariant, DictThesaurus::nsubst, and DictThesaurus::subst.
Referenced by thesaurus_lexize().
{
	LexemeInfo *cand;

	/*
	 * Walk the nextvariant chain starting at "info" and return a copy of the
	 * substitute of the first rule whose last sample position equals curpos.
	 * *moreres is set to true as soon as a visited candidate has a successor
	 * in the chain.  Returns NULL when no rule ends at curpos.
	 */
	*moreres = false;

	for (cand = info; cand != NULL; cand = cand->nextvariant)
	{
		TheSubstitute *rule;

		Assert(cand->idsubst < d->nsubst);

		if (cand->nextvariant != NULL)
			*moreres = true;

		rule = d->subst + cand->idsubst;
		if (rule->lastlexeme == curpos)
			return copyTSLexeme(rule);
	}

	return NULL;
}
static int cmpLexeme | ( | const TheLexeme * | a, | |
const TheLexeme * | b | |||
) | [static] |
Definition at line 351 of file dict_thesaurus.c.
References TheLexeme::lexeme, and NULL.
Referenced by cmpLexemeQ(), cmpTheLexeme(), and compileTheLexeme().
static int cmpLexemeInfo | ( | LexemeInfo * | a, | |
LexemeInfo * | b | |||
) | [static] |
Definition at line 329 of file dict_thesaurus.c.
References LexemeInfo::idsubst, NULL, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.
Referenced by cmpTheLexeme(), and compileTheLexeme().
{
	/*
	 * Three-way comparison of two entries, keyed on idsubst, then
	 * posinsubst, then tnvariant.  If either pointer is NULL the entries are
	 * reported as equal (0).
	 */
	if (!a || !b)
		return 0;

	if (a->idsubst != b->idsubst)
		return (a->idsubst > b->idsubst) ? 1 : -1;

	if (a->posinsubst != b->posinsubst)
		return (a->posinsubst > b->posinsubst) ? 1 : -1;

	if (a->tnvariant != b->tnvariant)
		return (a->tnvariant > b->tnvariant) ? 1 : -1;

	return 0;
}
static int cmpLexemeQ | ( | const void * | a, | |
const void * | b | |||
) | [static] |
Definition at line 367 of file dict_thesaurus.c.
References cmpLexeme().
Referenced by findTheLexeme().
static int cmpTheLexeme | ( | const void * | a, | |
const void * | b | |||
) | [static] |
Definition at line 373 of file dict_thesaurus.c.
References cmpLexeme(), cmpLexemeInfo(), and TheLexeme::entries.
Referenced by compileTheLexeme().
static void compileTheLexeme | ( | DictThesaurus * | d | ) | [static] |
Definition at line 386 of file dict_thesaurus.c.
References addCompiledLexeme(), cmpLexeme(), cmpLexemeInfo(), cmpTheLexeme(), DatumGetPointer, TSDictionaryCacheEntry::dictData, TheLexeme::entries, ereport, errcode(), errhint(), errmsg(), ERROR, FunctionCall4, i, LexemeInfo::idsubst, Int32GetDatum, TSLexeme::lexeme, TheLexeme::lexeme, TSDictionaryCacheEntry::lexize, LexemeInfo::nextentry, DictThesaurus::ntwrds, NULL, TSLexeme::nvariant, DictThesaurus::nwrds, palloc(), pfree(), PointerGetDatum, qsort, repalloc(), DictThesaurus::subdict, and DictThesaurus::wrds.
Referenced by thesaurus_init().
{
	/*
	 * Replace the raw sample words in d->wrds with the lexemes that the
	 * subdictionary produces for them, then sort the result and merge
	 * duplicates so that each distinct lexeme appears once with a chain of
	 * entries (linked through nextentry).
	 *
	 * Raises ERRCODE_CONFIG_FILE_ERROR if a sample word is not recognized by
	 * the subdictionary or is reported by it as a stop word.
	 */
	int i, nnw = 0, tnm = 16;
	TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm), *ptrwrds;

	for (i = 0; i < d->nwrds; i++)
	{
		TSLexeme *ptr;

		/*
		 * "?" is the stop-word marker: it is not lexized; a NULL lexeme is
		 * passed to addCompiledLexeme() instead.
		 */
		if (strcmp(d->wrds[i].lexeme, "?") == 0)
			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
		else
		{
			/* Ask the subdictionary to normalize the sample word. */
			ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize), PointerGetDatum(d->subdict->dictData), PointerGetDatum(d->wrds[i].lexeme), Int32GetDatum(strlen(d->wrds[i].lexeme)), PointerGetDatum(NULL)));

			/* NULL result: unknown word; empty result: stop word. */
			if (!ptr)
				ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)", d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1)));
			else if (!(ptr->lexeme))
				ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)", d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1), errhint("Use \"?\" to represent a stop word within a sample phrase.")));
			else
			{
				/* Process the result one variant (run of equal nvariant) at a time. */
				while (ptr->lexeme)
				{
					TSLexeme *remptr = ptr + 1;
					int tnvar = 1;
					int curvar = ptr->nvariant;

					/* Count the lexemes belonging to the current variant. */
					while (remptr->lexeme)
					{
						if (remptr->nvariant != (remptr - 1)->nvariant)
							break;
						tnvar++;
						remptr++;
					}

					/* Store every lexeme of the variant, tagged with the variant's size. */
					remptr = ptr;
					while (remptr->lexeme && remptr->nvariant == curvar)
					{
						newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
						remptr++;
					}

					/* Continue with the first lexeme of the next variant. */
					ptr = remptr;
				}
			}
		}

		/* The raw word and its entry have been copied; release them. */
		pfree(d->wrds[i].lexeme);
		pfree(d->wrds[i].entries);
	}

	if (d->wrds)
		pfree(d->wrds);

	/* Install the compiled array in place of the raw one. */
	d->wrds = newwrds;
	d->nwrds = nnw;
	d->ntwrds = tnm;

	if (d->nwrds > 1)
	{
		qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);

		/*
		 * Remove duplicates in place: newwrds is the last kept element,
		 * ptrwrds scans the rest of the sorted array.
		 */
		newwrds = d->wrds;
		ptrwrds = d->wrds + 1;
		while (ptrwrds - d->wrds < d->nwrds)
		{
			if (cmpLexeme(ptrwrds, newwrds) == 0)
			{
				/*
				 * Same lexeme as the kept element.  If the entry differs,
				 * push it onto the front of the kept element's chain;
				 * otherwise it is an exact duplicate and is freed.
				 */
				if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
				{
					ptrwrds->entries->nextentry = newwrds->entries;
					newwrds->entries = ptrwrds->entries;
				}
				else
					pfree(ptrwrds->entries);

				/* The duplicate's string is no longer needed either way. */
				if (ptrwrds->lexeme)
					pfree(ptrwrds->lexeme);
			}
			else
			{
				/* New distinct lexeme: move it into the next kept slot. */
				newwrds++;
				*newwrds = *ptrwrds;
			}

			ptrwrds++;
		}

		/* Shrink the array to the number of distinct lexemes. */
		d->nwrds = newwrds - d->wrds + 1;
		d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
	}
}
static void compileTheSubstitute | ( | DictThesaurus * | d | ) | [static] |
Definition at line 497 of file dict_thesaurus.c.
References DatumGetPointer, TSDictionaryCacheEntry::dictData, DT_USEASIS, ereport, errcode(), errmsg(), ERROR, TSLexeme::flags, FunctionCall4, i, Int32GetDatum, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, DictThesaurus::nsubst, NULL, palloc(), pfree(), PointerGetDatum, pstrdup(), repalloc(), TheSubstitute::res, TheSubstitute::reslen, DictThesaurus::subdict, and DictThesaurus::subst.
Referenced by thesaurus_init().
{
	/*
	 * For every rule, rebuild the substitute phrase d->subst[i].res: each
	 * word is either taken as is (DT_USEASIS) or replaced by the lexemes the
	 * subdictionary returns for it.  The number of resulting lexemes is
	 * stored in reslen.
	 *
	 * Raises ERRCODE_CONFIG_FILE_ERROR if a substitute word is a stop word,
	 * is not recognized by the subdictionary, or the phrase ends up empty.
	 */
	int i;

	for (i = 0; i < d->nsubst; i++)
	{
		/* rem: the raw phrase being consumed; outptr: next free output slot. */
		TSLexeme *rem = d->subst[i].res, *outptr, *inptr;
		int n = 2;

		outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
		outptr->lexeme = NULL;
		inptr = rem;

		while (inptr && inptr->lexeme)
		{
			TSLexeme *lexized, tmplex[2];

			if (inptr->flags & DT_USEASIS)
			{
				/* Use-as-is word: skip lexizing, fake a one-element result. */
				tmplex[0] = *inptr;
				tmplex[0].flags = 0;
				tmplex[1].lexeme = NULL;
				lexized = tmplex;
			}
			else
			{
				lexized = (TSLexeme *) DatumGetPointer( FunctionCall4( &(d->subdict->lexize), PointerGetDatum(d->subdict->dictData), PointerGetDatum(inptr->lexeme), Int32GetDatum(strlen(inptr->lexeme)), PointerGetDatum(NULL) ) );
			}

			if (lexized && lexized->lexeme)
			{
				/*
				 * Remember the output index where this word's lexemes start,
				 * unless it is the very first output slot (then -1).
				 */
				int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;

				while (lexized->lexeme)
				{
					/* Grow the output array; re-base outptr since repalloc may move it. */
					if (outptr - d->subst[i].res + 1 >= n)
					{
						int diff = outptr - d->subst[i].res;
						n *= 2;
						d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
						outptr = d->subst[i].res + diff;
					}

					/* Copy the lexeme struct and give it its own string. */
					*outptr = *lexized;
					outptr->lexeme = pstrdup(lexized->lexeme);
					outptr++;
					lexized++;
				}

				/* Flag the first lexeme of every word after the first with TSL_ADDPOS. */
				if (toset > 0)
					d->subst[i].res[toset].flags |= TSL_ADDPOS;
			}
			else if (lexized)
			{
				/* Non-NULL but empty result. */
				ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)", inptr->lexeme, i + 1)));
			}
			else
			{
				/* NULL result. */
				ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)", inptr->lexeme, i + 1)));
			}

			/* The raw word has been consumed. */
			if (inptr->lexeme)
				pfree(inptr->lexeme);
			inptr++;
		}

		if (outptr == d->subst[i].res)
			ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("thesaurus substitute phrase is empty (rule %d)", i + 1)));

		/*
		 * Record the phrase length.  NOTE(review): no terminating NULL
		 * lexeme is written after the last copied element here, so readers
		 * presumably rely on reslen - confirm against copyTSLexeme().
		 */
		d->subst[i].reslen = outptr - d->subst[i].res;
		pfree(rem);
	}
}
static TSLexeme* copyTSLexeme | ( | TheSubstitute * | ts | ) | [static] |
Definition at line 750 of file dict_thesaurus.c.
References i, TSLexeme::lexeme, palloc(), pstrdup(), TheSubstitute::res, and TheSubstitute::reslen.
Referenced by checkMatch().
static LexemeInfo* findTheLexeme | ( | DictThesaurus * | d, | |
char * | lexeme | |||
) | [static] |
Definition at line 654 of file dict_thesaurus.c.
References cmpLexemeQ(), TheLexeme::entries, TheLexeme::lexeme, NULL, DictThesaurus::nwrds, and DictThesaurus::wrds.
Referenced by thesaurus_lexize().
static LexemeInfo* findVariant | ( | LexemeInfo * | in, | |
LexemeInfo * | stored, | |||
uint16 | curpos, | |||
LexemeInfo ** | newin, | |||
int | newn | |||
) | [static] |
Definition at line 693 of file dict_thesaurus.c.
References i, LexemeInfo::idsubst, matchIdSubst(), LexemeInfo::nextentry, LexemeInfo::nextvariant, NULL, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.
Referenced by thesaurus_lexize().
{
	/*
	 * newin[0..newn-1] are the entry chains (linked by nextentry) of the
	 * lexemes of one variant.  Search for rules (idsubst values) for which
	 * every chain holds an entry with posinsubst == curpos and
	 * tnvariant == newn.  Each such rule that also passes the matchIdSubst()
	 * checks against "stored" and "in" is pushed onto the front of "in" via
	 * nextvariant.  The function returns the resulting list once any chain
	 * is exhausted.
	 *
	 * The newin[] elements are advanced in place, so the caller's array
	 * contents are consumed.
	 *
	 * NOTE(review): the skipping logic assumes each nextentry chain is
	 * ordered by ascending idsubst - confirm against compileTheLexeme().
	 */
	for (;;)
	{
		int i;
		/* Current candidate; its idsubst is the rule all chains must reach. */
		LexemeInfo *ptr = newin[0];

		for (i = 0; i < newn; i++)
		{
			/* Skip entries of rules below the candidate rule. */
			while (newin[i] && newin[i]->idsubst < ptr->idsubst)
				newin[i] = newin[i]->nextentry;

			/* Chain exhausted: no further rule can match. */
			if (newin[i] == NULL)
				return in;

			/*
			 * This chain has nothing for the candidate rule: adopt its
			 * larger rule as the new candidate and restart from chain 0
			 * (i = -1 becomes 0 after the loop increment).
			 */
			if (newin[i]->idsubst > ptr->idsubst)
			{
				ptr = newin[i];
				i = -1;
				continue;
			}

			/* Within the candidate rule, look for the right position and variant size. */
			while (newin[i]->idsubst == ptr->idsubst)
			{
				if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
				{
					ptr = newin[i];
					break;
				}

				newin[i] = newin[i]->nextentry;
				if (newin[i] == NULL)
					return in;
			}

			/* Ran past the candidate rule without a hit: restart with the new rule. */
			if (newin[i]->idsubst != ptr->idsubst)
			{
				ptr = newin[i];
				i = -1;
				continue;
			}
		}

		/*
		 * All chains agree on ptr->idsubst.  Accept it if it is allowed by
		 * "stored" and not already present in "in".
		 */
		if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
		{
			/* Found: push onto the front of the result list. */
			ptr->nextvariant = in;
			in = ptr;
		}

		/* Step every chain forward and look for the next rule. */
		for (i = 0; i < newn; i++)
			newin[i] = newin[i]->nextentry;
	}
}
static bool matchIdSubst | ( | LexemeInfo * | stored, | |
uint16 | idsubst | |||
) | [static] |
Definition at line 673 of file dict_thesaurus.c.
References LexemeInfo::idsubst, and LexemeInfo::nextvariant.
Referenced by findVariant().
{
	LexemeInfo *cur;

	/*
	 * Report whether rule "idsubst" appears in the nextvariant chain that
	 * starts at "stored".  An empty chain (NULL) matches every rule.
	 */
	if (stored == NULL)
		return true;

	for (cur = stored; cur != NULL; cur = cur->nextvariant)
	{
		if (cur->idsubst == idsubst)
			return true;
	}

	return false;
}
static void newLexeme | ( | DictThesaurus * | d, | |
char * | b, | |||
char * | e, | |||
uint16 | idsubst, | |||
uint16 | posinsubst | |||
) | [static] |
Definition at line 71 of file dict_thesaurus.c.
References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, LexemeInfo::nextentry, DictThesaurus::ntwrds, DictThesaurus::nwrds, palloc(), LexemeInfo::posinsubst, repalloc(), and DictThesaurus::wrds.
Referenced by thesaurusRead().
{
	TheLexeme  *slot;
	LexemeInfo *entry;

	/*
	 * Append the sample word [b, e) to d->wrds, recording the rule it
	 * belongs to (idsubst) and its position within the sample phrase
	 * (posinsubst).
	 */

	/* Allocate the array on first use (16 slots), double it afterwards. */
	if (d->nwrds >= d->ntwrds)
	{
		if (d->ntwrds != 0)
		{
			d->ntwrds *= 2;
			d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
		}
		else
		{
			d->ntwrds = 16;
			d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
		}
	}

	slot = &d->wrds[d->nwrds++];

	/* Copy the word as a NUL-terminated string. */
	slot->lexeme = palloc(e - b + 1);
	memcpy(slot->lexeme, b, e - b);
	slot->lexeme[e - b] = '\0';

	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
	slot->entries = entry;
	entry->nextentry = NULL;
	entry->idsubst = idsubst;
	entry->posinsubst = posinsubst;
}
Datum thesaurus_init | ( | PG_FUNCTION_ARGS | ) |
Definition at line 595 of file dict_thesaurus.c.
References compileTheLexeme(), compileTheSubstitute(), defGetString(), DefElem::defname, ereport, errcode(), errmsg(), ERROR, get_ts_dict_oid(), lfirst, lookup_ts_dictionary_cache(), palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, pg_strcasecmp(), pstrdup(), stringToQualifiedNameList(), DictThesaurus::subdict, DictThesaurus::subdictOid, and thesaurusRead().
{
	List	   *opts = (List *) PG_GETARG_POINTER(0);
	DictThesaurus *d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
	char	   *subdict_name = NULL;
	bool		have_file = false;
	ListCell   *cell;

	/*
	 * Build a thesaurus dictionary from its option list.  Exactly one
	 * DictFile and one Dictionary option are required (names compared
	 * case-insensitively); anything else is rejected.
	 */
	foreach(cell, opts)
	{
		DefElem    *opt = (DefElem *) lfirst(cell);

		if (pg_strcasecmp("DictFile", opt->defname) == 0)
		{
			if (have_file)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("multiple DictFile parameters")));

			/* Load the rules right away. */
			thesaurusRead(defGetString(opt), d);
			have_file = true;
		}
		else if (pg_strcasecmp("Dictionary", opt->defname) == 0)
		{
			if (subdict_name != NULL)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("multiple Dictionary parameters")));

			subdict_name = pstrdup(defGetString(opt));
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized Thesaurus parameter: \"%s\"",
							opt->defname)));
		}
	}

	if (!have_file)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("missing DictFile parameter")));

	if (subdict_name == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("missing Dictionary parameter")));

	/* Resolve the subdictionary, then compile both halves of every rule. */
	d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdict_name), false);
	d->subdict = lookup_ts_dictionary_cache(d->subdictOid);

	compileTheLexeme(d);
	compileTheSubstitute(d);

	PG_RETURN_POINTER(d);
}
Datum thesaurus_lexize | ( | PG_FUNCTION_ARGS | ) |
Definition at line 785 of file dict_thesaurus.c.
References checkMatch(), DatumGetPointer, TSDictionaryCacheEntry::dictData, elog, ERROR, findTheLexeme(), findVariant(), FunctionCall4, DictSubState::getnext, i, DictSubState::isend, TSDictionaryCacheEntry::isvalid, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), NULL, TSLexeme::nvariant, palloc(), pfree(), PG_GETARG_DATUM, PG_GETARG_POINTER, PG_NARGS, PG_RETURN_POINTER, PointerGetDatum, LexemeInfo::posinsubst, DictSubState::private_state, DictThesaurus::subdict, and DictThesaurus::subdictOid.
{
	/*
	 * Lexize entry point of the thesaurus dictionary.
	 *
	 * Arguments: 0 = DictThesaurus built by thesaurus_init, 1 and 2 = the
	 * word and its length (passed through to the subdictionary unchanged),
	 * 3 = DictSubState carrying the multi-word matching state between calls.
	 *
	 * dstate->private_state holds the list of rules still matching after the
	 * previous word (or NULL).  On return it holds the list matching after
	 * this word.  The function returns the substitute phrase when a rule is
	 * completed at the current position, otherwise NULL; dstate->getnext is
	 * set as described at each return below.
	 */
	DictThesaurus *d;
	DictSubState *dstate;
	TSLexeme   *res = NULL;
	LexemeInfo *stored,
			   *info = NULL;
	uint16		curpos = 0;
	bool		moreres = false;

	/* Validate the argument count before fetching any argument. */
	if (PG_NARGS() != 4)
		elog(ERROR, "forbidden call of thesaurus or nested call");

	d = (DictThesaurus *) PG_GETARG_POINTER(0);
	dstate = (DictSubState *) PG_GETARG_POINTER(3);

	if (dstate == NULL)
		elog(ERROR, "forbidden call of thesaurus or nested call");

	/* Nothing is produced on the end-of-input call. */
	if (dstate->isend)
		PG_RETURN_POINTER(NULL);

	/* Continue a phrase in progress at the position after the previous word. */
	stored = (LexemeInfo *) dstate->private_state;
	if (stored)
		curpos = stored->posinsubst + 1;

	/* Re-resolve the subdictionary if its cache entry was invalidated. */
	if (!d->subdict->isvalid)
		d->subdict = lookup_ts_dictionary_cache(d->subdictOid);

	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
									   PointerGetDatum(d->subdict->dictData),
													 PG_GETARG_DATUM(1),
													 PG_GETARG_DATUM(2),
													 PointerGetDatum(NULL)));

	if (res && res->lexeme)
	{
		TSLexeme   *ptr = res,
				   *basevar;

		/* Examine the subdictionary output one variant at a time. */
		while (ptr->lexeme)
		{
			uint16		nv = ptr->nvariant;
			uint16		i,
						nlex = 0;
			LexemeInfo **infos;

			/* Count the lexemes of this variant and move ptr past them. */
			basevar = ptr;
			while (ptr->lexeme && nv == ptr->nvariant)
			{
				nlex++;
				ptr++;
			}

			/* Look up the entry chain of every lexeme of the variant. */
			infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
			for (i = 0; i < nlex; i++)
				if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
					break;

			if (i < nlex)
			{
				/* A lexeme is unknown to the thesaurus: variant cannot match. */
				pfree(infos);
				continue;
			}

			info = findVariant(info, stored, curpos, infos, nlex);

			/*
			 * findVariant() returns pointers to entries, never into the
			 * infos array itself, so the array can be released here instead
			 * of being leaked once per variant.
			 */
			pfree(infos);
		}
	}
	else if (res)
	{
		/* Stop word: match it against the NULL-lexeme entries. */
		LexemeInfo *infos = findTheLexeme(d, NULL);

		info = findVariant(NULL, stored, curpos, &infos, 1);
	}
	else
	{
		info = NULL;			/* word isn't recognized */
	}

	dstate->private_state = (void *) info;

	if (!info)
	{
		/* No rule matches any more. */
		dstate->getnext = false;
		PG_RETURN_POINTER(NULL);
	}

	if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
	{
		/* A rule is complete; getnext reports whether other candidates remain. */
		dstate->getnext = moreres;
		PG_RETURN_POINTER(res);
	}

	/* Rules still match but none is complete yet: request the next word. */
	dstate->getnext = true;

	PG_RETURN_POINTER(NULL);
}
static void thesaurusRead | ( | char * | filename, | |
DictThesaurus * | d | |||
) | [static] |
Definition at line 168 of file dict_thesaurus.c.
References addWrd(), elog, ereport, errcode(), errmsg(), ERROR, get_tsearch_config_filename(), LexemeInfo::idsubst, newLexeme(), DictThesaurus::nsubst, NULL, pfree(), pg_mblen(), LexemeInfo::posinsubst, t_iseq, t_isspace, TR_INLEX, TR_INSUBS, TR_WAITLEX, TR_WAITSUBS, tsearch_readline(), tsearch_readline_begin(), and tsearch_readline_end().
Referenced by thesaurus_init().
{
	/*
	 * Read the thesaurus file "filename" (resolved with extension "ths") and
	 * load its rules into d.
	 *
	 * As parsed below, each rule occupies one line: whitespace-separated
	 * sample words, a ':' delimiter, then whitespace-separated substitute
	 * words.  Lines that are empty or whose first non-space character is '#'
	 * are skipped.  A substitute word prefixed with '*' is stored with the
	 * use-as-is flag; a leading '\' is dropped from the word.
	 *
	 * Sample words are stored with newLexeme(), substitute words with
	 * addWrd().  d->nsubst is set to the number of rules read.
	 *
	 * Raises ERRCODE_CONFIG_FILE_ERROR if the file cannot be opened, a line
	 * is malformed, or the file holds more rules than a uint16 rule number
	 * can address.
	 */
	tsearch_readline_state trst;
	uint16		idsubst = 0;
	bool		useasis = false;
	char	   *line;

	filename = get_tsearch_config_filename(filename, "ths");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open thesaurus file \"%s\": %m",
						filename)));

	while ((line = tsearch_readline(&trst)) != NULL)
	{
		char	   *ptr;
		int			state = TR_WAITLEX;
		char	   *beginwrd = NULL;
		uint16		posinsubst = 0;	/* sample words seen on this line */
		uint16		nwrd = 0;		/* substitute words seen on this line */

		ptr = line;

		/* Skip leading whitespace, then ignore comment and empty lines. */
		while (*ptr && t_isspace(ptr))
			ptr += pg_mblen(ptr);

		if (t_iseq(ptr, '#') || *ptr == '\0' ||
			t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
		{
			pfree(line);
			continue;
		}

		/* Character-by-character state machine over the rest of the line. */
		while (*ptr)
		{
			if (state == TR_WAITLEX)
			{
				/* Between sample words. */
				if (t_iseq(ptr, ':'))
				{
					/* A delimiter before any sample word is an error. */
					if (posinsubst == 0)
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected delimiter")));
					state = TR_WAITSUBS;
				}
				else if (!t_isspace(ptr))
				{
					beginwrd = ptr;
					state = TR_INLEX;
				}
			}
			else if (state == TR_INLEX)
			{
				/* Inside a sample word: ':' or whitespace ends it. */
				if (t_iseq(ptr, ':'))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITSUBS;
				}
				else if (t_isspace(ptr))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITLEX;
				}
			}
			else if (state == TR_WAITSUBS)
			{
				/* Between substitute words. */
				if (t_iseq(ptr, '*'))
				{
					/* '*' prefix: word starts after it and is used as is. */
					useasis = true;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (t_iseq(ptr, '\\'))
				{
					/* '\' prefix: word starts after it, normal handling. */
					useasis = false;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (!t_isspace(ptr))
				{
					useasis = false;
					beginwrd = ptr;
					state = TR_INSUBS;
				}
			}
			else if (state == TR_INSUBS)
			{
				/* Inside a substitute word: whitespace ends it. */
				if (t_isspace(ptr))
				{
					/* Empty word, e.g. a lone '*' or '\'. */
					if (ptr == beginwrd)
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected end of line or lexeme")));
					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
					state = TR_WAITSUBS;
				}
			}
			else
				elog(ERROR, "unrecognized thesaurus state: %d", state);

			ptr += pg_mblen(ptr);
		}

		/* The line ended inside a substitute word: store that last word. */
		if (state == TR_INSUBS)
		{
			if (ptr == beginwrd)
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("unexpected end of line or lexeme")));
			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
		}

		idsubst++;

		/* A rule needs at least one sample word and one substitute word. */
		if (!(nwrd && posinsubst))
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("unexpected end of line")));

		/*
		 * idsubst is a uint16; if the increment wrapped to zero, the rule
		 * count no longer fits and further rules would silently overwrite
		 * rule 0.  Fail loudly instead.
		 */
		if (idsubst == 0)
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("too many rules in thesaurus file \"%s\"",
							filename)));

		pfree(line);
	}

	d->nsubst = idsubst;

	tsearch_readline_end(&trst);
}