#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
Go to the source code of this file.
Data Structures | |
struct | ParsedLex |
struct | ListParsedLex |
struct | LexizeData |
Defines | |
#define | IGNORE_LONGLEXEME 1 |
Typedefs | |
typedef struct ParsedLex | ParsedLex |
typedef struct ListParsedLex | ListParsedLex |
Functions | |
static void | LexizeInit (LexizeData *ld, TSConfigCacheEntry *cfg) |
static void | LPLAddTail (ListParsedLex *list, ParsedLex *newpl) |
static ParsedLex * | LPLRemoveHead (ListParsedLex *list) |
static void | LexizeAddLemm (LexizeData *ld, int type, char *lemm, int lenlemm) |
static void | RemoveHead (LexizeData *ld) |
static void | setCorrLex (LexizeData *ld, ParsedLex **correspondLexem) |
static void | moveToWaste (LexizeData *ld, ParsedLex *stop) |
static void | setNewTmpRes (LexizeData *ld, ParsedLex *lex, TSLexeme *res) |
static TSLexeme * | LexizeExec (LexizeData *ld, ParsedLex **correspondLexem) |
void | parsetext (Oid cfgId, ParsedText *prs, char *buf, int buflen) |
static void | hladdword (HeadlineParsedText *prs, char *buf, int buflen, int type) |
static void | hlfinditem (HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) |
static void | addHLParsedLex (HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms) |
void | hlparsetext (Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) |
text * | generateHeadline (HeadlineParsedText *prs) |
#define IGNORE_LONGLEXEME 1
Definition at line 20 of file ts_parse.c.
typedef struct ListParsedLex ListParsedLex
static void addHLParsedLex | ( | HeadlineParsedText * | prs, | |
TSQuery | query, | |||
ParsedLex * | lexs, | |||
TSLexeme * | norms | |||
) | [static] |
Definition at line 491 of file ts_parse.c.
References hladdword(), hlfinditem(), ParsedLex::lemm, ParsedLex::lenlemm, TSLexeme::lexeme, ParsedLex::next, pfree(), and ParsedLex::type.
Referenced by hlparsetext().
{
	/*
	 * Walk the chain of parsed lexemes: every lexeme with a positive type is
	 * appended to the headline word array, and each normalized form in norms
	 * (if any) is matched against the query.  Each ParsedLex node is released
	 * as soon as it has been processed; afterwards the norms array and the
	 * lexeme strings it holds are released as well.
	 */
	ParsedLex  *following;
	TSLexeme   *norm;

	for (; lexs; lexs = following)
	{
		if (lexs->type > 0)
		{
			hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
		}

		/* norms is terminated by an entry whose lexeme pointer is null */
		if (norms)
		{
			for (norm = norms; norm->lexeme; norm++)
			{
				hlfinditem(prs, query, norm->lexeme, strlen(norm->lexeme));
			}
		}

		/* remember the successor before the node is released */
		following = lexs->next;
		pfree(lexs);
	}

	if (!norms)
	{
		return;
	}

	for (norm = norms; norm->lexeme; norm++)
	{
		pfree(norm->lexeme);
	}
	pfree(norms);
}
text* generateHeadline | ( | HeadlineParsedText * | prs | ) |
Definition at line 589 of file ts_parse.c.
References HeadlineParsedText::curwords, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, HeadlineWordEntry::in, HeadlineWordEntry::len, palloc(), pfree(), repalloc(), HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, SET_VARSIZE, HeadlineWordEntry::skip, HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, HeadlineWordEntry::word, and HeadlineParsedText::words.
Referenced by ts_headline_byid_opt().
{
	/*
	 * Assemble the headline text from prs->words.  Words flagged "in" (and
	 * not "repeated") are copied to the output, wrapped in startsel/stopsel
	 * when "selected"; "replace" words become a single space and "skip"
	 * words are omitted.  A fragment delimiter is emitted before every
	 * fragment except the first.  Words that are neither "in" nor
	 * "repeated" have their word buffer released here.
	 */
	int			capacity = 128;
	int			numfragments = 0;
	int16		infrag = 0;
	text	   *out = (text *) palloc(capacity);
	char	   *ptr = ((char *) out) + VARHDRSZ;
	HeadlineWordEntry *wrd;

	for (wrd = prs->words; wrd - prs->words < prs->curwords; wrd++)
	{
		/* double the buffer until the worst-case output for this word fits */
		while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= capacity)
		{
			int			used = ptr - ((char *) out);

			capacity *= 2;
			out = (text *) repalloc(out, capacity);
			ptr = ((char *) out) + used;
		}

		/* repeated entries contribute nothing and are not released here */
		if (wrd->repeated)
		{
			continue;
		}

		if (!wrd->in)
		{
			/* word lies outside the headline: this ends any open fragment */
			infrag = 0;
			pfree(wrd->word);
			continue;
		}

		if (!infrag)
		{
			/* start of a new fragment */
			infrag = 1;
			numfragments++;

			/* add a fragment delimiter if this is after the first one */
			if (numfragments > 1)
			{
				memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
				ptr += prs->fragdelimlen;
			}
		}

		if (wrd->replace)
		{
			*ptr++ = ' ';
		}
		else if (!wrd->skip)
		{
			if (wrd->selected)
			{
				memcpy(ptr, prs->startsel, prs->startsellen);
				ptr += prs->startsellen;
			}

			memcpy(ptr, wrd->word, wrd->len);
			ptr += wrd->len;

			if (wrd->selected)
			{
				memcpy(ptr, prs->stopsel, prs->stopsellen);
				ptr += prs->stopsellen;
			}
		}
	}

	SET_VARSIZE(out, ptr - ((char *) out));
	return out;
}
static void hladdword | ( | HeadlineParsedText * | prs, | |
char * | buf, | |||
int | buflen, | |||
int | type | |||
) | [static] |
Definition at line 441 of file ts_parse.c.
References HeadlineParsedText::curwords, HeadlineWordEntry::len, HeadlineParsedText::lenwords, palloc(), repalloc(), HeadlineWordEntry::type, HeadlineWordEntry::word, and HeadlineParsedText::words.
Referenced by addHLParsedLex().
{
	/*
	 * Append a copy of buf[0..buflen) as a new, zero-initialized entry at the
	 * end of prs->words, doubling the array as often as needed to make room.
	 */
	HeadlineWordEntry *slot;

	while (prs->curwords >= prs->lenwords)
	{
		prs->lenwords *= 2;
		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
	}

	/* take the slot address only after any reallocation has happened */
	slot = &(prs->words[prs->curwords]);

	memset(slot, 0, sizeof(HeadlineWordEntry));
	slot->type = (uint8) type;
	slot->len = buflen;
	slot->word = palloc(buflen);
	memcpy(slot->word, buf, buflen);

	prs->curwords++;
}
static void hlfinditem | ( | HeadlineParsedText * | prs, | |
TSQuery | query, | |||
char * | buf, | |||
int | buflen | |||
) | [static] |
Definition at line 457 of file ts_parse.c.
References HeadlineParsedText::curwords, QueryOperand::distance, GETOPERAND, GETQUERY, i, HeadlineWordEntry::item, QueryOperand::length, HeadlineParsedText::lenwords, QueryOperand::prefix, QI_VAL, QueryItem::qoperand, repalloc(), HeadlineWordEntry::repeated, TSQueryData::size, tsCompareString(), QueryItem::type, and HeadlineParsedText::words.
Referenced by addHLParsedLex().
{
	/*
	 * Compare buf against every value operand of the query.  The first match
	 * is recorded on the most recently added headline word; each further
	 * match appends a copy of that word flagged as "repeated" and pointing at
	 * the additional operand.
	 */
	QueryItem  *item = GETQUERY(query);
	HeadlineWordEntry *word;
	HeadlineWordEntry *dup;
	int			i;

	/* reserve room for up to query->size additional entries up front */
	while (prs->curwords + query->size >= prs->lenwords)
	{
		prs->lenwords *= 2;
		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
	}

	/* NOTE(review): assumes at least one word was already added - confirm */
	word = &(prs->words[prs->curwords - 1]);

	for (i = 0; i < query->size; i++, item++)
	{
		if (item->type != QI_VAL)
		{
			continue;
		}

		if (tsCompareString(GETOPERAND(query) + item->qoperand.distance, item->qoperand.length, buf, buflen, item->qoperand.prefix) != 0)
		{
			continue;
		}

		if (!word->item)
		{
			/* first matching operand: attach it to the word itself */
			word->item = &item->qoperand;
			continue;
		}

		/* further matching operand: duplicate the word entry for it */
		dup = &(prs->words[prs->curwords]);
		memcpy(dup, word, sizeof(HeadlineWordEntry));
		dup->item = &item->qoperand;
		dup->repeated = 1;
		prs->curwords++;
	}
}
void hlparsetext | ( | Oid | cfgId, | |
HeadlineParsedText * | prs, | |||
TSQuery | query, | |||
char * | buf, | |||
int | buflen | |||
) |
Definition at line 527 of file ts_parse.c.
References addHLParsedLex(), DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, LexizeAddLemm(), LexizeExec(), LexizeInit(), lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, NULL, PointerGetDatum, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, and TSParserCacheEntry::prstoken.
Referenced by ts_headline_byid_opt().
{
	/*
	 * Run the parser configured for cfgId over buf, push every token through
	 * the lexize machinery and hand the resulting lexemes (with or without
	 * normalized forms) to addHLParsedLex so they end up in prs.
	 */
	int			type,
				lenlemm;
	char	   *lemm = NULL;
	LexizeData	ldata;
	TSLexeme   *norms;
	ParsedLex  *lexs;
	TSConfigCacheEntry *cfg = lookup_ts_config_cache(cfgId);
	TSParserCacheEntry *prsobj = lookup_ts_parser_cache(cfg->prsId);
	void	   *prsdata;

	prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart), PointerGetDatum(buf), Int32GetDatum(buflen)));

	LexizeInit(&ldata, cfg);

	do
	{
		/* fetch the next token; lemm and lenlemm are filled in by the parser */
		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), PointerGetDatum(prsdata), PointerGetDatum(&lemm), PointerGetDatum(&lenlemm)));

		if (type > 0 && lenlemm >= MAXSTRLEN)
		{
#ifdef IGNORE_LONGLEXEME
			ereport(NOTICE, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long to be indexed"), errdetail("Words longer than %d characters are ignored.", MAXSTRLEN)));
			continue;
#else
			ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long to be indexed"), errdetail("Words longer than %d characters are ignored.", MAXSTRLEN)));
#endif
		}

		/* the terminating token (type <= 0) is queued as well */
		LexizeAddLemm(&ldata, type, lemm, lenlemm);

		/*
		 * Drain the lexizer.  The pass on which LexizeExec yields no
		 * normalized forms still forwards the corresponding lexemes (with
		 * norms being NULL) before the loop ends.
		 */
		do
		{
			norms = LexizeExec(&ldata, &lexs);
			addHLParsedLex(prs, query, lexs, norms);
		} while (norms);
	} while (type > 0);

	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
static void LexizeAddLemm | ( | LexizeData * | ld, | |
int | type, | |||
char * | lemm, | |||
int | lenlemm | |||
) | [static] |
Definition at line 99 of file ts_parse.c.
References LexizeData::curSub, ParsedLex::lemm, ParsedLex::lenlemm, LPLAddTail(), palloc(), ListParsedLex::tail, LexizeData::towork, and ParsedLex::type.
Referenced by hlparsetext(), and parsetext().
static TSLexeme* LexizeExec | ( | LexizeData * | ld, | |
ParsedLex ** | correspondLexem | |||
) | [static] |
Definition at line 172 of file ts_parse.c.
References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, DatumGetObjectId, DatumGetPointer, TSDictionaryCacheEntry::dictData, ListDictionary::dictIds, LexizeData::dictState, TSLexeme::flags, FunctionCall4, DictSubState::getnext, ListParsedLex::head, i, Int32GetDatum, InvalidOid, DictSubState::isend, LexizeData::lastRes, ParsedLex::lemm, ListDictionary::len, ParsedLex::lenlemm, TSConfigCacheEntry::lenmap, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), TSConfigCacheEntry::map, moveToWaste(), ParsedLex::next, PointerGetDatum, LexizeData::posDict, DictSubState::private_state, RemoveHead(), setCorrLex(), setNewTmpRes(), LexizeData::tmpRes, LexizeData::towork, TSL_FILTER, and ParsedLex::type.
Referenced by hlparsetext(), and parsetext().
{
	/*
	 * Run dictionaries over the lexemes queued in ld->towork and return the
	 * first normalized result, or NULL when the queue yields nothing.
	 *
	 * Two modes are distinguished by ld->curDictId:
	 *	- InvalidOid: each queued lexeme is offered, one at a time, to the
	 *	  dictionaries mapped to its type, starting at index ld->posDict.
	 *	- otherwise: the dictionary ld->curDictId has asked for following
	 *	  lexemes (it set dictState.getnext) and is fed from ld->curSub.
	 *
	 * setCorrLex(ld, correspondLexem) is called before every return that
	 * does not recurse.
	 */
	int i;
	ListDictionary *map;
	TSDictionaryCacheEntry *dict;
	TSLexeme *res;

	if (ld->curDictId == InvalidOid)
	{
		/*
		 * usual mode: dictionary wants only one word, but we should keep in
		 * mind that we should go through all stack
		 */
		while (ld->towork.head)
		{
			ParsedLex *curVal = ld->towork.head;
			char *curValLemm = curVal->lemm;
			int curValLenLemm = curVal->lenlemm;

			map = ld->cfg->map + curVal->type;

			if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
			{
				/* skip this type of lexeme */
				RemoveHead(ld);
				continue;
			}

			/* try the mapped dictionaries in order, resuming at posDict */
			for (i = ld->posDict; i < map->len; i++)
			{
				dict = lookup_ts_dictionary_cache(map->dictIds[i]);

				/* fresh sub-state for every single-word call */
				ld->dictState.isend = ld->dictState.getnext = false;
				ld->dictState.private_state = NULL;
				res = (TSLexeme *) DatumGetPointer(FunctionCall4(
					&(dict->lexize),
					PointerGetDatum(dict->dictData),
					PointerGetDatum(curValLemm),
					Int32GetDatum(curValLenLemm),
					PointerGetDatum(&ld->dictState)
					));

				if (ld->dictState.getnext)
				{
					/*
					 * dictionary wants next word, so setup and store current
					 * position and go to multiword mode
					 */
					ld->curDictId = DatumGetObjectId(map->dictIds[i]);
					ld->posDict = i + 1;
					ld->curSub = curVal->next;
					/* keep a partial result in case no longer match follows */
					if (res)
						setNewTmpRes(ld, curVal, res);
					return LexizeExec(ld, correspondLexem);
				}

				if (!res)
					/* dictionary doesn't know this lexeme */
					continue;

				if (res->flags & TSL_FILTER)
				{
					/*
					 * filtering dictionary: its output replaces the lexeme
					 * that is passed to the remaining dictionaries
					 */
					curValLemm = res->lexeme;
					curValLenLemm = strlen(res->lexeme);
					continue;
				}

				/* recognized: retire the lexeme and hand back the result */
				RemoveHead(ld);
				setCorrLex(ld, correspondLexem);
				return res;
			}

			/* no dictionary produced a final result for this lexeme */
			RemoveHead(ld);
		}
	}
	else
	{
		/* curDictId is valid */
		dict = lookup_ts_dictionary_cache(ld->curDictId);

		/*
		 * Dictionary ld->curDictId asks us about following words
		 */
		while (ld->curSub)
		{
			ParsedLex *curVal = ld->curSub;

			map = ld->cfg->map + curVal->type;

			if (curVal->type != 0)
			{
				bool dictExists = false;

				if (curVal->type >= ld->cfg->lenmap || map->len == 0)
				{
					/* skip this type of lexeme */
					ld->curSub = curVal->next;
					continue;
				}

				/*
				 * We should be sure that the current type of lexeme is
				 * recognized by our dictionary: we just check whether it
				 * exists in the list of dictionaries.
				 */
				for (i = 0; i < map->len && !dictExists; i++)
					if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
						dictExists = true;

				if (!dictExists)
				{
					/*
					 * Dictionary can't work with current type of lexeme,
					 * return to basic mode and redo all stored lexemes
					 */
					ld->curDictId = InvalidOid;
					return LexizeExec(ld, correspondLexem);
				}
			}

			/* a lexeme of type 0 is presented to the dictionary with isend set */
			ld->dictState.isend = (curVal->type == 0) ? true : false;
			ld->dictState.getnext = false;

			res = (TSLexeme *) DatumGetPointer(FunctionCall4(
				&(dict->lexize),
				PointerGetDatum(dict->dictData),
				PointerGetDatum(curVal->lemm),
				Int32GetDatum(curVal->lenlemm),
				PointerGetDatum(&ld->dictState)
				));

			if (ld->dictState.getnext)
			{
				/* Dictionary wants one more */
				ld->curSub = curVal->next;
				if (res)
					setNewTmpRes(ld, curVal, res);
				continue;
			}

			if (res || ld->tmpRes)
			{
				/*
				 * Dictionary normalizes lexemes, so we remove from stack all
				 * used lexemes, return to basic mode and redo end of stack
				 * (if it exists)
				 */
				if (res)
				{
					moveToWaste(ld, ld->curSub);
				}
				else
				{
					/* fall back to the result stored by setNewTmpRes */
					res = ld->tmpRes;
					moveToWaste(ld, ld->lastRes);
				}

				/* reset to initial state */
				ld->curDictId = InvalidOid;
				ld->posDict = 0;
				ld->lastRes = NULL;
				ld->tmpRes = NULL;
				setCorrLex(ld, correspondLexem);
				return res;
			}

			/*
			 * Dict doesn't want the next lexeme and didn't recognize
			 * anything, redo from ld->towork.head
			 */
			ld->curDictId = InvalidOid;
			return LexizeExec(ld, correspondLexem);
		}
	}

	/* nothing (more) to return for the lexemes queued so far */
	setCorrLex(ld, correspondLexem);
	return NULL;
}
static void LexizeInit | ( | LexizeData * | ld, | |
TSConfigCacheEntry * | cfg | |||
) | [static] |
Definition at line 60 of file ts_parse.c.
References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, ListParsedLex::head, LexizeData::lastRes, LexizeData::posDict, ListParsedLex::tail, LexizeData::tmpRes, LexizeData::towork, and LexizeData::waste.
Referenced by hlparsetext(), and parsetext().
static void LPLAddTail | ( | ListParsedLex * | list, | |
ParsedLex * | newpl | |||
) | [static] |
Definition at line 72 of file ts_parse.c.
References ListParsedLex::head, ParsedLex::next, and ListParsedLex::tail.
Referenced by LexizeAddLemm(), and RemoveHead().
static ParsedLex* LPLRemoveHead | ( | ListParsedLex * | list | ) | [static] |
Definition at line 85 of file ts_parse.c.
References ListParsedLex::head, ParsedLex::next, NULL, and ListParsedLex::tail.
Referenced by RemoveHead().
static void moveToWaste | ( | LexizeData * | ld, | |
ParsedLex * | stop | |||
) | [static] |
Definition at line 141 of file ts_parse.c.
References LexizeData::curSub, ListParsedLex::head, ParsedLex::next, RemoveHead(), and LexizeData::towork.
Referenced by LexizeExec().
void parsetext | ( | Oid | cfgId, | |
ParsedText * | prs, | |||
char * | buf, | |||
int | buflen | |||
) |
Definition at line 358 of file ts_parse.c.
References ParsedWord::alen, ParsedText::curwords, DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, ParsedWord::flags, TSLexeme::flags, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, ParsedWord::len, ParsedText::lenwords, TSLexeme::lexeme, LexizeAddLemm(), LexizeExec(), LexizeInit(), LIMITPOS, lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, NULL, TSLexeme::nvariant, ParsedWord::nvariant, pfree(), PointerGetDatum, ParsedWord::pos, ParsedText::pos, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, repalloc(), TSL_ADDPOS, ParsedWord::word, and ParsedText::words.
Referenced by pushval_morph(), to_tsvector_byid(), and tsvector_update_trigger().
{
	/*
	 * Run the parser configured for cfgId over buf, normalize every token
	 * through the lexize machinery and append the resulting lexemes, with
	 * their positions, to prs->words.
	 */
	int			type,
				lenlemm;
	char	   *lemm = NULL;
	LexizeData	ldata;
	TSLexeme   *norms;
	TSConfigCacheEntry *cfg = lookup_ts_config_cache(cfgId);
	TSParserCacheEntry *prsobj = lookup_ts_parser_cache(cfg->prsId);
	void	   *prsdata;

	prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart, PointerGetDatum(buf), Int32GetDatum(buflen)));

	LexizeInit(&ldata, cfg);

	do
	{
		/* fetch the next token; lemm and lenlemm are filled in by the parser */
		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), PointerGetDatum(prsdata), PointerGetDatum(&lemm), PointerGetDatum(&lenlemm)));

		if (type > 0 && lenlemm >= MAXSTRLEN)
		{
#ifdef IGNORE_LONGLEXEME
			ereport(NOTICE, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long to be indexed"), errdetail("Words longer than %d characters are ignored.", MAXSTRLEN)));
			continue;
#else
			ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long to be indexed"), errdetail("Words longer than %d characters are ignored.", MAXSTRLEN)));
#endif
		}

		/* the terminating token (type <= 0) is queued as well */
		LexizeAddLemm(&ldata, type, lemm, lenlemm);

		for (;;)
		{
			TSLexeme   *ptr;

			norms = LexizeExec(&ldata, NULL);
			if (norms == NULL)
			{
				break;
			}

			prs->pos++;			/* set pos */

			/* norms is terminated by an entry whose lexeme pointer is null */
			for (ptr = norms; ptr->lexeme; ptr++)
			{
				ParsedWord *slot;

				if (prs->curwords == prs->lenwords)
				{
					prs->lenwords *= 2;
					prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
				}

				if (ptr->flags & TSL_ADDPOS)
				{
					prs->pos++;
				}

				/* take the slot address only after any reallocation */
				slot = &(prs->words[prs->curwords]);
				slot->len = strlen(ptr->lexeme);
				slot->word = ptr->lexeme;	/* string is kept, not copied */
				slot->nvariant = ptr->nvariant;
				slot->flags = ptr->flags & TSL_PREFIX;
				slot->alen = 0;
				slot->pos.pos = LIMITPOS(prs->pos);

				prs->curwords++;
			}

			/* only the array is released; the strings now belong to prs */
			pfree(norms);
		}
	} while (type > 0);

	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
static void RemoveHead | ( | LexizeData * | ld | ) | [static] |
Definition at line 111 of file ts_parse.c.
References LPLAddTail(), LPLRemoveHead(), LexizeData::posDict, LexizeData::towork, and LexizeData::waste.
Referenced by LexizeExec(), and moveToWaste().
{
	/*
	 * Detach the first entry of the towork list, append it to the waste
	 * list, and restart dictionary scanning from index 0.
	 */
	ParsedLex  *detached = LPLRemoveHead(&ld->towork);

	LPLAddTail(&ld->waste, detached);
	ld->posDict = 0;
}
static void setCorrLex | ( | LexizeData * | ld, | |
ParsedLex ** | correspondLexem | |||
) | [static] |
Definition at line 119 of file ts_parse.c.
References ListParsedLex::head, ParsedLex::next, pfree(), ListParsedLex::tail, and LexizeData::waste.
Referenced by LexizeExec().
static void setNewTmpRes | ( | LexizeData * | ld, | |
ParsedLex * | lex, | |||
TSLexeme * | res | |||
) | [static] |
Definition at line 157 of file ts_parse.c.
References LexizeData::lastRes, TSLexeme::lexeme, pfree(), and LexizeData::tmpRes.
Referenced by LexizeExec().