#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
Go to the source code of this file.
Data Structures | |
| struct | ParsedLex |
| struct | ListParsedLex |
| struct | LexizeData |
Defines | |
| #define | IGNORE_LONGLEXEME 1 |
Typedefs | |
| typedef struct ParsedLex | ParsedLex |
| typedef struct ListParsedLex | ListParsedLex |
Functions | |
| static void | LexizeInit (LexizeData *ld, TSConfigCacheEntry *cfg) |
| static void | LPLAddTail (ListParsedLex *list, ParsedLex *newpl) |
| static ParsedLex * | LPLRemoveHead (ListParsedLex *list) |
| static void | LexizeAddLemm (LexizeData *ld, int type, char *lemm, int lenlemm) |
| static void | RemoveHead (LexizeData *ld) |
| static void | setCorrLex (LexizeData *ld, ParsedLex **correspondLexem) |
| static void | moveToWaste (LexizeData *ld, ParsedLex *stop) |
| static void | setNewTmpRes (LexizeData *ld, ParsedLex *lex, TSLexeme *res) |
| static TSLexeme * | LexizeExec (LexizeData *ld, ParsedLex **correspondLexem) |
| void | parsetext (Oid cfgId, ParsedText *prs, char *buf, int buflen) |
| static void | hladdword (HeadlineParsedText *prs, char *buf, int buflen, int type) |
| static void | hlfinditem (HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) |
| static void | addHLParsedLex (HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms) |
| void | hlparsetext (Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) |
| text * | generateHeadline (HeadlineParsedText *prs) |
| #define IGNORE_LONGLEXEME 1 |
Definition at line 20 of file ts_parse.c.
| typedef struct ListParsedLex ListParsedLex |
| static void addHLParsedLex | ( | HeadlineParsedText * | prs, | |
| TSQuery | query, | |||
| ParsedLex * | lexs, | |||
| TSLexeme * | norms | |||
| ) | [static] |
Definition at line 491 of file ts_parse.c.
References hladdword(), hlfinditem(), ParsedLex::lemm, ParsedLex::lenlemm, TSLexeme::lexeme, ParsedLex::next, pfree(), and ParsedLex::type.
Referenced by hlparsetext().
{
	ParsedLex  *nextlex;
	TSLexeme   *norm;

	/*
	 * Consume the list of parsed lexemes: every node is processed and then
	 * freed, so the caller must not touch lexs afterwards.
	 */
	for (; lexs != NULL; lexs = nextlex)
	{
		/* Only lexemes with a positive type become headline words. */
		if (lexs->type > 0)
			hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);

		/*
		 * Check every normalized form against the query.  The norms array
		 * ends at the first entry whose lexeme pointer is NULL.
		 */
		if (norms != NULL)
		{
			for (norm = norms; norm->lexeme != NULL; norm++)
				hlfinditem(prs, query, norm->lexeme, strlen(norm->lexeme));
		}

		/* Remember the successor before the current node is released. */
		nextlex = lexs->next;
		pfree(lexs);
	}

	if (norms == NULL)
		return;

	/* Release each lexeme string first, then the array that held them. */
	for (norm = norms; norm->lexeme != NULL; norm++)
		pfree(norm->lexeme);
	pfree(norms);
}
| text* generateHeadline | ( | HeadlineParsedText * | prs | ) |
Definition at line 589 of file ts_parse.c.
References HeadlineParsedText::curwords, HeadlineParsedText::fragdelim, HeadlineParsedText::fragdelimlen, HeadlineWordEntry::in, HeadlineWordEntry::len, palloc(), pfree(), repalloc(), HeadlineWordEntry::repeated, HeadlineWordEntry::replace, HeadlineWordEntry::selected, SET_VARSIZE, HeadlineWordEntry::skip, HeadlineParsedText::startsel, HeadlineParsedText::startsellen, HeadlineParsedText::stopsel, HeadlineParsedText::stopsellen, HeadlineWordEntry::word, and HeadlineParsedText::words.
Referenced by ts_headline_byid_opt().
{
	text	   *result;
	char	   *cur;
	int			allocated = 128;
	int			nfragments = 0;
	int16		infragment = 0;
	HeadlineWordEntry *wrd;
	int			idx;

	/*
	 * Build the headline into a palloc'd text value.  "cur" is the write
	 * position; it starts just past the varlena header.
	 */
	result = (text *) palloc(allocated);
	cur = ((char *) result) + VARHDRSZ;

	for (idx = 0; idx < prs->curwords; idx++)
	{
		wrd = &prs->words[idx];

		/*
		 * Make sure the buffer can take the most this entry could add: the
		 * word itself plus both selection markers and a fragment delimiter.
		 * The buffer is doubled until that fits; "cur" is re-derived from
		 * its offset because repalloc may move the buffer.
		 */
		while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (cur - ((char *) result)) >= allocated)
		{
			int			used = cur - ((char *) result);

			allocated *= 2;
			result = (text *) repalloc(result, allocated);
			cur = ((char *) result) + used;
		}

		/* Entries flagged as repeated are skipped entirely. */
		if (wrd->repeated)
			continue;

		if (!wrd->in)
		{
			/* Word is outside the headline: end the fragment, free its text. */
			infragment = 0;
			pfree(wrd->word);
			continue;
		}

		if (!infragment)
		{
			/* First included word of a new fragment. */
			infragment = 1;
			nfragments++;

			/* Every fragment after the first is preceded by the delimiter. */
			if (nfragments > 1)
			{
				memcpy(cur, prs->fragdelim, prs->fragdelimlen);
				cur += prs->fragdelimlen;
			}
		}

		if (wrd->replace)
		{
			/* The word is replaced by a single space. */
			*cur = ' ';
			cur++;
			continue;
		}

		if (wrd->skip)
			continue;

		/* Emit the word, wrapped in start/stop markers when selected. */
		if (wrd->selected)
		{
			memcpy(cur, prs->startsel, prs->startsellen);
			cur += prs->startsellen;
		}

		memcpy(cur, wrd->word, wrd->len);
		cur += wrd->len;

		if (wrd->selected)
		{
			memcpy(cur, prs->stopsel, prs->stopsellen);
			cur += prs->stopsellen;
		}
	}

	/* Total size, header included, is the distance written so far. */
	SET_VARSIZE(result, cur - ((char *) result));
	return result;
}
| static void hladdword | ( | HeadlineParsedText * | prs, | |
| char * | buf, | |||
| int | buflen, | |||
| int | type | |||
| ) | [static] |
Definition at line 441 of file ts_parse.c.
References HeadlineParsedText::curwords, HeadlineWordEntry::len, HeadlineParsedText::lenwords, palloc(), repalloc(), HeadlineWordEntry::type, HeadlineWordEntry::word, and HeadlineParsedText::words.
Referenced by addHLParsedLex().
{
	HeadlineWordEntry *entry;

	/* Double the words array until there is a free slot at curwords. */
	while (prs->curwords >= prs->lenwords)
	{
		prs->lenwords *= 2;
		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
	}

	/* Start from an all-zero entry so every flag and pointer is cleared. */
	entry = &(prs->words[prs->curwords]);
	memset(entry, 0, sizeof(HeadlineWordEntry));

	entry->type = (uint8) type;
	entry->len = buflen;

	/* Keep a private copy of the text; it is stored without a terminator. */
	entry->word = palloc(buflen);
	memcpy(entry->word, buf, buflen);

	prs->curwords++;
}
| static void hlfinditem | ( | HeadlineParsedText * | prs, | |
| TSQuery | query, | |||
| char * | buf, | |||
| int | buflen | |||
| ) | [static] |
Definition at line 457 of file ts_parse.c.
References HeadlineParsedText::curwords, QueryOperand::distance, GETOPERAND, GETQUERY, i, HeadlineWordEntry::item, QueryOperand::length, HeadlineParsedText::lenwords, QueryOperand::prefix, QI_VAL, QueryItem::qoperand, repalloc(), HeadlineWordEntry::repeated, TSQueryData::size, tsCompareString(), QueryItem::type, and HeadlineParsedText::words.
Referenced by addHLParsedLex().
{
	int			idx;
	QueryItem  *qitem = GETQUERY(query);
	HeadlineWordEntry *last;
	HeadlineWordEntry *dup;

	/*
	 * Reserve room for up to query->size extra entries now.  Each query item
	 * can append at most one entry below, so the array is never reallocated
	 * inside the loop and "last" stays valid.
	 */
	while (prs->curwords + query->size >= prs->lenwords)
	{
		prs->lenwords *= 2;
		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
	}

	/* The entry being matched is the most recently added word. */
	last = &(prs->words[prs->curwords - 1]);

	for (idx = 0; idx < query->size; idx++, qitem++)
	{
		/* Only value items are compared against the lexeme. */
		if (qitem->type != QI_VAL)
			continue;

		if (tsCompareString(GETOPERAND(query) + qitem->qoperand.distance, qitem->qoperand.length,
							buf, buflen, qitem->qoperand.prefix) != 0)
			continue;

		if (!last->item)
		{
			/* First matching operand: attach it to the word itself. */
			last->item = &qitem->qoperand;
			continue;
		}

		/*
		 * The word already carries an operand: append a copy of it flagged
		 * as repeated, pointing at this additional operand.
		 */
		dup = &(prs->words[prs->curwords]);
		memcpy(dup, last, sizeof(HeadlineWordEntry));
		dup->item = &qitem->qoperand;
		dup->repeated = 1;
		prs->curwords++;
	}
}
| void hlparsetext | ( | Oid | cfgId, | |
| HeadlineParsedText * | prs, | |||
| TSQuery | query, | |||
| char * | buf, | |||
| int | buflen | |||
| ) |
Definition at line 527 of file ts_parse.c.
References addHLParsedLex(), DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, LexizeAddLemm(), LexizeExec(), LexizeInit(), lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, NULL, PointerGetDatum, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, and TSParserCacheEntry::prstoken.
Referenced by ts_headline_byid_opt().
{
/*
 * Tokenize buf with the parser of text search configuration cfgId, run
 * each token through LexizeExec(), and pass the results to
 * addHLParsedLex() so that prs collects the headline words.
 */
int type,
lenlemm;
char *lemm = NULL;
LexizeData ldata;
TSLexeme *norms;
ParsedLex *lexs;
TSConfigCacheEntry *cfg;
TSParserCacheEntry *prsobj;
void *prsdata;
/* Look up the configuration and its parser, then start the parser on buf. */
cfg = lookup_ts_config_cache(cfgId);
prsobj = lookup_ts_parser_cache(cfg->prsId);
prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
PointerGetDatum(buf),
Int32GetDatum(buflen)));
LexizeInit(&ldata, cfg);
do
{
/* Fetch the next token; the parser fills in lemm and lenlemm. */
type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
PointerGetDatum(prsdata),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm)));
if (type > 0 && lenlemm >= MAXSTRLEN)
{
#ifdef IGNORE_LONGLEXEME
/*
 * Report the overlong token and skip it.  "continue" inside a
 * do...while jumps to the loop test, which is true here because
 * type > 0, so parsing carries on with the next token.
 */
ereport(NOTICE,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long to be indexed"),
errdetail("Words longer than %d characters are ignored.",
MAXSTRLEN)));
continue;
#else
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long to be indexed"),
errdetail("Words longer than %d characters are ignored.",
MAXSTRLEN)));
#endif
}
/*
 * Queue the token.  This also happens for the final token with
 * type <= 0, just before the outer loop ends.
 */
LexizeAddLemm(&ldata, type, lemm, lenlemm);
/*
 * Drain every result that is ready.  addHLParsedLex() is called on the
 * final NULL result too, so the lexemes returned through lexs are still
 * recorded and freed.
 */
do
{
if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
addHLParsedLex(prs, query, lexs, norms);
else
addHLParsedLex(prs, query, lexs, NULL);
} while (norms);
} while (type > 0);
/* Let the parser finish with its state. */
FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
| static void LexizeAddLemm | ( | LexizeData * | ld, | |
| int | type, | |||
| char * | lemm, | |||
| int | lenlemm | |||
| ) | [static] |
Definition at line 99 of file ts_parse.c.
References LexizeData::curSub, ParsedLex::lemm, ParsedLex::lenlemm, LPLAddTail(), palloc(), ListParsedLex::tail, LexizeData::towork, and ParsedLex::type.
Referenced by hlparsetext(), and parsetext().
| static TSLexeme* LexizeExec | ( | LexizeData * | ld, | |
| ParsedLex ** | correspondLexem | |||
| ) | [static] |
Definition at line 172 of file ts_parse.c.
References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, DatumGetObjectId, DatumGetPointer, TSDictionaryCacheEntry::dictData, ListDictionary::dictIds, LexizeData::dictState, TSLexeme::flags, FunctionCall4, DictSubState::getnext, ListParsedLex::head, i, Int32GetDatum, InvalidOid, DictSubState::isend, LexizeData::lastRes, ParsedLex::lemm, ListDictionary::len, ParsedLex::lenlemm, TSConfigCacheEntry::lenmap, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), TSConfigCacheEntry::map, moveToWaste(), ParsedLex::next, PointerGetDatum, LexizeData::posDict, DictSubState::private_state, RemoveHead(), setCorrLex(), setNewTmpRes(), LexizeData::tmpRes, LexizeData::towork, TSL_FILTER, and ParsedLex::type.
Referenced by hlparsetext(), and parsetext().
{
/*
 * Run the configuration's dictionaries over the lexemes linked from
 * ld->towork.head and return the next normalized result, or NULL when
 * nothing more can be produced for now.
 *
 * ld->curDictId selects one of two modes:
 * - InvalidOid: basic mode.  The head lexeme is offered in turn to each
 *   dictionary mapped to its token type.
 * - otherwise: multiword mode.  Dictionary ld->curDictId has asked (via
 *   dictState.getnext) to see the following lexemes, which are fed to it
 *   starting at ld->curSub.
 *
 * Mode switches are done by calling LexizeExec() recursively.  Every
 * non-recursive return calls setCorrLex(ld, correspondLexem) first.
 */
int i;
ListDictionary *map;
TSDictionaryCacheEntry *dict;
TSLexeme *res;
if (ld->curDictId == InvalidOid)
{
/*
 * usual mode: dictionary wants only one word, but we should keep in
 * mind that we should go through all stack
 */
while (ld->towork.head)
{
ParsedLex *curVal = ld->towork.head;
/* Text handed to the dictionaries; a filter dictionary may replace it. */
char *curValLemm = curVal->lemm;
int curValLenLemm = curVal->lenlemm;
/* map is only dereferenced after the type range checks below. */
map = ld->cfg->map + curVal->type;
if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
{
/* skip this type of lexeme */
RemoveHead(ld);
continue;
}
/*
 * Start at ld->posDict rather than 0, so that after a return from
 * multiword mode the dictionaries already tried are not asked again.
 */
for (i = ld->posDict; i < map->len; i++)
{
dict = lookup_ts_dictionary_cache(map->dictIds[i]);
ld->dictState.isend = ld->dictState.getnext = false;
ld->dictState.private_state = NULL;
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
&(dict->lexize),
PointerGetDatum(dict->dictData),
PointerGetDatum(curValLemm),
Int32GetDatum(curValLenLemm),
PointerGetDatum(&ld->dictState)
));
if (ld->dictState.getnext)
{
/*
 * dictionary wants next word, so setup and store current
 * position and go to multiword mode
 */
ld->curDictId = DatumGetObjectId(map->dictIds[i]);
ld->posDict = i + 1;
ld->curSub = curVal->next;
/* Keep the partial result in case no longer match follows. */
if (res)
setNewTmpRes(ld, curVal, res);
return LexizeExec(ld, correspondLexem);
}
if (!res) /* dictionary doesn't know this lexeme */
continue;
if (res->flags & TSL_FILTER)
{
/*
 * Filter dictionary: its output becomes the input for the
 * remaining dictionaries in the list.
 */
curValLemm = res->lexeme;
curValLenLemm = strlen(res->lexeme);
continue;
}
/* A dictionary recognized the lexeme: consume it and return. */
RemoveHead(ld);
setCorrLex(ld, correspondLexem);
return res;
}
/* No dictionary recognized the lexeme: drop it and try the next one. */
RemoveHead(ld);
}
}
else
{ /* curDictId is valid */
dict = lookup_ts_dictionary_cache(ld->curDictId);
/*
 * Dictionary ld->curDictId asks us about following words
 */
while (ld->curSub)
{
ParsedLex *curVal = ld->curSub;
/* As above, map is only dereferenced after the range check. */
map = ld->cfg->map + curVal->type;
if (curVal->type != 0)
{
bool dictExists = false;
if (curVal->type >= ld->cfg->lenmap || map->len == 0)
{
/* skip this type of lexeme */
ld->curSub = curVal->next;
continue;
}
/*
 * We must be sure that the current lexeme type is handled by our
 * dictionary: check whether it appears in the list of
 * dictionaries mapped to that type.
 */
for (i = 0; i < map->len && !dictExists; i++)
if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
dictExists = true;
if (!dictExists)
{
/*
 * Dictionary can't work with current type of lexeme,
 * return to basic mode and redo all stored lexemes
 */
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
}
}
/*
 * A lexeme of type 0 is passed with isend set.
 * NOTE(review): type 0 presumably marks end of input -- confirm
 * against the parser's token types.
 */
ld->dictState.isend = (curVal->type == 0) ? true : false;
ld->dictState.getnext = false;
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
&(dict->lexize),
PointerGetDatum(dict->dictData),
PointerGetDatum(curVal->lemm),
Int32GetDatum(curVal->lenlemm),
PointerGetDatum(&ld->dictState)
));
if (ld->dictState.getnext)
{
/* Dictionary wants one more */
ld->curSub = curVal->next;
if (res)
setNewTmpRes(ld, curVal, res);
continue;
}
if (res || ld->tmpRes)
{
/*
 * Dictionary normalizes lexemes, so we remove from stack all
 * used lexemes, return to basic mode and redo end of stack
 * (if it exists)
 */
if (res)
{
/* The current call produced the result. */
moveToWaste(ld, ld->curSub);
}
else
{
/* Fall back to the result stored earlier by setNewTmpRes(). */
res = ld->tmpRes;
moveToWaste(ld, ld->lastRes);
}
/* reset to initial state */
ld->curDictId = InvalidOid;
ld->posDict = 0;
ld->lastRes = NULL;
ld->tmpRes = NULL;
setCorrLex(ld, correspondLexem);
return res;
}
/*
 * Dictionary doesn't want the next lexeme and didn't recognize
 * anything, so redo from ld->towork.head
 */
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
}
}
/* Nothing (more) to return for now. */
setCorrLex(ld, correspondLexem);
return NULL;
}
| static void LexizeInit | ( | LexizeData * | ld, | |
| TSConfigCacheEntry * | cfg | |||
| ) | [static] |
Definition at line 60 of file ts_parse.c.
References LexizeData::cfg, LexizeData::curDictId, LexizeData::curSub, ListParsedLex::head, LexizeData::lastRes, LexizeData::posDict, ListParsedLex::tail, LexizeData::tmpRes, LexizeData::towork, and LexizeData::waste.
Referenced by hlparsetext(), and parsetext().
| static void LPLAddTail | ( | ListParsedLex * | list, | |
| ParsedLex * | newpl | |||
| ) | [static] |
Definition at line 72 of file ts_parse.c.
References ListParsedLex::head, ParsedLex::next, and ListParsedLex::tail.
Referenced by LexizeAddLemm(), and RemoveHead().
| static ParsedLex* LPLRemoveHead | ( | ListParsedLex * | list | ) | [static] |
Definition at line 85 of file ts_parse.c.
References ListParsedLex::head, ParsedLex::next, NULL, and ListParsedLex::tail.
Referenced by RemoveHead().
| static void moveToWaste | ( | LexizeData * | ld, | |
| ParsedLex * | stop | |||
| ) | [static] |
Definition at line 141 of file ts_parse.c.
References LexizeData::curSub, ListParsedLex::head, ParsedLex::next, RemoveHead(), and LexizeData::towork.
Referenced by LexizeExec().
| void parsetext | ( | Oid | cfgId, | |
| ParsedText * | prs, | |||
| char * | buf, | |||
| int | buflen | |||
| ) |
Definition at line 358 of file ts_parse.c.
References ParsedWord::alen, ParsedText::curwords, DatumGetInt32, DatumGetPointer, ereport, errcode(), errdetail(), errmsg(), ERROR, ParsedWord::flags, TSLexeme::flags, FunctionCall1, FunctionCall2, FunctionCall3, Int32GetDatum, ParsedWord::len, ParsedText::lenwords, TSLexeme::lexeme, LexizeAddLemm(), LexizeExec(), LexizeInit(), LIMITPOS, lookup_ts_config_cache(), lookup_ts_parser_cache(), MAXSTRLEN, NOTICE, NULL, TSLexeme::nvariant, ParsedWord::nvariant, pfree(), PointerGetDatum, ParsedWord::pos, ParsedText::pos, TSParserCacheEntry::prsend, TSConfigCacheEntry::prsId, TSParserCacheEntry::prsstart, TSParserCacheEntry::prstoken, repalloc(), TSL_ADDPOS, ParsedWord::word, and ParsedText::words.
Referenced by pushval_morph(), to_tsvector_byid(), and tsvector_update_trigger().
{
/*
 * Tokenize buf with the parser of text search configuration cfgId, run
 * each token through LexizeExec(), and append every resulting lexeme to
 * prs->words together with its position.
 */
int type,
lenlemm;
char *lemm = NULL;
LexizeData ldata;
TSLexeme *norms;
TSConfigCacheEntry *cfg;
TSParserCacheEntry *prsobj;
void *prsdata;
/* Look up the configuration and its parser, then start the parser on buf. */
cfg = lookup_ts_config_cache(cfgId);
prsobj = lookup_ts_parser_cache(cfg->prsId);
prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
PointerGetDatum(buf),
Int32GetDatum(buflen)));
LexizeInit(&ldata, cfg);
do
{
/* Fetch the next token; the parser fills in lemm and lenlemm. */
type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
PointerGetDatum(prsdata),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm)));
if (type > 0 && lenlemm >= MAXSTRLEN)
{
#ifdef IGNORE_LONGLEXEME
/*
 * Report the overlong token and skip it.  "continue" inside a
 * do...while jumps to the loop test, which is true here because
 * type > 0, so parsing carries on with the next token.
 */
ereport(NOTICE,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long to be indexed"),
errdetail("Words longer than %d characters are ignored.",
MAXSTRLEN)));
continue;
#else
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long to be indexed"),
errdetail("Words longer than %d characters are ignored.",
MAXSTRLEN)));
#endif
}
/*
 * Queue the token.  This also happens for the final token with
 * type <= 0, just before the outer loop ends.
 */
LexizeAddLemm(&ldata, type, lemm, lenlemm);
/* Drain every result that is ready; NULL correspondLexem is passed. */
while ((norms = LexizeExec(&ldata, NULL)) != NULL)
{
TSLexeme *ptr = norms;
prs->pos++; /* set pos */
/* The norms array ends at the first entry with a NULL lexeme. */
while (ptr->lexeme)
{
/* Double the words array when it is full. */
if (prs->curwords == prs->lenwords)
{
prs->lenwords *= 2;
prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
}
/* A lexeme flagged TSL_ADDPOS advances the position once more. */
if (ptr->flags & TSL_ADDPOS)
prs->pos++;
prs->words[prs->curwords].len = strlen(ptr->lexeme);
/* The lexeme string itself is stored in the word, not copied. */
prs->words[prs->curwords].word = ptr->lexeme;
prs->words[prs->curwords].nvariant = ptr->nvariant;
/* Of the lexeme flags, only TSL_PREFIX is kept. */
prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
prs->words[prs->curwords].alen = 0;
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
ptr++;
prs->curwords++;
}
/*
 * Free only the array: the lexeme strings are now referenced from
 * prs->words and must stay allocated.
 */
pfree(norms);
}
} while (type > 0);
/* Let the parser finish with its state. */
FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
| static void RemoveHead | ( | LexizeData * | ld | ) | [static] |
Definition at line 111 of file ts_parse.c.
References LPLAddTail(), LPLRemoveHead(), LexizeData::posDict, LexizeData::towork, and LexizeData::waste.
Referenced by LexizeExec(), and moveToWaste().
{
	ParsedLex  *head;

	/* Unlink the head of the work list and append it to the waste list. */
	head = LPLRemoveHead(&ld->towork);
	LPLAddTail(&ld->waste, head);

	/* The next head lexeme starts again at the first dictionary. */
	ld->posDict = 0;
}
| static void setCorrLex | ( | LexizeData * | ld, | |
| ParsedLex ** | correspondLexem | |||
| ) | [static] |
Definition at line 119 of file ts_parse.c.
References ListParsedLex::head, ParsedLex::next, pfree(), ListParsedLex::tail, and LexizeData::waste.
Referenced by LexizeExec().
| static void setNewTmpRes | ( | LexizeData * | ld, | |
| ParsedLex * | lex, | |||
| TSLexeme * | res | |||
| ) | [static] |
Definition at line 157 of file ts_parse.c.
References LexizeData::lastRes, TSLexeme::lexeme, pfree(), and LexizeData::tmpRes.
Referenced by LexizeExec().
1.7.1