#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Go to the source code of this file.
Data Structures | |
| struct | TSVectorParseStateData |
Defines | |
| #define | RESIZEPRSBUF |
| #define | ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) |
| #define | RETURN_TOKEN |
| #define | WAITWORD 1 |
| #define | WAITENDWORD 2 |
| #define | WAITNEXTCHAR 3 |
| #define | WAITENDCMPLX 4 |
| #define | WAITPOSINFO 5 |
| #define | INPOSINFO 6 |
| #define | WAITPOSDELIM 7 |
| #define | WAITCHARCMPLX 8 |
| #define | PRSSYNTAXERROR prssyntaxerror(state) |
Functions | |
| TSVectorParseState | init_tsvector_parser (char *input, bool oprisdelim, bool is_tsquery) |
| void | reset_tsvector_parser (TSVectorParseState state, char *input) |
| void | close_tsvector_parser (TSVectorParseState state) |
| static void | prssyntaxerror (TSVectorParseState state) |
| bool | gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr) |
| #define INPOSINFO 6 |
Definition at line 121 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define ISOPERATOR | ( | x | ) | ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) |
Definition at line 92 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define PRSSYNTAXERROR prssyntaxerror(state) |
Definition at line 125 of file tsvector_parser.c.
| #define RESIZEPRSBUF |
| #define RETURN_TOKEN |
/*
 * RETURN_TOKEN body: hands the finished token back to the caller and returns
 * true from the function it is expanded in.
 *
 * - If pos_ptr is non-NULL, the collected position array and its element
 *   count are stored in *pos_ptr and *poslen (poslen itself is not
 *   NULL-checked); otherwise any allocated pos array is pfree'd.
 * - strval, lenval and endptr are each optional: when non-NULL they receive
 *   state->word, the length curpos - state->word, and the current input
 *   cursor state->prsbuf respectively.
 *
 * Relies on the locals pos, npos and curpos and on the parameters of the
 * expanding function being in scope.
 */
do { \ if (pos_ptr != NULL) \ { \ *pos_ptr = pos; \ *poslen = npos; \ } \ else if (pos != NULL) \ pfree(pos); \ \ if (strval != NULL) \ *strval = state->word; \ if (lenval != NULL) \ *lenval = curpos - state->word; \ if (endptr != NULL) \ *endptr = state->prsbuf; \ return true; \ } while(0)
Definition at line 95 of file tsvector_parser.c.
| #define WAITCHARCMPLX 8 |
Definition at line 123 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define WAITENDCMPLX 4 |
Definition at line 119 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define WAITENDWORD 2 |
Definition at line 117 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define WAITNEXTCHAR 3 |
Definition at line 118 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define WAITPOSDELIM 7 |
Definition at line 122 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define WAITPOSINFO 5 |
Definition at line 120 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| #define WAITWORD 1 |
Definition at line 116 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
| void close_tsvector_parser | ( | TSVectorParseState | state | ) |
Definition at line 74 of file tsvector_parser.c.
References pfree(), and TSVectorParseStateData::word.
Referenced by parse_tsquery(), and tsvectorin().
| bool gettoken_tsvector | ( | TSVectorParseState | state, | |
| char ** | strval, | |||
| int * | lenval, | |||
| WordEntryPos ** | pos_ptr, | |||
| int * | poslen, | |||
| char ** | endptr | |||
| ) |
Definition at line 155 of file tsvector_parser.c.
References Assert, TSVectorParseStateData::bufstart, COPYCHAR, elog, ereport, errcode(), errmsg(), ERROR, INPOSINFO, ISOPERATOR, LIMITPOS, TSVectorParseStateData::oprisdelim, palloc(), pg_mblen(), TSVectorParseStateData::prsbuf, repalloc(), t_isdigit, t_iseq, t_isspace, WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, WEP_SETWEIGHT, and TSVectorParseStateData::word.
Referenced by gettoken_query(), and tsvectorin().
{
/*
 * Extract the next token from state->prsbuf into state->word, using a
 * state machine that examines one (possibly multibyte) character per loop
 * iteration.
 *
 * Returns false when the end of input is reached before any token starts.
 * Otherwise the function leaves through RETURN_TOKEN, which returns true
 * and reports the word, its length, the collected positions and the input
 * cursor through strval, lenval, pos_ptr/poslen and endptr.  Malformed
 * input is reported via PRSSYNTAXERROR or ereport(ERROR).
 *
 * RESIZEPRSBUF is defined outside this view; presumably it enlarges
 * state->word (and adjusts curpos) so the next write fits -- TODO confirm.
 */
/* state to resume after an escaped character has been copied */
int oldstate = 0;
/* write cursor inside state->word */
char *curpos = state->word;
int statecode = WAITWORD;
/*
* pos is for collecting the comma delimited list of positions followed by
* the actual token.
*/
WordEntryPos *pos = NULL;
int npos = 0; /* elements of pos used */
int posalen = 0; /* allocated size of pos */
while (1)
{
/*
 * WAITWORD: looking for the first character of a token.  Whitespace
 * matches none of the branches below and is simply skipped.
 */
if (statecode == WAITWORD)
{
if (*(state->prsbuf) == '\0')
return false;
/* a single quote opens a quoted token */
else if (t_iseq(state->prsbuf, '\''))
statecode = WAITENDCMPLX;
/* a backslash escapes the next character, which then begins a plain word */
else if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
/* an operator character cannot start a token when operators act as delimiters */
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
PRSSYNTAXERROR;
else if (!t_isspace(state->prsbuf))
{
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITENDWORD;
}
}
/*
 * WAITNEXTCHAR: the previous character was a backslash; copy this
 * character unconditionally and go back to the state saved in oldstate.
 */
else if (statecode == WAITNEXTCHAR)
{
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character: \"%s\"",
state->bufstart)));
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
Assert(oldstate != 0);
statecode = oldstate;
}
}
/*
 * WAITENDWORD: inside an unquoted word; accumulate characters until a
 * delimiter is seen.
 */
else if (statecode == WAITENDWORD)
{
if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
/*
 * Whitespace, end of input, or (when oprisdelim) an operator ends the
 * word.  RETURN_TOKEN returns before the cursor advance at the bottom
 * of the loop, so state->prsbuf is left on the delimiter.
 */
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
RESIZEPRSBUF;
if (curpos == state->word)
PRSSYNTAXERROR;
*(curpos) = '\0';
RETURN_TOKEN;
}
/*
 * A colon ends the word: with oprisdelim the token is returned with
 * the cursor still on the colon, otherwise position info follows.
 */
else if (t_iseq(state->prsbuf, ':'))
{
if (curpos == state->word)
PRSSYNTAXERROR;
*(curpos) = '\0';
if (state->oprisdelim)
RETURN_TOKEN;
else
statecode = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
}
}
/*
 * WAITENDCMPLX: inside a quoted token.  A quote may close the token or
 * be the first half of a doubled quote; WAITCHARCMPLX decides which.
 */
else if (statecode == WAITENDCMPLX)
{
if (t_iseq(state->prsbuf, '\''))
{
statecode = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
/* input ended while the quote was still open */
else if (*(state->prsbuf) == '\0')
PRSSYNTAXERROR;
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
}
}
/*
 * WAITCHARCMPLX: the previous character was a quote inside a quoted
 * token.
 */
else if (statecode == WAITCHARCMPLX)
{
/* doubled quote: store one literal quote and stay inside the quoted token */
if (t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITENDCMPLX;
}
/* anything else: the previous quote closed the token */
else
{
RESIZEPRSBUF;
*(curpos) = '\0';
/* an empty quoted token is rejected */
if (curpos == state->word)
PRSSYNTAXERROR;
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
RETURN_TOKEN;
}
else
statecode = WAITPOSINFO;
/* skip the cursor advance at the bottom of the loop */
continue; /* recheck current character */
}
}
/*
 * WAITPOSINFO: a quoted token has just ended; a colon introduces
 * position info, anything else ends the token (cursor not advanced).
 */
else if (statecode == WAITPOSINFO)
{
if (t_iseq(state->prsbuf, ':'))
statecode = INPOSINFO;
else
RETURN_TOKEN;
}
/*
 * INPOSINFO: a position number must start at the current character.
 */
else if (statecode == INPOSINFO)
{
if (t_isdigit(state->prsbuf))
{
/* the position array starts at 4 entries and doubles when nearly full */
if (posalen == 0)
{
posalen = 4;
pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
npos = 0;
}
else if (npos + 1 >= posalen)
{
posalen *= 2;
pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
}
npos++;
/*
 * atoi() reads the number starting at the current character; only
 * this first digit is consumed here, the remaining digits are
 * stepped over in WAITPOSDELIM.
 */
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
/* we cannot get here in tsquery, so no need for 2 errmsgs */
if (WEP_GETPOS(pos[npos - 1]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector: \"%s\"",
state->bufstart)));
WEP_SETWEIGHT(pos[npos - 1], 0);
statecode = WAITPOSDELIM;
}
else
PRSSYNTAXERROR;
}
/*
 * WAITPOSDELIM: after the first digit of a position.  Accepts further
 * digits, an optional weight letter, a comma introducing another
 * position, or the end of the token.
 *
 * Weight letters: A (or '*') stores 3, B stores 2, C stores 1, D
 * stores 0.  A letter is rejected when WEP_GETWEIGHT already reports a
 * nonzero weight for this position.
 * NOTE(review): since D stores 0, this check presumably cannot detect
 * a weight letter that follows a D -- confirm against WEP_* macros.
 */
else if (statecode == WAITPOSDELIM)
{
if (t_iseq(state->prsbuf, ','))
statecode = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
PRSSYNTAXERROR;
WEP_SETWEIGHT(pos[npos - 1], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
PRSSYNTAXERROR;
WEP_SETWEIGHT(pos[npos - 1], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
PRSSYNTAXERROR;
WEP_SETWEIGHT(pos[npos - 1], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
PRSSYNTAXERROR;
WEP_SETWEIGHT(pos[npos - 1], 0);
}
/* whitespace or end of input finishes the token and its position list */
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
RETURN_TOKEN;
/* remaining digits of the current number are skipped; anything else is an error */
else if (!t_isdigit(state->prsbuf))
PRSSYNTAXERROR;
}
else /* internal error */
elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
statecode);
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
}
| TSVectorParseState init_tsvector_parser | ( | char * | input, | |
| bool | oprisdelim, | |||
| bool | is_tsquery | |||
| ) |
Definition at line 45 of file tsvector_parser.c.
References TSVectorParseStateData::bufstart, TSVectorParseStateData::eml, TSVectorParseStateData::is_tsquery, TSVectorParseStateData::len, TSVectorParseStateData::oprisdelim, palloc(), pg_database_encoding_max_length(), TSVectorParseStateData::prsbuf, and TSVectorParseStateData::word.
Referenced by parse_tsquery(), and tsvectorin().
{
	/*
	 * Allocate a parser state for the given input string and record the
	 * caller's mode flags.  The returned state owns a separately palloc'd
	 * word buffer in addition to the state struct itself.
	 */
	TSVectorParseState parser = (TSVectorParseState) palloc(sizeof(*parser));

	/* Both the parse cursor and the saved start point at the input. */
	parser->bufstart = input;
	parser->prsbuf = input;

	/* The word buffer starts out at 32 bytes. */
	parser->len = 32;
	parser->word = (char *) palloc(parser->len);

	/* Cache the maximum character length of the database encoding. */
	parser->eml = pg_database_encoding_max_length();

	parser->is_tsquery = is_tsquery;
	parser->oprisdelim = oprisdelim;

	return parser;
}
| static void prssyntaxerror | ( | TSVectorParseState | state | ) | [static] |
Definition at line 128 of file tsvector_parser.c.
References TSVectorParseStateData::bufstart, ereport, errcode(), errmsg(), ERROR, and TSVectorParseStateData::is_tsquery.
| void reset_tsvector_parser | ( | TSVectorParseState | state, | |
| char * | input | |||
| ) |
Definition at line 65 of file tsvector_parser.c.
References TSVectorParseStateData::prsbuf.
Referenced by gettoken_query().
{
/*
 * Repoint the parse cursor at a new input string.  Only prsbuf is changed
 * here; bufstart, the word buffer and the mode flags are left untouched.
 */
state->prsbuf = input;
}
1.7.1