#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Go to the source code of this file.
Data Structures | |
struct | TSVectorParseStateData |
Defines | |
#define | RESIZEPRSBUF |
#define | ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) |
#define | RETURN_TOKEN |
#define | WAITWORD 1 |
#define | WAITENDWORD 2 |
#define | WAITNEXTCHAR 3 |
#define | WAITENDCMPLX 4 |
#define | WAITPOSINFO 5 |
#define | INPOSINFO 6 |
#define | WAITPOSDELIM 7 |
#define | WAITCHARCMPLX 8 |
#define | PRSSYNTAXERROR prssyntaxerror(state) |
Functions | |
TSVectorParseState | init_tsvector_parser (char *input, bool oprisdelim, bool is_tsquery) |
void | reset_tsvector_parser (TSVectorParseState state, char *input) |
void | close_tsvector_parser (TSVectorParseState state) |
static void | prssyntaxerror (TSVectorParseState state) |
bool | gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr) |
#define INPOSINFO 6 |
Definition at line 121 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define ISOPERATOR | ( | x | ) | ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) |
Definition at line 92 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define PRSSYNTAXERROR prssyntaxerror(state) |
Definition at line 125 of file tsvector_parser.c.
#define RESIZEPRSBUF |
#define RETURN_TOKEN |
/* RETURN_TOKEN expansion: hands the finished token back to the caller and returns true from the enclosing function. If pos_ptr is non-NULL the collected position array and its count are stored through pos_ptr/poslen (poslen is dereferenced without its own NULL check); otherwise a non-NULL pos array is pfree'd. strval, lenval and endptr are each optional and are filled (word buffer, byte length curpos - state->word, current input pointer) only when non-NULL. */
do { \ if (pos_ptr != NULL) \ { \ *pos_ptr = pos; \ *poslen = npos; \ } \ else if (pos != NULL) \ pfree(pos); \ \ if (strval != NULL) \ *strval = state->word; \ if (lenval != NULL) \ *lenval = curpos - state->word; \ if (endptr != NULL) \ *endptr = state->prsbuf; \ return true; \ } while(0)
Definition at line 95 of file tsvector_parser.c.
#define WAITCHARCMPLX 8 |
Definition at line 123 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define WAITENDCMPLX 4 |
Definition at line 119 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define WAITENDWORD 2 |
Definition at line 117 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define WAITNEXTCHAR 3 |
Definition at line 118 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define WAITPOSDELIM 7 |
Definition at line 122 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define WAITPOSINFO 5 |
Definition at line 120 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
#define WAITWORD 1 |
Definition at line 116 of file tsvector_parser.c.
Referenced by gettoken_tsvector().
void close_tsvector_parser | ( | TSVectorParseState | state | ) |
Definition at line 74 of file tsvector_parser.c.
References pfree(), and TSVectorParseStateData::word.
Referenced by parse_tsquery(), and tsvectorin().
bool gettoken_tsvector | ( | TSVectorParseState | state, | |
char ** | strval, | |||
int * | lenval, | |||
WordEntryPos ** | pos_ptr, | |||
int * | poslen, | |||
char ** | endptr | |||
) |
Definition at line 155 of file tsvector_parser.c.
References Assert, TSVectorParseStateData::bufstart, COPYCHAR, elog, ereport, errcode(), errmsg(), ERROR, INPOSINFO, ISOPERATOR, LIMITPOS, TSVectorParseStateData::oprisdelim, palloc(), pg_mblen(), TSVectorParseStateData::prsbuf, repalloc(), t_isdigit, t_iseq, t_isspace, WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, WEP_SETWEIGHT, and TSVectorParseStateData::word.
Referenced by gettoken_query(), and tsvectorin().
/*
 * Body of gettoken_tsvector(): scans one token starting at state->prsbuf,
 * accumulating its text in state->word (curpos is the write cursor).
 *
 * Result: returns false if the end of input ('\0') is reached while still
 * waiting for a token to start.  Every successful exit goes through
 * RETURN_TOKEN.  Malformed input is reported through PRSSYNTAXERROR or
 * ereport(ERROR, ...); an unknown state code is reported with elog(ERROR).
 *
 * The loop is a state machine driven by the current input character.  After
 * each iteration state->prsbuf is advanced by pg_mblen(), except where a
 * branch uses "continue" to re-examine the same character.
 *
 *   WAITWORD      - nothing collected yet.  Whitespace is skipped; a quote
 *                   switches to WAITENDCMPLX; a backslash switches to
 *                   WAITNEXTCHAR (resuming in WAITENDWORD); an operator
 *                   character is a syntax error when oprisdelim is set; any
 *                   other character is copied and starts WAITENDWORD.
 *   WAITNEXTCHAR  - the character after a backslash.  It is copied as-is and
 *                   the machine resumes in oldstate; end of input here is an
 *                   error.
 *   WAITENDWORD   - inside an unquoted word.  Whitespace, end of input, or
 *                   (with oprisdelim) an operator character terminates the
 *                   word and returns it; an empty word is a syntax error.
 *                   A ':' also terminates the word: with oprisdelim the token
 *                   is returned, otherwise position info follows (INPOSINFO).
 *   WAITENDCMPLX  - inside a quoted word.  A quote moves to WAITCHARCMPLX, a
 *                   backslash escapes the next character, end of input is a
 *                   syntax error, anything else is copied.
 *   WAITCHARCMPLX - just after a quote inside a quoted word.  A second quote
 *                   is copied as a literal quote and quoting continues;
 *                   otherwise the word is terminated (empty is a syntax
 *                   error) and either returned (oprisdelim) or followed by
 *                   WAITPOSINFO, rechecking the current character.
 *   WAITPOSINFO   - after a quoted word: ':' starts position info, anything
 *                   else returns the token.
 *   INPOSINFO     - a digit is required.  The pos array is allocated with 4
 *                   entries and doubled as needed; the new entry gets
 *                   LIMITPOS(atoi(...)) as position and weight 0.  A position
 *                   of 0 is rejected.
 *   WAITPOSDELIM  - after a position number.  ',' starts the next position;
 *                   'a', 'A' or '*' set weight 3, 'b'/'B' weight 2, 'c'/'C'
 *                   weight 1, 'd'/'D' weight 0, each rejected if the entry
 *                   already has a non-zero weight; whitespace or end of input
 *                   returns the token; a digit is skipped (presumably the
 *                   remaining digits of the number atoi() already read);
 *                   anything else is a syntax error.
 *
 * NOTE(review): t_iseq, t_isspace, t_isdigit, COPYCHAR and RESIZEPRSBUF are
 * defined elsewhere; they are assumed to be multibyte-aware character tests,
 * a one-character copy, and a grow-the-word-buffer step respectively --
 * confirm against their definitions.
 */
{ int oldstate = 0; char *curpos = state->word; int statecode = WAITWORD; /* * pos is for collecting the comma delimited list of positions followed by * the actual token. */ WordEntryPos *pos = NULL; int npos = 0; /* elements of pos used */ int posalen = 0; /* allocated size of pos */ while (1) { if (statecode == WAITWORD) { if (*(state->prsbuf) == '\0') return false; else if (t_iseq(state->prsbuf, '\'')) statecode = WAITENDCMPLX; else if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) PRSSYNTAXERROR; else if (!t_isspace(state->prsbuf)) { COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); statecode = WAITENDWORD; } } else if (statecode == WAITNEXTCHAR) { if (*(state->prsbuf) == '\0') ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("there is no escaped character: \"%s\"", state->bufstart))); else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); Assert(oldstate != 0); statecode = oldstate; } } else if (statecode == WAITENDWORD) { if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || (state->oprisdelim && ISOPERATOR(state->prsbuf))) { RESIZEPRSBUF; if (curpos == state->word) PRSSYNTAXERROR; *(curpos) = '\0'; RETURN_TOKEN; } else if (t_iseq(state->prsbuf, ':')) { if (curpos == state->word) PRSSYNTAXERROR; *(curpos) = '\0'; if (state->oprisdelim) RETURN_TOKEN; else statecode = INPOSINFO; } else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); } } else if (statecode == WAITENDCMPLX) { if (t_iseq(state->prsbuf, '\'')) { statecode = WAITCHARCMPLX; } else if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDCMPLX; } else if (*(state->prsbuf) == '\0') PRSSYNTAXERROR; else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); } } else if 
(statecode == WAITCHARCMPLX) { if (t_iseq(state->prsbuf, '\'')) { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); statecode = WAITENDCMPLX; } else { RESIZEPRSBUF; *(curpos) = '\0'; if (curpos == state->word) PRSSYNTAXERROR; if (state->oprisdelim) { /* state->prsbuf+=pg_mblen(state->prsbuf); */ RETURN_TOKEN; } else statecode = WAITPOSINFO; continue; /* recheck current character */ } } else if (statecode == WAITPOSINFO) { if (t_iseq(state->prsbuf, ':')) statecode = INPOSINFO; else RETURN_TOKEN; } else if (statecode == INPOSINFO) { if (t_isdigit(state->prsbuf)) { if (posalen == 0) { posalen = 4; pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen); npos = 0; } else if (npos + 1 >= posalen) { posalen *= 2; pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen); } npos++; WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf))); /* we cannot get here in tsquery, so no need for 2 errmsgs */ if (WEP_GETPOS(pos[npos - 1]) == 0) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("wrong position info in tsvector: \"%s\"", state->bufstart))); WEP_SETWEIGHT(pos[npos - 1], 0); statecode = WAITPOSDELIM; } else PRSSYNTAXERROR; } else if (statecode == WAITPOSDELIM) { if (t_iseq(state->prsbuf, ',')) statecode = INPOSINFO; else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 3); } else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 2); } else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 1); } else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 0); } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0') 
RETURN_TOKEN; else if (!t_isdigit(state->prsbuf)) PRSSYNTAXERROR; } else /* internal error */ elog(ERROR, "unrecognized state in gettoken_tsvector: %d", statecode); /* get next char */ state->prsbuf += pg_mblen(state->prsbuf); } }
TSVectorParseState init_tsvector_parser | ( | char * | input, | |
bool | oprisdelim, | |||
bool | is_tsquery | |||
) |
Definition at line 45 of file tsvector_parser.c.
References TSVectorParseStateData::bufstart, TSVectorParseStateData::eml, TSVectorParseStateData::is_tsquery, TSVectorParseStateData::len, TSVectorParseStateData::oprisdelim, palloc(), pg_database_encoding_max_length(), TSVectorParseStateData::prsbuf, and TSVectorParseStateData::word.
Referenced by parse_tsquery(), and tsvectorin().
{
	/*
	 * Build a fresh parser state for the given input string.  Both the
	 * scan cursor (prsbuf) and the remembered start of the buffer
	 * (bufstart) begin at 'input'; the word buffer starts at 32 bytes.
	 */
	TSVectorParseState parser = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));

	/* input pointers */
	parser->bufstart = input;
	parser->prsbuf = input;

	/* output word buffer and the encoding's maximum character length */
	parser->len = 32;
	parser->word = (char *) palloc(parser->len);
	parser->eml = pg_database_encoding_max_length();

	/* behaviour flags supplied by the caller */
	parser->is_tsquery = is_tsquery;
	parser->oprisdelim = oprisdelim;

	return parser;
}
static void prssyntaxerror | ( | TSVectorParseState | state | ) | [static] |
Definition at line 128 of file tsvector_parser.c.
References TSVectorParseStateData::bufstart, ereport, errcode(), errmsg(), ERROR, and TSVectorParseStateData::is_tsquery.
void reset_tsvector_parser | ( | TSVectorParseState | state, | |
char * | input | |||
) |
Definition at line 65 of file tsvector_parser.c.
References TSVectorParseStateData::prsbuf.
Referenced by gettoken_query().
{
	/*
	 * Point the scan cursor at a new input position.  Only prsbuf is
	 * changed; bufstart, the word buffer and the flags are left as they
	 * were.
	 */
	state->prsbuf = input;
}