Header And Logo

PostgreSQL
The world's most advanced open source database.

Data Structures | Defines | Functions

tsvector_parser.c File Reference

#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Include dependency graph for tsvector_parser.c:

Go to the source code of this file.

Data Structures

struct  TSVectorParseStateData

Defines

#define RESIZEPRSBUF
#define ISOPERATOR(x)   ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
#define RETURN_TOKEN
#define WAITWORD   1
#define WAITENDWORD   2
#define WAITNEXTCHAR   3
#define WAITENDCMPLX   4
#define WAITPOSINFO   5
#define INPOSINFO   6
#define WAITPOSDELIM   7
#define WAITCHARCMPLX   8
#define PRSSYNTAXERROR   prssyntaxerror(state)

Functions

TSVectorParseState init_tsvector_parser (char *input, bool oprisdelim, bool is_tsquery)
void reset_tsvector_parser (TSVectorParseState state, char *input)
void close_tsvector_parser (TSVectorParseState state)
static void prssyntaxerror (TSVectorParseState state)
bool gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)

Define Documentation

#define INPOSINFO   6

Definition at line 121 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define ISOPERATOR (   x  )     ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )

Definition at line 92 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define PRSSYNTAXERROR   prssyntaxerror(state)

Definition at line 125 of file tsvector_parser.c.

#define RESIZEPRSBUF
Value:
/*
 * Grow the token buffer state->word when needed.
 *
 * Relies on the caller's locals "state" and "curpos" (the current write
 * position inside state->word).  If the bytes written so far plus
 * state->eml reach state->len, the buffer size is doubled with repalloc()
 * and curpos is re-pointed at the same offset inside the buffer returned
 * by repalloc().
 */
do { \
    int clen = curpos - state->word; \
    if ( clen + state->eml >= state->len ) \
    { \
        state->len *= 2; \
        state->word = (char *) repalloc(state->word, state->len); \
        curpos = state->word + clen; \
    } \
} while (0)

Definition at line 81 of file tsvector_parser.c.

#define RETURN_TOKEN
Value:
/*
 * Hand the finished token to the caller and return true from the
 * enclosing function.
 *
 * Relies on the caller's locals "state", "curpos", "pos" and "npos" and on
 * the out-parameters "strval", "lenval", "pos_ptr", "poslen" and "endptr".
 * The position array is passed out through pos_ptr/poslen when pos_ptr is
 * non-NULL (poslen is written without a NULL check of its own); otherwise
 * any collected array is released with pfree().  strval, lenval and endptr
 * are each written only when non-NULL: strval receives state->word itself,
 * lenval the number of bytes between state->word and curpos, and endptr
 * the current input position state->prsbuf.
 */
do { \
    if (pos_ptr != NULL) \
    { \
        *pos_ptr = pos; \
        *poslen = npos; \
    } \
    else if (pos != NULL) \
        pfree(pos); \
    \
    if (strval != NULL) \
        *strval = state->word; \
    if (lenval != NULL) \
        *lenval = curpos - state->word; \
    if (endptr != NULL) \
        *endptr = state->prsbuf; \
    return true; \
} while(0)

Definition at line 95 of file tsvector_parser.c.

#define WAITCHARCMPLX   8

Definition at line 123 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITENDCMPLX   4

Definition at line 119 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITENDWORD   2

Definition at line 117 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITNEXTCHAR   3

Definition at line 118 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITPOSDELIM   7

Definition at line 122 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITPOSINFO   5

Definition at line 120 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITWORD   1

Definition at line 116 of file tsvector_parser.c.

Referenced by gettoken_tsvector().


Function Documentation

void close_tsvector_parser ( TSVectorParseState  state  ) 

Definition at line 74 of file tsvector_parser.c.

References pfree(), and TSVectorParseStateData::word.

Referenced by parse_tsquery(), and tsvectorin().

{
    /* The token buffer is only reachable through state, so release it first. */
    char       *token_buf = state->word;

    pfree(token_buf);
    pfree(state);
}

bool gettoken_tsvector ( TSVectorParseState  state,
char **  strval,
int *  lenval,
WordEntryPos **  pos_ptr,
int *  poslen,
char **  endptr 
)

Definition at line 155 of file tsvector_parser.c.

References Assert, TSVectorParseStateData::bufstart, COPYCHAR, elog, ereport, errcode(), errmsg(), ERROR, INPOSINFO, ISOPERATOR, LIMITPOS, TSVectorParseStateData::oprisdelim, palloc(), pg_mblen(), TSVectorParseStateData::prsbuf, repalloc(), t_isdigit, t_iseq, t_isspace, WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, WEP_SETWEIGHT, and TSVectorParseStateData::word.

Referenced by gettoken_query(), and tsvectorin().

{
    /*
     * Scan one token from state->prsbuf using a small state machine.
     *
     * The token text is assembled in state->word.  Returns true (through
     * RETURN_TOKEN, which also fills the caller's non-NULL out-parameters)
     * once a token is complete, or false when the end of input is reached
     * while still waiting for a token to start.  Malformed input is reported
     * with ereport(ERROR) / PRSSYNTAXERROR.
     */

    /* state to resume once WAITNEXTCHAR has copied an escaped character */
    int         oldstate = 0;
    /* current write position inside state->word */
    char       *curpos = state->word;
    int         statecode = WAITWORD;

    /*
     * pos is for collecting the comma delimited list of positions followed by
     * the actual token.
     */
    WordEntryPos *pos = NULL;
    int         npos = 0;       /* elements of pos used */
    int         posalen = 0;    /* allocated size of pos */

    while (1)
    {
        if (statecode == WAITWORD)
        {
            /*
             * No token started yet.  Whitespace matches none of the branches
             * below and is simply skipped by the advance at the loop bottom.
             */
            if (*(state->prsbuf) == '\0')
                return false;
            else if (t_iseq(state->prsbuf, '\''))
                statecode = WAITENDCMPLX;   /* quoted token begins */
            else if (t_iseq(state->prsbuf, '\\'))
            {
                /* escaped first character; continue as an unquoted word */
                statecode = WAITNEXTCHAR;
                oldstate = WAITENDWORD;
            }
            else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
                PRSSYNTAXERROR;
            else if (!t_isspace(state->prsbuf))
            {
                /* first character of an unquoted word */
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
                statecode = WAITENDWORD;
            }
        }
        else if (statecode == WAITNEXTCHAR)
        {
            /* previous character was a backslash: copy this one verbatim */
            if (*(state->prsbuf) == '\0')
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("there is no escaped character: \"%s\"",
                                state->bufstart)));
            else
            {
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
                Assert(oldstate != 0);
                statecode = oldstate;   /* back to the state that saw the backslash */
            }
        }
        else if (statecode == WAITENDWORD)
        {
            /* inside an unquoted word */
            if (t_iseq(state->prsbuf, '\\'))
            {
                statecode = WAITNEXTCHAR;
                oldstate = WAITENDWORD;
            }
            else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
                     (state->oprisdelim && ISOPERATOR(state->prsbuf)))
            {
                /*
                 * Word ended by whitespace, end of input, or (when
                 * oprisdelim is set) an operator character.  RETURN_TOKEN
                 * returns before the advance at the loop bottom, so prsbuf
                 * is left on the terminating character.
                 */
                RESIZEPRSBUF;
                if (curpos == state->word)
                    PRSSYNTAXERROR;
                *(curpos) = '\0';
                RETURN_TOKEN;
            }
            else if (t_iseq(state->prsbuf, ':'))
            {
                /*
                 * A colon ends the word.  With oprisdelim set the token is
                 * returned with prsbuf still on the colon; otherwise position
                 * info is parsed next.
                 */
                if (curpos == state->word)
                    PRSSYNTAXERROR;
                *(curpos) = '\0';
                if (state->oprisdelim)
                    RETURN_TOKEN;
                else
                    statecode = INPOSINFO;
            }
            else
            {
                /* ordinary character: append it to the word */
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
            }
        }
        else if (statecode == WAITENDCMPLX)
        {
            /* inside a quoted token */
            if (t_iseq(state->prsbuf, '\''))
            {
                /* closing quote or first half of a doubled quote; decided next */
                statecode = WAITCHARCMPLX;
            }
            else if (t_iseq(state->prsbuf, '\\'))
            {
                statecode = WAITNEXTCHAR;
                oldstate = WAITENDCMPLX;
            }
            else if (*(state->prsbuf) == '\0')
                PRSSYNTAXERROR;     /* input ended inside the quotes */
            else
            {
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
            }
        }
        else if (statecode == WAITCHARCMPLX)
        {
            /* previous character was a quote seen inside a quoted token */
            if (t_iseq(state->prsbuf, '\''))
            {
                /* doubled quote: store one quote and stay inside the token */
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
                statecode = WAITENDCMPLX;
            }
            else
            {
                /* the earlier quote closed the token; it must not be empty */
                RESIZEPRSBUF;
                *(curpos) = '\0';
                if (curpos == state->word)
                    PRSSYNTAXERROR;
                if (state->oprisdelim)
                {
                    /* state->prsbuf+=pg_mblen(state->prsbuf); */
                    RETURN_TOKEN;
                }
                else
                    statecode = WAITPOSINFO;
                continue;       /* recheck current character */
            }
        }
        else if (statecode == WAITPOSINFO)
        {
            /* after a quoted token: a colon introduces position info */
            if (t_iseq(state->prsbuf, ':'))
                statecode = INPOSINFO;
            else
                RETURN_TOKEN;
        }
        else if (statecode == INPOSINFO)
        {
            /* a position number must start here */
            if (t_isdigit(state->prsbuf))
            {
                /* allocate the position array on first use, double it when full */
                if (posalen == 0)
                {
                    posalen = 4;
                    pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
                    npos = 0;
                }
                else if (npos + 1 >= posalen)
                {
                    posalen *= 2;
                    pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
                }
                npos++;
                /* atoi() reads the number starting at the current digit */
                WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
                /* we cannot get here in tsquery, so no need for 2 errmsgs */
                if (WEP_GETPOS(pos[npos - 1]) == 0)
                    ereport(ERROR,
                            (errcode(ERRCODE_SYNTAX_ERROR),
                             errmsg("wrong position info in tsvector: \"%s\"",
                                    state->bufstart)));
                WEP_SETWEIGHT(pos[npos - 1], 0);
                statecode = WAITPOSDELIM;
            }
            else
                PRSSYNTAXERROR;
        }
        else if (statecode == WAITPOSDELIM)
        {
            /*
             * After the first digit of a position.  Further digits match no
             * branch below and are skipped; a comma starts the next position;
             * a weight letter is accepted only while the stored weight is
             * still zero.
             */
            if (t_iseq(state->prsbuf, ','))
                statecode = INPOSINFO;
            else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 3);
            }
            else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 2);
            }
            else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 1);
            }
            else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 0);
            }
            else if (t_isspace(state->prsbuf) ||
                     *(state->prsbuf) == '\0')
                RETURN_TOKEN;       /* position list finished */
            else if (!t_isdigit(state->prsbuf))
                PRSSYNTAXERROR;
        }
        else    /* internal error */
            elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
                 statecode);

        /* get next char */
        state->prsbuf += pg_mblen(state->prsbuf);
    }
}

TSVectorParseState init_tsvector_parser ( char *  input,
bool  oprisdelim,
bool  is_tsquery 
)
static void prssyntaxerror ( TSVectorParseState  state  )  [static]

Definition at line 128 of file tsvector_parser.c.

References TSVectorParseStateData::bufstart, ereport, errcode(), errmsg(), ERROR, and TSVectorParseStateData::is_tsquery.

{
    /*
     * Report a syntax error that quotes state->bufstart.  The message names
     * tsquery or tsvector according to state->is_tsquery.
     */
    if (state->is_tsquery)
    {
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("syntax error in tsquery: \"%s\"", state->bufstart)));
    }
    else
    {
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
    }
}

void reset_tsvector_parser ( TSVectorParseState  state,
char *  input 
)

Definition at line 65 of file tsvector_parser.c.

References TSVectorParseStateData::prsbuf.

Referenced by gettoken_query().

{
    /* Re-point the parser's read position at input; no other field of state is changed here. */
    state->prsbuf = input;
}