Header And Logo

PostgreSQL
| The world's most advanced open source database.

Functions

to_tsany.c File Reference

#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
Include dependency graph for to_tsany.c:

Go to the source code of this file.

Functions

Datum get_current_ts_config (PG_FUNCTION_ARGS)
static int compareWORD (const void *a, const void *b)
static int uniqueWORD (ParsedWord *a, int32 l)
TSVector make_tsvector (ParsedText *prs)
Datum to_tsvector_byid (PG_FUNCTION_ARGS)
Datum to_tsvector (PG_FUNCTION_ARGS)
static void pushval_morph (Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
Datum to_tsquery_byid (PG_FUNCTION_ARGS)
Datum to_tsquery (PG_FUNCTION_ARGS)
Datum plainto_tsquery_byid (PG_FUNCTION_ARGS)
Datum plainto_tsquery (PG_FUNCTION_ARGS)

Function Documentation

static int compareWORD ( const void *  a,
const void *  b 
) [static]

Definition at line 31 of file to_tsany.c.

References tsCompareString(), and word().

Referenced by uniqueWORD().

{
    int         res;

    res = tsCompareString(
               ((const ParsedWord *) a)->word, ((const ParsedWord *) a)->len,
               ((const ParsedWord *) b)->word, ((const ParsedWord *) b)->len,
                          false);

    if (res == 0)
    {
        if (((const ParsedWord *) a)->pos.pos == ((const ParsedWord *) b)->pos.pos)
            return 0;

        res = (((const ParsedWord *) a)->pos.pos > ((const ParsedWord *) b)->pos.pos) ? 1 : -1;
    }

    return res;
}

Datum get_current_ts_config ( PG_FUNCTION_ARGS   ) 

Definition at line 22 of file to_tsany.c.

References getTSCurrentConfig(), and PG_RETURN_OID.

TSVector make_tsvector ( ParsedText prs  ) 

Definition at line 138 of file to_tsany.c.

References ParsedWord::alen, ARRPTR, CALCDATASIZE, ParsedText::curwords, elog, ereport, errcode(), errmsg(), ERROR, WordEntry::haspos, i, WordEntry::len, ParsedWord::len, MAXSTRPOS, palloc0(), pfree(), WordEntry::pos, ParsedWord::pos, POSDATAPTR, SET_VARSIZE, SHORTALIGN, TSVectorData::size, STRPTR, uniqueWORD(), WEP_SETPOS, WEP_SETWEIGHT, ParsedWord::word, and ParsedText::words.

Referenced by to_tsvector_byid(), and tsvector_update_trigger().

{
    int         i,
                j,
                lenstr = 0,
                totallen;
    TSVector    in;
    WordEntry  *ptr;
    char       *str;
    int         stroff;

    prs->curwords = uniqueWORD(prs->words, prs->curwords);
    for (i = 0; i < prs->curwords; i++)
    {
        lenstr += prs->words[i].len;
        if (prs->words[i].alen)
        {
            lenstr = SHORTALIGN(lenstr);
            lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
        }
    }

    if (lenstr > MAXSTRPOS)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));

    totallen = CALCDATASIZE(prs->curwords, lenstr);
    in = (TSVector) palloc0(totallen);
    SET_VARSIZE(in, totallen);
    in->size = prs->curwords;

    ptr = ARRPTR(in);
    str = STRPTR(in);
    stroff = 0;
    for (i = 0; i < prs->curwords; i++)
    {
        ptr->len = prs->words[i].len;
        ptr->pos = stroff;
        memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
        stroff += prs->words[i].len;
        pfree(prs->words[i].word);
        if (prs->words[i].alen)
        {
            int         k = prs->words[i].pos.apos[0];
            WordEntryPos *wptr;

            if (k > 0xFFFF)
                elog(ERROR, "positions array too long");

            ptr->haspos = 1;
            stroff = SHORTALIGN(stroff);
            *(uint16 *) (str + stroff) = (uint16) k;
            wptr = POSDATAPTR(in, ptr);
            for (j = 0; j < k; j++)
            {
                WEP_SETWEIGHT(wptr[j], 0);
                WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
            }
            stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
            pfree(prs->words[i].pos.apos);
        }
        else
            ptr->haspos = 0;
        ptr++;
    }
    pfree(prs->words);
    return in;
}

Datum plainto_tsquery ( PG_FUNCTION_ARGS   ) 
Datum plainto_tsquery_byid ( PG_FUNCTION_ARGS   ) 

Definition at line 386 of file to_tsany.c.

References Assert, clean_fakeval(), COMPUTESIZE, GETOPERAND, GETQUERY, HDRSIZETQ, ObjectIdGetDatum, parse_tsquery(), pfree(), PG_GETARG_OID, PG_GETARG_TEXT_P, PG_RETURN_POINTER, PG_RETURN_TSQUERY, pushval_morph(), SET_VARSIZE, TSQueryData::size, text_to_cstring(), and VARSIZE.

Referenced by plainto_tsquery(), and tsa_plainto_tsquery_name().

{
    Oid         cfgid = PG_GETARG_OID(0);
    text       *in = PG_GETARG_TEXT_P(1);
    TSQuery     query;
    QueryItem  *res;
    int32       len;

    query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);

    if (query->size == 0)
        PG_RETURN_TSQUERY(query);

    res = clean_fakeval(GETQUERY(query), &len);
    if (!res)
    {
        SET_VARSIZE(query, HDRSIZETQ);
        query->size = 0;
        PG_RETURN_POINTER(query);
    }
    memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));

    if (len != query->size)
    {
        char       *oldoperand = GETOPERAND(query);
        int32       lenoperand = VARSIZE(query) - (oldoperand - (char *) query);

        Assert(len < query->size);

        query->size = len;
        memcpy((void *) GETOPERAND(query), oldoperand, lenoperand);
        SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
    }

    pfree(res);
    PG_RETURN_POINTER(query);
}

static void pushval_morph ( Datum  opaque,
TSQueryParserState  state,
char *  strval,
int  lenval,
int16  weight,
bool  prefix 
) [static]

Definition at line 268 of file to_tsany.c.

References ParsedText::curwords, DatumGetObjectId, ParsedWord::flags, ParsedWord::len, ParsedText::lenwords, ParsedWord::nvariant, OP_AND, OP_OR, palloc(), parsetext(), pfree(), ParsedWord::pos, ParsedText::pos, pushOperator(), pushStop(), pushValue(), TSL_PREFIX, ParsedWord::word, and ParsedText::words.

Referenced by plainto_tsquery_byid(), and to_tsquery_byid().

{
    int32       count = 0;
    ParsedText  prs;
    uint32      variant,
                pos,
                cntvar = 0,
                cntpos = 0,
                cnt = 0;
    Oid         cfg_id = DatumGetObjectId(opaque);      /* the input is actually
                                                         * an Oid, not a pointer */

    prs.lenwords = 4;
    prs.curwords = 0;
    prs.pos = 0;
    prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);

    parsetext(cfg_id, &prs, strval, lenval);

    if (prs.curwords > 0)
    {

        while (count < prs.curwords)
        {
            pos = prs.words[count].pos.pos;
            cntvar = 0;
            while (count < prs.curwords && pos == prs.words[count].pos.pos)
            {
                variant = prs.words[count].nvariant;

                cnt = 0;
                while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
                {

                    pushValue(state, prs.words[count].word, prs.words[count].len, weight,
                              ((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
                    pfree(prs.words[count].word);
                    if (cnt)
                        pushOperator(state, OP_AND);
                    cnt++;
                    count++;
                }

                if (cntvar)
                    pushOperator(state, OP_OR);
                cntvar++;
            }

            if (cntpos)
                pushOperator(state, OP_AND);

            cntpos++;
        }

        pfree(prs.words);

    }
    else
        pushStop(state);
}

Datum to_tsquery ( PG_FUNCTION_ARGS   ) 
Datum to_tsquery_byid ( PG_FUNCTION_ARGS   ) 

Definition at line 330 of file to_tsany.c.

References Assert, clean_fakeval(), COMPUTESIZE, GETOPERAND, GETQUERY, HDRSIZETQ, memmove, ObjectIdGetDatum, parse_tsquery(), pfree(), PG_GETARG_OID, PG_GETARG_TEXT_P, PG_RETURN_POINTER, PG_RETURN_TSQUERY, pushval_morph(), SET_VARSIZE, TSQueryData::size, text_to_cstring(), and VARSIZE.

Referenced by to_tsquery(), and tsa_to_tsquery_name().

{
    Oid         cfgid = PG_GETARG_OID(0);
    text       *in = PG_GETARG_TEXT_P(1);
    TSQuery     query;
    QueryItem  *res;
    int32       len;

    query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);

    if (query->size == 0)
        PG_RETURN_TSQUERY(query);

    /* clean out any stopword placeholders from the tree */
    res = clean_fakeval(GETQUERY(query), &len);
    if (!res)
    {
        SET_VARSIZE(query, HDRSIZETQ);
        query->size = 0;
        PG_RETURN_POINTER(query);
    }
    memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));

    /*
     * Removing the stopword placeholders might've resulted in fewer
     * QueryItems. If so, move the operands up accordingly.
     */
    if (len != query->size)
    {
        char       *oldoperand = GETOPERAND(query);
        int32       lenoperand = VARSIZE(query) - (oldoperand - (char *) query);

        Assert(len < query->size);

        query->size = len;
        memmove((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query));
        SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
    }

    pfree(res);
    PG_RETURN_TSQUERY(query);
}

Datum to_tsvector ( PG_FUNCTION_ARGS   ) 
Datum to_tsvector_byid ( PG_FUNCTION_ARGS   ) 

Definition at line 209 of file to_tsany.c.

References CALCDATASIZE, ParsedText::curwords, ParsedText::lenwords, make_tsvector(), palloc(), parsetext(), pfree(), PG_FREE_IF_COPY, PG_GETARG_OID, PG_GETARG_TEXT_P, PG_RETURN_POINTER, ParsedText::pos, SET_VARSIZE, TSVectorData::size, VARDATA, VARHDRSZ, VARSIZE, and ParsedText::words.

Referenced by to_tsvector(), and tsa_to_tsvector_name().

{
    Oid         cfgId = PG_GETARG_OID(0);
    text       *in = PG_GETARG_TEXT_P(1);
    ParsedText  prs;
    TSVector    out;

    prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6;        /* just estimation of
                                                         * word's number */
    if (prs.lenwords == 0)
        prs.lenwords = 2;
    prs.curwords = 0;
    prs.pos = 0;
    prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);

    parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
    PG_FREE_IF_COPY(in, 1);

    if (prs.curwords)
        out = make_tsvector(&prs);
    else
    {
        pfree(prs.words);
        out = palloc(CALCDATASIZE(0, 0));
        SET_VARSIZE(out, CALCDATASIZE(0, 0));
        out->size = 0;
    }

    PG_RETURN_POINTER(out);
}

static int uniqueWORD ( ParsedWord a,
int32  l 
) [static]

Definition at line 52 of file to_tsany.c.

References ParsedWord::alen, compareWORD(), ParsedWord::len, LIMITPOS, MAXENTRYPOS, MAXNUMPOS, palloc(), pfree(), ParsedWord::pos, qsort, repalloc(), and ParsedWord::word.

Referenced by make_tsvector().

{
    ParsedWord *ptr,
               *res;
    int         tmppos;

    if (l == 1)
    {
        tmppos = LIMITPOS(a->pos.pos);
        a->alen = 2;
        a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
        a->pos.apos[0] = 1;
        a->pos.apos[1] = tmppos;
        return l;
    }

    res = a;
    ptr = a + 1;

    /*
     * Sort words with its positions
     */
    qsort((void *) a, l, sizeof(ParsedWord), compareWORD);

    /*
     * Initialize first word and its first position
     */
    tmppos = LIMITPOS(a->pos.pos);
    a->alen = 2;
    a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
    a->pos.apos[0] = 1;
    a->pos.apos[1] = tmppos;

    /*
     * Summarize position information for each word
     */
    while (ptr - a < l)
    {
        if (!(ptr->len == res->len &&
              strncmp(ptr->word, res->word, res->len) == 0))
        {
            /*
             * Got a new word, so put it in result
             */
            res++;
            res->len = ptr->len;
            res->word = ptr->word;
            tmppos = LIMITPOS(ptr->pos.pos);
            res->alen = 2;
            res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
            res->pos.apos[0] = 1;
            res->pos.apos[1] = tmppos;
        }
        else
        {
            /*
             * The word already exists, so adjust position information. But
             * before we should check size of position's array, max allowed
             * value for position and uniqueness of position
             */
            pfree(ptr->word);
            if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
                res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
            {
                if (res->pos.apos[0] + 1 >= res->alen)
                {
                    res->alen *= 2;
                    res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
                }
                if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
                {
                    res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
                    res->pos.apos[0]++;
                }
            }
        }
        ptr++;
    }

    return res + 1 - a;
}