Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Typedefs | Functions

dict_thesaurus.c File Reference

#include "postgres.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
Include dependency graph for dict_thesaurus.c:

Go to the source code of this file.

Data Structures

struct  LexemeInfo
struct  TheLexeme
struct  TheSubstitute
struct  DictThesaurus

Defines

#define DT_USEASIS   0x1000
#define TR_WAITLEX   1
#define TR_INLEX   2
#define TR_WAITSUBS   3
#define TR_INSUBS   4

Typedefs

typedef struct LexemeInfo LexemeInfo

Functions

static void newLexeme (DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 posinsubst)
static void addWrd (DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
static void thesaurusRead (char *filename, DictThesaurus *d)
static TheLexeme * addCompiledLexeme (TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
static int cmpLexemeInfo (LexemeInfo *a, LexemeInfo *b)
static int cmpLexeme (const TheLexeme *a, const TheLexeme *b)
static int cmpLexemeQ (const void *a, const void *b)
static int cmpTheLexeme (const void *a, const void *b)
static void compileTheLexeme (DictThesaurus *d)
static void compileTheSubstitute (DictThesaurus *d)
Datum thesaurus_init (PG_FUNCTION_ARGS)
static LexemeInfo * findTheLexeme (DictThesaurus *d, char *lexeme)
static bool matchIdSubst (LexemeInfo *stored, uint16 idsubst)
static LexemeInfo * findVariant (LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
static TSLexeme * copyTSLexeme (TheSubstitute *ts)
static TSLexeme * checkMatch (DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
Datum thesaurus_lexize (PG_FUNCTION_ARGS)

Define Documentation

#define DT_USEASIS   0x1000

Definition at line 27 of file dict_thesaurus.c.

Referenced by compileTheSubstitute().

#define TR_INLEX   2

Definition at line 163 of file dict_thesaurus.c.

Referenced by thesaurusRead().

#define TR_INSUBS   4

Definition at line 165 of file dict_thesaurus.c.

Referenced by thesaurusRead().

#define TR_WAITLEX   1

Definition at line 162 of file dict_thesaurus.c.

Referenced by thesaurusRead().

#define TR_WAITSUBS   3

Definition at line 164 of file dict_thesaurus.c.

Referenced by thesaurusRead().


Typedef Documentation

typedef struct LexemeInfo LexemeInfo

Function Documentation

static TheLexeme* addCompiledLexeme ( TheLexeme *  newwrds,
int *  nnw,
int *  tnm,
TSLexeme *  lexeme,
LexemeInfo *  src,
uint16  tnvariant 
) [static]

Definition at line 298 of file dict_thesaurus.c.

References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, TSLexeme::lexeme, LexemeInfo::nextentry, palloc(), LexemeInfo::posinsubst, pstrdup(), repalloc(), and LexemeInfo::tnvariant.

Referenced by compileTheLexeme().

{
    TheLexeme  *slot;
    LexemeInfo *entry;

    /* Double the array once every allocated slot is already in use. */
    if (*nnw >= *tnm)
    {
        *tnm *= 2;
        newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
    }

    slot = &newwrds[*nnw];
    entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
    slot->entries = entry;

    if (lexeme == NULL || lexeme->lexeme == NULL)
    {
        /* No text supplied: store a NULL lexeme that forms a variant by itself. */
        slot->lexeme = NULL;
        entry->tnvariant = 1;
    }
    else
    {
        /* Keep a private copy of the text together with the caller's variant size. */
        slot->lexeme = pstrdup(lexeme->lexeme);
        entry->tnvariant = tnvariant;
    }

    /* Rule number and position are inherited from the source entry. */
    entry->idsubst = src->idsubst;
    entry->posinsubst = src->posinsubst;
    entry->nextentry = NULL;

    *nnw += 1;

    /* The array may have moved; the caller must use the returned pointer. */
    return newwrds;
}

static void addWrd ( DictThesaurus *  d,
char *  b,
char *  e,
uint16  idsubst,
uint16  nwrd,
uint16  posinsubst,
bool  useasis 
) [static]

Definition at line 105 of file dict_thesaurus.c.

References TSLexeme::flags, TheSubstitute::lastlexeme, TSLexeme::lexeme, DictThesaurus::nsubst, TSLexeme::nvariant, palloc(), repalloc(), TheSubstitute::res, and DictThesaurus::subst.

Referenced by thesaurusRead().

{
    /*
     * Fill level and capacity of the result array of the rule currently
     * being read.  They are static, so they persist across calls and are
     * reset whenever the first word (nwrd == 0) of a rule arrives.
     */
    static int  used = 0;
    static int  allocated = 0;
    TheSubstitute *rule;
    TSLexeme   *out;
    size_t      len = (size_t) (e - b);

    if (nwrd == 0)
    {
        used = 0;
        allocated = 0;

        /* Make sure d->subst has a slot for this rule number. */
        if (idsubst >= d->nsubst)
        {
            if (d->nsubst != 0)
            {
                d->nsubst *= 2;
                d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
            }
            else
            {
                d->nsubst = 16;
                d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
            }
        }
    }

    rule = &d->subst[idsubst];
    rule->lastlexeme = posinsubst - 1;

    /* Always keep room for the new word plus the NULL terminator after it. */
    if (used + 1 >= allocated)
    {
        if (allocated != 0)
        {
            allocated *= 2;
            rule->res = (TSLexeme *) repalloc(rule->res, sizeof(TSLexeme) * allocated);
        }
        else
        {
            allocated = 2;
            rule->res = (TSLexeme *) palloc(sizeof(TSLexeme) * allocated);
        }
    }

    /* Copy the word [b, e) into a freshly allocated NUL-terminated string. */
    out = &rule->res[used];
    out->lexeme = palloc(len + 1);
    memcpy(out->lexeme, b, len);
    out->lexeme[len] = '\0';

    out->nvariant = nwrd;
    out->flags = useasis ? DT_USEASIS : 0;

    /* Terminate the array after the word just stored. */
    used++;
    rule->res[used].lexeme = NULL;
}

static TSLexeme* checkMatch ( DictThesaurus *  d,
LexemeInfo *  info,
uint16  curpos,
bool *  moreres 
) [static]

Definition at line 768 of file dict_thesaurus.c.

References Assert, copyTSLexeme(), LexemeInfo::idsubst, TheSubstitute::lastlexeme, LexemeInfo::nextvariant, DictThesaurus::nsubst, and DictThesaurus::subst.

Referenced by thesaurus_lexize().

{
    LexemeInfo *cur;

    *moreres = false;

    /* Walk the candidate chain looking for a rule that ends at curpos. */
    for (cur = info; cur != NULL; cur = cur->nextvariant)
    {
        TheSubstitute *rule;

        Assert(cur->idsubst < d->nsubst);

        /* Remember that further candidates exist behind this one. */
        if (cur->nextvariant != NULL)
            *moreres = true;

        rule = d->subst + cur->idsubst;
        if (rule->lastlexeme == curpos)
            return copyTSLexeme(rule);
    }

    /* No candidate rule is complete at this position. */
    return NULL;
}

static int cmpLexeme ( const TheLexeme *  a,
const TheLexeme *  b 
) [static]

Definition at line 351 of file dict_thesaurus.c.

References TheLexeme::lexeme, and NULL.

Referenced by cmpLexemeQ(), cmpTheLexeme(), and compileTheLexeme().

{
    /* Entries with a NULL lexeme sort after every entry that has text. */
    if (a->lexeme == NULL && b->lexeme == NULL)
        return 0;
    if (a->lexeme == NULL)
        return 1;
    if (b->lexeme == NULL)
        return -1;

    /* Both have text: plain byte-wise string order. */
    return strcmp(a->lexeme, b->lexeme);
}

static int cmpLexemeInfo ( LexemeInfo *  a,
LexemeInfo *  b 
) [static]

Definition at line 329 of file dict_thesaurus.c.

References LexemeInfo::idsubst, NULL, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.

Referenced by cmpTheLexeme(), and compileTheLexeme().

{
    /* A missing side compares equal to anything. */
    if (a == NULL || b == NULL)
        return 0;

    /* Order by rule number, then position in the rule, then variant size. */
    if (a->idsubst != b->idsubst)
        return (a->idsubst > b->idsubst) ? 1 : -1;

    if (a->posinsubst != b->posinsubst)
        return (a->posinsubst > b->posinsubst) ? 1 : -1;

    if (a->tnvariant != b->tnvariant)
        return (a->tnvariant > b->tnvariant) ? 1 : -1;

    return 0;
}

static int cmpLexemeQ ( const void *  a,
const void *  b 
) [static]

Definition at line 367 of file dict_thesaurus.c.

References cmpLexeme().

Referenced by findTheLexeme().

{
    /* Adapter giving cmpLexeme() a generic (const void *) comparator signature. */
    const TheLexeme *left = (const TheLexeme *) a;
    const TheLexeme *right = (const TheLexeme *) b;

    return cmpLexeme(left, right);
}

static int cmpTheLexeme ( const void *  a,
const void *  b 
) [static]

Definition at line 373 of file dict_thesaurus.c.

References cmpLexeme(), cmpLexemeInfo(), and TheLexeme::entries.

Referenced by compileTheLexeme().

{
    const TheLexeme *left = (const TheLexeme *) a;
    const TheLexeme *right = (const TheLexeme *) b;
    int         bytext = cmpLexeme(left, right);

    /* Primary key: the lexeme text (NULL lexemes last). */
    if (bytext != 0)
        return bytext;

    /* Tie-break on the entry info, in reversed (descending) order. */
    return -cmpLexemeInfo(left->entries, right->entries);
}

static void compileTheLexeme ( DictThesaurus *  d  )  [static]

Definition at line 386 of file dict_thesaurus.c.

References addCompiledLexeme(), cmpLexeme(), cmpLexemeInfo(), cmpTheLexeme(), DatumGetPointer, TSDictionaryCacheEntry::dictData, TheLexeme::entries, ereport, errcode(), errhint(), errmsg(), ERROR, FunctionCall4, i, LexemeInfo::idsubst, Int32GetDatum, TSLexeme::lexeme, TheLexeme::lexeme, TSDictionaryCacheEntry::lexize, LexemeInfo::nextentry, DictThesaurus::ntwrds, NULL, TSLexeme::nvariant, DictThesaurus::nwrds, palloc(), pfree(), PointerGetDatum, qsort, repalloc(), DictThesaurus::subdict, and DictThesaurus::wrds.

Referenced by thesaurus_init().

{
    /*
     * Rebuild d->wrds: every sample word collected so far is passed through
     * the subdictionary's lexize function, the resulting lexemes are stored
     * in a fresh array, the array is sorted, and entries with the same
     * lexeme text are merged into a single element whose "entries" chain
     * (linked through nextentry) lists every place the lexeme occurs.
     */
    int         i,
                nnw = 0,
                tnm = 16;
    TheLexeme  *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
               *ptrwrds;

    for (i = 0; i < d->nwrds; i++)
    {
        TSLexeme   *ptr;

        if (strcmp(d->wrds[i].lexeme, "?") == 0)        /* Is stop word marker? */
            /* "?" is recorded as an entry with a NULL lexeme */
            newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
        else
        {
            /* normalize the sample word with the subdictionary */
            ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
                                       PointerGetDatum(d->subdict->dictData),
                                          PointerGetDatum(d->wrds[i].lexeme),
                                    Int32GetDatum(strlen(d->wrds[i].lexeme)),
                                                     PointerGetDatum(NULL)));

            /* NULL result: unknown word; empty result: stop word */
            if (!ptr)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
                                d->wrds[i].lexeme,
                                d->wrds[i].entries->idsubst + 1)));
            else if (!(ptr->lexeme))
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
                                d->wrds[i].lexeme,
                                d->wrds[i].entries->idsubst + 1),
                         errhint("Use \"?\" to represent a stop word within a sample phrase.")));
            else
            {
                /* process the result one variant (run of equal nvariant) at a time */
                while (ptr->lexeme)
                {
                    TSLexeme   *remptr = ptr + 1;
                    int         tnvar = 1;
                    int         curvar = ptr->nvariant;

                    /* compute n words in one variant */
                    while (remptr->lexeme)
                    {
                        if (remptr->nvariant != (remptr - 1)->nvariant)
                            break;
                        tnvar++;
                        remptr++;
                    }

                    /* store each lexeme of the variant, tagged with the variant's size */
                    remptr = ptr;
                    while (remptr->lexeme && remptr->nvariant == curvar)
                    {
                        newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
                        remptr++;
                    }

                    ptr = remptr;
                }
            }
        }

        /* the raw word and its entry have been copied; release them */
        pfree(d->wrds[i].lexeme);
        pfree(d->wrds[i].entries);
    }

    /* replace the raw array with the compiled one */
    if (d->wrds)
        pfree(d->wrds);
    d->wrds = newwrds;
    d->nwrds = nnw;
    d->ntwrds = tnm;

    if (d->nwrds > 1)
    {
        /*
         * cmpTheLexeme orders equal lexemes by descending entry info, so
         * the chain built below by prepending ends up in ascending order.
         */
        qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);

        /* uniq */
        /* newwrds: last element kept; ptrwrds: element being examined */
        newwrds = d->wrds;
        ptrwrds = d->wrds + 1;
        while (ptrwrds - d->wrds < d->nwrds)
        {
            if (cmpLexeme(ptrwrds, newwrds) == 0)
            {
                /* same text: keep the entry only if it differs from the chain head */
                if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
                {
                    ptrwrds->entries->nextentry = newwrds->entries;
                    newwrds->entries = ptrwrds->entries;
                }
                else
                    pfree(ptrwrds->entries);

                if (ptrwrds->lexeme)
                    pfree(ptrwrds->lexeme);
            }
            else
            {
                /* new text: move it down next to the previously kept element */
                newwrds++;
                *newwrds = *ptrwrds;
            }

            ptrwrds++;
        }

        /* shrink the array to the number of distinct lexemes */
        d->nwrds = newwrds - d->wrds + 1;
        d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
    }
}

static void compileTheSubstitute ( DictThesaurus *  d  )  [static]

Definition at line 497 of file dict_thesaurus.c.

References DatumGetPointer, TSDictionaryCacheEntry::dictData, DT_USEASIS, ereport, errcode(), errmsg(), ERROR, TSLexeme::flags, FunctionCall4, i, Int32GetDatum, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, DictThesaurus::nsubst, NULL, palloc(), pfree(), PointerGetDatum, pstrdup(), repalloc(), TheSubstitute::res, TheSubstitute::reslen, DictThesaurus::subdict, and DictThesaurus::subst.

Referenced by thesaurus_init().

{
    /*
     * For every rule, replace the raw substitute words in d->subst[i].res
     * by the lexemes the subdictionary produces for them (or by the words
     * themselves when they carry DT_USEASIS), and record the number of
     * resulting lexemes in reslen.
     */
    int         i;

    for (i = 0; i < d->nsubst; i++)
    {
        /* rem keeps the raw word array; res is replaced by a new output array */
        TSLexeme   *rem = d->subst[i].res,
                   *outptr,
                   *inptr;
        int         n = 2;

        outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
        outptr->lexeme = NULL;
        inptr = rem;

        while (inptr && inptr->lexeme)
        {
            TSLexeme   *lexized,
                        tmplex[2];

            if (inptr->flags & DT_USEASIS)
            {                   /* do not lexize */
                /* fake a one-element, NULL-terminated lexize result */
                tmplex[0] = *inptr;
                tmplex[0].flags = 0;
                tmplex[1].lexeme = NULL;
                lexized = tmplex;
            }
            else
            {
                lexized = (TSLexeme *) DatumGetPointer(
                                                       FunctionCall4(
                                                       &(d->subdict->lexize),
                                       PointerGetDatum(d->subdict->dictData),
                                              PointerGetDatum(inptr->lexeme),
                                        Int32GetDatum(strlen(inptr->lexeme)),
                                                        PointerGetDatum(NULL)
                                                                     )
                    );
            }

            if (lexized && lexized->lexeme)
            {
                /*
                 * Index of the first output lexeme produced for this word,
                 * or -1 when this is the first word of the rule's output.
                 */
                int         toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;

                while (lexized->lexeme)
                {
                    /* grow the output array, re-deriving outptr since the array may move */
                    if (outptr - d->subst[i].res + 1 >= n)
                    {
                        int         diff = outptr - d->subst[i].res;

                        n *= 2;
                        d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
                        outptr = d->subst[i].res + diff;
                    }

                    /* copy the lexeme, giving the output its own copy of the text */
                    *outptr = *lexized;
                    outptr->lexeme = pstrdup(lexized->lexeme);

                    outptr++;
                    lexized++;
                }

                /* flag the first lexeme of every word after the first with TSL_ADDPOS */
                if (toset > 0)
                    d->subst[i].res[toset].flags |= TSL_ADDPOS;
            }
            else if (lexized)
            {
                /* non-NULL but empty result: the subdictionary treats it as a stop word */
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
                                inptr->lexeme, i + 1)));
            }
            else
            {
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
                                inptr->lexeme, i + 1)));
            }

            /* the raw word text is no longer needed */
            if (inptr->lexeme)
                pfree(inptr->lexeme);
            inptr++;
        }

        /* nothing was written: the rule has no substitute lexemes at all */
        if (outptr == d->subst[i].res)
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("thesaurus substitute phrase is empty (rule %d)",
                            i + 1)));

        d->subst[i].reslen = outptr - d->subst[i].res;

        /* release the raw word array */
        pfree(rem);
    }
}

static TSLexeme* copyTSLexeme ( TheSubstitute *  ts  )  [static]

Definition at line 750 of file dict_thesaurus.c.

References i, TSLexeme::lexeme, palloc(), pstrdup(), TheSubstitute::res, and TheSubstitute::reslen.

Referenced by checkMatch().

{
    /* Room for every result lexeme plus a NULL-lexeme terminator. */
    TSLexeme   *copy = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
    uint16      k = 0;

    while (k < ts->reslen)
    {
        /* Duplicate the element, then give the copy its own string. */
        copy[k] = ts->res[k];
        copy[k].lexeme = pstrdup(ts->res[k].lexeme);
        k++;
    }

    copy[ts->reslen].lexeme = NULL;

    return copy;
}

static LexemeInfo* findTheLexeme ( DictThesaurus *  d,
char *  lexeme 
) [static]

Definition at line 654 of file dict_thesaurus.c.

References cmpLexemeQ(), TheLexeme::entries, TheLexeme::lexeme, NULL, DictThesaurus::nwrds, and DictThesaurus::wrds.

Referenced by thesaurus_lexize().

{
    TheLexeme   probe;
    TheLexeme  *hit;

    /* Nothing to search in an empty dictionary. */
    if (d->nwrds == 0)
        return NULL;

    /* Only the lexeme text takes part in the comparison done by cmpLexemeQ. */
    probe.lexeme = lexeme;
    probe.entries = NULL;

    hit = bsearch(&probe, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);

    return (hit != NULL) ? hit->entries : NULL;
}

static LexemeInfo* findVariant ( LexemeInfo *  in,
LexemeInfo *  stored,
uint16  curpos,
LexemeInfo **  newin,
int  newn 
) [static]

Definition at line 693 of file dict_thesaurus.c.

References i, LexemeInfo::idsubst, matchIdSubst(), LexemeInfo::nextentry, LexemeInfo::nextvariant, NULL, LexemeInfo::posinsubst, and LexemeInfo::tnvariant.

Referenced by thesaurus_lexize().

{
    /*
     * newin[0..newn-1] are entry chains (linked through nextentry), one per
     * lexeme of a variant.  Search them for rules (idsubst) in which every
     * chain has an entry at position curpos with tnvariant == newn.  Each
     * such rule that passes matchIdSubst(stored, ...) and is not already
     * present in "in" is pushed onto the front of "in" through nextvariant.
     * The function returns "in" as soon as any chain runs out.
     *
     * NOTE(review): the skipping logic assumes each chain is ordered by
     * ascending idsubst -- confirm against how the chains are built.
     */
    for (;;)
    {
        int         i;
        /* ptr is the current candidate; its idsubst is the rule being tested */
        LexemeInfo *ptr = newin[0];

        for (i = 0; i < newn; i++)
        {
            /* skip entries of this chain that belong to earlier rules */
            while (newin[i] && newin[i]->idsubst < ptr->idsubst)
                newin[i] = newin[i]->nextentry;

            if (newin[i] == NULL)
                return in;

            /* this chain is already past the candidate: adopt its rule and rescan all chains */
            if (newin[i]->idsubst > ptr->idsubst)
            {
                ptr = newin[i];
                i = -1;
                continue;
            }

            /* within the candidate rule, look for an entry at curpos with the right variant size */
            while (newin[i]->idsubst == ptr->idsubst)
            {
                if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
                {
                    ptr = newin[i];
                    break;
                }

                newin[i] = newin[i]->nextentry;
                if (newin[i] == NULL)
                    return in;
            }

            /* no suitable entry in this rule: continue with the rule the chain moved on to */
            if (newin[i]->idsubst != ptr->idsubst)
            {
                ptr = newin[i];
                i = -1;
                continue;
            }
        }

        if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
        {                       /* found */

            ptr->nextvariant = in;
            in = ptr;
        }

        /* step forward */
        for (i = 0; i < newn; i++)
            newin[i] = newin[i]->nextentry;
    }
}

static bool matchIdSubst ( LexemeInfo *  stored,
uint16  idsubst 
) [static]

Definition at line 673 of file dict_thesaurus.c.

References LexemeInfo::idsubst, and LexemeInfo::nextvariant.

Referenced by findVariant().

{
    LexemeInfo *cur;

    /* An empty chain places no restriction: everything matches. */
    if (stored == NULL)
        return true;

    /* Otherwise the rule number must appear somewhere in the variant chain. */
    for (cur = stored; cur != NULL; cur = cur->nextvariant)
    {
        if (cur->idsubst == idsubst)
            return true;
    }

    return false;
}

static void newLexeme ( DictThesaurus *  d,
char *  b,
char *  e,
uint16  idsubst,
uint16  posinsubst 
) [static]

Definition at line 71 of file dict_thesaurus.c.

References TheLexeme::entries, LexemeInfo::idsubst, TheLexeme::lexeme, LexemeInfo::nextentry, DictThesaurus::ntwrds, DictThesaurus::nwrds, palloc(), LexemeInfo::posinsubst, repalloc(), and DictThesaurus::wrds.

Referenced by thesaurusRead().

{
    TheLexeme  *slot;
    LexemeInfo *entry;
    size_t      len = (size_t) (e - b);

    /* Allocate the word array on first use, double it whenever it is full. */
    if (d->nwrds >= d->ntwrds)
    {
        if (d->ntwrds != 0)
        {
            d->ntwrds *= 2;
            d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
        }
        else
        {
            d->ntwrds = 16;
            d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
        }
    }

    /* Claim the next free element. */
    slot = &d->wrds[d->nwrds];
    d->nwrds++;

    /* Store the word [b, e) as a NUL-terminated copy. */
    slot->lexeme = palloc(len + 1);
    memcpy(slot->lexeme, b, len);
    slot->lexeme[len] = '\0';

    /* Record which rule, and which position inside it, the word came from. */
    entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
    entry->nextentry = NULL;
    entry->idsubst = idsubst;
    entry->posinsubst = posinsubst;
    slot->entries = entry;
}

Datum thesaurus_init ( PG_FUNCTION_ARGS   ) 

Definition at line 595 of file dict_thesaurus.c.

References compileTheLexeme(), compileTheSubstitute(), defGetString(), DefElem::defname, ereport, errcode(), errmsg(), ERROR, get_ts_dict_oid(), lfirst, lookup_ts_dictionary_cache(), palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, pg_strcasecmp(), pstrdup(), stringToQualifiedNameList(), DictThesaurus::subdict, DictThesaurus::subdictOid, and thesaurusRead().

{
    List       *options = (List *) PG_GETARG_POINTER(0);
    DictThesaurus *dict = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
    char       *subname = NULL;
    bool        havefile = false;
    ListCell   *cell;

    /* Accept exactly the options DictFile and Dictionary, each at most once. */
    foreach(cell, options)
    {
        DefElem    *opt = (DefElem *) lfirst(cell);

        if (pg_strcasecmp("DictFile", opt->defname) == 0)
        {
            if (havefile)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("multiple DictFile parameters")));
            thesaurusRead(defGetString(opt), dict);
            havefile = true;
            continue;
        }

        if (pg_strcasecmp("Dictionary", opt->defname) == 0)
        {
            if (subname)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("multiple Dictionary parameters")));
            subname = pstrdup(defGetString(opt));
            continue;
        }

        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("unrecognized Thesaurus parameter: \"%s\"",
                        opt->defname)));
    }

    /* Both options are mandatory. */
    if (!havefile)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("missing DictFile parameter")));
    if (!subname)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("missing Dictionary parameter")));

    /* Resolve the subdictionary, then compile samples and substitutes with it. */
    dict->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subname), false);
    dict->subdict = lookup_ts_dictionary_cache(dict->subdictOid);

    compileTheLexeme(dict);
    compileTheSubstitute(dict);

    PG_RETURN_POINTER(dict);
}

Datum thesaurus_lexize ( PG_FUNCTION_ARGS   ) 

Definition at line 785 of file dict_thesaurus.c.

References checkMatch(), DatumGetPointer, TSDictionaryCacheEntry::dictData, elog, ERROR, findTheLexeme(), findVariant(), FunctionCall4, DictSubState::getnext, i, DictSubState::isend, TSDictionaryCacheEntry::isvalid, TSLexeme::lexeme, TSDictionaryCacheEntry::lexize, lookup_ts_dictionary_cache(), NULL, TSLexeme::nvariant, palloc(), pfree(), PG_GETARG_DATUM, PG_GETARG_POINTER, PG_NARGS, PG_RETURN_POINTER, PointerGetDatum, LexemeInfo::posinsubst, DictSubState::private_state, DictThesaurus::subdict, and DictThesaurus::subdictOid.

{
    /*
     * Lexize one input word.  Argument 0 is the DictThesaurus, arguments 1
     * and 2 are handed unchanged to the subdictionary's lexize function,
     * and argument 3 is a DictSubState through which state is carried
     * between calls (private_state) and results are signalled (getnext).
     */
    DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
    DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
    TSLexeme   *res = NULL;
    LexemeInfo *stored,
               *info = NULL;
    uint16      curpos = 0;
    bool        moreres = false;

    if (PG_NARGS() != 4 || dstate == NULL)
        elog(ERROR, "forbidden call of thesaurus or nested call");

    if (dstate->isend)
        PG_RETURN_POINTER(NULL);
    /* candidate chain saved by the previous call, if any */
    stored = (LexemeInfo *) dstate->private_state;

    /* this word is expected one position after the previously matched one */
    if (stored)
        curpos = stored->posinsubst + 1;

    /* refresh the subdictionary cache entry if it is no longer valid */
    if (!d->subdict->isvalid)
        d->subdict = lookup_ts_dictionary_cache(d->subdictOid);

    /* normalize the input word with the subdictionary */
    res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
                                       PointerGetDatum(d->subdict->dictData),
                                                     PG_GETARG_DATUM(1),
                                                     PG_GETARG_DATUM(2),
                                                     PointerGetDatum(NULL)));

    if (res && res->lexeme)
    {
        TSLexeme   *ptr = res,
                   *basevar;

        /* handle the result one variant (run of equal nvariant) at a time */
        while (ptr->lexeme)
        {
            uint16      nv = ptr->nvariant;
            uint16      i,
                        nlex = 0;
            LexemeInfo **infos;

            /* basevar marks the start of the variant, nlex counts its lexemes */
            basevar = ptr;
            while (ptr->lexeme && nv == ptr->nvariant)
            {
                nlex++;
                ptr++;
            }

            /* look up the entry chain of every lexeme in the variant */
            infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
            for (i = 0; i < nlex; i++)
                if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
                    break;

            if (i < nlex)
            {
                /* no chance to find */
                pfree(infos);
                continue;
            }

            /* add the rules matched by this variant to the candidate chain */
            info = findVariant(info, stored, curpos, infos, nlex);
        }
    }
    else if (res)
    {                           /* stop-word */
        /* a non-NULL but empty result is matched against the NULL-lexeme entry */
        LexemeInfo *infos = findTheLexeme(d, NULL);

        info = findVariant(NULL, stored, curpos, &infos, 1);
    }
    else
    {
        info = NULL;            /* word isn't recognized */
    }

    /* remember the candidates for the next call */
    dstate->private_state = (void *) info;

    /* no rule can match here: clear getnext and return nothing */
    if (!info)
    {
        dstate->getnext = false;
        PG_RETURN_POINTER(NULL);
    }

    /* some candidate rule ends at this position: return its substitute */
    if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
    {
        dstate->getnext = moreres;
        PG_RETURN_POINTER(res);
    }

    /* candidates remain but none is complete yet: set getnext and return nothing */
    dstate->getnext = true;

    PG_RETURN_POINTER(NULL);
}

static void thesaurusRead ( char *  filename,
DictThesaurus *  d 
) [static]

Definition at line 168 of file dict_thesaurus.c.

References addWrd(), elog, ereport, errcode(), errmsg(), ERROR, get_tsearch_config_filename(), LexemeInfo::idsubst, newLexeme(), DictThesaurus::nsubst, NULL, pfree(), pg_mblen(), LexemeInfo::posinsubst, t_iseq, t_isspace, TR_INLEX, TR_INSUBS, TR_WAITLEX, TR_WAITSUBS, tsearch_readline(), tsearch_readline_begin(), and tsearch_readline_end().

Referenced by thesaurus_init().

{
    /*
     * Read the thesaurus file "filename" (resolved with the "ths"
     * extension) and load it into d.  Every non-comment line is one rule
     * of the form
     *
     *     sample words : substitute words
     *
     * Sample words are stored with newLexeme(), substitute words with
     * addWrd().  A substitute word prefixed with '*' is marked "use as is".
     * A line that starts (after white space) with '#', or is empty, is
     * skipped.  Malformed lines raise ERRCODE_CONFIG_FILE_ERROR.  On return
     * d->nsubst holds the number of rules read.
     */
    tsearch_readline_state trst;
    uint16      idsubst = 0;
    bool        useasis = false;
    char       *line;

    filename = get_tsearch_config_filename(filename, "ths");
    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open thesaurus file \"%s\": %m",
                        filename)));

    while ((line = tsearch_readline(&trst)) != NULL)
    {
        char       *ptr;
        int         state = TR_WAITLEX;
        char       *beginwrd = NULL;
        uint16      posinsubst = 0;     /* number of sample words seen */
        uint16      nwrd = 0;           /* number of substitute words seen */

        ptr = line;

        /* is it a comment? */
        while (*ptr && t_isspace(ptr))
            ptr += pg_mblen(ptr);

        if (t_iseq(ptr, '#') || *ptr == '\0' ||
            t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
        {
            pfree(line);
            continue;
        }

        /*
         * Scan the line one (possibly multibyte) character at a time:
         * TR_WAITLEX/TR_INLEX handle the sample side, TR_WAITSUBS/TR_INSUBS
         * the substitute side after the ':' delimiter.
         */
        while (*ptr)
        {
            if (state == TR_WAITLEX)
            {
                if (t_iseq(ptr, ':'))
                {
                    /* a delimiter before any sample word is an error */
                    if (posinsubst == 0)
                        ereport(ERROR,
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                 errmsg("unexpected delimiter")));
                    state = TR_WAITSUBS;
                }
                else if (!t_isspace(ptr))
                {
                    beginwrd = ptr;
                    state = TR_INLEX;
                }
            }
            else if (state == TR_INLEX)
            {
                if (t_iseq(ptr, ':'))
                {
                    newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
                    state = TR_WAITSUBS;
                }
                else if (t_isspace(ptr))
                {
                    newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
                    state = TR_WAITLEX;
                }
            }
            else if (state == TR_WAITSUBS)
            {
                if (t_iseq(ptr, '*'))
                {
                    /* '*' prefix: the word starts after it and is used as is */
                    useasis = true;
                    state = TR_INSUBS;
                    beginwrd = ptr + pg_mblen(ptr);
                }
                else if (t_iseq(ptr, '\\'))
                {
                    /* backslash prefix: the word starts after it */
                    useasis = false;
                    state = TR_INSUBS;
                    beginwrd = ptr + pg_mblen(ptr);
                }
                else if (!t_isspace(ptr))
                {
                    useasis = false;
                    beginwrd = ptr;
                    state = TR_INSUBS;
                }
            }
            else if (state == TR_INSUBS)
            {
                if (t_isspace(ptr))
                {
                    /* a prefix character followed directly by white space */
                    if (ptr == beginwrd)
                        ereport(ERROR,
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                 errmsg("unexpected end of line or lexeme")));
                    addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
                    state = TR_WAITSUBS;
                }
            }
            else
                elog(ERROR, "unrecognized thesaurus state: %d", state);

            ptr += pg_mblen(ptr);
        }

        /* the line ended inside a substitute word: store that last word */
        if (state == TR_INSUBS)
        {
            if (ptr == beginwrd)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("unexpected end of line or lexeme")));
            addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
        }

        /* a rule needs at least one sample word and one substitute word */
        if (!(nwrd && posinsubst))
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("unexpected end of line")));

        /*
         * idsubst is a 16-bit counter.  Incrementing it past its maximum
         * would wrap to 0, so that later rules would silently overwrite
         * the first ones and d->nsubst would be wrong.  Refuse such files.
         */
        if (idsubst == (uint16) 0xFFFF)
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("too many rules in thesaurus file \"%s\"",
                            filename)));

        idsubst++;

        pfree(line);
    }

    d->nsubst = idsubst;

    tsearch_readline_end(&trst);
}