Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Typedefs | Functions

spell.h File Reference

#include "regex/regex.h"
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
Include dependency graph for spell.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  SPNodeData
struct  SPNode
struct  spell_struct
struct  aff_struct
struct  AffixNodeData
struct  AffixNode
struct  CMPDAffix
struct  IspellDict

Defines

#define MAXFLAGLEN   16
#define FF_COMPOUNDONLY   0x01
#define FF_COMPOUNDBEGIN   0x02
#define FF_COMPOUNDMIDDLE   0x04
#define FF_COMPOUNDLAST   0x08
#define FF_COMPOUNDFLAG   ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | FF_COMPOUNDLAST )
#define FF_DICTFLAGMASK   0x0f
#define SPNHDRSZ   (offsetof(SPNode,data))
#define SPELLHDRSZ   (offsetof(SPELL, word))
#define FF_COMPOUNDPERMITFLAG   0x10
#define FF_COMPOUNDFORBIDFLAG   0x20
#define FF_CROSSPRODUCT   0x40
#define FF_SUFFIX   1
#define FF_PREFIX   0
#define ANHRDSZ   (offsetof(AffixNode, data))

Typedefs

typedef struct SPNode SPNode
typedef struct spell_struct SPELL
typedef struct aff_struct AFFIX
typedef struct AffixNode AffixNode

Functions

TSLexeme * NINormalizeWord (IspellDict *Conf, char *word)
void NIStartBuild (IspellDict *Conf)
void NIImportAffixes (IspellDict *Conf, const char *filename)
void NIImportDictionary (IspellDict *Conf, const char *filename)
void NISortDictionary (IspellDict *Conf)
void NISortAffixes (IspellDict *Conf)
void NIFinishBuild (IspellDict *Conf)

Define Documentation

#define ANHRDSZ   (offsetof(AffixNode, data))

Definition at line 126 of file spell.h.

Referenced by mkANode(), and mkVoidAffix().

#define FF_COMPOUNDBEGIN   0x02

Definition at line 43 of file spell.h.

Referenced by CheckAffix(), NIImportOOAffixes(), and SplitToVariants().

#define FF_COMPOUNDFLAG   ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | FF_COMPOUNDLAST )

Definition at line 46 of file spell.h.

Referenced by mkSPNode(), NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), and NISortAffixes().

#define FF_COMPOUNDFORBIDFLAG   0x20

Definition at line 99 of file spell.h.

Referenced by CheckAffix(), and NIImportOOAffixes().

#define FF_COMPOUNDLAST   0x08

Definition at line 45 of file spell.h.

Referenced by CheckAffix(), NIImportOOAffixes(), and NINormalizeWord().

#define FF_COMPOUNDMIDDLE   0x04

Definition at line 44 of file spell.h.

Referenced by CheckAffix(), and NIImportOOAffixes().

#define FF_COMPOUNDONLY   0x01

Definition at line 42 of file spell.h.

Referenced by CheckAffix(), FindWord(), mkSPNode(), NIAddAffix(), and NIImportOOAffixes().

#define FF_COMPOUNDPERMITFLAG   0x10

Definition at line 98 of file spell.h.

Referenced by NIAddAffix(), and NIImportOOAffixes().

#define FF_CROSSPRODUCT   0x40

Definition at line 100 of file spell.h.

Referenced by NIImportOOAffixes().

#define FF_DICTFLAGMASK   0x0f

Definition at line 47 of file spell.h.

Referenced by makeCompoundFlags().

#define FF_PREFIX   0
#define FF_SUFFIX   1
#define MAXFLAGLEN   16

Definition at line 25 of file spell.h.

Referenced by cmpspellaffix(), NIAddSpell(), and NISortDictionary().

#define SPELLHDRSZ   (offsetof(SPELL, word))

Definition at line 76 of file spell.h.

Referenced by NIAddSpell().

#define SPNHDRSZ   (offsetof(SPNode,data))

Definition at line 55 of file spell.h.

Referenced by mkSPNode().


Typedef Documentation

typedef struct aff_struct AFFIX
typedef struct AffixNode AffixNode
typedef struct spell_struct SPELL
typedef struct SPNode SPNode

Function Documentation

void NIFinishBuild ( IspellDict *  Conf  ) 

Definition at line 56 of file spell.c.

References IspellDict::buildCxt, IspellDict::firstfree, MemoryContextDelete(), and IspellDict::Spell.

Referenced by dispell_init().

{
    /* Drop the build-time memory context; its temp memory is not needed now */
    MemoryContextDelete(Conf->buildCxt);

    /*
     * These fields are left dangling by the deletion above.  Nothing should
     * read them afterwards, but reset them so the struct holds no stale
     * pointers.
     */
    Conf->firstfree = NULL;
    Conf->Spell = NULL;
    Conf->buildCxt = NULL;
}

void NIImportAffixes ( IspellDict *  Conf,
const char *  filename 
)

Definition at line 762 of file spell.c.

References ereport, errcode(), errmsg(), ERROR, FF_COMPOUNDFLAG, FF_PREFIX, FF_SUFFIX, find(), findchar(), flag(), IspellDict::flagval, lowerstr(), NIAddAffix(), NIImportOOAffixes(), NULL, parse_affentry(), pfree(), pg_mblen(), prefixes(), STRNCMP, t_isspace, tsearch_readline(), tsearch_readline_begin(), tsearch_readline_end(), and IspellDict::usecompound.

Referenced by dispell_init().

{
    /*
     * Read the affix file "filename" line by line and register every rule
     * that parse_affentry() accepts through NIAddAffix().
     *
     * Each line is kept in two forms: "recoded" (as returned by
     * tsearch_readline()) and "pstr" (its lower-cased copy).  Keywords are
     * matched against pstr, while flag characters are taken from recoded
     * because the non-lower-cased text is needed for them.
     *
     * A line starting with COMPOUNDFLAG, COMPOUNDMIN, PFX or SFX hands the
     * whole file over to NIImportOOAffixes(); if old-format directives were
     * already seen, that is reported as an error instead.
     *
     * Raises ereport(ERROR) when the file cannot be opened, when a flag
     * character is multibyte, or when the two formats are mixed.
     */
    char       *pstr = NULL;
    char        mask[BUFSIZ];
    char        find[BUFSIZ];
    char        repl[BUFSIZ];
    char       *s;
    bool        suffixes = false;
    bool        prefixes = false;
    int         flag = 0;
    char        flagflags = 0;
    tsearch_readline_state trst;
    bool        oldformat = false;
    char       *recoded = NULL;

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open affix file \"%s\": %m",
                        filename)));

    /* Start from a clean state: no flag values, compounds disabled */
    memset(Conf->flagval, 0, sizeof(Conf->flagval));
    Conf->usecompound = false;

    while ((recoded = tsearch_readline(&trst)) != NULL)
    {
        pstr = lowerstr(recoded);

        /* Skip comments and empty lines */
        if (*pstr == '#' || *pstr == '\n')
            goto nextline;

        if (STRNCMP(pstr, "compoundwords") == 0)
        {
            /*
             * Locate an 'l' in the lower-cased line, then skip the rest of
             * that word and the following whitespace to reach the flag.
             */
            s = findchar(pstr, 'l');
            if (s)
            {
                s = recoded + (s - pstr);       /* we need non-lowercased
                                                 * string */
                while (*s && !t_isspace(s))
                    s += pg_mblen(s);
                while (*s && t_isspace(s))
                    s += pg_mblen(s);

                /* Only a single-byte flag character enables compounds */
                if (*s && pg_mblen(s) == 1)
                {
                    Conf->flagval[*(unsigned char *) s] = FF_COMPOUNDFLAG;
                    Conf->usecompound = true;
                }
                oldformat = true;
                goto nextline;
            }
        }
        if (STRNCMP(pstr, "suffixes") == 0)
        {
            suffixes = true;
            prefixes = false;
            oldformat = true;
            goto nextline;
        }
        if (STRNCMP(pstr, "prefixes") == 0)
        {
            suffixes = false;
            prefixes = true;
            oldformat = true;
            goto nextline;
        }
        if (STRNCMP(pstr, "flag") == 0)
        {
            s = recoded + 4;    /* we need non-lowercased string */
            flagflags = 0;

            while (*s && t_isspace(s))
                s += pg_mblen(s);
            oldformat = true;

            /* allow only single-encoded flags */
            if (pg_mblen(s) != 1)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("multibyte flag character is not allowed")));

            /* Optional modifier in front of the flag character */
            if (*s == '*')
            {
                flagflags |= FF_CROSSPRODUCT;
                s++;
            }
            else if (*s == '~')
            {
                flagflags |= FF_COMPOUNDONLY;
                s++;
            }

            /* A backslash is skipped; the character after it is the flag */
            if (*s == '\\')
                s++;

            /* allow only single-encoded flags */
            if (pg_mblen(s) != 1)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("multibyte flag character is not allowed")));

            flag = *(unsigned char *) s;
            goto nextline;
        }
        if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
            STRNCMP(recoded, "PFX") == 0 || STRNCMP(recoded, "SFX") == 0)
        {
            if (oldformat)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("wrong affix file format for flag")));

            /*
             * This early return bypasses the pfree() calls at nextline, so
             * release the current line's buffers here to avoid leaking them.
             */
            pfree(recoded);
            pfree(pstr);
            tsearch_readline_end(&trst);
            NIImportOOAffixes(Conf, filename);
            return;
        }

        /* Rule lines are only meaningful inside a suffixes/prefixes section */
        if ((!suffixes) && (!prefixes))
            goto nextline;

        if (!parse_affentry(pstr, mask, find, repl))
            goto nextline;

        NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);

nextline:
        pfree(recoded);
        pfree(pstr);
    }
    tsearch_readline_end(&trst);
}

void NIImportDictionary ( IspellDict *  Conf,
const char *  filename 
)

Definition at line 268 of file spell.c.

References ereport, errcode(), errmsg(), ERROR, findchar(), lowerstr_ctx(), NIAddSpell(), NULL, pfree(), pg_mblen(), t_isprint, t_isspace, tsearch_readline(), tsearch_readline_begin(), and tsearch_readline_end().

Referenced by dispell_init().

{
    /*
     * Load the dictionary file "filename": every line contributes one word,
     * optionally followed by '/' and its flag characters, which is passed to
     * NIAddSpell() in lower-cased form.
     */
    tsearch_readline_state trst;
    char       *line;

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open dictionary file \"%s\": %m",
                        filename)));

    for (;;)
    {
        char       *slash;
        char       *cp;
        char       *lowered;
        const char *flags = "";     /* used when the line has no '/' */

        line = tsearch_readline(&trst);
        if (line == NULL)
            break;

        /* Split "word/flags" at the slash, if there is one */
        slash = findchar(line, '/');
        if (slash != NULL)
        {
            *slash = '\0';
            cp = slash + 1;
            flags = cp;

            /*
             * Only single-byte, printable, non-space characters are accepted
             * as flags; the flag string is cut at the first other character.
             */
            while (*cp && pg_mblen(cp) == 1 && t_isprint(cp) && !t_isspace(cp))
                cp++;
            *cp = '\0';
        }

        /* Cut the word at its first whitespace character */
        for (cp = line; *cp; cp += pg_mblen(cp))
        {
            if (t_isspace(cp))
            {
                *cp = '\0';
                break;
            }
        }

        lowered = lowerstr_ctx(Conf, line);
        NIAddSpell(Conf, lowered, flags);

        pfree(lowered);
        pfree(line);
    }

    tsearch_readline_end(&trst);
}

TSLexeme* NINormalizeWord ( IspellDict *  Conf,
char *  word 
)

Definition at line 1739 of file spell.c.

References addNorm(), FF_COMPOUNDLAST, i, MAX_NORM, SplitVar::next, NormalizeSubWord(), SplitVar::nstem, NULL, pfree(), pstrdup(), SplitToVariants(), SplitVar::stem, and IspellDict::usecompound.

Referenced by dispell_lexize().

{
    /*
     * Collect the normal forms of "word" into a TSLexeme array and return it.
     * lres starts out NULL and is only ever changed through addNorm(), so the
     * result is NULL when addNorm() is never called.
     *
     * NVariant numbers the alternatives: each plain normal form gets its own
     * number, and all parts of one compound split share a number.
     */
    char      **res;
    TSLexeme   *lcur = NULL,
               *lres = NULL;
    uint16      NVariant = 1;

    /* First, the normal forms of the word taken as a whole */
    res = NormalizeSubWord(Conf, word, 0);

    if (res)
    {
        char      **ptr = res;

        /* res is a NULL-terminated list; stop early once MAX_NORM is reached */
        while (*ptr && (lcur - lres) < MAX_NORM)
        {
            addNorm(&lres, &lcur, *ptr, 0, NVariant++);
            ptr++;
        }
        /* Only the list itself is freed; the strings were passed to addNorm() */
        pfree(res);
    }

    /* Second, try to split the word into compound parts, if enabled */
    if (Conf->usecompound)
    {
        int         wordlen = strlen(word);
        SplitVar   *ptr,
                   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
        int         i;

        /* Walk the linked list of split variants, freeing each as we go */
        while (var)
        {
            /* A variant with a single stem is not a compound; skip it */
            if (var->nstem > 1)
            {
                /* Normalize only the last stem, flagged as compound-last */
                char      **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);

                if (subres)
                {
                    char      **subptr = subres;

                    /*
                     * For every normal form of the last stem, emit the
                     * leading stems followed by that form, all under the same
                     * variant number.
                     */
                    while (*subptr)
                    {
                        for (i = 0; i < var->nstem - 1; i++)
                        {
                            /*
                             * The first pass hands over the stem strings
                             * themselves; later passes hand over copies.
                             */
                            addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
                        }

                        addNorm(&lres, &lcur, *subptr, 0, NVariant);
                        subptr++;
                        NVariant++;
                    }

                    pfree(subres);

                    /*
                     * The leading stems were passed to addNorm() above, so
                     * clear stem[0] to make the cleanup loop below stop
                     * immediately.  The last stem was only normalized, not
                     * handed over, so it is freed here.
                     */
                    var->stem[0] = NULL;
                    pfree(var->stem[var->nstem - 1]);
                }
            }

            /* Free the stems still owned by this variant, then the variant */
            for (i = 0; i < var->nstem && var->stem[i]; i++)
                pfree(var->stem[i]);
            ptr = var->next;
            pfree(var->stem);
            pfree(var);
            var = ptr;
        }
    }

    return lres;
}

void NISortAffixes ( IspellDict *  Conf  ) 

Definition at line 1186 of file spell.c.

References CMPDAffix::affix, IspellDict::Affix, cmpaffix(), IspellDict::CompoundAffix, FF_COMPOUNDFLAG, FF_PREFIX, FF_SUFFIX, aff_struct::flag, aff_struct::flagflags, i, isAffixInUse(), CMPDAffix::issuffix, CMPDAffix::len, mkANode(), mkVoidAffix(), IspellDict::naffixes, palloc(), IspellDict::Prefix, qsort, repalloc(), aff_struct::repl, aff_struct::replen, strbncmp(), IspellDict::Suffix, and aff_struct::type.

Referenced by dispell_init().

{
    /*
     * Sort Conf->Affix with cmpaffix, collect the compound affixes into
     * Conf->CompoundAffix (an array terminated by an entry whose affix is
     * NULL), and build the Conf->Prefix and Conf->Suffix trees with
     * mkANode().  Does nothing when there are no affixes.
     */
    AFFIX      *Affix;
    size_t      i;
    CMPDAffix  *ptr;
    int         firstsuffix = Conf->naffixes;   /* lowest index of a suffix */

    if (Conf->naffixes == 0)
        return;

    if (Conf->naffixes > 1)
        qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);

    /*
     * The loop below can store up to naffixes entries and a terminator is
     * written after the last one, so reserve one slot more than naffixes.
     */
    Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * (Conf->naffixes + 1));
    ptr->affix = NULL;

    for (i = 0; i < Conf->naffixes; i++)
    {
        Affix = &(((AFFIX *) Conf->Affix)[i]);
        if (Affix->type == FF_SUFFIX && i < firstsuffix)
            firstsuffix = i;

        if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
            isAffixInUse(Conf, (char) Affix->flag))
        {
            /*
             * Compare the candidate's own kind with the previous entry.  The
             * slot at ptr has not been filled yet, so its issuffix field must
             * not be read.
             */
            bool        issuffix = (Affix->type == FF_SUFFIX);

            if (ptr == Conf->CompoundAffix ||
                issuffix != (ptr - 1)->issuffix ||
                strbncmp((const unsigned char *) (ptr - 1)->affix,
                         (const unsigned char *) Affix->repl,
                         (ptr - 1)->len))
            {
                /* leave only unique and minimals suffixes */
                ptr->affix = Affix->repl;
                ptr->len = Affix->replen;
                ptr->issuffix = issuffix;
                ptr++;
            }
        }
    }

    /* Terminate the list and shrink the array to the entries actually used */
    ptr->affix = NULL;
    Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));

    /* Entries [0, firstsuffix) feed the prefix tree, the rest the suffix tree */
    Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
    Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
    mkVoidAffix(Conf, true, firstsuffix);
    mkVoidAffix(Conf, false, firstsuffix);
}

void NISortDictionary ( IspellDict *  Conf  ) 

Definition at line 1013 of file spell.c.

References IspellDict::AffixData, Assert, cmpspell(), cmpspellaffix(), cpstrdup(), spell_struct::d, IspellDict::Dictionary, spell_struct::flag, i, IspellDict::lenAffixData, MAXFLAGLEN, mkSPNode(), IspellDict::nAffixData, IspellDict::nspell, spell_struct::p, palloc0(), qsort, IspellDict::Spell, and spell_struct::word.

Referenced by dispell_init().

{
    /*
     * Compress the per-word flag strings into the shared Conf->AffixData
     * table, then sort the words with cmpspell and build Conf->Dictionary
     * with mkSPNode().
     */
    int         idx;
    int         naffix = 0;
    int         curaffix = -1;

    /* Order the entries with cmpspellaffix before scanning adjacent pairs */
    qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);

    /* Count how many times the flag string changes between neighbours */
    for (idx = 0; idx < Conf->nspell; idx++)
    {
        if (idx != 0 &&
            strncmp(Conf->Spell[idx]->p.flag, Conf->Spell[idx - 1]->p.flag, MAXFLAGLEN) == 0)
            continue;
        naffix++;
    }

    /* One AffixData slot for each distinct flag string counted above */
    Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));

    /*
     * Copy each distinct flag string into AffixData, and replace the textual
     * flag field of every entry with the index of its AffixData slot.
     */
    for (idx = 0; idx < Conf->nspell; idx++)
    {
        SPELL      *entry = Conf->Spell[idx];

        if (idx == 0 || strncmp(entry->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN) != 0)
        {
            curaffix++;
            Assert(curaffix < naffix);
            Conf->AffixData[curaffix] = cpstrdup(Conf, entry->p.flag);
        }

        entry->p.d.affix = curaffix;
        entry->p.d.len = strlen(entry->word);
    }

    Conf->nAffixData = naffix;
    Conf->lenAffixData = Conf->nAffixData;

    /* Re-sort with cmpspell and build the dictionary tree from the result */
    qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
    Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
}

void NIStartBuild ( IspellDict *  Conf  ) 

Definition at line 39 of file spell.c.

References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), IspellDict::buildCxt, and CurTransactionContext.

Referenced by dispell_init().

{
    /*
     * Create the memory context named "Ispell dictionary init context" and
     * store it in Conf->buildCxt.  NIFinishBuild() deletes this context
     * again.
     *
     * The temp context is a child of CurTransactionContext, so that it will
     * go away automatically on error.
     */
    Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
                                           "Ispell dictionary init context",
                                           ALLOCSET_DEFAULT_MINSIZE,
                                           ALLOCSET_DEFAULT_INITSIZE,
                                           ALLOCSET_DEFAULT_MAXSIZE);
}