Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Typedefs | Functions | Variables

dict_snowball.c File Reference

#include "postgres.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "snowball/libstemmer/header.h"
#include "snowball/libstemmer/stem_ISO_8859_1_danish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_dutch.h"
#include "snowball/libstemmer/stem_ISO_8859_1_english.h"
#include "snowball/libstemmer/stem_ISO_8859_1_finnish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_french.h"
#include "snowball/libstemmer/stem_ISO_8859_1_german.h"
#include "snowball/libstemmer/stem_ISO_8859_1_hungarian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_italian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_norwegian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_porter.h"
#include "snowball/libstemmer/stem_ISO_8859_1_portuguese.h"
#include "snowball/libstemmer/stem_ISO_8859_1_spanish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_swedish.h"
#include "snowball/libstemmer/stem_ISO_8859_2_romanian.h"
#include "snowball/libstemmer/stem_KOI8_R_russian.h"
#include "snowball/libstemmer/stem_UTF_8_danish.h"
#include "snowball/libstemmer/stem_UTF_8_dutch.h"
#include "snowball/libstemmer/stem_UTF_8_english.h"
#include "snowball/libstemmer/stem_UTF_8_finnish.h"
#include "snowball/libstemmer/stem_UTF_8_french.h"
#include "snowball/libstemmer/stem_UTF_8_german.h"
#include "snowball/libstemmer/stem_UTF_8_hungarian.h"
#include "snowball/libstemmer/stem_UTF_8_italian.h"
#include "snowball/libstemmer/stem_UTF_8_norwegian.h"
#include "snowball/libstemmer/stem_UTF_8_porter.h"
#include "snowball/libstemmer/stem_UTF_8_portuguese.h"
#include "snowball/libstemmer/stem_UTF_8_romanian.h"
#include "snowball/libstemmer/stem_UTF_8_russian.h"
#include "snowball/libstemmer/stem_UTF_8_spanish.h"
#include "snowball/libstemmer/stem_UTF_8_swedish.h"
#include "snowball/libstemmer/stem_UTF_8_turkish.h"
Include dependency graph for dict_snowball.c:

Go to the source code of this file.

Data Structures

struct  stemmer_module
struct  DictSnowball

Typedefs

typedef struct stemmer_module stemmer_module
typedef struct DictSnowball DictSnowball

Functions

 PG_FUNCTION_INFO_V1 (dsnowball_init)
Datum dsnowball_init (PG_FUNCTION_ARGS)
 PG_FUNCTION_INFO_V1 (dsnowball_lexize)
Datum dsnowball_lexize (PG_FUNCTION_ARGS)
static void locate_stem_module (DictSnowball *d, char *lang)

Variables

 PG_MODULE_MAGIC
static const stemmer_module stemmer_modules []

Typedef Documentation

typedef struct DictSnowball DictSnowball

Function Documentation

Datum dsnowball_init ( PG_FUNCTION_ARGS   ) 

Definition at line 185 of file dict_snowball.c.

References CurrentMemoryContext, defGetString(), DefElem::defname, DictSnowball::dictCtx, ereport, errcode(), errmsg(), ERROR, SN_env::l, lfirst, locate_stem_module(), lowerstr(), palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, pg_strcasecmp(), readstoplist(), DictSnowball::stem, and DictSnowball::stoplist.

{
    List       *dictoptions = (List *) PG_GETARG_POINTER(0);
    DictSnowball *d;
    bool        stoploaded = false;
    ListCell   *l;

    d = (DictSnowball *) palloc0(sizeof(DictSnowball));

    foreach(l, dictoptions)
    {
        DefElem    *defel = (DefElem *) lfirst(l);

        if (pg_strcasecmp("StopWords", defel->defname) == 0)
        {
            if (stoploaded)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("multiple StopWords parameters")));
            readstoplist(defGetString(defel), &d->stoplist, lowerstr);
            stoploaded = true;
        }
        else if (pg_strcasecmp("Language", defel->defname) == 0)
        {
            if (d->stem)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("multiple Language parameters")));
            locate_stem_module(d, defGetString(defel));
        }
        else
        {
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("unrecognized Snowball parameter: \"%s\"",
                            defel->defname)));
        }
    }

    if (!d->stem)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("missing Language parameter")));

    d->dictCtx = CurrentMemoryContext;

    PG_RETURN_POINTER(d);
}

Datum dsnowball_lexize ( PG_FUNCTION_ARGS   ) 

Definition at line 235 of file dict_snowball.c.

References DictSnowball::dictCtx, GetDatabaseEncoding(), SN_env::l, TSLexeme::lexeme, lowerstr_with_len(), MemoryContextSwitchTo(), DictSnowball::needrecode, SN_env::p, palloc0(), pfree(), pg_do_encoding_conversion(), PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_POINTER, repalloc(), searchstoplist(), SN_set_current(), DictSnowball::stem, DictSnowball::stoplist, and DictSnowball::z.

{
    DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
    char       *in = (char *) PG_GETARG_POINTER(1);
    int32       len = PG_GETARG_INT32(2);
    char       *txt = lowerstr_with_len(in, len);
    TSLexeme   *res = palloc0(sizeof(TSLexeme) * 2);

    if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
    {
        pfree(txt);
    }
    else
    {
        MemoryContext saveCtx;

        /*
         * recode to utf8 if stemmer is utf8 and doesn't match server encoding
         */
        if (d->needrecode)
        {
            char       *recoded;

            recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
                                                         strlen(txt),
                                                       GetDatabaseEncoding(),
                                                         PG_UTF8);
            if (recoded != txt)
            {
                pfree(txt);
                txt = recoded;
            }
        }

        /* see comment about d->dictCtx */
        saveCtx = MemoryContextSwitchTo(d->dictCtx);
        SN_set_current(d->z, strlen(txt), (symbol *) txt);
        d->stem(d->z);
        MemoryContextSwitchTo(saveCtx);

        if (d->z->p && d->z->l)
        {
            txt = repalloc(txt, d->z->l + 1);
            memcpy(txt, d->z->p, d->z->l);
            txt[d->z->l] = '\0';
        }

        /* back recode if needed */
        if (d->needrecode)
        {
            char       *recoded;

            recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
                                                         strlen(txt),
                                                         PG_UTF8,
                                                      GetDatabaseEncoding());
            if (recoded != txt)
            {
                pfree(txt);
                txt = recoded;
            }
        }

        res->lexeme = txt;
    }

    PG_RETURN_POINTER(res);
}

static void locate_stem_module ( DictSnowball d,
char *  lang 
) [static]

Definition at line 144 of file dict_snowball.c.

References stemmer_module::create, stemmer_module::enc, ereport, errcode(), errmsg(), ERROR, GetDatabaseEncoding(), GetDatabaseEncodingName(), stemmer_module::name, DictSnowball::needrecode, PG_SQL_ASCII, pg_strcasecmp(), PG_UTF8, stemmer_module::stem, DictSnowball::stem, and DictSnowball::z.

Referenced by dsnowball_init().

{
    const stemmer_module *m;

    /*
     * First, try to find exact match of stemmer module. Stemmer with
     * PG_SQL_ASCII encoding is treated as working with any server encoding
     */
    for (m = stemmer_modules; m->name; m++)
    {
        if ((m->enc == PG_SQL_ASCII || m->enc == GetDatabaseEncoding()) &&
            pg_strcasecmp(m->name, lang) == 0)
        {
            d->stem = m->stem;
            d->z = m->create();
            d->needrecode = false;
            return;
        }
    }

    /*
     * Second, try to find stemmer for needed language for UTF8 encoding.
     */
    for (m = stemmer_modules; m->name; m++)
    {
        if (m->enc == PG_UTF8 && pg_strcasecmp(m->name, lang) == 0)
        {
            d->stem = m->stem;
            d->z = m->create();
            d->needrecode = true;
            return;
        }
    }

    ereport(ERROR,
            (errcode(ERRCODE_UNDEFINED_OBJECT),
             errmsg("no Snowball stemmer available for language \"%s\" and encoding \"%s\"",
                    lang, GetDatabaseEncodingName())));
}

PG_FUNCTION_INFO_V1 ( dsnowball_lexize   ) 
PG_FUNCTION_INFO_V1 ( dsnowball_init   ) 

Variable Documentation

Definition at line 62 of file dict_snowball.c.

const stemmer_module stemmer_modules[] [static]

Definition at line 80 of file dict_snowball.c.