#include "postgres.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "snowball/libstemmer/header.h"
#include "snowball/libstemmer/stem_ISO_8859_1_danish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_dutch.h"
#include "snowball/libstemmer/stem_ISO_8859_1_english.h"
#include "snowball/libstemmer/stem_ISO_8859_1_finnish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_french.h"
#include "snowball/libstemmer/stem_ISO_8859_1_german.h"
#include "snowball/libstemmer/stem_ISO_8859_1_hungarian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_italian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_norwegian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_porter.h"
#include "snowball/libstemmer/stem_ISO_8859_1_portuguese.h"
#include "snowball/libstemmer/stem_ISO_8859_1_spanish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_swedish.h"
#include "snowball/libstemmer/stem_ISO_8859_2_romanian.h"
#include "snowball/libstemmer/stem_KOI8_R_russian.h"
#include "snowball/libstemmer/stem_UTF_8_danish.h"
#include "snowball/libstemmer/stem_UTF_8_dutch.h"
#include "snowball/libstemmer/stem_UTF_8_english.h"
#include "snowball/libstemmer/stem_UTF_8_finnish.h"
#include "snowball/libstemmer/stem_UTF_8_french.h"
#include "snowball/libstemmer/stem_UTF_8_german.h"
#include "snowball/libstemmer/stem_UTF_8_hungarian.h"
#include "snowball/libstemmer/stem_UTF_8_italian.h"
#include "snowball/libstemmer/stem_UTF_8_norwegian.h"
#include "snowball/libstemmer/stem_UTF_8_porter.h"
#include "snowball/libstemmer/stem_UTF_8_portuguese.h"
#include "snowball/libstemmer/stem_UTF_8_romanian.h"
#include "snowball/libstemmer/stem_UTF_8_russian.h"
#include "snowball/libstemmer/stem_UTF_8_spanish.h"
#include "snowball/libstemmer/stem_UTF_8_swedish.h"
#include "snowball/libstemmer/stem_UTF_8_turkish.h"
Go to the source code of this file.
Data Structures | |
| struct | stemmer_module |
| struct | DictSnowball |
Typedefs | |
| typedef struct stemmer_module | stemmer_module |
| typedef struct DictSnowball | DictSnowball |
Functions | |
| PG_FUNCTION_INFO_V1 (dsnowball_init) | |
| Datum | dsnowball_init (PG_FUNCTION_ARGS) |
| PG_FUNCTION_INFO_V1 (dsnowball_lexize) | |
| Datum | dsnowball_lexize (PG_FUNCTION_ARGS) |
| static void | locate_stem_module (DictSnowball *d, char *lang) |
Variables | |
| PG_MODULE_MAGIC | |
| static const stemmer_module | stemmer_modules [] |
| typedef struct DictSnowball DictSnowball |
| typedef struct stemmer_module stemmer_module |
| Datum dsnowball_init | ( | PG_FUNCTION_ARGS | ) |
Definition at line 185 of file dict_snowball.c.
References CurrentMemoryContext, defGetString(), DefElem::defname, DictSnowball::dictCtx, ereport, errcode(), errmsg(), ERROR, SN_env::l, lfirst, locate_stem_module(), lowerstr(), palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, pg_strcasecmp(), readstoplist(), DictSnowball::stem, and DictSnowball::stoplist.
{
	/*
	 * Build a DictSnowball from the option list passed as argument 0.
	 *
	 * Option names are compared case-insensitively.  "StopWords" and
	 * "Language" may each be given at most once, "Language" is mandatory,
	 * and any other option name is rejected.  All failures are reported
	 * with ERRCODE_INVALID_PARAMETER_VALUE.  On success the filled-in
	 * DictSnowball is returned as a pointer Datum.
	 */
	List	   *options = (List *) PG_GETARG_POINTER(0);
	DictSnowball *dict = (DictSnowball *) palloc0(sizeof(DictSnowball));
	bool		have_stopwords = false;
	ListCell   *cell;

	foreach(cell, options)
	{
		DefElem    *opt = (DefElem *) lfirst(cell);

		if (pg_strcasecmp("StopWords", opt->defname) == 0)
		{
			if (have_stopwords)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("multiple StopWords parameters")));

			readstoplist(defGetString(opt), &dict->stoplist, lowerstr);
			have_stopwords = true;
			continue;
		}

		if (pg_strcasecmp("Language", opt->defname) == 0)
		{
			/*
			 * locate_stem_module() fills in dict->stem, so a non-NULL
			 * value here means a Language option was already processed.
			 */
			if (dict->stem != NULL)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("multiple Language parameters")));

			locate_stem_module(dict, defGetString(opt));
			continue;
		}

		/* Neither of the two known option names matched. */
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("unrecognized Snowball parameter: \"%s\"",
						opt->defname)));
	}

	/* The stemmer is only set by a Language option; insist on having one. */
	if (dict->stem == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("missing Language parameter")));

	/*
	 * Remember the memory context that is current during initialization;
	 * dsnowball_lexize() switches to it while the stemmer runs.
	 */
	dict->dictCtx = CurrentMemoryContext;

	PG_RETURN_POINTER(dict);
}
| Datum dsnowball_lexize | ( | PG_FUNCTION_ARGS | ) |
Definition at line 235 of file dict_snowball.c.
References DictSnowball::dictCtx, GetDatabaseEncoding(), SN_env::l, TSLexeme::lexeme, lowerstr_with_len(), MemoryContextSwitchTo(), DictSnowball::needrecode, SN_env::p, palloc0(), pfree(), pg_do_encoding_conversion(), PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_POINTER, repalloc(), searchstoplist(), SN_set_current(), DictSnowball::stem, DictSnowball::stoplist, and DictSnowball::z.
{
	/*
	 * Lexize a single token.
	 *
	 * Argument 0 is the DictSnowball built by dsnowball_init(), argument 1
	 * the token text and argument 2 its length.  The result is a
	 * zero-filled array of two TSLexeme entries.  If the token, after
	 * lowerstr_with_len(), is empty or is found in the stop list, the
	 * array is returned with no lexeme set; otherwise the first entry's
	 * lexeme holds the stemmer's output for the token.
	 */
	DictSnowball *dict = (DictSnowball *) PG_GETARG_POINTER(0);
	char	   *token = (char *) PG_GETARG_POINTER(1);
	int32		tokenlen = PG_GETARG_INT32(2);
	char	   *word = lowerstr_with_len(token, tokenlen);
	TSLexeme   *result = palloc0(sizeof(TSLexeme) * 2);
	MemoryContext callerCtx;
	char	   *converted;

	/* Empty input or stop word: hand back the array with no lexeme. */
	if (*word == '\0' || searchstoplist(&(dict->stoplist), word))
	{
		pfree(word);
		PG_RETURN_POINTER(result);
	}

	/*
	 * The selected stemmer works on UTF8 but the database encoding
	 * differs (see locate_stem_module), so convert the word to UTF8 first.
	 */
	if (dict->needrecode)
	{
		converted = (char *) pg_do_encoding_conversion((unsigned char *) word,
													   strlen(word),
													   GetDatabaseEncoding(),
													   PG_UTF8);
		/* Replace our copy only if the conversion returned a new buffer. */
		if (converted != word)
		{
			pfree(word);
			word = converted;
		}
	}

	/*
	 * Run the stemmer with the dictionary's saved context (d->dictCtx,
	 * recorded by dsnowball_init) as the current memory context, then
	 * switch back to the caller's context.
	 */
	callerCtx = MemoryContextSwitchTo(dict->dictCtx);
	SN_set_current(dict->z, strlen(word), (symbol *) word);
	dict->stem(dict->z);
	MemoryContextSwitchTo(callerCtx);

	/*
	 * Copy the stemmer's output back into our buffer.  When the output is
	 * missing or has zero length, the unstemmed word is kept unchanged.
	 */
	if (dict->z->p != NULL && dict->z->l != 0)
	{
		word = repalloc(word, dict->z->l + 1);
		memcpy(word, dict->z->p, dict->z->l);
		word[dict->z->l] = '\0';
	}

	/* Convert the result back from UTF8 to the database encoding. */
	if (dict->needrecode)
	{
		converted = (char *) pg_do_encoding_conversion((unsigned char *) word,
													   strlen(word),
													   PG_UTF8,
													   GetDatabaseEncoding());
		if (converted != word)
		{
			pfree(word);
			word = converted;
		}
	}

	result[0].lexeme = word;

	PG_RETURN_POINTER(result);
}
| static void locate_stem_module | ( | DictSnowball * | d, | |
| char * | lang | |||
| ) | [static] |
Definition at line 144 of file dict_snowball.c.
References stemmer_module::create, stemmer_module::enc, ereport, errcode(), errmsg(), ERROR, GetDatabaseEncoding(), GetDatabaseEncodingName(), stemmer_module::name, DictSnowball::needrecode, PG_SQL_ASCII, pg_strcasecmp(), PG_UTF8, stemmer_module::stem, DictSnowball::stem, and DictSnowball::z.
Referenced by dsnowball_init().
{
	/*
	 * Pick the stemmer for language "lang" from stemmer_modules[] and
	 * record it in *d: the stem function in d->stem, a freshly created
	 * stemmer environment in d->z, and in d->needrecode whether text must
	 * be converted to UTF8 around each stemmer call.  Language names are
	 * compared case-insensitively.  Raises ERRCODE_UNDEFINED_OBJECT if no
	 * table entry is usable.
	 */
	const stemmer_module *cand;

	/*
	 * Pass 1: look for an entry that can be used without recoding.  That
	 * is one whose encoding equals the database encoding, or one marked
	 * PG_SQL_ASCII, which is accepted for any database encoding.
	 */
	for (cand = stemmer_modules; cand->name != NULL; cand++)
	{
		if (cand->enc != PG_SQL_ASCII && cand->enc != GetDatabaseEncoding())
			continue;
		if (pg_strcasecmp(cand->name, lang) != 0)
			continue;

		d->stem = cand->stem;
		d->z = cand->create();
		d->needrecode = false;
		return;
	}

	/*
	 * Pass 2: fall back to a UTF8 stemmer for the language; the caller
	 * then has to recode text to and from UTF8.
	 */
	for (cand = stemmer_modules; cand->name != NULL; cand++)
	{
		if (cand->enc != PG_UTF8)
			continue;
		if (pg_strcasecmp(cand->name, lang) != 0)
			continue;

		d->stem = cand->stem;
		d->z = cand->create();
		d->needrecode = true;
		return;
	}

	/* Nothing in the table fits this language/encoding combination. */
	ereport(ERROR,
			(errcode(ERRCODE_UNDEFINED_OBJECT),
			 errmsg("no Snowball stemmer available for language \"%s\" and encoding \"%s\"",
					lang, GetDatabaseEncodingName())));
}
| PG_FUNCTION_INFO_V1 | ( | dsnowball_lexize | ) |
| PG_FUNCTION_INFO_V1 | ( | dsnowball_init | ) |
Definition at line 62 of file dict_snowball.c.
const stemmer_module stemmer_modules[] [static] |
Definition at line 80 of file dict_snowball.c.
1.7.1