#include "postgres.h"
#include "catalog/pg_collation.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
#include "utils/memutils.h"
Go to the source code of this file.
Data Structures | |
struct | SplitVar |
Defines | |
#define | tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz)) |
#define | tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz)) |
#define | COMPACT_ALLOC_CHUNK 8192 |
#define | COMPACT_MAX_REQ 1024 |
#define | cpalloc(size) compact_palloc0(Conf, size) |
#define | cpalloc0(size) compact_palloc0(Conf, size) |
#define | MAX_NORM 1024 |
#define | MAXNORMLEN 256 |
#define | STRNCMP(s, p) strncmp( (s), (p), strlen(p) ) |
#define | GETWCHAR(W, L, N, T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) |
#define | GETCHAR(A, N, T) GETWCHAR( (A)->repl, (A)->replen, N, T ) |
#define | PAE_WAIT_MASK 0 |
#define | PAE_INMASK 1 |
#define | PAE_WAIT_FIND 2 |
#define | PAE_INFIND 3 |
#define | PAE_WAIT_REPL 4 |
#define | PAE_INREPL 5 |
Typedefs | |
typedef struct SplitVar | SplitVar |
Functions | |
void | NIStartBuild (IspellDict *Conf) |
void | NIFinishBuild (IspellDict *Conf) |
static void * | compact_palloc0 (IspellDict *Conf, size_t size) |
static char * | cpstrdup (IspellDict *Conf, const char *str) |
static char * | lowerstr_ctx (IspellDict *Conf, const char *src) |
static int | cmpspell (const void *s1, const void *s2) |
static int | cmpspellaffix (const void *s1, const void *s2) |
static char * | findchar (char *str, int c) |
static int | strbcmp (const unsigned char *s1, const unsigned char *s2) |
static int | strbncmp (const unsigned char *s1, const unsigned char *s2, size_t count) |
static int | cmpaffix (const void *s1, const void *s2) |
static void | NIAddSpell (IspellDict *Conf, const char *word, const char *flag) |
void | NIImportDictionary (IspellDict *Conf, const char *filename) |
static int | FindWord (IspellDict *Conf, const char *word, int affixflag, int flag) |
static void | NIAddAffix (IspellDict *Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type) |
static bool | parse_affentry (char *str, char *mask, char *find, char *repl) |
static void | addFlagValue (IspellDict *Conf, char *s, uint32 val) |
static void | NIImportOOAffixes (IspellDict *Conf, const char *filename) |
void | NIImportAffixes (IspellDict *Conf, const char *filename) |
static int | MergeAffix (IspellDict *Conf, int a1, int a2) |
static uint32 | makeCompoundFlags (IspellDict *Conf, int affix) |
static SPNode * | mkSPNode (IspellDict *Conf, int low, int high, int level) |
void | NISortDictionary (IspellDict *Conf) |
static AffixNode * | mkANode (IspellDict *Conf, int low, int high, int level, int type) |
static void | mkVoidAffix (IspellDict *Conf, bool issuffix, int startsuffix) |
static bool | isAffixInUse (IspellDict *Conf, char flag) |
void | NISortAffixes (IspellDict *Conf) |
static AffixNodeData * | FindAffixes (AffixNode *node, const char *word, int wrdlen, int *level, int type) |
static char * | CheckAffix (const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen) |
static int | addToResult (char **forms, char **cur, char *word) |
static char ** | NormalizeSubWord (IspellDict *Conf, char *word, int flag) |
static int | CheckCompoundAffixes (CMPDAffix **ptr, char *word, int len, bool CheckInPlace) |
static SplitVar * | CopyVar (SplitVar *s, int makedup) |
static void | AddStem (SplitVar *v, char *word) |
static SplitVar * | SplitToVariants (IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos) |
static void | addNorm (TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant) |
TSLexeme * | NINormalizeWord (IspellDict *Conf, char *word) |
Variables | |
static char * | VoidString = "" |
#define COMPACT_ALLOC_CHUNK 8192 |
Definition at line 78 of file spell.c.
Referenced by compact_palloc0().
#define COMPACT_MAX_REQ 1024 |
Definition at line 79 of file spell.c.
Referenced by compact_palloc0().
#define cpalloc | ( | size | ) | compact_palloc0(Conf, size) |
Definition at line 110 of file spell.c.
Referenced by cpstrdup(), MergeAffix(), mkANode(), and mkVoidAffix().
#define cpalloc0 | ( | size | ) | compact_palloc0(Conf, size) |
Definition at line 111 of file spell.c.
Referenced by mkANode(), and mkSPNode().
#define GETWCHAR | ( | W, | ||
L, | ||||
N, | ||||
T | ||||
) | ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) |
Definition at line 143 of file spell.c.
Referenced by FindAffixes().
#define MAX_NORM 1024 |
Definition at line 139 of file spell.c.
Referenced by addNorm(), addToResult(), NINormalizeWord(), and NormalizeSubWord().
#define MAXNORMLEN 256 |
Definition at line 140 of file spell.c.
Referenced by NormalizeSubWord(), and SplitToVariants().
#define PAE_INFIND 3 |
Definition at line 463 of file spell.c.
Referenced by parse_affentry().
#define PAE_INMASK 1 |
Definition at line 461 of file spell.c.
Referenced by parse_affentry().
#define PAE_INREPL 5 |
Definition at line 465 of file spell.c.
Referenced by parse_affentry().
#define PAE_WAIT_FIND 2 |
Definition at line 462 of file spell.c.
Referenced by parse_affentry().
#define PAE_WAIT_MASK 0 |
Definition at line 460 of file spell.c.
Referenced by parse_affentry().
#define PAE_WAIT_REPL 4 |
Definition at line 464 of file spell.c.
Referenced by parse_affentry().
#define STRNCMP | ( | s, | ||
p | ||||
) | strncmp( (s), (p), strlen(p) ) |
Definition at line 142 of file spell.c.
Referenced by NIImportAffixes(), and NIImportOOAffixes().
#define tmpalloc | ( | sz | ) | MemoryContextAlloc(Conf->buildCxt, (sz)) |
Definition at line 30 of file spell.c.
Referenced by mkANode(), NIAddAffix(), and NIAddSpell().
#define tmpalloc0 | ( | sz | ) | MemoryContextAllocZero(Conf->buildCxt, (sz)) |
static void addFlagValue | ( | IspellDict * | Conf, | |
char * | s, | |||
uint32 | val | |||
) | [static] |
Definition at line 583 of file spell.c.
References ereport, errcode(), errmsg(), ERROR, IspellDict::flagval, pg_mblen(), t_isspace, and IspellDict::usecompound.
Referenced by NIImportOOAffixes().
{
    /*
     * Register the compound-flag bits "val" for the single-byte flag
     * character found in s (leading whitespace is skipped first), and mark
     * the dictionary as using compounds.  Reports ERROR if no flag character
     * is present or if it is multibyte.
     */
    unsigned char flagchar;

    /* step over the whitespace in front of the flag character */
    for (; *s != '\0' && t_isspace(s); s += pg_mblen(s))
        ;

    if (*s == '\0')
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("syntax error")));

    if (pg_mblen(s) != 1)
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("multibyte flag character is not allowed")));

    flagchar = *(unsigned char *) s;
    Conf->flagval[flagchar] = (unsigned char) val;
    Conf->usecompound = true;
}
static void addNorm | ( | TSLexeme ** | lres, | |
TSLexeme ** | lcur, | |||
char * | word, | |||
int | flags, | |||
uint16 | NVariant | |||
) | [static] |
static void AddStem | ( | SplitVar * | v, | |
char * | word | |||
) | [static] |
Definition at line 1563 of file spell.c.
References SplitVar::lenstem, SplitVar::nstem, repalloc(), and SplitVar::stem.
Referenced by SplitToVariants().
static int addToResult | ( | char ** | forms, | |
char ** | cur, | |||
char * | word | |||
) | [static] |
static char* CheckAffix | ( | const char * | word, | |
size_t | len, | |||
AFFIX * | Affix, | |||
int | flagflags, | |||
char * | newword, | |||
int * | baselen | |||
) | [static] |
Definition at line 1277 of file spell.c.
References FF_COMPOUNDBEGIN, FF_COMPOUNDFORBIDFLAG, FF_COMPOUNDLAST, FF_COMPOUNDMIDDLE, FF_COMPOUNDONLY, FF_PREFIX, FF_SUFFIX, aff_struct::find, aff_struct::flagflags, aff_struct::isregis, aff_struct::issimple, NULL, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regexec(), aff_struct::reg, aff_struct::regex, aff_struct::regis, aff_struct::replen, RS_execute(), and aff_struct::type.
Referenced by NormalizeSubWord().
{
    /*
     * Apply one affix rule in reverse to "word" (of length "len") and test
     * the resulting candidate.
     *
     * flagflags carries the compound-position bits being checked; when it is
     * 0, affixes marked FF_COMPOUNDONLY are rejected.  The candidate is
     * written into the caller-supplied buffer newword.  For a suffix, and
     * only when baselen is non-NULL, *baselen is set to the length of the
     * unchanged part of the word; for a prefix, *baselen is read.
     *
     * Returns newword if the affix's condition (simple, regis, or regex)
     * accepts the candidate, NULL otherwise.
     */

    /*
     * Check compound allow flags
     */
    if (flagflags == 0)
    {
        if (Affix->flagflags & FF_COMPOUNDONLY)
            return NULL;
    }
    else if (flagflags & FF_COMPOUNDBEGIN)
    {
        if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
            return NULL;
        /* an affix not flagged for this position is rejected only if it is a suffix */
        if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
            if (Affix->type == FF_SUFFIX)
                return NULL;
    }
    else if (flagflags & FF_COMPOUNDMIDDLE)
    {
        if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
            (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
            return NULL;
    }
    else if (flagflags & FF_COMPOUNDLAST)
    {
        if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
            return NULL;
        /* an affix not flagged for this position is rejected only if it is a prefix */
        if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
            if (Affix->type == FF_PREFIX)
                return NULL;
    }

    /*
     * make replace pattern of affix
     */
    if (Affix->type == FF_SUFFIX)
    {
        /* overwrite the last replen bytes of the word with Affix->find */
        strcpy(newword, word);
        strcpy(newword + len - Affix->replen, Affix->find);
        if (baselen)            /* store length of non-changed part of word */
            *baselen = len - Affix->replen;
    }
    else
    {
        /*
         * If the prefix covers the whole unchanged part of the word, the
         * word consists only of prefix and suffix, so give up.
         */
        if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
            return NULL;
        /* replace the first replen bytes of the word with Affix->find */
        strcpy(newword, Affix->find);
        strcat(newword, word + Affix->replen);
    }

    /*
     * check resulting word
     */
    if (Affix->issimple)
        return newword;
    else if (Affix->isregis)
    {
        if (RS_execute(&(Affix->reg.regis), newword))
            return newword;
    }
    else
    {
        int         err;
        pg_wchar   *data;
        size_t      data_len;
        int         newword_len;

        /* Convert data string to wide characters */
        newword_len = strlen(newword);
        data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
        data_len = pg_mb2wchar_with_len(newword, data, newword_len);

        /* a zero result from pg_regexec is treated as a match */
        if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
        {
            pfree(data);
            return newword;
        }
        pfree(data);
    }

    return NULL;
}
static int CheckCompoundAffixes | ( | CMPDAffix ** | ptr, | |
char * | word, | |||
int | len, | |||
bool | CheckInPlace | |||
) | [static] |
Definition at line 1500 of file spell.c.
References NULL.
Referenced by SplitToVariants().
{
    /*
     * Scan the compound-affix list at *ptr (terminated by an entry whose
     * affix is NULL) for the next entry that is shorter than len and occurs
     * in word: at the start of word when CheckInPlace is true, anywhere in
     * word otherwise.
     *
     * *ptr is always left just past the last entry examined, so a later call
     * resumes the scan.  Returns -1 if the list is exhausted, 0 if the
     * matching entry is not a suffix, and otherwise the offset in word just
     * past the matched affix.
     */
    CMPDAffix  *cand;

    if (!CheckInPlace)
    {
        while ((cand = *ptr)->affix)
        {
            char       *hit;

            (*ptr)++;           /* the caller resumes after this entry */
            if (len > cand->len && (hit = strstr(word, cand->affix)) != NULL)
            {
                int         endpos = cand->len + (hit - word);

                return cand->issuffix ? endpos : 0;
            }
        }
        return -1;
    }

    while ((cand = *ptr)->affix)
    {
        (*ptr)++;               /* the caller resumes after this entry */
        if (len > cand->len && strncmp(cand->affix, word, cand->len) == 0)
            return cand->issuffix ? cand->len : 0;
    }

    return -1;
}
static int cmpaffix | ( | const void * | s1, | |
const void * | s2 | |||
) | [static] |
Definition at line 224 of file spell.c.
References a1, a2, FF_PREFIX, aff_struct::repl, strbcmp(), and aff_struct::type.
Referenced by NISortAffixes().
{
    /*
     * qsort comparator for AFFIX entries: order by type first, then by the
     * repl string, using strcmp for prefixes and strbcmp for everything else.
     */
    const AFFIX *lhs = (const AFFIX *) s1;
    const AFFIX *rhs = (const AFFIX *) s2;

    if (lhs->type != rhs->type)
        return (lhs->type < rhs->type) ? -1 : 1;

    if (lhs->type != FF_PREFIX)
        return strbcmp((const unsigned char *) lhs->repl,
                       (const unsigned char *) rhs->repl);

    return strcmp(lhs->repl, rhs->repl);
}
static int cmpspell | ( | const void * | s1, | |
const void * | s2 | |||
) | [static] |
static int cmpspellaffix | ( | const void * | s1, | |
const void * | s2 | |||
) | [static] |
Definition at line 154 of file spell.c.
References MAXFLAGLEN.
Referenced by NISortDictionary().
{
    /*
     * qsort comparator for an array of SPELL pointers: compares the entries'
     * flag strings over at most MAXFLAGLEN bytes.
     */
    const SPELL *lhs = *(SPELL *const *) s1;
    const SPELL *rhs = *(SPELL *const *) s2;

    return strncmp(lhs->p.flag, rhs->p.flag, MAXFLAGLEN);
}
static void* compact_palloc0 | ( | IspellDict * | Conf, | |
size_t | size | |||
) | [static] |
Definition at line 82 of file spell.c.
References Assert, IspellDict::avail, IspellDict::buildCxt, COMPACT_ALLOC_CHUNK, COMPACT_MAX_REQ, IspellDict::firstfree, MAXALIGN, NULL, and palloc0().
{
    /*
     * Zeroed allocation that carves small requests out of
     * COMPACT_ALLOC_CHUNK-sized blocks tracked in Conf->firstfree and
     * Conf->avail.  Requests larger than COMPACT_MAX_REQ go straight to
     * palloc0().
     */
    void       *block;

    /* only legal while the build context exists */
    Assert(Conf->buildCxt != NULL);

    /* big requests get nothing from chunking */
    if (size > COMPACT_MAX_REQ)
        return palloc0(size);

    /* round up so that every carved piece stays maxaligned */
    size = MAXALIGN(size);

    /* start a fresh chunk when the current one cannot satisfy the request */
    if (Conf->avail < size)
    {
        Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
        Conf->avail = COMPACT_ALLOC_CHUNK;
    }

    block = (void *) Conf->firstfree;
    Conf->avail -= size;
    Conf->firstfree += size;

    return block;
}
Definition at line 1538 of file spell.c.
References i, SplitVar::lenstem, SplitVar::next, SplitVar::nstem, palloc(), pstrdup(), and SplitVar::stem.
Referenced by SplitToVariants().
{
    /*
     * Allocate a new SplitVar.  With s == NULL the result is empty with room
     * for 16 stems; otherwise it copies s's stem array, duplicating each
     * string with pstrdup() when makedup is nonzero and sharing the pointers
     * when it is zero.  The new node's next link is NULL.
     */
    SplitVar   *copy = (SplitVar *) palloc(sizeof(SplitVar));
    int         idx;

    copy->next = NULL;

    if (s == NULL)
    {
        copy->lenstem = 16;
        copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
        copy->nstem = 0;
        return copy;
    }

    copy->lenstem = s->lenstem;
    copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
    copy->nstem = s->nstem;

    for (idx = 0; idx < s->nstem; idx++)
    {
        if (makedup)
            copy->stem[idx] = pstrdup(s->stem[idx]);
        else
            copy->stem[idx] = s->stem[idx];
    }

    return copy;
}
static char* cpstrdup | ( | IspellDict * | Conf, | |
const char * | str | |||
) | [static] |
Definition at line 114 of file spell.c.
References cpalloc.
Referenced by NIAddAffix(), and NISortDictionary().
{
    /* Duplicate str, terminator included, into memory obtained via cpalloc(). */
    size_t      nbytes = strlen(str) + 1;
    char       *copy = cpalloc(nbytes);

    memcpy(copy, str, nbytes);
    return copy;
}
static AffixNodeData* FindAffixes | ( | AffixNode * | node, | |
const char * | word, | |||
int | wrdlen, | |||
int * | level, | |||
int | type | |||
) | [static] |
Definition at line 1234 of file spell.c.
References AffixNode::data, GETWCHAR, AffixNode::isvoid, AffixNode::length, AffixNodeData::naff, AffixNodeData::node, and AffixNodeData::val.
Referenced by NormalizeSubWord().
{
    /*
     * Walk the affix tree from "node", consuming bytes of "word" one level
     * at a time, and return the first node data that carries affixes
     * (naff != 0), or NULL if the walk ends without finding any.
     *
     * *level is both input and output: it is the number of bytes consumed so
     * far and is advanced on every matched byte, so the caller can call
     * again to continue from the returned position.  "type" selects, via
     * GETWCHAR, whether bytes are taken from the front of the word
     * (FF_PREFIX) or from its end.
     */
    AffixNodeData *StopLow,
               *StopHigh,
               *StopMiddle;
    uint8       symbol;

    if (node->isvoid)
    {                           /* search void affixes */
        if (node->data->naff)
            return node->data;
        node = node->data->node;
    }

    while (node && *level < wrdlen)
    {
        /* binary search of this node's entries for the current byte */
        StopLow = node->data;
        StopHigh = node->data + node->length;
        while (StopLow < StopHigh)
        {
            StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
            symbol = GETWCHAR(word, wrdlen, *level, type);

            if (StopMiddle->val == symbol)
            {
                (*level)++;
                if (StopMiddle->naff)
                    return StopMiddle;
                /* no affix ends here; descend to the child node */
                node = StopMiddle->node;
                break;
            }
            else if (StopMiddle->val < symbol)
                StopLow = StopMiddle + 1;
            else
                StopHigh = StopMiddle;
        }
        /* the search range emptied without a match: nothing deeper to try */
        if (StopLow >= StopHigh)
            break;
    }
    return NULL;
}
static char* findchar | ( | char * | str, | |
int | c | |||
) | [static] |
Definition at line 160 of file spell.c.
References pg_mblen(), and t_iseq.
Referenced by NIImportAffixes(), and NIImportDictionary().
static int FindWord | ( | IspellDict * | Conf, | |
const char * | word, | |||
int | affixflag, | |||
int | flag | |||
) | [static] |
Definition at line 329 of file spell.c.
References SPNodeData::affix, IspellDict::AffixData, SPNodeData::compoundflag, SPNode::data, IspellDict::Dictionary, FF_COMPOUNDONLY, SPNodeData::isword, SPNode::length, SPNodeData::node, NULL, and SPNodeData::val.
Referenced by NormalizeSubWord().
{
    /*
     * Look "word" up in the dictionary tree Conf->Dictionary.
     *
     * Returns 1 if the word is present, satisfies the compound constraint in
     * "flag" (masked with FF_DICTFLAGMASK), and, when affixflag is nonzero,
     * its affix string contains affixflag.  Returns 0 otherwise.
     */
    SPNode     *node = Conf->Dictionary;
    SPNodeData *StopLow,
               *StopHigh,
               *StopMiddle;
    const uint8 *ptr = (const uint8 *) word;

    flag &= FF_DICTFLAGMASK;

    while (node && *ptr)
    {
        /* binary search of this node's entries for the current byte */
        StopLow = node->data;
        StopHigh = node->data + node->length;
        while (StopLow < StopHigh)
        {
            StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
            if (StopMiddle->val == *ptr)
            {
                /* last byte of the word, and a dictionary word ends here */
                if (*(ptr + 1) == '\0' && StopMiddle->isword)
                {
                    if (flag == 0)
                    {
                        if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
                            return 0;
                    }
                    else if ((flag & StopMiddle->compoundflag) == 0)
                        return 0;

                    if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
                        return 1;
                }
                /* descend to the child node for the next byte */
                node = StopMiddle->node;
                ptr++;
                break;
            }
            else if (StopMiddle->val < *ptr)
                StopLow = StopMiddle + 1;
            else
                StopHigh = StopMiddle;
        }
        /* the search range emptied without a match: the word is absent */
        if (StopLow >= StopHigh)
            break;
    }
    return 0;
}
static bool isAffixInUse | ( | IspellDict * | Conf, | |
char | flag | |||
) | [static] |
Definition at line 1174 of file spell.c.
References IspellDict::AffixData, i, IspellDict::nAffixData, and NULL.
Referenced by NISortAffixes().
{
    /*
     * Report whether any of the Conf->nAffixData strings in Conf->AffixData
     * contains the flag character.
     */
    int         idx = 0;

    while (idx < Conf->nAffixData)
    {
        if (strchr(Conf->AffixData[idx], flag) != NULL)
            return true;
        idx++;
    }

    return false;
}
static char* lowerstr_ctx | ( | IspellDict * | Conf, | |
const char * | src | |||
) | [static] |
Definition at line 127 of file spell.c.
References IspellDict::buildCxt, lowerstr(), and MemoryContextSwitchTo().
Referenced by NIImportDictionary(), and NIImportOOAffixes().
{
    /*
     * Run lowerstr() on src with Conf->buildCxt as the current memory
     * context, restoring the caller's context before returning the result.
     */
    MemoryContext callerCxt = MemoryContextSwitchTo(Conf->buildCxt);
    char       *lowered = lowerstr(src);

    MemoryContextSwitchTo(callerCxt);

    return lowered;
}
static uint32 makeCompoundFlags | ( | IspellDict * | Conf, | |
int | affix | |||
) | [static] |
Definition at line 918 of file spell.c.
References IspellDict::AffixData, FF_DICTFLAGMASK, flag(), and IspellDict::flagval.
Referenced by mkSPNode().
{
    /*
     * OR together the Conf->flagval entries for every byte of the affix
     * string Conf->AffixData[affix], and return the bits that fall within
     * FF_DICTFLAGMASK.  A NULL affix string yields 0.
     */
    const unsigned char *cp = (const unsigned char *) Conf->AffixData[affix];
    uint32      bits = 0;

    if (cp != NULL)
    {
        for (; *cp != '\0'; cp++)
            bits |= Conf->flagval[*cp];
    }

    return (bits & FF_DICTFLAGMASK);
}
static int MergeAffix | ( | IspellDict * | Conf, | |
int | a1, | |||
int | a2 | |||
) | [static] |
Definition at line 894 of file spell.c.
References IspellDict::AffixData, cpalloc, IspellDict::lenAffixData, IspellDict::nAffixData, and repalloc().
Referenced by mkSPNode().
{
    /*
     * Append a new entry to Conf->AffixData consisting of the strings at
     * indexes a1 and a2 joined by a single space, and return the index of
     * the new entry.
     */
    char      **ptr;

    /* grow until there is room for the new entry plus the NULL terminator */
    while (Conf->nAffixData + 1 >= Conf->lenAffixData)
    {
        Conf->lenAffixData *= 2;
        Conf->AffixData = (char **) repalloc(Conf->AffixData,
                                             sizeof(char *) * Conf->lenAffixData);
    }

    ptr = Conf->AffixData + Conf->nAffixData;
    *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
                   strlen(Conf->AffixData[a2]) +
                   1 /* space */ + 1 /* \0 */ );
    sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
    /* keep the array NULL-terminated after the new entry */
    ptr++;
    *ptr = NULL;
    Conf->nAffixData++;

    return Conf->nAffixData - 1;
}
static AffixNode* mkANode | ( | IspellDict * | Conf, | |
int | low, | |||
int | high, | |||
int | level, | |||
int | type | |||
) | [static] |
Definition at line 1060 of file spell.c.
References AffixNodeData::aff, IspellDict::Affix, ANHRDSZ, cpalloc, cpalloc0, AffixNode::data, GETCHAR, i, AffixNode::length, AffixNodeData::naff, AffixNodeData::node, pfree(), aff_struct::replen, tmpalloc, and AffixNodeData::val.
Referenced by NISortAffixes().
{
    /*
     * Recursively build one level of the affix tree from Conf->Affix entries
     * [low, high), branching on the byte GETCHAR() yields at position
     * "level" of each entry's repl string.  Entries whose replen does not
     * exceed level are ignored.  Returns the new node, or NULL if no entry
     * has a byte at this level.
     *
     * NOTE(review): grouping compares only adjacent entries, so this
     * presumably relies on the range already being sorted - confirm against
     * the caller.
     */
    int         i;
    int         nchar = 0;
    uint8       lastchar = '\0';
    AffixNode  *rs;
    AffixNodeData *data;
    int         lownew = low;
    int         naff;
    AFFIX     **aff;

    /* first pass: count the distinct bytes at this level */
    for (i = low; i < high; i++)
        if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
        {
            nchar++;
            lastchar = GETCHAR(Conf->Affix + i, level, type);
        }

    if (!nchar)
        return NULL;

    /* scratch list of the affixes that end at the byte being processed */
    aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
    naff = 0;

    rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
    rs->length = nchar;
    data = rs->data;

    /* second pass: fill one AffixNodeData per distinct byte */
    lastchar = '\0';
    for (i = low; i < high; i++)
        if (Conf->Affix[i].replen > level)
        {
            if (lastchar != GETCHAR(Conf->Affix + i, level, type))
            {
                if (lastchar)
                {
                    /* finish the previous byte: build its subtree, store its affixes */
                    data->node = mkANode(Conf, lownew, i, level + 1, type);
                    if (naff)
                    {
                        data->naff = naff;
                        data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
                        memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
                        naff = 0;
                    }
                    data++;
                    lownew = i;
                }
                lastchar = GETCHAR(Conf->Affix + i, level, type);
            }
            data->val = GETCHAR(Conf->Affix + i, level, type);
            if (Conf->Affix[i].replen == level + 1)
            {                   /* affix stopped */
                aff[naff++] = Conf->Affix + i;
            }
        }

    /* finish the last byte's entry */
    data->node = mkANode(Conf, lownew, high, level + 1, type);
    if (naff)
    {
        data->naff = naff;
        data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
        memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
        naff = 0;
    }

    pfree(aff);

    return rs;
}
static SPNode* mkSPNode | ( | IspellDict * | Conf, | |
int | low, | |||
int | high, | |||
int | level | |||
) | [static] |
Definition at line 933 of file spell.c.
References SPNodeData::affix, SPNodeData::compoundflag, cpalloc0, spell_struct::d, SPNode::data, FF_COMPOUNDFLAG, FF_COMPOUNDONLY, i, SPNodeData::isword, SPNode::length, makeCompoundFlags(), MergeAffix(), SPNodeData::node, spell_struct::p, IspellDict::Spell, SPNHDRSZ, SPNodeData::val, and spell_struct::word.
Referenced by NISortDictionary().
{
    /*
     * Recursively build one level of the dictionary tree from Conf->Spell
     * entries [low, high), branching on byte "level" of each word.  Entries
     * whose length does not exceed level are ignored.  Returns the new node,
     * or NULL if no entry has a byte at this level.
     *
     * NOTE(review): grouping compares only adjacent entries, so this
     * presumably relies on the range already being sorted - confirm against
     * the caller.
     */
    int         i;
    int         nchar = 0;
    char        lastchar = '\0';
    SPNode     *rs;
    SPNodeData *data;
    int         lownew = low;

    /* first pass: count the distinct bytes at this level */
    for (i = low; i < high; i++)
        if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
        {
            nchar++;
            lastchar = Conf->Spell[i]->word[level];
        }

    if (!nchar)
        return NULL;

    rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
    rs->length = nchar;
    data = rs->data;

    /* second pass: fill one SPNodeData per distinct byte */
    lastchar = '\0';
    for (i = low; i < high; i++)
        if (Conf->Spell[i]->p.d.len > level)
        {
            if (lastchar != Conf->Spell[i]->word[level])
            {
                if (lastchar)
                {
                    /* finish the previous byte by building its subtree */
                    data->node = mkSPNode(Conf, lownew, i, level + 1);
                    lownew = i;
                    data++;
                }
                lastchar = Conf->Spell[i]->word[level];
            }
            data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
            /* this word ends exactly at this byte */
            if (Conf->Spell[i]->p.d.len == level + 1)
            {
                bool        clearCompoundOnly = false;

                if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
                {
                    /*
                     * MergeAffix may be called several times for one word.
                     * If one of the entries may appear in a compound word
                     * and another may not, clear the FF_COMPOUNDONLY flag.
                     */

                    clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
                                         & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
                        ? false : true;
                    data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
                }
                else
                    data->affix = Conf->Spell[i]->p.d.affix;
                data->isword = 1;

                data->compoundflag = makeCompoundFlags(Conf, data->affix);

                /* FF_COMPOUNDONLY always comes with FF_COMPOUNDFLAG set */
                if ((data->compoundflag & FF_COMPOUNDONLY) &&
                    (data->compoundflag & FF_COMPOUNDFLAG) == 0)
                    data->compoundflag |= FF_COMPOUNDFLAG;

                if (clearCompoundOnly)
                    data->compoundflag &= ~FF_COMPOUNDONLY;
            }
        }

    /* finish the last byte's entry */
    data->node = mkSPNode(Conf, lownew, high, level + 1);

    return rs;
}
static void mkVoidAffix | ( | IspellDict * | Conf, | |
bool | issuffix, | |||
int | startsuffix | |||
) | [static] |
Definition at line 1131 of file spell.c.
References AffixNodeData::aff, IspellDict::Affix, ANHRDSZ, cpalloc, AffixNode::data, end, i, IspellDict::naffixes, AffixNodeData::node, palloc0(), IspellDict::Prefix, aff_struct::replen, and IspellDict::Suffix.
Referenced by NISortAffixes().
{
    /*
     * Put a "void" node in front of the suffix tree (issuffix true) or the
     * prefix tree (issuffix false).  The node is marked isvoid, links to the
     * previous root, and lists every affix in the relevant range of
     * Conf->Affix whose replen is 0.  Suffixes occupy
     * [startsuffix, naffixes); prefixes occupy [0, startsuffix).
     */
    int         i,
                cnt = 0;
    int         start = (issuffix) ? startsuffix : 0;
    int         end = (issuffix) ? Conf->naffixes : startsuffix;
    AffixNode  *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));

    Affix->length = 1;
    Affix->isvoid = 1;

    /* splice the void node in as the new root of the chosen tree */
    if (issuffix)
    {
        Affix->data->node = Conf->Suffix;
        Conf->Suffix = Affix;
    }
    else
    {
        Affix->data->node = Conf->Prefix;
        Conf->Prefix = Affix;
    }

    /* count the affixes with an empty repl string */
    for (i = start; i < end; i++)
        if (Conf->Affix[i].replen == 0)
            cnt++;

    if (cnt == 0)
        return;

    Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
    Affix->data->naff = (uint32) cnt;

    /* second pass: record pointers to those affixes */
    cnt = 0;
    for (i = start; i < end; i++)
        if (Conf->Affix[i].replen == 0)
        {
            Affix->data->aff[cnt] = Conf->Affix + i;
            cnt++;
        }
}
static void NIAddAffix | ( | IspellDict * | Conf, | |
int | flag, | |||
char | flagflags, | |||
const char * | mask, | |||
const char * | find, | |||
const char * | repl, | |||
int | type | |||
) | [static] |
Definition at line 377 of file spell.c.
References IspellDict::Affix, cpstrdup(), DEFAULT_COLLATION_OID, ereport, errcode(), errmsg(), ERROR, FF_COMPOUNDFLAG, FF_COMPOUNDONLY, FF_COMPOUNDPERMITFLAG, FF_SUFFIX, aff_struct::find, aff_struct::flag, aff_struct::flagflags, aff_struct::isregis, aff_struct::issimple, IspellDict::maffixes, IspellDict::naffixes, palloc(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), aff_struct::reg, REG_ADVANCED, REG_NOSUB, aff_struct::regex, aff_struct::regis, repalloc(), aff_struct::repl, aff_struct::replen, RS_compile(), RS_isRegis(), tmpalloc, aff_struct::type, and VoidString.
Referenced by NIImportAffixes(), and NIImportOOAffixes().
{
    /*
     * Append one affix rule to Conf->Affix, growing the array as needed.
     *
     * mask is the condition the base form must satisfy: "." means no
     * condition, a pattern accepted by RS_isRegis() is compiled with
     * RS_compile(), and anything else is compiled as a regular expression
     * anchored at the end (suffix) or the start (prefix).  find and repl are
     * stored on the rule (an empty string becomes the shared VoidString),
     * and replen is set to strlen(repl).  Reports ERROR if the regular
     * expression does not compile.
     */
    AFFIX      *Affix;

    /* grow the array: 16 entries at first, doubling afterwards */
    if (Conf->naffixes >= Conf->maffixes)
    {
        if (Conf->maffixes)
        {
            Conf->maffixes *= 2;
            Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
        }
        else
        {
            Conf->maffixes = 16;
            Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
        }
    }

    Affix = Conf->Affix + Conf->naffixes;

    if (strcmp(mask, ".") == 0)
    {
        Affix->issimple = 1;
        Affix->isregis = 0;
    }
    else if (RS_isRegis(mask))
    {
        Affix->issimple = 0;
        Affix->isregis = 1;
        RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX) ? true : false,
                   (mask && *mask) ? mask : VoidString);
    }
    else
    {
        int         masklen;
        int         wmasklen;
        int         err;
        pg_wchar   *wmask;
        char       *tmask;

        Affix->issimple = 0;
        Affix->isregis = 0;
        /* anchor the pattern: "mask$" for suffixes, "^mask" for prefixes */
        tmask = (char *) tmpalloc(strlen(mask) + 3);
        if (type == FF_SUFFIX)
            sprintf(tmask, "%s$", mask);
        else
            sprintf(tmask, "^%s", mask);

        /* the regex compiler takes wide characters */
        masklen = strlen(tmask);
        wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
        wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);

        err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
                         REG_ADVANCED | REG_NOSUB,
                         DEFAULT_COLLATION_OID);
        if (err)
        {
            char        errstr[100];

            pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                     errmsg("invalid regular expression: %s", errstr)));
        }
    }

    Affix->flagflags = flagflags;
    /* FF_COMPOUNDONLY and FF_COMPOUNDPERMITFLAG both imply FF_COMPOUNDFLAG */
    if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
    {
        if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
            Affix->flagflags |= FF_COMPOUNDFLAG;
    }
    Affix->flag = flag;
    Affix->type = type;

    Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
    if ((Affix->replen = strlen(repl)) > 0)
        Affix->repl = cpstrdup(Conf, repl);
    else
        Affix->repl = VoidString;
    Conf->naffixes++;
}
static void NIAddSpell | ( | IspellDict * | Conf, | |
const char * | word, | |||
const char * | flag | |||
) | [static] |
Definition at line 241 of file spell.c.
References spell_struct::flag, MAXFLAGLEN, IspellDict::mspell, IspellDict::nspell, spell_struct::p, repalloc(), IspellDict::Spell, SPELLHDRSZ, tmpalloc, and spell_struct::word.
Referenced by NIImportDictionary().
{
    /*
     * Append one dictionary entry (word plus its flag string) to
     * Conf->Spell, growing the pointer array as needed.  The entry itself is
     * allocated in the build context via tmpalloc().
     */

    /* grow the array: 20K entries at first, doubling afterwards */
    if (Conf->nspell >= Conf->mspell)
    {
        if (Conf->mspell)
        {
            Conf->mspell *= 2;
            Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
        }
        else
        {
            Conf->mspell = 1024 * 20;
            Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
        }
    }
    Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
    strcpy(Conf->Spell[Conf->nspell]->word, word);
    /* strncpy leaves no terminator when flag has MAXFLAGLEN or more bytes */
    strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN);
    Conf->nspell++;
}
void NIFinishBuild | ( | IspellDict * | Conf | ) |
Definition at line 56 of file spell.c.
References IspellDict::buildCxt, IspellDict::firstfree, MemoryContextDelete(), and IspellDict::Spell.
Referenced by dispell_init().
{
    /*
     * End the dictionary build phase: delete the temporary build context,
     * then null the IspellDict fields that referred to it so that no
     * dangling pointers are left behind.
     */
    MemoryContextDelete(Conf->buildCxt);

    Conf->firstfree = NULL;
    Conf->Spell = NULL;
    Conf->buildCxt = NULL;
}
void NIImportAffixes | ( | IspellDict * | Conf, | |
const char * | filename | |||
) |
Definition at line 762 of file spell.c.
References ereport, errcode(), errmsg(), ERROR, FF_COMPOUNDFLAG, FF_PREFIX, FF_SUFFIX, find(), findchar(), flag(), IspellDict::flagval, lowerstr(), NIAddAffix(), NIImportOOAffixes(), NULL, parse_affentry(), pfree(), pg_mblen(), prefixes(), STRNCMP, t_isspace, tsearch_readline(), tsearch_readline_begin(), tsearch_readline_end(), and IspellDict::usecompound.
Referenced by dispell_init().
{
    /*
     * Load an affix file in the old Ispell format into Conf.
     *
     * Resets Conf->flagval and Conf->usecompound, then reads the file line
     * by line: "compoundwords", "suffixes", "prefixes" and "flag" directives
     * update the parser state, and every other line inside a suffixes or
     * prefixes section is parsed as an affix entry and added with
     * NIAddAffix().  If a line starts with COMPOUNDFLAG, COMPOUNDMIN, PFX or
     * SFX, the whole file is handed to NIImportOOAffixes() instead; this is
     * an ERROR if an old-format directive was already seen.  Reports ERROR
     * if the file cannot be opened or a flag character is multibyte.
     */
    char       *pstr = NULL;
    char        mask[BUFSIZ];
    char        find[BUFSIZ];
    char        repl[BUFSIZ];
    char       *s;
    bool        suffixes = false;
    bool        prefixes = false;
    int         flag = 0;
    char        flagflags = 0;
    tsearch_readline_state trst;
    bool        oldformat = false;
    char       *recoded = NULL;

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open affix file \"%s\": %m",
                        filename)));

    memset(Conf->flagval, 0, sizeof(Conf->flagval));
    Conf->usecompound = false;

    while ((recoded = tsearch_readline(&trst)) != NULL)
    {
        pstr = lowerstr(recoded);

        /* Skip comments and empty lines */
        if (*pstr == '#' || *pstr == '\n')
            goto nextline;

        if (STRNCMP(pstr, "compoundwords") == 0)
        {
            s = findchar(pstr, 'l');
            if (s)
            {
                /* the flag character must come from the non-lowercased line */
                s = recoded + (s - pstr);
                while (*s && !t_isspace(s))
                    s += pg_mblen(s);
                while (*s && t_isspace(s))
                    s += pg_mblen(s);

                if (*s && pg_mblen(s) == 1)
                {
                    Conf->flagval[*(unsigned char *) s] = FF_COMPOUNDFLAG;
                    Conf->usecompound = true;
                }
                oldformat = true;
                goto nextline;
            }
        }
        if (STRNCMP(pstr, "suffixes") == 0)
        {
            suffixes = true;
            prefixes = false;
            oldformat = true;
            goto nextline;
        }
        if (STRNCMP(pstr, "prefixes") == 0)
        {
            suffixes = false;
            prefixes = true;
            oldformat = true;
            goto nextline;
        }
        if (STRNCMP(pstr, "flag") == 0)
        {
            s = recoded + 4;    /* we need non-lowercased string */
            flagflags = 0;

            while (*s && t_isspace(s))
                s += pg_mblen(s);
            oldformat = true;

            /* allow only single-encoded flags */
            if (pg_mblen(s) != 1)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("multibyte flag character is not allowed")));

            if (*s == '*')
            {
                flagflags |= FF_CROSSPRODUCT;
                s++;
            }
            else if (*s == '~')
            {
                flagflags |= FF_COMPOUNDONLY;
                s++;
            }

            if (*s == '\\')
                s++;

            /* allow only single-encoded flags */
            if (pg_mblen(s) != 1)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("multibyte flag character is not allowed")));

            flag = *(unsigned char *) s;
            goto nextline;
        }
        if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
            STRNCMP(recoded, "PFX") == 0 || STRNCMP(recoded, "SFX") == 0)
        {
            if (oldformat)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("wrong affix file format for flag")));

            /*
             * This path leaves the loop without reaching nextline, so the
             * current line's buffers must be released here before the file
             * is re-read by the other parser.
             */
            pfree(recoded);
            pfree(pstr);
            tsearch_readline_end(&trst);
            NIImportOOAffixes(Conf, filename);
            return;
        }
        if ((!suffixes) && (!prefixes))
            goto nextline;

        if (!parse_affentry(pstr, mask, find, repl))
            goto nextline;

        NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);

nextline:
        pfree(recoded);
        pfree(pstr);
    }
    tsearch_readline_end(&trst);
}
void NIImportDictionary | ( | IspellDict * | Conf, | |
const char * | filename | |||
) |
Definition at line 268 of file spell.c.
References ereport, errcode(), errmsg(), ERROR, findchar(), lowerstr_ctx(), NIAddSpell(), NULL, pfree(), pg_mblen(), t_isprint, t_isspace, tsearch_readline(), tsearch_readline_begin(), and tsearch_readline_end().
Referenced by dispell_init().
{
    /*
     * Load a dictionary file into Conf.
     *
     * Each line is split at the first '/' into a word and an optional flag
     * string.  The word is cut at its first whitespace character and
     * lowercased, and the pair is stored with NIAddSpell().  Reports ERROR
     * if the file cannot be opened.
     */
    tsearch_readline_state trst;
    char       *line;

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open dictionary file \"%s\": %m",
                        filename)));

    while ((line = tsearch_readline(&trst)) != NULL)
    {
        char       *s,
                   *pstr;
        const char *flag;

        /* Extract flag from the line */
        flag = NULL;
        if ((s = findchar(line, '/')))
        {
            /* terminate the word at the slash; flags start right after it */
            *s++ = '\0';
            flag = s;
            while (*s)
            {
                /* only single-byte flags are accepted, which keeps this fast */
                if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
                    s++;
                else
                {
                    /* cut the flag string at the first unacceptable character */
                    *s = '\0';
                    break;
                }
            }
        }
        else
            flag = "";

        /* Remove trailing spaces: cut the word at its first whitespace */
        s = line;
        while (*s)
        {
            if (t_isspace(s))
            {
                *s = '\0';
                break;
            }
            s += pg_mblen(s);
        }
        pstr = lowerstr_ctx(Conf, line);

        NIAddSpell(Conf, pstr, flag);
        pfree(pstr);

        pfree(line);
    }
    tsearch_readline_end(&trst);
}
static void NIImportOOAffixes | ( | IspellDict * | Conf, | |
const char * | filename | |||
) | [static] |
Definition at line 603 of file spell.c.
References addFlagValue(), ereport, errcode(), errmsg(), ERROR, FF_COMPOUNDBEGIN, FF_COMPOUNDFLAG, FF_COMPOUNDFORBIDFLAG, FF_COMPOUNDLAST, FF_COMPOUNDMIDDLE, FF_COMPOUNDONLY, FF_COMPOUNDPERMITFLAG, FF_CROSSPRODUCT, FF_PREFIX, FF_SUFFIX, find(), flag(), IspellDict::flagval, lowerstr_ctx(), NIAddAffix(), NULL, pfree(), pg_mblen(), STRNCMP, t_iseq, t_isspace, tsearch_readline(), tsearch_readline_begin(), tsearch_readline_end(), and IspellDict::usecompound.
Referenced by NIImportAffixes().
{
    /*
     * Load an affix file whose lines use the PFX/SFX and COMPOUND* keywords
     * into Conf.  The file is read twice.
     *
     * Pass 1 resets Conf->flagval and Conf->usecompound and registers the
     * compound-related flag characters (COMPOUNDFLAG, COMPOUNDBEGIN,
     * COMPOUNDLAST/COMPOUNDEND, COMPOUNDMIDDLE, ONLYINCOMPOUND,
     * COMPOUNDPERMITFLAG, COMPOUNDFORBIDFLAG).  A FLAG line with any value
     * other than "default" is an ERROR.
     *
     * Pass 2 parses the PFX/SFX lines.  A four-field line is a header that
     * sets the current flag, the affix type and whether cross-product is
     * allowed.  A five-field line is a rule for the current flag and is
     * added with NIAddAffix().  Lines that do not fit are silently skipped.
     * Reports ERROR if the file cannot be opened.
     */
    char        type[BUFSIZ],
               *ptype = NULL;
    char        sflag[BUFSIZ];
    char        mask[BUFSIZ],
               *pmask;
    char        find[BUFSIZ],
               *pfind;
    char        repl[BUFSIZ],
               *prepl;
    bool        isSuffix = false;
    int         flag = 0;
    char        flagflags = 0;
    tsearch_readline_state trst;
    int         scanread = 0;
    char        scanbuf[BUFSIZ];
    char       *recoded;

    /* read file to find any flag */
    memset(Conf->flagval, 0, sizeof(Conf->flagval));
    Conf->usecompound = false;

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open affix file \"%s\": %m",
                        filename)));

    while ((recoded = tsearch_readline(&trst)) != NULL)
    {
        if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
        {
            pfree(recoded);
            continue;
        }

        if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
            addFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
                         FF_COMPOUNDFLAG);
        else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
            addFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
                         FF_COMPOUNDBEGIN);
        else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
            addFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
                         FF_COMPOUNDLAST);
        /* COMPOUNDLAST and COMPOUNDEND are synonyms */
        else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
            addFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
                         FF_COMPOUNDLAST);
        else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
            addFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
                         FF_COMPOUNDMIDDLE);
        else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
            addFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
                         FF_COMPOUNDONLY);
        else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
            addFlagValue(Conf, recoded + strlen("COMPOUNDPERMITFLAG"),
                         FF_COMPOUNDPERMITFLAG);
        else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
            addFlagValue(Conf, recoded + strlen("COMPOUNDFORBIDFLAG"),
                         FF_COMPOUNDFORBIDFLAG);
        else if (STRNCMP(recoded, "FLAG") == 0)
        {
            char       *s = recoded + strlen("FLAG");

            while (*s && t_isspace(s))
                s += pg_mblen(s);

            /* the message must be one uninterrupted string literal */
            if (*s && STRNCMP(s, "default") != 0)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("Ispell dictionary supports only default flag value")));
        }

        pfree(recoded);
    }
    tsearch_readline_end(&trst);

    /* build the sscanf format; every %s carries a width below BUFSIZ */
    sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open affix file \"%s\": %m",
                        filename)));

    while ((recoded = tsearch_readline(&trst)) != NULL)
    {
        if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
            goto nextline;

        scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);

        if (ptype)
            pfree(ptype);
        ptype = lowerstr_ctx(Conf, type);
        if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
            goto nextline;

        if (scanread == 4)
        {
            /* header line: remember flag, type and cross-product setting */
            if (strlen(sflag) != 1)
                goto nextline;
            flag = *sflag;
            isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
            if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
                flagflags = FF_CROSSPRODUCT;
            else
                flagflags = 0;
        }
        else
        {
            char       *ptr;
            int         aflg = 0;

            /* a rule line must belong to the most recent header */
            if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
                goto nextline;
            prepl = lowerstr_ctx(Conf, repl);
            /* affix flag */
            if ((ptr = strchr(prepl, '/')) != NULL)
            {
                *ptr = '\0';
                /* read the flag characters from the non-lowercased field */
                ptr = repl + (ptr - prepl) + 1;
                while (*ptr)
                {
                    aflg |= Conf->flagval[*(unsigned char *) ptr];
                    ptr++;
                }
            }
            pfind = lowerstr_ctx(Conf, find);
            pmask = lowerstr_ctx(Conf, mask);
            /* a field starting with '0' stands for the empty string */
            if (t_iseq(find, '0'))
                *pfind = '\0';
            if (t_iseq(repl, '0'))
                *prepl = '\0';

            NIAddAffix(Conf, flag, flagflags | aflg, pmask, pfind, prepl,
                       isSuffix ? FF_SUFFIX : FF_PREFIX);
            pfree(prepl);
            pfree(pfind);
            pfree(pmask);
        }

nextline:
        pfree(recoded);
    }

    tsearch_readline_end(&trst);
    if (ptype)
        pfree(ptype);
}
TSLexeme *NINormalizeWord(IspellDict *Conf, char *word)
Definition at line 1739 of file spell.c.
References addNorm(), FF_COMPOUNDLAST, i, MAX_NORM, SplitVar::next, NormalizeSubWord(), SplitVar::nstem, NULL, pfree(), pstrdup(), SplitToVariants(), SplitVar::stem, and IspellDict::usecompound.
Referenced by dispell_lexize().
{
	/*
	 * Build the array of lexemes for 'word'.
	 *
	 * First every normal form of the whole word (as reported by
	 * NormalizeSubWord with no compound flag) is added, each under its own
	 * variant number.  Then, if the dictionary uses compounds, each split
	 * produced by SplitToVariants that has more than one stem is added: the
	 * leading stems are emitted as-is and the last stem is normalized with
	 * FF_COMPOUNDLAST.  Every normal form of the last stem yields a separate
	 * variant.
	 *
	 * Returns whatever addNorm accumulated, or NULL if nothing was added.
	 */
	TSLexeme   *result = NULL;
	TSLexeme   *tail = NULL;
	uint16		NVariant = 1;
	char	  **forms = NormalizeSubWord(Conf, word, 0);

	if (forms != NULL)
	{
		char	  **form;

		/* the strings themselves are handed over to addNorm */
		for (form = forms; *form != NULL && (tail - result) < MAX_NORM; form++)
			addNorm(&result, &tail, *form, 0, NVariant++);

		pfree(forms);
	}

	if (!Conf->usecompound)
		return result;

	{
		int			wordlen = strlen(word);
		SplitVar   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);

		while (var != NULL)
		{
			SplitVar   *following;
			int			k;

			if (var->nstem > 1)
			{
				char	  **tails = NormalizeSubWord(Conf,
													 var->stem[var->nstem - 1],
													 FF_COMPOUNDLAST);

				if (tails != NULL)
				{
					char	  **t;

					for (t = tails; *t != NULL; t++)
					{
						for (k = 0; k < var->nstem - 1; k++)
						{
							/*
							 * The first variant takes the stem strings
							 * themselves; later variants get copies.
							 */
							char	   *piece;

							if (t == tails)
								piece = var->stem[k];
							else
								piece = pstrdup(var->stem[k]);

							addNorm(&result, &tail, piece, 0, NVariant);
						}

						addNorm(&result, &tail, *t, 0, NVariant);
						NVariant++;
					}

					pfree(tails);

					/*
					 * The leading stems now belong to the result: clearing
					 * stem[0] makes the cleanup loop below stop at once.  The
					 * last stem was only used as input, so free it here.
					 */
					var->stem[0] = NULL;
					pfree(var->stem[var->nstem - 1]);
				}
			}

			/* release stems that were not handed over to the result */
			for (k = 0; k < var->nstem && var->stem[k]; k++)
				pfree(var->stem[k]);

			following = var->next;
			pfree(var->stem);
			pfree(var);
			var = following;
		}
	}

	return result;
}
void NISortAffixes(IspellDict *Conf)
Definition at line 1186 of file spell.c.
References CMPDAffix::affix, IspellDict::Affix, cmpaffix(), IspellDict::CompoundAffix, FF_COMPOUNDFLAG, FF_PREFIX, FF_SUFFIX, aff_struct::flag, aff_struct::flagflags, i, isAffixInUse(), CMPDAffix::issuffix, CMPDAffix::len, mkANode(), mkVoidAffix(), IspellDict::naffixes, palloc(), IspellDict::Prefix, qsort, repalloc(), aff_struct::repl, aff_struct::replen, strbncmp(), IspellDict::Suffix, and aff_struct::type.
Referenced by dispell_init().
{
	/*
	 * Sort the loaded affixes, collect the compound affixes that are in use,
	 * and build the prefix and suffix lookup nodes.
	 *
	 * After sorting with cmpaffix, entries [0, firstsuffix) are passed to
	 * mkANode as prefixes and [firstsuffix, naffixes) as suffixes.
	 * Conf->CompoundAffix becomes an array terminated by an entry whose
	 * 'affix' is NULL.
	 *
	 * Does nothing when no affixes were loaded.
	 */
	AFFIX	   *Affix;
	int			i;
	CMPDAffix  *ptr;
	int			firstsuffix = Conf->naffixes;

	if (Conf->naffixes == 0)
		return;

	if (Conf->naffixes > 1)
		qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);

	/*
	 * Reserve one slot beyond naffixes: if every affix qualifies as a
	 * compound affix, the terminating entry written after the loop still has
	 * to fit inside the allocation.
	 */
	Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * (Conf->naffixes + 1));
	ptr->affix = NULL;

	for (i = 0; i < Conf->naffixes; i++)
	{
		Affix = &(((AFFIX *) Conf->Affix)[i]);

		/* remember where the suffixes start in the sorted array */
		if (Affix->type == FF_SUFFIX && i < firstsuffix)
			firstsuffix = i;

		if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
			isAffixInUse(Conf, (char) Affix->flag))
		{
			/*
			 * Compare the candidate's own suffix-ness with the previously
			 * stored entry.  The slot at 'ptr' has not been filled in yet, so
			 * its issuffix field must not be read.
			 */
			bool		issuffix = (Affix->type == FF_SUFFIX);

			if (ptr == Conf->CompoundAffix ||
				issuffix != (ptr - 1)->issuffix ||
				strbncmp((const unsigned char *) (ptr - 1)->affix,
						 (const unsigned char *) Affix->repl,
						 (ptr - 1)->len))
			{
				/* leave only unique and minimal suffixes */
				ptr->affix = Affix->repl;
				ptr->len = Affix->replen;
				ptr->issuffix = issuffix;
				ptr++;
			}
		}
	}

	/* terminate the list and shrink the array to what was actually used */
	ptr->affix = NULL;
	Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix,
												 sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));

	Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
	Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
	mkVoidAffix(Conf, true, firstsuffix);
	mkVoidAffix(Conf, false, firstsuffix);
}
void NISortDictionary(IspellDict *Conf)
Definition at line 1013 of file spell.c.
References IspellDict::AffixData, Assert, cmpspell(), cmpspellaffix(), cpstrdup(), spell_struct::d, IspellDict::Dictionary, spell_struct::flag, i, IspellDict::lenAffixData, MAXFLAGLEN, mkSPNode(), IspellDict::nAffixData, IspellDict::nspell, spell_struct::p, palloc0(), qsort, IspellDict::Spell, and spell_struct::word.
Referenced by dispell_init().
{
	/*
	 * Compress the per-word flag strings into a shared table and build the
	 * dictionary node structure.
	 *
	 * The words are first sorted with cmpspellaffix so that equal flag
	 * strings are adjacent.  Each distinct flag string is copied once into
	 * Conf->AffixData, and every word's p.d.affix is set to the index of its
	 * flag string there (p.d.len receives the word length).  Finally the
	 * words are re-sorted with cmpspell and handed to mkSPNode.
	 */
	int			idx;
	int			ndistinct;
	int			slot;

	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);

	/* Count distinct flag strings: the first entry, plus every change. */
	ndistinct = (Conf->nspell > 0) ? 1 : 0;
	for (idx = 1; idx < Conf->nspell; idx++)
	{
		if (strncmp(Conf->Spell[idx]->p.flag,
					Conf->Spell[idx - 1]->p.flag,
					MAXFLAGLEN) != 0)
			ndistinct++;
	}

	Conf->AffixData = (char **) palloc0(ndistinct * sizeof(char *));

	/*
	 * Fill the table and replace the textual flag of each entry by its index.
	 * The comparison is made against the last stored table entry, not the
	 * previous word, because the previous word's p.d has already been written
	 * at that point.  NOTE(review): p.flag and p.d appear to share storage;
	 * confirm against the SPELL declaration.
	 */
	slot = -1;
	for (idx = 0; idx < Conf->nspell; idx++)
	{
		SPELL	   *entry = Conf->Spell[idx];

		if (idx == 0 ||
			strncmp(entry->p.flag, Conf->AffixData[slot], MAXFLAGLEN) != 0)
		{
			slot++;
			Assert(slot < ndistinct);
			Conf->AffixData[slot] = cpstrdup(Conf, entry->p.flag);
		}

		entry->p.d.affix = slot;
		entry->p.d.len = strlen(entry->word);
	}

	Conf->nAffixData = ndistinct;
	Conf->lenAffixData = Conf->nAffixData;

	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
	Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
}
void NIStartBuild(IspellDict *Conf)
Definition at line 39 of file spell.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), IspellDict::buildCxt, and CurTransactionContext.
Referenced by dispell_init().
{
	/*
	 * Create the memory context used while the dictionary is being built and
	 * store it in Conf->buildCxt.
	 *
	 * The context is made a child of CurTransactionContext so that it goes
	 * away automatically if an error aborts the build.
	 */
	MemoryContext cxt;

	cxt = AllocSetContextCreate(CurTransactionContext,
								"Ispell dictionary init context",
								ALLOCSET_DEFAULT_MINSIZE,
								ALLOCSET_DEFAULT_INITSIZE,
								ALLOCSET_DEFAULT_MAXSIZE);
	Conf->buildCxt = cxt;
}
static char **NormalizeSubWord(IspellDict *Conf, char *word, int flag)
Definition at line 1382 of file spell.c.
References addToResult(), AffixNodeData::aff, CheckAffix(), cur, FF_PREFIX, FF_SUFFIX, FindAffixes(), FindWord(), aff_struct::flag, aff_struct::flagflags, i, MAX_NORM, MAXNORMLEN, AffixNodeData::naff, AffixNodeData::node, NULL, palloc(), pfree(), IspellDict::Prefix, pstrdup(), and IspellDict::Suffix.
Referenced by NINormalizeWord(), and SplitToVariants().
{
	/*
	 * Collect the dictionary forms that 'word' can be reduced to.
	 *
	 * Three kinds of candidates are tried, in this order:
	 *	 1. the word itself;
	 *	 2. the word after a matching prefix rule has been applied;
	 *	 3. the word after a matching suffix rule has been applied, and then
	 *		that result after a prefix rule has been applied as well.
	 * A candidate is kept when FindWord accepts it.
	 *
	 * 'flag' is passed through unchanged to FindWord and CheckAffix.
	 *
	 * Returns a palloc'd array of strings, or NULL when the word is longer
	 * than MAXNORMLEN or nothing was found.  The array is presumably kept
	 * NULL-terminated by addToResult (callers iterate until a NULL entry) --
	 * confirm against addToResult.
	 */
	AffixNodeData *suffix = NULL,
			   *prefix = NULL;
	int			slevel = 0,
				plevel = 0;
	int			wrdlen = strlen(word),
				swrdlen;
	char	  **forms;
	char	  **cur;			/* next free slot in forms[] */
	char		newword[2 * MAXNORMLEN] = "";	/* word after one affix rule */
	char		pnewword[2 * MAXNORMLEN] = "";	/* newword after a prefix rule */
	AffixNode  *snode = Conf->Suffix,
			   *pnode;
	int			i,
				j;

	/* refuse over-long input before touching the fixed-size buffers */
	if (wrdlen > MAXNORMLEN)
		return NULL;
	cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
	*cur = NULL;


	/* Check that the word itself is normal form */
	if (FindWord(Conf, word, 0, flag))
	{
		*cur = pstrdup(word);
		cur++;
		*cur = NULL;
	}

	/* Find all other NORMAL forms of the 'word' (check only prefix) */
	pnode = Conf->Prefix;
	plevel = 0;
	while (pnode)
	{
		prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
		if (!prefix)
			break;
		for (j = 0; j < prefix->naff; j++)
		{
			if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
			{
				/* prefix success */
				if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
					cur += addToResult(forms, cur, newword);
			}
		}
		/* continue the search from the node below the one just matched */
		pnode = prefix->node;
	}

	/*
	 * Find all other NORMAL forms of the 'word' (check suffix and then
	 * prefix)
	 */
	while (snode)
	{
		int			baselen = 0;

		/* find possible suffix */
		suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
		if (!suffix)
			break;
		/* check each affix rule attached to this suffix */
		for (i = 0; i < suffix->naff; i++)
		{
			if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
			{
				/* suffix success */
				if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
					cur += addToResult(forms, cur, newword);

				/* now try the suffix-stripped word against the prefixes */
				pnode = Conf->Prefix;
				plevel = 0;
				swrdlen = strlen(newword);
				while (pnode)
				{
					prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
					if (!prefix)
						break;
					for (j = 0; j < prefix->naff; j++)
					{
						if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
						{
							/* prefix success */
							/*
							 * When both the prefix and the suffix carry
							 * FF_CROSSPRODUCT, look the word up with affix
							 * flag 0; otherwise use the prefix's flag.
							 */
							int			ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
							0 : prefix->aff[j]->flag;

							if (FindWord(Conf, pnewword, ff, flag))
								cur += addToResult(forms, cur, pnewword);
						}
					}
					pnode = prefix->node;
				}
			}
		}

		snode = suffix->node;
	}

	/* nothing was stored: release the array and report failure */
	if (cur == forms)
	{
		pfree(forms);
		return (NULL);
	}
	return (forms);
}
static bool parse_affentry(char *str, char *mask, char *find, char *repl)
Definition at line 468 of file spell.c.
References COPYCHAR, elog, ereport, errcode(), errmsg(), ERROR, PAE_INFIND, PAE_INMASK, PAE_INREPL, PAE_WAIT_FIND, PAE_WAIT_MASK, PAE_WAIT_REPL, pg_mblen(), t_isalpha, t_iseq, and t_isspace.
Referenced by NIImportAffixes().
{
	/*
	 * Split one affix-rule line into its mask, find and repl parts.
	 *
	 * The line is scanned one (possibly multibyte) character at a time by a
	 * small state machine:
	 *
	 *	 WAIT_MASK -> INMASK --'>'--> WAIT_FIND --'-'--> INFIND --','-->
	 *	 WAIT_REPL -> INREPL
	 *
	 * WAIT_FIND may also jump straight to INREPL when a letter or an
	 * apostrophe is seen.  Whitespace is skipped in every state.
	 *
	 * Returns false for a line whose first non-blank character is '#', and
	 * otherwise true only when mask is non-empty and at least one of find and
	 * repl is non-empty.  Unexpected characters raise a "syntax error".
	 */
	char	   *mask_end = mask;
	char	   *find_end = find;
	char	   *repl_end = repl;
	int			state = PAE_WAIT_MASK;

	*mask = *find = *repl = '\0';

	for (; *str != '\0'; str += pg_mblen(str))
	{
		switch (state)
		{
			case PAE_WAIT_MASK:
				if (t_iseq(str, '#'))
					return false;
				if (!t_isspace(str))
				{
					COPYCHAR(mask_end, str);
					mask_end += pg_mblen(str);
					state = PAE_INMASK;
				}
				break;

			case PAE_INMASK:
				if (t_iseq(str, '>'))
				{
					*mask_end = '\0';
					state = PAE_WAIT_FIND;
				}
				else if (!t_isspace(str))
				{
					COPYCHAR(mask_end, str);
					mask_end += pg_mblen(str);
				}
				break;

			case PAE_WAIT_FIND:
				if (t_iseq(str, '-'))
					state = PAE_INFIND;
				else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
				{
					COPYCHAR(repl_end, str);
					repl_end += pg_mblen(str);
					state = PAE_INREPL;
				}
				else if (!t_isspace(str))
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("syntax error")));
				break;

			case PAE_INFIND:
				if (t_iseq(str, ','))
				{
					*find_end = '\0';
					state = PAE_WAIT_REPL;
				}
				else if (t_isalpha(str))
				{
					COPYCHAR(find_end, str);
					find_end += pg_mblen(str);
				}
				else if (!t_isspace(str))
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("syntax error")));
				break;

			case PAE_WAIT_REPL:
				if (t_iseq(str, '-'))
					goto finished;	/* void repl */
				if (t_isalpha(str))
				{
					COPYCHAR(repl_end, str);
					repl_end += pg_mblen(str);
					state = PAE_INREPL;
				}
				else if (!t_isspace(str))
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("syntax error")));
				break;

			case PAE_INREPL:
				if (t_iseq(str, '#'))
				{
					/* a trailing comment ends the rule */
					*repl_end = '\0';
					goto finished;
				}
				if (t_isalpha(str))
				{
					COPYCHAR(repl_end, str);
					repl_end += pg_mblen(str);
				}
				else if (!t_isspace(str))
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("syntax error")));
				break;

			default:
				elog(ERROR, "unrecognized state in parse_affentry: %d", state);
				break;
		}
	}

finished:
	/* terminate all three outputs, whichever state the scan ended in */
	*mask_end = *find_end = *repl_end = '\0';

	return (*mask && (*find || *repl)) ? true : false;
}
static SplitVar *SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
Definition at line 1576 of file spell.c.
References AddStem(), buf, CheckCompoundAffixes(), IspellDict::CompoundAffix, SPNodeData::compoundflag, CopyVar(), SPNode::data, IspellDict::Dictionary, FF_COMPOUNDBEGIN, SPNodeData::isword, SPNode::length, MAXNORMLEN, SplitVar::next, SPNodeData::node, NormalizeSubWord(), NULL, palloc(), pfree(), pnstrdup(), and SPNodeData::val.
Referenced by NINormalizeWord().
{
	/*
	 * Enumerate ways of splitting word[startpos .. wordlen) into a sequence
	 * of compound parts.
	 *
	 * The function walks the dictionary nodes byte by byte.  At each
	 * position it (a) asks CheckCompoundAffixes whether a compound affix
	 * starts here and, if so, tries to normalize the part collected so far,
	 * and (b) checks whether the node path reached so far is a complete word
	 * that may take part in a compound.  Each successful cut spawns a
	 * recursive call, and the resulting variants are appended to the 'next'
	 * chain of the returned SplitVar.
	 *
	 * snode	node to resume the walk from, or NULL to start at
	 *			Conf->Dictionary
	 * orig		variant to start from; it is copied with CopyVar(orig, 1)
	 * startpos	offset in 'word' where the current part begins
	 * minpos	cut positions at or before this offset are rejected; when
	 *			snode is given, the walk also resumes at this offset
	 *
	 * Returns the head of a list of SplitVar linked through 'next'.
	 */
	SplitVar   *var = NULL;
	SPNodeData *StopLow,
			   *StopHigh,
			   *StopMiddle = NULL;
	SPNode	   *node = (snode) ? snode : Conf->Dictionary;
	int			level = (snode) ? minpos : startpos;	/* in a recursive call
														 * that passes snode,
														 * minpos == level */
	int			lenaff;
	CMPDAffix  *caff;
	char	   *notprobed;		/* one byte per position, initialized to 1 and
								 * cleared once a cut ending there succeeded */
	int			compoundflag = 0;

	notprobed = (char *) palloc(wordlen);
	memset(notprobed, 1, wordlen);
	var = CopyVar(orig, 1);

	while (level < wordlen)
	{
		/* find word with epenthetic and/or compound affix */
		caff = Conf->CompoundAffix;
		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
		{
			/*
			 * A compound affix matches here, so check whether the part
			 * collected so far exists as a word.
			 */
			char		buf[MAXNORMLEN];
			char	  **subres;

			/* from here on lenaff is the length of the part including the affix */
			lenaff = level - startpos + lenaff;

			if (!notprobed[startpos + lenaff - 1])
				continue;

			if (level + lenaff - 1 <= minpos)
				continue;

			if (lenaff >= MAXNORMLEN)
				continue;		/* skip too big value */
			if (lenaff > 0)
				memcpy(buf, word + startpos, lenaff);
			buf[lenaff] = '\0';

			/*
			 * NOTE(review): the enclosing loop requires level > startpos, so
			 * the level == 0 branch can only be taken if startpos is
			 * negative -- confirm whether a test on startpos was intended.
			 */
			if (level == 0)
				compoundflag = FF_COMPOUNDBEGIN;
			else if (level == wordlen - 1)
				compoundflag = FF_COMPOUNDLAST;
			else
				compoundflag = FF_COMPOUNDMIDDLE;
			subres = NormalizeSubWord(Conf, buf, compoundflag);
			if (subres)
			{
				/* Yes, it was a word from dictionary */
				SplitVar   *new = CopyVar(var, 0);
				SplitVar   *ptr = var;
				char	  **sptr = subres;

				notprobed[startpos + lenaff - 1] = 0;

				while (*sptr)
				{
					AddStem(new, *sptr);
					sptr++;
				}
				pfree(subres);

				/* append the variants for the rest of the word to the chain */
				while (ptr->next)
					ptr = ptr->next;
				ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);

				pfree(new->stem);
				pfree(new);
			}
		}

		if (!node)
			break;

		/* binary search among this node's entries for the byte word[level] */
		StopLow = node->data;
		StopHigh = node->data + node->length;
		while (StopLow < StopHigh)
		{
			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
			if (StopMiddle->val == ((uint8 *) (word))[level])
				break;
			else if (StopMiddle->val < ((uint8 *) (word))[level])
				StopLow = StopMiddle + 1;
			else
				StopHigh = StopMiddle;
		}

		if (StopLow < StopHigh)
		{
			/*
			 * NOTE(review): this compares the byte offset 'level' with the
			 * FF_COMPOUNDBEGIN flag constant, whereas the affix branch above
			 * tests a position (level == 0).  It looks like a position test
			 * was intended here as well -- confirm before relying on it.
			 */
			if (level == FF_COMPOUNDBEGIN)
				compoundflag = FF_COMPOUNDBEGIN;
			else if
				(level == wordlen - 1)
				compoundflag = FF_COMPOUNDLAST;
			else
				compoundflag = FF_COMPOUNDMIDDLE;

			/* find infinitive */
			if (StopMiddle->isword && (StopMiddle->compoundflag & compoundflag) && notprobed[level])
			{
				/* ok, we found a complete word that is allowed in compounds */
				if (level > minpos)
				{
					/* and it is longer than the minimum */
					if (wordlen == level + 1)
					{
						/* well, it was the last word */
						AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
						pfree(notprobed);
						return var;
					}
					else
					{
						/* first look for a longer word starting at the same point */
						SplitVar   *ptr = var;

						while (ptr->next)
							ptr = ptr->next;
						ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
						/* then cut here and go on looking for the next word */
						level++;
						AddStem(var, pnstrdup(word + startpos, level - startpos));
						node = Conf->Dictionary;
						startpos = level;
						continue;
					}
				}
			}
			node = StopMiddle->node;
		}
		else
			node = NULL;		/* no entry for this byte; only affix checks remain */
		level++;
	}

	/* whatever is left from startpos on becomes the final stem */
	AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
	pfree(notprobed);
	return var;
}
static int strbcmp(const unsigned char *s1, const unsigned char *s2)
Definition at line 175 of file spell.c.
Referenced by cmpaffix().
static int strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
Definition at line 198 of file spell.c.
Referenced by NISortAffixes().
static char *VoidString = ""
Definition at line 146 of file spell.c.
Referenced by NIAddAffix().