Header And Logo

PostgreSQL
| The world's most advanced open source database.

spell.h

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * spell.h
00004  *
00005  * Declarations for ISpell dictionary
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  *
00009  * src/include/tsearch/dicts/spell.h
00010  *
00011  *-------------------------------------------------------------------------
00012  */
00013 
00014 #ifndef __SPELL_H__
00015 #define __SPELL_H__
00016 
00017 #include "regex/regex.h"
00018 #include "tsearch/dicts/regis.h"
00019 #include "tsearch/ts_public.h"
00020 
00021 /*
00022  * Max length of a flag name. Names longer than this will be truncated
00023  * to the maximum. This is also the size of the flag[] buffer in SPELL.
00024  */
00025 #define MAXFLAGLEN 16
00026 
00027 struct SPNode;              /* forward declaration, needed by SPNodeData.node */
00028 
00029 typedef struct              /* element type of the SPNode.data[] array */
00030 {
00031     uint32      val:8,      /* 8-bit key; presumably one byte of a word -- TODO confirm */
00032                 isword:1,   /* 1-bit flag; presumably set when a word ends here -- TODO confirm */
00033                 compoundflag:4, /* 4 bits, the same width as FF_DICTFLAGMASK (0x0f) */
00034                 affix:19;   /* 19 bits; the four bit-fields add up to 32 bits */
00035     struct SPNode *node;    /* another SPNode; presumably the next level down -- TODO confirm */
00036 } SPNodeData;
00037 
00038 /*
00039  * The FF_ names correspond to Hunspell options in the affix file; see
00040  * http://hunspell.sourceforge.net/
00041  */
00042 #define FF_COMPOUNDONLY     0x01
00043 #define FF_COMPOUNDBEGIN    0x02
00044 #define FF_COMPOUNDMIDDLE   0x04
00045 #define FF_COMPOUNDLAST     0x08
00046 #define FF_COMPOUNDFLAG     ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | FF_COMPOUNDLAST )    /* = 0x0e; FF_COMPOUNDONLY is not included */
00047 #define FF_DICTFLAGMASK     0x0f    /* covers all four bits defined above */
00048 
00049 typedef struct SPNode
00050 {
00051     uint32      length;     /* presumably the number of entries in data[] -- TODO confirm */
00052     SPNodeData  data[1];    /* declared with one element; SPNHDRSZ suggests it is really variable length */
00053 } SPNode;
00054 
00055 #define SPNHDRSZ    (offsetof(SPNode,data))    /* number of bytes in an SPNode before data[] */
00056 
00057 
00058 typedef struct spell_struct
00059 {
00060     union                   /* flag and d share storage; only one is valid at a time */
00061     {
00062         /*
00063          * flag is filled in by NIImportDictionary. After NISortDictionary, d
00064          * is valid and flag is invalid.
00065          */
00066         char        flag[MAXFLAGLEN];
00067         struct
00068         {
00069             int         affix;  /* NOTE(review): looks like an index into affix data -- confirm */
00070             int         len;    /* NOTE(review): presumably the length of word -- confirm */
00071         }           d;
00072     }           p;
00073     char        word[1];        /* variable length, null-terminated */
00074 } SPELL;
00075 
00076 #define SPELLHDRSZ  (offsetof(SPELL, word))    /* number of bytes in a SPELL before word[] */
00077 
00078 typedef struct aff_struct
00079 {
00080     uint32      flag:8,         /* 8 bits; the six bit-fields add up to 32 bits */
00081                 type:1,         /* 1 bit; presumably FF_SUFFIX or FF_PREFIX -- TODO confirm */
00082                 flagflags:7,    /* 7 bits, enough for every FF_ flag bit up to 0x40 */
00083                 issimple:1,     /* 1-bit flag; meaning not visible in this header */
00084                 isregis:1,      /* 1-bit flag; presumably selects reg.regis over reg.regex -- TODO confirm */
00085                 replen:14;      /* 14 bits; presumably the length of repl -- TODO confirm */
00086     char       *find;
00087     char       *repl;
00088     union                       /* the two members share storage */
00089     {
00090         regex_t     regex;
00091         Regis       regis;
00092     }           reg;
00093 } AFFIX;
00094 
00095 /*
00096  * Affixes use the dictionary flags too; these bits lie above FF_DICTFLAGMASK.
00097  */
00098 #define FF_COMPOUNDPERMITFLAG   0x10
00099 #define FF_COMPOUNDFORBIDFLAG   0x20
00100 #define FF_CROSSPRODUCT         0x40    /* highest FF_ bit; all seven fit in AFFIX.flagflags */
00101 
00102 /*
00103  * Don't change the order of these. Initialization sorts by these,
00104  * and expects prefixes to come first after sorting.
00105  */
00106 #define FF_SUFFIX               1
00107 #define FF_PREFIX               0    /* smaller than FF_SUFFIX, so it comes first in an ascending sort */
00108 
00109 struct AffixNode;           /* forward declaration, needed by AffixNodeData.node */
00110 
00111 typedef struct              /* element type of the AffixNode.data[] array */
00112 {
00113     uint32      val:8,      /* 8-bit key; presumably one byte of an affix -- TODO confirm */
00114                 naff:24;    /* 24 bits; presumably the number of entries in aff -- TODO confirm */
00115     AFFIX     **aff;        /* pointer to AFFIX pointers */
00116     struct AffixNode *node; /* another AffixNode; presumably the next level down -- TODO confirm */
00117 } AffixNodeData;
00118 
00119 typedef struct AffixNode
00120 {
00121     uint32      isvoid:1,   /* 1-bit flag; meaning not visible in this header */
00122                 length:31;  /* 31 bits; presumably the number of entries in data[] -- TODO confirm */
00123     AffixNodeData data[1];  /* declared with one element; ANHRDSZ suggests it is really variable length */
00124 } AffixNode;
00125 
00126 #define ANHRDSZ        (offsetof(AffixNode, data))    /* bytes before data[]; NOTE(review): spelled "HRD", unlike SPNHDRSZ and SPELLHDRSZ */
00127 
00128 typedef struct              /* element type of IspellDict.CompoundAffix */
00129 {
00130     char       *affix;
00131     int         len;        /* NOTE(review): presumably the length of affix -- confirm */
00132     bool        issuffix;   /* NOTE(review): presumably true for a suffix, false for a prefix -- confirm */
00133 } CMPDAffix;
00134 
00135 typedef struct              /* the Conf argument of every NI* function declared in this header */
00136 {
00137     int         maffixes;   /* presumably allocated length of Affix, by analogy with mspell -- TODO confirm */
00138     int         naffixes;   /* presumably number of valid entries in Affix, by analogy with nspell -- TODO confirm */
00139     AFFIX      *Affix;
00140 
00141     AffixNode  *Suffix;
00142     AffixNode  *Prefix;
00143 
00144     SPNode     *Dictionary;
00145     char      **AffixData;
00146     int         lenAffixData;   /* NOTE(review): relation to nAffixData is not visible in this header */
00147     int         nAffixData;
00148 
00149     CMPDAffix  *CompoundAffix;
00150 
00151     unsigned char flagval[256]; /* 256 entries, one per possible unsigned char value */
00152     bool        usecompound;
00153 
00154     /*
00155      * Remaining fields are only used during dictionary construction; they are
00156      * set up by NIStartBuild and cleared by NIFinishBuild.
00157      */
00158     MemoryContext buildCxt;     /* temp context for construction */
00159 
00160     /* Temporary array of all words in the dict file */
00161     SPELL     **Spell;
00162     int         nspell;         /* number of valid entries in Spell array */
00163     int         mspell;         /* allocated length of Spell array */
00164 
00165     /* These are used to allocate "compact" data without palloc overhead */
00166     char       *firstfree;      /* first free address (always maxaligned) */
00167     size_t      avail;          /* free space remaining at firstfree */
00168 } IspellDict;
00169 
00170 extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);    /* word is taken as non-const char * */
00171 
00172 extern void NIStartBuild(IspellDict *Conf);     /* sets up the build-only fields of IspellDict */
00173 extern void NIImportAffixes(IspellDict *Conf, const char *filename);
00174 extern void NIImportDictionary(IspellDict *Conf, const char *filename);    /* fills in SPELL.p.flag */
00175 extern void NISortDictionary(IspellDict *Conf); /* afterwards SPELL.p.d is valid and p.flag is not */
00176 extern void NISortAffixes(IspellDict *Conf);
00177 extern void NIFinishBuild(IspellDict *Conf);    /* clears the build-only fields of IspellDict */
00178 
00179 #endif