/* Source listing: "xsyn" synonym dictionary for text search (init and lexize functions). */
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "postgres.h"
00014
00015 #include <ctype.h>
00016
00017 #include "commands/defrem.h"
00018 #include "tsearch/ts_locale.h"
00019 #include "tsearch/ts_utils.h"
00020
/* Module magic block; see the PostgreSQL documentation for PG_MODULE_MAGIC. */
PG_MODULE_MAGIC;
00022
/*
 * One dictionary entry.
 *
 * Entries are ordered and looked up by "key" (see compare_syn).
 */
typedef struct
{
	char	   *key;			/* word that input tokens are matched against */
	char	   *value;			/* copy of the lowercased rule line containing
								 * the key */
} Syn;
00029
00030 typedef struct
00031 {
00032 int len;
00033 Syn *syn;
00034
00035 bool matchorig;
00036 bool keeporig;
00037 bool matchsynonyms;
00038 bool keepsynonyms;
00039 } DictSyn;
00040
00041
00042 PG_FUNCTION_INFO_V1(dxsyn_init);
00043 Datum dxsyn_init(PG_FUNCTION_ARGS);
00044
00045 PG_FUNCTION_INFO_V1(dxsyn_lexize);
00046 Datum dxsyn_lexize(PG_FUNCTION_ARGS);
00047
00048 static char *
00049 find_word(char *in, char **end)
00050 {
00051 char *start;
00052
00053 *end = NULL;
00054 while (*in && t_isspace(in))
00055 in += pg_mblen(in);
00056
00057 if (!*in || *in == '#')
00058 return NULL;
00059 start = in;
00060
00061 while (*in && !t_isspace(in))
00062 in += pg_mblen(in);
00063
00064 *end = in;
00065
00066 return start;
00067 }
00068
00069 static int
00070 compare_syn(const void *a, const void *b)
00071 {
00072 return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
00073 }
00074
00075 static void
00076 read_dictionary(DictSyn *d, char *filename)
00077 {
00078 char *real_filename = get_tsearch_config_filename(filename, "rules");
00079 tsearch_readline_state trst;
00080 char *line;
00081 int cur = 0;
00082
00083 if (!tsearch_readline_begin(&trst, real_filename))
00084 ereport(ERROR,
00085 (errcode(ERRCODE_CONFIG_FILE_ERROR),
00086 errmsg("could not open synonym file \"%s\": %m",
00087 real_filename)));
00088
00089 while ((line = tsearch_readline(&trst)) != NULL)
00090 {
00091 char *value;
00092 char *key;
00093 char *pos;
00094 char *end;
00095
00096 if (*line == '\0')
00097 continue;
00098
00099 value = lowerstr(line);
00100 pfree(line);
00101
00102 pos = value;
00103 while ((key = find_word(pos, &end)) != NULL)
00104 {
00105
00106 if (cur == d->len)
00107 {
00108 d->len = (d->len > 0) ? 2 * d->len : 16;
00109 if (d->syn)
00110 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
00111 else
00112 d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
00113 }
00114
00115
00116 if (pos != value || d->matchorig)
00117 {
00118 d->syn[cur].key = pnstrdup(key, end - key);
00119 d->syn[cur].value = pstrdup(value);
00120
00121 cur++;
00122 }
00123
00124 pos = end;
00125
00126
00127 if (!d->matchsynonyms)
00128 break;
00129 }
00130
00131 pfree(value);
00132 }
00133
00134 tsearch_readline_end(&trst);
00135
00136 d->len = cur;
00137 if (cur > 1)
00138 qsort(d->syn, d->len, sizeof(Syn), compare_syn);
00139
00140 pfree(real_filename);
00141 }
00142
00143 Datum
00144 dxsyn_init(PG_FUNCTION_ARGS)
00145 {
00146 List *dictoptions = (List *) PG_GETARG_POINTER(0);
00147 DictSyn *d;
00148 ListCell *l;
00149 char *filename = NULL;
00150
00151 d = (DictSyn *) palloc0(sizeof(DictSyn));
00152 d->len = 0;
00153 d->syn = NULL;
00154 d->matchorig = true;
00155 d->keeporig = true;
00156 d->matchsynonyms = false;
00157 d->keepsynonyms = true;
00158
00159 foreach(l, dictoptions)
00160 {
00161 DefElem *defel = (DefElem *) lfirst(l);
00162
00163 if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0)
00164 {
00165 d->matchorig = defGetBoolean(defel);
00166 }
00167 else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
00168 {
00169 d->keeporig = defGetBoolean(defel);
00170 }
00171 else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0)
00172 {
00173 d->matchsynonyms = defGetBoolean(defel);
00174 }
00175 else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0)
00176 {
00177 d->keepsynonyms = defGetBoolean(defel);
00178 }
00179 else if (pg_strcasecmp(defel->defname, "RULES") == 0)
00180 {
00181
00182 filename = defGetString(defel);
00183 }
00184 else
00185 {
00186 ereport(ERROR,
00187 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00188 errmsg("unrecognized xsyn parameter: \"%s\"",
00189 defel->defname)));
00190 }
00191 }
00192
00193 if (filename)
00194 read_dictionary(d, filename);
00195
00196 PG_RETURN_POINTER(d);
00197 }
00198
00199 Datum
00200 dxsyn_lexize(PG_FUNCTION_ARGS)
00201 {
00202 DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
00203 char *in = (char *) PG_GETARG_POINTER(1);
00204 int length = PG_GETARG_INT32(2);
00205 Syn word;
00206 Syn *found;
00207 TSLexeme *res = NULL;
00208
00209 if (!length || d->len == 0)
00210 PG_RETURN_POINTER(NULL);
00211
00212
00213 {
00214 char *temp = pnstrdup(in, length);
00215
00216 word.key = lowerstr(temp);
00217 pfree(temp);
00218 word.value = NULL;
00219 }
00220
00221
00222 found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
00223 pfree(word.key);
00224
00225 if (!found)
00226 PG_RETURN_POINTER(NULL);
00227
00228
00229 {
00230 char *value = found->value;
00231 char *syn;
00232 char *pos;
00233 char *end;
00234 int nsyns = 0;
00235
00236 res = palloc(sizeof(TSLexeme));
00237
00238 pos = value;
00239 while ((syn = find_word(pos, &end)) != NULL)
00240 {
00241 res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
00242
00243
00244 if (pos != value || d->keeporig)
00245 {
00246 res[nsyns].lexeme = pnstrdup(syn, end - syn);
00247 res[nsyns].nvariant = 0;
00248 res[nsyns].flags = 0;
00249 nsyns++;
00250 }
00251
00252 pos = end;
00253
00254
00255 if (!d->keepsynonyms)
00256 break;
00257 }
00258 res[nsyns].lexeme = NULL;
00259 }
00260
00261 PG_RETURN_POINTER(res);
00262 }