00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "postgres.h"
00015
00016 #include "tsearch/ts_cache.h"
00017 #include "tsearch/ts_utils.h"
00018 #include "utils/builtins.h"
00019
00020
00021 Datum
00022 get_current_ts_config(PG_FUNCTION_ARGS)
00023 {
00024 PG_RETURN_OID(getTSCurrentConfig(true));
00025 }
00026
00027
00028
00029
00030 static int
00031 compareWORD(const void *a, const void *b)
00032 {
00033 int res;
00034
00035 res = tsCompareString(
00036 ((const ParsedWord *) a)->word, ((const ParsedWord *) a)->len,
00037 ((const ParsedWord *) b)->word, ((const ParsedWord *) b)->len,
00038 false);
00039
00040 if (res == 0)
00041 {
00042 if (((const ParsedWord *) a)->pos.pos == ((const ParsedWord *) b)->pos.pos)
00043 return 0;
00044
00045 res = (((const ParsedWord *) a)->pos.pos > ((const ParsedWord *) b)->pos.pos) ? 1 : -1;
00046 }
00047
00048 return res;
00049 }
00050
00051 static int
00052 uniqueWORD(ParsedWord *a, int32 l)
00053 {
00054 ParsedWord *ptr,
00055 *res;
00056 int tmppos;
00057
00058 if (l == 1)
00059 {
00060 tmppos = LIMITPOS(a->pos.pos);
00061 a->alen = 2;
00062 a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
00063 a->pos.apos[0] = 1;
00064 a->pos.apos[1] = tmppos;
00065 return l;
00066 }
00067
00068 res = a;
00069 ptr = a + 1;
00070
00071
00072
00073
00074 qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
00075
00076
00077
00078
00079 tmppos = LIMITPOS(a->pos.pos);
00080 a->alen = 2;
00081 a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
00082 a->pos.apos[0] = 1;
00083 a->pos.apos[1] = tmppos;
00084
00085
00086
00087
00088 while (ptr - a < l)
00089 {
00090 if (!(ptr->len == res->len &&
00091 strncmp(ptr->word, res->word, res->len) == 0))
00092 {
00093
00094
00095
00096 res++;
00097 res->len = ptr->len;
00098 res->word = ptr->word;
00099 tmppos = LIMITPOS(ptr->pos.pos);
00100 res->alen = 2;
00101 res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
00102 res->pos.apos[0] = 1;
00103 res->pos.apos[1] = tmppos;
00104 }
00105 else
00106 {
00107
00108
00109
00110
00111
00112 pfree(ptr->word);
00113 if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
00114 res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
00115 {
00116 if (res->pos.apos[0] + 1 >= res->alen)
00117 {
00118 res->alen *= 2;
00119 res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
00120 }
00121 if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
00122 {
00123 res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
00124 res->pos.apos[0]++;
00125 }
00126 }
00127 }
00128 ptr++;
00129 }
00130
00131 return res + 1 - a;
00132 }
00133
00134
00135
00136
00137 TSVector
00138 make_tsvector(ParsedText *prs)
00139 {
00140 int i,
00141 j,
00142 lenstr = 0,
00143 totallen;
00144 TSVector in;
00145 WordEntry *ptr;
00146 char *str;
00147 int stroff;
00148
00149 prs->curwords = uniqueWORD(prs->words, prs->curwords);
00150 for (i = 0; i < prs->curwords; i++)
00151 {
00152 lenstr += prs->words[i].len;
00153 if (prs->words[i].alen)
00154 {
00155 lenstr = SHORTALIGN(lenstr);
00156 lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
00157 }
00158 }
00159
00160 if (lenstr > MAXSTRPOS)
00161 ereport(ERROR,
00162 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00163 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));
00164
00165 totallen = CALCDATASIZE(prs->curwords, lenstr);
00166 in = (TSVector) palloc0(totallen);
00167 SET_VARSIZE(in, totallen);
00168 in->size = prs->curwords;
00169
00170 ptr = ARRPTR(in);
00171 str = STRPTR(in);
00172 stroff = 0;
00173 for (i = 0; i < prs->curwords; i++)
00174 {
00175 ptr->len = prs->words[i].len;
00176 ptr->pos = stroff;
00177 memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
00178 stroff += prs->words[i].len;
00179 pfree(prs->words[i].word);
00180 if (prs->words[i].alen)
00181 {
00182 int k = prs->words[i].pos.apos[0];
00183 WordEntryPos *wptr;
00184
00185 if (k > 0xFFFF)
00186 elog(ERROR, "positions array too long");
00187
00188 ptr->haspos = 1;
00189 stroff = SHORTALIGN(stroff);
00190 *(uint16 *) (str + stroff) = (uint16) k;
00191 wptr = POSDATAPTR(in, ptr);
00192 for (j = 0; j < k; j++)
00193 {
00194 WEP_SETWEIGHT(wptr[j], 0);
00195 WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
00196 }
00197 stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
00198 pfree(prs->words[i].pos.apos);
00199 }
00200 else
00201 ptr->haspos = 0;
00202 ptr++;
00203 }
00204 pfree(prs->words);
00205 return in;
00206 }
00207
00208 Datum
00209 to_tsvector_byid(PG_FUNCTION_ARGS)
00210 {
00211 Oid cfgId = PG_GETARG_OID(0);
00212 text *in = PG_GETARG_TEXT_P(1);
00213 ParsedText prs;
00214 TSVector out;
00215
00216 prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6;
00217
00218 if (prs.lenwords == 0)
00219 prs.lenwords = 2;
00220 prs.curwords = 0;
00221 prs.pos = 0;
00222 prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
00223
00224 parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
00225 PG_FREE_IF_COPY(in, 1);
00226
00227 if (prs.curwords)
00228 out = make_tsvector(&prs);
00229 else
00230 {
00231 pfree(prs.words);
00232 out = palloc(CALCDATASIZE(0, 0));
00233 SET_VARSIZE(out, CALCDATASIZE(0, 0));
00234 out->size = 0;
00235 }
00236
00237 PG_RETURN_POINTER(out);
00238 }
00239
00240 Datum
00241 to_tsvector(PG_FUNCTION_ARGS)
00242 {
00243 text *in = PG_GETARG_TEXT_P(0);
00244 Oid cfgId;
00245
00246 cfgId = getTSCurrentConfig(true);
00247 PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid,
00248 ObjectIdGetDatum(cfgId),
00249 PointerGetDatum(in)));
00250 }
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267 static void
00268 pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
00269 {
00270 int32 count = 0;
00271 ParsedText prs;
00272 uint32 variant,
00273 pos,
00274 cntvar = 0,
00275 cntpos = 0,
00276 cnt = 0;
00277 Oid cfg_id = DatumGetObjectId(opaque);
00278
00279
00280 prs.lenwords = 4;
00281 prs.curwords = 0;
00282 prs.pos = 0;
00283 prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
00284
00285 parsetext(cfg_id, &prs, strval, lenval);
00286
00287 if (prs.curwords > 0)
00288 {
00289
00290 while (count < prs.curwords)
00291 {
00292 pos = prs.words[count].pos.pos;
00293 cntvar = 0;
00294 while (count < prs.curwords && pos == prs.words[count].pos.pos)
00295 {
00296 variant = prs.words[count].nvariant;
00297
00298 cnt = 0;
00299 while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
00300 {
00301
00302 pushValue(state, prs.words[count].word, prs.words[count].len, weight,
00303 ((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
00304 pfree(prs.words[count].word);
00305 if (cnt)
00306 pushOperator(state, OP_AND);
00307 cnt++;
00308 count++;
00309 }
00310
00311 if (cntvar)
00312 pushOperator(state, OP_OR);
00313 cntvar++;
00314 }
00315
00316 if (cntpos)
00317 pushOperator(state, OP_AND);
00318
00319 cntpos++;
00320 }
00321
00322 pfree(prs.words);
00323
00324 }
00325 else
00326 pushStop(state);
00327 }
00328
00329 Datum
00330 to_tsquery_byid(PG_FUNCTION_ARGS)
00331 {
00332 Oid cfgid = PG_GETARG_OID(0);
00333 text *in = PG_GETARG_TEXT_P(1);
00334 TSQuery query;
00335 QueryItem *res;
00336 int32 len;
00337
00338 query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
00339
00340 if (query->size == 0)
00341 PG_RETURN_TSQUERY(query);
00342
00343
00344 res = clean_fakeval(GETQUERY(query), &len);
00345 if (!res)
00346 {
00347 SET_VARSIZE(query, HDRSIZETQ);
00348 query->size = 0;
00349 PG_RETURN_POINTER(query);
00350 }
00351 memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
00352
00353
00354
00355
00356
00357 if (len != query->size)
00358 {
00359 char *oldoperand = GETOPERAND(query);
00360 int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
00361
00362 Assert(len < query->size);
00363
00364 query->size = len;
00365 memmove((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query));
00366 SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
00367 }
00368
00369 pfree(res);
00370 PG_RETURN_TSQUERY(query);
00371 }
00372
00373 Datum
00374 to_tsquery(PG_FUNCTION_ARGS)
00375 {
00376 text *in = PG_GETARG_TEXT_P(0);
00377 Oid cfgId;
00378
00379 cfgId = getTSCurrentConfig(true);
00380 PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid,
00381 ObjectIdGetDatum(cfgId),
00382 PointerGetDatum(in)));
00383 }
00384
00385 Datum
00386 plainto_tsquery_byid(PG_FUNCTION_ARGS)
00387 {
00388 Oid cfgid = PG_GETARG_OID(0);
00389 text *in = PG_GETARG_TEXT_P(1);
00390 TSQuery query;
00391 QueryItem *res;
00392 int32 len;
00393
00394 query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
00395
00396 if (query->size == 0)
00397 PG_RETURN_TSQUERY(query);
00398
00399 res = clean_fakeval(GETQUERY(query), &len);
00400 if (!res)
00401 {
00402 SET_VARSIZE(query, HDRSIZETQ);
00403 query->size = 0;
00404 PG_RETURN_POINTER(query);
00405 }
00406 memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
00407
00408 if (len != query->size)
00409 {
00410 char *oldoperand = GETOPERAND(query);
00411 int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
00412
00413 Assert(len < query->size);
00414
00415 query->size = len;
00416 memcpy((void *) GETOPERAND(query), oldoperand, lenoperand);
00417 SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
00418 }
00419
00420 pfree(res);
00421 PG_RETURN_POINTER(query);
00422 }
00423
00424 Datum
00425 plainto_tsquery(PG_FUNCTION_ARGS)
00426 {
00427 text *in = PG_GETARG_TEXT_P(0);
00428 Oid cfgId;
00429
00430 cfgId = getTSCurrentConfig(true);
00431 PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
00432 ObjectIdGetDatum(cfgId),
00433 PointerGetDatum(in)));
00434 }