Header And Logo

PostgreSQL
| The world's most advanced open source database.

wparser.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * wparser.c
00004  *      Standard interface to word parser
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/tsearch/wparser.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 #include "postgres.h"
00015 
00016 #include "funcapi.h"
00017 #include "catalog/namespace.h"
00018 #include "catalog/pg_type.h"
00019 #include "commands/defrem.h"
00020 #include "tsearch/ts_cache.h"
00021 #include "tsearch/ts_utils.h"
00022 #include "utils/builtins.h"
00023 
00024 
00025 /******sql-level interface******/
00026 
00027 typedef struct
00028 {
00029     int         cur;
00030     LexDescr   *list;
00031 } TSTokenTypeStorage;
00032 
00033 static void
00034 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
00035 {
00036     TupleDesc   tupdesc;
00037     MemoryContext oldcontext;
00038     TSTokenTypeStorage *st;
00039     TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
00040 
00041     if (!OidIsValid(prs->lextypeOid))
00042         elog(ERROR, "method lextype isn't defined for text search parser %u",
00043              prsid);
00044 
00045     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
00046 
00047     st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
00048     st->cur = 0;
00049     /* lextype takes one dummy argument */
00050     st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
00051                                                              (Datum) 0));
00052     funcctx->user_fctx = (void *) st;
00053 
00054     tupdesc = CreateTemplateTupleDesc(3, false);
00055     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
00056                        INT4OID, -1, 0);
00057     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
00058                        TEXTOID, -1, 0);
00059     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
00060                        TEXTOID, -1, 0);
00061 
00062     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
00063     MemoryContextSwitchTo(oldcontext);
00064 }
00065 
00066 static Datum
00067 tt_process_call(FuncCallContext *funcctx)
00068 {
00069     TSTokenTypeStorage *st;
00070 
00071     st = (TSTokenTypeStorage *) funcctx->user_fctx;
00072     if (st->list && st->list[st->cur].lexid)
00073     {
00074         Datum       result;
00075         char       *values[3];
00076         char        txtid[16];
00077         HeapTuple   tuple;
00078 
00079         sprintf(txtid, "%d", st->list[st->cur].lexid);
00080         values[0] = txtid;
00081         values[1] = st->list[st->cur].alias;
00082         values[2] = st->list[st->cur].descr;
00083 
00084         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
00085         result = HeapTupleGetDatum(tuple);
00086 
00087         pfree(values[1]);
00088         pfree(values[2]);
00089         st->cur++;
00090         return result;
00091     }
00092     if (st->list)
00093         pfree(st->list);
00094     pfree(st);
00095     return (Datum) 0;
00096 }
00097 
00098 Datum
00099 ts_token_type_byid(PG_FUNCTION_ARGS)
00100 {
00101     FuncCallContext *funcctx;
00102     Datum       result;
00103 
00104     if (SRF_IS_FIRSTCALL())
00105     {
00106         funcctx = SRF_FIRSTCALL_INIT();
00107         tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
00108     }
00109 
00110     funcctx = SRF_PERCALL_SETUP();
00111 
00112     if ((result = tt_process_call(funcctx)) != (Datum) 0)
00113         SRF_RETURN_NEXT(funcctx, result);
00114     SRF_RETURN_DONE(funcctx);
00115 }
00116 
00117 Datum
00118 ts_token_type_byname(PG_FUNCTION_ARGS)
00119 {
00120     FuncCallContext *funcctx;
00121     Datum       result;
00122 
00123     if (SRF_IS_FIRSTCALL())
00124     {
00125         text       *prsname = PG_GETARG_TEXT_P(0);
00126         Oid         prsId;
00127 
00128         funcctx = SRF_FIRSTCALL_INIT();
00129         prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
00130         tt_setup_firstcall(funcctx, prsId);
00131     }
00132 
00133     funcctx = SRF_PERCALL_SETUP();
00134 
00135     if ((result = tt_process_call(funcctx)) != (Datum) 0)
00136         SRF_RETURN_NEXT(funcctx, result);
00137     SRF_RETURN_DONE(funcctx);
00138 }
00139 
/*
 * One token collected by prs_setup_firstcall.
 */
typedef struct LexemeEntry
{
    int         type;           /* token type as returned by the parser's
                                 * prstoken method */
    char       *lexeme;         /* palloc'd, NUL-terminated copy of the
                                 * token text */
} LexemeEntry;
00145 
00146 typedef struct
00147 {
00148     int         cur;
00149     int         len;
00150     LexemeEntry *list;
00151 } PrsStorage;
00152 
00153 
00154 static void
00155 prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
00156 {
00157     TupleDesc   tupdesc;
00158     MemoryContext oldcontext;
00159     PrsStorage *st;
00160     TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
00161     char       *lex = NULL;
00162     int         llen = 0,
00163                 type = 0;
00164     void       *prsdata;
00165 
00166     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
00167 
00168     st = (PrsStorage *) palloc(sizeof(PrsStorage));
00169     st->cur = 0;
00170     st->len = 16;
00171     st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
00172 
00173     prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
00174                                                PointerGetDatum(VARDATA(txt)),
00175                                     Int32GetDatum(VARSIZE(txt) - VARHDRSZ)));
00176 
00177     while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
00178                                                PointerGetDatum(prsdata),
00179                                                PointerGetDatum(&lex),
00180                                                PointerGetDatum(&llen)))) != 0)
00181     {
00182         if (st->cur >= st->len)
00183         {
00184             st->len = 2 * st->len;
00185             st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
00186         }
00187         st->list[st->cur].lexeme = palloc(llen + 1);
00188         memcpy(st->list[st->cur].lexeme, lex, llen);
00189         st->list[st->cur].lexeme[llen] = '\0';
00190         st->list[st->cur].type = type;
00191         st->cur++;
00192     }
00193 
00194     FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
00195 
00196     st->len = st->cur;
00197     st->cur = 0;
00198 
00199     funcctx->user_fctx = (void *) st;
00200     tupdesc = CreateTemplateTupleDesc(2, false);
00201     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
00202                        INT4OID, -1, 0);
00203     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
00204                        TEXTOID, -1, 0);
00205 
00206     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
00207     MemoryContextSwitchTo(oldcontext);
00208 }
00209 
00210 static Datum
00211 prs_process_call(FuncCallContext *funcctx)
00212 {
00213     PrsStorage *st;
00214 
00215     st = (PrsStorage *) funcctx->user_fctx;
00216     if (st->cur < st->len)
00217     {
00218         Datum       result;
00219         char       *values[2];
00220         char        tid[16];
00221         HeapTuple   tuple;
00222 
00223         values[0] = tid;
00224         sprintf(tid, "%d", st->list[st->cur].type);
00225         values[1] = st->list[st->cur].lexeme;
00226         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
00227         result = HeapTupleGetDatum(tuple);
00228 
00229         pfree(values[1]);
00230         st->cur++;
00231         return result;
00232     }
00233     else
00234     {
00235         if (st->list)
00236             pfree(st->list);
00237         pfree(st);
00238     }
00239     return (Datum) 0;
00240 }
00241 
00242 Datum
00243 ts_parse_byid(PG_FUNCTION_ARGS)
00244 {
00245     FuncCallContext *funcctx;
00246     Datum       result;
00247 
00248     if (SRF_IS_FIRSTCALL())
00249     {
00250         text       *txt = PG_GETARG_TEXT_P(1);
00251 
00252         funcctx = SRF_FIRSTCALL_INIT();
00253         prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
00254         PG_FREE_IF_COPY(txt, 1);
00255     }
00256 
00257     funcctx = SRF_PERCALL_SETUP();
00258 
00259     if ((result = prs_process_call(funcctx)) != (Datum) 0)
00260         SRF_RETURN_NEXT(funcctx, result);
00261     SRF_RETURN_DONE(funcctx);
00262 }
00263 
00264 Datum
00265 ts_parse_byname(PG_FUNCTION_ARGS)
00266 {
00267     FuncCallContext *funcctx;
00268     Datum       result;
00269 
00270     if (SRF_IS_FIRSTCALL())
00271     {
00272         text       *prsname = PG_GETARG_TEXT_P(0);
00273         text       *txt = PG_GETARG_TEXT_P(1);
00274         Oid         prsId;
00275 
00276         funcctx = SRF_FIRSTCALL_INIT();
00277         prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
00278         prs_setup_firstcall(funcctx, prsId, txt);
00279     }
00280 
00281     funcctx = SRF_PERCALL_SETUP();
00282 
00283     if ((result = prs_process_call(funcctx)) != (Datum) 0)
00284         SRF_RETURN_NEXT(funcctx, result);
00285     SRF_RETURN_DONE(funcctx);
00286 }
00287 
00288 Datum
00289 ts_headline_byid_opt(PG_FUNCTION_ARGS)
00290 {
00291     text       *in = PG_GETARG_TEXT_P(1);
00292     TSQuery     query = PG_GETARG_TSQUERY(2);
00293     text       *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
00294     HeadlineParsedText prs;
00295     List       *prsoptions;
00296     text       *out;
00297     TSConfigCacheEntry *cfg;
00298     TSParserCacheEntry *prsobj;
00299 
00300     cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
00301     prsobj = lookup_ts_parser_cache(cfg->prsId);
00302 
00303     if (!OidIsValid(prsobj->headlineOid))
00304         ereport(ERROR,
00305                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00306            errmsg("text search parser does not support headline creation")));
00307 
00308     memset(&prs, 0, sizeof(HeadlineParsedText));
00309     prs.lenwords = 32;
00310     prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
00311 
00312     hlparsetext(cfg->cfgId, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
00313 
00314     if (opt)
00315         prsoptions = deserialize_deflist(PointerGetDatum(opt));
00316     else
00317         prsoptions = NIL;
00318 
00319     FunctionCall3(&(prsobj->prsheadline),
00320                   PointerGetDatum(&prs),
00321                   PointerGetDatum(prsoptions),
00322                   PointerGetDatum(query));
00323 
00324     out = generateHeadline(&prs);
00325 
00326     PG_FREE_IF_COPY(in, 1);
00327     PG_FREE_IF_COPY(query, 2);
00328     if (opt)
00329         PG_FREE_IF_COPY(opt, 3);
00330     pfree(prs.words);
00331     pfree(prs.startsel);
00332     pfree(prs.stopsel);
00333 
00334     PG_RETURN_POINTER(out);
00335 }
00336 
00337 Datum
00338 ts_headline_byid(PG_FUNCTION_ARGS)
00339 {
00340     PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
00341                                         PG_GETARG_DATUM(0),
00342                                         PG_GETARG_DATUM(1),
00343                                         PG_GETARG_DATUM(2)));
00344 }
00345 
00346 Datum
00347 ts_headline(PG_FUNCTION_ARGS)
00348 {
00349     PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
00350                                   ObjectIdGetDatum(getTSCurrentConfig(true)),
00351                                         PG_GETARG_DATUM(0),
00352                                         PG_GETARG_DATUM(1)));
00353 }
00354 
00355 Datum
00356 ts_headline_opt(PG_FUNCTION_ARGS)
00357 {
00358     PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
00359                                   ObjectIdGetDatum(getTSCurrentConfig(true)),
00360                                         PG_GETARG_DATUM(0),
00361                                         PG_GETARG_DATUM(1),
00362                                         PG_GETARG_DATUM(2)));
00363 }