Header And Logo

PostgreSQL
| The world's most advanced open source database.

tsginidx.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * tsginidx.c
00004  *   GIN support functions for tsvector_ops
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/utils/adt/tsginidx.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 #include "postgres.h"
00015 
00016 #include "access/gin.h"
00017 #include "access/skey.h"
00018 #include "tsearch/ts_type.h"
00019 #include "tsearch/ts_utils.h"
00020 #include "utils/builtins.h"
00021 
00022 
00023 Datum
00024 gin_cmp_tslexeme(PG_FUNCTION_ARGS)
00025 {
00026     text       *a = PG_GETARG_TEXT_PP(0);
00027     text       *b = PG_GETARG_TEXT_PP(1);
00028     int         cmp;
00029 
00030     cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
00031                           VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
00032                           false);
00033 
00034     PG_FREE_IF_COPY(a, 0);
00035     PG_FREE_IF_COPY(b, 1);
00036     PG_RETURN_INT32(cmp);
00037 }
00038 
00039 Datum
00040 gin_cmp_prefix(PG_FUNCTION_ARGS)
00041 {
00042     text       *a = PG_GETARG_TEXT_PP(0);
00043     text       *b = PG_GETARG_TEXT_PP(1);
00044 
00045 #ifdef NOT_USED
00046     StrategyNumber strategy = PG_GETARG_UINT16(2);
00047     Pointer     extra_data = PG_GETARG_POINTER(3);
00048 #endif
00049     int         cmp;
00050 
00051     cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
00052                           VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
00053                           true);
00054 
00055     if (cmp < 0)
00056         cmp = 1;                /* prevent continue scan */
00057 
00058     PG_FREE_IF_COPY(a, 0);
00059     PG_FREE_IF_COPY(b, 1);
00060     PG_RETURN_INT32(cmp);
00061 }
00062 
00063 Datum
00064 gin_extract_tsvector(PG_FUNCTION_ARGS)
00065 {
00066     TSVector    vector = PG_GETARG_TSVECTOR(0);
00067     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
00068     Datum      *entries = NULL;
00069 
00070     *nentries = vector->size;
00071     if (vector->size > 0)
00072     {
00073         int         i;
00074         WordEntry  *we = ARRPTR(vector);
00075 
00076         entries = (Datum *) palloc(sizeof(Datum) * vector->size);
00077 
00078         for (i = 0; i < vector->size; i++)
00079         {
00080             text       *txt;
00081 
00082             txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
00083             entries[i] = PointerGetDatum(txt);
00084 
00085             we++;
00086         }
00087     }
00088 
00089     PG_FREE_IF_COPY(vector, 0);
00090     PG_RETURN_POINTER(entries);
00091 }
00092 
00093 Datum
00094 gin_extract_tsquery(PG_FUNCTION_ARGS)
00095 {
00096     TSQuery     query = PG_GETARG_TSQUERY(0);
00097     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
00098 
00099     /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
00100     bool      **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
00101     Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
00102 
00103     /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
00104     int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
00105     Datum      *entries = NULL;
00106 
00107     *nentries = 0;
00108 
00109     if (query->size > 0)
00110     {
00111         QueryItem  *item = GETQUERY(query);
00112         int32       i,
00113                     j;
00114         bool       *partialmatch;
00115         int        *map_item_operand;
00116 
00117         /*
00118          * If the query doesn't have any required positive matches (for
00119          * instance, it's something like '! foo'), we have to do a full index
00120          * scan.
00121          */
00122         if (tsquery_requires_match(item))
00123             *searchMode = GIN_SEARCH_MODE_DEFAULT;
00124         else
00125             *searchMode = GIN_SEARCH_MODE_ALL;
00126 
00127         /* count number of VAL items */
00128         j = 0;
00129         for (i = 0; i < query->size; i++)
00130         {
00131             if (item[i].type == QI_VAL)
00132                 j++;
00133         }
00134         *nentries = j;
00135 
00136         entries = (Datum *) palloc(sizeof(Datum) * j);
00137         partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
00138 
00139         /*
00140          * Make map to convert item's number to corresponding operand's (the
00141          * same, entry's) number. Entry's number is used in check array in
00142          * consistent method. We use the same map for each entry.
00143          */
00144         *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
00145         map_item_operand = (int *) palloc0(sizeof(int) * query->size);
00146 
00147         /* Now rescan the VAL items and fill in the arrays */
00148         j = 0;
00149         for (i = 0; i < query->size; i++)
00150         {
00151             if (item[i].type == QI_VAL)
00152             {
00153                 QueryOperand *val = &item[i].qoperand;
00154                 text       *txt;
00155 
00156                 txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
00157                                                val->length);
00158                 entries[j] = PointerGetDatum(txt);
00159                 partialmatch[j] = val->prefix;
00160                 (*extra_data)[j] = (Pointer) map_item_operand;
00161                 map_item_operand[i] = j;
00162                 j++;
00163             }
00164         }
00165     }
00166 
00167     PG_FREE_IF_COPY(query, 0);
00168 
00169     PG_RETURN_POINTER(entries);
00170 }
00171 
00172 typedef struct
00173 {
00174     QueryItem  *first_item;
00175     bool       *check;
00176     int        *map_item_operand;
00177     bool       *need_recheck;
00178 } GinChkVal;
00179 
00180 static bool
00181 checkcondition_gin(void *checkval, QueryOperand *val)
00182 {
00183     GinChkVal  *gcv = (GinChkVal *) checkval;
00184     int         j;
00185 
00186     /* if any val requiring a weight is used, set recheck flag */
00187     if (val->weight != 0)
00188         *(gcv->need_recheck) = true;
00189 
00190     /* convert item's number to corresponding entry's (operand's) number */
00191     j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
00192 
00193     /* return presence of current entry in indexed value */
00194     return gcv->check[j];
00195 }
00196 
00197 Datum
00198 gin_tsquery_consistent(PG_FUNCTION_ARGS)
00199 {
00200     bool       *check = (bool *) PG_GETARG_POINTER(0);
00201 
00202     /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
00203     TSQuery     query = PG_GETARG_TSQUERY(2);
00204 
00205     /* int32    nkeys = PG_GETARG_INT32(3); */
00206     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
00207     bool       *recheck = (bool *) PG_GETARG_POINTER(5);
00208     bool        res = FALSE;
00209 
00210     /* The query requires recheck only if it involves weights */
00211     *recheck = false;
00212 
00213     if (query->size > 0)
00214     {
00215         QueryItem  *item;
00216         GinChkVal   gcv;
00217 
00218         /*
00219          * check-parameter array has one entry for each value (operand) in the
00220          * query.
00221          */
00222         gcv.first_item = item = GETQUERY(query);
00223         gcv.check = check;
00224         gcv.map_item_operand = (int *) (extra_data[0]);
00225         gcv.need_recheck = recheck;
00226 
00227         res = TS_execute(GETQUERY(query),
00228                          &gcv,
00229                          true,
00230                          checkcondition_gin);
00231     }
00232 
00233     PG_RETURN_BOOL(res);
00234 }
00235 
00236 /*
00237  * Formerly, gin_extract_tsvector had only two arguments.  Now it has three,
00238  * but we still need a pg_proc entry with two args to support reloading
00239  * pre-9.1 contrib/tsearch2 opclass declarations.  This compatibility
00240  * function should go away eventually.  (Note: you might say "hey, but the
00241  * code above is only *using* two args, so let's just declare it that way".
00242  * If you try that you'll find the opr_sanity regression test complains.)
00243  */
00244 Datum
00245 gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
00246 {
00247     if (PG_NARGS() < 3)         /* should not happen */
00248         elog(ERROR, "gin_extract_tsvector requires three arguments");
00249     return gin_extract_tsvector(fcinfo);
00250 }
00251 
00252 /*
00253  * Likewise, we need a stub version of gin_extract_tsquery declared with
00254  * only five arguments.
00255  */
00256 Datum
00257 gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
00258 {
00259     if (PG_NARGS() < 7)         /* should not happen */
00260         elog(ERROR, "gin_extract_tsquery requires seven arguments");
00261     return gin_extract_tsquery(fcinfo);
00262 }
00263 
00264 /*
00265  * Likewise, we need a stub version of gin_tsquery_consistent declared with
00266  * only six arguments.
00267  */
00268 Datum
00269 gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
00270 {
00271     if (PG_NARGS() < 8)         /* should not happen */
00272         elog(ERROR, "gin_tsquery_consistent requires eight arguments");
00273     return gin_tsquery_consistent(fcinfo);
00274 }