00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "postgres.h"
00015
00016 #include "access/htup_details.h"
00017 #include "catalog/pg_statistic.h"
00018 #include "catalog/pg_type.h"
00019 #include "miscadmin.h"
00020 #include "nodes/nodes.h"
00021 #include "tsearch/ts_type.h"
00022 #include "utils/lsyscache.h"
00023 #include "utils/selfuncs.h"
00024 #include "utils/syscache.h"
00025
00026
00027
00028
00029
00030
00031
00032 #define DEFAULT_TS_MATCH_SEL 0.005
00033
00034
00035 typedef struct
00036 {
00037 text *element;
00038 float4 frequency;
00039 } TextFreq;
00040
00041
00042 typedef struct
00043 {
00044 char *lexeme;
00045 int length;
00046 } LexemeKey;
00047
00048 static Selectivity tsquerysel(VariableStatData *vardata, Datum constval);
00049 static Selectivity mcelem_tsquery_selec(TSQuery query,
00050 Datum *mcelem, int nmcelem,
00051 float4 *numbers, int nnumbers);
00052 static Selectivity tsquery_opr_selec(QueryItem *item, char *operand,
00053 TextFreq *lookup, int length, float4 minfreq);
00054 static int compare_lexeme_textfreq(const void *e1, const void *e2);
00055
00056 #define tsquery_opr_selec_no_stats(query) \
00057 tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), NULL, 0, 0)
00058
00059
00060
00061
00062
00063
00064
00065
00066 Datum
00067 tsmatchsel(PG_FUNCTION_ARGS)
00068 {
00069 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
00070
00071 #ifdef NOT_USED
00072 Oid operator = PG_GETARG_OID(1);
00073 #endif
00074 List *args = (List *) PG_GETARG_POINTER(2);
00075 int varRelid = PG_GETARG_INT32(3);
00076 VariableStatData vardata;
00077 Node *other;
00078 bool varonleft;
00079 Selectivity selec;
00080
00081
00082
00083
00084
00085 if (!get_restriction_variable(root, args, varRelid,
00086 &vardata, &other, &varonleft))
00087 PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);
00088
00089
00090
00091
00092 if (!IsA(other, Const))
00093 {
00094 ReleaseVariableStats(vardata);
00095 PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);
00096 }
00097
00098
00099
00100
00101 if (((Const *) other)->constisnull)
00102 {
00103 ReleaseVariableStats(vardata);
00104 PG_RETURN_FLOAT8(0.0);
00105 }
00106
00107
00108
00109
00110
00111
00112 if (((Const *) other)->consttype == TSQUERYOID)
00113 {
00114
00115 Assert(vardata.vartype == TSVECTOROID);
00116
00117 selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
00118 }
00119 else
00120 {
00121
00122 selec = DEFAULT_TS_MATCH_SEL;
00123 }
00124
00125 ReleaseVariableStats(vardata);
00126
00127 CLAMP_PROBABILITY(selec);
00128
00129 PG_RETURN_FLOAT8((float8) selec);
00130 }
00131
00132
00133
00134
00135
00136
00137
00138 Datum
00139 tsmatchjoinsel(PG_FUNCTION_ARGS)
00140 {
00141
00142 PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);
00143 }
00144
00145
00146
00147
00148
00149 static Selectivity
00150 tsquerysel(VariableStatData *vardata, Datum constval)
00151 {
00152 Selectivity selec;
00153 TSQuery query;
00154
00155
00156 query = DatumGetTSQuery(constval);
00157
00158
00159 if (query->size == 0)
00160 return (Selectivity) 0.0;
00161
00162 if (HeapTupleIsValid(vardata->statsTuple))
00163 {
00164 Form_pg_statistic stats;
00165 Datum *values;
00166 int nvalues;
00167 float4 *numbers;
00168 int nnumbers;
00169
00170 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
00171
00172
00173 if (get_attstatsslot(vardata->statsTuple,
00174 TEXTOID, -1,
00175 STATISTIC_KIND_MCELEM, InvalidOid,
00176 NULL,
00177 &values, &nvalues,
00178 &numbers, &nnumbers))
00179 {
00180
00181
00182
00183
00184 selec = mcelem_tsquery_selec(query, values, nvalues,
00185 numbers, nnumbers);
00186 free_attstatsslot(TEXTOID, values, nvalues, numbers, nnumbers);
00187 }
00188 else
00189 {
00190
00191 selec = tsquery_opr_selec_no_stats(query);
00192 }
00193
00194
00195
00196
00197 selec *= (1.0 - stats->stanullfrac);
00198 }
00199 else
00200 {
00201
00202 selec = tsquery_opr_selec_no_stats(query);
00203
00204 }
00205
00206 return selec;
00207 }
00208
00209
00210
00211
00212 static Selectivity
00213 mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
00214 float4 *numbers, int nnumbers)
00215 {
00216 float4 minfreq;
00217 TextFreq *lookup;
00218 Selectivity selec;
00219 int i;
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229 if (nnumbers != nmcelem + 2)
00230 return tsquery_opr_selec_no_stats(query);
00231
00232
00233
00234
00235 lookup = (TextFreq *) palloc(sizeof(TextFreq) * nmcelem);
00236 for (i = 0; i < nmcelem; i++)
00237 {
00238
00239
00240
00241
00242 Assert(!VARATT_IS_COMPRESSED(mcelem[i]) && !VARATT_IS_EXTERNAL(mcelem[i]));
00243 lookup[i].element = (text *) DatumGetPointer(mcelem[i]);
00244 lookup[i].frequency = numbers[i];
00245 }
00246
00247
00248
00249
00250
00251 minfreq = numbers[nnumbers - 2];
00252
00253 selec = tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), lookup,
00254 nmcelem, minfreq);
00255
00256 pfree(lookup);
00257
00258 return selec;
00259 }
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283 static Selectivity
00284 tsquery_opr_selec(QueryItem *item, char *operand,
00285 TextFreq *lookup, int length, float4 minfreq)
00286 {
00287 Selectivity selec;
00288
00289
00290 check_stack_depth();
00291
00292 if (item->type == QI_VAL)
00293 {
00294 QueryOperand *oper = (QueryOperand *) item;
00295 LexemeKey key;
00296
00297
00298
00299
00300 key.lexeme = operand + oper->distance;
00301 key.length = oper->length;
00302
00303 if (oper->prefix)
00304 {
00305
00306 Selectivity matched,
00307 allmces;
00308 int i,
00309 n_matched;
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325 if (lookup == NULL || length < 100)
00326 return (Selectivity) (DEFAULT_TS_MATCH_SEL * 4);
00327
00328 matched = allmces = 0;
00329 n_matched = 0;
00330 for (i = 0; i < length; i++)
00331 {
00332 TextFreq *t = lookup + i;
00333 int tlen = VARSIZE_ANY_EXHDR(t->element);
00334
00335 if (tlen >= key.length &&
00336 strncmp(key.lexeme, VARDATA_ANY(t->element),
00337 key.length) == 0)
00338 {
00339 matched += t->frequency - matched * t->frequency;
00340 n_matched++;
00341 }
00342 allmces += t->frequency - allmces * t->frequency;
00343 }
00344
00345
00346 CLAMP_PROBABILITY(matched);
00347 CLAMP_PROBABILITY(allmces);
00348
00349 selec = matched + (1.0 - allmces) * ((double) n_matched / length);
00350
00351
00352
00353
00354
00355
00356
00357 selec = Max(Min(DEFAULT_TS_MATCH_SEL, minfreq / 2), selec);
00358 }
00359 else
00360 {
00361
00362 TextFreq *searchres;
00363
00364
00365 if (lookup == NULL)
00366 return (Selectivity) DEFAULT_TS_MATCH_SEL;
00367
00368 searchres = (TextFreq *) bsearch(&key, lookup, length,
00369 sizeof(TextFreq),
00370 compare_lexeme_textfreq);
00371
00372 if (searchres)
00373 {
00374
00375
00376
00377
00378 selec = searchres->frequency;
00379 }
00380 else
00381 {
00382
00383
00384
00385
00386 selec = Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);
00387 }
00388 }
00389 }
00390 else
00391 {
00392
00393 Selectivity s1,
00394 s2;
00395
00396 switch (item->qoperator.oper)
00397 {
00398 case OP_NOT:
00399 selec = 1.0 - tsquery_opr_selec(item + 1, operand,
00400 lookup, length, minfreq);
00401 break;
00402
00403 case OP_AND:
00404 s1 = tsquery_opr_selec(item + 1, operand,
00405 lookup, length, minfreq);
00406 s2 = tsquery_opr_selec(item + item->qoperator.left, operand,
00407 lookup, length, minfreq);
00408 selec = s1 * s2;
00409 break;
00410
00411 case OP_OR:
00412 s1 = tsquery_opr_selec(item + 1, operand,
00413 lookup, length, minfreq);
00414 s2 = tsquery_opr_selec(item + item->qoperator.left, operand,
00415 lookup, length, minfreq);
00416 selec = s1 + s2 - s1 * s2;
00417 break;
00418
00419 default:
00420 elog(ERROR, "unrecognized operator: %d", item->qoperator.oper);
00421 selec = 0;
00422 break;
00423 }
00424 }
00425
00426
00427 CLAMP_PROBABILITY(selec);
00428
00429 return selec;
00430 }
00431
00432
00433
00434
00435
00436
00437
00438 static int
00439 compare_lexeme_textfreq(const void *e1, const void *e2)
00440 {
00441 const LexemeKey *key = (const LexemeKey *) e1;
00442 const TextFreq *t = (const TextFreq *) e2;
00443 int len1,
00444 len2;
00445
00446 len1 = key->length;
00447 len2 = VARSIZE_ANY_EXHDR(t->element);
00448
00449
00450 if (len1 > len2)
00451 return 1;
00452 else if (len1 < len2)
00453 return -1;
00454
00455
00456 return strncmp(key->lexeme, VARDATA_ANY(t->element), len1);
00457 }