00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098 #include "postgres.h"
00099
00100 #include <ctype.h>
00101 #include <math.h>
00102
00103 #include "access/gin.h"
00104 #include "access/htup_details.h"
00105 #include "access/sysattr.h"
00106 #include "catalog/index.h"
00107 #include "catalog/pg_collation.h"
00108 #include "catalog/pg_opfamily.h"
00109 #include "catalog/pg_statistic.h"
00110 #include "catalog/pg_type.h"
00111 #include "executor/executor.h"
00112 #include "mb/pg_wchar.h"
00113 #include "nodes/makefuncs.h"
00114 #include "nodes/nodeFuncs.h"
00115 #include "optimizer/clauses.h"
00116 #include "optimizer/cost.h"
00117 #include "optimizer/pathnode.h"
00118 #include "optimizer/paths.h"
00119 #include "optimizer/plancat.h"
00120 #include "optimizer/predtest.h"
00121 #include "optimizer/restrictinfo.h"
00122 #include "optimizer/var.h"
00123 #include "parser/parse_clause.h"
00124 #include "parser/parse_coerce.h"
00125 #include "parser/parsetree.h"
00126 #include "utils/builtins.h"
00127 #include "utils/bytea.h"
00128 #include "utils/date.h"
00129 #include "utils/datum.h"
00130 #include "utils/fmgroids.h"
00131 #include "utils/lsyscache.h"
00132 #include "utils/nabstime.h"
00133 #include "utils/pg_locale.h"
00134 #include "utils/rel.h"
00135 #include "utils/selfuncs.h"
00136 #include "utils/spccache.h"
00137 #include "utils/syscache.h"
00138 #include "utils/timestamp.h"
00139 #include "utils/tqual.h"
00140 #include "utils/typcache.h"
00141
00142
00143
00144 get_relation_stats_hook_type get_relation_stats_hook = NULL;
00145 get_index_stats_hook_type get_index_stats_hook = NULL;
00146
00147 static double var_eq_const(VariableStatData *vardata, Oid operator,
00148 Datum constval, bool constisnull,
00149 bool varonleft);
00150 static double var_eq_non_const(VariableStatData *vardata, Oid operator,
00151 Node *other,
00152 bool varonleft);
00153 static double ineq_histogram_selectivity(PlannerInfo *root,
00154 VariableStatData *vardata,
00155 FmgrInfo *opproc, bool isgt,
00156 Datum constval, Oid consttype);
00157 static double eqjoinsel_inner(Oid operator,
00158 VariableStatData *vardata1, VariableStatData *vardata2);
00159 static double eqjoinsel_semi(Oid operator,
00160 VariableStatData *vardata1, VariableStatData *vardata2,
00161 RelOptInfo *inner_rel);
00162 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
00163 Datum lobound, Datum hibound, Oid boundstypid,
00164 double *scaledlobound, double *scaledhibound);
00165 static double convert_numeric_to_scalar(Datum value, Oid typid);
00166 static void convert_string_to_scalar(char *value,
00167 double *scaledvalue,
00168 char *lobound,
00169 double *scaledlobound,
00170 char *hibound,
00171 double *scaledhibound);
00172 static void convert_bytea_to_scalar(Datum value,
00173 double *scaledvalue,
00174 Datum lobound,
00175 double *scaledlobound,
00176 Datum hibound,
00177 double *scaledhibound);
00178 static double convert_one_string_to_scalar(char *value,
00179 int rangelo, int rangehi);
00180 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
00181 int rangelo, int rangehi);
00182 static char *convert_string_datum(Datum value, Oid typid);
00183 static double convert_timevalue_to_scalar(Datum value, Oid typid);
00184 static void examine_simple_variable(PlannerInfo *root, Var *var,
00185 VariableStatData *vardata);
00186 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
00187 Oid sortop, Datum *min, Datum *max);
00188 static bool get_actual_variable_range(PlannerInfo *root,
00189 VariableStatData *vardata,
00190 Oid sortop,
00191 Datum *min, Datum *max);
00192 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
00193 static Selectivity prefix_selectivity(PlannerInfo *root,
00194 VariableStatData *vardata,
00195 Oid vartype, Oid opfamily, Const *prefixcon);
00196 static Selectivity like_selectivity(const char *patt, int pattlen,
00197 bool case_insensitive);
00198 static Selectivity regex_selectivity(const char *patt, int pattlen,
00199 bool case_insensitive,
00200 int fixed_prefix_len);
00201 static Datum string_to_datum(const char *str, Oid datatype);
00202 static Const *string_to_const(const char *str, Oid datatype);
00203 static Const *string_to_bytea_const(const char *str, size_t str_len);
00204 static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215 Datum
00216 eqsel(PG_FUNCTION_ARGS)
00217 {
00218 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
00219 Oid operator = PG_GETARG_OID(1);
00220 List *args = (List *) PG_GETARG_POINTER(2);
00221 int varRelid = PG_GETARG_INT32(3);
00222 VariableStatData vardata;
00223 Node *other;
00224 bool varonleft;
00225 double selec;
00226
00227
00228
00229
00230
00231 if (!get_restriction_variable(root, args, varRelid,
00232 &vardata, &other, &varonleft))
00233 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
00234
00235
00236
00237
00238
00239
00240 if (IsA(other, Const))
00241 selec = var_eq_const(&vardata, operator,
00242 ((Const *) other)->constvalue,
00243 ((Const *) other)->constisnull,
00244 varonleft);
00245 else
00246 selec = var_eq_non_const(&vardata, operator, other,
00247 varonleft);
00248
00249 ReleaseVariableStats(vardata);
00250
00251 PG_RETURN_FLOAT8((float8) selec);
00252 }
00253
00254
00255
00256
00257
00258
00259 static double
00260 var_eq_const(VariableStatData *vardata, Oid operator,
00261 Datum constval, bool constisnull,
00262 bool varonleft)
00263 {
00264 double selec;
00265 bool isdefault;
00266
00267
00268
00269
00270
00271 if (constisnull)
00272 return 0.0;
00273
00274
00275
00276
00277
00278
00279
00280
00281 if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
00282 return 1.0 / vardata->rel->tuples;
00283
00284 if (HeapTupleIsValid(vardata->statsTuple))
00285 {
00286 Form_pg_statistic stats;
00287 Datum *values;
00288 int nvalues;
00289 float4 *numbers;
00290 int nnumbers;
00291 bool match = false;
00292 int i;
00293
00294 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
00295
00296
00297
00298
00299
00300
00301
00302
00303 if (get_attstatsslot(vardata->statsTuple,
00304 vardata->atttype, vardata->atttypmod,
00305 STATISTIC_KIND_MCV, InvalidOid,
00306 NULL,
00307 &values, &nvalues,
00308 &numbers, &nnumbers))
00309 {
00310 FmgrInfo eqproc;
00311
00312 fmgr_info(get_opcode(operator), &eqproc);
00313
00314 for (i = 0; i < nvalues; i++)
00315 {
00316
00317 if (varonleft)
00318 match = DatumGetBool(FunctionCall2Coll(&eqproc,
00319 DEFAULT_COLLATION_OID,
00320 values[i],
00321 constval));
00322 else
00323 match = DatumGetBool(FunctionCall2Coll(&eqproc,
00324 DEFAULT_COLLATION_OID,
00325 constval,
00326 values[i]));
00327 if (match)
00328 break;
00329 }
00330 }
00331 else
00332 {
00333
00334 values = NULL;
00335 numbers = NULL;
00336 i = nvalues = nnumbers = 0;
00337 }
00338
00339 if (match)
00340 {
00341
00342
00343
00344
00345 selec = numbers[i];
00346 }
00347 else
00348 {
00349
00350
00351
00352
00353
00354 double sumcommon = 0.0;
00355 double otherdistinct;
00356
00357 for (i = 0; i < nnumbers; i++)
00358 sumcommon += numbers[i];
00359 selec = 1.0 - sumcommon - stats->stanullfrac;
00360 CLAMP_PROBABILITY(selec);
00361
00362
00363
00364
00365
00366
00367 otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
00368 if (otherdistinct > 1)
00369 selec /= otherdistinct;
00370
00371
00372
00373
00374
00375 if (nnumbers > 0 && selec > numbers[nnumbers - 1])
00376 selec = numbers[nnumbers - 1];
00377 }
00378
00379 free_attstatsslot(vardata->atttype, values, nvalues,
00380 numbers, nnumbers);
00381 }
00382 else
00383 {
00384
00385
00386
00387
00388
00389 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
00390 }
00391
00392
00393 CLAMP_PROBABILITY(selec);
00394
00395 return selec;
00396 }
00397
00398
00399
00400
00401 static double
00402 var_eq_non_const(VariableStatData *vardata, Oid operator,
00403 Node *other,
00404 bool varonleft)
00405 {
00406 double selec;
00407 bool isdefault;
00408
00409
00410
00411
00412
00413
00414
00415
00416 if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
00417 return 1.0 / vardata->rel->tuples;
00418
00419 if (HeapTupleIsValid(vardata->statsTuple))
00420 {
00421 Form_pg_statistic stats;
00422 double ndistinct;
00423 float4 *numbers;
00424 int nnumbers;
00425
00426 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438 selec = 1.0 - stats->stanullfrac;
00439 ndistinct = get_variable_numdistinct(vardata, &isdefault);
00440 if (ndistinct > 1)
00441 selec /= ndistinct;
00442
00443
00444
00445
00446
00447 if (get_attstatsslot(vardata->statsTuple,
00448 vardata->atttype, vardata->atttypmod,
00449 STATISTIC_KIND_MCV, InvalidOid,
00450 NULL,
00451 NULL, NULL,
00452 &numbers, &nnumbers))
00453 {
00454 if (nnumbers > 0 && selec > numbers[0])
00455 selec = numbers[0];
00456 free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
00457 }
00458 }
00459 else
00460 {
00461
00462
00463
00464
00465
00466 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
00467 }
00468
00469
00470 CLAMP_PROBABILITY(selec);
00471
00472 return selec;
00473 }
00474
00475
00476
00477
00478
00479
00480
00481
00482 Datum
00483 neqsel(PG_FUNCTION_ARGS)
00484 {
00485 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
00486 Oid operator = PG_GETARG_OID(1);
00487 List *args = (List *) PG_GETARG_POINTER(2);
00488 int varRelid = PG_GETARG_INT32(3);
00489 Oid eqop;
00490 float8 result;
00491
00492
00493
00494
00495
00496 eqop = get_negator(operator);
00497 if (eqop)
00498 {
00499 result = DatumGetFloat8(DirectFunctionCall4(eqsel,
00500 PointerGetDatum(root),
00501 ObjectIdGetDatum(eqop),
00502 PointerGetDatum(args),
00503 Int32GetDatum(varRelid)));
00504 }
00505 else
00506 {
00507
00508 result = DEFAULT_EQ_SEL;
00509 }
00510 result = 1.0 - result;
00511 PG_RETURN_FLOAT8(result);
00512 }
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527 static double
00528 scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
00529 VariableStatData *vardata, Datum constval, Oid consttype)
00530 {
00531 Form_pg_statistic stats;
00532 FmgrInfo opproc;
00533 double mcv_selec,
00534 hist_selec,
00535 sumcommon;
00536 double selec;
00537
00538 if (!HeapTupleIsValid(vardata->statsTuple))
00539 {
00540
00541 return DEFAULT_INEQ_SEL;
00542 }
00543 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
00544
00545 fmgr_info(get_opcode(operator), &opproc);
00546
00547
00548
00549
00550
00551
00552
00553 mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
00554 &sumcommon);
00555
00556
00557
00558
00559
00560 hist_selec = ineq_histogram_selectivity(root, vardata, &opproc, isgt,
00561 constval, consttype);
00562
00563
00564
00565
00566
00567
00568 selec = 1.0 - stats->stanullfrac - sumcommon;
00569
00570 if (hist_selec >= 0.0)
00571 selec *= hist_selec;
00572 else
00573 {
00574
00575
00576
00577
00578 selec *= 0.5;
00579 }
00580
00581 selec += mcv_selec;
00582
00583
00584 CLAMP_PROBABILITY(selec);
00585
00586 return selec;
00587 }
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601 double
00602 mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
00603 Datum constval, bool varonleft,
00604 double *sumcommonp)
00605 {
00606 double mcv_selec,
00607 sumcommon;
00608 Datum *values;
00609 int nvalues;
00610 float4 *numbers;
00611 int nnumbers;
00612 int i;
00613
00614 mcv_selec = 0.0;
00615 sumcommon = 0.0;
00616
00617 if (HeapTupleIsValid(vardata->statsTuple) &&
00618 get_attstatsslot(vardata->statsTuple,
00619 vardata->atttype, vardata->atttypmod,
00620 STATISTIC_KIND_MCV, InvalidOid,
00621 NULL,
00622 &values, &nvalues,
00623 &numbers, &nnumbers))
00624 {
00625 for (i = 0; i < nvalues; i++)
00626 {
00627 if (varonleft ?
00628 DatumGetBool(FunctionCall2Coll(opproc,
00629 DEFAULT_COLLATION_OID,
00630 values[i],
00631 constval)) :
00632 DatumGetBool(FunctionCall2Coll(opproc,
00633 DEFAULT_COLLATION_OID,
00634 constval,
00635 values[i])))
00636 mcv_selec += numbers[i];
00637 sumcommon += numbers[i];
00638 }
00639 free_attstatsslot(vardata->atttype, values, nvalues,
00640 numbers, nnumbers);
00641 }
00642
00643 *sumcommonp = sumcommon;
00644 return mcv_selec;
00645 }
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679 double
00680 histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
00681 Datum constval, bool varonleft,
00682 int min_hist_size, int n_skip,
00683 int *hist_size)
00684 {
00685 double result;
00686 Datum *values;
00687 int nvalues;
00688
00689
00690 Assert(n_skip >= 0);
00691 Assert(min_hist_size > 2 * n_skip);
00692
00693 if (HeapTupleIsValid(vardata->statsTuple) &&
00694 get_attstatsslot(vardata->statsTuple,
00695 vardata->atttype, vardata->atttypmod,
00696 STATISTIC_KIND_HISTOGRAM, InvalidOid,
00697 NULL,
00698 &values, &nvalues,
00699 NULL, NULL))
00700 {
00701 *hist_size = nvalues;
00702 if (nvalues >= min_hist_size)
00703 {
00704 int nmatch = 0;
00705 int i;
00706
00707 for (i = n_skip; i < nvalues - n_skip; i++)
00708 {
00709 if (varonleft ?
00710 DatumGetBool(FunctionCall2Coll(opproc,
00711 DEFAULT_COLLATION_OID,
00712 values[i],
00713 constval)) :
00714 DatumGetBool(FunctionCall2Coll(opproc,
00715 DEFAULT_COLLATION_OID,
00716 constval,
00717 values[i])))
00718 nmatch++;
00719 }
00720 result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));
00721 }
00722 else
00723 result = -1;
00724 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
00725 }
00726 else
00727 {
00728 *hist_size = 0;
00729 result = -1;
00730 }
00731
00732 return result;
00733 }
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747 static double
00748 ineq_histogram_selectivity(PlannerInfo *root,
00749 VariableStatData *vardata,
00750 FmgrInfo *opproc, bool isgt,
00751 Datum constval, Oid consttype)
00752 {
00753 double hist_selec;
00754 Oid hist_op;
00755 Datum *values;
00756 int nvalues;
00757
00758 hist_selec = -1.0;
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770 if (HeapTupleIsValid(vardata->statsTuple) &&
00771 get_attstatsslot(vardata->statsTuple,
00772 vardata->atttype, vardata->atttypmod,
00773 STATISTIC_KIND_HISTOGRAM, InvalidOid,
00774 &hist_op,
00775 &values, &nvalues,
00776 NULL, NULL))
00777 {
00778 if (nvalues > 1)
00779 {
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795 double histfrac;
00796 int lobound = 0;
00797 int hibound = nvalues;
00798 bool have_end = false;
00799
00800
00801
00802
00803
00804
00805
00806 if (nvalues == 2)
00807 have_end = get_actual_variable_range(root,
00808 vardata,
00809 hist_op,
00810 &values[0],
00811 &values[1]);
00812
00813 while (lobound < hibound)
00814 {
00815 int probe = (lobound + hibound) / 2;
00816 bool ltcmp;
00817
00818
00819
00820
00821
00822
00823 if (probe == 0 && nvalues > 2)
00824 have_end = get_actual_variable_range(root,
00825 vardata,
00826 hist_op,
00827 &values[0],
00828 NULL);
00829 else if (probe == nvalues - 1 && nvalues > 2)
00830 have_end = get_actual_variable_range(root,
00831 vardata,
00832 hist_op,
00833 NULL,
00834 &values[probe]);
00835
00836 ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
00837 DEFAULT_COLLATION_OID,
00838 values[probe],
00839 constval));
00840 if (isgt)
00841 ltcmp = !ltcmp;
00842 if (ltcmp)
00843 lobound = probe + 1;
00844 else
00845 hibound = probe;
00846 }
00847
00848 if (lobound <= 0)
00849 {
00850
00851 histfrac = 0.0;
00852 }
00853 else if (lobound >= nvalues)
00854 {
00855
00856 histfrac = 1.0;
00857 }
00858 else
00859 {
00860 int i = lobound;
00861 double val,
00862 high,
00863 low;
00864 double binfrac;
00865
00866
00867
00868
00869
00870
00871
00872
00873 if (convert_to_scalar(constval, consttype, &val,
00874 values[i - 1], values[i],
00875 vardata->vartype,
00876 &low, &high))
00877 {
00878 if (high <= low)
00879 {
00880
00881 binfrac = 0.5;
00882 }
00883 else if (val <= low)
00884 binfrac = 0.0;
00885 else if (val >= high)
00886 binfrac = 1.0;
00887 else
00888 {
00889 binfrac = (val - low) / (high - low);
00890
00891
00892
00893
00894
00895
00896
00897 if (isnan(binfrac) ||
00898 binfrac < 0.0 || binfrac > 1.0)
00899 binfrac = 0.5;
00900 }
00901 }
00902 else
00903 {
00904
00905
00906
00907
00908
00909
00910
00911
00912 binfrac = 0.5;
00913 }
00914
00915
00916
00917
00918
00919
00920 histfrac = (double) (i - 1) + binfrac;
00921 histfrac /= (double) (nvalues - 1);
00922 }
00923
00924
00925
00926
00927
00928
00929
00930 hist_selec = isgt ? (1.0 - histfrac) : histfrac;
00931
00932
00933
00934
00935
00936
00937
00938 if (have_end)
00939 CLAMP_PROBABILITY(hist_selec);
00940 else
00941 {
00942 if (hist_selec < 0.0001)
00943 hist_selec = 0.0001;
00944 else if (hist_selec > 0.9999)
00945 hist_selec = 0.9999;
00946 }
00947 }
00948
00949 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
00950 }
00951
00952 return hist_selec;
00953 }
00954
00955
00956
00957
00958 Datum
00959 scalarltsel(PG_FUNCTION_ARGS)
00960 {
00961 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
00962 Oid operator = PG_GETARG_OID(1);
00963 List *args = (List *) PG_GETARG_POINTER(2);
00964 int varRelid = PG_GETARG_INT32(3);
00965 VariableStatData vardata;
00966 Node *other;
00967 bool varonleft;
00968 Datum constval;
00969 Oid consttype;
00970 bool isgt;
00971 double selec;
00972
00973
00974
00975
00976
00977 if (!get_restriction_variable(root, args, varRelid,
00978 &vardata, &other, &varonleft))
00979 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
00980
00981
00982
00983
00984 if (!IsA(other, Const))
00985 {
00986 ReleaseVariableStats(vardata);
00987 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
00988 }
00989
00990
00991
00992
00993
00994 if (((Const *) other)->constisnull)
00995 {
00996 ReleaseVariableStats(vardata);
00997 PG_RETURN_FLOAT8(0.0);
00998 }
00999 constval = ((Const *) other)->constvalue;
01000 consttype = ((Const *) other)->consttype;
01001
01002
01003
01004
01005 if (varonleft)
01006 {
01007
01008 isgt = false;
01009 }
01010 else
01011 {
01012
01013 operator = get_commutator(operator);
01014 if (!operator)
01015 {
01016
01017 ReleaseVariableStats(vardata);
01018 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
01019 }
01020 isgt = true;
01021 }
01022
01023 selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
01024
01025 ReleaseVariableStats(vardata);
01026
01027 PG_RETURN_FLOAT8((float8) selec);
01028 }
01029
01030
01031
01032
01033 Datum
01034 scalargtsel(PG_FUNCTION_ARGS)
01035 {
01036 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
01037 Oid operator = PG_GETARG_OID(1);
01038 List *args = (List *) PG_GETARG_POINTER(2);
01039 int varRelid = PG_GETARG_INT32(3);
01040 VariableStatData vardata;
01041 Node *other;
01042 bool varonleft;
01043 Datum constval;
01044 Oid consttype;
01045 bool isgt;
01046 double selec;
01047
01048
01049
01050
01051
01052 if (!get_restriction_variable(root, args, varRelid,
01053 &vardata, &other, &varonleft))
01054 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
01055
01056
01057
01058
01059 if (!IsA(other, Const))
01060 {
01061 ReleaseVariableStats(vardata);
01062 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
01063 }
01064
01065
01066
01067
01068
01069 if (((Const *) other)->constisnull)
01070 {
01071 ReleaseVariableStats(vardata);
01072 PG_RETURN_FLOAT8(0.0);
01073 }
01074 constval = ((Const *) other)->constvalue;
01075 consttype = ((Const *) other)->consttype;
01076
01077
01078
01079
01080 if (varonleft)
01081 {
01082
01083 isgt = true;
01084 }
01085 else
01086 {
01087
01088 operator = get_commutator(operator);
01089 if (!operator)
01090 {
01091
01092 ReleaseVariableStats(vardata);
01093 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
01094 }
01095 isgt = false;
01096 }
01097
01098 selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
01099
01100 ReleaseVariableStats(vardata);
01101
01102 PG_RETURN_FLOAT8((float8) selec);
01103 }
01104
01105
01106
01107
01108 static double
01109 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
01110 {
01111 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
01112 Oid operator = PG_GETARG_OID(1);
01113 List *args = (List *) PG_GETARG_POINTER(2);
01114 int varRelid = PG_GETARG_INT32(3);
01115 Oid collation = PG_GET_COLLATION();
01116 VariableStatData vardata;
01117 Node *other;
01118 bool varonleft;
01119 Datum constval;
01120 Oid consttype;
01121 Oid vartype;
01122 Oid opfamily;
01123 Pattern_Prefix_Status pstatus;
01124 Const *patt;
01125 Const *prefix = NULL;
01126 Selectivity rest_selec = 0;
01127 double result;
01128
01129
01130
01131
01132
01133
01134 if (negate)
01135 {
01136 operator = get_negator(operator);
01137 if (!OidIsValid(operator))
01138 elog(ERROR, "patternsel called for operator without a negator");
01139 result = 1.0 - DEFAULT_MATCH_SEL;
01140 }
01141 else
01142 {
01143 result = DEFAULT_MATCH_SEL;
01144 }
01145
01146
01147
01148
01149
01150 if (!get_restriction_variable(root, args, varRelid,
01151 &vardata, &other, &varonleft))
01152 return result;
01153 if (!varonleft || !IsA(other, Const))
01154 {
01155 ReleaseVariableStats(vardata);
01156 return result;
01157 }
01158
01159
01160
01161
01162
01163 if (((Const *) other)->constisnull)
01164 {
01165 ReleaseVariableStats(vardata);
01166 return 0.0;
01167 }
01168 constval = ((Const *) other)->constvalue;
01169 consttype = ((Const *) other)->consttype;
01170
01171
01172
01173
01174
01175
01176
01177 if (consttype != TEXTOID && consttype != BYTEAOID)
01178 {
01179 ReleaseVariableStats(vardata);
01180 return result;
01181 }
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194 vartype = vardata.vartype;
01195
01196 switch (vartype)
01197 {
01198 case TEXTOID:
01199 opfamily = TEXT_BTREE_FAM_OID;
01200 break;
01201 case BPCHAROID:
01202 opfamily = BPCHAR_BTREE_FAM_OID;
01203 break;
01204 case NAMEOID:
01205 opfamily = NAME_BTREE_FAM_OID;
01206 break;
01207 case BYTEAOID:
01208 opfamily = BYTEA_BTREE_FAM_OID;
01209 break;
01210 default:
01211 ReleaseVariableStats(vardata);
01212 return result;
01213 }
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224 patt = (Const *) other;
01225 pstatus = pattern_fixed_prefix(patt, ptype, collation,
01226 &prefix, &rest_selec);
01227
01228
01229
01230
01231 if (prefix && prefix->consttype != vartype)
01232 {
01233 char *prefixstr;
01234
01235 switch (prefix->consttype)
01236 {
01237 case TEXTOID:
01238 prefixstr = TextDatumGetCString(prefix->constvalue);
01239 break;
01240 case BYTEAOID:
01241 prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
01242 prefix->constvalue));
01243 break;
01244 default:
01245 elog(ERROR, "unrecognized consttype: %u",
01246 prefix->consttype);
01247 ReleaseVariableStats(vardata);
01248 return result;
01249 }
01250 prefix = string_to_const(prefixstr, vartype);
01251 pfree(prefixstr);
01252 }
01253
01254 if (pstatus == Pattern_Prefix_Exact)
01255 {
01256
01257
01258
01259 Oid eqopr = get_opfamily_member(opfamily, vartype, vartype,
01260 BTEqualStrategyNumber);
01261
01262 if (eqopr == InvalidOid)
01263 elog(ERROR, "no = operator for opfamily %u", opfamily);
01264 result = var_eq_const(&vardata, eqopr, prefix->constvalue,
01265 false, true);
01266 }
01267 else
01268 {
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285 Selectivity selec;
01286 int hist_size;
01287 FmgrInfo opproc;
01288 double nullfrac,
01289 mcv_selec,
01290 sumcommon;
01291
01292
01293 fmgr_info(get_opcode(operator), &opproc);
01294
01295 selec = histogram_selectivity(&vardata, &opproc, constval, true,
01296 10, 1, &hist_size);
01297
01298
01299 if (hist_size < 100)
01300 {
01301 Selectivity heursel;
01302 Selectivity prefixsel;
01303
01304 if (pstatus == Pattern_Prefix_Partial)
01305 prefixsel = prefix_selectivity(root, &vardata, vartype,
01306 opfamily, prefix);
01307 else
01308 prefixsel = 1.0;
01309 heursel = prefixsel * rest_selec;
01310
01311 if (selec < 0)
01312 selec = heursel;
01313 else
01314 {
01315
01316
01317
01318
01319
01320 double hist_weight = hist_size / 100.0;
01321
01322 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
01323 }
01324 }
01325
01326
01327 if (selec < 0.0001)
01328 selec = 0.0001;
01329 else if (selec > 0.9999)
01330 selec = 0.9999;
01331
01332
01333
01334
01335
01336
01337
01338 mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
01339 &sumcommon);
01340
01341 if (HeapTupleIsValid(vardata.statsTuple))
01342 nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
01343 else
01344 nullfrac = 0.0;
01345
01346
01347
01348
01349
01350
01351 selec *= 1.0 - nullfrac - sumcommon;
01352 selec += mcv_selec;
01353
01354
01355 CLAMP_PROBABILITY(selec);
01356 result = selec;
01357 }
01358
01359 if (prefix)
01360 {
01361 pfree(DatumGetPointer(prefix->constvalue));
01362 pfree(prefix);
01363 }
01364
01365 ReleaseVariableStats(vardata);
01366
01367 return negate ? (1.0 - result) : result;
01368 }
01369
01370
01371
01372
01373 Datum
01374 regexeqsel(PG_FUNCTION_ARGS)
01375 {
01376 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
01377 }
01378
01379
01380
01381
01382 Datum
01383 icregexeqsel(PG_FUNCTION_ARGS)
01384 {
01385 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
01386 }
01387
01388
01389
01390
01391 Datum
01392 likesel(PG_FUNCTION_ARGS)
01393 {
01394 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
01395 }
01396
01397
01398
01399
01400 Datum
01401 iclikesel(PG_FUNCTION_ARGS)
01402 {
01403 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
01404 }
01405
01406
01407
01408
01409 Datum
01410 regexnesel(PG_FUNCTION_ARGS)
01411 {
01412 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
01413 }
01414
01415
01416
01417
01418 Datum
01419 icregexnesel(PG_FUNCTION_ARGS)
01420 {
01421 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
01422 }
01423
01424
01425
01426
01427 Datum
01428 nlikesel(PG_FUNCTION_ARGS)
01429 {
01430 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
01431 }
01432
01433
01434
01435
01436 Datum
01437 icnlikesel(PG_FUNCTION_ARGS)
01438 {
01439 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
01440 }
01441
01442
01443
01444
01445 Selectivity
01446 booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
01447 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
01448 {
01449 VariableStatData vardata;
01450 double selec;
01451
01452 examine_variable(root, arg, varRelid, &vardata);
01453
01454 if (HeapTupleIsValid(vardata.statsTuple))
01455 {
01456 Form_pg_statistic stats;
01457 double freq_null;
01458 Datum *values;
01459 int nvalues;
01460 float4 *numbers;
01461 int nnumbers;
01462
01463 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
01464 freq_null = stats->stanullfrac;
01465
01466 if (get_attstatsslot(vardata.statsTuple,
01467 vardata.atttype, vardata.atttypmod,
01468 STATISTIC_KIND_MCV, InvalidOid,
01469 NULL,
01470 &values, &nvalues,
01471 &numbers, &nnumbers)
01472 && nnumbers > 0)
01473 {
01474 double freq_true;
01475 double freq_false;
01476
01477
01478
01479
01480 if (DatumGetBool(values[0]))
01481 freq_true = numbers[0];
01482 else
01483 freq_true = 1.0 - numbers[0] - freq_null;
01484
01485
01486
01487
01488
01489 freq_false = 1.0 - freq_true - freq_null;
01490
01491 switch (booltesttype)
01492 {
01493 case IS_UNKNOWN:
01494
01495 selec = freq_null;
01496 break;
01497 case IS_NOT_UNKNOWN:
01498
01499 selec = 1.0 - freq_null;
01500 break;
01501 case IS_TRUE:
01502
01503 selec = freq_true;
01504 break;
01505 case IS_NOT_TRUE:
01506
01507 selec = 1.0 - freq_true;
01508 break;
01509 case IS_FALSE:
01510
01511 selec = freq_false;
01512 break;
01513 case IS_NOT_FALSE:
01514
01515 selec = 1.0 - freq_false;
01516 break;
01517 default:
01518 elog(ERROR, "unrecognized booltesttype: %d",
01519 (int) booltesttype);
01520 selec = 0.0;
01521 break;
01522 }
01523
01524 free_attstatsslot(vardata.atttype, values, nvalues,
01525 numbers, nnumbers);
01526 }
01527 else
01528 {
01529
01530
01531
01532
01533
01534 switch (booltesttype)
01535 {
01536 case IS_UNKNOWN:
01537
01538
01539
01540
01541 selec = freq_null;
01542 break;
01543 case IS_NOT_UNKNOWN:
01544
01545
01546
01547
01548
01549 selec = 1.0 - freq_null;
01550 break;
01551 case IS_TRUE:
01552 case IS_NOT_TRUE:
01553 case IS_FALSE:
01554 case IS_NOT_FALSE:
01555 selec = (1.0 - freq_null) / 2.0;
01556 break;
01557 default:
01558 elog(ERROR, "unrecognized booltesttype: %d",
01559 (int) booltesttype);
01560 selec = 0.0;
01561 break;
01562 }
01563 }
01564 }
01565 else
01566 {
01567
01568
01569
01570
01571
01572
01573 switch (booltesttype)
01574 {
01575 case IS_UNKNOWN:
01576 selec = DEFAULT_UNK_SEL;
01577 break;
01578 case IS_NOT_UNKNOWN:
01579 selec = DEFAULT_NOT_UNK_SEL;
01580 break;
01581 case IS_TRUE:
01582 case IS_NOT_FALSE:
01583 selec = (double) clause_selectivity(root, arg,
01584 varRelid,
01585 jointype, sjinfo);
01586 break;
01587 case IS_FALSE:
01588 case IS_NOT_TRUE:
01589 selec = 1.0 - (double) clause_selectivity(root, arg,
01590 varRelid,
01591 jointype, sjinfo);
01592 break;
01593 default:
01594 elog(ERROR, "unrecognized booltesttype: %d",
01595 (int) booltesttype);
01596 selec = 0.0;
01597 break;
01598 }
01599 }
01600
01601 ReleaseVariableStats(vardata);
01602
01603
01604 CLAMP_PROBABILITY(selec);
01605
01606 return (Selectivity) selec;
01607 }
01608
01609
01610
01611
01612 Selectivity
01613 nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
01614 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
01615 {
01616 VariableStatData vardata;
01617 double selec;
01618
01619 examine_variable(root, arg, varRelid, &vardata);
01620
01621 if (HeapTupleIsValid(vardata.statsTuple))
01622 {
01623 Form_pg_statistic stats;
01624 double freq_null;
01625
01626 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
01627 freq_null = stats->stanullfrac;
01628
01629 switch (nulltesttype)
01630 {
01631 case IS_NULL:
01632
01633
01634
01635
01636 selec = freq_null;
01637 break;
01638 case IS_NOT_NULL:
01639
01640
01641
01642
01643
01644 selec = 1.0 - freq_null;
01645 break;
01646 default:
01647 elog(ERROR, "unrecognized nulltesttype: %d",
01648 (int) nulltesttype);
01649 return (Selectivity) 0;
01650 }
01651 }
01652 else
01653 {
01654
01655
01656
01657 switch (nulltesttype)
01658 {
01659 case IS_NULL:
01660 selec = DEFAULT_UNK_SEL;
01661 break;
01662 case IS_NOT_NULL:
01663 selec = DEFAULT_NOT_UNK_SEL;
01664 break;
01665 default:
01666 elog(ERROR, "unrecognized nulltesttype: %d",
01667 (int) nulltesttype);
01668 return (Selectivity) 0;
01669 }
01670 }
01671
01672 ReleaseVariableStats(vardata);
01673
01674
01675 CLAMP_PROBABILITY(selec);
01676
01677 return (Selectivity) selec;
01678 }
01679
01680
01681
01682
01683
01684
01685
01686
01687
01688 static Node *
01689 strip_array_coercion(Node *node)
01690 {
01691 for (;;)
01692 {
01693 if (node && IsA(node, ArrayCoerceExpr) &&
01694 ((ArrayCoerceExpr *) node)->elemfuncid == InvalidOid)
01695 {
01696 node = (Node *) ((ArrayCoerceExpr *) node)->arg;
01697 }
01698 else if (node && IsA(node, RelabelType))
01699 {
01700
01701 node = (Node *) ((RelabelType *) node)->arg;
01702 }
01703 else
01704 break;
01705 }
01706 return node;
01707 }
01708
01709
01710
01711
01712 Selectivity
01713 scalararraysel(PlannerInfo *root,
01714 ScalarArrayOpExpr *clause,
01715 bool is_join_clause,
01716 int varRelid,
01717 JoinType jointype,
01718 SpecialJoinInfo *sjinfo)
01719 {
01720 Oid operator = clause->opno;
01721 bool useOr = clause->useOr;
01722 bool isEquality = false;
01723 bool isInequality = false;
01724 Node *leftop;
01725 Node *rightop;
01726 Oid nominal_element_type;
01727 Oid nominal_element_collation;
01728 TypeCacheEntry *typentry;
01729 RegProcedure oprsel;
01730 FmgrInfo oprselproc;
01731 Selectivity s1;
01732 Selectivity s1disjoint;
01733
01734
01735 Assert(list_length(clause->args) == 2);
01736 leftop = (Node *) linitial(clause->args);
01737 rightop = (Node *) lsecond(clause->args);
01738
01739
01740 nominal_element_type = get_base_element_type(exprType(rightop));
01741 if (!OidIsValid(nominal_element_type))
01742 return (Selectivity) 0.5;
01743
01744 nominal_element_collation = exprCollation(rightop);
01745
01746
01747 rightop = strip_array_coercion(rightop);
01748
01749
01750
01751
01752
01753 typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
01754 if (OidIsValid(typentry->eq_opr))
01755 {
01756 if (operator == typentry->eq_opr)
01757 isEquality = true;
01758 else if (get_negator(operator) == typentry->eq_opr)
01759 isInequality = true;
01760 }
01761
01762
01763
01764
01765
01766
01767
01768 if ((isEquality || isInequality) && !is_join_clause)
01769 {
01770 s1 = scalararraysel_containment(root, leftop, rightop,
01771 nominal_element_type,
01772 isEquality, useOr, varRelid);
01773 if (s1 >= 0.0)
01774 return s1;
01775 }
01776
01777
01778
01779
01780
01781 if (is_join_clause)
01782 oprsel = get_oprjoin(operator);
01783 else
01784 oprsel = get_oprrest(operator);
01785 if (!oprsel)
01786 return (Selectivity) 0.5;
01787 fmgr_info(oprsel, &oprselproc);
01788
01789
01790
01791
01792
01793
01794
01795
01796
01797 if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
01798 isEquality = true;
01799 else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
01800 isInequality = true;
01801
01802
01803
01804
01805
01806
01807
01808
01809
01810
01811
01812
01813
01814 if (rightop && IsA(rightop, Const))
01815 {
01816 Datum arraydatum = ((Const *) rightop)->constvalue;
01817 bool arrayisnull = ((Const *) rightop)->constisnull;
01818 ArrayType *arrayval;
01819 int16 elmlen;
01820 bool elmbyval;
01821 char elmalign;
01822 int num_elems;
01823 Datum *elem_values;
01824 bool *elem_nulls;
01825 int i;
01826
01827 if (arrayisnull)
01828 return (Selectivity) 0.0;
01829 arrayval = DatumGetArrayTypeP(arraydatum);
01830 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
01831 &elmlen, &elmbyval, &elmalign);
01832 deconstruct_array(arrayval,
01833 ARR_ELEMTYPE(arrayval),
01834 elmlen, elmbyval, elmalign,
01835 &elem_values, &elem_nulls, &num_elems);
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
01852
01853 for (i = 0; i < num_elems; i++)
01854 {
01855 List *args;
01856 Selectivity s2;
01857
01858 args = list_make2(leftop,
01859 makeConst(nominal_element_type,
01860 -1,
01861 nominal_element_collation,
01862 elmlen,
01863 elem_values[i],
01864 elem_nulls[i],
01865 elmbyval));
01866 if (is_join_clause)
01867 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
01868 clause->inputcollid,
01869 PointerGetDatum(root),
01870 ObjectIdGetDatum(operator),
01871 PointerGetDatum(args),
01872 Int16GetDatum(jointype),
01873 PointerGetDatum(sjinfo)));
01874 else
01875 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
01876 clause->inputcollid,
01877 PointerGetDatum(root),
01878 ObjectIdGetDatum(operator),
01879 PointerGetDatum(args),
01880 Int32GetDatum(varRelid)));
01881
01882 if (useOr)
01883 {
01884 s1 = s1 + s2 - s1 * s2;
01885 if (isEquality)
01886 s1disjoint += s2;
01887 }
01888 else
01889 {
01890 s1 = s1 * s2;
01891 if (isInequality)
01892 s1disjoint += s2 - 1.0;
01893 }
01894 }
01895
01896
01897 if ((useOr ? isEquality : isInequality) &&
01898 s1disjoint >= 0.0 && s1disjoint <= 1.0)
01899 s1 = s1disjoint;
01900 }
01901 else if (rightop && IsA(rightop, ArrayExpr) &&
01902 !((ArrayExpr *) rightop)->multidims)
01903 {
01904 ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
01905 int16 elmlen;
01906 bool elmbyval;
01907 ListCell *l;
01908
01909 get_typlenbyval(arrayexpr->element_typeid,
01910 &elmlen, &elmbyval);
01911
01912
01913
01914
01915
01916
01917
01918
01919 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
01920
01921 foreach(l, arrayexpr->elements)
01922 {
01923 Node *elem = (Node *) lfirst(l);
01924 List *args;
01925 Selectivity s2;
01926
01927
01928
01929
01930
01931
01932 args = list_make2(leftop, elem);
01933 if (is_join_clause)
01934 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
01935 clause->inputcollid,
01936 PointerGetDatum(root),
01937 ObjectIdGetDatum(operator),
01938 PointerGetDatum(args),
01939 Int16GetDatum(jointype),
01940 PointerGetDatum(sjinfo)));
01941 else
01942 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
01943 clause->inputcollid,
01944 PointerGetDatum(root),
01945 ObjectIdGetDatum(operator),
01946 PointerGetDatum(args),
01947 Int32GetDatum(varRelid)));
01948
01949 if (useOr)
01950 {
01951 s1 = s1 + s2 - s1 * s2;
01952 if (isEquality)
01953 s1disjoint += s2;
01954 }
01955 else
01956 {
01957 s1 = s1 * s2;
01958 if (isInequality)
01959 s1disjoint += s2 - 1.0;
01960 }
01961 }
01962
01963
01964 if ((useOr ? isEquality : isInequality) &&
01965 s1disjoint >= 0.0 && s1disjoint <= 1.0)
01966 s1 = s1disjoint;
01967 }
01968 else
01969 {
01970 CaseTestExpr *dummyexpr;
01971 List *args;
01972 Selectivity s2;
01973 int i;
01974
01975
01976
01977
01978
01979
01980 dummyexpr = makeNode(CaseTestExpr);
01981 dummyexpr->typeId = nominal_element_type;
01982 dummyexpr->typeMod = -1;
01983 dummyexpr->collation = clause->inputcollid;
01984 args = list_make2(leftop, dummyexpr);
01985 if (is_join_clause)
01986 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
01987 clause->inputcollid,
01988 PointerGetDatum(root),
01989 ObjectIdGetDatum(operator),
01990 PointerGetDatum(args),
01991 Int16GetDatum(jointype),
01992 PointerGetDatum(sjinfo)));
01993 else
01994 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
01995 clause->inputcollid,
01996 PointerGetDatum(root),
01997 ObjectIdGetDatum(operator),
01998 PointerGetDatum(args),
01999 Int32GetDatum(varRelid)));
02000 s1 = useOr ? 0.0 : 1.0;
02001
02002
02003
02004
02005
02006
02007 for (i = 0; i < 10; i++)
02008 {
02009 if (useOr)
02010 s1 = s1 + s2 - s1 * s2;
02011 else
02012 s1 = s1 * s2;
02013 }
02014 }
02015
02016
02017 CLAMP_PROBABILITY(s1);
02018
02019 return s1;
02020 }
02021
02022
02023
02024
02025
02026
02027 int
02028 estimate_array_length(Node *arrayexpr)
02029 {
02030
02031 arrayexpr = strip_array_coercion(arrayexpr);
02032
02033 if (arrayexpr && IsA(arrayexpr, Const))
02034 {
02035 Datum arraydatum = ((Const *) arrayexpr)->constvalue;
02036 bool arrayisnull = ((Const *) arrayexpr)->constisnull;
02037 ArrayType *arrayval;
02038
02039 if (arrayisnull)
02040 return 0;
02041 arrayval = DatumGetArrayTypeP(arraydatum);
02042 return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
02043 }
02044 else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
02045 !((ArrayExpr *) arrayexpr)->multidims)
02046 {
02047 return list_length(((ArrayExpr *) arrayexpr)->elements);
02048 }
02049 else
02050 {
02051
02052 return 10;
02053 }
02054 }
02055
02056
02057
02058
02059
02060
02061
02062
02063
02064
02065 Selectivity
02066 rowcomparesel(PlannerInfo *root,
02067 RowCompareExpr *clause,
02068 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
02069 {
02070 Selectivity s1;
02071 Oid opno = linitial_oid(clause->opnos);
02072 Oid inputcollid = linitial_oid(clause->inputcollids);
02073 List *opargs;
02074 bool is_join_clause;
02075
02076
02077 opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
02078
02079
02080
02081
02082
02083
02084 if (varRelid != 0)
02085 {
02086
02087
02088
02089
02090 is_join_clause = false;
02091 }
02092 else if (sjinfo == NULL)
02093 {
02094
02095
02096
02097
02098 is_join_clause = false;
02099 }
02100 else
02101 {
02102
02103
02104
02105 is_join_clause = (NumRelids((Node *) opargs) > 1);
02106 }
02107
02108 if (is_join_clause)
02109 {
02110
02111 s1 = join_selectivity(root, opno,
02112 opargs,
02113 inputcollid,
02114 jointype,
02115 sjinfo);
02116 }
02117 else
02118 {
02119
02120 s1 = restriction_selectivity(root, opno,
02121 opargs,
02122 inputcollid,
02123 varRelid);
02124 }
02125
02126 return s1;
02127 }
02128
02129
02130
02131
02132 Datum
02133 eqjoinsel(PG_FUNCTION_ARGS)
02134 {
02135 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
02136 Oid operator = PG_GETARG_OID(1);
02137 List *args = (List *) PG_GETARG_POINTER(2);
02138
02139 #ifdef NOT_USED
02140 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
02141 #endif
02142 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
02143 double selec;
02144 VariableStatData vardata1;
02145 VariableStatData vardata2;
02146 bool join_is_reversed;
02147 RelOptInfo *inner_rel;
02148
02149 get_join_variables(root, args, sjinfo,
02150 &vardata1, &vardata2, &join_is_reversed);
02151
02152 switch (sjinfo->jointype)
02153 {
02154 case JOIN_INNER:
02155 case JOIN_LEFT:
02156 case JOIN_FULL:
02157 selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
02158 break;
02159 case JOIN_SEMI:
02160 case JOIN_ANTI:
02161
02162
02163
02164
02165
02166
02167
02168 inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
02169
02170 if (!join_is_reversed)
02171 selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
02172 inner_rel);
02173 else
02174 selec = eqjoinsel_semi(get_commutator(operator),
02175 &vardata2, &vardata1,
02176 inner_rel);
02177 break;
02178 default:
02179
02180 elog(ERROR, "unrecognized join type: %d",
02181 (int) sjinfo->jointype);
02182 selec = 0;
02183 break;
02184 }
02185
02186 ReleaseVariableStats(vardata1);
02187 ReleaseVariableStats(vardata2);
02188
02189 CLAMP_PROBABILITY(selec);
02190
02191 PG_RETURN_FLOAT8((float8) selec);
02192 }
02193
02194
02195
02196
02197
02198
02199
02200 static double
02201 eqjoinsel_inner(Oid operator,
02202 VariableStatData *vardata1, VariableStatData *vardata2)
02203 {
02204 double selec;
02205 double nd1;
02206 double nd2;
02207 bool isdefault1;
02208 bool isdefault2;
02209 Form_pg_statistic stats1 = NULL;
02210 Form_pg_statistic stats2 = NULL;
02211 bool have_mcvs1 = false;
02212 Datum *values1 = NULL;
02213 int nvalues1 = 0;
02214 float4 *numbers1 = NULL;
02215 int nnumbers1 = 0;
02216 bool have_mcvs2 = false;
02217 Datum *values2 = NULL;
02218 int nvalues2 = 0;
02219 float4 *numbers2 = NULL;
02220 int nnumbers2 = 0;
02221
02222 nd1 = get_variable_numdistinct(vardata1, &isdefault1);
02223 nd2 = get_variable_numdistinct(vardata2, &isdefault2);
02224
02225 if (HeapTupleIsValid(vardata1->statsTuple))
02226 {
02227 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
02228 have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
02229 vardata1->atttype,
02230 vardata1->atttypmod,
02231 STATISTIC_KIND_MCV,
02232 InvalidOid,
02233 NULL,
02234 &values1, &nvalues1,
02235 &numbers1, &nnumbers1);
02236 }
02237
02238 if (HeapTupleIsValid(vardata2->statsTuple))
02239 {
02240 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
02241 have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
02242 vardata2->atttype,
02243 vardata2->atttypmod,
02244 STATISTIC_KIND_MCV,
02245 InvalidOid,
02246 NULL,
02247 &values2, &nvalues2,
02248 &numbers2, &nnumbers2);
02249 }
02250
02251 if (have_mcvs1 && have_mcvs2)
02252 {
02253
02254
02255
02256
02257
02258
02259
02260
02261
02262
02263
02264
02265 FmgrInfo eqproc;
02266 bool *hasmatch1;
02267 bool *hasmatch2;
02268 double nullfrac1 = stats1->stanullfrac;
02269 double nullfrac2 = stats2->stanullfrac;
02270 double matchprodfreq,
02271 matchfreq1,
02272 matchfreq2,
02273 unmatchfreq1,
02274 unmatchfreq2,
02275 otherfreq1,
02276 otherfreq2,
02277 totalsel1,
02278 totalsel2;
02279 int i,
02280 nmatches;
02281
02282 fmgr_info(get_opcode(operator), &eqproc);
02283 hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
02284 hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
02285
02286
02287
02288
02289
02290
02291
02292 matchprodfreq = 0.0;
02293 nmatches = 0;
02294 for (i = 0; i < nvalues1; i++)
02295 {
02296 int j;
02297
02298 for (j = 0; j < nvalues2; j++)
02299 {
02300 if (hasmatch2[j])
02301 continue;
02302 if (DatumGetBool(FunctionCall2Coll(&eqproc,
02303 DEFAULT_COLLATION_OID,
02304 values1[i],
02305 values2[j])))
02306 {
02307 hasmatch1[i] = hasmatch2[j] = true;
02308 matchprodfreq += numbers1[i] * numbers2[j];
02309 nmatches++;
02310 break;
02311 }
02312 }
02313 }
02314 CLAMP_PROBABILITY(matchprodfreq);
02315
02316 matchfreq1 = unmatchfreq1 = 0.0;
02317 for (i = 0; i < nvalues1; i++)
02318 {
02319 if (hasmatch1[i])
02320 matchfreq1 += numbers1[i];
02321 else
02322 unmatchfreq1 += numbers1[i];
02323 }
02324 CLAMP_PROBABILITY(matchfreq1);
02325 CLAMP_PROBABILITY(unmatchfreq1);
02326 matchfreq2 = unmatchfreq2 = 0.0;
02327 for (i = 0; i < nvalues2; i++)
02328 {
02329 if (hasmatch2[i])
02330 matchfreq2 += numbers2[i];
02331 else
02332 unmatchfreq2 += numbers2[i];
02333 }
02334 CLAMP_PROBABILITY(matchfreq2);
02335 CLAMP_PROBABILITY(unmatchfreq2);
02336 pfree(hasmatch1);
02337 pfree(hasmatch2);
02338
02339
02340
02341
02342
02343 otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
02344 otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
02345 CLAMP_PROBABILITY(otherfreq1);
02346 CLAMP_PROBABILITY(otherfreq2);
02347
02348
02349
02350
02351
02352
02353
02354
02355
02356 totalsel1 = matchprodfreq;
02357 if (nd2 > nvalues2)
02358 totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
02359 if (nd2 > nmatches)
02360 totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
02361 (nd2 - nmatches);
02362
02363 totalsel2 = matchprodfreq;
02364 if (nd1 > nvalues1)
02365 totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
02366 if (nd1 > nmatches)
02367 totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
02368 (nd1 - nmatches);
02369
02370
02371
02372
02373
02374
02375
02376 selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
02377 }
02378 else
02379 {
02380
02381
02382
02383
02384
02385
02386
02387
02388
02389
02390
02391
02392
02393
02394
02395
02396
02397
02398
02399
02400 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
02401 double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
02402
02403 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
02404 if (nd1 > nd2)
02405 selec /= nd1;
02406 else
02407 selec /= nd2;
02408 }
02409
02410 if (have_mcvs1)
02411 free_attstatsslot(vardata1->atttype, values1, nvalues1,
02412 numbers1, nnumbers1);
02413 if (have_mcvs2)
02414 free_attstatsslot(vardata2->atttype, values2, nvalues2,
02415 numbers2, nnumbers2);
02416
02417 return selec;
02418 }
02419
02420
02421
02422
02423
02424
02425
02426 static double
02427 eqjoinsel_semi(Oid operator,
02428 VariableStatData *vardata1, VariableStatData *vardata2,
02429 RelOptInfo *inner_rel)
02430 {
02431 double selec;
02432 double nd1;
02433 double nd2;
02434 bool isdefault1;
02435 bool isdefault2;
02436 Form_pg_statistic stats1 = NULL;
02437 bool have_mcvs1 = false;
02438 Datum *values1 = NULL;
02439 int nvalues1 = 0;
02440 float4 *numbers1 = NULL;
02441 int nnumbers1 = 0;
02442 bool have_mcvs2 = false;
02443 Datum *values2 = NULL;
02444 int nvalues2 = 0;
02445 float4 *numbers2 = NULL;
02446 int nnumbers2 = 0;
02447
02448 nd1 = get_variable_numdistinct(vardata1, &isdefault1);
02449 nd2 = get_variable_numdistinct(vardata2, &isdefault2);
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466 if (vardata2->rel)
02467 nd2 = Min(nd2, vardata2->rel->rows);
02468 nd2 = Min(nd2, inner_rel->rows);
02469
02470 if (HeapTupleIsValid(vardata1->statsTuple))
02471 {
02472 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
02473 have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
02474 vardata1->atttype,
02475 vardata1->atttypmod,
02476 STATISTIC_KIND_MCV,
02477 InvalidOid,
02478 NULL,
02479 &values1, &nvalues1,
02480 &numbers1, &nnumbers1);
02481 }
02482
02483 if (HeapTupleIsValid(vardata2->statsTuple))
02484 {
02485 have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
02486 vardata2->atttype,
02487 vardata2->atttypmod,
02488 STATISTIC_KIND_MCV,
02489 InvalidOid,
02490 NULL,
02491 &values2, &nvalues2,
02492 &numbers2, &nnumbers2);
02493 }
02494
02495 if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
02496 {
02497
02498
02499
02500
02501
02502
02503
02504
02505 FmgrInfo eqproc;
02506 bool *hasmatch1;
02507 bool *hasmatch2;
02508 double nullfrac1 = stats1->stanullfrac;
02509 double matchfreq1,
02510 uncertainfrac,
02511 uncertain;
02512 int i,
02513 nmatches,
02514 clamped_nvalues2;
02515
02516
02517
02518
02519
02520
02521
02522
02523 clamped_nvalues2 = Min(nvalues2, nd2);
02524
02525 fmgr_info(get_opcode(operator), &eqproc);
02526 hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
02527 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
02528
02529
02530
02531
02532
02533
02534
02535 nmatches = 0;
02536 for (i = 0; i < nvalues1; i++)
02537 {
02538 int j;
02539
02540 for (j = 0; j < clamped_nvalues2; j++)
02541 {
02542 if (hasmatch2[j])
02543 continue;
02544 if (DatumGetBool(FunctionCall2Coll(&eqproc,
02545 DEFAULT_COLLATION_OID,
02546 values1[i],
02547 values2[j])))
02548 {
02549 hasmatch1[i] = hasmatch2[j] = true;
02550 nmatches++;
02551 break;
02552 }
02553 }
02554 }
02555
02556 matchfreq1 = 0.0;
02557 for (i = 0; i < nvalues1; i++)
02558 {
02559 if (hasmatch1[i])
02560 matchfreq1 += numbers1[i];
02561 }
02562 CLAMP_PROBABILITY(matchfreq1);
02563 pfree(hasmatch1);
02564 pfree(hasmatch2);
02565
02566
02567
02568
02569
02570
02571
02572
02573
02574
02575
02576
02577
02578
02579
02580
02581 if (!isdefault1 && !isdefault2)
02582 {
02583 nd1 -= nmatches;
02584 nd2 -= nmatches;
02585 if (nd1 <= nd2 || nd2 < 0)
02586 uncertainfrac = 1.0;
02587 else
02588 uncertainfrac = nd2 / nd1;
02589 }
02590 else
02591 uncertainfrac = 0.5;
02592 uncertain = 1.0 - matchfreq1 - nullfrac1;
02593 CLAMP_PROBABILITY(uncertain);
02594 selec = matchfreq1 + uncertainfrac * uncertain;
02595 }
02596 else
02597 {
02598
02599
02600
02601
02602 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
02603
02604 if (!isdefault1 && !isdefault2)
02605 {
02606 if (nd1 <= nd2 || nd2 < 0)
02607 selec = 1.0 - nullfrac1;
02608 else
02609 selec = (nd2 / nd1) * (1.0 - nullfrac1);
02610 }
02611 else
02612 selec = 0.5 * (1.0 - nullfrac1);
02613 }
02614
02615 if (have_mcvs1)
02616 free_attstatsslot(vardata1->atttype, values1, nvalues1,
02617 numbers1, nnumbers1);
02618 if (have_mcvs2)
02619 free_attstatsslot(vardata2->atttype, values2, nvalues2,
02620 numbers2, nnumbers2);
02621
02622 return selec;
02623 }
02624
02625
02626
02627
02628 Datum
02629 neqjoinsel(PG_FUNCTION_ARGS)
02630 {
02631 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
02632 Oid operator = PG_GETARG_OID(1);
02633 List *args = (List *) PG_GETARG_POINTER(2);
02634 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
02635 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
02636 Oid eqop;
02637 float8 result;
02638
02639
02640
02641
02642
02643 eqop = get_negator(operator);
02644 if (eqop)
02645 {
02646 result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
02647 PointerGetDatum(root),
02648 ObjectIdGetDatum(eqop),
02649 PointerGetDatum(args),
02650 Int16GetDatum(jointype),
02651 PointerGetDatum(sjinfo)));
02652 }
02653 else
02654 {
02655
02656 result = DEFAULT_EQ_SEL;
02657 }
02658 result = 1.0 - result;
02659 PG_RETURN_FLOAT8(result);
02660 }
02661
02662
02663
02664
02665 Datum
02666 scalarltjoinsel(PG_FUNCTION_ARGS)
02667 {
02668 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
02669 }
02670
02671
02672
02673
02674 Datum
02675 scalargtjoinsel(PG_FUNCTION_ARGS)
02676 {
02677 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
02678 }
02679
02680
02681
02682
02683 static double
02684 patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
02685 {
02686
02687 return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
02688 }
02689
02690
02691
02692
02693 Datum
02694 regexeqjoinsel(PG_FUNCTION_ARGS)
02695 {
02696 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
02697 }
02698
02699
02700
02701
02702 Datum
02703 icregexeqjoinsel(PG_FUNCTION_ARGS)
02704 {
02705 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
02706 }
02707
02708
02709
02710
02711 Datum
02712 likejoinsel(PG_FUNCTION_ARGS)
02713 {
02714 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
02715 }
02716
02717
02718
02719
02720 Datum
02721 iclikejoinsel(PG_FUNCTION_ARGS)
02722 {
02723 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
02724 }
02725
02726
02727
02728
02729 Datum
02730 regexnejoinsel(PG_FUNCTION_ARGS)
02731 {
02732 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
02733 }
02734
02735
02736
02737
02738 Datum
02739 icregexnejoinsel(PG_FUNCTION_ARGS)
02740 {
02741 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
02742 }
02743
02744
02745
02746
02747 Datum
02748 nlikejoinsel(PG_FUNCTION_ARGS)
02749 {
02750 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
02751 }
02752
02753
02754
02755
02756 Datum
02757 icnlikejoinsel(PG_FUNCTION_ARGS)
02758 {
02759 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
02760 }
02761
02762
02763
02764
02765
02766
02767
02768
02769
02770
02771
02772
02773
02774
02775
02776
02777
02778
02779
02780
02781
02782
02783 void
02784 mergejoinscansel(PlannerInfo *root, Node *clause,
02785 Oid opfamily, int strategy, bool nulls_first,
02786 Selectivity *leftstart, Selectivity *leftend,
02787 Selectivity *rightstart, Selectivity *rightend)
02788 {
02789 Node *left,
02790 *right;
02791 VariableStatData leftvar,
02792 rightvar;
02793 int op_strategy;
02794 Oid op_lefttype;
02795 Oid op_righttype;
02796 Oid opno,
02797 lsortop,
02798 rsortop,
02799 lstatop,
02800 rstatop,
02801 ltop,
02802 leop,
02803 revltop,
02804 revleop;
02805 bool isgt;
02806 Datum leftmin,
02807 leftmax,
02808 rightmin,
02809 rightmax;
02810 double selec;
02811
02812
02813
02814 *leftstart = *rightstart = 0.0;
02815 *leftend = *rightend = 1.0;
02816
02817
02818 if (!is_opclause(clause))
02819 return;
02820 opno = ((OpExpr *) clause)->opno;
02821 left = get_leftop((Expr *) clause);
02822 right = get_rightop((Expr *) clause);
02823 if (!right)
02824 return;
02825
02826
02827 examine_variable(root, left, 0, &leftvar);
02828 examine_variable(root, right, 0, &rightvar);
02829
02830
02831 get_op_opfamily_properties(opno, opfamily, false,
02832 &op_strategy,
02833 &op_lefttype,
02834 &op_righttype);
02835 Assert(op_strategy == BTEqualStrategyNumber);
02836
02837
02838
02839
02840
02841
02842
02843
02844 switch (strategy)
02845 {
02846 case BTLessStrategyNumber:
02847 isgt = false;
02848 if (op_lefttype == op_righttype)
02849 {
02850
02851 ltop = get_opfamily_member(opfamily,
02852 op_lefttype, op_righttype,
02853 BTLessStrategyNumber);
02854 leop = get_opfamily_member(opfamily,
02855 op_lefttype, op_righttype,
02856 BTLessEqualStrategyNumber);
02857 lsortop = ltop;
02858 rsortop = ltop;
02859 lstatop = lsortop;
02860 rstatop = rsortop;
02861 revltop = ltop;
02862 revleop = leop;
02863 }
02864 else
02865 {
02866 ltop = get_opfamily_member(opfamily,
02867 op_lefttype, op_righttype,
02868 BTLessStrategyNumber);
02869 leop = get_opfamily_member(opfamily,
02870 op_lefttype, op_righttype,
02871 BTLessEqualStrategyNumber);
02872 lsortop = get_opfamily_member(opfamily,
02873 op_lefttype, op_lefttype,
02874 BTLessStrategyNumber);
02875 rsortop = get_opfamily_member(opfamily,
02876 op_righttype, op_righttype,
02877 BTLessStrategyNumber);
02878 lstatop = lsortop;
02879 rstatop = rsortop;
02880 revltop = get_opfamily_member(opfamily,
02881 op_righttype, op_lefttype,
02882 BTLessStrategyNumber);
02883 revleop = get_opfamily_member(opfamily,
02884 op_righttype, op_lefttype,
02885 BTLessEqualStrategyNumber);
02886 }
02887 break;
02888 case BTGreaterStrategyNumber:
02889
02890 isgt = true;
02891 if (op_lefttype == op_righttype)
02892 {
02893
02894 ltop = get_opfamily_member(opfamily,
02895 op_lefttype, op_righttype,
02896 BTGreaterStrategyNumber);
02897 leop = get_opfamily_member(opfamily,
02898 op_lefttype, op_righttype,
02899 BTGreaterEqualStrategyNumber);
02900 lsortop = ltop;
02901 rsortop = ltop;
02902 lstatop = get_opfamily_member(opfamily,
02903 op_lefttype, op_lefttype,
02904 BTLessStrategyNumber);
02905 rstatop = lstatop;
02906 revltop = ltop;
02907 revleop = leop;
02908 }
02909 else
02910 {
02911 ltop = get_opfamily_member(opfamily,
02912 op_lefttype, op_righttype,
02913 BTGreaterStrategyNumber);
02914 leop = get_opfamily_member(opfamily,
02915 op_lefttype, op_righttype,
02916 BTGreaterEqualStrategyNumber);
02917 lsortop = get_opfamily_member(opfamily,
02918 op_lefttype, op_lefttype,
02919 BTGreaterStrategyNumber);
02920 rsortop = get_opfamily_member(opfamily,
02921 op_righttype, op_righttype,
02922 BTGreaterStrategyNumber);
02923 lstatop = get_opfamily_member(opfamily,
02924 op_lefttype, op_lefttype,
02925 BTLessStrategyNumber);
02926 rstatop = get_opfamily_member(opfamily,
02927 op_righttype, op_righttype,
02928 BTLessStrategyNumber);
02929 revltop = get_opfamily_member(opfamily,
02930 op_righttype, op_lefttype,
02931 BTGreaterStrategyNumber);
02932 revleop = get_opfamily_member(opfamily,
02933 op_righttype, op_lefttype,
02934 BTGreaterEqualStrategyNumber);
02935 }
02936 break;
02937 default:
02938 goto fail;
02939 }
02940
02941 if (!OidIsValid(lsortop) ||
02942 !OidIsValid(rsortop) ||
02943 !OidIsValid(lstatop) ||
02944 !OidIsValid(rstatop) ||
02945 !OidIsValid(ltop) ||
02946 !OidIsValid(leop) ||
02947 !OidIsValid(revltop) ||
02948 !OidIsValid(revleop))
02949 goto fail;
02950
02951
02952 if (!isgt)
02953 {
02954 if (!get_variable_range(root, &leftvar, lstatop,
02955 &leftmin, &leftmax))
02956 goto fail;
02957 if (!get_variable_range(root, &rightvar, rstatop,
02958 &rightmin, &rightmax))
02959 goto fail;
02960 }
02961 else
02962 {
02963
02964 if (!get_variable_range(root, &leftvar, lstatop,
02965 &leftmax, &leftmin))
02966 goto fail;
02967 if (!get_variable_range(root, &rightvar, rstatop,
02968 &rightmax, &rightmin))
02969 goto fail;
02970 }
02971
02972
02973
02974
02975
02976
02977 selec = scalarineqsel(root, leop, isgt, &leftvar,
02978 rightmax, op_righttype);
02979 if (selec != DEFAULT_INEQ_SEL)
02980 *leftend = selec;
02981
02982
02983 selec = scalarineqsel(root, revleop, isgt, &rightvar,
02984 leftmax, op_lefttype);
02985 if (selec != DEFAULT_INEQ_SEL)
02986 *rightend = selec;
02987
02988
02989
02990
02991
02992
02993
02994 if (*leftend > *rightend)
02995 *leftend = 1.0;
02996 else if (*leftend < *rightend)
02997 *rightend = 1.0;
02998 else
02999 *leftend = *rightend = 1.0;
03000
03001
03002
03003
03004
03005
03006
03007 selec = scalarineqsel(root, ltop, isgt, &leftvar,
03008 rightmin, op_righttype);
03009 if (selec != DEFAULT_INEQ_SEL)
03010 *leftstart = selec;
03011
03012
03013 selec = scalarineqsel(root, revltop, isgt, &rightvar,
03014 leftmin, op_lefttype);
03015 if (selec != DEFAULT_INEQ_SEL)
03016 *rightstart = selec;
03017
03018
03019
03020
03021
03022
03023
03024 if (*leftstart < *rightstart)
03025 *leftstart = 0.0;
03026 else if (*leftstart > *rightstart)
03027 *rightstart = 0.0;
03028 else
03029 *leftstart = *rightstart = 0.0;
03030
03031
03032
03033
03034
03035
03036
03037 if (nulls_first)
03038 {
03039 Form_pg_statistic stats;
03040
03041 if (HeapTupleIsValid(leftvar.statsTuple))
03042 {
03043 stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
03044 *leftstart += stats->stanullfrac;
03045 CLAMP_PROBABILITY(*leftstart);
03046 *leftend += stats->stanullfrac;
03047 CLAMP_PROBABILITY(*leftend);
03048 }
03049 if (HeapTupleIsValid(rightvar.statsTuple))
03050 {
03051 stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
03052 *rightstart += stats->stanullfrac;
03053 CLAMP_PROBABILITY(*rightstart);
03054 *rightend += stats->stanullfrac;
03055 CLAMP_PROBABILITY(*rightend);
03056 }
03057 }
03058
03059
03060 if (*leftstart >= *leftend)
03061 {
03062 *leftstart = 0.0;
03063 *leftend = 1.0;
03064 }
03065 if (*rightstart >= *rightend)
03066 {
03067 *rightstart = 0.0;
03068 *rightend = 1.0;
03069 }
03070
03071 fail:
03072 ReleaseVariableStats(leftvar);
03073 ReleaseVariableStats(rightvar);
03074 }
03075
03076
03077
03078
03079
03080
03081
03082 typedef struct
03083 {
03084 Node *var;
03085 RelOptInfo *rel;
03086 double ndistinct;
03087 } GroupVarInfo;
03088
03089 static List *
03090 add_unique_group_var(PlannerInfo *root, List *varinfos,
03091 Node *var, VariableStatData *vardata)
03092 {
03093 GroupVarInfo *varinfo;
03094 double ndistinct;
03095 bool isdefault;
03096 ListCell *lc;
03097
03098 ndistinct = get_variable_numdistinct(vardata, &isdefault);
03099
03100
03101 lc = list_head(varinfos);
03102 while (lc)
03103 {
03104 varinfo = (GroupVarInfo *) lfirst(lc);
03105
03106
03107 lc = lnext(lc);
03108
03109
03110 if (equal(var, varinfo->var))
03111 return varinfos;
03112
03113
03114
03115
03116
03117 if (vardata->rel != varinfo->rel &&
03118 exprs_known_equal(root, var, varinfo->var))
03119 {
03120 if (varinfo->ndistinct <= ndistinct)
03121 {
03122
03123 return varinfos;
03124 }
03125 else
03126 {
03127
03128 varinfos = list_delete_ptr(varinfos, varinfo);
03129 }
03130 }
03131 }
03132
03133 varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
03134
03135 varinfo->var = var;
03136 varinfo->rel = vardata->rel;
03137 varinfo->ndistinct = ndistinct;
03138 varinfos = lappend(varinfos, varinfo);
03139 return varinfos;
03140 }
03141
03142
03143
03144
03145
03146
03147
03148
03149
03150
03151
03152
03153
03154
03155
03156
03157
03158
03159
03160
03161
03162
03163
03164
03165
03166
03167
03168
03169
03170
03171
03172
03173
03174
03175
03176
03177
03178
03179
03180
03181
03182
03183
03184
03185
03186
03187
03188
03189
03190
03191
03192
03193
03194
03195
03196
03197
03198
03199
03200
03201
03202
03203
03204
03205 double
03206 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
03207 {
03208 List *varinfos = NIL;
03209 double numdistinct;
03210 ListCell *l;
03211
03212
03213
03214
03215
03216
03217 if (groupExprs == NIL)
03218 return 1.0;
03219
03220
03221
03222
03223
03224
03225
03226
03227 numdistinct = 1.0;
03228
03229 foreach(l, groupExprs)
03230 {
03231 Node *groupexpr = (Node *) lfirst(l);
03232 VariableStatData vardata;
03233 List *varshere;
03234 ListCell *l2;
03235
03236
03237 if (exprType(groupexpr) == BOOLOID)
03238 {
03239 numdistinct *= 2.0;
03240 continue;
03241 }
03242
03243
03244
03245
03246
03247
03248 examine_variable(root, groupexpr, 0, &vardata);
03249 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
03250 {
03251 varinfos = add_unique_group_var(root, varinfos,
03252 groupexpr, &vardata);
03253 ReleaseVariableStats(vardata);
03254 continue;
03255 }
03256 ReleaseVariableStats(vardata);
03257
03258
03259
03260
03261
03262
03263
03264 varshere = pull_var_clause(groupexpr,
03265 PVC_RECURSE_AGGREGATES,
03266 PVC_RECURSE_PLACEHOLDERS);
03267
03268
03269
03270
03271
03272
03273
03274 if (varshere == NIL)
03275 {
03276 if (contain_volatile_functions(groupexpr))
03277 return input_rows;
03278 continue;
03279 }
03280
03281
03282
03283
03284 foreach(l2, varshere)
03285 {
03286 Node *var = (Node *) lfirst(l2);
03287
03288 examine_variable(root, var, 0, &vardata);
03289 varinfos = add_unique_group_var(root, varinfos, var, &vardata);
03290 ReleaseVariableStats(vardata);
03291 }
03292 }
03293
03294
03295
03296
03297
03298 if (varinfos == NIL)
03299 {
03300
03301 if (numdistinct > input_rows)
03302 numdistinct = input_rows;
03303 return numdistinct;
03304 }
03305
03306
03307
03308
03309
03310
03311
03312
03313
03314 do
03315 {
03316 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
03317 RelOptInfo *rel = varinfo1->rel;
03318 double reldistinct = varinfo1->ndistinct;
03319 double relmaxndistinct = reldistinct;
03320 int relvarcount = 1;
03321 List *newvarinfos = NIL;
03322
03323
03324
03325
03326
03327 for_each_cell(l, lnext(list_head(varinfos)))
03328 {
03329 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
03330
03331 if (varinfo2->rel == varinfo1->rel)
03332 {
03333 reldistinct *= varinfo2->ndistinct;
03334 if (relmaxndistinct < varinfo2->ndistinct)
03335 relmaxndistinct = varinfo2->ndistinct;
03336 relvarcount++;
03337 }
03338 else
03339 {
03340
03341 newvarinfos = lcons(varinfo2, newvarinfos);
03342 }
03343 }
03344
03345
03346
03347
03348 Assert(rel->reloptkind == RELOPT_BASEREL);
03349 if (rel->tuples > 0)
03350 {
03351
03352
03353
03354
03355
03356
03357
03358 double clamp = rel->tuples;
03359
03360 if (relvarcount > 1)
03361 {
03362 clamp *= 0.1;
03363 if (clamp < relmaxndistinct)
03364 {
03365 clamp = relmaxndistinct;
03366
03367 if (clamp > rel->tuples)
03368 clamp = rel->tuples;
03369 }
03370 }
03371 if (reldistinct > clamp)
03372 reldistinct = clamp;
03373
03374
03375
03376
03377 reldistinct *= rel->rows / rel->tuples;
03378
03379
03380
03381
03382 numdistinct *= reldistinct;
03383 }
03384
03385 varinfos = newvarinfos;
03386 } while (varinfos != NIL);
03387
03388 numdistinct = ceil(numdistinct);
03389
03390
03391 if (numdistinct > input_rows)
03392 numdistinct = input_rows;
03393 if (numdistinct < 1.0)
03394 numdistinct = 1.0;
03395
03396 return numdistinct;
03397 }
03398
03399
03400
03401
03402
03403
03404
03405
03406
03407
03408
03409
03410
03411
03412
03413
03414
03415
03416
03417
03418
03419
03420
03421
03422
03423
03424
03425
03426
03427
03428
03429 Selectivity
03430 estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
03431 {
03432 VariableStatData vardata;
03433 double estfract,
03434 ndistinct,
03435 stanullfrac,
03436 mcvfreq,
03437 avgfreq;
03438 bool isdefault;
03439 float4 *numbers;
03440 int nnumbers;
03441
03442 examine_variable(root, hashkey, 0, &vardata);
03443
03444
03445 ndistinct = get_variable_numdistinct(&vardata, &isdefault);
03446
03447
03448 if (isdefault)
03449 {
03450 ReleaseVariableStats(vardata);
03451 return (Selectivity) 0.1;
03452 }
03453
03454
03455 if (HeapTupleIsValid(vardata.statsTuple))
03456 {
03457 Form_pg_statistic stats;
03458
03459 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
03460 stanullfrac = stats->stanullfrac;
03461 }
03462 else
03463 stanullfrac = 0.0;
03464
03465
03466 avgfreq = (1.0 - stanullfrac) / ndistinct;
03467
03468
03469
03470
03471
03472
03473
03474
03475
03476 if (vardata.rel)
03477 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
03478
03479
03480
03481
03482
03483
03484 if (ndistinct > nbuckets)
03485 estfract = 1.0 / nbuckets;
03486 else
03487 estfract = 1.0 / ndistinct;
03488
03489
03490
03491
03492 mcvfreq = 0.0;
03493
03494 if (HeapTupleIsValid(vardata.statsTuple))
03495 {
03496 if (get_attstatsslot(vardata.statsTuple,
03497 vardata.atttype, vardata.atttypmod,
03498 STATISTIC_KIND_MCV, InvalidOid,
03499 NULL,
03500 NULL, NULL,
03501 &numbers, &nnumbers))
03502 {
03503
03504
03505
03506 if (nnumbers > 0)
03507 mcvfreq = numbers[0];
03508 free_attstatsslot(vardata.atttype, NULL, 0,
03509 numbers, nnumbers);
03510 }
03511 }
03512
03513
03514
03515
03516 if (avgfreq > 0.0 && mcvfreq > avgfreq)
03517 estfract *= mcvfreq / avgfreq;
03518
03519
03520
03521
03522
03523
03524 if (estfract < 1.0e-6)
03525 estfract = 1.0e-6;
03526 else if (estfract > 1.0)
03527 estfract = 1.0;
03528
03529 ReleaseVariableStats(vardata);
03530
03531 return (Selectivity) estfract;
03532 }
03533
03534
03535
03536
03537
03538
03539
03540
03541
03542
03543
03544
03545
03546
03547
03548
03549
03550
03551
03552
03553
03554
03555
03556
03557
03558
03559
03560
03561
03562
03563
03564
03565
03566
03567
03568
03569
03570
03571 static bool
03572 convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
03573 Datum lobound, Datum hibound, Oid boundstypid,
03574 double *scaledlobound, double *scaledhibound)
03575 {
03576
03577
03578
03579
03580
03581
03582
03583
03584
03585
03586
03587
03588
03589
03590
03591 switch (valuetypid)
03592 {
03593
03594
03595
03596 case BOOLOID:
03597 case INT2OID:
03598 case INT4OID:
03599 case INT8OID:
03600 case FLOAT4OID:
03601 case FLOAT8OID:
03602 case NUMERICOID:
03603 case OIDOID:
03604 case REGPROCOID:
03605 case REGPROCEDUREOID:
03606 case REGOPEROID:
03607 case REGOPERATOROID:
03608 case REGCLASSOID:
03609 case REGTYPEOID:
03610 case REGCONFIGOID:
03611 case REGDICTIONARYOID:
03612 *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
03613 *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
03614 *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
03615 return true;
03616
03617
03618
03619
03620 case CHAROID:
03621 case BPCHAROID:
03622 case VARCHAROID:
03623 case TEXTOID:
03624 case NAMEOID:
03625 {
03626 char *valstr = convert_string_datum(value, valuetypid);
03627 char *lostr = convert_string_datum(lobound, boundstypid);
03628 char *histr = convert_string_datum(hibound, boundstypid);
03629
03630 convert_string_to_scalar(valstr, scaledvalue,
03631 lostr, scaledlobound,
03632 histr, scaledhibound);
03633 pfree(valstr);
03634 pfree(lostr);
03635 pfree(histr);
03636 return true;
03637 }
03638
03639
03640
03641
03642 case BYTEAOID:
03643 {
03644 convert_bytea_to_scalar(value, scaledvalue,
03645 lobound, scaledlobound,
03646 hibound, scaledhibound);
03647 return true;
03648 }
03649
03650
03651
03652
03653 case TIMESTAMPOID:
03654 case TIMESTAMPTZOID:
03655 case ABSTIMEOID:
03656 case DATEOID:
03657 case INTERVALOID:
03658 case RELTIMEOID:
03659 case TINTERVALOID:
03660 case TIMEOID:
03661 case TIMETZOID:
03662 *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
03663 *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
03664 *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
03665 return true;
03666
03667
03668
03669
03670 case INETOID:
03671 case CIDROID:
03672 case MACADDROID:
03673 *scaledvalue = convert_network_to_scalar(value, valuetypid);
03674 *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
03675 *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
03676 return true;
03677 }
03678
03679 *scaledvalue = *scaledlobound = *scaledhibound = 0;
03680 return false;
03681 }
03682
03683
03684
03685
03686 static double
03687 convert_numeric_to_scalar(Datum value, Oid typid)
03688 {
03689 switch (typid)
03690 {
03691 case BOOLOID:
03692 return (double) DatumGetBool(value);
03693 case INT2OID:
03694 return (double) DatumGetInt16(value);
03695 case INT4OID:
03696 return (double) DatumGetInt32(value);
03697 case INT8OID:
03698 return (double) DatumGetInt64(value);
03699 case FLOAT4OID:
03700 return (double) DatumGetFloat4(value);
03701 case FLOAT8OID:
03702 return (double) DatumGetFloat8(value);
03703 case NUMERICOID:
03704
03705 return (double)
03706 DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
03707 value));
03708 case OIDOID:
03709 case REGPROCOID:
03710 case REGPROCEDUREOID:
03711 case REGOPEROID:
03712 case REGOPERATOROID:
03713 case REGCLASSOID:
03714 case REGTYPEOID:
03715 case REGCONFIGOID:
03716 case REGDICTIONARYOID:
03717
03718 return (double) DatumGetObjectId(value);
03719 }
03720
03721
03722
03723
03724
03725 elog(ERROR, "unsupported type: %u", typid);
03726 return 0;
03727 }
03728
03729
03730
03731
03732
03733
03734
03735
03736
03737
03738
03739
03740
03741
03742
03743
03744
03745
03746
03747
03748
03749 static void
03750 convert_string_to_scalar(char *value,
03751 double *scaledvalue,
03752 char *lobound,
03753 double *scaledlobound,
03754 char *hibound,
03755 double *scaledhibound)
03756 {
03757 int rangelo,
03758 rangehi;
03759 char *sptr;
03760
03761 rangelo = rangehi = (unsigned char) hibound[0];
03762 for (sptr = lobound; *sptr; sptr++)
03763 {
03764 if (rangelo > (unsigned char) *sptr)
03765 rangelo = (unsigned char) *sptr;
03766 if (rangehi < (unsigned char) *sptr)
03767 rangehi = (unsigned char) *sptr;
03768 }
03769 for (sptr = hibound; *sptr; sptr++)
03770 {
03771 if (rangelo > (unsigned char) *sptr)
03772 rangelo = (unsigned char) *sptr;
03773 if (rangehi < (unsigned char) *sptr)
03774 rangehi = (unsigned char) *sptr;
03775 }
03776
03777 if (rangelo <= 'Z' && rangehi >= 'A')
03778 {
03779 if (rangelo > 'A')
03780 rangelo = 'A';
03781 if (rangehi < 'Z')
03782 rangehi = 'Z';
03783 }
03784
03785 if (rangelo <= 'z' && rangehi >= 'a')
03786 {
03787 if (rangelo > 'a')
03788 rangelo = 'a';
03789 if (rangehi < 'z')
03790 rangehi = 'z';
03791 }
03792
03793 if (rangelo <= '9' && rangehi >= '0')
03794 {
03795 if (rangelo > '0')
03796 rangelo = '0';
03797 if (rangehi < '9')
03798 rangehi = '9';
03799 }
03800
03801
03802
03803
03804
03805 if (rangehi - rangelo < 9)
03806 {
03807 rangelo = ' ';
03808 rangehi = 127;
03809 }
03810
03811
03812
03813
03814 while (*lobound)
03815 {
03816 if (*lobound != *hibound || *lobound != *value)
03817 break;
03818 lobound++, hibound++, value++;
03819 }
03820
03821
03822
03823
03824 *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
03825 *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
03826 *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
03827 }
03828
03829 static double
03830 convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
03831 {
03832 int slen = strlen(value);
03833 double num,
03834 denom,
03835 base;
03836
03837 if (slen <= 0)
03838 return 0.0;
03839
03840
03841
03842
03843 if (slen > 20)
03844 slen = 20;
03845
03846
03847 base = rangehi - rangelo + 1;
03848 num = 0.0;
03849 denom = base;
03850 while (slen-- > 0)
03851 {
03852 int ch = (unsigned char) *value++;
03853
03854 if (ch < rangelo)
03855 ch = rangelo - 1;
03856 else if (ch > rangehi)
03857 ch = rangehi + 1;
03858 num += ((double) (ch - rangelo)) / denom;
03859 denom *= base;
03860 }
03861
03862 return num;
03863 }
03864
03865
03866
03867
03868
03869
03870
03871 static char *
03872 convert_string_datum(Datum value, Oid typid)
03873 {
03874 char *val;
03875
03876 switch (typid)
03877 {
03878 case CHAROID:
03879 val = (char *) palloc(2);
03880 val[0] = DatumGetChar(value);
03881 val[1] = '\0';
03882 break;
03883 case BPCHAROID:
03884 case VARCHAROID:
03885 case TEXTOID:
03886 val = TextDatumGetCString(value);
03887 break;
03888 case NAMEOID:
03889 {
03890 NameData *nm = (NameData *) DatumGetPointer(value);
03891
03892 val = pstrdup(NameStr(*nm));
03893 break;
03894 }
03895 default:
03896
03897
03898
03899
03900
03901 elog(ERROR, "unsupported type: %u", typid);
03902 return NULL;
03903 }
03904
03905 if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
03906 {
03907 char *xfrmstr;
03908 size_t xfrmlen;
03909 size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
03910
03911
03912
03913
03914
03915
03916
03917
03918
03919
03920
03921
03922
03923
03924
03925
03926
03927
03928 #if _MSC_VER == 1400
03929
03930
03931
03932
03933
03934 {
03935 char x[1];
03936
03937 xfrmlen = strxfrm(x, val, 0);
03938 }
03939 #else
03940 xfrmlen = strxfrm(NULL, val, 0);
03941 #endif
03942 #ifdef WIN32
03943
03944
03945
03946
03947
03948
03949 if (xfrmlen == INT_MAX)
03950 return val;
03951 #endif
03952 xfrmstr = (char *) palloc(xfrmlen + 1);
03953 xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
03954 Assert(xfrmlen2 <= xfrmlen);
03955 pfree(val);
03956 val = xfrmstr;
03957 }
03958
03959 return val;
03960 }
03961
03962
03963
03964
03965
03966
03967
03968
03969
03970
03971
03972
03973 static void
03974 convert_bytea_to_scalar(Datum value,
03975 double *scaledvalue,
03976 Datum lobound,
03977 double *scaledlobound,
03978 Datum hibound,
03979 double *scaledhibound)
03980 {
03981 int rangelo,
03982 rangehi,
03983 valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
03984 loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
03985 hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
03986 i,
03987 minlen;
03988 unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
03989 *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
03990 *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
03991
03992
03993
03994
03995 rangelo = 0;
03996 rangehi = 255;
03997
03998
03999
04000
04001 minlen = Min(Min(valuelen, loboundlen), hiboundlen);
04002 for (i = 0; i < minlen; i++)
04003 {
04004 if (*lostr != *histr || *lostr != *valstr)
04005 break;
04006 lostr++, histr++, valstr++;
04007 loboundlen--, hiboundlen--, valuelen--;
04008 }
04009
04010
04011
04012
04013 *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
04014 *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
04015 *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
04016 }
04017
04018 static double
04019 convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
04020 int rangelo, int rangehi)
04021 {
04022 double num,
04023 denom,
04024 base;
04025
04026 if (valuelen <= 0)
04027 return 0.0;
04028
04029
04030
04031
04032
04033 if (valuelen > 10)
04034 valuelen = 10;
04035
04036
04037 base = rangehi - rangelo + 1;
04038 num = 0.0;
04039 denom = base;
04040 while (valuelen-- > 0)
04041 {
04042 int ch = *value++;
04043
04044 if (ch < rangelo)
04045 ch = rangelo - 1;
04046 else if (ch > rangehi)
04047 ch = rangehi + 1;
04048 num += ((double) (ch - rangelo)) / denom;
04049 denom *= base;
04050 }
04051
04052 return num;
04053 }
04054
04055
04056
04057
04058 static double
04059 convert_timevalue_to_scalar(Datum value, Oid typid)
04060 {
04061 switch (typid)
04062 {
04063 case TIMESTAMPOID:
04064 return DatumGetTimestamp(value);
04065 case TIMESTAMPTZOID:
04066 return DatumGetTimestampTz(value);
04067 case ABSTIMEOID:
04068 return DatumGetTimestamp(DirectFunctionCall1(abstime_timestamp,
04069 value));
04070 case DATEOID:
04071 return date2timestamp_no_overflow(DatumGetDateADT(value));
04072 case INTERVALOID:
04073 {
04074 Interval *interval = DatumGetIntervalP(value);
04075
04076
04077
04078
04079
04080
04081 #ifdef HAVE_INT64_TIMESTAMP
04082 return interval->time + interval->day * (double) USECS_PER_DAY +
04083 interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
04084 #else
04085 return interval->time + interval->day * SECS_PER_DAY +
04086 interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
04087 #endif
04088 }
04089 case RELTIMEOID:
04090 #ifdef HAVE_INT64_TIMESTAMP
04091 return (DatumGetRelativeTime(value) * 1000000.0);
04092 #else
04093 return DatumGetRelativeTime(value);
04094 #endif
04095 case TINTERVALOID:
04096 {
04097 TimeInterval tinterval = DatumGetTimeInterval(value);
04098
04099 #ifdef HAVE_INT64_TIMESTAMP
04100 if (tinterval->status != 0)
04101 return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
04102 #else
04103 if (tinterval->status != 0)
04104 return tinterval->data[1] - tinterval->data[0];
04105 #endif
04106 return 0;
04107 }
04108 case TIMEOID:
04109 return DatumGetTimeADT(value);
04110 case TIMETZOID:
04111 {
04112 TimeTzADT *timetz = DatumGetTimeTzADTP(value);
04113
04114
04115 #ifdef HAVE_INT64_TIMESTAMP
04116 return (double) (timetz->time + (timetz->zone * 1000000.0));
04117 #else
04118 return (double) (timetz->time + timetz->zone);
04119 #endif
04120 }
04121 }
04122
04123
04124
04125
04126
04127 elog(ERROR, "unsupported type: %u", typid);
04128 return 0;
04129 }
04130
04131
04132
04133
04134
04135
04136
04137
04138
04139
04140
04141
04142
04143
04144
04145
04146
04147
04148
04149
04150
04151
04152
04153
04154
04155 bool
04156 get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
04157 VariableStatData *vardata, Node **other,
04158 bool *varonleft)
04159 {
04160 Node *left,
04161 *right;
04162 VariableStatData rdata;
04163
04164
04165 if (list_length(args) != 2)
04166 return false;
04167
04168 left = (Node *) linitial(args);
04169 right = (Node *) lsecond(args);
04170
04171
04172
04173
04174
04175 examine_variable(root, left, varRelid, vardata);
04176 examine_variable(root, right, varRelid, &rdata);
04177
04178
04179
04180
04181 if (vardata->rel && rdata.rel == NULL)
04182 {
04183 *varonleft = true;
04184 *other = estimate_expression_value(root, rdata.var);
04185
04186 return true;
04187 }
04188
04189 if (vardata->rel == NULL && rdata.rel)
04190 {
04191 *varonleft = false;
04192 *other = estimate_expression_value(root, vardata->var);
04193
04194 *vardata = rdata;
04195 return true;
04196 }
04197
04198
04199 ReleaseVariableStats(*vardata);
04200 ReleaseVariableStats(rdata);
04201
04202 return false;
04203 }
04204
04205
04206
04207
04208
04209
04210
04211
04212
04213
04214
04215 void
04216 get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
04217 VariableStatData *vardata1, VariableStatData *vardata2,
04218 bool *join_is_reversed)
04219 {
04220 Node *left,
04221 *right;
04222
04223 if (list_length(args) != 2)
04224 elog(ERROR, "join operator should take two arguments");
04225
04226 left = (Node *) linitial(args);
04227 right = (Node *) lsecond(args);
04228
04229 examine_variable(root, left, 0, vardata1);
04230 examine_variable(root, right, 0, vardata2);
04231
04232 if (vardata1->rel &&
04233 bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
04234 *join_is_reversed = true;
04235 else if (vardata2->rel &&
04236 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
04237 *join_is_reversed = true;
04238 else
04239 *join_is_reversed = false;
04240 }
04241
04242
04243
04244
04245
04246
04247
04248
04249
04250
04251
04252
04253
04254
04255
04256
04257
04258
04259
04260
04261
04262
04263
04264
04265
04266
04267
04268
04269
04270
04271
04272
04273
04274 void
04275 examine_variable(PlannerInfo *root, Node *node, int varRelid,
04276 VariableStatData *vardata)
04277 {
04278 Node *basenode;
04279 Relids varnos;
04280 RelOptInfo *onerel;
04281
04282
04283 MemSet(vardata, 0, sizeof(VariableStatData));
04284
04285
04286 vardata->vartype = exprType(node);
04287
04288
04289
04290 if (IsA(node, RelabelType))
04291 basenode = (Node *) ((RelabelType *) node)->arg;
04292 else
04293 basenode = node;
04294
04295
04296
04297 if (IsA(basenode, Var) &&
04298 (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
04299 {
04300 Var *var = (Var *) basenode;
04301
04302
04303 vardata->var = basenode;
04304 vardata->rel = find_base_rel(root, var->varno);
04305 vardata->atttype = var->vartype;
04306 vardata->atttypmod = var->vartypmod;
04307 vardata->isunique = has_unique_index(vardata->rel, var->varattno);
04308
04309
04310 examine_simple_variable(root, var, vardata);
04311
04312 return;
04313 }
04314
04315
04316
04317
04318
04319
04320 varnos = pull_varnos(basenode);
04321
04322 onerel = NULL;
04323
04324 switch (bms_membership(varnos))
04325 {
04326 case BMS_EMPTY_SET:
04327
04328 break;
04329 case BMS_SINGLETON:
04330 if (varRelid == 0 || bms_is_member(varRelid, varnos))
04331 {
04332 onerel = find_base_rel(root,
04333 (varRelid ? varRelid : bms_singleton_member(varnos)));
04334 vardata->rel = onerel;
04335 node = basenode;
04336 }
04337
04338 break;
04339 case BMS_MULTIPLE:
04340 if (varRelid == 0)
04341 {
04342
04343 vardata->rel = find_join_rel(root, varnos);
04344 node = basenode;
04345 }
04346 else if (bms_is_member(varRelid, varnos))
04347 {
04348
04349 vardata->rel = find_base_rel(root, varRelid);
04350 node = basenode;
04351
04352 }
04353
04354 break;
04355 }
04356
04357 bms_free(varnos);
04358
04359 vardata->var = node;
04360 vardata->atttype = exprType(node);
04361 vardata->atttypmod = exprTypmod(node);
04362
04363 if (onerel)
04364 {
04365
04366
04367
04368
04369
04370
04371
04372
04373
04374 ListCell *ilist;
04375
04376 foreach(ilist, onerel->indexlist)
04377 {
04378 IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
04379 ListCell *indexpr_item;
04380 int pos;
04381
04382 indexpr_item = list_head(index->indexprs);
04383 if (indexpr_item == NULL)
04384 continue;
04385
04386 for (pos = 0; pos < index->ncolumns; pos++)
04387 {
04388 if (index->indexkeys[pos] == 0)
04389 {
04390 Node *indexkey;
04391
04392 if (indexpr_item == NULL)
04393 elog(ERROR, "too few entries in indexprs list");
04394 indexkey = (Node *) lfirst(indexpr_item);
04395 if (indexkey && IsA(indexkey, RelabelType))
04396 indexkey = (Node *) ((RelabelType *) indexkey)->arg;
04397 if (equal(node, indexkey))
04398 {
04399
04400
04401
04402
04403 if (index->unique &&
04404 index->ncolumns == 1 &&
04405 (index->indpred == NIL || index->predOK))
04406 vardata->isunique = true;
04407
04408
04409
04410
04411
04412
04413
04414
04415
04416
04417
04418 if (get_index_stats_hook &&
04419 (*get_index_stats_hook) (root, index->indexoid,
04420 pos + 1, vardata))
04421 {
04422
04423
04424
04425
04426
04427 if (HeapTupleIsValid(vardata->statsTuple) &&
04428 !vardata->freefunc)
04429 elog(ERROR, "no function provided to release variable stats with");
04430 }
04431 else if (index->indpred == NIL)
04432 {
04433 vardata->statsTuple =
04434 SearchSysCache3(STATRELATTINH,
04435 ObjectIdGetDatum(index->indexoid),
04436 Int16GetDatum(pos + 1),
04437 BoolGetDatum(false));
04438 vardata->freefunc = ReleaseSysCache;
04439 }
04440 if (vardata->statsTuple)
04441 break;
04442 }
04443 indexpr_item = lnext(indexpr_item);
04444 }
04445 }
04446 if (vardata->statsTuple)
04447 break;
04448 }
04449 }
04450 }
04451
04452
04453
04454
04455
04456
04457
04458
04459
04460
04461 static void
04462 examine_simple_variable(PlannerInfo *root, Var *var,
04463 VariableStatData *vardata)
04464 {
04465 RangeTblEntry *rte = root->simple_rte_array[var->varno];
04466
04467 Assert(IsA(rte, RangeTblEntry));
04468
04469 if (get_relation_stats_hook &&
04470 (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
04471 {
04472
04473
04474
04475
04476 if (HeapTupleIsValid(vardata->statsTuple) &&
04477 !vardata->freefunc)
04478 elog(ERROR, "no function provided to release variable stats with");
04479 }
04480 else if (rte->rtekind == RTE_RELATION)
04481 {
04482
04483
04484
04485
04486 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
04487 ObjectIdGetDatum(rte->relid),
04488 Int16GetDatum(var->varattno),
04489 BoolGetDatum(rte->inh));
04490 vardata->freefunc = ReleaseSysCache;
04491 }
04492 else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
04493 {
04494
04495
04496
04497 Query *subquery = rte->subquery;
04498 RelOptInfo *rel;
04499 TargetEntry *ste;
04500
04501
04502
04503
04504
04505
04506
04507
04508
04509 if (subquery->setOperations ||
04510 subquery->groupClause)
04511 return;
04512
04513
04514
04515
04516
04517
04518
04519
04520 rel = find_base_rel(root, var->varno);
04521
04522
04523 if (rel->subroot == NULL)
04524 return;
04525 Assert(IsA(rel->subroot, PlannerInfo));
04526
04527
04528
04529
04530
04531
04532
04533
04534
04535 subquery = rel->subroot->parse;
04536 Assert(IsA(subquery, Query));
04537
04538
04539 ste = get_tle_by_resno(subquery->targetList, var->varattno);
04540 if (ste == NULL || ste->resjunk)
04541 elog(ERROR, "subquery %s does not have attribute %d",
04542 rte->eref->aliasname, var->varattno);
04543 var = (Var *) ste->expr;
04544
04545
04546
04547
04548
04549
04550
04551 if (subquery->distinctClause)
04552 {
04553 if (list_length(subquery->distinctClause) == 1 &&
04554 targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
04555 vardata->isunique = true;
04556
04557 return;
04558 }
04559
04560
04561
04562
04563
04564
04565
04566
04567
04568
04569
04570
04571
04572
04573 if (rte->security_barrier)
04574 return;
04575
04576
04577 if (var && IsA(var, Var) &&
04578 var->varlevelsup == 0)
04579 {
04580
04581
04582
04583
04584
04585
04586
04587 examine_simple_variable(rel->subroot, var, vardata);
04588 }
04589 }
04590 else
04591 {
04592
04593
04594
04595
04596
04597
04598 }
04599 }
04600
04601
04602
04603
04604
04605
04606
04607
04608
04609
04610
04611
04612 double
04613 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
04614 {
04615 double stadistinct;
04616 double ntuples;
04617
04618 *isdefault = false;
04619
04620
04621
04622
04623
04624
04625 if (HeapTupleIsValid(vardata->statsTuple))
04626 {
04627
04628 Form_pg_statistic stats;
04629
04630 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
04631 stadistinct = stats->stadistinct;
04632 }
04633 else if (vardata->vartype == BOOLOID)
04634 {
04635
04636
04637
04638
04639
04640
04641 stadistinct = 2.0;
04642 }
04643 else
04644 {
04645
04646
04647
04648
04649 if (vardata->var && IsA(vardata->var, Var))
04650 {
04651 switch (((Var *) vardata->var)->varattno)
04652 {
04653 case ObjectIdAttributeNumber:
04654 case SelfItemPointerAttributeNumber:
04655 stadistinct = -1.0;
04656 break;
04657 case TableOidAttributeNumber:
04658 stadistinct = 1.0;
04659 break;
04660 default:
04661 stadistinct = 0.0;
04662 break;
04663 }
04664 }
04665 else
04666 stadistinct = 0.0;
04667
04668
04669
04670
04671 }
04672
04673
04674
04675
04676
04677
04678
04679 if (vardata->isunique)
04680 stadistinct = -1.0;
04681
04682
04683
04684
04685 if (stadistinct > 0.0)
04686 return stadistinct;
04687
04688
04689
04690
04691 if (vardata->rel == NULL)
04692 {
04693 *isdefault = true;
04694 return DEFAULT_NUM_DISTINCT;
04695 }
04696 ntuples = vardata->rel->tuples;
04697 if (ntuples <= 0.0)
04698 {
04699 *isdefault = true;
04700 return DEFAULT_NUM_DISTINCT;
04701 }
04702
04703
04704
04705
04706 if (stadistinct < 0.0)
04707 return floor((-stadistinct * ntuples) + 0.5);
04708
04709
04710
04711
04712
04713
04714 if (ntuples < DEFAULT_NUM_DISTINCT)
04715 return ntuples;
04716
04717 *isdefault = true;
04718 return DEFAULT_NUM_DISTINCT;
04719 }
04720
04721
04722
04723
04724
04725
04726
04727
04728
04729
04730 static bool
04731 get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
04732 Datum *min, Datum *max)
04733 {
04734 Datum tmin = 0;
04735 Datum tmax = 0;
04736 bool have_data = false;
04737 int16 typLen;
04738 bool typByVal;
04739 Datum *values;
04740 int nvalues;
04741 int i;
04742
04743
04744
04745
04746
04747
04748
04749
04750 #ifdef NOT_USED
04751 if (get_actual_variable_range(root, vardata, sortop, min, max))
04752 return true;
04753 #endif
04754
04755 if (!HeapTupleIsValid(vardata->statsTuple))
04756 {
04757
04758 return false;
04759 }
04760
04761 get_typlenbyval(vardata->atttype, &typLen, &typByVal);
04762
04763
04764
04765
04766
04767
04768
04769
04770 if (get_attstatsslot(vardata->statsTuple,
04771 vardata->atttype, vardata->atttypmod,
04772 STATISTIC_KIND_HISTOGRAM, sortop,
04773 NULL,
04774 &values, &nvalues,
04775 NULL, NULL))
04776 {
04777 if (nvalues > 0)
04778 {
04779 tmin = datumCopy(values[0], typByVal, typLen);
04780 tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
04781 have_data = true;
04782 }
04783 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
04784 }
04785 else if (get_attstatsslot(vardata->statsTuple,
04786 vardata->atttype, vardata->atttypmod,
04787 STATISTIC_KIND_HISTOGRAM, InvalidOid,
04788 NULL,
04789 &values, &nvalues,
04790 NULL, NULL))
04791 {
04792 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
04793 return false;
04794 }
04795
04796
04797
04798
04799
04800
04801
04802 if (get_attstatsslot(vardata->statsTuple,
04803 vardata->atttype, vardata->atttypmod,
04804 STATISTIC_KIND_MCV, InvalidOid,
04805 NULL,
04806 &values, &nvalues,
04807 NULL, NULL))
04808 {
04809 bool tmin_is_mcv = false;
04810 bool tmax_is_mcv = false;
04811 FmgrInfo opproc;
04812
04813 fmgr_info(get_opcode(sortop), &opproc);
04814
04815 for (i = 0; i < nvalues; i++)
04816 {
04817 if (!have_data)
04818 {
04819 tmin = tmax = values[i];
04820 tmin_is_mcv = tmax_is_mcv = have_data = true;
04821 continue;
04822 }
04823 if (DatumGetBool(FunctionCall2Coll(&opproc,
04824 DEFAULT_COLLATION_OID,
04825 values[i], tmin)))
04826 {
04827 tmin = values[i];
04828 tmin_is_mcv = true;
04829 }
04830 if (DatumGetBool(FunctionCall2Coll(&opproc,
04831 DEFAULT_COLLATION_OID,
04832 tmax, values[i])))
04833 {
04834 tmax = values[i];
04835 tmax_is_mcv = true;
04836 }
04837 }
04838 if (tmin_is_mcv)
04839 tmin = datumCopy(tmin, typByVal, typLen);
04840 if (tmax_is_mcv)
04841 tmax = datumCopy(tmax, typByVal, typLen);
04842 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
04843 }
04844
04845 *min = tmin;
04846 *max = tmax;
04847 return have_data;
04848 }
04849
04850
04851
04852
04853
04854
04855
04856
04857
04858
04859
04860
04861
04862 static bool
04863 get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
04864 Oid sortop,
04865 Datum *min, Datum *max)
04866 {
04867 bool have_data = false;
04868 RelOptInfo *rel = vardata->rel;
04869 RangeTblEntry *rte;
04870 ListCell *lc;
04871
04872
04873 if (rel == NULL || rel->indexlist == NIL)
04874 return false;
04875
04876 rte = root->simple_rte_array[rel->relid];
04877 Assert(rte->rtekind == RTE_RELATION);
04878
04879
04880 foreach(lc, rel->indexlist)
04881 {
04882 IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
04883 ScanDirection indexscandir;
04884
04885
04886 if (index->relam != BTREE_AM_OID)
04887 continue;
04888
04889
04890
04891
04892
04893 if (index->indpred != NIL)
04894 continue;
04895
04896
04897
04898
04899
04900 if (index->hypothetical)
04901 continue;
04902
04903
04904
04905
04906
04907 if (!match_index_to_operand(vardata->var, 0, index))
04908 continue;
04909 switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
04910 {
04911 case BTLessStrategyNumber:
04912 if (index->reverse_sort[0])
04913 indexscandir = BackwardScanDirection;
04914 else
04915 indexscandir = ForwardScanDirection;
04916 break;
04917 case BTGreaterStrategyNumber:
04918 if (index->reverse_sort[0])
04919 indexscandir = ForwardScanDirection;
04920 else
04921 indexscandir = BackwardScanDirection;
04922 break;
04923 default:
04924
04925 continue;
04926 }
04927
04928
04929
04930
04931
04932 {
04933 EState *estate;
04934 ExprContext *econtext;
04935 MemoryContext tmpcontext;
04936 MemoryContext oldcontext;
04937 Relation heapRel;
04938 Relation indexRel;
04939 IndexInfo *indexInfo;
04940 TupleTableSlot *slot;
04941 int16 typLen;
04942 bool typByVal;
04943 ScanKeyData scankeys[1];
04944 IndexScanDesc index_scan;
04945 HeapTuple tup;
04946 Datum values[INDEX_MAX_KEYS];
04947 bool isnull[INDEX_MAX_KEYS];
04948
04949 estate = CreateExecutorState();
04950 econtext = GetPerTupleExprContext(estate);
04951
04952 tmpcontext = econtext->ecxt_per_tuple_memory;
04953 oldcontext = MemoryContextSwitchTo(tmpcontext);
04954
04955
04956
04957
04958
04959
04960 heapRel = heap_open(rte->relid, NoLock);
04961 indexRel = index_open(index->indexoid, AccessShareLock);
04962
04963
04964 indexInfo = BuildIndexInfo(indexRel);
04965
04966
04967 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
04968 econtext->ecxt_scantuple = slot;
04969 get_typlenbyval(vardata->atttype, &typLen, &typByVal);
04970
04971
04972 ScanKeyEntryInitialize(&scankeys[0],
04973 SK_ISNULL | SK_SEARCHNOTNULL,
04974 1,
04975 InvalidStrategy,
04976 InvalidOid,
04977 InvalidOid,
04978 InvalidOid,
04979 (Datum) 0);
04980
04981 have_data = true;
04982
04983
04984 if (min)
04985 {
04986 index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
04987 1, 0);
04988 index_rescan(index_scan, scankeys, 1, NULL, 0);
04989
04990
04991 if ((tup = index_getnext(index_scan,
04992 indexscandir)) != NULL)
04993 {
04994
04995 ExecStoreTuple(tup, slot, InvalidBuffer, false);
04996 FormIndexDatum(indexInfo, slot, estate,
04997 values, isnull);
04998
04999
05000 if (isnull[0])
05001 elog(ERROR, "found unexpected null value in index \"%s\"",
05002 RelationGetRelationName(indexRel));
05003
05004
05005 MemoryContextSwitchTo(oldcontext);
05006 *min = datumCopy(values[0], typByVal, typLen);
05007 MemoryContextSwitchTo(tmpcontext);
05008 }
05009 else
05010 have_data = false;
05011
05012 index_endscan(index_scan);
05013 }
05014
05015
05016 if (max && have_data)
05017 {
05018 index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
05019 1, 0);
05020 index_rescan(index_scan, scankeys, 1, NULL, 0);
05021
05022
05023 if ((tup = index_getnext(index_scan,
05024 -indexscandir)) != NULL)
05025 {
05026
05027 ExecStoreTuple(tup, slot, InvalidBuffer, false);
05028 FormIndexDatum(indexInfo, slot, estate,
05029 values, isnull);
05030
05031
05032 if (isnull[0])
05033 elog(ERROR, "found unexpected null value in index \"%s\"",
05034 RelationGetRelationName(indexRel));
05035
05036
05037 MemoryContextSwitchTo(oldcontext);
05038 *max = datumCopy(values[0], typByVal, typLen);
05039 MemoryContextSwitchTo(tmpcontext);
05040 }
05041 else
05042 have_data = false;
05043
05044 index_endscan(index_scan);
05045 }
05046
05047
05048 ExecDropSingleTupleTableSlot(slot);
05049
05050 index_close(indexRel, AccessShareLock);
05051 heap_close(heapRel, NoLock);
05052
05053 MemoryContextSwitchTo(oldcontext);
05054 FreeExecutorState(estate);
05055
05056
05057 break;
05058 }
05059 }
05060
05061 return have_data;
05062 }
05063
05064
05065
05066
05067
05068
05069
05070
05071 static RelOptInfo *
05072 find_join_input_rel(PlannerInfo *root, Relids relids)
05073 {
05074 RelOptInfo *rel = NULL;
05075
05076 switch (bms_membership(relids))
05077 {
05078 case BMS_EMPTY_SET:
05079
05080 break;
05081 case BMS_SINGLETON:
05082 rel = find_base_rel(root, bms_singleton_member(relids));
05083 break;
05084 case BMS_MULTIPLE:
05085 rel = find_join_rel(root, relids);
05086 break;
05087 }
05088
05089 if (rel == NULL)
05090 elog(ERROR, "could not find RelOptInfo for given relids");
05091
05092 return rel;
05093 }
05094
05095
05096
05097
05098
05099
05100
05101
05102
05103
05104
05105
05106
05107
05108
05109
05110
05111
05112
05113
05114
05115
05116
05117
05118
05119
05120
05121 static int
05122 pattern_char_isalpha(char c, bool is_multibyte,
05123 pg_locale_t locale, bool locale_is_c)
05124 {
05125 if (locale_is_c)
05126 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
05127 else if (is_multibyte && IS_HIGHBIT_SET(c))
05128 return true;
05129 #ifdef HAVE_LOCALE_T
05130 else if (locale)
05131 return isalpha_l((unsigned char) c, locale);
05132 #endif
05133 else
05134 return isalpha((unsigned char) c);
05135 }
05136
05137
05138
05139
05140
05141
05142
05143
05144
05145
05146
05147
05148
05149
05150 static Pattern_Prefix_Status
05151 like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
05152 Const **prefix_const, Selectivity *rest_selec)
05153 {
05154 char *match;
05155 char *patt;
05156 int pattlen;
05157 Oid typeid = patt_const->consttype;
05158 int pos,
05159 match_pos;
05160 bool is_multibyte = (pg_database_encoding_max_length() > 1);
05161 pg_locale_t locale = 0;
05162 bool locale_is_c = false;
05163
05164
05165 Assert(typeid == BYTEAOID || typeid == TEXTOID);
05166
05167 if (case_insensitive)
05168 {
05169 if (typeid == BYTEAOID)
05170 ereport(ERROR,
05171 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
05172 errmsg("case insensitive matching not supported on type bytea")));
05173
05174
05175 if (lc_ctype_is_c(collation))
05176 locale_is_c = true;
05177 else if (collation != DEFAULT_COLLATION_OID)
05178 {
05179 if (!OidIsValid(collation))
05180 {
05181
05182
05183
05184
05185 ereport(ERROR,
05186 (errcode(ERRCODE_INDETERMINATE_COLLATION),
05187 errmsg("could not determine which collation to use for ILIKE"),
05188 errhint("Use the COLLATE clause to set the collation explicitly.")));
05189 }
05190 locale = pg_newlocale_from_collation(collation);
05191 }
05192 }
05193
05194 if (typeid != BYTEAOID)
05195 {
05196 patt = TextDatumGetCString(patt_const->constvalue);
05197 pattlen = strlen(patt);
05198 }
05199 else
05200 {
05201 bytea *bstr = DatumGetByteaP(patt_const->constvalue);
05202
05203 pattlen = VARSIZE(bstr) - VARHDRSZ;
05204 patt = (char *) palloc(pattlen);
05205 memcpy(patt, VARDATA(bstr), pattlen);
05206 if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
05207 pfree(bstr);
05208 }
05209
05210 match = palloc(pattlen + 1);
05211 match_pos = 0;
05212 for (pos = 0; pos < pattlen; pos++)
05213 {
05214
05215 if (patt[pos] == '%' ||
05216 patt[pos] == '_')
05217 break;
05218
05219
05220 if (patt[pos] == '\\')
05221 {
05222 pos++;
05223 if (pos >= pattlen)
05224 break;
05225 }
05226
05227
05228 if (case_insensitive &&
05229 pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
05230 break;
05231
05232 match[match_pos++] = patt[pos];
05233 }
05234
05235 match[match_pos] = '\0';
05236
05237 if (typeid != BYTEAOID)
05238 *prefix_const = string_to_const(match, typeid);
05239 else
05240 *prefix_const = string_to_bytea_const(match, match_pos);
05241
05242 if (rest_selec != NULL)
05243 *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
05244 case_insensitive);
05245
05246 pfree(patt);
05247 pfree(match);
05248
05249
05250 if (pos == pattlen)
05251 return Pattern_Prefix_Exact;
05252
05253 if (match_pos > 0)
05254 return Pattern_Prefix_Partial;
05255
05256 return Pattern_Prefix_None;
05257 }
05258
05259 static Pattern_Prefix_Status
05260 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
05261 Const **prefix_const, Selectivity *rest_selec)
05262 {
05263 Oid typeid = patt_const->consttype;
05264 char *prefix;
05265 bool exact;
05266
05267
05268
05269
05270
05271
05272 if (typeid == BYTEAOID)
05273 ereport(ERROR,
05274 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
05275 errmsg("regular-expression matching not supported on type bytea")));
05276
05277
05278 prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
05279 case_insensitive, collation,
05280 &exact);
05281
05282 if (prefix == NULL)
05283 {
05284 *prefix_const = NULL;
05285
05286 if (rest_selec != NULL)
05287 {
05288 char *patt = TextDatumGetCString(patt_const->constvalue);
05289
05290 *rest_selec = regex_selectivity(patt, strlen(patt),
05291 case_insensitive,
05292 0);
05293 pfree(patt);
05294 }
05295
05296 return Pattern_Prefix_None;
05297 }
05298
05299 *prefix_const = string_to_const(prefix, typeid);
05300
05301 if (rest_selec != NULL)
05302 {
05303 if (exact)
05304 {
05305
05306 *rest_selec = 1.0;
05307 }
05308 else
05309 {
05310 char *patt = TextDatumGetCString(patt_const->constvalue);
05311
05312 *rest_selec = regex_selectivity(patt, strlen(patt),
05313 case_insensitive,
05314 strlen(prefix));
05315 pfree(patt);
05316 }
05317 }
05318
05319 pfree(prefix);
05320
05321 if (exact)
05322 return Pattern_Prefix_Exact;
05323 else
05324 return Pattern_Prefix_Partial;
05325 }
05326
05327 Pattern_Prefix_Status
05328 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
05329 Const **prefix, Selectivity *rest_selec)
05330 {
05331 Pattern_Prefix_Status result;
05332
05333 switch (ptype)
05334 {
05335 case Pattern_Type_Like:
05336 result = like_fixed_prefix(patt, false, collation,
05337 prefix, rest_selec);
05338 break;
05339 case Pattern_Type_Like_IC:
05340 result = like_fixed_prefix(patt, true, collation,
05341 prefix, rest_selec);
05342 break;
05343 case Pattern_Type_Regex:
05344 result = regex_fixed_prefix(patt, false, collation,
05345 prefix, rest_selec);
05346 break;
05347 case Pattern_Type_Regex_IC:
05348 result = regex_fixed_prefix(patt, true, collation,
05349 prefix, rest_selec);
05350 break;
05351 default:
05352 elog(ERROR, "unrecognized ptype: %d", (int) ptype);
05353 result = Pattern_Prefix_None;
05354 break;
05355 }
05356 return result;
05357 }
05358
05359
05360
05361
05362
05363
05364
05365
05366
05367
05368
05369
05370
05371
05372
05373
05374
05375
05376
05377
05378 static Selectivity
05379 prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
05380 Oid vartype, Oid opfamily, Const *prefixcon)
05381 {
05382 Selectivity prefixsel;
05383 Oid cmpopr;
05384 FmgrInfo opproc;
05385 Const *greaterstrcon;
05386 Selectivity eq_sel;
05387
05388 cmpopr = get_opfamily_member(opfamily, vartype, vartype,
05389 BTGreaterEqualStrategyNumber);
05390 if (cmpopr == InvalidOid)
05391 elog(ERROR, "no >= operator for opfamily %u", opfamily);
05392 fmgr_info(get_opcode(cmpopr), &opproc);
05393
05394 prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true,
05395 prefixcon->constvalue,
05396 prefixcon->consttype);
05397
05398 if (prefixsel < 0.0)
05399 {
05400
05401 return DEFAULT_MATCH_SEL;
05402 }
05403
05404
05405
05406
05407
05408
05409 cmpopr = get_opfamily_member(opfamily, vartype, vartype,
05410 BTLessStrategyNumber);
05411 if (cmpopr == InvalidOid)
05412 elog(ERROR, "no < operator for opfamily %u", opfamily);
05413 fmgr_info(get_opcode(cmpopr), &opproc);
05414 greaterstrcon = make_greater_string(prefixcon, &opproc,
05415 DEFAULT_COLLATION_OID);
05416 if (greaterstrcon)
05417 {
05418 Selectivity topsel;
05419
05420 topsel = ineq_histogram_selectivity(root, vardata, &opproc, false,
05421 greaterstrcon->constvalue,
05422 greaterstrcon->consttype);
05423
05424
05425 Assert(topsel >= 0.0);
05426
05427
05428
05429
05430
05431
05432
05433 prefixsel = topsel + prefixsel - 1.0;
05434 }
05435
05436
05437
05438
05439
05440
05441
05442
05443
05444
05445
05446
05447
05448
05449 cmpopr = get_opfamily_member(opfamily, vartype, vartype,
05450 BTEqualStrategyNumber);
05451 if (cmpopr == InvalidOid)
05452 elog(ERROR, "no = operator for opfamily %u", opfamily);
05453 eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
05454 false, true);
05455
05456 prefixsel = Max(prefixsel, eq_sel);
05457
05458 return prefixsel;
05459 }
05460
05461
05462
05463
05464
05465
05466
05467
05468
05469
05470
05471
05472 #define FIXED_CHAR_SEL 0.20
05473 #define CHAR_RANGE_SEL 0.25
05474 #define ANY_CHAR_SEL 0.9
05475 #define FULL_WILDCARD_SEL 5.0
05476 #define PARTIAL_WILDCARD_SEL 2.0
05477
05478 static Selectivity
05479 like_selectivity(const char *patt, int pattlen, bool case_insensitive)
05480 {
05481 Selectivity sel = 1.0;
05482 int pos;
05483
05484
05485 for (pos = 0; pos < pattlen; pos++)
05486 {
05487 if (patt[pos] != '%' && patt[pos] != '_')
05488 break;
05489 }
05490
05491 for (; pos < pattlen; pos++)
05492 {
05493
05494 if (patt[pos] == '%')
05495 sel *= FULL_WILDCARD_SEL;
05496 else if (patt[pos] == '_')
05497 sel *= ANY_CHAR_SEL;
05498 else if (patt[pos] == '\\')
05499 {
05500
05501 pos++;
05502 if (pos >= pattlen)
05503 break;
05504 sel *= FIXED_CHAR_SEL;
05505 }
05506 else
05507 sel *= FIXED_CHAR_SEL;
05508 }
05509
05510 if (sel > 1.0)
05511 sel = 1.0;
05512 return sel;
05513 }
05514
05515 static Selectivity
05516 regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
05517 {
05518 Selectivity sel = 1.0;
05519 int paren_depth = 0;
05520 int paren_pos = 0;
05521 int pos;
05522
05523 for (pos = 0; pos < pattlen; pos++)
05524 {
05525 if (patt[pos] == '(')
05526 {
05527 if (paren_depth == 0)
05528 paren_pos = pos;
05529 paren_depth++;
05530 }
05531 else if (patt[pos] == ')' && paren_depth > 0)
05532 {
05533 paren_depth--;
05534 if (paren_depth == 0)
05535 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
05536 pos - (paren_pos + 1),
05537 case_insensitive);
05538 }
05539 else if (patt[pos] == '|' && paren_depth == 0)
05540 {
05541
05542
05543
05544
05545 sel += regex_selectivity_sub(patt + (pos + 1),
05546 pattlen - (pos + 1),
05547 case_insensitive);
05548 break;
05549 }
05550 else if (patt[pos] == '[')
05551 {
05552 bool negclass = false;
05553
05554 if (patt[++pos] == '^')
05555 {
05556 negclass = true;
05557 pos++;
05558 }
05559 if (patt[pos] == ']')
05560
05561 pos++;
05562 while (pos < pattlen && patt[pos] != ']')
05563 pos++;
05564 if (paren_depth == 0)
05565 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
05566 }
05567 else if (patt[pos] == '.')
05568 {
05569 if (paren_depth == 0)
05570 sel *= ANY_CHAR_SEL;
05571 }
05572 else if (patt[pos] == '*' ||
05573 patt[pos] == '?' ||
05574 patt[pos] == '+')
05575 {
05576
05577 if (paren_depth == 0)
05578 sel *= PARTIAL_WILDCARD_SEL;
05579 }
05580 else if (patt[pos] == '{')
05581 {
05582 while (pos < pattlen && patt[pos] != '}')
05583 pos++;
05584 if (paren_depth == 0)
05585 sel *= PARTIAL_WILDCARD_SEL;
05586 }
05587 else if (patt[pos] == '\\')
05588 {
05589
05590 pos++;
05591 if (pos >= pattlen)
05592 break;
05593 if (paren_depth == 0)
05594 sel *= FIXED_CHAR_SEL;
05595 }
05596 else
05597 {
05598 if (paren_depth == 0)
05599 sel *= FIXED_CHAR_SEL;
05600 }
05601 }
05602
05603 if (sel > 1.0)
05604 sel = 1.0;
05605 return sel;
05606 }
05607
05608 static Selectivity
05609 regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
05610 int fixed_prefix_len)
05611 {
05612 Selectivity sel;
05613
05614
05615 if (pattlen > 0 && patt[pattlen - 1] == '$' &&
05616 (pattlen == 1 || patt[pattlen - 2] != '\\'))
05617 {
05618
05619 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
05620 }
05621 else
05622 {
05623
05624 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
05625 sel *= FULL_WILDCARD_SEL;
05626 }
05627
05628
05629 if (fixed_prefix_len > 0)
05630 sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
05631
05632
05633 CLAMP_PROBABILITY(sel);
05634 return sel;
05635 }
05636
05637
05638
05639
05640
05641
05642 static bool
05643 byte_increment(unsigned char *ptr, int len)
05644 {
05645 if (*ptr >= 255)
05646 return false;
05647 (*ptr)++;
05648 return true;
05649 }
05650
05651
05652
05653
05654
05655
05656
05657
05658
05659
05660
05661
05662
05663
05664
05665
05666
05667
05668
05669
05670
05671
05672
05673
05674
05675
05676
05677
05678
05679
05680
05681
05682
05683
05684
05685
05686
05687
05688
05689
05690
05691
05692 Const *
05693 make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
05694 {
05695 Oid datatype = str_const->consttype;
05696 char *workstr;
05697 int len;
05698 Datum cmpstr;
05699 text *cmptxt = NULL;
05700 mbcharacter_incrementer charinc;
05701
05702
05703
05704
05705
05706
05707
05708 if (datatype == NAMEOID)
05709 {
05710 workstr = DatumGetCString(DirectFunctionCall1(nameout,
05711 str_const->constvalue));
05712 len = strlen(workstr);
05713 cmpstr = str_const->constvalue;
05714 }
05715 else if (datatype == BYTEAOID)
05716 {
05717 bytea *bstr = DatumGetByteaP(str_const->constvalue);
05718
05719 len = VARSIZE(bstr) - VARHDRSZ;
05720 workstr = (char *) palloc(len);
05721 memcpy(workstr, VARDATA(bstr), len);
05722 if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
05723 pfree(bstr);
05724 cmpstr = str_const->constvalue;
05725 }
05726 else
05727 {
05728 workstr = TextDatumGetCString(str_const->constvalue);
05729 len = strlen(workstr);
05730 if (lc_collate_is_c(collation) || len == 0)
05731 cmpstr = str_const->constvalue;
05732 else
05733 {
05734
05735 static char suffixchar = 0;
05736 static Oid suffixcollation = 0;
05737
05738 if (!suffixchar || suffixcollation != collation)
05739 {
05740 char *best;
05741
05742 best = "Z";
05743 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
05744 best = "z";
05745 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
05746 best = "y";
05747 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
05748 best = "9";
05749 suffixchar = *best;
05750 suffixcollation = collation;
05751 }
05752
05753
05754 cmptxt = (text *) palloc(VARHDRSZ + len + 1);
05755 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
05756 memcpy(VARDATA(cmptxt), workstr, len);
05757 *(VARDATA(cmptxt) + len) = suffixchar;
05758 cmpstr = PointerGetDatum(cmptxt);
05759 }
05760 }
05761
05762
05763 if (datatype == BYTEAOID)
05764 charinc = byte_increment;
05765 else
05766 charinc = pg_database_encoding_character_incrementer();
05767
05768
05769 while (len > 0)
05770 {
05771 int charlen;
05772 unsigned char *lastchar;
05773
05774
05775 if (datatype == BYTEAOID)
05776 charlen = 1;
05777 else
05778 charlen = len - pg_mbcliplen(workstr, len, len - 1);
05779 lastchar = (unsigned char *) (workstr + len - charlen);
05780
05781
05782
05783
05784
05785
05786
05787
05788
05789 while (charinc(lastchar, charlen))
05790 {
05791 Const *workstr_const;
05792
05793 if (datatype == BYTEAOID)
05794 workstr_const = string_to_bytea_const(workstr, len);
05795 else
05796 workstr_const = string_to_const(workstr, datatype);
05797
05798 if (DatumGetBool(FunctionCall2Coll(ltproc,
05799 collation,
05800 cmpstr,
05801 workstr_const->constvalue)))
05802 {
05803
05804 if (cmptxt)
05805 pfree(cmptxt);
05806 pfree(workstr);
05807 return workstr_const;
05808 }
05809
05810
05811 pfree(DatumGetPointer(workstr_const->constvalue));
05812 pfree(workstr_const);
05813 }
05814
05815
05816
05817
05818
05819 len -= charlen;
05820 workstr[len] = '\0';
05821 }
05822
05823
05824 if (cmptxt)
05825 pfree(cmptxt);
05826 pfree(workstr);
05827
05828 return NULL;
05829 }
05830
05831
05832
05833
05834
05835
05836 static Datum
05837 string_to_datum(const char *str, Oid datatype)
05838 {
05839 Assert(str != NULL);
05840
05841
05842
05843
05844
05845 if (datatype == NAMEOID)
05846 return DirectFunctionCall1(namein, CStringGetDatum(str));
05847 else if (datatype == BYTEAOID)
05848 return DirectFunctionCall1(byteain, CStringGetDatum(str));
05849 else
05850 return CStringGetTextDatum(str);
05851 }
05852
05853
05854
05855
05856 static Const *
05857 string_to_const(const char *str, Oid datatype)
05858 {
05859 Datum conval = string_to_datum(str, datatype);
05860 Oid collation;
05861 int constlen;
05862
05863
05864
05865
05866
05867 switch (datatype)
05868 {
05869 case TEXTOID:
05870 case VARCHAROID:
05871 case BPCHAROID:
05872 collation = DEFAULT_COLLATION_OID;
05873 constlen = -1;
05874 break;
05875
05876 case NAMEOID:
05877 collation = InvalidOid;
05878 constlen = NAMEDATALEN;
05879 break;
05880
05881 case BYTEAOID:
05882 collation = InvalidOid;
05883 constlen = -1;
05884 break;
05885
05886 default:
05887 elog(ERROR, "unexpected datatype in string_to_const: %u",
05888 datatype);
05889 return NULL;
05890 }
05891
05892 return makeConst(datatype, -1, collation, constlen,
05893 conval, false, false);
05894 }
05895
05896
05897
05898
05899 static Const *
05900 string_to_bytea_const(const char *str, size_t str_len)
05901 {
05902 bytea *bstr = palloc(VARHDRSZ + str_len);
05903 Datum conval;
05904
05905 memcpy(VARDATA(bstr), str, str_len);
05906 SET_VARSIZE(bstr, VARHDRSZ + str_len);
05907 conval = PointerGetDatum(bstr);
05908
05909 return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
05910 }
05911
05912
05913
05914
05915
05916
05917
05918
05919
05920
05921
05922
05923
05924
05925
05926
05927
05928
05929
05930
05931
05932 typedef struct
05933 {
05934
05935 Cost indexStartupCost;
05936 Cost indexTotalCost;
05937 Selectivity indexSelectivity;
05938 double indexCorrelation;
05939
05940
05941 double numIndexPages;
05942 double numIndexTuples;
05943 double spc_random_page_cost;
05944 double num_sa_scans;
05945 } GenericCosts;
05946
05947 static void
05948 genericcostestimate(PlannerInfo *root,
05949 IndexPath *path,
05950 double loop_count,
05951 GenericCosts *costs)
05952 {
05953 IndexOptInfo *index = path->indexinfo;
05954 List *indexQuals = path->indexquals;
05955 List *indexOrderBys = path->indexorderbys;
05956 Cost indexStartupCost;
05957 Cost indexTotalCost;
05958 Selectivity indexSelectivity;
05959 double indexCorrelation;
05960 double numIndexPages;
05961 double numIndexTuples;
05962 double spc_random_page_cost;
05963 double num_sa_scans;
05964 double num_outer_scans;
05965 double num_scans;
05966 QualCost index_qual_cost;
05967 double qual_op_cost;
05968 double qual_arg_cost;
05969 List *selectivityQuals;
05970 ListCell *l;
05971
05972
05973
05974
05975
05976
05977 selectivityQuals = add_predicate_to_quals(index, indexQuals);
05978
05979
05980
05981
05982
05983 num_sa_scans = 1;
05984 foreach(l, indexQuals)
05985 {
05986 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
05987
05988 if (IsA(rinfo->clause, ScalarArrayOpExpr))
05989 {
05990 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
05991 int alength = estimate_array_length(lsecond(saop->args));
05992
05993 if (alength > 1)
05994 num_sa_scans *= alength;
05995 }
05996 }
05997
05998
05999 indexSelectivity = clauselist_selectivity(root, selectivityQuals,
06000 index->rel->relid,
06001 JOIN_INNER,
06002 NULL);
06003
06004
06005
06006
06007
06008
06009 numIndexTuples = costs->numIndexTuples;
06010 if (numIndexTuples <= 0.0)
06011 {
06012 numIndexTuples = indexSelectivity * index->rel->tuples;
06013
06014
06015
06016
06017
06018
06019
06020
06021 numIndexTuples = rint(numIndexTuples / num_sa_scans);
06022 }
06023
06024
06025
06026
06027
06028
06029 if (numIndexTuples > index->tuples)
06030 numIndexTuples = index->tuples;
06031 if (numIndexTuples < 1.0)
06032 numIndexTuples = 1.0;
06033
06034
06035
06036
06037
06038
06039
06040
06041
06042
06043
06044
06045
06046 if (index->pages > 1 && index->tuples > 1)
06047 numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
06048 else
06049 numIndexPages = 1.0;
06050
06051
06052 get_tablespace_page_costs(index->reltablespace,
06053 &spc_random_page_cost,
06054 NULL);
06055
06056
06057
06058
06059
06060
06061
06062
06063
06064
06065
06066
06067
06068
06069
06070
06071
06072
06073 num_outer_scans = loop_count;
06074 num_scans = num_sa_scans * num_outer_scans;
06075
06076 if (num_scans > 1)
06077 {
06078 double pages_fetched;
06079
06080
06081 pages_fetched = numIndexPages * num_scans;
06082
06083
06084 pages_fetched = index_pages_fetched(pages_fetched,
06085 index->pages,
06086 (double) index->pages,
06087 root);
06088
06089
06090
06091
06092
06093
06094 indexTotalCost = (pages_fetched * spc_random_page_cost)
06095 / num_outer_scans;
06096 }
06097 else
06098 {
06099
06100
06101
06102
06103 indexTotalCost = numIndexPages * spc_random_page_cost;
06104 }
06105
06106
06107
06108
06109
06110
06111
06112
06113
06114
06115
06116
06117
06118
06119
06120 cost_qual_eval(&index_qual_cost, indexQuals, root);
06121 qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
06122 cost_qual_eval(&index_qual_cost, indexOrderBys, root);
06123 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
06124 qual_op_cost = cpu_operator_cost *
06125 (list_length(indexQuals) + list_length(indexOrderBys));
06126 qual_arg_cost -= qual_op_cost;
06127 if (qual_arg_cost < 0)
06128 qual_arg_cost = 0;
06129
06130 indexStartupCost = qual_arg_cost;
06131 indexTotalCost += qual_arg_cost;
06132 indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
06133
06134
06135
06136
06137 indexCorrelation = 0.0;
06138
06139
06140
06141
06142 costs->indexStartupCost = indexStartupCost;
06143 costs->indexTotalCost = indexTotalCost;
06144 costs->indexSelectivity = indexSelectivity;
06145 costs->indexCorrelation = indexCorrelation;
06146 costs->numIndexPages = numIndexPages;
06147 costs->numIndexTuples = numIndexTuples;
06148 costs->spc_random_page_cost = spc_random_page_cost;
06149 costs->num_sa_scans = num_sa_scans;
06150 }
06151
06152
06153
06154
06155
06156
06157
06158
06159
06160
06161
06162
06163
06164
06165
06166
06167
06168
06169
06170
06171 static List *
06172 add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
06173 {
06174 List *predExtraQuals = NIL;
06175 ListCell *lc;
06176
06177 if (index->indpred == NIL)
06178 return indexQuals;
06179
06180 foreach(lc, index->indpred)
06181 {
06182 Node *predQual = (Node *) lfirst(lc);
06183 List *oneQual = list_make1(predQual);
06184
06185 if (!predicate_implied_by(oneQual, indexQuals))
06186 predExtraQuals = list_concat(predExtraQuals, oneQual);
06187 }
06188
06189 return list_concat(predExtraQuals, indexQuals);
06190 }
06191
06192
06193 Datum
06194 btcostestimate(PG_FUNCTION_ARGS)
06195 {
06196 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
06197 IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
06198 double loop_count = PG_GETARG_FLOAT8(2);
06199 Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
06200 Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
06201 Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
06202 double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
06203 IndexOptInfo *index = path->indexinfo;
06204 GenericCosts costs;
06205 Oid relid;
06206 AttrNumber colnum;
06207 VariableStatData vardata;
06208 double numIndexTuples;
06209 Cost descentCost;
06210 List *indexBoundQuals;
06211 int indexcol;
06212 bool eqQualHere;
06213 bool found_saop;
06214 bool found_is_null_op;
06215 double num_sa_scans;
06216 ListCell *lcc,
06217 *lci;
06218
06219
06220
06221
06222
06223
06224
06225
06226
06227
06228
06229
06230
06231
06232
06233
06234
06235
06236 indexBoundQuals = NIL;
06237 indexcol = 0;
06238 eqQualHere = false;
06239 found_saop = false;
06240 found_is_null_op = false;
06241 num_sa_scans = 1;
06242 forboth(lcc, path->indexquals, lci, path->indexqualcols)
06243 {
06244 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc);
06245 Expr *clause;
06246 Node *leftop,
06247 *rightop PG_USED_FOR_ASSERTS_ONLY;
06248 Oid clause_op;
06249 int op_strategy;
06250 bool is_null_op = false;
06251
06252 if (indexcol != lfirst_int(lci))
06253 {
06254
06255 if (!eqQualHere)
06256 break;
06257 eqQualHere = false;
06258 indexcol++;
06259 if (indexcol != lfirst_int(lci))
06260 break;
06261 }
06262
06263 Assert(IsA(rinfo, RestrictInfo));
06264 clause = rinfo->clause;
06265
06266 if (IsA(clause, OpExpr))
06267 {
06268 leftop = get_leftop(clause);
06269 rightop = get_rightop(clause);
06270 clause_op = ((OpExpr *) clause)->opno;
06271 }
06272 else if (IsA(clause, RowCompareExpr))
06273 {
06274 RowCompareExpr *rc = (RowCompareExpr *) clause;
06275
06276 leftop = (Node *) linitial(rc->largs);
06277 rightop = (Node *) linitial(rc->rargs);
06278 clause_op = linitial_oid(rc->opnos);
06279 }
06280 else if (IsA(clause, ScalarArrayOpExpr))
06281 {
06282 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
06283
06284 leftop = (Node *) linitial(saop->args);
06285 rightop = (Node *) lsecond(saop->args);
06286 clause_op = saop->opno;
06287 found_saop = true;
06288 }
06289 else if (IsA(clause, NullTest))
06290 {
06291 NullTest *nt = (NullTest *) clause;
06292
06293 leftop = (Node *) nt->arg;
06294 rightop = NULL;
06295 clause_op = InvalidOid;
06296 if (nt->nulltesttype == IS_NULL)
06297 {
06298 found_is_null_op = true;
06299 is_null_op = true;
06300 }
06301 }
06302 else
06303 {
06304 elog(ERROR, "unsupported indexqual type: %d",
06305 (int) nodeTag(clause));
06306 continue;
06307 }
06308
06309 if (match_index_to_operand(leftop, indexcol, index))
06310 {
06311
06312 }
06313 else
06314 {
06315 Assert(match_index_to_operand(rightop, indexcol, index));
06316
06317 clause_op = get_commutator(clause_op);
06318 }
06319
06320
06321 if (OidIsValid(clause_op))
06322 {
06323 op_strategy = get_op_opfamily_strategy(clause_op,
06324 index->opfamily[indexcol]);
06325 Assert(op_strategy != 0);
06326 if (op_strategy == BTEqualStrategyNumber)
06327 eqQualHere = true;
06328 }
06329 else if (is_null_op)
06330 {
06331
06332 eqQualHere = true;
06333 }
06334
06335 if (IsA(clause, ScalarArrayOpExpr))
06336 {
06337 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
06338 int alength = estimate_array_length(lsecond(saop->args));
06339
06340 if (alength > 1)
06341 num_sa_scans *= alength;
06342 }
06343 indexBoundQuals = lappend(indexBoundQuals, rinfo);
06344 }
06345
06346
06347
06348
06349
06350
06351
06352 if (index->unique &&
06353 indexcol == index->ncolumns - 1 &&
06354 eqQualHere &&
06355 !found_saop &&
06356 !found_is_null_op)
06357 numIndexTuples = 1.0;
06358 else
06359 {
06360 List *selectivityQuals;
06361 Selectivity btreeSelectivity;
06362
06363
06364
06365
06366
06367
06368 selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
06369
06370 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
06371 index->rel->relid,
06372 JOIN_INNER,
06373 NULL);
06374 numIndexTuples = btreeSelectivity * index->rel->tuples;
06375
06376
06377
06378
06379
06380
06381 numIndexTuples = rint(numIndexTuples / num_sa_scans);
06382 }
06383
06384
06385
06386
06387 MemSet(&costs, 0, sizeof(costs));
06388 costs.numIndexTuples = numIndexTuples;
06389
06390 genericcostestimate(root, path, loop_count, &costs);
06391
06392
06393
06394
06395
06396
06397
06398
06399
06400
06401
06402
06403 if (index->tuples > 1)
06404 {
06405 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
06406 costs.indexStartupCost += descentCost;
06407 costs.indexTotalCost += costs.num_sa_scans * descentCost;
06408 }
06409
06410
06411
06412
06413
06414
06415
06416
06417
06418
06419
06420 descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
06421 costs.indexStartupCost += descentCost;
06422 costs.indexTotalCost += costs.num_sa_scans * descentCost;
06423
06424
06425
06426
06427
06428
06429
06430
06431
06432 MemSet(&vardata, 0, sizeof(vardata));
06433
06434 if (index->indexkeys[0] != 0)
06435 {
06436
06437 RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
06438
06439 Assert(rte->rtekind == RTE_RELATION);
06440 relid = rte->relid;
06441 Assert(relid != InvalidOid);
06442 colnum = index->indexkeys[0];
06443
06444 if (get_relation_stats_hook &&
06445 (*get_relation_stats_hook) (root, rte, colnum, &vardata))
06446 {
06447
06448
06449
06450
06451 if (HeapTupleIsValid(vardata.statsTuple) &&
06452 !vardata.freefunc)
06453 elog(ERROR, "no function provided to release variable stats with");
06454 }
06455 else
06456 {
06457 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
06458 ObjectIdGetDatum(relid),
06459 Int16GetDatum(colnum),
06460 BoolGetDatum(rte->inh));
06461 vardata.freefunc = ReleaseSysCache;
06462 }
06463 }
06464 else
06465 {
06466
06467 relid = index->indexoid;
06468 colnum = 1;
06469
06470 if (get_index_stats_hook &&
06471 (*get_index_stats_hook) (root, relid, colnum, &vardata))
06472 {
06473
06474
06475
06476
06477 if (HeapTupleIsValid(vardata.statsTuple) &&
06478 !vardata.freefunc)
06479 elog(ERROR, "no function provided to release variable stats with");
06480 }
06481 else
06482 {
06483 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
06484 ObjectIdGetDatum(relid),
06485 Int16GetDatum(colnum),
06486 BoolGetDatum(false));
06487 vardata.freefunc = ReleaseSysCache;
06488 }
06489 }
06490
06491 if (HeapTupleIsValid(vardata.statsTuple))
06492 {
06493 Oid sortop;
06494 float4 *numbers;
06495 int nnumbers;
06496
06497 sortop = get_opfamily_member(index->opfamily[0],
06498 index->opcintype[0],
06499 index->opcintype[0],
06500 BTLessStrategyNumber);
06501 if (OidIsValid(sortop) &&
06502 get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
06503 STATISTIC_KIND_CORRELATION,
06504 sortop,
06505 NULL,
06506 NULL, NULL,
06507 &numbers, &nnumbers))
06508 {
06509 double varCorrelation;
06510
06511 Assert(nnumbers == 1);
06512 varCorrelation = numbers[0];
06513
06514 if (index->reverse_sort[0])
06515 varCorrelation = -varCorrelation;
06516
06517 if (index->ncolumns > 1)
06518 costs.indexCorrelation = varCorrelation * 0.75;
06519 else
06520 costs.indexCorrelation = varCorrelation;
06521
06522 free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
06523 }
06524 }
06525
06526 ReleaseVariableStats(vardata);
06527
06528 *indexStartupCost = costs.indexStartupCost;
06529 *indexTotalCost = costs.indexTotalCost;
06530 *indexSelectivity = costs.indexSelectivity;
06531 *indexCorrelation = costs.indexCorrelation;
06532
06533 PG_RETURN_VOID();
06534 }
06535
06536 Datum
06537 hashcostestimate(PG_FUNCTION_ARGS)
06538 {
06539 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
06540 IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
06541 double loop_count = PG_GETARG_FLOAT8(2);
06542 Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
06543 Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
06544 Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
06545 double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
06546 GenericCosts costs;
06547
06548 MemSet(&costs, 0, sizeof(costs));
06549
06550 genericcostestimate(root, path, loop_count, &costs);
06551
06552
06553
06554
06555
06556
06557
06558
06559
06560
06561
06562
06563
06564
06565
06566
06567
06568
06569
06570
06571
06572
06573
06574
06575
06576
06577 *indexStartupCost = costs.indexStartupCost;
06578 *indexTotalCost = costs.indexTotalCost;
06579 *indexSelectivity = costs.indexSelectivity;
06580 *indexCorrelation = costs.indexCorrelation;
06581
06582 PG_RETURN_VOID();
06583 }
06584
06585 Datum
06586 gistcostestimate(PG_FUNCTION_ARGS)
06587 {
06588 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
06589 IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
06590 double loop_count = PG_GETARG_FLOAT8(2);
06591 Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
06592 Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
06593 Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
06594 double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
06595 IndexOptInfo *index = path->indexinfo;
06596 GenericCosts costs;
06597 Cost descentCost;
06598
06599 MemSet(&costs, 0, sizeof(costs));
06600
06601 genericcostestimate(root, path, loop_count, &costs);
06602
06603
06604
06605
06606
06607
06608
06609
06610
06611
06612 if (index->tree_height < 0)
06613 {
06614 if (index->pages > 1)
06615 index->tree_height = (int) (log(index->pages) / log(100.0));
06616 else
06617 index->tree_height = 0;
06618 }
06619
06620
06621
06622
06623
06624
06625 if (index->tuples > 1)
06626 {
06627 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
06628 costs.indexStartupCost += descentCost;
06629 costs.indexTotalCost += costs.num_sa_scans * descentCost;
06630 }
06631
06632
06633
06634
06635 descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
06636 costs.indexStartupCost += descentCost;
06637 costs.indexTotalCost += costs.num_sa_scans * descentCost;
06638
06639 *indexStartupCost = costs.indexStartupCost;
06640 *indexTotalCost = costs.indexTotalCost;
06641 *indexSelectivity = costs.indexSelectivity;
06642 *indexCorrelation = costs.indexCorrelation;
06643
06644 PG_RETURN_VOID();
06645 }
06646
06647 Datum
06648 spgcostestimate(PG_FUNCTION_ARGS)
06649 {
06650 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
06651 IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
06652 double loop_count = PG_GETARG_FLOAT8(2);
06653 Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
06654 Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
06655 Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
06656 double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
06657 IndexOptInfo *index = path->indexinfo;
06658 GenericCosts costs;
06659 Cost descentCost;
06660
06661 MemSet(&costs, 0, sizeof(costs));
06662
06663 genericcostestimate(root, path, loop_count, &costs);
06664
06665
06666
06667
06668
06669
06670
06671
06672
06673
06674 if (index->tree_height < 0)
06675 {
06676 if (index->pages > 1)
06677 index->tree_height = (int) (log(index->pages) / log(100.0));
06678 else
06679 index->tree_height = 0;
06680 }
06681
06682
06683
06684
06685
06686
06687 if (index->tuples > 1)
06688 {
06689 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
06690 costs.indexStartupCost += descentCost;
06691 costs.indexTotalCost += costs.num_sa_scans * descentCost;
06692 }
06693
06694
06695
06696
06697 descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
06698 costs.indexStartupCost += descentCost;
06699 costs.indexTotalCost += costs.num_sa_scans * descentCost;
06700
06701 *indexStartupCost = costs.indexStartupCost;
06702 *indexTotalCost = costs.indexTotalCost;
06703 *indexSelectivity = costs.indexSelectivity;
06704 *indexCorrelation = costs.indexCorrelation;
06705
06706 PG_RETURN_VOID();
06707 }
06708
06709
06710
06711
06712
06713
06714 typedef struct
06715 {
06716 bool haveFullScan;
06717 double partialEntries;
06718 double exactEntries;
06719 double searchEntries;
06720 double arrayScans;
06721 } GinQualCounts;
06722
06723
06724 static int
06725 find_index_column(Node *op, IndexOptInfo *index)
06726 {
06727 int i;
06728
06729 for (i = 0; i < index->ncolumns; i++)
06730 {
06731 if (match_index_to_operand(op, i, index))
06732 return i;
06733 }
06734
06735 return -1;
06736 }
06737
06738
06739
06740
06741
06742
06743 static bool
06744 gincost_pattern(IndexOptInfo *index, int indexcol,
06745 Oid clause_op, Datum query,
06746 GinQualCounts *counts)
06747 {
06748 Oid extractProcOid;
06749 Oid collation;
06750 int strategy_op;
06751 Oid lefttype,
06752 righttype;
06753 int32 nentries = 0;
06754 bool *partial_matches = NULL;
06755 Pointer *extra_data = NULL;
06756 bool *nullFlags = NULL;
06757 int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
06758 int32 i;
06759
06760
06761
06762
06763
06764
06765
06766 get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
06767 &strategy_op, &lefttype, &righttype);
06768
06769
06770
06771
06772
06773
06774 extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
06775 index->opcintype[indexcol],
06776 index->opcintype[indexcol],
06777 GIN_EXTRACTQUERY_PROC);
06778
06779 if (!OidIsValid(extractProcOid))
06780 {
06781
06782 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
06783 GIN_EXTRACTQUERY_PROC, indexcol + 1,
06784 get_rel_name(index->indexoid));
06785 }
06786
06787
06788
06789
06790 if (OidIsValid(index->indexcollations[indexcol]))
06791 collation = index->indexcollations[indexcol];
06792 else
06793 collation = DEFAULT_COLLATION_OID;
06794
06795 OidFunctionCall7Coll(extractProcOid,
06796 collation,
06797 query,
06798 PointerGetDatum(&nentries),
06799 UInt16GetDatum(strategy_op),
06800 PointerGetDatum(&partial_matches),
06801 PointerGetDatum(&extra_data),
06802 PointerGetDatum(&nullFlags),
06803 PointerGetDatum(&searchMode));
06804
06805 if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
06806 {
06807
06808 return false;
06809 }
06810
06811 for (i = 0; i < nentries; i++)
06812 {
06813
06814
06815
06816
06817 if (partial_matches && partial_matches[i])
06818 counts->partialEntries += 100;
06819 else
06820 counts->exactEntries++;
06821
06822 counts->searchEntries++;
06823 }
06824
06825 if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
06826 {
06827
06828 counts->exactEntries++;
06829 counts->searchEntries++;
06830 }
06831 else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
06832 {
06833
06834 counts->haveFullScan = true;
06835 }
06836
06837 return true;
06838 }
06839
06840
06841
06842
06843
06844
06845 static bool
06846 gincost_opexpr(IndexOptInfo *index, OpExpr *clause, GinQualCounts *counts)
06847 {
06848 Node *leftop = get_leftop((Expr *) clause);
06849 Node *rightop = get_rightop((Expr *) clause);
06850 Oid clause_op = clause->opno;
06851 int indexcol;
06852 Node *operand;
06853
06854
06855 if ((indexcol = find_index_column(leftop, index)) >= 0)
06856 {
06857 operand = rightop;
06858 }
06859 else if ((indexcol = find_index_column(rightop, index)) >= 0)
06860 {
06861 operand = leftop;
06862 clause_op = get_commutator(clause_op);
06863 }
06864 else
06865 {
06866 elog(ERROR, "could not match index to operand");
06867 operand = NULL;
06868 }
06869
06870 if (IsA(operand, RelabelType))
06871 operand = (Node *) ((RelabelType *) operand)->arg;
06872
06873
06874
06875
06876
06877
06878 if (!IsA(operand, Const))
06879 {
06880 counts->exactEntries++;
06881 counts->searchEntries++;
06882 return true;
06883 }
06884
06885
06886 if (((Const *) operand)->constisnull)
06887 return false;
06888
06889
06890 return gincost_pattern(index, indexcol, clause_op,
06891 ((Const *) operand)->constvalue,
06892 counts);
06893 }
06894
06895
06896
06897
06898
06899
06900
06901
06902
06903
06904
06905
06906
06907 static bool
06908 gincost_scalararrayopexpr(IndexOptInfo *index, ScalarArrayOpExpr *clause,
06909 double numIndexEntries,
06910 GinQualCounts *counts)
06911 {
06912 Node *leftop = (Node *) linitial(clause->args);
06913 Node *rightop = (Node *) lsecond(clause->args);
06914 Oid clause_op = clause->opno;
06915 int indexcol;
06916 ArrayType *arrayval;
06917 int16 elmlen;
06918 bool elmbyval;
06919 char elmalign;
06920 int numElems;
06921 Datum *elemValues;
06922 bool *elemNulls;
06923 GinQualCounts arraycounts;
06924 int numPossible = 0;
06925 int i;
06926
06927 Assert(clause->useOr);
06928
06929
06930 if ((indexcol = find_index_column(leftop, index)) < 0)
06931 elog(ERROR, "could not match index to operand");
06932
06933 if (IsA(rightop, RelabelType))
06934 rightop = (Node *) ((RelabelType *) rightop)->arg;
06935
06936
06937
06938
06939
06940
06941
06942 if (!IsA(rightop, Const))
06943 {
06944 counts->exactEntries++;
06945 counts->searchEntries++;
06946 counts->arrayScans *= estimate_array_length(rightop);
06947 return true;
06948 }
06949
06950
06951 if (((Const *) rightop)->constisnull)
06952 return false;
06953
06954
06955 arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
06956 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
06957 &elmlen, &elmbyval, &elmalign);
06958 deconstruct_array(arrayval,
06959 ARR_ELEMTYPE(arrayval),
06960 elmlen, elmbyval, elmalign,
06961 &elemValues, &elemNulls, &numElems);
06962
06963 memset(&arraycounts, 0, sizeof(arraycounts));
06964
06965 for (i = 0; i < numElems; i++)
06966 {
06967 GinQualCounts elemcounts;
06968
06969
06970 if (elemNulls[i])
06971 continue;
06972
06973
06974 memset(&elemcounts, 0, sizeof(elemcounts));
06975
06976 if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
06977 &elemcounts))
06978 {
06979
06980 numPossible++;
06981
06982 if (elemcounts.haveFullScan)
06983 {
06984
06985
06986
06987
06988
06989 elemcounts.partialEntries = 0;
06990 elemcounts.exactEntries = numIndexEntries;
06991 elemcounts.searchEntries = numIndexEntries;
06992 }
06993 arraycounts.partialEntries += elemcounts.partialEntries;
06994 arraycounts.exactEntries += elemcounts.exactEntries;
06995 arraycounts.searchEntries += elemcounts.searchEntries;
06996 }
06997 }
06998
06999 if (numPossible == 0)
07000 {
07001
07002 return false;
07003 }
07004
07005
07006
07007
07008
07009
07010 counts->partialEntries += arraycounts.partialEntries / numPossible;
07011 counts->exactEntries += arraycounts.exactEntries / numPossible;
07012 counts->searchEntries += arraycounts.searchEntries / numPossible;
07013
07014 counts->arrayScans *= numPossible;
07015
07016 return true;
07017 }
07018
07019
07020
07021
07022 Datum
07023 gincostestimate(PG_FUNCTION_ARGS)
07024 {
07025 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
07026 IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
07027 double loop_count = PG_GETARG_FLOAT8(2);
07028 Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
07029 Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
07030 Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
07031 double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
07032 IndexOptInfo *index = path->indexinfo;
07033 List *indexQuals = path->indexquals;
07034 List *indexOrderBys = path->indexorderbys;
07035 ListCell *l;
07036 List *selectivityQuals;
07037 double numPages = index->pages,
07038 numTuples = index->tuples;
07039 double numEntryPages,
07040 numDataPages,
07041 numPendingPages,
07042 numEntries;
07043 GinQualCounts counts;
07044 bool matchPossible;
07045 double entryPagesFetched,
07046 dataPagesFetched,
07047 dataPagesFetchedBySel;
07048 double qual_op_cost,
07049 qual_arg_cost,
07050 spc_random_page_cost,
07051 outer_scans;
07052 QualCost index_qual_cost;
07053 Relation indexRel;
07054 GinStatsData ginStats;
07055
07056
07057
07058
07059 indexRel = index_open(index->indexoid, AccessShareLock);
07060 ginGetStats(indexRel, &ginStats);
07061 index_close(indexRel, AccessShareLock);
07062
07063 numEntryPages = ginStats.nEntryPages;
07064 numDataPages = ginStats.nDataPages;
07065 numPendingPages = ginStats.nPendingPages;
07066 numEntries = ginStats.nEntries;
07067
07068
07069
07070
07071
07072
07073
07074 if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
07075 {
07076 numEntryPages = numPages;
07077 numDataPages = 0;
07078 numEntries = numTuples;
07079 }
07080 else
07081 {
07082 double scale = numPages / ginStats.nTotalPages;
07083
07084 numEntryPages = ceil(numEntryPages * scale);
07085 numDataPages = ceil(numDataPages * scale);
07086 numEntries = ceil(numEntries * scale);
07087
07088 numEntryPages = Min(numEntryPages, numPages);
07089 numDataPages = Min(numDataPages, numPages - numEntryPages);
07090 }
07091
07092
07093 if (numEntries < 1)
07094 numEntries = 1;
07095
07096
07097
07098
07099
07100 if (index->indpred != NIL)
07101 {
07102 List *predExtraQuals = NIL;
07103
07104 foreach(l, index->indpred)
07105 {
07106 Node *predQual = (Node *) lfirst(l);
07107 List *oneQual = list_make1(predQual);
07108
07109 if (!predicate_implied_by(oneQual, indexQuals))
07110 predExtraQuals = list_concat(predExtraQuals, oneQual);
07111 }
07112
07113 selectivityQuals = list_concat(predExtraQuals, indexQuals);
07114 }
07115 else
07116 selectivityQuals = indexQuals;
07117
07118
07119 *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
07120 index->rel->relid,
07121 JOIN_INNER,
07122 NULL);
07123
07124
07125 get_tablespace_page_costs(index->reltablespace,
07126 &spc_random_page_cost,
07127 NULL);
07128
07129
07130
07131
07132 *indexCorrelation = 0.0;
07133
07134
07135
07136
07137 memset(&counts, 0, sizeof(counts));
07138 counts.arrayScans = 1;
07139 matchPossible = true;
07140
07141 foreach(l, indexQuals)
07142 {
07143 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
07144 Expr *clause;
07145
07146 Assert(IsA(rinfo, RestrictInfo));
07147 clause = rinfo->clause;
07148 if (IsA(clause, OpExpr))
07149 {
07150 matchPossible = gincost_opexpr(index,
07151 (OpExpr *) clause,
07152 &counts);
07153 if (!matchPossible)
07154 break;
07155 }
07156 else if (IsA(clause, ScalarArrayOpExpr))
07157 {
07158 matchPossible = gincost_scalararrayopexpr(index,
07159 (ScalarArrayOpExpr *) clause,
07160 numEntries,
07161 &counts);
07162 if (!matchPossible)
07163 break;
07164 }
07165 else
07166 {
07167
07168 elog(ERROR, "unsupported GIN indexqual type: %d",
07169 (int) nodeTag(clause));
07170 }
07171 }
07172
07173
07174 if (!matchPossible)
07175 {
07176 *indexStartupCost = 0;
07177 *indexTotalCost = 0;
07178 *indexSelectivity = 0;
07179 PG_RETURN_VOID();
07180 }
07181
07182 if (counts.haveFullScan || indexQuals == NIL)
07183 {
07184
07185
07186
07187
07188 counts.partialEntries = 0;
07189 counts.exactEntries = numEntries;
07190 counts.searchEntries = numEntries;
07191 }
07192
07193
07194 outer_scans = loop_count;
07195
07196
07197
07198
07199
07200 entryPagesFetched = numPendingPages;
07201
07202
07203
07204
07205
07206
07207
07208
07209 entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
07210
07211
07212
07213
07214
07215
07216 entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries);
07217
07218
07219
07220
07221
07222
07223 dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries);
07224
07225
07226
07227
07228
07229
07230 if (outer_scans > 1 || counts.arrayScans > 1)
07231 {
07232 entryPagesFetched *= outer_scans * counts.arrayScans;
07233 entryPagesFetched = index_pages_fetched(entryPagesFetched,
07234 (BlockNumber) numEntryPages,
07235 numEntryPages, root);
07236 entryPagesFetched /= outer_scans;
07237 dataPagesFetched *= outer_scans * counts.arrayScans;
07238 dataPagesFetched = index_pages_fetched(dataPagesFetched,
07239 (BlockNumber) numDataPages,
07240 numDataPages, root);
07241 dataPagesFetched /= outer_scans;
07242 }
07243
07244
07245
07246
07247
07248 *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
07249
07250
07251
07252
07253
07254
07255
07256 dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
07257
07258
07259
07260
07261
07262 dataPagesFetchedBySel = ceil(*indexSelectivity *
07263 (numTuples / (BLCKSZ / SizeOfIptrData)));
07264
07265 if (dataPagesFetchedBySel > dataPagesFetched)
07266 {
07267
07268
07269
07270
07271
07272
07273
07274 dataPagesFetched = dataPagesFetchedBySel;
07275 }
07276
07277
07278 if (outer_scans > 1 || counts.arrayScans > 1)
07279 {
07280 dataPagesFetched *= outer_scans * counts.arrayScans;
07281 dataPagesFetched = index_pages_fetched(dataPagesFetched,
07282 (BlockNumber) numDataPages,
07283 numDataPages, root);
07284 dataPagesFetched /= outer_scans;
07285 }
07286
07287
07288 *indexTotalCost = *indexStartupCost +
07289 dataPagesFetched * spc_random_page_cost;
07290
07291
07292
07293
07294 cost_qual_eval(&index_qual_cost, indexQuals, root);
07295 qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
07296 cost_qual_eval(&index_qual_cost, indexOrderBys, root);
07297 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
07298 qual_op_cost = cpu_operator_cost *
07299 (list_length(indexQuals) + list_length(indexOrderBys));
07300 qual_arg_cost -= qual_op_cost;
07301 if (qual_arg_cost < 0)
07302 qual_arg_cost = 0;
07303
07304 *indexStartupCost += qual_arg_cost;
07305 *indexTotalCost += qual_arg_cost;
07306 *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
07307
07308 PG_RETURN_VOID();
07309 }