00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "queryoptimiser.h"
00025
00026 #include "andmaybepostlist.h"
00027 #include "andnotpostlist.h"
00028 #include "autoptr.h"
00029 #include "emptypostlist.h"
00030 #include "exactphrasepostlist.h"
00031 #include "multiandpostlist.h"
00032 #include "multimatch.h"
00033 #include "omassert.h"
00034 #include "omdebug.h"
00035 #include "omqueryinternal.h"
00036 #include "orpostlist.h"
00037 #include "phrasepostlist.h"
00038 #include "postlist.h"
00039 #include "valuegepostlist.h"
00040 #include "valuerangepostlist.h"
00041 #include "xorpostlist.h"
00042
00043 #include <algorithm>
00044 #include <list>
00045 #include <map>
00046 #include <string>
00047 #include <vector>
00048
00049 using namespace std;
00050
00051 PostList *
00052 QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor)
00053 {
00054 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_subquery",
00055 query << ", " << factor);
00056
00057
00058 if (!query) RETURN(new EmptyPostList());
00059
00060 switch (query->op) {
00061 case Xapian::Query::Internal::OP_LEAF:
00062 RETURN(do_leaf(query, factor));
00063
00064 case Xapian::Query::OP_AND:
00065 case Xapian::Query::OP_FILTER:
00066 case Xapian::Query::OP_NEAR:
00067 case Xapian::Query::OP_PHRASE:
00068 RETURN(do_and_like(query, factor));
00069
00070 case Xapian::Query::OP_OR:
00071 case Xapian::Query::OP_XOR:
00072 case Xapian::Query::OP_ELITE_SET:
00073 RETURN(do_or_like(query, factor));
00074
00075 case Xapian::Query::OP_AND_NOT: {
00076 AssertEq(query->subqs.size(), 2);
00077 PostList * l = do_subquery(query->subqs[0], factor);
00078 PostList * r = do_subquery(query->subqs[1], 0.0);
00079 RETURN(new AndNotPostList(l, r, matcher, db_size));
00080 }
00081
00082 case Xapian::Query::OP_AND_MAYBE: {
00083 AssertEq(query->subqs.size(), 2);
00084 PostList * l = do_subquery(query->subqs[0], factor);
00085 PostList * r = do_subquery(query->subqs[1], factor);
00086 RETURN(new AndMaybePostList(l, r, matcher, db_size));
00087 }
00088
00089 case Xapian::Query::OP_VALUE_RANGE: {
00090 Xapian::valueno valno(query->parameter);
00091 const string & range_begin = query->tname;
00092 const string & range_end = query->str_parameter;
00093 RETURN(new ValueRangePostList(&db, valno, range_begin, range_end));
00094 }
00095
00096 case Xapian::Query::OP_VALUE_GE: {
00097 Xapian::valueno valno(query->parameter);
00098 const string & range_begin = query->tname;
00099 RETURN(new ValueGePostList(&db, valno, range_begin));
00100 }
00101
00102 case Xapian::Query::OP_VALUE_LE: {
00103 Xapian::valueno valno(query->parameter);
00104 const string & range_end = query->tname;
00105 RETURN(new ValueRangePostList(&db, valno, "", range_end));
00106 }
00107
00108 case Xapian::Query::OP_SCALE_WEIGHT: {
00109 AssertEq(query->subqs.size(), 1);
00110 double sub_factor = factor;
00111 if (sub_factor != 0.0) sub_factor *= query->get_dbl_parameter();
00112 RETURN(do_subquery(query->subqs[0], sub_factor));
00113 }
00114
00115 default:
00116 Assert(false);
00117 RETURN(NULL);
00118 }
00119 }
00120
00121 struct PosFilter {
00122 PosFilter(Xapian::Query::Internal::op_t op_, size_t begin_, size_t end_,
00123 Xapian::termcount window_)
00124 : op(op_), begin(begin_), end(end_), window(window_) { }
00125
00126 Xapian::Query::Internal::op_t op;
00127
00129 size_t begin, end;
00130
00131 Xapian::termcount window;
00132 };
00133
00134 PostList *
00135 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor)
00136 {
00137 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_and_like",
00138 query << ", " << factor);
00139
00140 list<PosFilter> pos_filters;
00141 vector<PostList *> plists;
00142 do_and_like(query, factor, plists, pos_filters);
00143 AssertRel(plists.size(), >=, 2);
00144
00145 PostList * pl;
00146 pl = new MultiAndPostList(plists.begin(), plists.end(), matcher, db_size);
00147
00148
00149
00150
00151
00152
00153 list<PosFilter>::iterator i;
00154 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
00155 const PosFilter & filter = *i;
00156
00157
00158
00159 vector<PostList *> terms(plists.begin() + filter.begin,
00160 plists.begin() + filter.end);
00161
00162 Xapian::termcount window = filter.window;
00163 if (filter.op == Xapian::Query::OP_NEAR) {
00164 pl = new NearPostList(pl, window, terms);
00165 } else if (window == filter.end - filter.begin) {
00166 AssertEq(filter.op, Xapian::Query::OP_PHRASE);
00167 pl = new ExactPhrasePostList(pl, terms);
00168 } else {
00169 AssertEq(filter.op, Xapian::Query::OP_PHRASE);
00170 pl = new PhrasePostList(pl, window, terms);
00171 }
00172 }
00173
00174 RETURN(pl);
00175 }
00176
00177 inline bool is_and_like(Xapian::Query::Internal::op_t op) {
00178 return op == Xapian::Query::OP_AND || op == Xapian::Query::OP_FILTER ||
00179 op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE;
00180 }
00181
00182 void
00183 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor,
00184 vector<PostList *> & and_plists,
00185 list<PosFilter> & pos_filters)
00186 {
00187 DEBUGCALL(MATCH, void, "QueryOptimiser::do_and_like",
00188 query << ", " << factor << ", [and_plists], [pos_filters]");
00189
00190 Xapian::Query::Internal::op_t op = query->op;
00191 Assert(is_and_like(op));
00192
00193 bool positional = false;
00194 if (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR) {
00195
00196
00197
00198 if (!db.has_positions()) {
00199 op = Xapian::Query::OP_AND;
00200 } else {
00201 positional = true;
00202 }
00203 }
00204
00205 const Xapian::Query::Internal::subquery_list &queries = query->subqs;
00206 AssertRel(queries.size(), >=, 2);
00207
00208 for (size_t i = 0; i != queries.size(); ++i) {
00209
00210 if (i == 1 && op == Xapian::Query::OP_FILTER) factor = 0.0;
00211
00212 const Xapian::Query::Internal * subq = queries[i];
00213 if (is_and_like(subq->op)) {
00214 do_and_like(subq, factor, and_plists, pos_filters);
00215 } else {
00216 PostList * pl = do_subquery(subq, factor);
00217 and_plists.push_back(pl);
00218 }
00219 }
00220
00221 if (positional) {
00222
00223 size_t end = and_plists.size();
00224 size_t begin = end - queries.size();
00225 Xapian::termcount window = query->parameter;
00226
00227 pos_filters.push_back(PosFilter(op, begin, end, window));
00228 }
00229 }
00230
00236 struct CmpMaxOrTerms {
00245 bool operator()(const PostList *a, const PostList *b) {
00246 if (a->get_termfreq_max() == 0) return false;
00247 if (b->get_termfreq_max() == 0) return true;
00248
00249 #if defined(__i386__) || defined(__mc68000__)
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266 volatile double a_max_wt = a->get_maxweight();
00267 volatile double b_max_wt = b->get_maxweight();
00268 return a_max_wt > b_max_wt;
00269 #else
00270 return a->get_maxweight() > b->get_maxweight();
00271 #endif
00272 }
00273 };
00274
00275 template<class CLASS> struct delete_ptr {
00276 void operator()(CLASS *p) { delete p; }
00277 };
00278
00280 struct ComparePostListTermFreqAscending {
00282 bool operator()(const PostList *a, const PostList *b) {
00283 return a->get_termfreq_est() > b->get_termfreq_est();
00284 }
00285 };
00286
00287 PostList *
00288 QueryOptimiser::do_or_like(const Xapian::Query::Internal *query, double factor)
00289 {
00290 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_or_like",
00291 query << ", " << factor);
00292
00293
00294
00295 Xapian::Query::Internal::op_t op = query->op;
00296 Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR ||
00297 op == Xapian::Query::OP_XOR);
00298
00299 const Xapian::Query::Internal::subquery_list &queries = query->subqs;
00300 AssertRel(queries.size(), >=, 2);
00301
00302 vector<PostList *> postlists;
00303 postlists.reserve(queries.size());
00304
00305 Xapian::Query::Internal::subquery_list::const_iterator q;
00306 for (q = queries.begin(); q != queries.end(); ++q) {
00307 postlists.push_back(do_subquery(*q, factor));
00308 }
00309
00310 if (op == Xapian::Query::OP_ELITE_SET) {
00311
00312 Xapian::termcount elite_set_size = query->parameter;
00313 Assert(elite_set_size > 0);
00314
00315 if (postlists.size() > elite_set_size) {
00316
00317
00318 for_each(postlists.begin(), postlists.end(),
00319 mem_fun(&PostList::recalc_maxweight));
00320
00321 nth_element(postlists.begin(),
00322 postlists.begin() + elite_set_size - 1,
00323 postlists.end(), CmpMaxOrTerms());
00324
00325 for_each(postlists.begin() + elite_set_size, postlists.end(),
00326 delete_ptr<PostList>());
00327
00328 if (elite_set_size == 1) RETURN(postlists[0]);
00329
00330 postlists.resize(elite_set_size);
00331 }
00332 }
00333
00334
00335
00336 make_heap(postlists.begin(), postlists.end(),
00337 ComparePostListTermFreqAscending());
00338
00339
00340
00341
00342
00343
00344
00345 AssertRel(postlists.size(), >=, 2);
00346 while (true) {
00347
00348
00349
00350
00351
00352
00353 PostList * r = postlists.front();
00354 pop_heap(postlists.begin(), postlists.end(),
00355 ComparePostListTermFreqAscending());
00356 postlists.pop_back();
00357 PostList * l = postlists.front();
00358
00359 PostList * pl;
00360 if (op == Xapian::Query::OP_XOR) {
00361 pl = new XorPostList(l, r, matcher, db_size);
00362 } else {
00363 pl = new OrPostList(l, r, matcher, db_size);
00364 }
00365
00366 if (postlists.size() == 1) RETURN(pl);
00367
00368 pop_heap(postlists.begin(), postlists.end(),
00369 ComparePostListTermFreqAscending());
00370 postlists.back() = pl;
00371 push_heap(postlists.begin(), postlists.end(),
00372 ComparePostListTermFreqAscending());
00373 }
00374 }