00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025
00026 #include "autoptr.h"
00027 #include "omdebug.h"
00028 #include "omqueryinternal.h"
00029 #include "utils.h"
00030 #include "serialise.h"
00031 #include "serialise-double.h"
00032
00033 #include <xapian/error.h>
00034 #include <xapian/termiterator.h>
00035 #include <xapian/version.h>
00036 #include "vectortermlist.h"
00037
00038 #include <vector>
00039 #include <set>
00040 #include <algorithm>
00041 #include <math.h>
00042 #include <limits.h>
00043 #include <cfloat>
00044
00045 using namespace std;
00046
00047
00048
00049 static unsigned int
00050 get_min_subqs(Xapian::Query::Internal::op_t op)
00051 {
00052 switch (op) {
00053 case Xapian::Query::Internal::OP_LEAF:
00054 case Xapian::Query::OP_AND:
00055 case Xapian::Query::OP_OR:
00056 case Xapian::Query::OP_XOR:
00057 case Xapian::Query::OP_NEAR:
00058 case Xapian::Query::OP_PHRASE:
00059 case Xapian::Query::OP_ELITE_SET:
00060 case Xapian::Query::OP_VALUE_RANGE:
00061 case Xapian::Query::OP_VALUE_GE:
00062 case Xapian::Query::OP_VALUE_LE:
00063 return 0;
00064 case Xapian::Query::OP_SCALE_WEIGHT:
00065 return 1;
00066 case Xapian::Query::OP_FILTER:
00067 case Xapian::Query::OP_AND_MAYBE:
00068 case Xapian::Query::OP_AND_NOT:
00069 return 2;
00070 default:
00071 Assert(false);
00072 throw Xapian::InvalidOperationError("get_min_subqs called with invalid operator type");
00073 }
00074 }
00075
00076 static unsigned int
00077 get_max_subqs(Xapian::Query::Internal::op_t op)
00078 {
00079 switch (op) {
00080 case Xapian::Query::Internal::OP_LEAF:
00081 case Xapian::Query::OP_VALUE_RANGE:
00082 case Xapian::Query::OP_VALUE_GE:
00083 case Xapian::Query::OP_VALUE_LE:
00084 return 0;
00085 case Xapian::Query::OP_SCALE_WEIGHT:
00086 return 1;
00087 case Xapian::Query::OP_FILTER:
00088 case Xapian::Query::OP_AND_MAYBE:
00089 case Xapian::Query::OP_AND_NOT:
00090 return 2;
00091 case Xapian::Query::OP_AND:
00092 case Xapian::Query::OP_OR:
00093 case Xapian::Query::OP_XOR:
00094 case Xapian::Query::OP_NEAR:
00095 case Xapian::Query::OP_PHRASE:
00096 case Xapian::Query::OP_ELITE_SET:
00097 return UINT_MAX;
00098 default:
00099 Assert(false);
00100 throw Xapian::InvalidOperationError("get_max_subqs called with invalid operator type");
00101 }
00102 }
00103
00104 static inline bool
00105 is_leaf(Xapian::Query::Internal::op_t op)
00106 {
00107 return (op == Xapian::Query::Internal::OP_LEAF);
00108 }
00109
00110
00111
00130 string
00131 Xapian::Query::Internal::serialise(Xapian::termpos & curpos) const
00132 {
00133 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00134 string result;
00135
00136 if (op == Xapian::Query::Internal::OP_LEAF) {
00137 result += '[';
00138 result += encode_length(tname.length());
00139 result += tname;
00140 if (term_pos != curpos) result += '@' + om_tostring(term_pos);
00141 if (wqf != 1) result += '#' + om_tostring(wqf);
00142 ++curpos;
00143 } else {
00144 result += "(";
00145 for (subquery_list::const_iterator i = subqs.begin();
00146 i != subqs.end();
00147 ++i) {
00148 result += (*i)->serialise(curpos);
00149 }
00150 switch (op) {
00151 case Xapian::Query::Internal::OP_LEAF:
00152 Assert(false);
00153 break;
00154 case Xapian::Query::OP_AND:
00155 result += "&";
00156 break;
00157 case Xapian::Query::OP_OR:
00158 result += "|";
00159 break;
00160 case Xapian::Query::OP_FILTER:
00161 result += "%";
00162 break;
00163 case Xapian::Query::OP_AND_MAYBE:
00164 result += "+";
00165 break;
00166 case Xapian::Query::OP_AND_NOT:
00167 result += "-";
00168 break;
00169 case Xapian::Query::OP_XOR:
00170 result += "^";
00171 break;
00172 case Xapian::Query::OP_NEAR:
00173 result += "~" + om_tostring(parameter);
00174 break;
00175 case Xapian::Query::OP_PHRASE:
00176 result += "\"" + om_tostring(parameter);
00177 break;
00178 case Xapian::Query::OP_ELITE_SET:
00179 result += "*" + om_tostring(parameter);
00180 break;
00181 case Xapian::Query::OP_VALUE_RANGE:
00182 result += "]";
00183 result += encode_length(tname.length());
00184 result += tname;
00185 result += encode_length(str_parameter.length());
00186 result += str_parameter;
00187 result += om_tostring(parameter);
00188 break;
00189 case Xapian::Query::OP_VALUE_GE:
00190 result += "}";
00191 result += encode_length(tname.length());
00192 result += tname;
00193 result += om_tostring(parameter);
00194 break;
00195 case Xapian::Query::OP_VALUE_LE:
00196 result += "{";
00197 result += encode_length(tname.length());
00198 result += tname;
00199 result += om_tostring(parameter);
00200 break;
00201 case Xapian::Query::OP_SCALE_WEIGHT:
00202 result += ".";
00203 result += str_parameter;
00204 break;
00205 }
00206 }
00207 return result;
00208 #else
00209 (void)curpos;
00210 throw Xapian::InternalError("query serialisation not compiled in");
00211 #endif
00212 }
00213
00214 string
00215 Xapian::Query::Internal::get_op_name(Xapian::Query::Internal::op_t op)
00216 {
00217 string name;
00218 switch (op) {
00219 case Xapian::Query::Internal::OP_LEAF: name = "LEAF"; break;
00220 case Xapian::Query::OP_AND: name = "AND"; break;
00221 case Xapian::Query::OP_OR: name = "OR"; break;
00222 case Xapian::Query::OP_FILTER: name = "FILTER"; break;
00223 case Xapian::Query::OP_AND_MAYBE: name = "AND_MAYBE"; break;
00224 case Xapian::Query::OP_AND_NOT: name = "AND_NOT"; break;
00225 case Xapian::Query::OP_XOR: name = "XOR"; break;
00226 case Xapian::Query::OP_NEAR: name = "NEAR"; break;
00227 case Xapian::Query::OP_PHRASE: name = "PHRASE"; break;
00228 case Xapian::Query::OP_ELITE_SET: name = "ELITE_SET"; break;
00229 case Xapian::Query::OP_VALUE_RANGE: name = "VALUE_RANGE"; break;
00230 case Xapian::Query::OP_VALUE_GE: name = "VALUE_GE"; break;
00231 case Xapian::Query::OP_VALUE_LE: name = "VALUE_LE"; break;
00232 case Xapian::Query::OP_SCALE_WEIGHT: name = "SCALE_WEIGHT"; break;
00233 }
00234 return name;
00235 }
00236
00237 string
00238 Xapian::Query::Internal::get_description() const
00239 {
00240 string opstr;
00241
00242 if (is_leaf(op)) {
00243 if (term_pos != 0) {
00244 opstr += "pos=" + om_tostring(term_pos);
00245 }
00246 if (wqf != 1) {
00247 if (!opstr.empty()) opstr += ",";
00248 opstr += "wqf=" + om_tostring(wqf);
00249 }
00250 if (!opstr.empty()) opstr = ":(" + opstr + ")";
00251 if (tname.empty()) return "<alldocuments>" + opstr;
00252 return tname + opstr;
00253 }
00254
00255 switch (op) {
00256 case Xapian::Query::OP_VALUE_RANGE: {
00257 opstr = get_op_name(op);
00258 opstr += ' ';
00259 opstr += om_tostring(parameter);
00260 opstr += ' ';
00261 opstr += tname;
00262 opstr += ' ';
00263 opstr += str_parameter;
00264 return opstr;
00265 }
00266 case Xapian::Query::OP_VALUE_GE:
00267 case Xapian::Query::OP_VALUE_LE: {
00268 opstr = get_op_name(op);
00269 opstr += ' ';
00270 opstr += om_tostring(parameter);
00271 opstr += ' ';
00272 opstr += tname;
00273 return opstr;
00274 }
00275 case Xapian::Query::OP_SCALE_WEIGHT: {
00276 opstr += om_tostring(get_dbl_parameter());
00277 opstr += " * ";
00278 opstr += subqs[0]->get_description();
00279 return opstr;
00280 }
00281 }
00282
00283 opstr = " " + get_op_name(op) + " ";
00284 if (op == Xapian::Query::OP_NEAR ||
00285 op == Xapian::Query::OP_PHRASE ||
00286 op == Xapian::Query::OP_ELITE_SET)
00287 opstr += om_tostring(parameter) + " ";
00288
00289 string description;
00290 subquery_list::const_iterator i;
00291 for (i = subqs.begin(); i != subqs.end(); i++) {
00292 if (!description.empty()) description += opstr;
00293 description += (**i).get_description();
00294 }
00295
00296 return "(" + description + ")";
00297 }
00298
00299 Xapian::termcount
00300 Xapian::Query::Internal::get_length() const
00301 {
00302 if (is_leaf(op)) return wqf;
00303 Xapian::termcount len = 0;
00304 subquery_list::const_iterator i;
00305 for (i = subqs.begin(); i != subqs.end(); ++i) {
00306 len += (**i).get_length();
00307 }
00308 return len;
00309 }
00310
00312 void
00313 Xapian::Query::Internal::accumulate_terms(
00314 vector<pair<string, Xapian::termpos> > &terms) const
00315 {
00316 if (is_leaf(op)) {
00317
00318 terms.push_back(make_pair(tname, term_pos));
00319 } else {
00320 subquery_list::const_iterator end = subqs.end();
00321
00322 for (subquery_list::const_iterator i = subqs.begin(); i != end; ++i) {
00323 (*i)->accumulate_terms(terms);
00324 }
00325 }
00326 }
00327
00328 struct LessByTermpos {
00329 typedef const pair<string, Xapian::termpos> argtype;
00330 bool operator()(argtype &left, argtype &right) {
00331 if (left.second != right.second) {
00332 return left.second < right.second;
00333 } else {
00334 return left.first < right.first;
00335 }
00336 }
00337 };
00338
00339 Xapian::TermIterator
00340 Xapian::Query::Internal::get_terms() const
00341 {
00342 vector<pair<string, Xapian::termpos> > terms;
00343 accumulate_terms(terms);
00344
00345 sort(terms.begin(), terms.end(), LessByTermpos());
00346
00347
00348
00349 vector<pair<string, Xapian::termpos> >::iterator newlast =
00350 unique(terms.begin(), terms.end());
00351
00352 terms.erase(newlast, terms.end());
00353
00354 vector<string> result;
00355 vector<pair<string, Xapian::termpos> >::const_iterator i;
00356 for (i = terms.begin(); i != terms.end(); ++i) {
00357 result.push_back(i->first);
00358 }
00359
00360 return Xapian::TermIterator(new VectorTermList(result.begin(),
00361 result.end()));
00362 }
00363
00364 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00365
00366
00367 class QUnserial {
00368 private:
00369 const char *p;
00370 const char *end;
00371 Xapian::termpos curpos;
00372
00373 Xapian::Query::Internal * readquery();
00374 Xapian::Query::Internal * readcompound();
00375
00376 public:
00377 QUnserial(const string & s) : p(s.c_str()), end(p + s.size()), curpos(1) { }
00378 Xapian::Query::Internal * decode();
00379 };
00380
00381 Xapian::Query::Internal *
00382 QUnserial::decode() {
00383 DEBUGLINE(UNKNOWN, "QUnserial::decode(" << p << ")");
00384 AutoPtr<Xapian::Query::Internal> qint(readquery());
00385 if (p != end)
00386 throw Xapian::InvalidArgumentError("Bad serialised query");
00387 return qint.release();
00388 }
00389
00390 Xapian::Query::Internal *
00391 QUnserial::readquery() {
00392 if (p == end)
00393 throw Xapian::InvalidArgumentError("Bad serialised query");
00394 switch (*p++) {
00395 case '[': {
00396 size_t length = decode_length(&p, end, true);
00397 string tname(p, length);
00398 p += length;
00399 Xapian::termpos term_pos = curpos;
00400 Xapian::termcount wqf = 1;
00401 if (p != end) {
00402 if (*p == '@') {
00403 char *tmp;
00404 term_pos = strtol(p + 1, &tmp, 10);
00405 p = tmp;
00406 }
00407 if (*p == '#') {
00408 char *tmp;
00409 wqf = strtol(p + 1, &tmp, 10);
00410 p = tmp;
00411 }
00412 }
00413 ++curpos;
00414 return new Xapian::Query::Internal(tname, wqf, term_pos);
00415 }
00416 case '(':
00417 return readcompound();
00418 default:
00419 DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00420 throw Xapian::InvalidArgumentError("Invalid query string");
00421 }
00422 }
00423
00424 static Xapian::Query::Internal *
00425 qint_from_vector(Xapian::Query::op op,
00426 const vector<Xapian::Query::Internal *> & vec,
00427 Xapian::termcount parameter = 0)
00428 {
00429 Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00430 vector<Xapian::Query::Internal *>::const_iterator i;
00431 for (i = vec.begin(); i != vec.end(); i++) {
00432 qint->add_subquery(*i);
00433 delete *i;
00434 }
00435 qint->end_construction();
00436 return qint;
00437 }
00438
00439 static Xapian::Query::Internal *
00440 qint_from_vector(Xapian::Query::op op,
00441 const vector<Xapian::Query::Internal *> & vec,
00442 Xapian::termcount parameter,
00443 double dbl_parameter)
00444 {
00445 Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00446 qint->set_dbl_parameter(dbl_parameter);
00447 vector<Xapian::Query::Internal *>::const_iterator i;
00448 for (i = vec.begin(); i != vec.end(); i++) {
00449 qint->add_subquery(*i);
00450 delete *i;
00451 }
00452 qint->end_construction();
00453 return qint;
00454 }
00455
00456 Xapian::Query::Internal *
00457 QUnserial::readcompound() {
00458 vector<Xapian::Query::Internal *> subqs;
00459 try {
00460 while (true) {
00461 if (p == end)
00462 throw Xapian::InvalidArgumentError("Bad serialised query");
00463 switch (*p++) {
00464 case '[':
00465 --p;
00466 subqs.push_back(readquery());
00467 break;
00468 case '(': {
00469 subqs.push_back(readcompound());
00470 break;
00471 }
00472 case '&':
00473 return qint_from_vector(Xapian::Query::OP_AND, subqs);
00474 case '|':
00475 return qint_from_vector(Xapian::Query::OP_OR, subqs);
00476 case '%':
00477 return qint_from_vector(Xapian::Query::OP_FILTER, subqs);
00478 case '^':
00479 return qint_from_vector(Xapian::Query::OP_XOR, subqs);
00480 case '+':
00481 return qint_from_vector(Xapian::Query::OP_AND_MAYBE, subqs);
00482 case '-':
00483 return qint_from_vector(Xapian::Query::OP_AND_NOT, subqs);
00484 case '~': {
00485 char *tmp;
00486 Xapian::termcount window(strtol(p, &tmp, 10));
00487 p = tmp;
00488 return qint_from_vector(Xapian::Query::OP_NEAR, subqs, window);
00489 }
00490 case '"': {
00491 char *tmp;
00492 Xapian::termcount window(strtol(p, &tmp, 10));
00493 p = tmp;
00494 return qint_from_vector(Xapian::Query::OP_PHRASE, subqs, window);
00495 }
00496 case '*': {
00497 char *tmp;
00498 Xapian::termcount elite_set_size(strtol(p, &tmp, 10));
00499 p = tmp;
00500 return qint_from_vector(Xapian::Query::OP_ELITE_SET, subqs,
00501 elite_set_size);
00502 }
00503 case ']': {
00504 size_t len = decode_length(&p, end, true);
00505 string start(p, len);
00506 p += len;
00507 len = decode_length(&p, end, true);
00508 string stop(p, len);
00509 p += len;
00510 char *tmp;
00511 Xapian::valueno valno = strtoul(p, &tmp, 10);
00512 p = tmp;
00513 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_RANGE, valno,
00514 start, stop);
00515 }
00516 case '}': {
00517 size_t len = decode_length(&p, end, true);
00518 string start(p, len);
00519 p += len;
00520 char *tmp;
00521 Xapian::valueno valno = strtoul(p, &tmp, 10);
00522 p = tmp;
00523 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_GE, valno,
00524 start);
00525 }
00526 case '{': {
00527 size_t len = decode_length(&p, end, true);
00528 string start(p, len);
00529 p += len;
00530 char *tmp;
00531 Xapian::valueno valno = strtoul(p, &tmp, 10);
00532 p = tmp;
00533 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_LE, valno,
00534 start);
00535 }
00536 case '.': {
00537 double param = unserialise_double(&p, end);
00538 return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
00539 subqs, 0, param);
00540 }
00541 default:
00542 DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00543 throw Xapian::InvalidArgumentError("Invalid query string");
00544 }
00545 }
00546 } catch (...) {
00547 vector<Xapian::Query::Internal *>::iterator i;
00548 for (i = subqs.begin(); i != subqs.end(); i++)
00549 delete *i;
00550 throw;
00551 }
00552 }
00553
00554 Xapian::Query::Internal *
00555 Xapian::Query::Internal::unserialise(const string &s)
00556 {
00557 Assert(s.length() > 1);
00558 QUnserial u(s);
00559 Xapian::Query::Internal * qint = u.decode();
00560 AssertEq(s, qint->serialise());
00561 return qint;
00562 }
00563 #else
00564 Xapian::Query::Internal *
00565 Xapian::Query::Internal::unserialise(const string &)
00566 {
00567 throw Xapian::InternalError("query serialisation not compiled in");
00568 }
00569 #endif
00570
00578 void
00579 Xapian::Query::Internal::swap(Xapian::Query::Internal &other)
00580 {
00581 std::swap(op, other.op);
00582 subqs.swap(other.subqs);
00583 std::swap(parameter, other.parameter);
00584 std::swap(tname, other.tname);
00585 std::swap(str_parameter, other.str_parameter);
00586 std::swap(term_pos, other.term_pos);
00587 std::swap(wqf, other.wqf);
00588 }
00589
00590 Xapian::Query::Internal::Internal(const Xapian::Query::Internal ©me)
00591 : Xapian::Internal::RefCntBase(),
00592 op(copyme.op),
00593 subqs(),
00594 parameter(copyme.parameter),
00595 tname(copyme.tname),
00596 str_parameter(copyme.str_parameter),
00597 term_pos(copyme.term_pos),
00598 wqf(copyme.wqf)
00599 {
00600 for (subquery_list::const_iterator i = copyme.subqs.begin();
00601 i != copyme.subqs.end();
00602 ++i) {
00603 subqs.push_back(new Xapian::Query::Internal(**i));
00604 }
00605 }
00606
00608
00609
00610 Xapian::Query::Internal::Internal(const string & tname_, Xapian::termcount wqf_,
00611 Xapian::termpos term_pos_)
00612 : op(Xapian::Query::Internal::OP_LEAF),
00613 subqs(),
00614 parameter(0),
00615 tname(tname_),
00616 term_pos(term_pos_),
00617 wqf(wqf_)
00618 {
00619 validate_query();
00620 }
00621
00622 Xapian::Query::Internal::Internal(op_t op_, Xapian::termcount parameter_)
00623 : op(op_),
00624 subqs(),
00625 parameter(parameter_),
00626 tname(),
00627 term_pos(0),
00628 wqf(0)
00629 {
00630 if (parameter != 0 && op != OP_PHRASE && op != OP_NEAR && op != OP_ELITE_SET)
00631 throw Xapian::InvalidArgumentError("parameter is only meaningful for OP_NEAR, OP_PHRASE, or OP_ELITE_SET");
00632 }
00633
00634 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00635 const string &begin, const string &end)
00636 : op(op_),
00637 parameter(Xapian::termcount(valno)),
00638 tname(begin),
00639 str_parameter(end)
00640 {
00641 if (op != OP_VALUE_RANGE)
00642 throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_RANGE");
00643 validate_query();
00644 }
00645
00646 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00647 const std::string &value)
00648 : op(op_),
00649 parameter(Xapian::termcount(valno)),
00650 tname(value)
00651 {
00652 if (op != OP_VALUE_GE && op != OP_VALUE_LE)
00653 throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_GE or OP_VALUE_LE");
00654 if (op == OP_VALUE_GE && value.empty()) {
00655
00656 op = OP_LEAF;
00657 parameter = 0;
00658 term_pos = 0;
00659 wqf = 1;
00660 }
00661 validate_query();
00662 }
00663
00664 Xapian::Query::Internal::~Internal()
00665 {
00666 subquery_list::iterator i;
00667 for (i = subqs.begin(); i != subqs.end(); i++) {
00668 delete *i;
00669 }
00670 }
00671
00672 Xapian::Query::Internal *
00673 Xapian::Query::Internal::end_construction()
00674 {
00675 DEBUGCALL(API, void, "Xapian::Query::Internal::end_construction", "");
00676 validate_query();
00677 Xapian::Query::Internal * qint = simplify_query();
00678 if (qint) qint->validate_query();
00679 return qint;
00680 }
00681
00682 void
00683 Xapian::Query::Internal::validate_query() const
00684 {
00685 DEBUGCALL(API, void, "Xapian::Query::Internal::validate_query", "");
00686
00687
00688 if (subqs.size() < get_min_subqs(op) ||
00689 subqs.size() > get_max_subqs(op)) {
00690 throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) +
00691 " requires a minimum of " + om_tostring(get_min_subqs(op)) +
00692 " and a maximum of " + om_tostring(get_max_subqs(op)) +
00693 " sub queries, had " +
00694 om_tostring(subqs.size()) + ".");
00695 }
00696
00697 if (op == OP_SCALE_WEIGHT && get_dbl_parameter() < 0) {
00698 throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) + " requires a non-negative parameter.");
00699 }
00700
00701
00702
00703 Assert(is_leaf(op) ||
00704 op == OP_VALUE_RANGE ||
00705 op == OP_VALUE_GE ||
00706 op == OP_VALUE_LE ||
00707 tname.empty());
00708 }
00709
00710 bool
00711 Xapian::Query::Internal::simplify_matchnothing()
00712 {
00713 subquery_list::iterator sq;
00714 switch (op) {
00715 case OP_PHRASE:
00716 case OP_NEAR:
00717 case OP_AND:
00718 case OP_FILTER:
00719
00720
00721 for (sq = subqs.begin(); sq != subqs.end(); sq++) {
00722 if (*sq == 0) {
00723 for (sq = subqs.begin(); sq != subqs.end(); sq++)
00724 delete *sq;
00725 subqs.clear();
00726 return true;
00727 }
00728 }
00729 break;
00730 case OP_ELITE_SET:
00731 case OP_OR:
00732 case OP_XOR:
00733
00734
00735
00736
00737 sq = subqs.begin();
00738 while (sq != subqs.end() && subqs.size() > 1) {
00739 if (*sq == 0) {
00740 sq = subqs.erase(sq);
00741 } else {
00742 ++sq;
00743 }
00744 }
00745 break;
00746 case OP_AND_MAYBE:
00747 case OP_AND_NOT:
00748
00749
00750
00751
00752
00753
00754 Assert(subqs.size() == 2);
00755 if (subqs[0] == 0 || subqs[1] == 0) {
00756 sq = subqs.begin();
00757 ++sq;
00758 delete *sq;
00759 subqs.erase(sq);
00760 }
00761 break;
00762 case OP_SCALE_WEIGHT:
00763 Assert(subqs.size() == 1);
00764
00765
00766 Assert(subqs[0]);
00767 break;
00768 case OP_LEAF:
00769
00770 break;
00771 }
00772 return false;
00773 }
00774
00775 Xapian::Query::Internal *
00776 Xapian::Query::Internal::simplify_query()
00777 {
00778 DEBUGCALL(API, bool, "Xapian::Query::Internal::simplify_query", "");
00779
00780
00781 if (simplify_matchnothing()) {
00782 return 0;
00783 }
00784
00785
00786 switch (op) {
00787 case OP_LEAF:
00788 return this;
00789 case OP_VALUE_RANGE:
00790
00791
00792 if (tname > str_parameter) return 0;
00793 return this;
00794 case OP_VALUE_GE:
00795 case OP_VALUE_LE:
00796 return this;
00797 case OP_SCALE_WEIGHT:
00798 if (fabs(get_dbl_parameter() - 1.0) > DBL_EPSILON) return this;
00799
00800
00801 break;
00802 case OP_PHRASE: case OP_NEAR:
00803
00804 if (!parameter) parameter = subqs.size();
00805
00806 flatten_subqs();
00807 break;
00808 case OP_ELITE_SET:
00809 if (!parameter) {
00810
00811
00812 if (subqs.size() <= 100) {
00813 parameter = 10;
00814 } else {
00815 parameter = Xapian::termcount(ceil(sqrt(double(subqs.size()))));
00816 Assert(parameter > 10);
00817 }
00818 }
00819 break;
00820 case OP_OR: case OP_AND: case OP_XOR:
00821
00822 if (subqs.size() > 1) collapse_subqs();
00823 break;
00824 default:
00825 break;
00826 }
00827
00828
00829 if (subqs.empty())
00830 return 0;
00831
00832
00833
00834 if (subqs.size() == 1) {
00835 Xapian::Query::Internal * qint = subqs[0];
00836 subqs[0] = 0;
00837 return qint;
00838 }
00839
00840 return this;
00841 }
00842
00843 struct SortPosName {
00844 bool operator()(const Xapian::Query::Internal * left,
00845 const Xapian::Query::Internal * right) const {
00846 Assert(left != 0);
00847 Assert(right != 0);
00848 Assert(is_leaf(left->op));
00849 Assert(is_leaf(right->op));
00850 if (left->term_pos != right->term_pos) {
00851 return left->term_pos < right->term_pos;
00852 } else {
00853 return left->tname < right->tname;
00854 }
00855 }
00856 };
00857
00861 void
00862 Xapian::Query::Internal::collapse_subqs()
00863 {
00864 Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
00865 typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
00866 subqtable sqtab;
00867
00868 subquery_list::iterator sq = subqs.begin();
00869 while (sq != subqs.end()) {
00870 Assert(*sq != 0);
00871 if (is_leaf((*sq)->op)) {
00872 Assert((*sq)->subqs.empty());
00873 subqtable::iterator s = sqtab.find(*sq);
00874 if (s == sqtab.end()) {
00875 sqtab.insert(*sq);
00876 ++sq;
00877 } else {
00878 AssertEq((*s)->tname, (*sq)->tname);
00879 AssertEq((*s)->term_pos, (*sq)->term_pos);
00880 (*s)->wqf += (*sq)->wqf;
00881
00882
00883
00884 delete *sq;
00885 sq = subqs.erase(sq);
00886 }
00887 } else {
00888 ++sq;
00889 }
00890 }
00891 }
00892
00894 void
00895 Xapian::Query::Internal::flatten_subqs()
00896 {
00897 if (op != Xapian::Query::OP_NEAR && op != Xapian::Query::OP_PHRASE) {
00898 throw Xapian::UnimplementedError("NEAR or PHRASE with non-term subqueries isn't well supported currently");
00899 }
00900
00901 subquery_list::iterator sq;
00902 for (sq = subqs.begin(); sq != subqs.end(); sq++) {
00903 if (!is_leaf((*sq)->op)) break;
00904 }
00905
00906 if (sq != subqs.end()) {
00907 if ((*sq)->op == Xapian::Query::OP_NEAR ||
00908 (*sq)->op == Xapian::Query::OP_PHRASE) {
00909
00910 throw Xapian::UnimplementedError("Can't use NEAR/PHRASE with a subexpression containing NEAR or PHRASE");
00911 }
00912
00913 AutoPtr<Xapian::Query::Internal> flattenme(*sq);
00914 *sq = 0;
00915
00916
00917 Xapian::Query::Internal newq(flattenme->op, 0);
00918
00919 subquery_list::iterator j;
00920 for (j = flattenme->subqs.begin(); j != flattenme->subqs.end(); ++j) {
00921 *sq = *j;
00922 *j = 0;
00923 flatten_subqs();
00924 newq.add_subquery(this);
00925 delete *sq;
00926 *sq = 0;
00927 }
00928
00929 Xapian::Query::Internal * newq2 = newq.end_construction();
00930 Assert(newq2);
00931 this->swap(*newq2);
00932 }
00933 }
00934
00935 void
00936 Xapian::Query::Internal::add_subquery(const Xapian::Query::Internal * subq)
00937 {
00938 Assert(!is_leaf(op));
00939 if (subq == 0) {
00940 subqs.push_back(0);
00941 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
00942
00943 for (subquery_list::const_iterator i = subq->subqs.begin();
00944 i != subq->subqs.end(); i++) {
00945 add_subquery(*i);
00946 }
00947 } else {
00948 subqs.push_back(new Xapian::Query::Internal(*subq));
00949 }
00950 }
00951
00952 void
00953 Xapian::Query::Internal::set_dbl_parameter(double dbl_parameter_)
00954 {
00955
00956
00957
00958
00959 str_parameter = serialise_double(dbl_parameter_);
00960 }
00961
00962 double
00963 Xapian::Query::Internal::get_dbl_parameter() const
00964 {
00965
00966
00967
00968
00969 const char * p = str_parameter.data();
00970 const char * end = p + str_parameter.size();
00971 return unserialise_double(&p, end);
00972 }