api/omqueryinternal.cc

Go to the documentation of this file.
00001 /* omqueryinternal.cc: Internals of query interface
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts
00006  * Copyright 2006,2007,2008 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 
00026 #include "autoptr.h"
00027 #include "omdebug.h"
00028 #include "omqueryinternal.h"
00029 #include "utils.h"
00030 #include "serialise.h"
00031 #include "serialise-double.h"
00032 
00033 #include <xapian/error.h>
00034 #include <xapian/termiterator.h>
00035 #include <xapian/version.h>
00036 #include "vectortermlist.h"
00037 
00038 #include <vector>
00039 #include <set>
00040 #include <algorithm>
00041 #include <math.h>
00042 #include <limits.h>
00043 #include <cfloat>
00044 
00045 using namespace std;
00046 
00047 // Properties for query operations.
00048 
00049 static unsigned int
00050 get_min_subqs(Xapian::Query::Internal::op_t op)
00051 {
00052     switch (op) {
00053         case Xapian::Query::Internal::OP_LEAF:
00054         case Xapian::Query::OP_AND:
00055         case Xapian::Query::OP_OR:
00056         case Xapian::Query::OP_XOR:
00057         case Xapian::Query::OP_NEAR:
00058         case Xapian::Query::OP_PHRASE:
00059         case Xapian::Query::OP_ELITE_SET:
00060         case Xapian::Query::OP_VALUE_RANGE:
00061         case Xapian::Query::OP_VALUE_GE:
00062         case Xapian::Query::OP_VALUE_LE:
00063             return 0;
00064         case Xapian::Query::OP_SCALE_WEIGHT:
00065             return 1;
00066         case Xapian::Query::OP_FILTER:
00067         case Xapian::Query::OP_AND_MAYBE:
00068         case Xapian::Query::OP_AND_NOT:
00069             return 2;
00070         default:
00071             Assert(false);
00072             throw Xapian::InvalidOperationError("get_min_subqs called with invalid operator type");
00073     }
00074 }
00075 
00076 static unsigned int
00077 get_max_subqs(Xapian::Query::Internal::op_t op)
00078 {
00079     switch (op) {
00080         case Xapian::Query::Internal::OP_LEAF:
00081         case Xapian::Query::OP_VALUE_RANGE:
00082         case Xapian::Query::OP_VALUE_GE:
00083         case Xapian::Query::OP_VALUE_LE:
00084             return 0;
00085         case Xapian::Query::OP_SCALE_WEIGHT:
00086             return 1;
00087         case Xapian::Query::OP_FILTER:
00088         case Xapian::Query::OP_AND_MAYBE:
00089         case Xapian::Query::OP_AND_NOT:
00090             return 2;
00091         case Xapian::Query::OP_AND:
00092         case Xapian::Query::OP_OR:
00093         case Xapian::Query::OP_XOR:
00094         case Xapian::Query::OP_NEAR:
00095         case Xapian::Query::OP_PHRASE:
00096         case Xapian::Query::OP_ELITE_SET:
00097             return UINT_MAX;
00098         default:
00099             Assert(false);
00100             throw Xapian::InvalidOperationError("get_max_subqs called with invalid operator type");
00101     }
00102 }
00103 
00104 static inline bool
00105 is_leaf(Xapian::Query::Internal::op_t op)
00106 {
00107     return (op == Xapian::Query::Internal::OP_LEAF);
00108 }
00109 
00110 // Methods for Xapian::Query::Internal
00111 
00130 string
00131 Xapian::Query::Internal::serialise(Xapian::termpos & curpos) const
00132 {
00133 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00134     string result;
00135 
00136     if (op == Xapian::Query::Internal::OP_LEAF) {
00137         result += '[';
00138         result += encode_length(tname.length());
00139         result += tname;
00140         if (term_pos != curpos) result += '@' + om_tostring(term_pos);
00141         if (wqf != 1) result += '#' + om_tostring(wqf);
00142         ++curpos;
00143     } else {
00144         result += "(";
00145         for (subquery_list::const_iterator i = subqs.begin();
00146              i != subqs.end();
00147              ++i) {
00148             result += (*i)->serialise(curpos);
00149         }
00150         switch (op) {
00151             case Xapian::Query::Internal::OP_LEAF:
00152                 Assert(false);
00153                 break;
00154             case Xapian::Query::OP_AND:
00155                 result += "&";
00156                 break;
00157             case Xapian::Query::OP_OR:
00158                 result += "|";
00159                 break;
00160             case Xapian::Query::OP_FILTER:
00161                 result += "%";
00162                 break;
00163             case Xapian::Query::OP_AND_MAYBE:
00164                 result += "+";
00165                 break;
00166             case Xapian::Query::OP_AND_NOT:
00167                 result += "-";
00168                 break;
00169             case Xapian::Query::OP_XOR:
00170                 result += "^";
00171                 break;
00172             case Xapian::Query::OP_NEAR:
00173                 result += "~" + om_tostring(parameter);
00174                 break;
00175             case Xapian::Query::OP_PHRASE:
00176                 result += "\"" + om_tostring(parameter);
00177                 break;
00178             case Xapian::Query::OP_ELITE_SET:
00179                 result += "*" + om_tostring(parameter);
00180                 break;
00181             case Xapian::Query::OP_VALUE_RANGE:
00182                 result += "]";
00183                 result += encode_length(tname.length());
00184                 result += tname;
00185                 result += encode_length(str_parameter.length());
00186                 result += str_parameter;
00187                 result += om_tostring(parameter);
00188                 break;
00189             case Xapian::Query::OP_VALUE_GE:
00190                 result += "}";
00191                 result += encode_length(tname.length());
00192                 result += tname;
00193                 result += om_tostring(parameter);
00194                 break;
00195             case Xapian::Query::OP_VALUE_LE:
00196                 result += "{";
00197                 result += encode_length(tname.length());
00198                 result += tname;
00199                 result += om_tostring(parameter);
00200                 break;
00201             case Xapian::Query::OP_SCALE_WEIGHT:
00202                 result += ".";
00203                 result += str_parameter; // serialise_double(get_dbl_parameter());
00204                 break;
00205         }
00206     }
00207     return result;
00208 #else
00209     (void)curpos;
00210     throw Xapian::InternalError("query serialisation not compiled in");
00211 #endif
00212 }
00213 
00214 string
00215 Xapian::Query::Internal::get_op_name(Xapian::Query::Internal::op_t op)
00216 {
00217     string name;
00218     switch (op) {
00219         case Xapian::Query::Internal::OP_LEAF:  name = "LEAF"; break;
00220         case Xapian::Query::OP_AND:             name = "AND"; break;
00221         case Xapian::Query::OP_OR:              name = "OR"; break;
00222         case Xapian::Query::OP_FILTER:          name = "FILTER"; break;
00223         case Xapian::Query::OP_AND_MAYBE:       name = "AND_MAYBE"; break;
00224         case Xapian::Query::OP_AND_NOT:         name = "AND_NOT"; break;
00225         case Xapian::Query::OP_XOR:             name = "XOR"; break;
00226         case Xapian::Query::OP_NEAR:            name = "NEAR"; break;
00227         case Xapian::Query::OP_PHRASE:          name = "PHRASE"; break;
00228         case Xapian::Query::OP_ELITE_SET:       name = "ELITE_SET"; break;
00229         case Xapian::Query::OP_VALUE_RANGE:     name = "VALUE_RANGE"; break;
00230         case Xapian::Query::OP_VALUE_GE:        name = "VALUE_GE"; break;
00231         case Xapian::Query::OP_VALUE_LE:        name = "VALUE_LE"; break;
00232         case Xapian::Query::OP_SCALE_WEIGHT:    name = "SCALE_WEIGHT"; break;
00233     }
00234     return name;
00235 }
00236 
00237 string
00238 Xapian::Query::Internal::get_description() const
00239 {
00240     string opstr;
00241 
00242     if (is_leaf(op)) {
00243         if (term_pos != 0) {
00244             opstr += "pos=" + om_tostring(term_pos);
00245         }
00246         if (wqf != 1) {
00247             if (!opstr.empty()) opstr += ",";
00248             opstr += "wqf=" + om_tostring(wqf);
00249         }
00250         if (!opstr.empty()) opstr = ":(" + opstr + ")";
00251         if (tname.empty()) return "<alldocuments>" + opstr;
00252         return tname + opstr;
00253     }
00254 
00255     switch (op) {
00256         case Xapian::Query::OP_VALUE_RANGE: {
00257             opstr = get_op_name(op);
00258             opstr += ' ';
00259             opstr += om_tostring(parameter);
00260             opstr += ' ';
00261             opstr += tname;
00262             opstr += ' ';
00263             opstr += str_parameter;
00264             return opstr;
00265         }
00266         case Xapian::Query::OP_VALUE_GE:
00267         case Xapian::Query::OP_VALUE_LE: {
00268             opstr = get_op_name(op);
00269             opstr += ' ';
00270             opstr += om_tostring(parameter);
00271             opstr += ' ';
00272             opstr += tname;
00273             return opstr;
00274         }
00275         case Xapian::Query::OP_SCALE_WEIGHT: {
00276             opstr += om_tostring(get_dbl_parameter());
00277             opstr += " * ";
00278             opstr += subqs[0]->get_description();
00279             return opstr;
00280         }
00281     }
00282 
00283     opstr = " " + get_op_name(op) + " ";
00284     if (op == Xapian::Query::OP_NEAR ||
00285         op == Xapian::Query::OP_PHRASE ||
00286         op == Xapian::Query::OP_ELITE_SET)
00287         opstr += om_tostring(parameter) + " ";
00288 
00289     string description;
00290     subquery_list::const_iterator i;
00291     for (i = subqs.begin(); i != subqs.end(); i++) {
00292         if (!description.empty()) description += opstr;
00293         description += (**i).get_description();
00294     }
00295 
00296     return "(" + description + ")";
00297 }
00298 
00299 Xapian::termcount
00300 Xapian::Query::Internal::get_length() const
00301 {
00302     if (is_leaf(op)) return wqf;
00303     Xapian::termcount len = 0;
00304     subquery_list::const_iterator i;
00305     for (i = subqs.begin(); i != subqs.end(); ++i) {
00306         len += (**i).get_length();
00307     }
00308     return len;
00309 }
00310 
00312 void
00313 Xapian::Query::Internal::accumulate_terms(
00314                         vector<pair<string, Xapian::termpos> > &terms) const
00315 {
00316     if (is_leaf(op)) {
00317         // We're a leaf, so just return our term.
00318         terms.push_back(make_pair(tname, term_pos));
00319     } else {
00320         subquery_list::const_iterator end = subqs.end();
00321         // not a leaf, concatenate results from all subqueries
00322         for (subquery_list::const_iterator i = subqs.begin(); i != end; ++i) {
00323             (*i)->accumulate_terms(terms);
00324         }
00325     }
00326 }
00327 
00328 struct LessByTermpos {
00329     typedef const pair<string, Xapian::termpos> argtype;
00330     bool operator()(argtype &left, argtype &right) {
00331         if (left.second != right.second) {
00332             return left.second < right.second;
00333         } else {
00334             return left.first < right.first;
00335         }
00336     }
00337 };
00338 
00339 Xapian::TermIterator
00340 Xapian::Query::Internal::get_terms() const
00341 {
00342     vector<pair<string, Xapian::termpos> > terms;
00343     accumulate_terms(terms);
00344 
00345     sort(terms.begin(), terms.end(), LessByTermpos());
00346 
00347     // remove adjacent duplicates, and return an iterator pointing
00348     // to just after the last unique element
00349     vector<pair<string, Xapian::termpos> >::iterator newlast =
00350                 unique(terms.begin(), terms.end());
00351     // and remove the rest...  (See Stroustrup 18.6.3)
00352     terms.erase(newlast, terms.end());
00353 
00354     vector<string> result;
00355     vector<pair<string, Xapian::termpos> >::const_iterator i;
00356     for (i = terms.begin(); i != terms.end(); ++i) {
00357         result.push_back(i->first);
00358     }
00359 
00360     return Xapian::TermIterator(new VectorTermList(result.begin(),
00361                                                    result.end()));
00362 }
00363 
00364 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00365 // Methods.
00366 
00367 class QUnserial {
00368   private:
00369     const char *p;
00370     const char *end;
00371     Xapian::termpos curpos;
00372 
00373     Xapian::Query::Internal * readquery();
00374     Xapian::Query::Internal * readcompound();
00375 
00376   public:
00377     QUnserial(const string & s) : p(s.c_str()), end(p + s.size()), curpos(1) { }
00378     Xapian::Query::Internal * decode();
00379 };
00380 
00381 Xapian::Query::Internal *
00382 QUnserial::decode() {
00383     DEBUGLINE(UNKNOWN, "QUnserial::decode(" << p << ")");
00384     AutoPtr<Xapian::Query::Internal> qint(readquery());
00385     if (p != end)
00386         throw Xapian::InvalidArgumentError("Bad serialised query");
00387     return qint.release();
00388 }
00389 
00390 Xapian::Query::Internal *
00391 QUnserial::readquery() {
00392     if (p == end)
00393         throw Xapian::InvalidArgumentError("Bad serialised query");
00394     switch (*p++) {
00395         case '[': {
00396             size_t length = decode_length(&p, end, true);
00397             string tname(p, length);
00398             p += length;
00399             Xapian::termpos term_pos = curpos;
00400             Xapian::termcount wqf = 1;
00401             if (p != end) {
00402                 if (*p == '@') {
00403                     char *tmp; // avoid compiler warning
00404                     term_pos = strtol(p + 1, &tmp, 10);
00405                     p = tmp;
00406                 }
00407                 if (*p == '#') {
00408                     char *tmp; // avoid compiler warning
00409                     wqf = strtol(p + 1, &tmp, 10);
00410                     p = tmp;
00411                 }
00412             }
00413             ++curpos;
00414             return new Xapian::Query::Internal(tname, wqf, term_pos);
00415         }
00416         case '(':
00417             return readcompound();
00418         default:
00419             DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00420             throw Xapian::InvalidArgumentError("Invalid query string");
00421     }
00422 }
00423 
00424 static Xapian::Query::Internal *
00425 qint_from_vector(Xapian::Query::op op,
00426                  const vector<Xapian::Query::Internal *> & vec,
00427                  Xapian::termcount parameter = 0)
00428 {
00429     Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00430     vector<Xapian::Query::Internal *>::const_iterator i;
00431     for (i = vec.begin(); i != vec.end(); i++) {
00432         qint->add_subquery(*i);
00433         delete *i;
00434     }
00435     qint->end_construction();
00436     return qint;
00437 }
00438 
00439 static Xapian::Query::Internal *
00440 qint_from_vector(Xapian::Query::op op,
00441                  const vector<Xapian::Query::Internal *> & vec,
00442                  Xapian::termcount parameter,
00443                  double dbl_parameter)
00444 {
00445     Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00446     qint->set_dbl_parameter(dbl_parameter);
00447     vector<Xapian::Query::Internal *>::const_iterator i;
00448     for (i = vec.begin(); i != vec.end(); i++) {
00449         qint->add_subquery(*i);
00450         delete *i;
00451     }
00452     qint->end_construction();
00453     return qint;
00454 }
00455 
00456 Xapian::Query::Internal *
00457 QUnserial::readcompound() {
00458     vector<Xapian::Query::Internal *> subqs;
00459     try {
00460         while (true) {
00461             if (p == end)
00462                 throw Xapian::InvalidArgumentError("Bad serialised query");
00463             switch (*p++) {
00464                 case '[':
00465                     --p;
00466                     subqs.push_back(readquery());
00467                     break;
00468                 case '(': {
00469                     subqs.push_back(readcompound());
00470                     break;
00471                 }
00472                 case '&':
00473                     return qint_from_vector(Xapian::Query::OP_AND, subqs);
00474                 case '|':
00475                     return qint_from_vector(Xapian::Query::OP_OR, subqs);
00476                 case '%':
00477                     return qint_from_vector(Xapian::Query::OP_FILTER, subqs);
00478                 case '^':
00479                     return qint_from_vector(Xapian::Query::OP_XOR, subqs);
00480                 case '+':
00481                     return qint_from_vector(Xapian::Query::OP_AND_MAYBE, subqs);
00482                 case '-':
00483                     return qint_from_vector(Xapian::Query::OP_AND_NOT, subqs);
00484                 case '~': {
00485                     char *tmp; // avoid compiler warning
00486                     Xapian::termcount window(strtol(p, &tmp, 10));
00487                     p = tmp;
00488                     return qint_from_vector(Xapian::Query::OP_NEAR, subqs, window);
00489                 }
00490                 case '"': {
00491                     char *tmp; // avoid compiler warning
00492                     Xapian::termcount window(strtol(p, &tmp, 10));
00493                     p = tmp;
00494                     return qint_from_vector(Xapian::Query::OP_PHRASE, subqs, window);
00495                 }
00496                 case '*': {
00497                     char *tmp; // avoid compiler warning
00498                     Xapian::termcount elite_set_size(strtol(p, &tmp, 10));
00499                     p = tmp;
00500                     return qint_from_vector(Xapian::Query::OP_ELITE_SET, subqs,
00501                                             elite_set_size);
00502                 }
00503                 case ']': {
00504                     size_t len = decode_length(&p, end, true);
00505                     string start(p, len);
00506                     p += len;
00507                     len = decode_length(&p, end, true);
00508                     string stop(p, len);
00509                     p += len;
00510                     char *tmp; // avoid compiler warning
00511                     Xapian::valueno valno = strtoul(p, &tmp, 10);
00512                     p = tmp;
00513                     return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_RANGE, valno,
00514                                                        start, stop);
00515                 }
00516                 case '}': {
00517                     size_t len = decode_length(&p, end, true);
00518                     string start(p, len);
00519                     p += len;
00520                     char *tmp; // avoid compiler warning
00521                     Xapian::valueno valno = strtoul(p, &tmp, 10);
00522                     p = tmp;
00523                     return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_GE, valno,
00524                                                        start);
00525                 }
00526                 case '{': {
00527                     size_t len = decode_length(&p, end, true);
00528                     string start(p, len);
00529                     p += len;
00530                     char *tmp; // avoid compiler warning
00531                     Xapian::valueno valno = strtoul(p, &tmp, 10);
00532                     p = tmp;
00533                     return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_LE, valno,
00534                                                        start);
00535                 }
00536                 case '.': {
00537                     double param = unserialise_double(&p, end);
00538                     return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
00539                                             subqs, 0, param);
00540                 }
00541                 default:
00542                     DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00543                     throw Xapian::InvalidArgumentError("Invalid query string");
00544             }
00545         }
00546     } catch (...) {
00547         vector<Xapian::Query::Internal *>::iterator i;
00548         for (i = subqs.begin(); i != subqs.end(); i++)
00549             delete *i;
00550         throw;
00551     }
00552 }
00553 
00554 Xapian::Query::Internal *
00555 Xapian::Query::Internal::unserialise(const string &s)
00556 {
00557     Assert(s.length() > 1);
00558     QUnserial u(s);
00559     Xapian::Query::Internal * qint = u.decode();
00560     AssertEq(s, qint->serialise());
00561     return qint;
00562 }
00563 #else
00564 Xapian::Query::Internal *
00565 Xapian::Query::Internal::unserialise(const string &)
00566 {
00567     throw Xapian::InternalError("query serialisation not compiled in");
00568 }
00569 #endif
00570 
00578 void
00579 Xapian::Query::Internal::swap(Xapian::Query::Internal &other)
00580 {
00581     std::swap(op, other.op);
00582     subqs.swap(other.subqs);
00583     std::swap(parameter, other.parameter);
00584     std::swap(tname, other.tname);
00585     std::swap(str_parameter, other.str_parameter);
00586     std::swap(term_pos, other.term_pos);
00587     std::swap(wqf, other.wqf);
00588 }
00589 
00590 Xapian::Query::Internal::Internal(const Xapian::Query::Internal &copyme)
00591         : Xapian::Internal::RefCntBase(),
00592           op(copyme.op),
00593           subqs(),
00594           parameter(copyme.parameter),
00595           tname(copyme.tname),
00596           str_parameter(copyme.str_parameter),
00597           term_pos(copyme.term_pos),
00598           wqf(copyme.wqf)
00599 {
00600     for (subquery_list::const_iterator i = copyme.subqs.begin();
00601          i != copyme.subqs.end();
00602          ++i) {
00603         subqs.push_back(new Xapian::Query::Internal(**i));
00604     }
00605 }
00606 
00608 // Methods for making new query objects
00609 
00610 Xapian::Query::Internal::Internal(const string & tname_, Xapian::termcount wqf_,
00611                  Xapian::termpos term_pos_)
00612         : op(Xapian::Query::Internal::OP_LEAF),
00613           subqs(),
00614           parameter(0),
00615           tname(tname_),
00616           term_pos(term_pos_),
00617           wqf(wqf_)
00618 {
00619     validate_query();
00620 }
00621 
00622 Xapian::Query::Internal::Internal(op_t op_, Xapian::termcount parameter_)
00623         : op(op_),
00624           subqs(),
00625           parameter(parameter_),
00626           tname(),
00627           term_pos(0),
00628           wqf(0)
00629 {
00630     if (parameter != 0 && op != OP_PHRASE && op != OP_NEAR && op != OP_ELITE_SET)
00631         throw Xapian::InvalidArgumentError("parameter is only meaningful for OP_NEAR, OP_PHRASE, or OP_ELITE_SET");
00632 }
00633 
00634 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00635                                   const string &begin, const string &end)
00636         : op(op_),
00637           parameter(Xapian::termcount(valno)),
00638           tname(begin),
00639           str_parameter(end)
00640 {
00641     if (op != OP_VALUE_RANGE)
00642         throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_RANGE");
00643     validate_query();
00644 }
00645 
00646 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00647                                   const std::string &value)
00648         : op(op_),
00649           parameter(Xapian::termcount(valno)),
00650           tname(value)
00651 {
00652     if (op != OP_VALUE_GE && op != OP_VALUE_LE)
00653         throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_GE or OP_VALUE_LE");
00654     if (op == OP_VALUE_GE && value.empty()) {
00655         // Map '<value> >= ""' to MatchAll.
00656         op = OP_LEAF;
00657         parameter = 0;
00658         term_pos = 0;
00659         wqf = 1;
00660     }
00661     validate_query();
00662 }
00663 
00664 Xapian::Query::Internal::~Internal()
00665 {
00666     subquery_list::iterator i;
00667     for (i = subqs.begin(); i != subqs.end(); i++) {
00668         delete *i;
00669     }
00670 }
00671 
00672 Xapian::Query::Internal *
00673 Xapian::Query::Internal::end_construction()
00674 {
00675     DEBUGCALL(API, void, "Xapian::Query::Internal::end_construction", "");
00676     validate_query();
00677     Xapian::Query::Internal * qint = simplify_query();
00678     if (qint) qint->validate_query();
00679     return qint;
00680 }
00681 
00682 void
00683 Xapian::Query::Internal::validate_query() const
00684 {
00685     DEBUGCALL(API, void, "Xapian::Query::Internal::validate_query", "");
00686 
00687     // Check that the number of subqueries is in acceptable limits for this op
00688     if (subqs.size() < get_min_subqs(op) ||
00689         subqs.size() > get_max_subqs(op)) {
00690         throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) +
00691                 " requires a minimum of " + om_tostring(get_min_subqs(op)) +
00692                 " and a maximum of " + om_tostring(get_max_subqs(op)) +
00693                 " sub queries, had " +
00694                 om_tostring(subqs.size()) + ".");
00695     }
00696 
00697     if (op == OP_SCALE_WEIGHT && get_dbl_parameter() < 0) {
00698         throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) + " requires a non-negative parameter.");
00699     }
00700 
00701     // Check that the termname is null in a branch query, unless the op
00702     // is OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE.
00703     Assert(is_leaf(op) ||
00704            op == OP_VALUE_RANGE ||
00705            op == OP_VALUE_GE ||
00706            op == OP_VALUE_LE ||
00707            tname.empty());
00708 }
00709 
00710 bool
00711 Xapian::Query::Internal::simplify_matchnothing()
00712 {
00713     subquery_list::iterator sq;
00714     switch (op) {
00715         case OP_PHRASE:
00716         case OP_NEAR:
00717         case OP_AND:
00718         case OP_FILTER:
00719             // Doing an "AND" type operation - if we've got any MatchNothing
00720             // nodes, we match nothing.
00721             for (sq = subqs.begin(); sq != subqs.end(); sq++) {
00722                 if (*sq == 0) {
00723                     for (sq = subqs.begin(); sq != subqs.end(); sq++)
00724                         delete *sq;
00725                     subqs.clear();
00726                     return true;
00727                 }
00728             }
00729             break;
00730         case OP_ELITE_SET:
00731         case OP_OR:
00732         case OP_XOR:
00733             // Doing an "OR" type operation - if we've got any MatchNothing
00734             // subnodes, drop them; except that we mustn't become an empty
00735             // node due to this, so we never drop a MatchNothing subnode
00736             // if it's the only subnode.
00737             sq = subqs.begin();
00738             while (sq != subqs.end() && subqs.size() > 1) {
00739                 if (*sq == 0) {
00740                     sq = subqs.erase(sq);
00741                 } else {
00742                     ++sq;
00743                 }
00744             }
00745             break;
00746         case OP_AND_MAYBE:
00747         case OP_AND_NOT:
00748             // If left hand side is MatchNothing, we match nothing.
00749             // If right hand side is MatchNothing, replace node with LHS.
00750             // So, if either node is MatchNothing, replace node with LHS.
00751             // Easiest way to do this is to remove the right hand node,
00752             // and let simplify_query() perform the replacement of
00753             // the unary operator with its one remaining child.
00754             Assert(subqs.size() == 2);
00755             if (subqs[0] == 0 || subqs[1] == 0) {
00756                 sq = subqs.begin();
00757                 ++sq;
00758                 delete *sq;
00759                 subqs.erase(sq);
00760             }
00761             break;
00762         case OP_SCALE_WEIGHT:
00763             Assert(subqs.size() == 1);
00764             // We should have already handled OP_SCALE_WEIGHT applied to
00765             // MatchNothing in the relevant constructor.
00766             Assert(subqs[0]);
00767             break;
00768         case OP_LEAF:
00769             // Do nothing.
00770             break;
00771     }
00772     return false;
00773 }
00774 
00775 Xapian::Query::Internal *
00776 Xapian::Query::Internal::simplify_query()
00777 {
00778     DEBUGCALL(API, bool, "Xapian::Query::Internal::simplify_query", "");
00779 
00780     // Simplify any MatchNothing nodes.
00781     if (simplify_matchnothing()) {
00782         return 0;
00783     }
00784 
00785     // General simplifications, dependent on operator.
00786     switch (op) {
00787         case OP_LEAF:
00788             return this;
00789         case OP_VALUE_RANGE:
00790             // If the start of the range is greater than the end then we won't
00791             // match anything.
00792             if (tname > str_parameter) return 0;
00793             return this;
00794         case OP_VALUE_GE:
00795         case OP_VALUE_LE:
00796             return this;
00797         case OP_SCALE_WEIGHT:
00798             if (fabs(get_dbl_parameter() - 1.0) > DBL_EPSILON) return this;
00799             // If the multiplier is 1, this node doesn't actually do anything,
00800             // so we leave it to be removed.
00801             break;
00802         case OP_PHRASE: case OP_NEAR:
00803             // Default to the number of subqueries.
00804             if (!parameter) parameter = subqs.size();
00805             // Flatten out sub queries if this is a phrase (or near) operation.
00806             flatten_subqs();
00807             break;
00808         case OP_ELITE_SET:
00809             if (!parameter) {
00810                 // Default to sqrt(number of subqueries), or a minimum of 10.
00811                 // Gives a reasonable default.
00812                 if (subqs.size() <= 100) {
00813                     parameter = 10;
00814                 } else {
00815                     parameter = Xapian::termcount(ceil(sqrt(double(subqs.size()))));
00816                     Assert(parameter > 10);
00817                 }
00818             }
00819             break;
00820         case OP_OR: case OP_AND: case OP_XOR:
00821             // Remove duplicates if we can.
00822             if (subqs.size() > 1) collapse_subqs();
00823             break;
00824         default:
00825             break;
00826     }
00827 
00828     // If we have no subqueries, then we're an empty query.
00829     if (subqs.empty())
00830         return 0;
00831 
00832     // Any nodes which are valid with only one subquery can be replaced by
00833     // that solitary subquery.
00834     if (subqs.size() == 1) {
00835         Xapian::Query::Internal * qint = subqs[0];
00836         subqs[0] = 0;
00837         return qint;
00838     }
00839 
00840     return this;
00841 }
00842 
00843 struct SortPosName {
00844     bool operator()(const Xapian::Query::Internal * left,
00845                     const Xapian::Query::Internal * right) const {
00846         Assert(left != 0);
00847         Assert(right != 0);
00848         Assert(is_leaf(left->op));
00849         Assert(is_leaf(right->op));
00850         if (left->term_pos != right->term_pos) {
00851             return left->term_pos < right->term_pos;
00852         } else {
00853             return left->tname < right->tname;
00854         }
00855     }
00856 };
00857 
00861 void
00862 Xapian::Query::Internal::collapse_subqs()
00863 {
00864     Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
00865     typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
00866     subqtable sqtab;
00867 
00868     subquery_list::iterator sq = subqs.begin();
00869     while (sq != subqs.end()) {
00870         Assert(*sq != 0);
00871         if (is_leaf((*sq)->op)) {
00872             Assert((*sq)->subqs.empty());
00873             subqtable::iterator s = sqtab.find(*sq);
00874             if (s == sqtab.end()) {
00875                 sqtab.insert(*sq);
00876                 ++sq;
00877             } else {
00878                 AssertEq((*s)->tname, (*sq)->tname);
00879                 AssertEq((*s)->term_pos, (*sq)->term_pos);
00880                 (*s)->wqf += (*sq)->wqf;
00881                 // Rather than incrementing sq, delete the current
00882                 // element, as it has been merged into the other
00883                 // equivalent term.
00884                 delete *sq;
00885                 sq = subqs.erase(sq);
00886             }
00887         } else {
00888             ++sq;
00889         }
00890     }
00891 }
00892 
00894 void
00895 Xapian::Query::Internal::flatten_subqs()
00896 {
00897     if (op != Xapian::Query::OP_NEAR && op != Xapian::Query::OP_PHRASE) {
00898         throw Xapian::UnimplementedError("NEAR or PHRASE with non-term subqueries isn't well supported currently");
00899     }
00900 
00901     subquery_list::iterator sq;
00902     for (sq = subqs.begin(); sq != subqs.end(); sq++) {
00903         if (!is_leaf((*sq)->op)) break;
00904     }
00905 
00906     if (sq != subqs.end()) {
00907         if ((*sq)->op == Xapian::Query::OP_NEAR ||
00908             (*sq)->op == Xapian::Query::OP_PHRASE) {
00909             // FIXME: A PHRASE (B PHRASE C) -> (A PHRASE B) AND (B PHRASE C)?
00910             throw Xapian::UnimplementedError("Can't use NEAR/PHRASE with a subexpression containing NEAR or PHRASE");
00911         }
00912 
00913         AutoPtr<Xapian::Query::Internal> flattenme(*sq);
00914         *sq = 0;
00915 
00916         // New query to build up.
00917         Xapian::Query::Internal newq(flattenme->op, 0);
00918 
00919         subquery_list::iterator j;
00920         for (j = flattenme->subqs.begin(); j != flattenme->subqs.end(); ++j) {
00921             *sq = *j;
00922             *j = 0;
00923             flatten_subqs();
00924             newq.add_subquery(this);
00925             delete *sq;
00926             *sq = 0;
00927         }
00928 
00929         Xapian::Query::Internal * newq2 = newq.end_construction();
00930         Assert(newq2);
00931         this->swap(*newq2);
00932     }
00933 }
00934 
00935 void
00936 Xapian::Query::Internal::add_subquery(const Xapian::Query::Internal * subq)
00937 {
00938     Assert(!is_leaf(op));
00939     if (subq == 0) {
00940         subqs.push_back(0);
00941     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
00942         // Distribute the subquery.
00943         for (subquery_list::const_iterator i = subq->subqs.begin();
00944              i != subq->subqs.end(); i++) {
00945             add_subquery(*i);
00946         }
00947     } else {
00948         subqs.push_back(new Xapian::Query::Internal(*subq));
00949     }
00950 }
00951 
00952 void
00953 Xapian::Query::Internal::set_dbl_parameter(double dbl_parameter_)
00954 {
00955     // We store the double parameter encoded as a string because
00956     // Xapian::Query::Internal is defined in an external API header and we want
00957     // to avoid any risk of ABI breakage (we suspect it would be OK, but it's
00958     // not risking).  FIXME: rework for 1.1.0
00959     str_parameter = serialise_double(dbl_parameter_);
00960 }
00961 
00962 double
00963 Xapian::Query::Internal::get_dbl_parameter() const
00964 {
00965     // We store the double parameter encoded as a string because
00966     // Xapian::Query::Internal is defined in an external API header and we want
00967     // to avoid any risk of ABI breakage (we suspect it would be OK, but it's
00968     // not risking).  FIXME: rework for 1.1.0
00969     const char * p = str_parameter.data();
00970     const char * end = p + str_parameter.size();
00971     return unserialise_double(&p, end);
00972 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.