include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* Copyright 1999,2000,2001 BrightStation PLC
00005  * Copyright 2001,2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026 
#include <iterator>
#include <string>

#include <xapian/base.h>
#include <xapian/deprecated.h>
#include <xapian/sorter.h>
#include <xapian/types.h>
#include <xapian/termiterator.h>
#include <xapian/visibility.h>
00035 
00036 namespace Xapian {
00037 
00038 class Database;
00039 class Document;
00040 class ErrorHandler;
00041 class ExpandDecider;
00042 class MSetIterator;
00043 class Query;
00044 class Weight;
00045 
00049 class XAPIAN_VISIBILITY_DEFAULT MSet {
00050     public:
00051         class Internal;
00053         Xapian::Internal::RefCntPtr<Internal> internal;
00054 
00056         explicit MSet(MSet::Internal * internal_);
00057 
00059         MSet();
00060 
00062         ~MSet();
00063 
00065         MSet(const MSet & other);
00066 
00068         void operator=(const MSet &other);
00069 
00085         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00086 
00089         void fetch(const MSetIterator &item) const;
00090 
00093         void fetch() const;
00094 
00099         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00100 
00102         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00103 
00111         Xapian::doccount get_termfreq(const std::string &tname) const;
00112 
00120         Xapian::weight get_termweight(const std::string &tname) const;
00121 
00129         Xapian::doccount get_firstitem() const;
00130 
00140         Xapian::doccount get_matches_lower_bound() const;
00141 
00154         Xapian::doccount get_matches_estimated() const;
00155 
00165         Xapian::doccount get_matches_upper_bound() const;
00166 
00172         Xapian::weight get_max_possible() const;
00173 
00187         Xapian::weight get_max_attained() const;
00188 
00190         Xapian::doccount size() const;
00191 
00193         Xapian::doccount max_size() const { return size(); }
00194 
00196         bool empty() const;
00197 
00199         void swap(MSet & other);
00200 
00202         MSetIterator begin() const;
00203 
00205         MSetIterator end() const;
00206 
00208         MSetIterator back() const;
00209 
00219         MSetIterator operator[](Xapian::doccount i) const;
00220 
00222 
00223         typedef MSetIterator value_type; // FIXME: not assignable...
00224         typedef MSetIterator iterator;
00225         typedef MSetIterator const_iterator;
00226         typedef MSetIterator & reference; // Hmm
00227         typedef MSetIterator & const_reference;
00228         typedef MSetIterator * pointer; // Hmm
00229         typedef Xapian::doccount_diff difference_type;
00230         typedef Xapian::doccount size_type;
00232 
00234         std::string get_description() const;
00235 };
00236 
00240 class XAPIAN_VISIBILITY_DEFAULT MSetIterator {
00241     private:
00242         friend class MSet;
00243         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00244         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00245 
00246         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00247             : index(index_), mset(mset_) { }
00248 
00249         Xapian::doccount index;
00250         MSet mset;
00251 
00252     public:
00256         MSetIterator() : index(0), mset() { }
00257 
00258         ~MSetIterator() { }
00259 
00261         MSetIterator(const MSetIterator &other) {
00262             index = other.index;
00263             mset = other.mset;
00264         }
00265 
00267         void operator=(const MSetIterator &other) {
00268             index = other.index;
00269             mset = other.mset;
00270         }
00271 
00273         MSetIterator & operator++() {
00274             ++index;
00275             return *this;
00276         }
00277 
00279         MSetIterator operator++(int) {
00280             MSetIterator tmp = *this;
00281             ++index;
00282             return tmp;
00283         }
00284 
00286         MSetIterator & operator--() {
00287             --index;
00288             return *this;
00289         }
00290 
00292         MSetIterator operator--(int) {
00293             MSetIterator tmp = *this;
00294             --index;
00295             return tmp;
00296         }
00297 
00299         Xapian::docid operator*() const;
00300 
00317         Xapian::Document get_document() const;
00318 
00325         Xapian::doccount get_rank() const {
00326             return mset.get_firstitem() + index;
00327         }
00328 
00330         Xapian::weight get_weight() const;
00331 
00334         std::string get_collapse_key() const;
00335 
00352         Xapian::doccount get_collapse_count() const;
00353 
00359         Xapian::percent get_percent() const;
00360 
00362         std::string get_description() const;
00363 
00365 
00366         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00367         typedef Xapian::docid value_type;
00368         typedef Xapian::doccount_diff difference_type;
00369         typedef Xapian::docid * pointer;
00370         typedef Xapian::docid & reference;
00372 };
00373 
00374 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00375 {
00376     return (a.index == b.index);
00377 }
00378 
00379 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00380 {
00381     return (a.index != b.index);
00382 }
00383 
00384 class ESetIterator;
00385 
00390 class XAPIAN_VISIBILITY_DEFAULT ESet {
00391     public:
00392         class Internal;
00394         Xapian::Internal::RefCntPtr<Internal> internal;
00395 
00397         ESet();
00398 
00400         ~ESet();
00401 
00403         ESet(const ESet & other);
00404 
00406         void operator=(const ESet &other);
00407 
00412         Xapian::termcount get_ebound() const;
00413 
00415         Xapian::termcount size() const;
00416 
00418         Xapian::termcount max_size() const { return size(); }
00419 
00421         bool empty() const;
00422 
00424         void swap(ESet & other);
00425 
00427         ESetIterator begin() const;
00428 
00430         ESetIterator end() const;
00431 
00433         ESetIterator back() const;
00434 
00436         ESetIterator operator[](Xapian::termcount i) const;
00437 
00439         std::string get_description() const;
00440 };
00441 
00443 class XAPIAN_VISIBILITY_DEFAULT ESetIterator {
00444     private:
00445         friend class ESet;
00446         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00447         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00448 
00449         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00450             : index(index_), eset(eset_) { }
00451 
00452         Xapian::termcount index;
00453         ESet eset;
00454 
00455     public:
00459         ESetIterator() : index(0), eset() { }
00460 
00461         ~ESetIterator() { }
00462 
00464         ESetIterator(const ESetIterator &other) {
00465             index = other.index;
00466             eset = other.eset;
00467         }
00468 
00470         void operator=(const ESetIterator &other) {
00471             index = other.index;
00472             eset = other.eset;
00473         }
00474 
00476         ESetIterator & operator++() {
00477             ++index;
00478             return *this;
00479         }
00480 
00482         ESetIterator operator++(int) {
00483             ESetIterator tmp = *this;
00484             ++index;
00485             return tmp;
00486         }
00487 
00489         ESetIterator & operator--() {
00490             --index;
00491             return *this;
00492         }
00493 
00495         ESetIterator operator--(int) {
00496             ESetIterator tmp = *this;
00497             --index;
00498             return tmp;
00499         }
00500 
00502         const std::string & operator *() const;
00503 
00505         Xapian::weight get_weight() const;
00506 
00508         std::string get_description() const;
00509 
00511 
00512         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00513         typedef std::string value_type;
00514         typedef Xapian::termcount_diff difference_type;
00515         typedef std::string * pointer;
00516         typedef std::string & reference;
00518 };
00519 
00520 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00521 {
00522     return (a.index == b.index);
00523 }
00524 
00525 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00526 {
00527     return (a.index != b.index);
00528 }
00529 
00534 class XAPIAN_VISIBILITY_DEFAULT RSet {
00535     public:
00537         class Internal;
00538 
00540         Xapian::Internal::RefCntPtr<Internal> internal;
00541 
00543         RSet(const RSet &rset);
00544 
00546         void operator=(const RSet &rset);
00547 
00549         RSet();
00550 
00552         ~RSet();
00553 
00555         Xapian::doccount size() const;
00556 
00558         bool empty() const;
00559 
00561         void add_document(Xapian::docid did);
00562 
00564         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00565 
00567         void remove_document(Xapian::docid did);
00568 
00570         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00571 
00573         bool contains(Xapian::docid did) const;
00574 
00576         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00577 
00579         std::string get_description() const;
00580 };
00581 
00584 class XAPIAN_VISIBILITY_DEFAULT MatchDecider {
00585     public:
00591         virtual bool operator()(const Xapian::Document &doc) const = 0;
00592 
00594         virtual ~MatchDecider();
00595 };
00596 
00607 class XAPIAN_VISIBILITY_DEFAULT Enquire {
00608     public:
00610         Enquire(const Enquire & other);
00611 
00613         void operator=(const Enquire & other);
00614 
00615         class Internal;
00617         Xapian::Internal::RefCntPtr<Internal> internal;
00618 
00643         explicit Enquire(const Database &database, ErrorHandler * errorhandler_ = 0);
00644 
00647         ~Enquire();
00648 
00655         void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00656 
00663         const Xapian::Query & get_query() const;
00664 
00671         void set_weighting_scheme(const Weight &weight_);
00672 
00699         void set_collapse_key(Xapian::valueno collapse_key);
00700 
00701         typedef enum {
00702             ASCENDING = 1,
00703             DESCENDING = 0,
00704             DONT_CARE = 2
00705         } docid_order;
00706 
00730         void set_docid_order(docid_order order);
00731 
00750         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00751 
00756         void set_sort_by_relevance();
00757 
00770         void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00771 
00780         void set_sort_by_key(Xapian::Sorter * sorter, bool ascending = true);
00781 
00795         void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00796                                               bool ascending = true);
00797 
00807         void set_sort_by_key_then_relevance(Xapian::Sorter * sorter,
00808                                             bool ascending = true);
00809 
00829         void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00830                                               bool ascending = true);
00831 
00848         void set_sort_by_relevance_then_key(Xapian::Sorter * sorter,
00849                                             bool ascending = true);
00850 
00883         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00884                       Xapian::doccount checkatleast = 0,
00885                       const RSet * omrset = 0,
00886                       const MatchDecider * mdecider = 0) const;
00887         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00888                       Xapian::doccount checkatleast,
00889                       const RSet * omrset,
00890                       const MatchDecider * mdecider,
00891                       const MatchDecider * matchspy) const;
00892         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00893                       const RSet * omrset,
00894                       const MatchDecider * mdecider = 0) const {
00895             return get_mset(first, maxitems, 0, omrset, mdecider);
00896         }
00897 
00898         static const int INCLUDE_QUERY_TERMS = 1;
00899         static const int USE_EXACT_TERMFREQ = 2;
00900 #ifndef _MSC_VER
00902         XAPIAN_DEPRECATED(static const int include_query_terms) = 1;
00904         XAPIAN_DEPRECATED(static const int use_exact_termfreq) = 2;
00905 #else
00906         // Work around MSVC stupidity (you get a warning for deprecating a
00907         // declaration).
00908         static const int include_query_terms = 1;
00909         static const int use_exact_termfreq = 2;
00910 #pragma deprecated("Xapian::Enquire::include_query_terms", "Xapian::Enquire::use_exact_termfreq")
00911 #endif
00912 
00935         ESet get_eset(Xapian::termcount maxitems,
00936                         const RSet & omrset,
00937                         int flags = 0,
00938                         double k = 1.0,
00939                         const Xapian::ExpandDecider * edecider = 0) const;
00940 
00954         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00955                                const Xapian::ExpandDecider * edecider) const {
00956             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00957         }
00958 
00987         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00988 
00990         TermIterator get_matching_terms_end(Xapian::docid /*did*/) const {
00991             return TermIterator(NULL);
00992         }
00993 
01016         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
01017 
01019         TermIterator get_matching_terms_end(const MSetIterator &/*it*/) const {
01020             return TermIterator(NULL);
01021         }
01022 
01035         XAPIAN_DEPRECATED(
01036         void register_match_decider(const std::string &name,
01037                                     const MatchDecider *mdecider = NULL));
01038 
01040         std::string get_description() const;
01041 };
01042 
01043 }
01044 
01045 class RemoteServer;
01046 class ScaleWeight;
01047 
01048 namespace Xapian {
01049 
01051 class XAPIAN_VISIBILITY_DEFAULT Weight {
01052     friend class Enquire; // So Enquire can clone us
01053     friend class ::RemoteServer; // So RemoteServer can clone us - FIXME
01054     friend class ::ScaleWeight;
01055     public:
01056         class Internal;
01057     protected:
01058         Weight(const Weight &);
01059     private:
01060         void operator=(Weight &);
01061 
01071         virtual Weight * clone() const = 0;
01072 
01073     protected:
01074         const Internal * internal; // Weight::Internal == Stats
01075         Xapian::doclength querysize;
01076         Xapian::termcount wqf;
01077         std::string tname;
01078 
01079     public:
01080         // FIXME:1.1: initialise internal to NULL here
01081         Weight() { }
01082         virtual ~Weight();
01083 
01096         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01097                         Xapian::termcount wqf_, const std::string & tname_) const;
01098 
01103         virtual std::string name() const = 0;
01104 
01106         virtual std::string serialise() const = 0;
01107 
01109         virtual Weight * unserialise(const std::string &s) const = 0;
01110 
01118         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01119                                       Xapian::doclength len) const = 0;
01120 
01126         virtual Xapian::weight get_maxpart() const = 0;
01127 
01136         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01137 
01141         virtual Xapian::weight get_maxextra() const = 0;
01142 
01144         virtual bool get_sumpart_needs_doclength() const; /* { return true; } */
01145 };
01146 
01148 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
01149     public:
01150         BoolWeight * clone() const;
01151         BoolWeight() { }
01152         ~BoolWeight();
01153         std::string name() const;
01154         std::string serialise() const;
01155         BoolWeight * unserialise(const std::string & s) const;
01156         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01157         Xapian::weight get_maxpart() const;
01158 
01159         Xapian::weight get_sumextra(Xapian::doclength len) const;
01160         Xapian::weight get_maxextra() const;
01161 
01162         bool get_sumpart_needs_doclength() const;
01163 };
01164 
01177 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
01178     private:
01179         mutable Xapian::weight termweight;
01180         mutable Xapian::doclength lenpart;
01181 
01182         double k1, k2, k3, b;
01183         Xapian::doclength min_normlen;
01184 
01185         mutable bool weight_calculated;
01186 
01187         void calc_termweight() const;
01188 
01189     public:
01208         BM25Weight(double k1_, double k2_, double k3_, double b_,
01209                    double min_normlen_)
01210                 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01211                   weight_calculated(false)
01212         {
01213             if (k1 < 0) k1 = 0;
01214             if (k2 < 0) k2 = 0;
01215             if (k3 < 0) k3 = 0;
01216             if (b < 0) b = 0; else if (b > 1) b = 1;
01217         }
01218         BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01219                        weight_calculated(false) { }
01220 
01221         BM25Weight * clone() const;
01222         ~BM25Weight() { }
01223         std::string name() const;
01224         std::string serialise() const;
01225         BM25Weight * unserialise(const std::string & s) const;
01226         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01227         Xapian::weight get_maxpart() const;
01228 
01229         Xapian::weight get_sumextra(Xapian::doclength len) const;
01230         Xapian::weight get_maxextra() const;
01231 
01232         bool get_sumpart_needs_doclength() const;
01233 };
01234 
01252 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
01253     private:
01254         mutable Xapian::weight termweight;
01255         mutable Xapian::doclength lenpart;
01256 
01257         double param_k;
01258 
01259         mutable bool weight_calculated;
01260 
01261         void calc_termweight() const;
01262 
01263     public:
01271         explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01272             if (param_k < 0) param_k = 0;
01273         }
01274 
01275         TradWeight() : param_k(1.0), weight_calculated(false) { }
01276 
01277         TradWeight * clone() const;
01278         ~TradWeight() { }
01279         std::string name() const;
01280         std::string serialise() const;
01281         TradWeight * unserialise(const std::string & s) const;
01282 
01283         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01284         Xapian::weight get_maxpart() const;
01285 
01286         Xapian::weight get_sumextra(Xapian::doclength len) const;
01287         Xapian::weight get_maxextra() const;
01288 
01289         bool get_sumpart_needs_doclength() const;
01290 };
01291 
01292 }
01293 
01294 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 1.0.10).
Generated on 23 Dec 2008 by Doxygen 1.5.2.