include/xapian/matchspy.h

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007 Olly Betts
00005  * Copyright (C) 2007 Lemur Consulting Ltd
00006  *
00007  * This program is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00020  */
00021 
00022 #ifndef XAPIAN_INCLUDED_MATCHSPY_H
00023 #define XAPIAN_INCLUDED_MATCHSPY_H
00024 
00025 #include <xapian/enquire.h>
00026 
00027 #include <map>
00028 #include <set>
00029 #include <string>
00030 #include <vector>
00031 
00032 namespace Xapian {
00033 
00035 class XAPIAN_VISIBILITY_DEFAULT MultipleMatchDecider : public MatchDecider {
00036   private:
00043     std::vector<const MatchDecider *> deciders;
00044 
00045   public:
00052     void append(const MatchDecider * decider) {
00053         deciders.push_back(decider);
00054     }
00055 
00062     bool operator()(const Xapian::Document &doc) const;
00063 };
00064 
00067 struct XAPIAN_VISIBILITY_DEFAULT StringAndFrequency {
00068     std::string str;
00069     Xapian::doccount frequency;
00070     StringAndFrequency(std::string str_, Xapian::doccount frequency_)
00071             : str(str_), frequency(frequency_) {}
00072 };
00073 
00074 
00077 class XAPIAN_VISIBILITY_DEFAULT StringListSerialiser {
00078   private:
00079     std::string serialised;
00080 
00081   public:
00083     StringListSerialiser() { }
00084 
00088     StringListSerialiser(const std::string & initial) : serialised(initial) { }
00089 
00091     template <class Iterator>
00092     StringListSerialiser(Iterator begin, Iterator end) : serialised() {
00093         while (begin != end) append(*begin++);
00094     }
00095 
00097     void append(const std::string & value);
00098 
00100     const std::string & get() const { return serialised; }
00101 };
00102 
00106 class XAPIAN_VISIBILITY_DEFAULT StringListUnserialiser {
00107   private:
00108     std::string serialised;
00109     std::string curritem;
00110     const char * pos;
00111 
00113     void read_next();
00114 
00116     friend bool operator==(const StringListUnserialiser & a,
00117                            const StringListUnserialiser & b);
00118     friend bool operator!=(const StringListUnserialiser & a,
00119                            const StringListUnserialiser & b);
00120 
00121   public:
00123     StringListUnserialiser() : pos(NULL) {}
00124 
00127     StringListUnserialiser(const std::string & in)
00128             : serialised(in),
00129               pos(serialised.data())
00130     {
00131         read_next();
00132     }
00133 
00135     ~StringListUnserialiser() {}
00136 
00138     StringListUnserialiser(const StringListUnserialiser & other)
00139             : serialised(other.serialised),
00140               curritem(other.curritem),
00141               pos((other.pos == NULL) ? NULL : serialised.data() + (other.pos - other.serialised.data()))
00142     {}
00143 
00145     void operator=(const StringListUnserialiser & other) {
00146         serialised = other.serialised;
00147         curritem = other.curritem;
00148         pos = (other.pos == NULL) ? NULL : serialised.data() + (other.pos - other.serialised.data());
00149     }
00150 
00152     std::string operator *() const {
00153         return curritem;
00154     }
00155 
00157     StringListUnserialiser & operator++() {
00158         read_next();
00159         return *this;
00160     }
00161 
00163     StringListUnserialiser operator++(int) {
00164         StringListUnserialiser tmp = *this;
00165         read_next();
00166         return tmp;
00167     }
00168 
00169     // Allow use as an STL iterator
00170     typedef std::input_iterator_tag iterator_category;
00171     typedef std::string value_type;
00172     typedef size_t difference_type;
00173     typedef std::string * pointer;
00174     typedef std::string & reference;
00175 };
00176 
00177 inline bool operator==(const StringListUnserialiser & a,
00178                        const StringListUnserialiser & b) {
00179     return (a.pos == b.pos);
00180 }
00181 
00182 inline bool operator!=(const StringListUnserialiser & a,
00183                        const StringListUnserialiser & b) {
00184     return (a.pos != b.pos);
00185 }
00186 
00188 class XAPIAN_VISIBILITY_DEFAULT ValueCountMatchSpy : public MatchDecider {
00189   protected:
00191     mutable Xapian::doccount total;
00192 
00195     mutable std::map<Xapian::valueno, std::map<std::string, Xapian::doccount> > values;
00196 
00202     std::set<Xapian::valueno> multivalues;
00203 
00204   public:
00206     ValueCountMatchSpy() : total(0) { }
00207 
00212     ValueCountMatchSpy(Xapian::valueno valno, bool multivalue=false) : total(0) {
00213         add_slot(valno, multivalue);
00214     }
00215 
00220     void add_slot(Xapian::valueno valno, bool multivalue=false) {
00221         // Ensure that values[valno] exists.
00222         (void)values[valno];
00223         if (multivalue) multivalues.insert(valno);
00224     }
00225 
00233     const std::map<std::string, Xapian::doccount> &
00234             get_values(Xapian::valueno valno) const {
00235         return values[valno];
00236     }
00237 
00239     size_t get_total() const {
00240         return total;
00241     }
00242 
00257     void get_top_values(std::vector<StringAndFrequency> & result,
00258                         Xapian::valueno valno, size_t maxvalues) const;
00259 
00264     bool operator()(const Xapian::Document &doc) const;
00265 };
00266 
00274 class XAPIAN_VISIBILITY_DEFAULT TermCountMatchSpy : public MatchDecider {
00275   protected:
00277     mutable Xapian::doccount documents_seen;
00278 
00280     mutable Xapian::termcount terms_seen;
00281 
00288     mutable std::map<std::string, std::map<std::string, Xapian::doccount> > terms;
00289 
00290   public:
00292     TermCountMatchSpy() : documents_seen(0), terms_seen(0) { }
00293 
00298     TermCountMatchSpy(std::string prefix) : documents_seen(0), terms_seen(0) {
00299         add_prefix(prefix);
00300     }
00301 
00308     void add_prefix(std::string prefix) {
00309         // Ensure that terms[prefix] exists.
00310         (void)terms[prefix];
00311     }
00312 
00320     const std::map<std::string, Xapian::doccount> &
00321             get_terms(std::string prefix) const {
00322         return terms[prefix];
00323     }
00324 
00326     size_t get_documents_seen() const {
00327         return documents_seen;
00328     }
00329 
00335     size_t get_terms_seen() const {
00336         return terms_seen;
00337     }
00338 
00353     void get_top_terms(std::vector<StringAndFrequency> & result,
00354                        std::string prefix, size_t maxterms) const;
00355 
00360     bool operator()(const Xapian::Document &doc) const;
00361 };
00362 
00363 
00364 
00367 class XAPIAN_VISIBILITY_DEFAULT CategorySelectMatchSpy :
00368         public ValueCountMatchSpy {
00369   public:
00371     CategorySelectMatchSpy() : ValueCountMatchSpy() { }
00372 
00378     CategorySelectMatchSpy(Xapian::valueno valno) : ValueCountMatchSpy(valno) {
00379     }
00380 
00401     double score_categorisation(Xapian::valueno valno,
00402                                 double desired_no_of_categories = 0.0);
00403 
00423     bool build_numeric_ranges(Xapian::valueno valno, size_t max_ranges);
00424 };
00425 
00426 }
00427 
00428 #endif // XAPIAN_INCLUDED_MATCHSPY_H

Documentation for Xapian (version 1.0.10).
Generated on 23 Dec 2008 by Doxygen 1.5.2.