00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef XAPIAN_INCLUDED_MATCHSPY_H
00023 #define XAPIAN_INCLUDED_MATCHSPY_H
00024
#include <xapian/enquire.h>

#include <cstddef>
#include <iterator>
#include <map>
#include <set>
#include <string>
#include <vector>
00031
00032 namespace Xapian {
00033
00035 class XAPIAN_VISIBILITY_DEFAULT MultipleMatchDecider : public MatchDecider {
00036 private:
00043 std::vector<const MatchDecider *> deciders;
00044
00045 public:
00052 void append(const MatchDecider * decider) {
00053 deciders.push_back(decider);
00054 }
00055
00062 bool operator()(const Xapian::Document &doc) const;
00063 };
00064
00067 struct XAPIAN_VISIBILITY_DEFAULT StringAndFrequency {
00068 std::string str;
00069 Xapian::doccount frequency;
00070 StringAndFrequency(std::string str_, Xapian::doccount frequency_)
00071 : str(str_), frequency(frequency_) {}
00072 };
00073
00074
00077 class XAPIAN_VISIBILITY_DEFAULT StringListSerialiser {
00078 private:
00079 std::string serialised;
00080
00081 public:
00083 StringListSerialiser() { }
00084
00088 StringListSerialiser(const std::string & initial) : serialised(initial) { }
00089
00091 template <class Iterator>
00092 StringListSerialiser(Iterator begin, Iterator end) : serialised() {
00093 while (begin != end) append(*begin++);
00094 }
00095
00097 void append(const std::string & value);
00098
00100 const std::string & get() const { return serialised; }
00101 };
00102
00106 class XAPIAN_VISIBILITY_DEFAULT StringListUnserialiser {
00107 private:
00108 std::string serialised;
00109 std::string curritem;
00110 const char * pos;
00111
00113 void read_next();
00114
00116 friend bool operator==(const StringListUnserialiser & a,
00117 const StringListUnserialiser & b);
00118 friend bool operator!=(const StringListUnserialiser & a,
00119 const StringListUnserialiser & b);
00120
00121 public:
00123 StringListUnserialiser() : pos(NULL) {}
00124
00127 StringListUnserialiser(const std::string & in)
00128 : serialised(in),
00129 pos(serialised.data())
00130 {
00131 read_next();
00132 }
00133
00135 ~StringListUnserialiser() {}
00136
00138 StringListUnserialiser(const StringListUnserialiser & other)
00139 : serialised(other.serialised),
00140 curritem(other.curritem),
00141 pos((other.pos == NULL) ? NULL : serialised.data() + (other.pos - other.serialised.data()))
00142 {}
00143
00145 void operator=(const StringListUnserialiser & other) {
00146 serialised = other.serialised;
00147 curritem = other.curritem;
00148 pos = (other.pos == NULL) ? NULL : serialised.data() + (other.pos - other.serialised.data());
00149 }
00150
00152 std::string operator *() const {
00153 return curritem;
00154 }
00155
00157 StringListUnserialiser & operator++() {
00158 read_next();
00159 return *this;
00160 }
00161
00163 StringListUnserialiser operator++(int) {
00164 StringListUnserialiser tmp = *this;
00165 read_next();
00166 return tmp;
00167 }
00168
00169
00170 typedef std::input_iterator_tag iterator_category;
00171 typedef std::string value_type;
00172 typedef size_t difference_type;
00173 typedef std::string * pointer;
00174 typedef std::string & reference;
00175 };
00176
00177 inline bool operator==(const StringListUnserialiser & a,
00178 const StringListUnserialiser & b) {
00179 return (a.pos == b.pos);
00180 }
00181
00182 inline bool operator!=(const StringListUnserialiser & a,
00183 const StringListUnserialiser & b) {
00184 return (a.pos != b.pos);
00185 }
00186
00188 class XAPIAN_VISIBILITY_DEFAULT ValueCountMatchSpy : public MatchDecider {
00189 protected:
00191 mutable Xapian::doccount total;
00192
00195 mutable std::map<Xapian::valueno, std::map<std::string, Xapian::doccount> > values;
00196
00202 std::set<Xapian::valueno> multivalues;
00203
00204 public:
00206 ValueCountMatchSpy() : total(0) { }
00207
00212 ValueCountMatchSpy(Xapian::valueno valno, bool multivalue=false) : total(0) {
00213 add_slot(valno, multivalue);
00214 }
00215
00220 void add_slot(Xapian::valueno valno, bool multivalue=false) {
00221
00222 (void)values[valno];
00223 if (multivalue) multivalues.insert(valno);
00224 }
00225
00233 const std::map<std::string, Xapian::doccount> &
00234 get_values(Xapian::valueno valno) const {
00235 return values[valno];
00236 }
00237
00239 size_t get_total() const {
00240 return total;
00241 }
00242
00257 void get_top_values(std::vector<StringAndFrequency> & result,
00258 Xapian::valueno valno, size_t maxvalues) const;
00259
00264 bool operator()(const Xapian::Document &doc) const;
00265 };
00266
00274 class XAPIAN_VISIBILITY_DEFAULT TermCountMatchSpy : public MatchDecider {
00275 protected:
00277 mutable Xapian::doccount documents_seen;
00278
00280 mutable Xapian::termcount terms_seen;
00281
00288 mutable std::map<std::string, std::map<std::string, Xapian::doccount> > terms;
00289
00290 public:
00292 TermCountMatchSpy() : documents_seen(0), terms_seen(0) { }
00293
00298 TermCountMatchSpy(std::string prefix) : documents_seen(0), terms_seen(0) {
00299 add_prefix(prefix);
00300 }
00301
00308 void add_prefix(std::string prefix) {
00309
00310 (void)terms[prefix];
00311 }
00312
00320 const std::map<std::string, Xapian::doccount> &
00321 get_terms(std::string prefix) const {
00322 return terms[prefix];
00323 }
00324
00326 size_t get_documents_seen() const {
00327 return documents_seen;
00328 }
00329
00335 size_t get_terms_seen() const {
00336 return terms_seen;
00337 }
00338
00353 void get_top_terms(std::vector<StringAndFrequency> & result,
00354 std::string prefix, size_t maxterms) const;
00355
00360 bool operator()(const Xapian::Document &doc) const;
00361 };
00362
00363
00364
00367 class XAPIAN_VISIBILITY_DEFAULT CategorySelectMatchSpy :
00368 public ValueCountMatchSpy {
00369 public:
00371 CategorySelectMatchSpy() : ValueCountMatchSpy() { }
00372
00378 CategorySelectMatchSpy(Xapian::valueno valno) : ValueCountMatchSpy(valno) {
00379 }
00380
00401 double score_categorisation(Xapian::valueno valno,
00402 double desired_no_of_categories = 0.0);
00403
00423 bool build_numeric_ranges(Xapian::valueno valno, size_t max_ranges);
00424 };
00425
00426 }
00427
00428 #endif // XAPIAN_INCLUDED_MATCHSPY_H