00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef OM_HGUARD_INMEMORY_DATABASE_H
00025 #define OM_HGUARD_INMEMORY_DATABASE_H
00026
00027 #include "leafpostlist.h"
00028 #include "termlist.h"
00029 #include "database.h"
00030 #include <map>
00031 #include <vector>
00032 #include <algorithm>
00033 #include <xapian/document.h>
00034 #include "inmemory_positionlist.h"
00035 #include <omassert.h>
00036
00037 using namespace std;
00038
00039
00040
00041 class InMemoryPosting {
00042 public:
00043 Xapian::docid did;
00044 bool valid;
00045 vector<Xapian::termpos> positions;
00046 Xapian::termcount wdf;
00047
00048
00049 void merge(const InMemoryPosting & post) {
00050 Assert(did == post.did);
00051
00052 positions.insert(positions.end(),
00053 post.positions.begin(),
00054 post.positions.end());
00055
00056 sort(positions.begin(), positions.end());
00057 }
00058 };
00059
00060 class InMemoryTermEntry {
00061 public:
00062 string tname;
00063 vector<Xapian::termpos> positions;
00064 Xapian::termcount wdf;
00065
00066
00067 void merge(const InMemoryTermEntry & post) {
00068 Assert(tname == post.tname);
00069
00070 positions.insert(positions.end(),
00071 post.positions.begin(),
00072 post.positions.end());
00073
00074 sort(positions.begin(), positions.end());
00075 }
00076 };
00077
00078
00079 class InMemoryPostingLessThan {
00080 public:
00081 int operator() (const InMemoryPosting &p1, const InMemoryPosting &p2)
00082 {
00083 return p1.did < p2.did;
00084 }
00085 };
00086
00087
00088 class InMemoryTermEntryLessThan {
00089 public:
00090 int operator() (const InMemoryTermEntry&p1, const InMemoryTermEntry&p2)
00091 {
00092 return p1.tname < p2.tname;
00093 }
00094 };
00095
00096
00097 class InMemoryTerm {
00098 public:
00099
00100 vector<InMemoryPosting> docs;
00101
00102 Xapian::termcount term_freq;
00103 Xapian::termcount collection_freq;
00104
00105 InMemoryTerm() : term_freq(0), collection_freq(0) {}
00106
00107 void add_posting(const InMemoryPosting & post);
00108 };
00109
00111 class InMemoryDoc {
00112 public:
00113 bool is_valid;
00114
00115 vector<InMemoryTermEntry> terms;
00116
00117
00118
00119 InMemoryDoc() : is_valid(false) {}
00120
00121
00122 InMemoryDoc(bool is_valid_) : is_valid(is_valid_) {}
00123
00124 void add_posting(const InMemoryTermEntry & post);
00125 };
00126
00127 class InMemoryDatabase;
00128
00131 class InMemoryPostList : public LeafPostList {
00132 friend class InMemoryDatabase;
00133 private:
00134 vector<InMemoryPosting>::const_iterator pos;
00135 vector<InMemoryPosting>::const_iterator end;
00136 Xapian::doccount termfreq;
00137 bool started;
00138
00142 InMemoryPositionList mypositions;
00143
00144 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00145
00146 InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db,
00147 const InMemoryTerm & term);
00148 public:
00149 Xapian::doccount get_termfreq() const;
00150
00151 Xapian::docid get_docid() const;
00152 Xapian::doclength get_doclength() const;
00153 Xapian::termcount get_wdf() const;
00154 PositionList * read_position_list();
00155 PositionList * open_position_list() const;
00156
00157 PostList *next(Xapian::weight w_min);
00158
00159 PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
00160
00161
00162 bool at_end() const;
00163
00164 string get_description() const;
00165 };
00166
00169 class InMemoryAllDocsPostList : public LeafPostList {
00170 friend class InMemoryDatabase;
00171 private:
00172 Xapian::docid did;
00173
00174 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00175
00176 InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db);
00177 public:
00178 Xapian::doccount get_termfreq() const;
00179
00180 Xapian::docid get_docid() const;
00181 Xapian::doclength get_doclength() const;
00182 Xapian::termcount get_wdf() const;
00183 PositionList * read_position_list();
00184 PositionList * open_position_list() const;
00185
00186 PostList *next(Xapian::weight w_min);
00187
00188 PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
00189
00190
00191 bool at_end() const;
00192
00193 string get_description() const;
00194 };
00195
00196
00197 class InMemoryTermList : public TermList {
00198 friend class InMemoryDatabase;
00199 private:
00200 vector<InMemoryTermEntry>::const_iterator pos;
00201 vector<InMemoryTermEntry>::const_iterator end;
00202 Xapian::termcount terms;
00203 bool started;
00204
00205 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00206 Xapian::docid did;
00207 Xapian::doclength document_length;
00208
00209 InMemoryTermList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db, Xapian::docid did,
00210 const InMemoryDoc & doc,
00211 Xapian::doclength len);
00212 public:
00213 Xapian::termcount get_approx_size() const;
00214
00216 void accumulate_stats(Xapian::Internal::ExpandStats & stats) const;
00217
00218 string get_termname() const;
00219 Xapian::termcount get_wdf() const;
00220 Xapian::doccount get_termfreq() const;
00221 TermList * next();
00222 bool at_end() const;
00223 Xapian::termcount positionlist_count() const;
00224 Xapian::PositionIterator positionlist_begin() const;
00225 };
00226
00231 class InMemoryDatabase : public Xapian::Database::Internal {
00232 friend class InMemoryAllDocsPostList;
00233
00234 map<string, InMemoryTerm> postlists;
00235 vector<InMemoryDoc> termlists;
00236 vector<std::string> doclists;
00237 vector<std::map<Xapian::valueno, string> > valuelists;
00238
00239 vector<Xapian::doclength> doclengths;
00240
00241 std::map<string, string> metadata;
00242
00243 Xapian::doccount totdocs;
00244
00245 Xapian::doclength totlen;
00246
00247 bool positions_present;
00248
00249
00250 InMemoryDatabase& operator=(const InMemoryDatabase &);
00251 InMemoryDatabase(const InMemoryDatabase &);
00252
00253 void make_term(const string & tname);
00254
00255 bool doc_exists(Xapian::docid did) const;
00256 Xapian::docid make_doc(const string & docdata);
00257
00258
00259 void finish_add_doc(Xapian::docid did, const Xapian::Document &document);
00260 void add_values(Xapian::docid did, const map<Xapian::valueno, string> &values_);
00261
00262 void make_posting(InMemoryDoc * doc,
00263 const string & tname,
00264 Xapian::docid did,
00265 Xapian::termpos position,
00266 Xapian::termcount wdf,
00267 bool use_position = true);
00268
00270
00272 void flush();
00273 void cancel();
00274
00275 Xapian::docid add_document(const Xapian::Document & document);
00276
00277
00278
00279
00280
00281 #if (!defined __GNUC__ && !defined _MSC_VER) || __GNUC__ > 2
00282 using Xapian::Database::Internal::delete_document;
00283 using Xapian::Database::Internal::replace_document;
00284 #endif
00285 void delete_document(Xapian::docid did);
00286 void replace_document(Xapian::docid did, const Xapian::Document & document);
00288
00289 public:
00294 InMemoryDatabase();
00295
00296 ~InMemoryDatabase();
00297
00298 Xapian::doccount get_doccount() const;
00299
00300 Xapian::docid get_lastdocid() const;
00301
00302 Xapian::doclength get_avlength() const;
00303 Xapian::doclength get_doclength(Xapian::docid did) const;
00304
00305 Xapian::doccount get_termfreq(const string & tname) const;
00306 Xapian::termcount get_collection_freq(const string & tname) const;
00307 bool term_exists(const string & tname) const;
00308 bool has_positions() const;
00309
00310 LeafPostList * open_post_list(const string & tname) const;
00311 TermList * open_term_list(Xapian::docid did) const;
00312 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy = false) const;
00313
00314 std::string get_metadata(const std::string & key) const;
00315 void set_metadata(const std::string & key, const std::string & value);
00316
00317 Xapian::termcount positionlist_count(Xapian::docid did,
00318 const string & tname) const;
00319 PositionList * open_position_list(Xapian::docid did,
00320 const string & tname) const;
00321 TermList * open_allterms(const string & prefix) const;
00322 };
00323
00324 #endif