backends/flint/flint_spelling.h

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #ifndef XAPIAN_INCLUDED_FLINT_SPELLING_H
00022 #define XAPIAN_INCLUDED_FLINT_SPELLING_H
00023 
00024 #include <xapian/types.h>
00025 
00026 #include "flint_table.h"
00027 #include "termlist.h"
00028 
00029 #include <map>
00030 #include <set>
00031 #include <string>
00032 #include <string.h> // For memcpy() and memcmp().
00033 
/** Fixed-size 4 byte key used to index spelling data.
 *
 *  All 4 bytes are stored, but conversion to std::string only yields the
 *  4th byte when the first byte is 'M'; otherwise the string form is the
 *  first 3 bytes.
 */
struct fragment {
    /// Raw bytes of the fragment (not nul-terminated).
    char data[4];

    // Default constructor.
    //
    // Leaves data uninitialised - the caller is expected to fill in the
    // bytes (e.g. via operator[]) before the fragment is compared or
    // converted.
    fragment() { }

    // Allow implicit conversion.
    //
    // Copies exactly 4 bytes from data_, which is only read from (hence
    // const) and so may point at either a mutable or a constant buffer.
    fragment(const char data_[4]) { memcpy(data, data_, 4); }

    // Unchecked access to byte i of the fragment (i must be < 4).
    char & operator[] (unsigned i) { return data[i]; }
    const char & operator[] (unsigned i) const { return data[i]; }

    // String form of the fragment: 4 bytes if it starts with 'M', else 3.
    //
    // std::string is fully qualified so this header doesn't depend on a
    // using declaration being in effect at the point of inclusion.
    operator std::string () const {
        return std::string(data, data[0] == 'M' ? 4 : 3);
    }
};
00050 
00051 inline bool operator<(const fragment &a, const fragment &b) {
00052     return memcmp(a.data, b.data, 4) < 0;
00053 }
00054 
00055 class FlintSpellingTable : public FlintTable {
00056     void add_fragment(fragment frag, const string & word);
00057     void remove_fragment(fragment frag, const string & word);
00058 
00059     std::map<std::string, Xapian::termcount> wordfreq_changes;
00060     std::map<fragment, std::set<std::string> > termlist_deltas;
00061 
00062   public:
00071     FlintSpellingTable(std::string dbdir, bool readonly)
00072         : FlintTable(dbdir + "/spelling.", readonly, Z_DEFAULT_STRATEGY, true) { }
00073 
00074     // Merge in batched-up changes.
00075     void merge_changes();
00076 
00077     void add_word(const std::string & word, Xapian::termcount freqinc);
00078     void remove_word(const std::string & word, Xapian::termcount freqdec);
00079 
00080     TermList * open_termlist(const std::string & word);
00081 
00082     Xapian::doccount get_word_frequency(const string & word) const;
00083 
00091     bool is_modified() const {
00092         return !wordfreq_changes.empty() || FlintTable::is_modified();
00093     }
00094 
00095     void create_and_open(unsigned int blocksize) {
00096         // The spelling table is created lazily, but erase it in case we're
00097         // overwriting an existing database and it already exists.
00098         FlintTable::erase();
00099         FlintTable::set_block_size(blocksize);
00100     }
00101 
00102     void commit(flint_revision_number_t revision) {
00103         merge_changes();
00104         FlintTable::commit(revision);
00105     }
00106 
00107     void cancel() {
00108         // Discard batched-up changes.
00109         wordfreq_changes.clear();
00110         termlist_deltas.clear();
00111 
00112         FlintTable::cancel();
00113     }
00114 
00115     // @}
00116 };
00117 
00119 class FlintSpellingTermList : public TermList {
00121     std::string data;
00122 
00124     unsigned p;
00125 
00127     std::string current_term;
00128 
00130     FlintSpellingTermList(const FlintSpellingTermList &);
00131 
00133     void operator=(const FlintSpellingTermList &);
00134 
00135   public:
00137     FlintSpellingTermList(const std::string & data_)
00138         : data(data_), p(0) { }
00139 
00140     Xapian::termcount get_approx_size() const;
00141 
00142     std::string get_termname() const;
00143 
00144     Xapian::termcount get_wdf() const;
00145 
00146     Xapian::doccount get_termfreq() const;
00147 
00148     Xapian::termcount get_collection_freq() const;
00149 
00150     TermList *next();
00151 
00152     bool at_end() const;
00153 
00154     Xapian::termcount positionlist_count() const;
00155 
00156     Xapian::PositionIterator positionlist_begin() const;
00157 };
00158 
00159 #endif // XAPIAN_INCLUDED_FLINT_SPELLING_H

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.