backends/flint/flint_synonym.cc

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2004,2005,2006,2007,2008 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include <xapian/error.h>
00024 
00025 #include "flint_cursor.h"
00026 #include "flint_synonym.h"
00027 #include "flint_utils.h"
00028 #include "stringutils.h"
00029 #include "vectortermlist.h"
00030 
00031 #include <set>
00032 #include <string>
00033 #include <vector>
00034 
00035 using namespace std;
00036 
// Each stored synonym length byte is XORed with this value so that the
// resulting bytes are more likely to coincide with lower case ASCII letters,
// which are likely to be common.  This means that zlib should do a better job
// of compressing tag values.
//
// A typed, file-local constant is used rather than a preprocessor macro so
// the name obeys normal C++ scoping rules.
static const int MAGIC_XOR_VALUE = 96;
00041 
00042 void
00043 FlintSynonymTable::merge_changes()
00044 {
00045     if (last_term.empty()) return;
00046 
00047     if (last_synonyms.empty()) {
00048         del(last_term);
00049     } else {
00050         string tag;
00051 
00052         set<string>::const_iterator i;
00053         for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
00054             const string & synonym = *i;
00055             tag += (byte)(synonym.size() ^ MAGIC_XOR_VALUE);
00056             tag += synonym;
00057         }
00058 
00059         add(last_term, tag);
00060         last_synonyms.clear();
00061     }
00062     last_term.resize(0);
00063 }
00064 
00065 void
00066 FlintSynonymTable::add_synonym(const string & term, const string & synonym)
00067 {
00068     if (last_term != term) {
00069         merge_changes();
00070         last_term = term;
00071 
00072         string tag;
00073         if (get_exact_entry(term, tag)) {
00074             const char * p = tag.data();
00075             const char * end = p + tag.size();
00076             while (p != end) {
00077                 size_t len;
00078                 if (p == end ||
00079                     (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00080                     throw Xapian::DatabaseCorruptError("Bad synonym data");
00081                 ++p;
00082                 last_synonyms.insert(string(p, len));
00083                 p += len;
00084             }
00085         }
00086     }
00087 
00088     last_synonyms.insert(synonym);
00089 }
00090 
00091 void
00092 FlintSynonymTable::remove_synonym(const string & term, const string & synonym)
00093 {
00094     if (last_term != term) {
00095         merge_changes();
00096         last_term = term;
00097 
00098         string tag;
00099         if (get_exact_entry(term, tag)) {
00100             const char * p = tag.data();
00101             const char * end = p + tag.size();
00102             while (p != end) {
00103                 size_t len;
00104                 if (p == end ||
00105                     (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00106                     throw Xapian::DatabaseCorruptError("Bad synonym data");
00107                 ++p;
00108                 last_synonyms.insert(string(p, len));
00109                 p += len;
00110             }
00111         }
00112     }
00113 
00114     last_synonyms.erase(synonym);
00115 }
00116 
00117 void
00118 FlintSynonymTable::clear_synonyms(const string & term)
00119 {
00120     // We don't actually ever need to merge_changes() here, but it's quite
00121     // likely that someone might clear_synonyms() and then add_synonym() for
00122     // the same term.  The alternative we could otherwise optimise for (modify
00123     // synonyms for a term, then clear those for another, then modify those for
00124     // the first term again) seems much less likely.
00125     if (last_term == term) {
00126         last_synonyms.clear();
00127     } else {
00128         merge_changes();
00129         last_term = term;
00130     }
00131 }
00132 
00133 TermList *
00134 FlintSynonymTable::open_termlist(const string & term)
00135 {
00136     vector<string> synonyms;
00137 
00138     if (last_term == term) {
00139         if (last_synonyms.empty()) return NULL;
00140 
00141         synonyms.reserve(last_synonyms.size());
00142         set<string>::const_iterator i;
00143         for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
00144             synonyms.push_back(*i);
00145         }
00146     } else {
00147         string tag;
00148         if (!get_exact_entry(term, tag)) return NULL;
00149 
00150         const char * p = tag.data();
00151         const char * end = p + tag.size();
00152         while (p != end) {
00153             size_t len;
00154             if (p == end ||
00155                 (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00156                 throw Xapian::DatabaseCorruptError("Bad synonym data");
00157             ++p;
00158             synonyms.push_back(string(p, len));
00159             p += len;
00160         }
00161     }
00162 
00163     return new VectorTermList(synonyms.begin(), synonyms.end());
00164 }
00165 
00167 
00168 FlintSynonymTermList::~FlintSynonymTermList()
00169 {
00170     DEBUGCALL(DB, void, "~FlintSynonymTermList", "");
00171     delete cursor;
00172 }
00173 
00174 string
00175 FlintSynonymTermList::get_termname() const
00176 {
00177     DEBUGCALL(DB, string, "FlintSynonymTermList::get_termname", "");
00178     Assert(cursor);
00179     Assert(!cursor->current_key.empty());
00180     Assert(!at_end());
00181     RETURN(cursor->current_key);
00182 }
00183 
00184 Xapian::doccount
00185 FlintSynonymTermList::get_termfreq() const
00186 {
00187     throw Xapian::InvalidOperationError("FlintSynonymTermList::get_termfreq() not meaningful");
00188 }
00189 
00190 Xapian::termcount
00191 FlintSynonymTermList::get_collection_freq() const
00192 {
00193     throw Xapian::InvalidOperationError("FlintSynonymTermList::get_collection_freq() not meaningful");
00194 }
00195 
00196 TermList *
00197 FlintSynonymTermList::next()
00198 {
00199     DEBUGCALL(DB, TermList *, "FlintSynonymTermList::next", "");
00200     Assert(!at_end());
00201 
00202     cursor->next();
00203     if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
00204         // We've reached the end of the end of the prefixed terms.
00205         cursor->to_end();
00206     }
00207 
00208     RETURN(NULL);
00209 }
00210 
00211 TermList *
00212 FlintSynonymTermList::skip_to(const string &tname)
00213 {
00214     DEBUGCALL(DB, TermList *, "FlintSynonymTermList::skip_to", tname);
00215     Assert(!at_end());
00216 
00217     if (!cursor->find_entry_ge(tname)) {
00218         // The exact term we asked for isn't there, so check if the next
00219         // term after it also has the right prefix.
00220         if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
00221             // We've reached the end of the prefixed terms.
00222             cursor->to_end();
00223         }
00224     }
00225     RETURN(NULL);
00226 }
00227 
00228 bool
00229 FlintSynonymTermList::at_end() const
00230 {
00231     DEBUGCALL(DB, bool, "FlintSynonymTermList::at_end", "");
00232     RETURN(cursor->after_end());
00233 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.