00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include <xapian/error.h>
00024
00025 #include "flint_cursor.h"
00026 #include "flint_synonym.h"
00027 #include "flint_utils.h"
00028 #include "stringutils.h"
00029 #include "vectortermlist.h"
00030
00031 #include <set>
00032 #include <string>
00033 #include <vector>
00034
00035 using namespace std;
00036
00037
00038
00039
// Each synonym's length byte in a stored tag is XORed with this value, both
// when the tag is built and when it is parsed back.
// NOTE(review): presumably chosen so typical length bytes resemble lower case
// ASCII and compress better alongside the synonym text - confirm.
#define MAGIC_XOR_VALUE 96
00041
00042 void
00043 FlintSynonymTable::merge_changes()
00044 {
00045 if (last_term.empty()) return;
00046
00047 if (last_synonyms.empty()) {
00048 del(last_term);
00049 } else {
00050 string tag;
00051
00052 set<string>::const_iterator i;
00053 for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
00054 const string & synonym = *i;
00055 tag += (byte)(synonym.size() ^ MAGIC_XOR_VALUE);
00056 tag += synonym;
00057 }
00058
00059 add(last_term, tag);
00060 last_synonyms.clear();
00061 }
00062 last_term.resize(0);
00063 }
00064
00065 void
00066 FlintSynonymTable::add_synonym(const string & term, const string & synonym)
00067 {
00068 if (last_term != term) {
00069 merge_changes();
00070 last_term = term;
00071
00072 string tag;
00073 if (get_exact_entry(term, tag)) {
00074 const char * p = tag.data();
00075 const char * end = p + tag.size();
00076 while (p != end) {
00077 size_t len;
00078 if (p == end ||
00079 (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00080 throw Xapian::DatabaseCorruptError("Bad synonym data");
00081 ++p;
00082 last_synonyms.insert(string(p, len));
00083 p += len;
00084 }
00085 }
00086 }
00087
00088 last_synonyms.insert(synonym);
00089 }
00090
00091 void
00092 FlintSynonymTable::remove_synonym(const string & term, const string & synonym)
00093 {
00094 if (last_term != term) {
00095 merge_changes();
00096 last_term = term;
00097
00098 string tag;
00099 if (get_exact_entry(term, tag)) {
00100 const char * p = tag.data();
00101 const char * end = p + tag.size();
00102 while (p != end) {
00103 size_t len;
00104 if (p == end ||
00105 (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00106 throw Xapian::DatabaseCorruptError("Bad synonym data");
00107 ++p;
00108 last_synonyms.insert(string(p, len));
00109 p += len;
00110 }
00111 }
00112 }
00113
00114 last_synonyms.erase(synonym);
00115 }
00116
00117 void
00118 FlintSynonymTable::clear_synonyms(const string & term)
00119 {
00120
00121
00122
00123
00124
00125 if (last_term == term) {
00126 last_synonyms.clear();
00127 } else {
00128 merge_changes();
00129 last_term = term;
00130 }
00131 }
00132
00133 TermList *
00134 FlintSynonymTable::open_termlist(const string & term)
00135 {
00136 vector<string> synonyms;
00137
00138 if (last_term == term) {
00139 if (last_synonyms.empty()) return NULL;
00140
00141 synonyms.reserve(last_synonyms.size());
00142 set<string>::const_iterator i;
00143 for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
00144 synonyms.push_back(*i);
00145 }
00146 } else {
00147 string tag;
00148 if (!get_exact_entry(term, tag)) return NULL;
00149
00150 const char * p = tag.data();
00151 const char * end = p + tag.size();
00152 while (p != end) {
00153 size_t len;
00154 if (p == end ||
00155 (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00156 throw Xapian::DatabaseCorruptError("Bad synonym data");
00157 ++p;
00158 synonyms.push_back(string(p, len));
00159 p += len;
00160 }
00161 }
00162
00163 return new VectorTermList(synonyms.begin(), synonyms.end());
00164 }
00165
00167
00168 FlintSynonymTermList::~FlintSynonymTermList()
00169 {
00170 DEBUGCALL(DB, void, "~FlintSynonymTermList", "");
00171 delete cursor;
00172 }
00173
00174 string
00175 FlintSynonymTermList::get_termname() const
00176 {
00177 DEBUGCALL(DB, string, "FlintSynonymTermList::get_termname", "");
00178 Assert(cursor);
00179 Assert(!cursor->current_key.empty());
00180 Assert(!at_end());
00181 RETURN(cursor->current_key);
00182 }
00183
00184 Xapian::doccount
00185 FlintSynonymTermList::get_termfreq() const
00186 {
00187 throw Xapian::InvalidOperationError("FlintSynonymTermList::get_termfreq() not meaningful");
00188 }
00189
00190 Xapian::termcount
00191 FlintSynonymTermList::get_collection_freq() const
00192 {
00193 throw Xapian::InvalidOperationError("FlintSynonymTermList::get_collection_freq() not meaningful");
00194 }
00195
00196 TermList *
00197 FlintSynonymTermList::next()
00198 {
00199 DEBUGCALL(DB, TermList *, "FlintSynonymTermList::next", "");
00200 Assert(!at_end());
00201
00202 cursor->next();
00203 if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
00204
00205 cursor->to_end();
00206 }
00207
00208 RETURN(NULL);
00209 }
00210
00211 TermList *
00212 FlintSynonymTermList::skip_to(const string &tname)
00213 {
00214 DEBUGCALL(DB, TermList *, "FlintSynonymTermList::skip_to", tname);
00215 Assert(!at_end());
00216
00217 if (!cursor->find_entry_ge(tname)) {
00218
00219
00220 if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
00221
00222 cursor->to_end();
00223 }
00224 }
00225 RETURN(NULL);
00226 }
00227
00228 bool
00229 FlintSynonymTermList::at_end() const
00230 {
00231 DEBUGCALL(DB, bool, "FlintSynonymTermList::at_end", "");
00232 RETURN(cursor->after_end());
00233 }