00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include <xapian/document.h>
00026 #include <xapian/types.h>
00027 #include "document.h"
00028 #include "maptermlist.h"
00029 #include <xapian/error.h>
00030 #include <xapian/valueiterator.h>
00031 #include "utils.h"
00032
00033 #include <algorithm>
00034 #include <string>
00035
00036 using namespace std;
00037
00038 namespace Xapian {
00039
00040
00041
00042 Document::Document(Document::Internal *internal_) : internal(internal_)
00043 {
00044 }
00045
00046 Document::Document() : internal(new Xapian::Document::Internal())
00047 {
00048 }
00049
00050 string
00051 Document::get_value(Xapian::valueno value) const
00052 {
00053 DEBUGAPICALL(string, "Document::get_value", value);
00054 RETURN(internal->get_value(value));
00055 }
00056
00057 string
00058 Document::get_data() const
00059 {
00060 DEBUGAPICALL(string, "Document::get_data", "");
00061 RETURN(internal->get_data());
00062 }
00063
00064 void
00065 Document::set_data(const string &data)
00066 {
00067 DEBUGAPICALL(void, "Document::set_data", data);
00068 internal->set_data(data);
00069 }
00070
00071 void
00072 Document::operator=(const Document &other)
00073 {
00074
00075 internal = other.internal;
00076 }
00077
00078 Document::Document(const Document &other)
00079 : internal(other.internal)
00080 {
00081 }
00082
00083 Document::~Document()
00084 {
00085 }
00086
00087 string
00088 Document::get_description() const
00089 {
00090 return "Document(" + internal->get_description() + ")";
00091 }
00092
00093 void
00094 Document::add_value(Xapian::valueno valueno, const string &value)
00095 {
00096 DEBUGAPICALL(void, "Document::add_value", valueno << ", " << value);
00097 internal->add_value(valueno, value);
00098 }
00099
00100 void
00101 Document::remove_value(Xapian::valueno valueno)
00102 {
00103 DEBUGAPICALL(void, "Document::remove_value", valueno);
00104 internal->remove_value(valueno);
00105 }
00106
00107 void
00108 Document::clear_values()
00109 {
00110 DEBUGAPICALL(void, "Document::clear_values", "");
00111 internal->clear_values();
00112 }
00113
00114 void
00115 Document::add_posting(const string & tname,
00116 Xapian::termpos tpos,
00117 Xapian::termcount wdfinc)
00118 {
00119 DEBUGAPICALL(void, "Document::add_posting",
00120 tname << ", " << tpos << ", " << wdfinc);
00121 if (tname.empty()) {
00122 throw InvalidArgumentError("Empty termnames aren't allowed.");
00123 }
00124 internal->add_posting(tname, tpos, wdfinc);
00125 }
00126
00127 void
00128 Document::add_term(const string & tname, Xapian::termcount wdfinc)
00129 {
00130 DEBUGAPICALL(void, "Document::add_term", tname << ", " << wdfinc);
00131 if (tname.empty()) {
00132 throw InvalidArgumentError("Empty termnames aren't allowed.");
00133 }
00134 internal->add_term(tname, wdfinc);
00135 }
00136
00137 void
00138 Document::remove_posting(const string & tname, Xapian::termpos tpos,
00139 Xapian::termcount wdfdec)
00140 {
00141 DEBUGAPICALL(void, "Document::remove_posting",
00142 tname << ", " << tpos << ", " << wdfdec);
00143 if (tname.empty()) {
00144 throw InvalidArgumentError("Empty termnames aren't allowed.");
00145 }
00146 internal->remove_posting(tname, tpos, wdfdec);
00147 }
00148
00149 void
00150 Document::remove_term(const string & tname)
00151 {
00152 DEBUGAPICALL(void, "Document::remove_term", tname);
00153 internal->remove_term(tname);
00154 }
00155
00156 void
00157 Document::clear_terms()
00158 {
00159 DEBUGAPICALL(void, "Document::clear_terms", "");
00160 internal->clear_terms();
00161 }
00162
00163 Xapian::termcount
00164 Document::termlist_count() const {
00165 DEBUGAPICALL(Xapian::termcount, "Document::termlist_count", "");
00166 RETURN(internal->termlist_count());
00167 }
00168
00169 TermIterator
00170 Document::termlist_begin() const
00171 {
00172 DEBUGAPICALL(TermIterator, "Document::termlist_begin", "");
00173 RETURN(TermIterator(internal->open_term_list()));
00174 }
00175
00176 Xapian::termcount
00177 Document::values_count() const {
00178 DEBUGAPICALL(Xapian::termcount, "Document::values_count", "");
00179 RETURN(internal->values_count());
00180 }
00181
00182 ValueIterator
00183 Document::values_begin() const
00184 {
00185 DEBUGAPICALL(ValueIterator, "Document::values_begin", "");
00186
00187 internal->need_values();
00188 RETURN(ValueIterator(0, *this));
00189 }
00190
00191 ValueIterator
00192 Document::values_end() const
00193 {
00194 DEBUGAPICALL(ValueIterator, "Document::values_end", "");
00195 RETURN(ValueIterator(internal->values_count(), *this));
00196 }
00197
00198 docid
00199 Document::get_docid() const
00200 {
00201 DEBUGAPICALL(docid, "Document::get_docid", "");
00202 RETURN(internal->get_docid());
00203 }
00204
00205 }
00206
00208
00209 void
00210 OmDocumentTerm::add_position(Xapian::termpos tpos)
00211 {
00212 DEBUGAPICALL(void, "OmDocumentTerm::add_position", tpos);
00213
00214
00215
00216 if (positions.empty() || tpos > positions.back()) {
00217 positions.push_back(tpos);
00218 return;
00219 }
00220
00221
00222
00223 vector<Xapian::termpos>::iterator i;
00224 i = lower_bound(positions.begin(), positions.end(), tpos);
00225 if (i == positions.end() || *i != tpos) {
00226 positions.insert(i, tpos);
00227 }
00228 }
00229
00230 void
00231 OmDocumentTerm::remove_position(Xapian::termpos tpos)
00232 {
00233 DEBUGAPICALL(void, "OmDocumentTerm::remove_position", tpos);
00234
00235
00236
00237 vector<Xapian::termpos>::iterator i;
00238 i = lower_bound(positions.begin(), positions.end(), tpos);
00239 if (i == positions.end() || *i != tpos) {
00240 throw Xapian::InvalidArgumentError("Position `" + om_tostring(tpos) +
00241 "' not found in list of positions that `" +
00242 tname +
00243 "' occurs at,"
00244 " when removing position from list");
00245 }
00246 positions.erase(i);
00247 }
00248
00249 string
00250 OmDocumentTerm::get_description() const
00251 {
00252 string description;
00253
00254 description = "OmDocumentTerm(" + tname +
00255 ", wdf = " + om_tostring(wdf) +
00256 ", positions[" + om_tostring(positions.size()) + "]" +
00257 ")";
00258 return description;
00259 }
00260
00261 string
00262 Xapian::Document::Internal::get_value(Xapian::valueno valueid) const
00263 {
00264 if (values_here) {
00265 map<Xapian::valueno, string>::const_iterator i;
00266 i = values.find(valueid);
00267 if (i == values.end()) return "";
00268 return i->second;
00269 }
00270 if (!database) return "";
00271 return do_get_value(valueid);
00272 }
00273
00274 string
00275 Xapian::Document::Internal::get_data() const
00276 {
00277 if (data_here) return data;
00278 if (!database) return "";
00279 return do_get_data();
00280 }
00281
00282 void
00283 Xapian::Document::Internal::set_data(const string &data_)
00284 {
00285 data = data_;
00286 data_here = true;
00287 }
00288
00289 TermList *
00290 Xapian::Document::Internal::open_term_list() const
00291 {
00292 DEBUGCALL(MATCH, TermList *, "Document::Internal::open_term_list", "");
00293 if (terms_here) {
00294 RETURN(new MapTermList(terms.begin(), terms.end()));
00295 }
00296 if (!database) return NULL;
00297 RETURN(database->open_term_list(did));
00298 }
00299
00300 void
00301 Xapian::Document::Internal::add_value(Xapian::valueno valueno, const string &value)
00302 {
00303 need_values();
00304 values[valueno] = value;
00305 value_nos.clear();
00306 }
00307
00308 void
00309 Xapian::Document::Internal::remove_value(Xapian::valueno valueno)
00310 {
00311 need_values();
00312 map<Xapian::valueno, string>::iterator i = values.find(valueno);
00313 if (i == values.end()) {
00314 throw Xapian::InvalidArgumentError("Value #" + om_tostring(valueno) +
00315 " is not present in document, in "
00316 "Xapian::Document::Internal::remove_value()");
00317 }
00318 values.erase(i);
00319 value_nos.clear();
00320 }
00321
00322 void
00323 Xapian::Document::Internal::clear_values()
00324 {
00325 values.clear();
00326 value_nos.clear();
00327 values_here = true;
00328 }
00329
00330 void
00331 Xapian::Document::Internal::add_posting(const string & tname, Xapian::termpos tpos,
00332 Xapian::termcount wdfinc)
00333 {
00334 need_terms();
00335
00336 map<string, OmDocumentTerm>::iterator i;
00337 i = terms.find(tname);
00338 if (i == terms.end()) {
00339 OmDocumentTerm newterm(tname, wdfinc);
00340 newterm.add_position(tpos);
00341 terms.insert(make_pair(tname, newterm));
00342 } else {
00343 i->second.add_position(tpos);
00344 if (wdfinc) i->second.inc_wdf(wdfinc);
00345 }
00346 }
00347
00348 void
00349 Xapian::Document::Internal::add_term(const string & tname, Xapian::termcount wdfinc)
00350 {
00351 need_terms();
00352
00353 map<string, OmDocumentTerm>::iterator i;
00354 i = terms.find(tname);
00355 if (i == terms.end()) {
00356 OmDocumentTerm newterm(tname, wdfinc);
00357 terms.insert(make_pair(tname, newterm));
00358 } else {
00359 if (wdfinc) i->second.inc_wdf(wdfinc);
00360 }
00361 }
00362
00363 void
00364 Xapian::Document::Internal::remove_posting(const string & tname,
00365 Xapian::termpos tpos,
00366 Xapian::termcount wdfdec)
00367 {
00368 need_terms();
00369
00370 map<string, OmDocumentTerm>::iterator i;
00371 i = terms.find(tname);
00372 if (i == terms.end()) {
00373 throw Xapian::InvalidArgumentError("Term `" + tname +
00374 "' is not present in document, in "
00375 "Xapian::Document::Internal::remove_posting()");
00376 }
00377 i->second.remove_position(tpos);
00378 if (wdfdec) i->second.dec_wdf(wdfdec);
00379 }
00380
00381 void
00382 Xapian::Document::Internal::remove_term(const string & tname)
00383 {
00384 need_terms();
00385 map<string, OmDocumentTerm>::iterator i;
00386 i = terms.find(tname);
00387 if (i == terms.end()) {
00388 throw Xapian::InvalidArgumentError("Term `" + tname +
00389 "' is not present in document, in "
00390 "Xapian::Document::Internal::remove_term()");
00391 }
00392 terms.erase(i);
00393 }
00394
00395 void
00396 Xapian::Document::Internal::clear_terms()
00397 {
00398 terms.clear();
00399 terms_here = true;
00400 }
00401
00402 Xapian::termcount
00403 Xapian::Document::Internal::termlist_count() const
00404 {
00405 if (!terms_here) {
00406
00407
00408 need_terms();
00409 }
00410 Assert(terms_here);
00411 return terms.size();
00412 }
00413
00414 void
00415 Xapian::Document::Internal::need_terms() const
00416 {
00417 if (terms_here) return;
00418 if (database) {
00419 Xapian::TermIterator t(database->open_term_list(did));
00420 Xapian::TermIterator tend(NULL);
00421 for ( ; t != tend; ++t) {
00422 Xapian::PositionIterator p = t.positionlist_begin();
00423 Xapian::PositionIterator pend = t.positionlist_end();
00424 OmDocumentTerm term(*t, t.get_wdf());
00425 for ( ; p != pend; ++p) {
00426 term.add_position(*p);
00427 }
00428 terms.insert(make_pair(*t, term));
00429 }
00430 }
00431 terms_here = true;
00432 }
00433
00434 Xapian::valueno
00435 Xapian::Document::Internal::values_count() const
00436 {
00437 DEBUGLINE(UNKNOWN, "Xapian::Document::Internal::values_count() called");
00438 need_values();
00439 Assert(values_here);
00440 return values.size();
00441 }
00442
00443 string
00444 Xapian::Document::Internal::get_description() const
00445 {
00446 string description = "Xapian::Document::Internal(";
00447
00448 if (data_here) description += "data=`" + data + "'";
00449
00450 if (values_here) {
00451 if (data_here) description += ", ";
00452 description += "values[" + om_tostring(values.size()) + "]";
00453 }
00454
00455 if (terms_here) {
00456 if (data_here || values_here) description += ", ";
00457 description += "terms[" + om_tostring(terms.size()) + "]";
00458 }
00459
00460 if (database) {
00461 if (data_here || values_here || terms_here) description += ", ";
00462 description += "doc=";
00463 description += "?";
00464 }
00465
00466 description += ')';
00467
00468 return description;
00469 }
00470
00471 void
00472 Xapian::Document::Internal::need_values() const
00473 {
00474 if (!values_here) {
00475 if (database) {
00476 values = do_get_all_values();
00477 value_nos.clear();
00478 }
00479 values_here = true;
00480 }
00481 }