api/omdocument.cc

Go to the documentation of this file.
00001 /* omdocument.cc: implementation of the Xapian::Document API and its internals
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2003,2004,2006,2007 Olly Betts
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #include <config.h>
00024 
00025 #include <xapian/document.h>
00026 #include <xapian/types.h>
00027 #include "document.h"
00028 #include "maptermlist.h"
00029 #include <xapian/error.h>
00030 #include <xapian/valueiterator.h>
00031 #include "utils.h"
00032 
00033 #include <algorithm>
00034 #include <string>
00035 
00036 using namespace std;
00037 
00038 namespace Xapian {
00039 
00040 // implementation of Document
00041 
00042 Document::Document(Document::Internal *internal_) : internal(internal_)
00043 {
00044 }
00045 
00046 Document::Document() : internal(new Xapian::Document::Internal())
00047 {
00048 }
00049 
00050 string
00051 Document::get_value(Xapian::valueno value) const
00052 {
00053     DEBUGAPICALL(string, "Document::get_value", value);
00054     RETURN(internal->get_value(value));
00055 }
00056 
00057 string
00058 Document::get_data() const
00059 {
00060     DEBUGAPICALL(string, "Document::get_data", "");
00061     RETURN(internal->get_data());
00062 }
00063 
00064 void
00065 Document::set_data(const string &data)
00066 {
00067     DEBUGAPICALL(void, "Document::set_data", data);
00068     internal->set_data(data);
00069 }
00070 
00071 void
00072 Document::operator=(const Document &other)
00073 {
00074     // pointers are reference counted.
00075     internal = other.internal;
00076 }
00077 
00078 Document::Document(const Document &other)
00079         : internal(other.internal)
00080 {
00081 }
00082 
00083 Document::~Document()
00084 {
00085 }
00086 
00087 string
00088 Document::get_description() const
00089 {
00090     return "Document(" + internal->get_description() + ")";
00091 }
00092 
00093 void
00094 Document::add_value(Xapian::valueno valueno, const string &value)
00095 {
00096     DEBUGAPICALL(void, "Document::add_value", valueno << ", " << value);
00097     internal->add_value(valueno, value);
00098 }
00099 
00100 void
00101 Document::remove_value(Xapian::valueno valueno)
00102 {
00103     DEBUGAPICALL(void, "Document::remove_value", valueno);
00104     internal->remove_value(valueno);
00105 }
00106 
00107 void
00108 Document::clear_values()
00109 {
00110     DEBUGAPICALL(void, "Document::clear_values", "");
00111     internal->clear_values();
00112 }
00113 
00114 void
00115 Document::add_posting(const string & tname,
00116                         Xapian::termpos tpos,
00117                         Xapian::termcount wdfinc)
00118 {
00119     DEBUGAPICALL(void, "Document::add_posting",
00120                  tname << ", " << tpos << ", " << wdfinc);
00121     if (tname.empty()) {
00122         throw InvalidArgumentError("Empty termnames aren't allowed.");
00123     }
00124     internal->add_posting(tname, tpos, wdfinc);
00125 }
00126 
00127 void
00128 Document::add_term(const string & tname, Xapian::termcount wdfinc)
00129 {
00130     DEBUGAPICALL(void, "Document::add_term", tname << ", " << wdfinc);
00131     if (tname.empty()) {
00132         throw InvalidArgumentError("Empty termnames aren't allowed.");
00133     }
00134     internal->add_term(tname, wdfinc);
00135 }
00136 
00137 void
00138 Document::remove_posting(const string & tname, Xapian::termpos tpos,
00139                          Xapian::termcount wdfdec)
00140 {
00141     DEBUGAPICALL(void, "Document::remove_posting",
00142                  tname << ", " << tpos << ", " << wdfdec);
00143     if (tname.empty()) {
00144         throw InvalidArgumentError("Empty termnames aren't allowed.");
00145     }
00146     internal->remove_posting(tname, tpos, wdfdec);
00147 }
00148 
00149 void
00150 Document::remove_term(const string & tname)
00151 {
00152     DEBUGAPICALL(void, "Document::remove_term", tname);
00153     internal->remove_term(tname);
00154 }
00155 
00156 void
00157 Document::clear_terms()
00158 {
00159     DEBUGAPICALL(void, "Document::clear_terms", "");
00160     internal->clear_terms();
00161 }
00162 
00163 Xapian::termcount
00164 Document::termlist_count() const {
00165     DEBUGAPICALL(Xapian::termcount, "Document::termlist_count", "");
00166     RETURN(internal->termlist_count());
00167 }
00168 
00169 TermIterator
00170 Document::termlist_begin() const
00171 {
00172     DEBUGAPICALL(TermIterator, "Document::termlist_begin", "");
00173     RETURN(TermIterator(internal->open_term_list()));
00174 }
00175 
00176 Xapian::termcount
00177 Document::values_count() const {
00178     DEBUGAPICALL(Xapian::termcount, "Document::values_count", "");
00179     RETURN(internal->values_count());
00180 }
00181 
00182 ValueIterator
00183 Document::values_begin() const
00184 {
00185     DEBUGAPICALL(ValueIterator, "Document::values_begin", "");
00186     // Force the values to be read and cached.
00187     internal->need_values();
00188     RETURN(ValueIterator(0, *this));
00189 }
00190 
00191 ValueIterator
00192 Document::values_end() const
00193 {
00194     DEBUGAPICALL(ValueIterator, "Document::values_end", "");
00195     RETURN(ValueIterator(internal->values_count(), *this));
00196 }
00197 
00198 docid
00199 Document::get_docid() const
00200 {
00201     DEBUGAPICALL(docid, "Document::get_docid", "");
00202     RETURN(internal->get_docid());
00203 }
00204 
00205 }
00206 
00208 
00209 void
00210 OmDocumentTerm::add_position(Xapian::termpos tpos)
00211 {
00212     DEBUGAPICALL(void, "OmDocumentTerm::add_position", tpos);
00213 
00214     // We generally expect term positions to be added in approximately
00215     // increasing order, so check the end first
00216     if (positions.empty() || tpos > positions.back()) {
00217         positions.push_back(tpos);
00218         return;
00219     }
00220 
00221     // Search for the position the term occurs at.  Use binary chop to
00222     // search, since this is a sorted list.
00223     vector<Xapian::termpos>::iterator i;
00224     i = lower_bound(positions.begin(), positions.end(), tpos);
00225     if (i == positions.end() || *i != tpos) {
00226         positions.insert(i, tpos);
00227     }
00228 }
00229 
00230 void
00231 OmDocumentTerm::remove_position(Xapian::termpos tpos)
00232 {
00233     DEBUGAPICALL(void, "OmDocumentTerm::remove_position", tpos);
00234     
00235     // Search for the position the term occurs at.  Use binary chop to
00236     // search, since this is a sorted list.
00237     vector<Xapian::termpos>::iterator i;
00238     i = lower_bound(positions.begin(), positions.end(), tpos);
00239     if (i == positions.end() || *i != tpos) {
00240         throw Xapian::InvalidArgumentError("Position `" + om_tostring(tpos) +
00241                                      "' not found in list of positions that `" +
00242                                      tname +
00243                                      "' occurs at,"
00244                                      " when removing position from list");
00245     }
00246     positions.erase(i);
00247 }
00248 
00249 string
00250 OmDocumentTerm::get_description() const
00251 {
00252     string description;
00253 
00254     description = "OmDocumentTerm(" + tname +
00255             ", wdf = " + om_tostring(wdf) +
00256             ", positions[" + om_tostring(positions.size()) + "]" +
00257             ")";
00258     return description;
00259 }
00260 
00261 string
00262 Xapian::Document::Internal::get_value(Xapian::valueno valueid) const
00263 {
00264     if (values_here) {
00265         map<Xapian::valueno, string>::const_iterator i;
00266         i = values.find(valueid);
00267         if (i == values.end()) return "";
00268         return i->second;
00269     }
00270     if (!database) return "";
00271     return do_get_value(valueid);
00272 }
00273         
00274 string
00275 Xapian::Document::Internal::get_data() const
00276 {
00277     if (data_here) return data;
00278     if (!database) return "";
00279     return do_get_data();
00280 }
00281 
00282 void
00283 Xapian::Document::Internal::set_data(const string &data_)
00284 {
00285     data = data_;
00286     data_here = true;
00287 }
00288 
00289 TermList *
00290 Xapian::Document::Internal::open_term_list() const
00291 {
00292     DEBUGCALL(MATCH, TermList *, "Document::Internal::open_term_list", "");
00293     if (terms_here) {
00294         RETURN(new MapTermList(terms.begin(), terms.end()));
00295     }
00296     if (!database) return NULL;
00297     RETURN(database->open_term_list(did));
00298 }
00299 
00300 void
00301 Xapian::Document::Internal::add_value(Xapian::valueno valueno, const string &value)
00302 {
00303     need_values();
00304     values[valueno] = value;
00305     value_nos.clear();
00306 }
00307 
00308 void
00309 Xapian::Document::Internal::remove_value(Xapian::valueno valueno)
00310 {
00311     need_values();
00312     map<Xapian::valueno, string>::iterator i = values.find(valueno);
00313     if (i == values.end()) {
00314         throw Xapian::InvalidArgumentError("Value #" + om_tostring(valueno) +
00315                 " is not present in document, in "
00316                 "Xapian::Document::Internal::remove_value()");
00317     }
00318     values.erase(i);
00319     value_nos.clear();
00320 }
00321 
00322 void
00323 Xapian::Document::Internal::clear_values()
00324 {
00325     values.clear();
00326     value_nos.clear();
00327     values_here = true;
00328 }
00329 
00330 void
00331 Xapian::Document::Internal::add_posting(const string & tname, Xapian::termpos tpos,
00332                               Xapian::termcount wdfinc)
00333 {
00334     need_terms();
00335 
00336     map<string, OmDocumentTerm>::iterator i;
00337     i = terms.find(tname);
00338     if (i == terms.end()) {
00339         OmDocumentTerm newterm(tname, wdfinc);
00340         newterm.add_position(tpos);
00341         terms.insert(make_pair(tname, newterm));
00342     } else {
00343         i->second.add_position(tpos);
00344         if (wdfinc) i->second.inc_wdf(wdfinc);
00345     }
00346 }
00347 
00348 void
00349 Xapian::Document::Internal::add_term(const string & tname, Xapian::termcount wdfinc)
00350 {
00351     need_terms();
00352 
00353     map<string, OmDocumentTerm>::iterator i;
00354     i = terms.find(tname);
00355     if (i == terms.end()) {
00356         OmDocumentTerm newterm(tname, wdfinc);
00357         terms.insert(make_pair(tname, newterm));
00358     } else {
00359         if (wdfinc) i->second.inc_wdf(wdfinc);
00360     }
00361 }
00362 
00363 void
00364 Xapian::Document::Internal::remove_posting(const string & tname,
00365                                            Xapian::termpos tpos,
00366                                            Xapian::termcount wdfdec)    
00367 {
00368     need_terms();
00369 
00370     map<string, OmDocumentTerm>::iterator i;
00371     i = terms.find(tname);
00372     if (i == terms.end()) {
00373         throw Xapian::InvalidArgumentError("Term `" + tname +
00374                 "' is not present in document, in "
00375                 "Xapian::Document::Internal::remove_posting()");
00376     }
00377     i->second.remove_position(tpos);
00378     if (wdfdec) i->second.dec_wdf(wdfdec);
00379 }
00380 
00381 void
00382 Xapian::Document::Internal::remove_term(const string & tname)
00383 {
00384     need_terms();
00385     map<string, OmDocumentTerm>::iterator i;
00386     i = terms.find(tname);
00387     if (i == terms.end()) {
00388         throw Xapian::InvalidArgumentError("Term `" + tname +
00389                 "' is not present in document, in "
00390                 "Xapian::Document::Internal::remove_term()");
00391     }
00392     terms.erase(i);
00393 }
00394         
00395 void
00396 Xapian::Document::Internal::clear_terms()
00397 {
00398     terms.clear();
00399     terms_here = true;
00400 }
00401 
00402 Xapian::termcount
00403 Xapian::Document::Internal::termlist_count() const
00404 {
00405     if (!terms_here) {
00406         // How equivalent is this line to the rest?
00407         // return database ? database->open_term_list(did)->get_approx_size() : 0;
00408         need_terms();
00409     }
00410     Assert(terms_here);
00411     return terms.size();
00412 }
00413 
00414 void
00415 Xapian::Document::Internal::need_terms() const
00416 {
00417     if (terms_here) return;
00418     if (database) {
00419         Xapian::TermIterator t(database->open_term_list(did));
00420         Xapian::TermIterator tend(NULL);
00421         for ( ; t != tend; ++t) {
00422             Xapian::PositionIterator p = t.positionlist_begin();
00423             Xapian::PositionIterator pend = t.positionlist_end();
00424             OmDocumentTerm term(*t, t.get_wdf());
00425             for ( ; p != pend; ++p) {
00426                 term.add_position(*p);
00427             }
00428             terms.insert(make_pair(*t, term));
00429         }
00430     }
00431     terms_here = true;
00432 }
00433 
00434 Xapian::valueno
00435 Xapian::Document::Internal::values_count() const
00436 {
00437     DEBUGLINE(UNKNOWN, "Xapian::Document::Internal::values_count() called");
00438     need_values();
00439     Assert(values_here);
00440     return values.size();
00441 }
00442 
00443 string
00444 Xapian::Document::Internal::get_description() const
00445 {
00446     string description = "Xapian::Document::Internal(";
00447 
00448     if (data_here) description += "data=`" + data + "'";
00449 
00450     if (values_here) {
00451         if (data_here) description += ", ";
00452         description += "values[" + om_tostring(values.size()) + "]";
00453     }
00454 
00455     if (terms_here) {
00456         if (data_here || values_here) description += ", ";
00457         description += "terms[" + om_tostring(terms.size()) + "]";
00458     }
00459 
00460     if (database) {
00461         if (data_here || values_here || terms_here) description += ", ";
00462         description += "doc=";
00463         description += "?"; // do_get_description(); ?
00464     }
00465 
00466     description += ')';
00467 
00468     return description;
00469 }
00470 
00471 void
00472 Xapian::Document::Internal::need_values() const
00473 {
00474     if (!values_here) {
00475         if (database) {
00476             values = do_get_all_values();
00477             value_nos.clear();
00478         }
00479         values_here = true;
00480     }
00481 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.