backends/flint/flint_alltermslist.cc

Go to the documentation of this file.
00001 /* flint_alltermslist.cc: A termlist containing all terms in a flint database.
00002  *
00003  * Copyright (C) 2005,2007 Olly Betts
00004  *
00005  * This program is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU General Public License as
00007  * published by the Free Software Foundation; either version 2 of the
00008  * License, or (at your option) any later version.
00009  *
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00018  * USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include "flint_alltermslist.h"
00024 #include "flint_postlist.h"
00025 #include "flint_utils.h"
00026 
00027 #include "stringutils.h"
00028 
00029 void
00030 FlintAllTermsList::read_termfreq_and_collfreq() const
00031 {
00032     DEBUGCALL(DB, void, "FlintAllTermsList::read_termfreq_and_collfreq", "");
00033     Assert(!current_term.empty());
00034     Assert(!at_end());
00035 
00036     // Unpack the termfreq and collfreq from the tag.  Only do this if
00037     // one or other is actually read.
00038     cursor->read_tag();
00039     const char *p = cursor->current_tag.data();
00040     const char *pend = p + cursor->current_tag.size();
00041     FlintPostList::read_number_of_entries(&p, pend, &termfreq, &collfreq);
00042 }
00043 
00044 FlintAllTermsList::~FlintAllTermsList()
00045 {
00046     DEBUGCALL(DB, void, "~FlintAllTermsList", "");
00047     delete cursor;
00048 }
00049 
00050 string
00051 FlintAllTermsList::get_termname() const
00052 {
00053     DEBUGCALL(DB, string, "FlintAllTermsList::get_termname", "");
00054     Assert(!current_term.empty());
00055     Assert(!at_end());
00056     RETURN(current_term);
00057 }
00058 
00059 Xapian::doccount
00060 FlintAllTermsList::get_termfreq() const
00061 {
00062     DEBUGCALL(DB, Xapian::doccount, "FlintAllTermsList::get_termfreq", "");
00063     Assert(!current_term.empty());
00064     Assert(!at_end());
00065     if (termfreq == 0) read_termfreq_and_collfreq();
00066     RETURN(termfreq);
00067 }
00068 
00069 Xapian::termcount
00070 FlintAllTermsList::get_collection_freq() const
00071 {
00072     DEBUGCALL(DB, Xapian::termcount, "FlintAllTermsList::get_collection_freq", "");
00073     Assert(!current_term.empty());
00074     Assert(!at_end());
00075     if (termfreq == 0) read_termfreq_and_collfreq();
00076     RETURN(collfreq);
00077 }
00078 
00079 TermList *
00080 FlintAllTermsList::next()
00081 {
00082     DEBUGCALL(DB, TermList *, "FlintAllTermsList::next", "");
00083     Assert(!at_end());
00084     // Set termfreq to 0 to indicate no termfreq/collfreq have been read for
00085     // the current term.
00086     termfreq = 0;
00087 
00088     while (true) {
00089         cursor->next();
00090         if (cursor->after_end()) {
00091             current_term = "";
00092             break;
00093         }
00094 
00095         const char *p = cursor->current_key.data();
00096         const char *pend = p + cursor->current_key.size();
00097         if (!unpack_string_preserving_sort(&p, pend, current_term)) {
00098             throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
00099         }
00100 
00101         if (!startswith(current_term, prefix)) {
00102             // We've reached the end of the prefixed terms.
00103             cursor->to_end();
00104             current_term = "";
00105             break;
00106         }
00107 
00108         // If this key is for the first chunk of a postlist, we're done.
00109         // Otherwise we need to skip past continuation chunks until we find the
00110         // first chunk of the next postlist.
00111         if (p == pend) break;
00112     }
00113     RETURN(NULL);
00114 }
00115 
00116 TermList *
00117 FlintAllTermsList::skip_to(const string &term)
00118 {
00119     DEBUGCALL(DB, TermList *, "FlintAllTermsList::skip_to", term);
00120     Assert(!at_end());
00121     // Set termfreq to 0 to indicate no termfreq/collfreq have been read for
00122     // the current term.
00123     termfreq = 0;
00124 
00125     if (cursor->find_entry_ge(pack_string_preserving_sort(term))) {
00126         // The exact term we asked for is there, so just copy it rather than
00127         // wasting effort unpacking it from the key.
00128         current_term = term;
00129     } else {
00130         if (cursor->after_end()) {
00131             current_term = "";
00132             RETURN(NULL);
00133         }
00134 
00135         const char *p = cursor->current_key.data();
00136         const char *pend = p + cursor->current_key.size();
00137         if (!unpack_string_preserving_sort(&p, pend, current_term)) {
00138             throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
00139         }
00140     }
00141 
00142     if (!startswith(current_term, prefix)) {
00143         // We've reached the end of the prefixed terms.
00144         cursor->to_end();
00145         current_term = "";
00146     }
00147 
00148     RETURN(NULL);
00149 }
00150 
00151 bool
00152 FlintAllTermsList::at_end() const
00153 {
00154     DEBUGCALL(DB, bool, "FlintAllTermsList::at_end", "");
00155     RETURN(cursor->after_end());
00156 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.