backends/quartz/quartz_utils.h

Go to the documentation of this file.
00001 /* quartz_utils.h: Generic functions for quartz
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2006 Olly Betts
00006  * 
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #ifndef OM_HGUARD_QUARTZ_UTILS_H
00024 #define OM_HGUARD_QUARTZ_UTILS_H
00025 
00026 #include <xapian/types.h>
00027 
00028 #include <string>
00029 
00030 using namespace std;
00031 
/** Compile-time assertion: compilation fails when the condition `a` is false.
 *
 *  A false condition gives the local array a size of -1, which is ill-formed.
 *  The macro expands to a braced block, so it can only be used where a
 *  statement is allowed.
 */
#define CASSERT(a) { char cassert_check_[(a) ? 1 : -1]; (void)cassert_check_; }

/** Compile-time assertion that the integer type T is unsigned.
 *
 *  Converting -1 to an unsigned type yields a value greater than zero.
 */
#define CASSERT_TYPE_UNSIGNED(T) CASSERT(static_cast<T>(-1) > 0)

/// Byte type (an unsigned char).
typedef unsigned char om_byte;
/// Alias for `unsigned int`.
typedef unsigned int om_uint32;
/// Alias for `int`.
typedef int om_int32;
00041 
00066 template<class T>
00067 bool
00068 unpack_uint(const char ** src,
00069             const char * src_end,
00070             T * resultptr)
00071 {
00072     // Check unsigned
00073     CASSERT_TYPE_UNSIGNED(T);
00074 
00075     // Check byte is what it's meant to be
00076     CASSERT(sizeof(om_byte) == 1);
00077 
00078     unsigned int shift = 0;
00079     T result = 0;
00080 
00081     while (true) {
00082         if ((*src) == src_end) {
00083             *src = 0;
00084             return false;
00085         }
00086 
00087         om_byte part = static_cast<om_byte>(**src);
00088         (*src)++;
00089 
00090         // if new byte might cause overflow, and it does
00091         if (((shift > (sizeof(T) - 1) * 8 + 1) &&
00092              ((part & 0x7f) << (shift % 8)) >= 0x100) ||
00093             (shift >= sizeof(T) * 8))  {
00094             // Overflowed - move to end of this integer
00095             while (true) {
00096                 if ((part & 0x80) == 0) return false;
00097                 if ((*src) == src_end) {
00098                     *src = 0;
00099                     return false;
00100                 }
00101                 part = static_cast<om_byte>(**src);
00102                 (*src)++;
00103             }
00104         }
00105 
00106         result += T(part & 0x7f) << shift;
00107         shift += 7;
00108 
00109         if ((part & 0x80) == 0) {
00110             if (resultptr) *resultptr = result;
00111             return true;
00112         }
00113     }
00114 }
00115 
00116 
00123 template<class T>
00124 string
00125 pack_uint(T value)
00126 {
00127     // Check unsigned
00128     CASSERT_TYPE_UNSIGNED(T);
00129 
00130     if (value == 0) return string("", 1u);
00131     string result;
00132 
00133     while (value != 0) {
00134         om_byte part = static_cast<om_byte>(value & 0x7f);
00135         value = value >> 7;
00136         if (value) part |= 0x80;
00137         result.append(1u, char(part));
00138     }
00139 
00140     return result;
00141 }
00142 
00151 template<>
00152 inline string
00153 pack_uint<bool>(bool value)
00154 {
00155     return string(1, static_cast<char>(value));
00156 }
00157 
00175 template<class T>
00176 bool
00177 unpack_uint_last(const char ** src, const char * src_end, T * resultptr)
00178 {
00179     // Check unsigned
00180     CASSERT_TYPE_UNSIGNED(T);
00181     // Check byte is what it's meant to be
00182     CASSERT(sizeof(om_byte) == 1);
00183 
00184     if (src_end - *src > int(sizeof(T))) {
00185         // Would overflow
00186         *src = src_end;
00187         return false;
00188     }
00189 
00190     T result = 0;
00191     int shift = 0;
00192     while (*src != src_end) {
00193         result |= static_cast<T>(static_cast<om_byte>(**src)) << shift;
00194         ++(*src);
00195         shift += 8;
00196     }
00197     *resultptr = result;
00198     return true;
00199 }
00200 
00210 template<class T>
00211 string
00212 pack_uint_last(T value)
00213 {
00214     // Check unsigned
00215     CASSERT_TYPE_UNSIGNED(T);
00216 
00217     string result;
00218     while (value) {
00219         result += char(value);
00220         value >>= 8;
00221     }
00222     return result;
00223 }
00224 
00235 template<class T>
00236 string
00237 pack_uint_preserving_sort(T value)
00238 {
00239     // Check unsigned
00240     CASSERT_TYPE_UNSIGNED(T);
00241 
00242     string result;
00243     while (value != 0) {
00244         om_byte part = static_cast<om_byte>(value & 0xff);
00245         value = value >> 8;
00246         result.insert(string::size_type(0), 1u, char(part));
00247     }
00248     result.insert(string::size_type(0), 1u, char(result.size()));
00249     return result;
00250 }
00251 
00271 template<class T>
00272 bool
00273 unpack_uint_preserving_sort(const char ** src,
00274                             const char * src_end,
00275                             T * resultptr)
00276 {
00277     if (*src == src_end) {
00278         *src = 0;
00279         return false;
00280     }
00281 
00282     unsigned int length = static_cast<om_byte>(**src);
00283     (*src)++;
00284 
00285     if (length > sizeof(T)) {
00286         *src += length;
00287         if (*src > src_end) {
00288             *src = 0;
00289         }
00290         return false;
00291     }
00292 
00293     // Can't be overflow now.
00294     T result = 0;
00295     while (length > 0) {
00296         result = result << 8;
00297         result += static_cast<om_byte>(**src);
00298         (*src)++;
00299         length--;
00300     }
00301     *resultptr = result;
00302 
00303     return true;
00304 }
00305 
00306 inline bool
00307 unpack_string(const char ** src,
00308               const char * src_end,
00309               string & result)
00310 {
00311     string::size_type length;
00312     if (!unpack_uint(src, src_end, &length)) {
00313         return false;
00314     }
00315 
00316     if (src_end - *src < 0 ||
00317         string::size_type(src_end - *src) < length) {
00318         src = 0;
00319         return false;
00320     }
00321 
00322     result = string(*src, length);
00323     *src += length;
00324     return true;
00325 }
00326 
00327 inline string
00328 pack_string(string value)
00329 {
00330     return pack_uint(value.size()) + value;
00331 }
00332 
/** Encode a string with NUL escaping and a NUL terminator.
 *
 *  Each zero byte in the input is written as the two bytes "\0\xff", and
 *  the result is terminated by two zero bytes.
 *
 *  @param value The string to encode.
 *
 *  @return The encoded bytes.
 */
inline string
pack_string_preserving_sort(string value)
{
    string packed;
    for (string::size_type k = 0; k != value.size(); ++k) {
        const char ch = value[k];
        packed += ch;
        // Escape embedded NULs so they can't be mistaken for the terminator.
        if (ch == '\0') packed += '\xff';
    }
    // Two terminating NULs: the first is marked "FIXME temp" in the original
    // code.  Note - the byte after the terminator mustn't be '\xff'.
    packed.append(2u, '\0');
    return packed;
}
00349 
/** Decode a string written by pack_string_preserving_sort().
 *
 *  A zero byte followed by '\xff' decodes to a zero byte in the result; a
 *  zero byte followed by any other byte ends the string, and that following
 *  byte is consumed as well.
 *
 *  @param src     In/out: address of the read position.  On success it is
 *                 left just after the two terminating bytes.  On failure it
 *                 is left wherever decoding stopped (it is not set to 0).
 *  @param src_end One past the last readable byte.
 *  @param result  Receives the decoded string.  It is emptied first, and on
 *                 failure may hold a partially decoded string.
 *
 *  @return true on success, false if the data ran out before a terminator
 *          was found.
 */
inline bool
unpack_string_preserving_sort(const char ** src,
                              const char * src_end,
                              string & result)
{
    const char *& pos = *src;
    result.erase();

    for (;;) {
        if (!(pos < src_end)) return false;

        // Copy the run of non-zero bytes up to the next NUL.
        const char * run_start = pos;
        while (*pos != '\0') {
            ++pos;
            if (pos == src_end) return false;
        }
        result.append(run_start, pos - run_start);

        // Step over the NUL and look at the byte after it.
        ++pos;
        if (pos == src_end) return false;
        const bool escaped_nul = (*pos == '\xff');
        ++pos; // for a terminator this skips its second byte (FIXME temp)
        if (!escaped_nul) return true;

        result += '\0';
    }
}
00374 
/** Decode a boolean stored as the character '0' or '1'.
 *
 *  @param src       In/out: address of the read position.  On success it is
 *                   advanced by one byte.  It is set to 0 if there is no
 *                   data or the byte is neither '0' nor '1'.
 *  @param src_end   One past the last readable byte.
 *  @param resultptr Where to store the value; may be null.  Only written on
 *                   success.
 *
 *  @return true on success, false otherwise.
 */
inline bool
unpack_bool(const char ** src,
            const char * src_end,
            bool * resultptr)
{
    const char *& pos = *src;
    if (pos == src_end) {
        pos = 0;
        return false;
    }

    const char flag = *pos;
    ++pos;

    if (flag != '0' && flag != '1') {
        // Not a valid encoded boolean.
        pos = 0;
        return false;
    }

    if (resultptr) *resultptr = (flag == '1');
    return true;
}
00395 
/** Encode a boolean as a single character.
 *
 *  @param value The boolean to encode.
 *
 *  @return "1" for true, "0" for false.
 */
inline string
pack_bool(bool value)
{
    if (value) return string("1");
    return string("0");
}
00401 
00405 inline string
00406 quartz_docid_to_key(Xapian::docid did)
00407 {
00408     return pack_uint_last(did);
00409 }
00410 
00411 #endif /* OM_HGUARD_QUARTZ_UTILS_H */

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.