common/serialise-double.cc

Go to the documentation of this file.
00001 /* @file serialise-double.cc
00002  * @brief functions to serialise and unserialise a double
00003  */
00004 /* Copyright (C) 2006,2007,2008 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include <xapian/error.h>
00024 
00025 #include "omassert.h"
00026 
00027 #include "serialise-double.h"
00028 
00029 #include <float.h>
00030 #include <math.h>
00031 
00032 #include <algorithm>
00033 #include <string>
00034 
00035 using namespace std;
00036 
00037 // The serialisation we use for doubles is inspired by a comp.lang.c post
00038 // by Jens Moeller:
00039 //
00040 // http://groups.google.com/group/comp.lang.c/browse_thread/thread/6558d4653f6dea8b/75a529ec03148c98
00041 //
00042 // The clever part is that the mantissa is encoded as a base-256 number which
00043 // means there's no rounding error provided both ends have FLT_RADIX as some
00044 // power of two.
00045 //
00046 // FLT_RADIX == 2 seems to be ubiquitous on modern UNIX platforms, while
00047 // some older platforms used FLT_RADIX == 16 (IBM machines for example).
00048 // FLT_RADIX == 10 seems to be very rare (the only instance Google finds
00049 // is for a cross-compiler to some TI calculators).
00050 
00051 #if FLT_RADIX == 2
00052 # define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 7 + 7) / 8)
00053 # define MAX_EXP ((DBL_MAX_EXP + 1) / 8)
00054 # define MAX_MANTISSA (1 << (DBL_MAX_EXP & 7))
00055 #elif FLT_RADIX == 16
00056 # define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 1 + 1) / 2)
00057 # define MAX_EXP ((DBL_MAX_EXP + 1) / 2)
00058 # define MAX_MANTISSA (1 << ((DBL_MAX_EXP & 1) * 4))
00059 #else
00060 # error FLT_RADIX is a value not currently handled (not 2 or 16)
00061 // # define MAX_MANTISSA_BYTES (sizeof(double) + 1)
00062 #endif
00063 
00064 static int base256ify_double(double &v) {
00065     int exp;
00066     v = frexp(v, &exp);
00067     // v is now in the range [0.5, 1.0)
00068     --exp;
00069     v = ldexp(v, (exp & 7) + 1);
00070     // v is now in the range [1.0, 256.0)
00071     exp >>= 3;
00072     return exp;
00073 }
00074 
00075 std::string serialise_double(double v)
00076 {
00077     /* First byte:
00078      *  bit 7 Negative flag
00079      *  bit 4..6 Mantissa length - 1
00080      *  bit 0..3 --- 0-13 -> Exponent + 7
00081      *            \- 14 -> Exponent given by next byte
00082      *             - 15 -> Exponent given by next 2 bytes
00083      *
00084      * Then optional medium (1 byte) or large exponent (2 bytes, lsb first)
00085      *
00086      * Then mantissa (0 iff value is 0)
00087      */
00088 
00089     bool negative = (v < 0.0);
00090 
00091     if (negative) v = -v;
00092 
00093     int exp = base256ify_double(v);
00094 
00095     string result;
00096 
00097     if (exp <= 6 && exp >= -7) {
00098         unsigned char b = static_cast<unsigned char>(exp + 7);
00099         if (negative) b |= static_cast<unsigned char>(0x80);
00100         result += char(b);
00101     } else {
00102         if (exp >= -128 && exp < 127) {
00103             result += negative ? char(0x8e) : char(0x0e);
00104             result += char(exp + 128);
00105         } else {
00106             if (exp < -32768 || exp > 32767) {
00107                 throw Xapian::NetworkError("Insane exponent in floating point number");
00108             }
00109             result += negative ? char(0x8f) : char(0x0f);
00110             result += char(unsigned(exp + 32768) & 0xff);
00111             result += char(unsigned(exp + 32768) >> 8);
00112         }
00113     }
00114 
00115     int maxbytes = min(MAX_MANTISSA_BYTES, 8);
00116 
00117     size_t n = result.size();
00118     do {
00119         unsigned char byte = static_cast<unsigned char>(v);
00120         result += (char)byte;
00121         v -= double(byte);
00122         v *= 256.0;
00123     } while (v != 0.0 && --maxbytes);
00124 
00125     n = result.size() - n;
00126     if (n > 1) {
00127         Assert(n <= 8);
00128         result[0] = static_cast<unsigned char>(result[0] | ((n - 1) << 4));
00129     }
00130 
00131     return result;
00132 }
00133 
00134 double unserialise_double(const char ** p, const char *end)
00135 {
00136     if (end - *p < 2) {
00137         throw Xapian::NetworkError("Bad encoded double: insufficient data");
00138     }
00139     unsigned char first = *(*p)++;
00140     if (first == 0 && *(*p) == 0) {
00141         ++*p;
00142         return 0.0;
00143     }
00144 
00145     bool negative = (first & 0x80) != 0;
00146     size_t mantissa_len = ((first >> 4) & 0x07) + 1;
00147 
00148     int exp = first & 0x0f;
00149     if (exp >= 14) {
00150         int bigexp = static_cast<unsigned char>(*(*p)++);
00151         if (exp == 15) {
00152             if (*p == end) {
00153                 throw Xapian::NetworkError("Bad encoded double: short large exponent");
00154             }
00155             exp = bigexp | (static_cast<unsigned char>(*(*p)++) << 8);
00156             exp -= 32768;
00157         } else {
00158             exp = bigexp - 128;
00159         }
00160     } else {
00161         exp -= 7;
00162     }
00163 
00164     if (size_t(end - *p) < mantissa_len) {
00165         throw Xapian::NetworkError("Bad encoded double: short mantissa");
00166     }
00167 
00168     double v = 0.0;
00169 
00170     static double dbl_max_mantissa = DBL_MAX;
00171     static int dbl_max_exp = base256ify_double(dbl_max_mantissa);
00172     *p += mantissa_len;
00173     if (exp > dbl_max_exp ||
00174         (exp == dbl_max_exp && double(**p) > dbl_max_mantissa)) {
00175         // The mantissa check should be precise provided that FLT_RADIX
00176         // is a power of 2.
00177         v = HUGE_VAL;
00178     } else {
00179         const char *q = *p;
00180         while (mantissa_len--) {
00181             v *= 0.00390625; // 1/256
00182             v += double(static_cast<unsigned char>(*--q));
00183         }
00184 
00185         if (exp) v = ldexp(v, exp * 8);
00186 
00187 #if 0
00188         if (v == 0.0) {
00189             // FIXME: handle underflow
00190         }
00191 #endif
00192     }
00193 
00194     if (negative) v = -v;
00195 
00196     return v;
00197 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.