Header And Logo

PostgreSQL
| The world's most advanced open source database.

hstore_compat.c

Go to the documentation of this file.
00001 /*
00002  * contrib/hstore/hstore_compat.c
00003  *
00004  * Notes on old/new hstore format disambiguation.
00005  *
00006  * There are three formats to consider:
00007  * 1) old contrib/hstore (referred to as hstore-old)
00008  * 2) prerelease pgfoundry hstore
00009  * 3) new contrib/hstore
00010  *
00011  * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
00012  * bit, which is set in (3) but not (2).
00013  *
00014  * Values that are already in format (3), or which are
00015  * unambiguously in format (2), are handled by the first
00016  * "return immediately" test in hstoreUpgrade().
00017  *
00018  * To stress a point: we ONLY get here with possibly-ambiguous
00019  * values if we're doing some sort of in-place migration from an
00020  * old prerelease pgfoundry hstore-new; and we explicitly don't
00021  * support that without fixing up any potentially padded values
00022  * first. Most of the code here is serious overkill, but the
00023  * performance penalty isn't serious (especially compared to the
00024  * palloc() that we have to do anyway) and the belt-and-braces
00025  * validity checks provide some reassurance. (If for some reason
00026  * we get a value that would have worked on the old code, but
00027  * which would be botched by the conversion code, the validity
00028  * checks will fail it first so we get an error rather than bad
00029  * data.)
00030  *
00031  * Note also that empty hstores are the same in (2) and (3), so
00032  * there are some special-case paths for them.
00033  *
00034  * We tell the difference between formats (2) and (3) as follows (but
00035  * note that there are some edge cases where we can't tell; see
00036  * comments in hstoreUpgrade):
00037  *
00038  * First, since there must be at least one entry, we look at
00039  * how the bits line up. The new format looks like:
00040  *
00041  * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk  (k..k = keylen)
00042  * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv  (v..v = keylen+vallen)
00043  *
00044  * The old format looks like one of these, depending on endianness
00045  * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
00046  * n = isnull)
00047  *
00048  * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
00049  * nppppppppppppppppppppppppppppppp
00050  *
00051  * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
00052  * pppppppppppppppppppppppppppppppn
00053  *
00054  * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
00055  * nppppppppppppppppppppppppppppppp
00056  *
00057  * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
00058  * pppppppppppppppppppppppppppppppn   (usual i386 format)
00059  *
00060  * If the entry is in old format, for the first entry "pos" must be 0.
00061  * We can obviously see that either keylen or vallen must be >32768
00062  * for there to be any ambiguity (which is why lengths less than that
00063  * are fasttracked in hstore.h). Since "pos"==0, the "v" field in the
00064  * new-format interpretation can only be 0 or 1, which constrains all
00065  * but three bits of the old-format's k and v fields. But in addition
00066  * to all of this, the data length implied by the keylen and vallen
00067  * must fit in the varlena size. So the only ambiguous edge case for
00068  * hstores with only one entry occurs between a new-format entry with
00069  * an excess (~32k) of padding, and an old-format entry. But we know
00070  * which format to use in that case based on how we were compiled, so
00071  * no actual data corruption can occur.
00072  *
00073  * If there is more than one entry, the requirement that keys do not
00074  * decrease in length, and that positions increase contiguously, and
00075  * that the end of the data not be beyond the end of the varlena
00076  * itself, disambiguates in almost all other cases. There is a small
00077  * set of ambiguous cases which could occur if the old-format value
00078  * has a large excess of padding and just the right pattern of key
00079  * sizes, but these are also handled based on how we were compiled.
00080  *
00081  * The otherwise undocumented function hstore_version_diag is provided
00082  * for testing purposes.
00083  */
00084 #include "postgres.h"
00085 
00086 
00087 #include "hstore.h"
00088 
00089 /*
00090  * This is the structure used for entries in the old contrib/hstore
00091  * implementation. Notice that this is the same size as the new entry
00092  * (two 32-bit words per key/value pair) and that the header is the
00093  * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
00094  * etc. are compatible.
00095  *
00096  * If the above statement isn't true on some bizarre platform, we're
00097  * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat).
00098  */
00099 typedef struct
00100 {
00101     uint16      keylen;
00102     uint16      vallen;
00103     uint32
00104                 valisnull:1,
00105                 pos:31;
00106 } HOldEntry;
00107 
00108 static int  hstoreValidNewFormat(HStore *hs);
00109 static int  hstoreValidOldFormat(HStore *hs);
00110 
00111 
00112 /*
00113  * Validity test for a new-format hstore.
00114  *  0 = not valid
00115  *  1 = valid but with "slop" in the length
00116  *  2 = exactly valid
00117  */
00118 static int
00119 hstoreValidNewFormat(HStore *hs)
00120 {
00121     int         count = HS_COUNT(hs);
00122     HEntry     *entries = ARRPTR(hs);
00123     int         buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
00124     int         vsize = CALCDATASIZE(count, buflen);
00125     int         i;
00126 
00127     if (hs->size_ & HS_FLAG_NEWVERSION)
00128         return 2;
00129 
00130     if (count == 0)
00131         return 2;
00132 
00133     if (!HSE_ISFIRST(entries[0]))
00134         return 0;
00135 
00136     if (vsize > VARSIZE(hs))
00137         return 0;
00138 
00139     /* entry position must be nondecreasing */
00140 
00141     for (i = 1; i < 2 * count; ++i)
00142     {
00143         if (HSE_ISFIRST(entries[i])
00144             || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
00145             return 0;
00146     }
00147 
00148     /* key length must be nondecreasing and keys must not be null */
00149 
00150     for (i = 1; i < count; ++i)
00151     {
00152         if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1))
00153             return 0;
00154         if (HSE_ISNULL(entries[2 * i]))
00155             return 0;
00156     }
00157 
00158     if (vsize != VARSIZE(hs))
00159         return 1;
00160 
00161     return 2;
00162 }
00163 
00164 /*
00165  * Validity test for an old-format hstore.
00166  *  0 = not valid
00167  *  1 = valid but with "slop" in the length
00168  *  2 = exactly valid
00169  */
00170 static int
00171 hstoreValidOldFormat(HStore *hs)
00172 {
00173     int         count = hs->size_;
00174     HOldEntry  *entries = (HOldEntry *) ARRPTR(hs);
00175     int         vsize;
00176     int         lastpos = 0;
00177     int         i;
00178 
00179     if (hs->size_ & HS_FLAG_NEWVERSION)
00180         return 0;
00181 
00182     /* New format uses an HEntry for key and another for value */
00183     StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
00184                      "old hstore format is not upward-compatible");
00185 
00186     if (count == 0)
00187         return 2;
00188 
00189     if (count > 0xFFFFFFF)
00190         return 0;
00191 
00192     if (CALCDATASIZE(count, 0) > VARSIZE(hs))
00193         return 0;
00194 
00195     if (entries[0].pos != 0)
00196         return 0;
00197 
00198     /* key length must be nondecreasing */
00199 
00200     for (i = 1; i < count; ++i)
00201     {
00202         if (entries[i].keylen < entries[i - 1].keylen)
00203             return 0;
00204     }
00205 
00206     /*
00207      * entry position must be strictly increasing, except for the first entry
00208      * (which can be ""=>"" and thus zero-length); and all entries must be
00209      * properly contiguous
00210      */
00211 
00212     for (i = 0; i < count; ++i)
00213     {
00214         if (entries[i].pos != lastpos)
00215             return 0;
00216         lastpos += (entries[i].keylen
00217                     + ((entries[i].valisnull) ? 0 : entries[i].vallen));
00218     }
00219 
00220     vsize = CALCDATASIZE(count, lastpos);
00221 
00222     if (vsize > VARSIZE(hs))
00223         return 0;
00224 
00225     if (vsize != VARSIZE(hs))
00226         return 1;
00227 
00228     return 2;
00229 }
00230 
00231 
00232 /*
00233  * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
00234  */
00235 HStore *
00236 hstoreUpgrade(Datum orig)
00237 {
00238     HStore     *hs = (HStore *) PG_DETOAST_DATUM(orig);
00239     int         valid_new;
00240     int         valid_old;
00241     bool        writable;
00242 
00243     /* Return immediately if no conversion needed */
00244     if ((hs->size_ & HS_FLAG_NEWVERSION) ||
00245         hs->size_ == 0 ||
00246         (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
00247         return hs;
00248 
00249     valid_new = hstoreValidNewFormat(hs);
00250     valid_old = hstoreValidOldFormat(hs);
00251     /* Do we have a writable copy? */
00252     writable = ((void *) hs != (void *) DatumGetPointer(orig));
00253 
00254     if (!valid_old || hs->size_ == 0)
00255     {
00256         if (valid_new)
00257         {
00258             /*
00259              * force the "new version" flag and the correct varlena length,
00260              * but only if we have a writable copy already (which we almost
00261              * always will, since short new-format values won't come through
00262              * here)
00263              */
00264             if (writable)
00265             {
00266                 HS_SETCOUNT(hs, HS_COUNT(hs));
00267                 HS_FIXSIZE(hs, HS_COUNT(hs));
00268             }
00269             return hs;
00270         }
00271         else
00272         {
00273             elog(ERROR, "invalid hstore value found");
00274         }
00275     }
00276 
00277     /*
00278      * this is the tricky edge case. It is only possible in some quite extreme
00279      * cases (the hstore must have had a lot of wasted padding space at the
00280      * end). But the only way a "new" hstore value could get here is if we're
00281      * upgrading in place from a pre-release version of hstore-new (NOT
00282      * contrib/hstore), so we work off the following assumptions: 1. If you're
00283      * moving from old contrib/hstore to hstore-new, you're required to fix up
00284      * any potential conflicts first, e.g. by running ALTER TABLE ... USING
00285      * col::text::hstore; on all hstore columns before upgrading. 2. If you're
00286      * moving from old contrib/hstore to new contrib/hstore, then "new" values
00287      * are impossible here 3. If you're moving from pre-release hstore-new to
00288      * hstore-new, then "old" values are impossible here 4. If you're moving
00289      * from pre-release hstore-new to new contrib/hstore, you're not doing so
00290      * as an in-place upgrade, so there is no issue So the upshot of all this
00291      * is that we can treat all the edge cases as "new" if we're being built
00292      * as hstore-new, and "old" if we're being built as contrib/hstore.
00293      *
00294      * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
00295      * beta-tested. But for now, it would be very useful to know if anyone can
00296      * actually reach this case in a non-contrived setting.
00297      */
00298 
00299     if (valid_new)
00300     {
00301 #if HSTORE_IS_HSTORE_NEW
00302         elog(WARNING, "ambiguous hstore value resolved as hstore-new");
00303 
00304         /*
00305          * force the "new version" flag and the correct varlena length, but
00306          * only if we have a writable copy already (which we almost always
00307          * will, since short new-format values won't come through here)
00308          */
00309         if (writable)
00310         {
00311             HS_SETCOUNT(hs, HS_COUNT(hs));
00312             HS_FIXSIZE(hs, HS_COUNT(hs));
00313         }
00314         return hs;
00315 #else
00316         elog(WARNING, "ambiguous hstore value resolved as hstore-old");
00317 #endif
00318     }
00319 
00320     /*
00321      * must have an old-style value. Overwrite it in place as a new-style one,
00322      * making sure we have a writable copy first.
00323      */
00324 
00325     if (!writable)
00326         hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
00327 
00328     {
00329         int         count = hs->size_;
00330         HEntry     *new_entries = ARRPTR(hs);
00331         HOldEntry  *old_entries = (HOldEntry *) ARRPTR(hs);
00332         int         i;
00333 
00334         for (i = 0; i < count; ++i)
00335         {
00336             uint32      pos = old_entries[i].pos;
00337             uint32      keylen = old_entries[i].keylen;
00338             uint32      vallen = old_entries[i].vallen;
00339             bool        isnull = old_entries[i].valisnull;
00340 
00341             if (isnull)
00342                 vallen = 0;
00343 
00344             new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
00345             new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
00346                                             | ((isnull) ? HENTRY_ISNULL : 0));
00347         }
00348 
00349         if (count)
00350             new_entries[0].entry |= HENTRY_ISFIRST;
00351         HS_SETCOUNT(hs, count);
00352         HS_FIXSIZE(hs, count);
00353     }
00354 
00355     return hs;
00356 }
00357 
00358 
00359 PG_FUNCTION_INFO_V1(hstore_version_diag);
00360 Datum       hstore_version_diag(PG_FUNCTION_ARGS);
00361 Datum
00362 hstore_version_diag(PG_FUNCTION_ARGS)
00363 {
00364     HStore     *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
00365     int         valid_new = hstoreValidNewFormat(hs);
00366     int         valid_old = hstoreValidOldFormat(hs);
00367 
00368     PG_RETURN_INT32(valid_old * 10 + valid_new);
00369 }