00001 /* 00002 * contrib/hstore/hstore_compat.c 00003 * 00004 * Notes on old/new hstore format disambiguation. 00005 * 00006 * There are three formats to consider: 00007 * 1) old contrib/hstore (referred to as hstore-old) 00008 * 2) prerelease pgfoundry hstore 00009 * 3) new contrib/hstore 00010 * 00011 * (2) and (3) are identical except for the HS_FLAG_NEWVERSION 00012 * bit, which is set in (3) but not (2). 00013 * 00014 * Values that are already in format (3), or which are 00015 * unambiguously in format (2), are handled by the first 00016 * "return immediately" test in hstoreUpgrade(). 00017 * 00018 * To stress a point: we ONLY get here with possibly-ambiguous 00019 * values if we're doing some sort of in-place migration from an 00020 * old prerelease pgfoundry hstore-new; and we explicitly don't 00021 * support that without fixing up any potentially padded values 00022 * first. Most of the code here is serious overkill, but the 00023 * performance penalty isn't serious (especially compared to the 00024 * palloc() that we have to do anyway) and the belt-and-braces 00025 * validity checks provide some reassurance. (If for some reason 00026 * we get a value that would have worked on the old code, but 00027 * which would be botched by the conversion code, the validity 00028 * checks will fail it first so we get an error rather than bad 00029 * data.) 00030 * 00031 * Note also that empty hstores are the same in (2) and (3), so 00032 * there are some special-case paths for them. 00033 * 00034 * We tell the difference between formats (2) and (3) as follows (but 00035 * note that there are some edge cases where we can't tell; see 00036 * comments in hstoreUpgrade): 00037 * 00038 * First, since there must be at least one entry, we look at 00039 * how the bits line up. The new format looks like: 00040 * 00041 * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen) 00042 * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen) 00043 * 00044 * The old format looks like one of these, depending on endianness 00045 * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos, 00046 * n = isnull) 00047 * 00048 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv 00049 * nppppppppppppppppppppppppppppppp 00050 * 00051 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv 00052 * pppppppppppppppppppppppppppppppn 00053 * 00054 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk 00055 * nppppppppppppppppppppppppppppppp 00056 * 00057 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk 00058 * pppppppppppppppppppppppppppppppn (usual i386 format) 00059 * 00060 * If the entry is in old format, for the first entry "pos" must be 0. 00061 * We can obviously see that either keylen or vallen must be >32768 00062 * for there to be any ambiguity (which is why lengths less than that 00063 * are fasttracked in hstore.h) Since "pos"==0, the "v" field in the 00064 * new-format interpretation can only be 0 or 1, which constrains all 00065 * but three bits of the old-format's k and v fields. But in addition 00066 * to all of this, the data length implied by the keylen and vallen 00067 * must fit in the varlena size. So the only ambiguous edge case for 00068 * hstores with only one entry occurs between a new-format entry with 00069 * an excess (~32k) of padding, and an old-format entry. But we know 00070 * which format to use in that case based on how we were compiled, so 00071 * no actual data corruption can occur. 00072 * 00073 * If there is more than one entry, the requirement that keys do not 00074 * decrease in length, and that positions increase contiguously, and 00075 * that the end of the data not be beyond the end of the varlena 00076 * itself, disambiguates in almost all other cases. There is a small 00077 * set of ambiguous cases which could occur if the old-format value 00078 * has a large excess of padding and just the right pattern of key 00079 * sizes, but these are also handled based on how we were compiled. 00080 * 00081 * The otherwise undocumented function hstore_version_diag is provided 00082 * for testing purposes. 00083 */ 00084 #include "postgres.h" 00085 00086 00087 #include "hstore.h" 00088 00089 /* 00090 * This is the structure used for entries in the old contrib/hstore 00091 * implementation. Notice that this is the same size as the new entry 00092 * (two 32-bit words per key/value pair) and that the header is the 00093 * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE 00094 * etc. are compatible. 00095 * 00096 * If the above statement isn't true on some bizarre platform, we're 00097 * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat). 00098 */ 00099 typedef struct 00100 { 00101 uint16 keylen; 00102 uint16 vallen; 00103 uint32 00104 valisnull:1, 00105 pos:31; 00106 } HOldEntry; 00107 00108 static int hstoreValidNewFormat(HStore *hs); 00109 static int hstoreValidOldFormat(HStore *hs); 00110 00111 00112 /* 00113 * Validity test for a new-format hstore. 00114 * 0 = not valid 00115 * 1 = valid but with "slop" in the length 00116 * 2 = exactly valid 00117 */ 00118 static int 00119 hstoreValidNewFormat(HStore *hs) 00120 { 00121 int count = HS_COUNT(hs); 00122 HEntry *entries = ARRPTR(hs); 00123 int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0; 00124 int vsize = CALCDATASIZE(count, buflen); 00125 int i; 00126 00127 if (hs->size_ & HS_FLAG_NEWVERSION) 00128 return 2; 00129 00130 if (count == 0) 00131 return 2; 00132 00133 if (!HSE_ISFIRST(entries[0])) 00134 return 0; 00135 00136 if (vsize > VARSIZE(hs)) 00137 return 0; 00138 00139 /* entry position must be nondecreasing */ 00140 00141 for (i = 1; i < 2 * count; ++i) 00142 { 00143 if (HSE_ISFIRST(entries[i]) 00144 || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1]))) 00145 return 0; 00146 } 00147 00148 /* key length must be nondecreasing and keys must not be null */ 00149 00150 for (i = 1; i < count; ++i) 00151 { 00152 if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1)) 00153 return 0; 00154 if (HSE_ISNULL(entries[2 * i])) 00155 return 0; 00156 } 00157 00158 if (vsize != VARSIZE(hs)) 00159 return 1; 00160 00161 return 2; 00162 } 00163 00164 /* 00165 * Validity test for an old-format hstore. 00166 * 0 = not valid 00167 * 1 = valid but with "slop" in the length 00168 * 2 = exactly valid 00169 */ 00170 static int 00171 hstoreValidOldFormat(HStore *hs) 00172 { 00173 int count = hs->size_; 00174 HOldEntry *entries = (HOldEntry *) ARRPTR(hs); 00175 int vsize; 00176 int lastpos = 0; 00177 int i; 00178 00179 if (hs->size_ & HS_FLAG_NEWVERSION) 00180 return 0; 00181 00182 /* New format uses an HEntry for key and another for value */ 00183 StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry), 00184 "old hstore format is not upward-compatible"); 00185 00186 if (count == 0) 00187 return 2; 00188 00189 if (count > 0xFFFFFFF) 00190 return 0; 00191 00192 if (CALCDATASIZE(count, 0) > VARSIZE(hs)) 00193 return 0; 00194 00195 if (entries[0].pos != 0) 00196 return 0; 00197 00198 /* key length must be nondecreasing */ 00199 00200 for (i = 1; i < count; ++i) 00201 { 00202 if (entries[i].keylen < entries[i - 1].keylen) 00203 return 0; 00204 } 00205 00206 /* 00207 * entry position must be strictly increasing, except for the first entry 00208 * (which can be ""=>"" and thus zero-length); and all entries must be 00209 * properly contiguous 00210 */ 00211 00212 for (i = 0; i < count; ++i) 00213 { 00214 if (entries[i].pos != lastpos) 00215 return 0; 00216 lastpos += (entries[i].keylen 00217 + ((entries[i].valisnull) ? 0 : entries[i].vallen)); 00218 } 00219 00220 vsize = CALCDATASIZE(count, lastpos); 00221 00222 if (vsize > VARSIZE(hs)) 00223 return 0; 00224 00225 if (vsize != VARSIZE(hs)) 00226 return 1; 00227 00228 return 2; 00229 } 00230 00231 00232 /* 00233 * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores 00234 */ 00235 HStore * 00236 hstoreUpgrade(Datum orig) 00237 { 00238 HStore *hs = (HStore *) PG_DETOAST_DATUM(orig); 00239 int valid_new; 00240 int valid_old; 00241 bool writable; 00242 00243 /* Return immediately if no conversion needed */ 00244 if ((hs->size_ & HS_FLAG_NEWVERSION) || 00245 hs->size_ == 0 || 00246 (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0])))) 00247 return hs; 00248 00249 valid_new = hstoreValidNewFormat(hs); 00250 valid_old = hstoreValidOldFormat(hs); 00251 /* Do we have a writable copy? */ 00252 writable = ((void *) hs != (void *) DatumGetPointer(orig)); 00253 00254 if (!valid_old || hs->size_ == 0) 00255 { 00256 if (valid_new) 00257 { 00258 /* 00259 * force the "new version" flag and the correct varlena length, 00260 * but only if we have a writable copy already (which we almost 00261 * always will, since short new-format values won't come through 00262 * here) 00263 */ 00264 if (writable) 00265 { 00266 HS_SETCOUNT(hs, HS_COUNT(hs)); 00267 HS_FIXSIZE(hs, HS_COUNT(hs)); 00268 } 00269 return hs; 00270 } 00271 else 00272 { 00273 elog(ERROR, "invalid hstore value found"); 00274 } 00275 } 00276 00277 /* 00278 * this is the tricky edge case. It is only possible in some quite extreme 00279 * cases (the hstore must have had a lot of wasted padding space at the 00280 * end). But the only way a "new" hstore value could get here is if we're 00281 * upgrading in place from a pre-release version of hstore-new (NOT 00282 * contrib/hstore), so we work off the following assumptions: 1. If you're 00283 * moving from old contrib/hstore to hstore-new, you're required to fix up 00284 * any potential conflicts first, e.g. by running ALTER TABLE ... USING 00285 * col::text::hstore; on all hstore columns before upgrading. 2. If you're 00286 * moving from old contrib/hstore to new contrib/hstore, then "new" values 00287 * are impossible here 3. If you're moving from pre-release hstore-new to 00288 * hstore-new, then "old" values are impossible here 4. If you're moving 00289 * from pre-release hstore-new to new contrib/hstore, you're not doing so 00290 * as an in-place upgrade, so there is no issue So the upshot of all this 00291 * is that we can treat all the edge cases as "new" if we're being built 00292 * as hstore-new, and "old" if we're being built as contrib/hstore. 00293 * 00294 * XXX the WARNING can probably be downgraded to DEBUG1 once this has been 00295 * beta-tested. But for now, it would be very useful to know if anyone can 00296 * actually reach this case in a non-contrived setting. 00297 */ 00298 00299 if (valid_new) 00300 { 00301 #if HSTORE_IS_HSTORE_NEW 00302 elog(WARNING, "ambiguous hstore value resolved as hstore-new"); 00303 00304 /* 00305 * force the "new version" flag and the correct varlena length, but 00306 * only if we have a writable copy already (which we almost always 00307 * will, since short new-format values won't come through here) 00308 */ 00309 if (writable) 00310 { 00311 HS_SETCOUNT(hs, HS_COUNT(hs)); 00312 HS_FIXSIZE(hs, HS_COUNT(hs)); 00313 } 00314 return hs; 00315 #else 00316 elog(WARNING, "ambiguous hstore value resolved as hstore-old"); 00317 #endif 00318 } 00319 00320 /* 00321 * must have an old-style value. Overwrite it in place as a new-style one, 00322 * making sure we have a writable copy first. 00323 */ 00324 00325 if (!writable) 00326 hs = (HStore *) PG_DETOAST_DATUM_COPY(orig); 00327 00328 { 00329 int count = hs->size_; 00330 HEntry *new_entries = ARRPTR(hs); 00331 HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs); 00332 int i; 00333 00334 for (i = 0; i < count; ++i) 00335 { 00336 uint32 pos = old_entries[i].pos; 00337 uint32 keylen = old_entries[i].keylen; 00338 uint32 vallen = old_entries[i].vallen; 00339 bool isnull = old_entries[i].valisnull; 00340 00341 if (isnull) 00342 vallen = 0; 00343 00344 new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK; 00345 new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK) 00346 | ((isnull) ? HENTRY_ISNULL : 0)); 00347 } 00348 00349 if (count) 00350 new_entries[0].entry |= HENTRY_ISFIRST; 00351 HS_SETCOUNT(hs, count); 00352 HS_FIXSIZE(hs, count); 00353 } 00354 00355 return hs; 00356 } 00357 00358 00359 PG_FUNCTION_INFO_V1(hstore_version_diag); 00360 Datum hstore_version_diag(PG_FUNCTION_ARGS); 00361 Datum 00362 hstore_version_diag(PG_FUNCTION_ARGS) 00363 { 00364 HStore *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); 00365 int valid_new = hstoreValidNewFormat(hs); 00366 int valid_old = hstoreValidOldFormat(hs); 00367 00368 PG_RETURN_INT32(valid_old * 10 + valid_new); 00369 }