mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
369 lines
10 KiB
C
369 lines
10 KiB
C
/*
|
|
* contrib/hstore/hstore_compat.c
|
|
*
|
|
* Notes on old/new hstore format disambiguation.
|
|
*
|
|
* There are three formats to consider:
|
|
* 1) old contrib/hstore (referred to as hstore-old)
|
|
* 2) prerelease pgfoundry hstore
|
|
* 3) new contrib/hstore
|
|
*
|
|
* (2) and (3) are identical except for the HS_FLAG_NEWVERSION
|
|
* bit, which is set in (3) but not (2).
|
|
*
|
|
* Values that are already in format (3), or which are
|
|
* unambiguously in format (2), are handled by the first
|
|
* "return immediately" test in hstoreUpgrade().
|
|
*
|
|
* To stress a point: we ONLY get here with possibly-ambiguous
|
|
* values if we're doing some sort of in-place migration from an
|
|
* old prerelease pgfoundry hstore-new; and we explicitly don't
|
|
* support that without fixing up any potentially padded values
|
|
* first. Most of the code here is serious overkill, but the
|
|
* performance penalty isn't serious (especially compared to the
|
|
* palloc() that we have to do anyway) and the belt-and-braces
|
|
* validity checks provide some reassurance. (If for some reason
|
|
* we get a value that would have worked on the old code, but
|
|
* which would be botched by the conversion code, the validity
|
|
* checks will fail it first so we get an error rather than bad
|
|
* data.)
|
|
*
|
|
* Note also that empty hstores are the same in (2) and (3), so
|
|
* there are some special-case paths for them.
|
|
*
|
|
* We tell the difference between formats (2) and (3) as follows (but
|
|
* note that there are some edge cases where we can't tell; see
|
|
* comments in hstoreUpgrade):
|
|
*
|
|
* First, since there must be at least one entry, we look at
|
|
* how the bits line up. The new format looks like:
|
|
*
|
|
* 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
|
|
* 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
|
|
*
|
|
* The old format looks like one of these, depending on endianness
|
|
* and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
|
|
* n = isnull)
|
|
*
|
|
* kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
|
|
* nppppppppppppppppppppppppppppppp
|
|
*
|
|
* kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
|
|
* pppppppppppppppppppppppppppppppn
|
|
*
|
|
* vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
|
|
* nppppppppppppppppppppppppppppppp
|
|
*
|
|
* vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
|
|
* pppppppppppppppppppppppppppppppn (usual i386 format)
|
|
*
|
|
* If the entry is in old format, for the first entry "pos" must be 0.
|
|
* We can obviously see that either keylen or vallen must be >32768
|
|
* for there to be any ambiguity (which is why lengths less than that
|
|
* are fasttracked in hstore.h) Since "pos"==0, the "v" field in the
|
|
* new-format interpretation can only be 0 or 1, which constrains all
|
|
* but three bits of the old-format's k and v fields. But in addition
|
|
* to all of this, the data length implied by the keylen and vallen
|
|
* must fit in the varlena size. So the only ambiguous edge case for
|
|
* hstores with only one entry occurs between a new-format entry with
|
|
* an excess (~32k) of padding, and an old-format entry. But we know
|
|
* which format to use in that case based on how we were compiled, so
|
|
* no actual data corruption can occur.
|
|
*
|
|
* If there is more than one entry, the requirement that keys do not
|
|
* decrease in length, and that positions increase contiguously, and
|
|
* that the end of the data not be beyond the end of the varlena
|
|
* itself, disambiguates in almost all other cases. There is a small
|
|
* set of ambiguous cases which could occur if the old-format value
|
|
* has a large excess of padding and just the right pattern of key
|
|
* sizes, but these are also handled based on how we were compiled.
|
|
*
|
|
* The otherwise undocumented function hstore_version_diag is provided
|
|
* for testing purposes.
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
|
|
#include "hstore.h"
|
|
|
|
/*
|
|
* This is the structure used for entries in the old contrib/hstore
|
|
* implementation. Notice that this is the same size as the new entry
|
|
* (two 32-bit words per key/value pair) and that the header is the
|
|
* same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
|
|
* etc. are compatible.
|
|
*
|
|
* If the above statement isn't true on some bizarre platform, we're
|
|
* a bit hosed (see Assert in hstoreValidOldFormat).
|
|
*/
|
|
typedef struct
|
|
{
|
|
uint16 keylen;
|
|
uint16 vallen;
|
|
uint32
|
|
valisnull:1,
|
|
pos:31;
|
|
} HOldEntry;
|
|
|
|
static int hstoreValidNewFormat(HStore *hs);
|
|
static int hstoreValidOldFormat(HStore *hs);
|
|
|
|
|
|
/*
|
|
* Validity test for a new-format hstore.
|
|
* 0 = not valid
|
|
* 1 = valid but with "slop" in the length
|
|
* 2 = exactly valid
|
|
*/
|
|
static int
|
|
hstoreValidNewFormat(HStore *hs)
|
|
{
|
|
int count = HS_COUNT(hs);
|
|
HEntry *entries = ARRPTR(hs);
|
|
int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
|
|
int vsize = CALCDATASIZE(count, buflen);
|
|
int i;
|
|
|
|
if (hs->size_ & HS_FLAG_NEWVERSION)
|
|
return 2;
|
|
|
|
if (count == 0)
|
|
return 2;
|
|
|
|
if (!HSE_ISFIRST(entries[0]))
|
|
return 0;
|
|
|
|
if (vsize > VARSIZE(hs))
|
|
return 0;
|
|
|
|
/* entry position must be nondecreasing */
|
|
|
|
for (i = 1; i < 2 * count; ++i)
|
|
{
|
|
if (HSE_ISFIRST(entries[i])
|
|
|| (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
|
|
return 0;
|
|
}
|
|
|
|
/* key length must be nondecreasing and keys must not be null */
|
|
|
|
for (i = 1; i < count; ++i)
|
|
{
|
|
if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1))
|
|
return 0;
|
|
if (HSE_ISNULL(entries[2 * i]))
|
|
return 0;
|
|
}
|
|
|
|
if (vsize != VARSIZE(hs))
|
|
return 1;
|
|
|
|
return 2;
|
|
}
|
|
|
|
/*
|
|
* Validity test for an old-format hstore.
|
|
* 0 = not valid
|
|
* 1 = valid but with "slop" in the length
|
|
* 2 = exactly valid
|
|
*/
|
|
static int
|
|
hstoreValidOldFormat(HStore *hs)
|
|
{
|
|
int count = hs->size_;
|
|
HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
|
|
int vsize;
|
|
int lastpos = 0;
|
|
int i;
|
|
|
|
if (hs->size_ & HS_FLAG_NEWVERSION)
|
|
return 0;
|
|
|
|
/* New format uses an HEntry for key and another for value */
|
|
Assert(sizeof(HOldEntry) == (2 * sizeof(HEntry)));
|
|
|
|
if (count == 0)
|
|
return 2;
|
|
|
|
if (count > 0xFFFFFFF)
|
|
return 0;
|
|
|
|
if (CALCDATASIZE(count, 0) > VARSIZE(hs))
|
|
return 0;
|
|
|
|
if (entries[0].pos != 0)
|
|
return 0;
|
|
|
|
/* key length must be nondecreasing */
|
|
|
|
for (i = 1; i < count; ++i)
|
|
{
|
|
if (entries[i].keylen < entries[i - 1].keylen)
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* entry position must be strictly increasing, except for the first entry
|
|
* (which can be ""=>"" and thus zero-length); and all entries must be
|
|
* properly contiguous
|
|
*/
|
|
|
|
for (i = 0; i < count; ++i)
|
|
{
|
|
if (entries[i].pos != lastpos)
|
|
return 0;
|
|
lastpos += (entries[i].keylen
|
|
+ ((entries[i].valisnull) ? 0 : entries[i].vallen));
|
|
}
|
|
|
|
vsize = CALCDATASIZE(count, lastpos);
|
|
|
|
if (vsize > VARSIZE(hs))
|
|
return 0;
|
|
|
|
if (vsize != VARSIZE(hs))
|
|
return 1;
|
|
|
|
return 2;
|
|
}
|
|
|
|
|
|
/*
|
|
* hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
|
|
*/
|
|
HStore *
|
|
hstoreUpgrade(Datum orig)
|
|
{
|
|
HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
|
|
int valid_new;
|
|
int valid_old;
|
|
bool writable;
|
|
|
|
/* Return immediately if no conversion needed */
|
|
if ((hs->size_ & HS_FLAG_NEWVERSION) ||
|
|
hs->size_ == 0 ||
|
|
(VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
|
|
return hs;
|
|
|
|
valid_new = hstoreValidNewFormat(hs);
|
|
valid_old = hstoreValidOldFormat(hs);
|
|
/* Do we have a writable copy? */
|
|
writable = ((void *) hs != (void *) DatumGetPointer(orig));
|
|
|
|
if (!valid_old || hs->size_ == 0)
|
|
{
|
|
if (valid_new)
|
|
{
|
|
/*
|
|
* force the "new version" flag and the correct varlena length,
|
|
* but only if we have a writable copy already (which we almost
|
|
* always will, since short new-format values won't come through
|
|
* here)
|
|
*/
|
|
if (writable)
|
|
{
|
|
HS_SETCOUNT(hs, HS_COUNT(hs));
|
|
HS_FIXSIZE(hs, HS_COUNT(hs));
|
|
}
|
|
return hs;
|
|
}
|
|
else
|
|
{
|
|
elog(ERROR, "invalid hstore value found");
|
|
}
|
|
}
|
|
|
|
/*
|
|
* this is the tricky edge case. It is only possible in some quite extreme
|
|
* cases (the hstore must have had a lot of wasted padding space at the
|
|
* end). But the only way a "new" hstore value could get here is if we're
|
|
* upgrading in place from a pre-release version of hstore-new (NOT
|
|
* contrib/hstore), so we work off the following assumptions: 1. If you're
|
|
* moving from old contrib/hstore to hstore-new, you're required to fix up
|
|
* any potential conflicts first, e.g. by running ALTER TABLE ... USING
|
|
* col::text::hstore; on all hstore columns before upgrading. 2. If you're
|
|
* moving from old contrib/hstore to new contrib/hstore, then "new" values
|
|
* are impossible here 3. If you're moving from pre-release hstore-new to
|
|
* hstore-new, then "old" values are impossible here 4. If you're moving
|
|
* from pre-release hstore-new to new contrib/hstore, you're not doing so
|
|
* as an in-place upgrade, so there is no issue So the upshot of all this
|
|
* is that we can treat all the edge cases as "new" if we're being built
|
|
* as hstore-new, and "old" if we're being built as contrib/hstore.
|
|
*
|
|
* XXX the WARNING can probably be downgraded to DEBUG1 once this has been
|
|
* beta-tested. But for now, it would be very useful to know if anyone can
|
|
* actually reach this case in a non-contrived setting.
|
|
*/
|
|
|
|
if (valid_new)
|
|
{
|
|
#if HSTORE_IS_HSTORE_NEW
|
|
elog(WARNING, "ambiguous hstore value resolved as hstore-new");
|
|
|
|
/*
|
|
* force the "new version" flag and the correct varlena length, but
|
|
* only if we have a writable copy already (which we almost always
|
|
* will, since short new-format values won't come through here)
|
|
*/
|
|
if (writable)
|
|
{
|
|
HS_SETCOUNT(hs, HS_COUNT(hs));
|
|
HS_FIXSIZE(hs, HS_COUNT(hs));
|
|
}
|
|
return hs;
|
|
#else
|
|
elog(WARNING, "ambiguous hstore value resolved as hstore-old");
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* must have an old-style value. Overwrite it in place as a new-style one,
|
|
* making sure we have a writable copy first.
|
|
*/
|
|
|
|
if (!writable)
|
|
hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
|
|
|
|
{
|
|
int count = hs->size_;
|
|
HEntry *new_entries = ARRPTR(hs);
|
|
HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
|
|
int i;
|
|
|
|
for (i = 0; i < count; ++i)
|
|
{
|
|
uint32 pos = old_entries[i].pos;
|
|
uint32 keylen = old_entries[i].keylen;
|
|
uint32 vallen = old_entries[i].vallen;
|
|
bool isnull = old_entries[i].valisnull;
|
|
|
|
if (isnull)
|
|
vallen = 0;
|
|
|
|
new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
|
|
new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
|
|
| ((isnull) ? HENTRY_ISNULL : 0));
|
|
}
|
|
|
|
if (count)
|
|
new_entries[0].entry |= HENTRY_ISFIRST;
|
|
HS_SETCOUNT(hs, count);
|
|
HS_FIXSIZE(hs, count);
|
|
}
|
|
|
|
return hs;
|
|
}
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(hstore_version_diag);
|
|
Datum hstore_version_diag(PG_FUNCTION_ARGS);
|
|
Datum
|
|
hstore_version_diag(PG_FUNCTION_ARGS)
|
|
{
|
|
HStore *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
int valid_new = hstoreValidNewFormat(hs);
|
|
int valid_old = hstoreValidOldFormat(hs);
|
|
|
|
PG_RETURN_INT32(valid_old * 10 + valid_new);
|
|
}
|