postgresql/contrib/btree_gist/btree_ts.c

412 lines
9.5 KiB
C
Raw Normal View History

/*
 * contrib/btree_gist/btree_ts.c
 */
#include "postgres.h"
#include "btree_gist.h"
#include "btree_utils_num.h"
#include "utils/builtins.h"
#include "utils/datetime.h"
typedef struct
{
2004-08-29 13:07:03 +08:00
Timestamp lower;
Timestamp upper;
} tsKEY;
/*
** timestamp ops
*/
/*
 * Function-manager info records for the SQL-callable GiST support
 * functions defined further down in this file.
 */
PG_FUNCTION_INFO_V1(gbt_ts_compress);
PG_FUNCTION_INFO_V1(gbt_tstz_compress);
PG_FUNCTION_INFO_V1(gbt_ts_fetch);
PG_FUNCTION_INFO_V1(gbt_ts_union);
PG_FUNCTION_INFO_V1(gbt_ts_picksplit);
PG_FUNCTION_INFO_V1(gbt_ts_consistent);
PG_FUNCTION_INFO_V1(gbt_ts_distance);
PG_FUNCTION_INFO_V1(gbt_tstz_consistent);
PG_FUNCTION_INFO_V1(gbt_tstz_distance);
PG_FUNCTION_INFO_V1(gbt_ts_penalty);
PG_FUNCTION_INFO_V1(gbt_ts_same);
/*
 * Turn a Timestamp into a Datum for the DirectFunctionCall2() calls below.
 * With USE_FLOAT8_BYVAL the value itself is wrapped; otherwise the address
 * of X is passed, so X must be an lvalue that stays valid for the duration
 * of the call.
 */
#ifdef USE_FLOAT8_BYVAL
#define TimestampGetDatumFast(X) TimestampGetDatum(X)
#else
#define TimestampGetDatumFast(X) PointerGetDatum(&(X))
#endif
2004-08-29 13:07:03 +08:00
static bool
gbt_tsgt(const void *a, const void *b)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_gt,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
2004-08-29 13:07:03 +08:00
static bool
gbt_tsge(const void *a, const void *b)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_ge,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
2004-08-29 13:07:03 +08:00
static bool
gbt_tseq(const void *a, const void *b)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_eq,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
2004-08-29 13:07:03 +08:00
static bool
gbt_tsle(const void *a, const void *b)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_le,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
2004-08-29 13:07:03 +08:00
static bool
gbt_tslt(const void *a, const void *b)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_lt,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
static int
gbt_tskey_cmp(const void *a, const void *b)
{
tsKEY *ia = (tsKEY *) (((const Nsrt *) a)->t);
tsKEY *ib = (tsKEY *) (((const Nsrt *) b)->t);
2010-02-26 10:01:40 +08:00
int res;
res = DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatumFast(ia->lower), TimestampGetDatumFast(ib->lower)));
if (res == 0)
return DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatumFast(ia->upper), TimestampGetDatumFast(ib->upper)));
return res;
}
static float8
gbt_ts_dist(const void *a, const void *b)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
2011-04-10 23:42:00 +08:00
Interval *i;
if (TIMESTAMP_NOT_FINITE(*aa) || TIMESTAMP_NOT_FINITE(*bb))
return get_float8_infinity();
i = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
return (float8) Abs(INTERVAL_TO_SEC(i));
}
2004-08-29 13:07:03 +08:00
static const gbtree_ninfo tinfo =
{
gbt_t_ts,
sizeof(Timestamp),
16, /* sizeof(gbtreekey16) */
2004-08-29 13:07:03 +08:00
gbt_tsgt,
gbt_tsge,
gbt_tseq,
gbt_tsle,
gbt_tslt,
gbt_tskey_cmp,
gbt_ts_dist
};
PG_FUNCTION_INFO_V1(ts_dist);
Datum
ts_dist(PG_FUNCTION_ARGS)
{
Timestamp a = PG_GETARG_TIMESTAMP(0);
Timestamp b = PG_GETARG_TIMESTAMP(1);
2011-04-10 23:42:00 +08:00
Interval *r;
if (TIMESTAMP_NOT_FINITE(a) || TIMESTAMP_NOT_FINITE(b))
{
2011-04-10 23:42:00 +08:00
Interval *p = palloc(sizeof(Interval));
p->day = INT_MAX;
p->month = INT_MAX;
#ifdef HAVE_INT64_TIMESTAMP
p->time = PG_INT64_MAX;
#else
p->time = DBL_MAX;
#endif
PG_RETURN_INTERVAL_P(p);
}
else
2011-04-10 23:42:00 +08:00
r = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
PG_RETURN_INTERVAL_P(abs_interval(r));
}
PG_FUNCTION_INFO_V1(tstz_dist);
Datum
tstz_dist(PG_FUNCTION_ARGS)
{
2011-04-10 23:42:00 +08:00
TimestampTz a = PG_GETARG_TIMESTAMPTZ(0);
TimestampTz b = PG_GETARG_TIMESTAMPTZ(1);
Interval *r;
if (TIMESTAMP_NOT_FINITE(a) || TIMESTAMP_NOT_FINITE(b))
{
2011-04-10 23:42:00 +08:00
Interval *p = palloc(sizeof(Interval));
p->day = INT_MAX;
p->month = INT_MAX;
#ifdef HAVE_INT64_TIMESTAMP
p->time = PG_INT64_MAX;
#else
p->time = DBL_MAX;
#endif
PG_RETURN_INTERVAL_P(p);
}
r = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
2011-04-10 23:42:00 +08:00
PG_RETURN_INTERVAL_P(abs_interval(r));
}
/**************************************************
* timestamp ops
**************************************************/
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. 
This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-17 03:22:10 +08:00
/*
 * Reinterpret a TimestampTz as a plain Timestamp in GMT.  The value is
 * returned unchanged apart from the cast.
 */
static inline Timestamp
tstz_to_ts_gmt(TimestampTz ts)
{
	/* No timezone correction is needed, since GMT is offset 0 by definition */
	Timestamp	gmt = (Timestamp) ts;

	return gmt;
}
Datum
gbt_ts_compress(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(gbt_num_compress(entry, &tinfo));
}
Datum
gbt_tstz_compress(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval;
2004-08-29 13:07:03 +08:00
if (entry->leafkey)
{
tsKEY *r = (tsKEY *) palloc(sizeof(tsKEY));
TimestampTz ts = DatumGetTimestampTz(entry->key);
2004-08-29 13:07:03 +08:00
Timestamp gmt;
gmt = tstz_to_ts_gmt(ts);
2004-08-29 13:07:03 +08:00
retval = palloc(sizeof(GISTENTRY));
r->lower = r->upper = gmt;
gistentryinit(*retval, PointerGetDatum(r),
entry->rel, entry->page,
entry->offset, FALSE);
2004-08-29 13:07:03 +08:00
}
else
retval = entry;
2004-08-29 13:07:03 +08:00
PG_RETURN_POINTER(retval);
}
Datum
gbt_ts_fetch(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(gbt_num_fetch(entry, &tinfo));
}
Datum
gbt_ts_consistent(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
Timestamp query = PG_GETARG_TIMESTAMP(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
2004-08-29 13:07:03 +08:00
tsKEY *kkk = (tsKEY *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
/* All cases served by this function are exact */
*recheck = false;
2004-08-29 13:07:03 +08:00
key.lower = (GBT_NUMKEY *) &kkk->lower;
key.upper = (GBT_NUMKEY *) &kkk->upper;
2004-08-29 13:07:03 +08:00
PG_RETURN_BOOL(
gbt_num_consistent(&key, (void *) &query, &strategy, GIST_LEAF(entry), &tinfo)
2004-08-29 13:07:03 +08:00
);
}
Datum
gbt_ts_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
Timestamp query = PG_GETARG_TIMESTAMP(1);
/* Oid subtype = PG_GETARG_OID(3); */
tsKEY *kkk = (tsKEY *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
key.lower = (GBT_NUMKEY *) &kkk->lower;
key.upper = (GBT_NUMKEY *) &kkk->upper;
PG_RETURN_FLOAT8(
2011-04-10 23:42:00 +08:00
gbt_num_distance(&key, (void *) &query, GIST_LEAF(entry), &tinfo)
);
}
Datum
gbt_tstz_consistent(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TimestampTz query = PG_GETARG_TIMESTAMPTZ(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
2004-08-29 13:07:03 +08:00
char *kkk = (char *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
Timestamp qqq;
/* All cases served by this function are exact */
*recheck = false;
key.lower = (GBT_NUMKEY *) &kkk[0];
key.upper = (GBT_NUMKEY *) &kkk[MAXALIGN(tinfo.size)];
qqq = tstz_to_ts_gmt(query);
2004-08-29 13:07:03 +08:00
PG_RETURN_BOOL(
gbt_num_consistent(&key, (void *) &qqq, &strategy, GIST_LEAF(entry), &tinfo)
);
}
Datum
gbt_tstz_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TimestampTz query = PG_GETARG_TIMESTAMPTZ(1);
/* Oid subtype = PG_GETARG_OID(3); */
char *kkk = (char *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
Timestamp qqq;
key.lower = (GBT_NUMKEY *) &kkk[0];
key.upper = (GBT_NUMKEY *) &kkk[MAXALIGN(tinfo.size)];
qqq = tstz_to_ts_gmt(query);
PG_RETURN_FLOAT8(
2011-04-10 23:42:00 +08:00
gbt_num_distance(&key, (void *) &qqq, GIST_LEAF(entry), &tinfo)
);
}
Datum
gbt_ts_union(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
void *out = palloc(sizeof(tsKEY));
*(int *) PG_GETARG_POINTER(1) = sizeof(tsKEY);
PG_RETURN_POINTER(gbt_num_union((void *) out, entryvec, &tinfo));
}
/*
 * Clamp the lvalue "val" into the range [-FLT_MAX, FLT_MAX].
 *
 * The expansion deliberately has no trailing semicolon, so that
 * "penalty_check_max_float(x);" is exactly one statement and can be used
 * as the unbraced body of an if/else.  "val" is evaluated more than once;
 * pass a plain lvalue without side effects.
 */
#define penalty_check_max_float(val) \
	do { \
		if ((val) > FLT_MAX) \
			(val) = FLT_MAX; \
		if ((val) < -FLT_MAX) \
			(val) = -FLT_MAX; \
	} while (0)
Datum
gbt_ts_penalty(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
tsKEY *origentry = (tsKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
tsKEY *newentry = (tsKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *result = (float *) PG_GETARG_POINTER(2);
2005-10-15 10:49:52 +08:00
double orgdbl[2],
newdbl[2];
/*
* We are always using "double" timestamps here. Precision should be good
2005-10-15 10:49:52 +08:00
* enough.
*/
orgdbl[0] = ((double) origentry->lower);
orgdbl[1] = ((double) origentry->upper);
newdbl[0] = ((double) newentry->lower);
newdbl[1] = ((double) newentry->upper);
2005-10-15 10:49:52 +08:00
penalty_check_max_float(orgdbl[0]);
penalty_check_max_float(orgdbl[1]);
penalty_check_max_float(newdbl[0]);
penalty_check_max_float(newdbl[1]);
2005-10-15 10:49:52 +08:00
penalty_num(result, orgdbl[0], orgdbl[1], newdbl[0], newdbl[1]);
2004-08-29 13:07:03 +08:00
PG_RETURN_POINTER(result);
}
Datum
gbt_ts_picksplit(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
PG_RETURN_POINTER(gbt_num_picksplit(
2005-10-15 10:49:52 +08:00
(GistEntryVector *) PG_GETARG_POINTER(0),
(GIST_SPLITVEC *) PG_GETARG_POINTER(1),
2004-08-29 13:07:03 +08:00
&tinfo
));
}
Datum
gbt_ts_same(PG_FUNCTION_ARGS)
{
2004-08-29 13:07:03 +08:00
tsKEY *b1 = (tsKEY *) PG_GETARG_POINTER(0);
tsKEY *b2 = (tsKEY *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
2004-08-29 13:07:03 +08:00
*result = gbt_num_same((void *) b1, (void *) b2, &tinfo);
PG_RETURN_POINTER(result);
}