/*-------------------------------------------------------------------------
 *
 * _int_selfuncs.c
 *	  Functions for selectivity estimation of intarray operators
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  contrib/intarray/_int_selfuncs.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "_int.h"
#include "access/htup_details.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"

PG_FUNCTION_INFO_V1(_int_overlap_sel);
PG_FUNCTION_INFO_V1(_int_contains_sel);
PG_FUNCTION_INFO_V1(_int_contained_sel);
PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
PG_FUNCTION_INFO_V1(_int_contains_joinsel);
PG_FUNCTION_INFO_V1(_int_contained_joinsel);
PG_FUNCTION_INFO_V1(_int_matchsel);


static Selectivity int_query_opr_selec(ITEM *item, Datum *values, float4 *freqs,
									   int nmcelems, float4 minfreq);
static int	compare_val_int4(const void *a, const void *b);

/*
 * Wrappers around the default array selectivity estimation functions.
 *
 * The default array selectivity operators for the @>, && and <@ operators
 * work fine for integer arrays. However, if we tried to just use arraycontsel
 * and arraycontjoinsel directly as the cost estimator functions for our
 * operators, they would not work as intended, because they look at the
 * operator's OID. Our operators behave exactly like the built-in anyarray
 * versions, but we must tell the cost estimator functions which built-in
 * operators they correspond to. These wrappers just replace the operator
 * OID with the corresponding built-in operator's OID, and call the built-in
 * function.
 */
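/*
 * Note that argument 1 (the operator OID supplied by the planner) is
 * deliberately not forwarded below; the hard-wired built-in operator OID is
 * passed in its place.
 *
 * For context, a sketch of how these estimators are expected to be attached
 * to the intarray operators in the extension's SQL script (illustrative, not
 * necessarily the exact DDL):
 *
 *	ALTER OPERATOR && (_int4, _int4)
 *		SET (RESTRICT = _int_overlap_sel, JOIN = _int_overlap_joinsel);
 *	ALTER OPERATOR @> (_int4, _int4)
 *		SET (RESTRICT = _int_contains_sel, JOIN = _int_contains_joinsel);
 *	ALTER OPERATOR <@ (_int4, _int4)
 *		SET (RESTRICT = _int_contained_sel, JOIN = _int_contained_joinsel);
 */
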
Datum
_int_overlap_sel(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
										PG_GETARG_DATUM(0),
										ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
										PG_GETARG_DATUM(2),
										PG_GETARG_DATUM(3)));
}

Datum
_int_contains_sel(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
										PG_GETARG_DATUM(0),
										ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
										PG_GETARG_DATUM(2),
										PG_GETARG_DATUM(3)));
}

Datum
_int_contained_sel(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
										PG_GETARG_DATUM(0),
										ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
										PG_GETARG_DATUM(2),
										PG_GETARG_DATUM(3)));
}

Datum
_int_overlap_joinsel(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
										PG_GETARG_DATUM(0),
										ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
										PG_GETARG_DATUM(2),
										PG_GETARG_DATUM(3),
										PG_GETARG_DATUM(4)));
}

Datum
_int_contains_joinsel(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
										PG_GETARG_DATUM(0),
										ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
										PG_GETARG_DATUM(2),
										PG_GETARG_DATUM(3),
										PG_GETARG_DATUM(4)));
}

Datum
_int_contained_joinsel(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
										PG_GETARG_DATUM(0),
										ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
										PG_GETARG_DATUM(2),
										PG_GETARG_DATUM(3),
										PG_GETARG_DATUM(4)));
}

/*
 * _int_matchsel -- restriction selectivity function for intarray @@ query_int
 */
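/*
 * Illustrative example (hypothetical table and column names): this estimator
 * is consulted for restriction clauses such as
 *
 *	SELECT * FROM tbl WHERE intcol @@ '1 & (2 | 3)'::query_int;
 *
 * where intcol is an int4[] column; the estimate is built from the column's
 * most-common-elements statistics gathered by ANALYZE.
 */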
Datum
_int_matchsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec;
	QUERYTYPE  *query;
	Datum	   *mcelems = NULL;
	float4	   *mcefreqs = NULL;
	int			nmcelems = 0;
	float4		minfreq = 0.0;
	float4		nullfrac = 0.0;
	AttStatsSlot sslot;

	/*
	 * If expression is not "variable @@ something" or "something @@ variable"
	 * then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

	/*
	 * Variable should be int[]. We don't support cases where variable is
	 * query_int.
	 */
	if (vardata.vartype != INT4ARRAYOID)
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
	}

	/*
	 * The "@@" operator is strict, so we can cope with NULL right away.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/* The caller made sure the const is a query, so get it now */
	query = DatumGetQueryTypeP(((Const *) other)->constvalue);

	/* Empty query matches nothing */
	if (query->size == 0)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/*
	 * Get the statistics for the intarray column.
	 *
	 * We're interested in the Most-Common-Elements list, and the NULL
	 * fraction.
	 */
	if (HeapTupleIsValid(vardata.statsTuple))
	{
		Form_pg_statistic stats;

		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
		nullfrac = stats->stanullfrac;

		/*
		 * For an int4 array, the default array type analyze function will
		 * collect a Most Common Elements list, which is an array of int4s.
		 */
		if (get_attstatsslot(&sslot, vardata.statsTuple,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
		{
			Assert(sslot.valuetype == INT4OID);

			/*
			 * There should be three more Numbers than Values, because the
			 * last three (for intarray) cells are taken for minimal, maximal
			 * and nulls frequency. Punt if not.
			 */
			if (sslot.nnumbers == sslot.nvalues + 3)
			{
				/* Grab the lowest frequency. */
				minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
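				/*
				 * (The index above reduces to sslot.nvalues, i.e. the first
				 * of the three extra entries, which holds the minimum
				 * element frequency.)
				 */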
				mcelems = sslot.values;
				mcefreqs = sslot.numbers;
				nmcelems = sslot.nvalues;
			}
		}
	}
	else
		memset(&sslot, 0, sizeof(sslot));

	/* Process the logical expression in the query, using the stats */
	selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
								mcelems, mcefreqs, nmcelems, minfreq);

	/* MCE stats count only non-null rows, so adjust for null rows. */
	selec *= (1.0 - nullfrac);

	free_attstatsslot(&sslot);
	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 * Estimate selectivity of single intquery operator
 */
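/*
 * Query representation, as used by the caller above: the parsed query is an
 * array of ITEMs whose last element is the root of the expression tree; for
 * an operator node, one operand is the immediately preceding item (item - 1)
 * and the other is reached through the item->left displacement.
 *
 * Worked example (illustrative numbers only): with MCE frequencies
 * freq(1) = 0.2 and freq(2) = 0.1, the rules below give roughly
 * 0.2 * 0.1 = 0.02 for "1 & 2" (independence assumption),
 * 0.2 + 0.1 - 0.02 = 0.28 for "1 | 2" (inclusion-exclusion), and
 * 1 - 0.2 = 0.8 for "!1".
 */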
static Selectivity
int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
					int nmcelems, float4 minfreq)
{
	Selectivity selec;

	/* since this function recurses, it could be driven to stack overflow */
	check_stack_depth();

	if (item->type == VAL)
	{
		Datum	   *searchres;

		if (mcelems == NULL)
			return (Selectivity) DEFAULT_EQ_SEL;

		searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
									  sizeof(Datum), compare_val_int4);
		if (searchres)
		{
			/*
			 * The element is in MCELEM. Return precise selectivity (or at
			 * least as precise as ANALYZE could find out).
			 */
			selec = mcefreqs[searchres - mcelems];
		}
		else
		{
			/*
			 * The element is not in MCELEM. Punt, but assume that the
			 * selectivity cannot be more than minfreq / 2.
			 */
			selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
		}
	}
	else if (item->type == OPR)
	{
		/* Current query node is an operator */
		Selectivity s1,
					s2;

		s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
								 minfreq);
		switch (item->val)
		{
			case (int32) '!':
				selec = 1.0 - s1;
				break;

			case (int32) '&':
				s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
										 nmcelems, minfreq);
				selec = s1 * s2;
				break;

			case (int32) '|':
				s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
										 nmcelems, minfreq);
				selec = s1 + s2 - s1 * s2;
				break;

			default:
				elog(ERROR, "unrecognized operator: %d", item->val);
				selec = 0;		/* keep compiler quiet */
				break;
		}
	}
	else
	{
		elog(ERROR, "unrecognized int query item type: %u", item->type);
		selec = 0;				/* keep compiler quiet */
	}

	/* Clamp intermediate results to stay sane despite roundoff error */
	CLAMP_PROBABILITY(selec);

	return selec;
}

/*
 * Comparison function for binary search in mcelem array.
 */
static int
compare_val_int4(const void *a, const void *b)
{
	int32		key = *(int32 *) a;
	const Datum *t = (const Datum *) b;

	return key - DatumGetInt32(*t);
}