mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-24 18:55:04 +08:00
0faf7d933f
Make sure that function declarations use names that exactly match the corresponding names from function definitions in contrib code. Like other recent commits that cleaned up function parameter names, this commit was written with help from clang-tidy. Author: Peter Geoghegan <pg@bowt.ie> Discussion: https://postgr.es/m/CAH2-WznJt9CMM9KJTMjJh_zbL5hD9oX44qdJ4aqZtjFi-zA3Tg@mail.gmail.com
334 lines
8.5 KiB
C
334 lines
8.5 KiB
C
/*-------------------------------------------------------------------------
 *
 * _int_selfuncs.c
 *	  Functions for selectivity estimation of intarray operators
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  contrib/intarray/_int_selfuncs.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "_int.h"
|
|
#include "access/htup_details.h"
|
|
#include "catalog/pg_operator.h"
|
|
#include "catalog/pg_statistic.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "miscadmin.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/selfuncs.h"
|
|
#include "utils/syscache.h"
|
|
|
|
PG_FUNCTION_INFO_V1(_int_overlap_sel);
|
|
PG_FUNCTION_INFO_V1(_int_contains_sel);
|
|
PG_FUNCTION_INFO_V1(_int_contained_sel);
|
|
PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
|
|
PG_FUNCTION_INFO_V1(_int_contains_joinsel);
|
|
PG_FUNCTION_INFO_V1(_int_contained_joinsel);
|
|
PG_FUNCTION_INFO_V1(_int_matchsel);
|
|
|
|
|
|
static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
|
|
int nmcelems, float4 minfreq);
|
|
static int compare_val_int4(const void *a, const void *b);
|
|
|
|
/*
 * Wrappers around the default array selectivity estimation functions.
 *
 * The default array selectivity operators for the @>, && and <@ operators
 * work fine for integer arrays. However, if we tried to just use arraycontsel
 * and arraycontjoinsel directly as the cost estimator functions for our
 * operators, they would not work as intended, because they look at the
 * operator's OID. Our operators behave exactly like the built-in anyarray
 * versions, but we must tell the cost estimator functions which built-in
 * operators they correspond to. These wrappers just replace the operator
 * OID with the corresponding built-in operator's OID, and call the built-in
 * function.
 */
|
|
|
|
Datum
|
|
_int_overlap_sel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3)));
|
|
}
|
|
|
|
Datum
|
|
_int_contains_sel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3)));
|
|
}
|
|
|
|
Datum
|
|
_int_contained_sel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3)));
|
|
}
|
|
|
|
Datum
|
|
_int_overlap_joinsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3),
|
|
PG_GETARG_DATUM(4)));
|
|
}
|
|
|
|
Datum
|
|
_int_contains_joinsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3),
|
|
PG_GETARG_DATUM(4)));
|
|
}
|
|
|
|
Datum
|
|
_int_contained_joinsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3),
|
|
PG_GETARG_DATUM(4)));
|
|
}
|
|
|
|
|
|
/*
|
|
* _int_matchsel -- restriction selectivity function for intarray @@ query_int
|
|
*/
|
|
Datum
|
|
_int_matchsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
|
|
|
|
List *args = (List *) PG_GETARG_POINTER(2);
|
|
int varRelid = PG_GETARG_INT32(3);
|
|
VariableStatData vardata;
|
|
Node *other;
|
|
bool varonleft;
|
|
Selectivity selec;
|
|
QUERYTYPE *query;
|
|
Datum *mcelems = NULL;
|
|
float4 *mcefreqs = NULL;
|
|
int nmcelems = 0;
|
|
float4 minfreq = 0.0;
|
|
float4 nullfrac = 0.0;
|
|
AttStatsSlot sslot;
|
|
|
|
/*
|
|
* If expression is not "variable @@ something" or "something @@ variable"
|
|
* then punt and return a default estimate.
|
|
*/
|
|
if (!get_restriction_variable(root, args, varRelid,
|
|
&vardata, &other, &varonleft))
|
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
|
|
|
/*
|
|
* Variable should be int[]. We don't support cases where variable is
|
|
* query_int.
|
|
*/
|
|
if (vardata.vartype != INT4ARRAYOID)
|
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
|
|
|
/*
|
|
* Can't do anything useful if the something is not a constant, either.
|
|
*/
|
|
if (!IsA(other, Const))
|
|
{
|
|
ReleaseVariableStats(vardata);
|
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
|
}
|
|
|
|
/*
|
|
* The "@@" operator is strict, so we can cope with NULL right away.
|
|
*/
|
|
if (((Const *) other)->constisnull)
|
|
{
|
|
ReleaseVariableStats(vardata);
|
|
PG_RETURN_FLOAT8(0.0);
|
|
}
|
|
|
|
/* The caller made sure the const is a query, so get it now */
|
|
query = DatumGetQueryTypeP(((Const *) other)->constvalue);
|
|
|
|
/* Empty query matches nothing */
|
|
if (query->size == 0)
|
|
{
|
|
ReleaseVariableStats(vardata);
|
|
return (Selectivity) 0.0;
|
|
}
|
|
|
|
/*
|
|
* Get the statistics for the intarray column.
|
|
*
|
|
* We're interested in the Most-Common-Elements list, and the NULL
|
|
* fraction.
|
|
*/
|
|
if (HeapTupleIsValid(vardata.statsTuple))
|
|
{
|
|
Form_pg_statistic stats;
|
|
|
|
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
|
|
nullfrac = stats->stanullfrac;
|
|
|
|
/*
|
|
* For an int4 array, the default array type analyze function will
|
|
* collect a Most Common Elements list, which is an array of int4s.
|
|
*/
|
|
if (get_attstatsslot(&sslot, vardata.statsTuple,
|
|
STATISTIC_KIND_MCELEM, InvalidOid,
|
|
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
|
|
{
|
|
Assert(sslot.valuetype == INT4OID);
|
|
|
|
/*
|
|
* There should be three more Numbers than Values, because the
|
|
* last three (for intarray) cells are taken for minimal, maximal
|
|
* and nulls frequency. Punt if not.
|
|
*/
|
|
if (sslot.nnumbers == sslot.nvalues + 3)
|
|
{
|
|
/* Grab the lowest frequency. */
|
|
minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
|
|
|
|
mcelems = sslot.values;
|
|
mcefreqs = sslot.numbers;
|
|
nmcelems = sslot.nvalues;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
memset(&sslot, 0, sizeof(sslot));
|
|
|
|
/* Process the logical expression in the query, using the stats */
|
|
selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
|
|
mcelems, mcefreqs, nmcelems, minfreq);
|
|
|
|
/* MCE stats count only non-null rows, so adjust for null rows. */
|
|
selec *= (1.0 - nullfrac);
|
|
|
|
free_attstatsslot(&sslot);
|
|
ReleaseVariableStats(vardata);
|
|
|
|
CLAMP_PROBABILITY(selec);
|
|
|
|
PG_RETURN_FLOAT8((float8) selec);
|
|
}
|
|
|
|
/*
|
|
* Estimate selectivity of single intquery operator
|
|
*/
|
|
static Selectivity
|
|
int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
|
|
int nmcelems, float4 minfreq)
|
|
{
|
|
Selectivity selec;
|
|
|
|
/* since this function recurses, it could be driven to stack overflow */
|
|
check_stack_depth();
|
|
|
|
if (item->type == VAL)
|
|
{
|
|
Datum *searchres;
|
|
|
|
if (mcelems == NULL)
|
|
return (Selectivity) DEFAULT_EQ_SEL;
|
|
|
|
searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
|
|
sizeof(Datum), compare_val_int4);
|
|
if (searchres)
|
|
{
|
|
/*
|
|
* The element is in MCELEM. Return precise selectivity (or at
|
|
* least as precise as ANALYZE could find out).
|
|
*/
|
|
selec = mcefreqs[searchres - mcelems];
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The element is not in MCELEM. Punt, but assume that the
|
|
* selectivity cannot be more than minfreq / 2.
|
|
*/
|
|
selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
|
|
}
|
|
}
|
|
else if (item->type == OPR)
|
|
{
|
|
/* Current query node is an operator */
|
|
Selectivity s1,
|
|
s2;
|
|
|
|
s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
|
|
minfreq);
|
|
switch (item->val)
|
|
{
|
|
case (int32) '!':
|
|
selec = 1.0 - s1;
|
|
break;
|
|
|
|
case (int32) '&':
|
|
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
|
|
nmcelems, minfreq);
|
|
selec = s1 * s2;
|
|
break;
|
|
|
|
case (int32) '|':
|
|
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
|
|
nmcelems, minfreq);
|
|
selec = s1 + s2 - s1 * s2;
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unrecognized operator: %d", item->val);
|
|
selec = 0; /* keep compiler quiet */
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
elog(ERROR, "unrecognized int query item type: %u", item->type);
|
|
selec = 0; /* keep compiler quiet */
|
|
}
|
|
|
|
/* Clamp intermediate results to stay sane despite roundoff error */
|
|
CLAMP_PROBABILITY(selec);
|
|
|
|
return selec;
|
|
}
|
|
|
|
/*
|
|
* Comparison function for binary search in mcelem array.
|
|
*/
|
|
static int
|
|
compare_val_int4(const void *a, const void *b)
|
|
{
|
|
int32 key = *(int32 *) a;
|
|
const Datum *t = (const Datum *) b;
|
|
|
|
return key - DatumGetInt32(*t);
|
|
}
|