mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-23 19:39:53 +08:00
Use a hopefully-more-reliable method of detecting default selectivity estimates when combining the estimates for a range query.
As pointed out by Miquel van Smoorenburg, the existing check for an impossible combined result would quite possibly fail to detect one default and one non-default input. It seems better to use the default range query estimate in such cases. To do so, add a check for an estimate of exactly DEFAULT_INEQ_SEL. This is a bit ugly because it introduces additional coupling between clauselist_selectivity and scalarltsel/scalargtsel, but it's not like there wasn't plenty already...
This commit is contained in:
parent
e4387116da
commit
547bb4a7f2
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.70 2004/08/29 05:06:43 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.71 2004/11/09 00:34:38 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
|
||||
* interpreting it as a value. Then the available range is 1-losel to hisel.
|
||||
* However, this calculation double-excludes nulls, so really we need
|
||||
* hisel + losel + null_frac - 1.)
|
||||
* If the calculation yields zero or negative, however, we chicken out and
|
||||
* use a default estimate; that probably means that one or both
|
||||
* selectivities is a default estimate rather than an actual range value.
|
||||
*
|
||||
* If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
|
||||
* and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
|
||||
* yields an impossible (negative) result.
|
||||
*
|
||||
* A free side-effect is that we can recognize redundant inequalities such
|
||||
* as "x < 4 AND x < 5"; only the tighter constraint will be counted.
|
||||
@ -194,37 +195,51 @@ clauselist_selectivity(Query *root,
|
||||
if (rqlist->have_lobound && rqlist->have_hibound)
|
||||
{
|
||||
/* Successfully matched a pair of range clauses */
|
||||
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
|
||||
|
||||
/* Adjust for double-exclusion of NULLs */
|
||||
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
|
||||
Selectivity s2;
|
||||
|
||||
/*
|
||||
* A zero or slightly negative s2 should be converted into a
|
||||
* small positive value; we probably are dealing with a very
|
||||
* tight range and got a bogus result due to roundoff errors.
|
||||
* However, if s2 is very negative, then we probably have
|
||||
* default selectivity estimates on one or both sides of the
|
||||
* range. In that case, insert a not-so-wildly-optimistic
|
||||
* default estimate.
|
||||
* Exact equality to the default value probably means the
|
||||
* selectivity function punted. This is not airtight but
|
||||
* should be good enough.
|
||||
*/
|
||||
if (s2 <= 0.0)
|
||||
if (rqlist->hibound == DEFAULT_INEQ_SEL ||
|
||||
rqlist->lobound == DEFAULT_INEQ_SEL)
|
||||
{
|
||||
if (s2 < -0.01)
|
||||
s2 = DEFAULT_RANGE_INEQ_SEL;
|
||||
}
|
||||
else
|
||||
{
|
||||
s2 = rqlist->hibound + rqlist->lobound - 1.0;
|
||||
|
||||
/* Adjust for double-exclusion of NULLs */
|
||||
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
|
||||
|
||||
/*
|
||||
* A zero or slightly negative s2 should be converted into a
|
||||
* small positive value; we probably are dealing with a very
|
||||
* tight range and got a bogus result due to roundoff errors.
|
||||
* However, if s2 is very negative, then we probably have
|
||||
* default selectivity estimates on one or both sides of the
|
||||
* range that we failed to recognize above for some reason.
|
||||
*/
|
||||
if (s2 <= 0.0)
|
||||
{
|
||||
/*
|
||||
* No data available --- use a default estimate that
|
||||
* is small, but not real small.
|
||||
*/
|
||||
s2 = 0.005;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* It's just roundoff error; use a small positive
|
||||
* value
|
||||
*/
|
||||
s2 = 1.0e-10;
|
||||
if (s2 < -0.01)
|
||||
{
|
||||
/*
|
||||
* No data available --- use a default estimate that
|
||||
* is small, but not real small.
|
||||
*/
|
||||
s2 = DEFAULT_RANGE_INEQ_SEL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* It's just roundoff error; use a small positive
|
||||
* value
|
||||
*/
|
||||
s2 = 1.0e-10;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Merge in the selectivity of the pair of clauses */
|
||||
|
@ -15,7 +15,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -111,45 +111,6 @@
|
||||
#include "utils/syscache.h"
|
||||
|
||||
|
||||
/*
|
||||
* Note: the default selectivity estimates are not chosen entirely at random.
|
||||
* We want them to be small enough to ensure that indexscans will be used if
|
||||
* available, for typical table densities of ~100 tuples/page. Thus, for
|
||||
* example, 0.01 is not quite small enough, since that makes it appear that
|
||||
* nearly all pages will be hit anyway. Also, since we sometimes estimate
|
||||
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
|
||||
* 1/DEFAULT_EQ_SEL.
|
||||
*/
|
||||
|
||||
/* default selectivity estimate for equalities such as "A = b" */
|
||||
#define DEFAULT_EQ_SEL 0.005
|
||||
|
||||
/* default selectivity estimate for inequalities such as "A < b" */
|
||||
#define DEFAULT_INEQ_SEL (1.0 / 3.0)
|
||||
|
||||
/* default selectivity estimate for pattern-match operators such as LIKE */
|
||||
#define DEFAULT_MATCH_SEL 0.005
|
||||
|
||||
/* default number of distinct values in a table */
|
||||
#define DEFAULT_NUM_DISTINCT 200
|
||||
|
||||
/* default selectivity estimate for boolean and null test nodes */
|
||||
#define DEFAULT_UNK_SEL 0.005
|
||||
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
|
||||
|
||||
/*
|
||||
* Clamp a computed probability estimate (which may suffer from roundoff or
|
||||
* estimation errors) to valid range. Argument must be a float variable.
|
||||
*/
|
||||
#define CLAMP_PROBABILITY(p) \
|
||||
do { \
|
||||
if (p < 0.0) \
|
||||
p = 0.0; \
|
||||
else if (p > 1.0) \
|
||||
p = 1.0; \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Return data from examine_variable and friends */
|
||||
typedef struct
|
||||
{
|
||||
|
@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.19 2004/08/29 05:06:59 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.20 2004/11/09 00:34:46 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -19,6 +19,49 @@
|
||||
#include "nodes/parsenodes.h"
|
||||
|
||||
|
||||
/*
|
||||
* Note: the default selectivity estimates are not chosen entirely at random.
|
||||
* We want them to be small enough to ensure that indexscans will be used if
|
||||
* available, for typical table densities of ~100 tuples/page. Thus, for
|
||||
* example, 0.01 is not quite small enough, since that makes it appear that
|
||||
* nearly all pages will be hit anyway. Also, since we sometimes estimate
|
||||
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
|
||||
* 1/DEFAULT_EQ_SEL.
|
||||
*/
|
||||
|
||||
/* default selectivity estimate for equalities such as "A = b" */
|
||||
#define DEFAULT_EQ_SEL 0.005
|
||||
|
||||
/* default selectivity estimate for inequalities such as "A < b" */
|
||||
#define DEFAULT_INEQ_SEL 0.3333333333333333
|
||||
|
||||
/* default selectivity estimate for range inequalities "A > b AND A < c" */
|
||||
#define DEFAULT_RANGE_INEQ_SEL 0.005
|
||||
|
||||
/* default selectivity estimate for pattern-match operators such as LIKE */
|
||||
#define DEFAULT_MATCH_SEL 0.005
|
||||
|
||||
/* default number of distinct values in a table */
|
||||
#define DEFAULT_NUM_DISTINCT 200
|
||||
|
||||
/* default selectivity estimate for boolean and null test nodes */
|
||||
#define DEFAULT_UNK_SEL 0.005
|
||||
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
|
||||
|
||||
|
||||
/*
|
||||
* Clamp a computed probability estimate (which may suffer from roundoff or
|
||||
* estimation errors) to valid range. Argument must be a float variable.
|
||||
*/
|
||||
#define CLAMP_PROBABILITY(p) \
|
||||
do { \
|
||||
if (p < 0.0) \
|
||||
p = 0.0; \
|
||||
else if (p > 1.0) \
|
||||
p = 1.0; \
|
||||
} while (0)
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
Pattern_Type_Like, Pattern_Type_Like_IC,
|
||||
|
Loading…
Reference in New Issue
Block a user