Use a hopefully-more-reliable method of detecting default selectivity

Use a hopefully-more-reliable method of detecting default selectivity estimates when combining the estimates for a range query. As pointed out by Miquel van Smoorenburg, the existing check for an impossible combined result (a zero or negative combined selectivity) would quite possibly fail to detect the case of one default and one non-default input. It seems better to use the default range query estimate in such cases. To do so, add a check for an estimate of exactly DEFAULT_INEQ_SEL. This is a bit ugly because it introduces additional coupling between clauselist_selectivity and scalarltsel/scalargtsel, but it's not like there wasn't plenty already...
2025-02-23 19:39:53 +08:00 · 2004-11-09 00:34:46 +00:00 · 2004-11-09 00:34:46 +00:00 · 547bb4a7f2
commit 547bb4a7f2
parent e4387116da
3 changed files with 90 additions and 71 deletions
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.70 2004/08/29 05:06:43 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.71 2004/11/09 00:34:38 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 * interpreting it as a value.	Then the available range is 1-losel to hisel.
 * However, this calculation double-excludes nulls, so really we need
 * hisel + losel + null_frac - 1.)
- * If the calculation yields zero or negative, however, we chicken out and
- * use a default estimate; that probably means that one or both
- * selectivities is a default estimate rather than an actual range value.
+ *
+ * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
+ * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
+ * yields an impossible (negative) result.
 *
 * A free side-effect is that we can recognize redundant inequalities such
 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
@ -194,37 +195,51 @@ clauselist_selectivity(Query *root,
 		if (rqlist->have_lobound && rqlist->have_hibound)
 		{
 			/* Successfully matched a pair of range clauses */
-			Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
-
-			/* Adjust for double-exclusion of NULLs */
-			s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
+			Selectivity s2;

 			/*
-			 * A zero or slightly negative s2 should be converted into a
-			 * small positive value; we probably are dealing with a very
-			 * tight range and got a bogus result due to roundoff errors.
-			 * However, if s2 is very negative, then we probably have
-			 * default selectivity estimates on one or both sides of the
-			 * range.  In that case, insert a not-so-wildly-optimistic
-			 * default estimate.
+			 * Exact equality to the default value probably means the
+			 * selectivity function punted.  This is not airtight but
+			 * should be good enough.
 			 */
-			if (s2 <= 0.0)
+			if (rqlist->hibound == DEFAULT_INEQ_SEL ||
+				rqlist->lobound == DEFAULT_INEQ_SEL)
 			{
-				if (s2 < -0.01)
+				s2 = DEFAULT_RANGE_INEQ_SEL;
+			}
+			else
+			{
+				s2 = rqlist->hibound + rqlist->lobound - 1.0;
+
+				/* Adjust for double-exclusion of NULLs */
+				s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
+
+				/*
+				 * A zero or slightly negative s2 should be converted into a
+				 * small positive value; we probably are dealing with a very
+				 * tight range and got a bogus result due to roundoff errors.
+				 * However, if s2 is very negative, then we probably have
+				 * default selectivity estimates on one or both sides of the
+				 * range that we failed to recognize above for some reason.
+				 */
+				if (s2 <= 0.0)
 				{
-					/*
-					 * No data available --- use a default estimate that
-					 * is small, but not real small.
-					 */
-					s2 = 0.005;
-				}
-				else
-				{
-					/*
-					 * It's just roundoff error; use a small positive
-					 * value
-					 */
-					s2 = 1.0e-10;
+					if (s2 < -0.01)
+					{
+						/*
+						 * No data available --- use a default estimate that
+						 * is small, but not real small.
+						 */
+						s2 = DEFAULT_RANGE_INEQ_SEL;
+					}
+					else
+					{
+						/*
+						 * It's just roundoff error; use a small positive
+						 * value
+						 */
+						s2 = 1.0e-10;
+					}
 				}
 			}
 			/* Merge in the selectivity of the pair of clauses */
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@ -15,7 +15,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -111,45 +111,6 @@
 #include "utils/syscache.h"


-/*
- * Note: the default selectivity estimates are not chosen entirely at random.
- * We want them to be small enough to ensure that indexscans will be used if
- * available, for typical table densities of ~100 tuples/page.	Thus, for
- * example, 0.01 is not quite small enough, since that makes it appear that
- * nearly all pages will be hit anyway.  Also, since we sometimes estimate
- * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
- * 1/DEFAULT_EQ_SEL.
- */
-
-/* default selectivity estimate for equalities such as "A = b" */
-#define DEFAULT_EQ_SEL	0.005
-
-/* default selectivity estimate for inequalities such as "A < b" */
-#define DEFAULT_INEQ_SEL  (1.0 / 3.0)
-
-/* default selectivity estimate for pattern-match operators such as LIKE */
-#define DEFAULT_MATCH_SEL	0.005
-
-/* default number of distinct values in a table */
-#define DEFAULT_NUM_DISTINCT  200
-
-/* default selectivity estimate for boolean and null test nodes */
-#define DEFAULT_UNK_SEL			0.005
-#define DEFAULT_NOT_UNK_SEL		(1.0 - DEFAULT_UNK_SEL)
-
-/*
- * Clamp a computed probability estimate (which may suffer from roundoff or
- * estimation errors) to valid range.  Argument must be a float variable.
- */
-#define CLAMP_PROBABILITY(p) \
-	do { \
-		if (p < 0.0) \
-			p = 0.0; \
-		else if (p > 1.0) \
-			p = 1.0; \
-	} while (0)
-
-
 /* Return data from examine_variable and friends */
 typedef struct
 {
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.19 2004/08/29 05:06:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.20 2004/11/09 00:34:46 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -19,6 +19,49 @@
 #include "nodes/parsenodes.h"


+/*
+ * Note: the default selectivity estimates are not chosen entirely at random.
+ * We want them to be small enough to ensure that indexscans will be used if
+ * available, for typical table densities of ~100 tuples/page.	Thus, for
+ * example, 0.01 is not quite small enough, since that makes it appear that
+ * nearly all pages will be hit anyway.  Also, since we sometimes estimate
+ * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
+ * 1/DEFAULT_EQ_SEL.
+ */
+
+/* default selectivity estimate for equalities such as "A = b" */
+#define DEFAULT_EQ_SEL	0.005
+
+/* default selectivity estimate for inequalities such as "A < b" */
+#define DEFAULT_INEQ_SEL  0.3333333333333333
+
+/* default selectivity estimate for range inequalities "A > b AND A < c" */
+#define DEFAULT_RANGE_INEQ_SEL  0.005
+
+/* default selectivity estimate for pattern-match operators such as LIKE */
+#define DEFAULT_MATCH_SEL	0.005
+
+/* default number of distinct values in a table */
+#define DEFAULT_NUM_DISTINCT  200
+
+/* default selectivity estimate for boolean and null test nodes */
+#define DEFAULT_UNK_SEL			0.005
+#define DEFAULT_NOT_UNK_SEL		(1.0 - DEFAULT_UNK_SEL)
+
+
+/*
+ * Clamp a computed probability estimate (which may suffer from roundoff or
+ * estimation errors) to valid range.  Argument must be a float variable.
+ */
+#define CLAMP_PROBABILITY(p) \
+	do { \
+		if (p < 0.0) \
+			p = 0.0; \
+		else if (p > 1.0) \
+			p = 1.0; \
+	} while (0)
+
+
 typedef enum
 {
 	Pattern_Type_Like, Pattern_Type_Like_IC,