From c0f7dcdac10777a270eed88613a29b8e5d38a015 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 19 Oct 2002 02:56:16 +0000 Subject: [PATCH] Fix range-query estimation to not double-exclude NULLs, per gripe from Ray Ontko 28-June-02. Also, fix prefix_selectivity for NAME lefthand variables (it was bogusly assuming binary compatibility), and adjust make_greater_string() to not call pg_mbcliplen() with invalid multibyte data (this last per bug report that I can't find at the moment, but it was in July '02). --- src/backend/optimizer/path/clausesel.c | 19 +++++++-- src/backend/utils/adt/selfuncs.c | 58 +++++++++++++------------- src/include/utils/selfuncs.h | 8 ++-- 3 files changed, 49 insertions(+), 36 deletions(-) diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 25350c480c..8a1ab28ac1 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.51 2002/06/20 20:29:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.52 2002/10/19 02:56:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -102,7 +102,9 @@ restrictlist_selectivity(Query *root, * see that hisel is the fraction of the range below the high bound, while * losel is the fraction above the low bound; so hisel can be interpreted * directly as a 0..1 value but we need to convert losel to 1-losel before - * interpreting it as a value. Then the available range is 1-losel to hisel.) + * interpreting it as a value. Then the available range is 1-losel to hisel. + * However, this calculation double-excludes nulls, so really we need + * hisel + losel + null_frac - 1.) * If the calculation yields zero or negative, however, we chicken out and * use a default estimate; that probably means that one or both * selectivities is a default estimate rather than an actual range value. @@ -199,6 +201,9 @@ clauselist_selectivity(Query *root, /* Successfully matched a pair of range clauses */ Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0; + /* Adjust for double-exclusion of NULLs */ + s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid); + /* * A zero or slightly negative s2 should be converted into a * small positive value; we probably are dealing with a very @@ -503,12 +508,18 @@ clause_selectivity(Query *root, else if (IsA(clause, NullTest)) { /* Use node specific selectivity calculation function */ - s1 = nulltestsel(root, (NullTest *) clause, varRelid); + s1 = nulltestsel(root, + ((NullTest *) clause)->nulltesttype, + ((NullTest *) clause)->arg, + varRelid); } else if (IsA(clause, BooleanTest)) { /* Use node specific selectivity calculation function */ - s1 = booltestsel(root, (BooleanTest *) clause, varRelid); + s1 = booltestsel(root, + ((BooleanTest *) clause)->booltesttype, + ((BooleanTest *) clause)->arg, + varRelid); } else if (IsA(clause, RelabelType)) { diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 693925b7ab..211ccbf87b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.118 2002/09/20 03:55:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.119 2002/10/19 02:56:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1007,10 +1007,9 @@ icnlikesel(PG_FUNCTION_ARGS) * booltestsel - Selectivity of BooleanTest Node. */ Selectivity -booltestsel(Query *root, BooleanTest *clause, int varRelid) +booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid) { Var *var; - Node *arg; Oid relid; HeapTuple statsTuple; Datum *values; @@ -1019,10 +1018,6 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) int nnumbers; double selec; - Assert(clause && IsA(clause, BooleanTest)); - - arg = (Node *) clause->arg; - /* * Ignore any binary-compatible relabeling (probably unnecessary, but * can't hurt) @@ -1040,7 +1035,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) * the possibility of a NULL value when using clause_selectivity, * and just assume the value is either TRUE or FALSE. */ - switch (clause->booltesttype) + switch (booltesttype) { case IS_UNKNOWN: selec = DEFAULT_UNK_SEL; @@ -1058,7 +1053,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) break; default: elog(ERROR, "booltestsel: unexpected booltesttype %d", - (int) clause->booltesttype); + (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } @@ -1107,7 +1102,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) */ freq_false = 1.0 - freq_true - freq_null; - switch (clause->booltesttype) + switch (booltesttype) { case IS_UNKNOWN: /* select only NULL values */ @@ -1135,7 +1130,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) break; default: elog(ERROR, "booltestsel: unexpected booltesttype %d", - (int) clause->booltesttype); + (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } @@ -1151,7 +1146,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) * Otherwise adjust for null fraction and assume an even split * for boolean tests. */ - switch (clause->booltesttype) + switch (booltesttype) { case IS_UNKNOWN: @@ -1176,7 +1171,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) break; default: elog(ERROR, "booltestsel: unexpected booltesttype %d", - (int) clause->booltesttype); + (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } @@ -1190,7 +1185,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) * No VACUUM ANALYZE stats available, so use a default value. * (Note: not much point in recursing to clause_selectivity here.) */ - switch (clause->booltesttype) + switch (booltesttype) { case IS_UNKNOWN: selec = DEFAULT_UNK_SEL; @@ -1206,7 +1201,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) break; default: elog(ERROR, "booltestsel: unexpected booltesttype %d", - (int) clause->booltesttype); + (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } @@ -1222,19 +1217,16 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) * nulltestsel - Selectivity of NullTest Node. */ Selectivity -nulltestsel(Query *root, NullTest *clause, int varRelid) +nulltestsel(Query *root, NullTestType nulltesttype, Node *arg, int varRelid) { Var *var; - Node *arg; Oid relid; HeapTuple statsTuple; double selec; double defselec; double freq_null; - Assert(clause && IsA(clause, NullTest)); - - switch (clause->nulltesttype) + switch (nulltesttype) { case IS_NULL: defselec = DEFAULT_UNK_SEL; @@ -1244,25 +1236,22 @@ nulltestsel(Query *root, NullTest *clause, int varRelid) break; default: elog(ERROR, "nulltestsel: unexpected nulltesttype %d", - (int) clause->nulltesttype); + (int) nulltesttype); return (Selectivity) 0; /* keep compiler quiet */ } - arg = (Node *) clause->arg; - /* * Ignore any binary-compatible relabeling */ if (IsA(arg, RelabelType)) arg = ((RelabelType *) arg)->arg; - if (IsA(arg, Var) &&(varRelid == 0 || varRelid == ((Var *) arg)->varno)) + if (IsA(arg, Var) && + (varRelid == 0 || varRelid == ((Var *) arg)->varno)) var = (Var *) arg; else { - /* - * punt if non-Var argument - */ + /* punt if non-Var argument */ return (Selectivity) defselec; } @@ -1282,7 +1271,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid) stats = (Form_pg_statistic) GETSTRUCT(statsTuple); freq_null = stats->stanullfrac; - switch (clause->nulltesttype) + switch (nulltesttype) { case IS_NULL: @@ -1301,7 +1290,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid) break; default: elog(ERROR, "nulltestsel: unexpected nulltesttype %d", - (int) clause->nulltesttype); + (int) nulltesttype); return (Selectivity) 0; /* keep compiler quiet */ } @@ -2978,6 +2967,10 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon) else prefix = DatumGetCString(DirectFunctionCall1(byteaout, prefixcon->constvalue)); + /* If var is type NAME, must adjust type of comparison constant */ + if (var->vartype == NAMEOID) + prefixcon = string_to_const(prefix, NAMEOID); + cmpargs = makeList2(var, prefixcon); /* Assume scalargtsel is appropriate for all supported types */ prefixsel = DatumGetFloat8(DirectFunctionCall4(scalargtsel, @@ -3014,6 +3007,9 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon) */ prefixsel = topsel + prefixsel - 1.0; + /* Adjust for double-exclusion of NULLs */ + prefixsel += nulltestsel(root, IS_NULL, (Node *) var, var->varno); + /* * A zero or slightly negative prefixsel should be converted into * a small positive value; we probably are dealing with a very @@ -3351,6 +3347,7 @@ make_greater_string(const Const *str_const) while (len > 0) { unsigned char *lastchar = (unsigned char *) (workstr + len - 1); + unsigned char savelastchar = *lastchar; /* * Try to generate a larger string by incrementing the last byte. @@ -3369,6 +3366,9 @@ make_greater_string(const Const *str_const) } } + /* restore last byte so we don't confuse pg_mbcliplen */ + *lastchar = savelastchar; + /* * Truncate off the last character, which might be more than 1 * byte, depending on the character encoding. diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 4ed6c1755c..8e73e61ffd 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: selfuncs.h,v 1.8 2002/09/04 20:31:46 momjian Exp $ + * $Id: selfuncs.h,v 1.9 2002/10/19 02:56:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -66,8 +66,10 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); -extern Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid); -extern Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid); +extern Selectivity booltestsel(Query *root, BoolTestType booltesttype, + Node *arg, int varRelid); +extern Selectivity nulltestsel(Query *root, NullTestType nulltesttype, + Node *arg, int varRelid); extern void mergejoinscansel(Query *root, Node *clause, Selectivity *leftscan,