mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-18 18:44:06 +08:00
Fix some problems with selectivity estimation for partial indexes.
First, genericcostestimate() was being way too liberal about including partial-index conditions in its selectivity estimate, resulting in substantial underestimates for situations such as an indexqual "x = 42" used with an index on x "WHERE x >= 40 AND x < 50". While the code is intentionally set up to favor selecting partial indexes when available, this was too much... Second, choose_bitmap_and() was likewise easily fooled by cases of this type, since it would similarly think that the partial index had selectivity independent of the indexqual. Fixed by using predicate_implied_by() rather than simple equality checks to determine redundancy. This is a good deal more expensive but I don't see much alternative. At least the extra cost is only paid when there's actually a partial index under consideration. Per report from Jeff Davis. I'm not going to risk back-patching this, though.
This commit is contained in:
parent
2b49e5d3cb
commit
54d20024c1
@ -9,7 +9,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.217 2007/03/17 00:11:04 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.218 2007/03/21 22:18:12 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -57,8 +57,8 @@ static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
static int bitmap_path_comparator(const void *a, const void *b);
|
static int bitmap_path_comparator(const void *a, const void *b);
|
||||||
static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
|
static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
|
||||||
List *paths, RelOptInfo *outer_rel);
|
List *paths, RelOptInfo *outer_rel);
|
||||||
static List *pull_indexpath_quals(Path *bitmapqual);
|
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
|
||||||
static bool lists_intersect_ptr(List *list1, List *list2);
|
static bool lists_intersect(List *list1, List *list2);
|
||||||
static bool match_clause_to_indexcol(IndexOptInfo *index,
|
static bool match_clause_to_indexcol(IndexOptInfo *index,
|
||||||
int indexcol, Oid opfamily,
|
int indexcol, Oid opfamily,
|
||||||
RestrictInfo *rinfo,
|
RestrictInfo *rinfo,
|
||||||
@ -562,6 +562,7 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
Path **patharray;
|
Path **patharray;
|
||||||
Cost costsofar;
|
Cost costsofar;
|
||||||
List *qualsofar;
|
List *qualsofar;
|
||||||
|
List *firstpred;
|
||||||
ListCell *lastcell;
|
ListCell *lastcell;
|
||||||
int i;
|
int i;
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
@ -586,8 +587,7 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
* consider an index redundant if any of its index conditions were already
|
* consider an index redundant if any of its index conditions were already
|
||||||
* used by earlier indexes. (We could use predicate_implied_by to have a
|
* used by earlier indexes. (We could use predicate_implied_by to have a
|
||||||
* more intelligent, but much more expensive, check --- but in most cases
|
* more intelligent, but much more expensive, check --- but in most cases
|
||||||
* simple pointer equality should suffice, since after all the index
|
* simple equality should suffice.)
|
||||||
* conditions are all coming from the same RestrictInfo lists.)
|
|
||||||
*
|
*
|
||||||
* You might think the condition for redundancy should be "all index
|
* You might think the condition for redundancy should be "all index
|
||||||
* conditions already used", not "any", but this turns out to be wrong.
|
* conditions already used", not "any", but this turns out to be wrong.
|
||||||
@ -597,10 +597,27 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
* non-selective. In any case, we'd surely be drastically misestimating
|
* non-selective. In any case, we'd surely be drastically misestimating
|
||||||
* the selectivity if we count the same condition twice.
|
* the selectivity if we count the same condition twice.
|
||||||
*
|
*
|
||||||
* We include index predicate conditions in the redundancy test. Because
|
* We must also consider index predicate conditions in checking for
|
||||||
* the test is just for pointer equality and not equal(), the effect is
|
* redundancy, because the estimated selectivity of a partial index
|
||||||
* that use of the same partial index in two different AND elements is
|
* includes its predicate even if the explicit index conditions don't.
|
||||||
* considered redundant. (XXX is this too strong?)
|
* Here we have to work harder than just checking expression equality:
|
||||||
|
* we check to see if any of the predicate clauses are implied by
|
||||||
|
* index conditions or predicate clauses of previous paths. This covers
|
||||||
|
* cases such as a condition "x = 42" used with a plain index, followed
|
||||||
|
* by a clauseless scan of a partial index "WHERE x >= 40 AND x < 50".
|
||||||
|
* Also, we reject indexes that have a qual condition matching any
|
||||||
|
* previously-used index's predicate (by including predicate conditions
|
||||||
|
* into qualsofar). It should be sufficient to check equality in this
|
||||||
|
* case, not implication, since we've sorted the paths by selectivity
|
||||||
|
* and so tighter conditions are seen first --- only for exactly equal
|
||||||
|
* cases might the partial index come first.
|
||||||
|
*
|
||||||
|
* XXX the reason we need all these redundancy checks is that costsize.c
|
||||||
|
* and clausesel.c aren't very smart about redundant clauses: they will
|
||||||
|
* usually double-count the redundant clauses, producing a too-small
|
||||||
|
* selectivity that makes a redundant AND look like it reduces the total
|
||||||
|
* cost. Perhaps someday that code will be smarter and we can remove
|
||||||
|
* these heuristics.
|
||||||
*
|
*
|
||||||
* Note: outputting the selected sub-paths in selectivity order is a good
|
* Note: outputting the selected sub-paths in selectivity order is a good
|
||||||
* thing even if we weren't using that as part of the selection method,
|
* thing even if we weren't using that as part of the selection method,
|
||||||
@ -619,18 +636,38 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
|
|
||||||
paths = list_make1(patharray[0]);
|
paths = list_make1(patharray[0]);
|
||||||
costsofar = bitmap_and_cost_est(root, rel, paths, outer_rel);
|
costsofar = bitmap_and_cost_est(root, rel, paths, outer_rel);
|
||||||
qualsofar = pull_indexpath_quals(patharray[0]);
|
find_indexpath_quals(patharray[0], &qualsofar, &firstpred);
|
||||||
|
qualsofar = list_concat(qualsofar, firstpred);
|
||||||
lastcell = list_head(paths); /* for quick deletions */
|
lastcell = list_head(paths); /* for quick deletions */
|
||||||
|
|
||||||
for (i = 1; i < npaths; i++)
|
for (i = 1; i < npaths; i++)
|
||||||
{
|
{
|
||||||
Path *newpath = patharray[i];
|
Path *newpath = patharray[i];
|
||||||
List *newqual;
|
List *newqual;
|
||||||
|
List *newpred;
|
||||||
Cost newcost;
|
Cost newcost;
|
||||||
|
|
||||||
newqual = pull_indexpath_quals(newpath);
|
find_indexpath_quals(newpath, &newqual, &newpred);
|
||||||
if (lists_intersect_ptr(newqual, qualsofar))
|
if (lists_intersect(newqual, qualsofar))
|
||||||
continue; /* consider it redundant */
|
continue; /* consider it redundant */
|
||||||
|
if (newpred)
|
||||||
|
{
|
||||||
|
bool redundant = false;
|
||||||
|
|
||||||
|
/* we check each predicate clause separately */
|
||||||
|
foreach(l, newpred)
|
||||||
|
{
|
||||||
|
Node *np = (Node *) lfirst(l);
|
||||||
|
|
||||||
|
if (predicate_implied_by(list_make1(np), qualsofar))
|
||||||
|
{
|
||||||
|
redundant = true;
|
||||||
|
break; /* out of inner loop */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (redundant)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
/* tentatively add newpath to paths, so we can estimate cost */
|
/* tentatively add newpath to paths, so we can estimate cost */
|
||||||
paths = lappend(paths, newpath);
|
paths = lappend(paths, newpath);
|
||||||
newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
|
newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
|
||||||
@ -638,7 +675,7 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
{
|
{
|
||||||
/* keep newpath in paths, update subsidiary variables */
|
/* keep newpath in paths, update subsidiary variables */
|
||||||
costsofar = newcost;
|
costsofar = newcost;
|
||||||
qualsofar = list_concat(qualsofar, newqual);
|
qualsofar = list_concat(list_concat(qualsofar, newqual), newpred);
|
||||||
lastcell = lnext(lastcell);
|
lastcell = lnext(lastcell);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -710,35 +747,39 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pull_indexpath_quals
|
* find_indexpath_quals
|
||||||
*
|
*
|
||||||
* Given the Path structure for a plain or bitmap indexscan, extract a list
|
* Given the Path structure for a plain or bitmap indexscan, extract lists
|
||||||
* of all the indexquals and index predicate conditions used in the Path.
|
* of all the indexquals and index predicate conditions used in the Path.
|
||||||
*
|
*
|
||||||
* This is sort of a simplified version of make_restrictinfo_from_bitmapqual;
|
* This is sort of a simplified version of make_restrictinfo_from_bitmapqual;
|
||||||
* here, we are not trying to produce an accurate representation of the AND/OR
|
* here, we are not trying to produce an accurate representation of the AND/OR
|
||||||
* semantics of the Path, but just find out all the base conditions used.
|
* semantics of the Path, but just find out all the base conditions used.
|
||||||
*
|
*
|
||||||
* The result list contains pointers to the expressions used in the Path,
|
* The result lists contain pointers to the expressions used in the Path,
|
||||||
* but all the list cells are freshly built, so it's safe to destructively
|
* but all the list cells are freshly built, so it's safe to destructively
|
||||||
* modify the list (eg, by concat'ing it with other lists).
|
* modify the lists (eg, by concat'ing with other lists).
|
||||||
*/
|
*/
|
||||||
static List *
|
static void
|
||||||
pull_indexpath_quals(Path *bitmapqual)
|
find_indexpath_quals(Path *bitmapqual, List **quals, List **preds)
|
||||||
{
|
{
|
||||||
List *result = NIL;
|
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
|
|
||||||
|
*quals = NIL;
|
||||||
|
*preds = NIL;
|
||||||
|
|
||||||
if (IsA(bitmapqual, BitmapAndPath))
|
if (IsA(bitmapqual, BitmapAndPath))
|
||||||
{
|
{
|
||||||
BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
|
BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
|
||||||
|
|
||||||
foreach(l, apath->bitmapquals)
|
foreach(l, apath->bitmapquals)
|
||||||
{
|
{
|
||||||
List *sublist;
|
List *subquals;
|
||||||
|
List *subpreds;
|
||||||
|
|
||||||
sublist = pull_indexpath_quals((Path *) lfirst(l));
|
find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds);
|
||||||
result = list_concat(result, sublist);
|
*quals = list_concat(*quals, subquals);
|
||||||
|
*preds = list_concat(*preds, subpreds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (IsA(bitmapqual, BitmapOrPath))
|
else if (IsA(bitmapqual, BitmapOrPath))
|
||||||
@ -747,36 +788,36 @@ pull_indexpath_quals(Path *bitmapqual)
|
|||||||
|
|
||||||
foreach(l, opath->bitmapquals)
|
foreach(l, opath->bitmapquals)
|
||||||
{
|
{
|
||||||
List *sublist;
|
List *subquals;
|
||||||
|
List *subpreds;
|
||||||
|
|
||||||
sublist = pull_indexpath_quals((Path *) lfirst(l));
|
find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds);
|
||||||
result = list_concat(result, sublist);
|
*quals = list_concat(*quals, subquals);
|
||||||
|
*preds = list_concat(*preds, subpreds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (IsA(bitmapqual, IndexPath))
|
else if (IsA(bitmapqual, IndexPath))
|
||||||
{
|
{
|
||||||
IndexPath *ipath = (IndexPath *) bitmapqual;
|
IndexPath *ipath = (IndexPath *) bitmapqual;
|
||||||
|
|
||||||
result = get_actual_clauses(ipath->indexclauses);
|
*quals = get_actual_clauses(ipath->indexclauses);
|
||||||
result = list_concat(result, list_copy(ipath->indexinfo->indpred));
|
*preds = list_copy(ipath->indexinfo->indpred);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
|
elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* lists_intersect_ptr
|
* lists_intersect
|
||||||
* Detect whether two lists have a nonempty intersection,
|
* Detect whether two lists have a nonempty intersection,
|
||||||
* using pointer equality to compare members.
|
* using equal() to compare members.
|
||||||
*
|
*
|
||||||
* This possibly should go into list.c, but it doesn't yet have any use
|
* This possibly should go into list.c, but it doesn't yet have any use
|
||||||
* except in choose_bitmap_and.
|
* except in choose_bitmap_and.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
lists_intersect_ptr(List *list1, List *list2)
|
lists_intersect(List *list1, List *list2)
|
||||||
{
|
{
|
||||||
ListCell *cell1;
|
ListCell *cell1;
|
||||||
|
|
||||||
@ -787,7 +828,7 @@ lists_intersect_ptr(List *list1, List *list2)
|
|||||||
|
|
||||||
foreach(cell2, list2)
|
foreach(cell2, list2)
|
||||||
{
|
{
|
||||||
if (lfirst(cell2) == datum1)
|
if (equal(lfirst(cell2), datum1))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.229 2007/03/17 00:11:05 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.230 2007/03/21 22:18:12 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -86,6 +86,7 @@
|
|||||||
#include "optimizer/pathnode.h"
|
#include "optimizer/pathnode.h"
|
||||||
#include "optimizer/paths.h"
|
#include "optimizer/paths.h"
|
||||||
#include "optimizer/plancat.h"
|
#include "optimizer/plancat.h"
|
||||||
|
#include "optimizer/predtest.h"
|
||||||
#include "optimizer/restrictinfo.h"
|
#include "optimizer/restrictinfo.h"
|
||||||
#include "optimizer/var.h"
|
#include "optimizer/var.h"
|
||||||
#include "parser/parse_coerce.h"
|
#include "parser/parse_coerce.h"
|
||||||
@ -4775,36 +4776,38 @@ genericcostestimate(PlannerInfo *root,
|
|||||||
List *selectivityQuals;
|
List *selectivityQuals;
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
|
|
||||||
/*
|
/*----------
|
||||||
* If the index is partial, AND the index predicate with the explicitly
|
* If the index is partial, AND the index predicate with the explicitly
|
||||||
* given indexquals to produce a more accurate idea of the index
|
* given indexquals to produce a more accurate idea of the index
|
||||||
* selectivity. This may produce redundant clauses. We get rid of exact
|
* selectivity. However, we need to be careful not to insert redundant
|
||||||
* duplicates in the code below. We expect that most cases of partial
|
* clauses, because clauselist_selectivity() is easily fooled into
|
||||||
* redundancy (such as "x < 4" from the qual and "x < 5" from the
|
* computing a too-low selectivity estimate. Our approach is to add
|
||||||
* predicate) will be recognized and handled correctly by
|
* only the index predicate clause(s) that cannot be proven to be implied
|
||||||
* clauselist_selectivity(). This assumption is somewhat fragile, since
|
* by the given indexquals. This successfully handles cases such as a
|
||||||
* it depends on predicate_implied_by() and clauselist_selectivity()
|
* qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
|
||||||
* having similar capabilities, and there are certainly many cases where
|
* There are many other cases where we won't detect redundancy, leading
|
||||||
* we will end up with a too-low selectivity estimate. This will bias the
|
* to a too-low selectivity estimate, which will bias the system in favor
|
||||||
* system in favor of using partial indexes where possible, which is not
|
* of using partial indexes where possible. That is not necessarily bad
|
||||||
* necessarily a bad thing. But it'd be nice to do better someday.
|
* though.
|
||||||
*
|
*
|
||||||
* Note that index->indpred and indexQuals are both in implicit-AND form,
|
* Note that indexQuals contains RestrictInfo nodes while the indpred
|
||||||
* so ANDing them together just takes merging the lists. However,
|
* does not. This is OK for both predicate_implied_by() and
|
||||||
* eliminating duplicates is a bit trickier because indexQuals contains
|
* clauselist_selectivity().
|
||||||
* RestrictInfo nodes and the indpred does not. It is okay to pass a
|
*----------
|
||||||
* mixed list to clauselist_selectivity, but we have to work a bit to
|
|
||||||
* generate a list without logical duplicates. (We could just list_union
|
|
||||||
* indpred and strippedQuals, but then we'd not get caching of per-qual
|
|
||||||
* selectivity estimates.)
|
|
||||||
*/
|
*/
|
||||||
if (index->indpred != NIL)
|
if (index->indpred != NIL)
|
||||||
{
|
{
|
||||||
List *strippedQuals;
|
List *predExtraQuals = NIL;
|
||||||
List *predExtraQuals;
|
|
||||||
|
|
||||||
strippedQuals = get_actual_clauses(indexQuals);
|
foreach(l, index->indpred)
|
||||||
predExtraQuals = list_difference(index->indpred, strippedQuals);
|
{
|
||||||
|
Node *predQual = (Node *) lfirst(l);
|
||||||
|
List *oneQual = list_make1(predQual);
|
||||||
|
|
||||||
|
if (!predicate_implied_by(oneQual, indexQuals))
|
||||||
|
predExtraQuals = list_concat(predExtraQuals, oneQual);
|
||||||
|
}
|
||||||
|
/* list_concat avoids modifying the passed-in indexQuals list */
|
||||||
selectivityQuals = list_concat(predExtraQuals, indexQuals);
|
selectivityQuals = list_concat(predExtraQuals, indexQuals);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -209,9 +209,13 @@ SELECT onek.unique1, onek.string4 FROM onek
|
|||||||
-- test partial btree indexes
|
-- test partial btree indexes
|
||||||
--
|
--
|
||||||
-- As of 7.2, planner probably won't pick an indexscan without stats,
|
-- As of 7.2, planner probably won't pick an indexscan without stats,
|
||||||
-- so ANALYZE first.
|
-- so ANALYZE first. Also, we want to prevent it from picking a bitmapscan
|
||||||
|
-- followed by sort, because that could hide index ordering problems.
|
||||||
--
|
--
|
||||||
ANALYZE onek2;
|
ANALYZE onek2;
|
||||||
|
SET enable_seqscan TO off;
|
||||||
|
SET enable_bitmapscan TO off;
|
||||||
|
SET enable_sort TO off;
|
||||||
--
|
--
|
||||||
-- awk '{if($1<10){print $0;}else{next;}}' onek.data | sort +0n -1
|
-- awk '{if($1<10){print $0;}else{next;}}' onek.data | sort +0n -1
|
||||||
--
|
--
|
||||||
@ -288,6 +292,9 @@ SELECT onek2.unique1, onek2.stringu1 FROM onek2
|
|||||||
999 | LMAAAA
|
999 | LMAAAA
|
||||||
(19 rows)
|
(19 rows)
|
||||||
|
|
||||||
|
RESET enable_seqscan;
|
||||||
|
RESET enable_bitmapscan;
|
||||||
|
RESET enable_sort;
|
||||||
SELECT two, stringu1, ten, string4
|
SELECT two, stringu1, ten, string4
|
||||||
INTO TABLE tmp
|
INTO TABLE tmp
|
||||||
FROM onek;
|
FROM onek;
|
||||||
|
@ -59,10 +59,15 @@ SELECT onek.unique1, onek.string4 FROM onek
|
|||||||
-- test partial btree indexes
|
-- test partial btree indexes
|
||||||
--
|
--
|
||||||
-- As of 7.2, planner probably won't pick an indexscan without stats,
|
-- As of 7.2, planner probably won't pick an indexscan without stats,
|
||||||
-- so ANALYZE first.
|
-- so ANALYZE first. Also, we want to prevent it from picking a bitmapscan
|
||||||
|
-- followed by sort, because that could hide index ordering problems.
|
||||||
--
|
--
|
||||||
ANALYZE onek2;
|
ANALYZE onek2;
|
||||||
|
|
||||||
|
SET enable_seqscan TO off;
|
||||||
|
SET enable_bitmapscan TO off;
|
||||||
|
SET enable_sort TO off;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- awk '{if($1<10){print $0;}else{next;}}' onek.data | sort +0n -1
|
-- awk '{if($1<10){print $0;}else{next;}}' onek.data | sort +0n -1
|
||||||
--
|
--
|
||||||
@ -81,6 +86,10 @@ SELECT onek2.unique1, onek2.stringu1 FROM onek2
|
|||||||
SELECT onek2.unique1, onek2.stringu1 FROM onek2
|
SELECT onek2.unique1, onek2.stringu1 FROM onek2
|
||||||
WHERE onek2.unique1 > 980;
|
WHERE onek2.unique1 > 980;
|
||||||
|
|
||||||
|
RESET enable_seqscan;
|
||||||
|
RESET enable_bitmapscan;
|
||||||
|
RESET enable_sort;
|
||||||
|
|
||||||
|
|
||||||
SELECT two, stringu1, ten, string4
|
SELECT two, stringu1, ten, string4
|
||||||
INTO TABLE tmp
|
INTO TABLE tmp
|
||||||
|
Loading…
Reference in New Issue
Block a user