Fix some planner bugs exposed by reports from Arjen van der Meijden. These are all in new-in-8.2 logic associated with indexability of ScalarArrayOpExpr (IN-clauses) or amortization of indexscan costs across repeated indexscans on the inside of a nestloop. In particular: Fix some logic errors in the estimation for multiple scans induced by a ScalarArrayOpExpr indexqual. Include a small cost component in bitmap index scans to reflect the costs of manipulating the bitmap itself; this is mainly to prevent a bitmap scan from appearing to have the same cost as a plain indexscan for fetching a single tuple. Also add a per-index-scan-startup CPU cost component; while prior releases were clearly too pessimistic about the cost of repeated indexscans, the original 8.2 coding allowed the cost of an indexscan to effectively go to zero if repeated often enough, which is overly optimistic. Pay some attention to index correlation when estimating costs for a nestloop inner indexscan: this is significant when the plan fetches multiple heap tuples per iteration, since high correlation means those tuples are probably on the same or adjacent heap pages.
2024-12-27 08:39:28 +08:00 · 2006-12-15 18:42:35 +00:00 · 2006-12-15 18:42:35 +00:00 · 14bdb023cc
commit 14bdb023cc
parent 4f554024a8
2 changed files with 88 additions and 24 deletions
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@ -54,7 +54,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.169 2006/11/11 01:14:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.169.2.1 2006/12/15 18:42:35 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -276,13 +276,12 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	if (outer_rel != NULL && outer_rel->rows > 1)
 	{
 		/*
-		 * For repeated indexscans, scale up the number of tuples fetched in
+		 * For repeated indexscans, the appropriate estimate for the
+		 * uncorrelated case is to scale up the number of tuples fetched in
 		 * the Mackert and Lohman formula by the number of scans, so that we
-		 * estimate the number of pages fetched by all the scans. Then
+		 * estimate the number of pages fetched by all the scans; then
 		 * pro-rate the costs for one scan.  In this case we assume all the
-		 * fetches are random accesses.  XXX it'd be good to include
-		 * correlation in this model, but it's not clear how to do that
-		 * without double-counting cache effects.
+		 * fetches are random accesses.
 		 */
 		double		num_scans = outer_rel->rows;

@ -291,7 +290,27 @@ cost_index(IndexPath *path, PlannerInfo *root,
 											(double) index->pages,
 											root);

-		run_cost += (pages_fetched * random_page_cost) / num_scans;
+		max_IO_cost = (pages_fetched * random_page_cost) / num_scans;
+
+		/*
+		 * In the perfectly correlated case, the number of pages touched
+		 * by each scan is selectivity * table_size, and we can use the
+		 * Mackert and Lohman formula at the page level to estimate how
+		 * much work is saved by caching across scans.  We still assume
+		 * all the fetches are random, though, which is an overestimate
+		 * that's hard to correct for without double-counting the cache
+		 * effects.  (But in most cases where such a plan is actually
+		 * interesting, only one page would get fetched per scan anyway,
+		 * so it shouldn't matter much.)
+		 */
+		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+
+		pages_fetched = index_pages_fetched(pages_fetched * num_scans,
+											baserel->pages,
+											(double) index->pages,
+											root);
+
+		min_IO_cost = (pages_fetched * random_page_cost) / num_scans;
 	}
 	else
 	{
@ -312,16 +331,16 @@ cost_index(IndexPath *path, PlannerInfo *root,
 		min_IO_cost = random_page_cost;
 		if (pages_fetched > 1)
 			min_IO_cost += (pages_fetched - 1) * seq_page_cost;
-
-		/*
-		 * Now interpolate based on estimated index order correlation to get
-		 * total disk I/O cost for main table accesses.
-		 */
-		csquared = indexCorrelation * indexCorrelation;
-
-		run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
 	}

+	/*
+	 * Now interpolate based on estimated index order correlation to get
+	 * total disk I/O cost for main table accesses.
+	 */
+	csquared = indexCorrelation * indexCorrelation;
+
+	run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
+
 	/*
 	 * Estimate CPU costs per tuple.
 	 *
@ -614,6 +633,13 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
 	{
 		*cost = ((IndexPath *) path)->indextotalcost;
 		*selec = ((IndexPath *) path)->indexselectivity;
+		/*
+		 * Charge a small amount per retrieved tuple to reflect the costs of
+		 * manipulating the bitmap.  This is mostly to make sure that a bitmap
+		 * scan doesn't look to be the same cost as an indexscan to retrieve
+		 * a single tuple.
+		 */
+		*cost += 0.1 * cpu_operator_cost * ((IndexPath *) path)->rows;
 	}
 	else if (IsA(path, BitmapAndPath))
 	{
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@ -15,7 +15,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.214 2006/10/04 00:29:59 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.214.2.1 2006/12/15 18:42:35 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -4673,14 +4673,18 @@ genericcostestimate(PlannerInfo *root,
 	 * way in order to get the right answer for partial indexes.
 	 */
 	if (numIndexTuples <= 0.0)
+	{
 		numIndexTuples = *indexSelectivity * index->rel->tuples;

-	/*
-	 * The estimate obtained so far counts all the tuples returned by all
-	 * scans of ScalarArrayOpExpr calls.  We want to consider the per-scan
-	 * number, so adjust.  This is a handy place to round to integer, too.
-	 */
-	numIndexTuples = rint(numIndexTuples / num_sa_scans);
+		/*
+		 * The above calculation counts all the tuples visited across all
+		 * scans induced by ScalarArrayOpExpr nodes.  We want to consider the
+		 * average per-indexscan number, so adjust.  This is a handy place to
+		 * round to integer, too.  (If caller supplied tuple estimate, it's
+		 * responsible for handling these considerations.)
+		 */
+		numIndexTuples = rint(numIndexTuples / num_sa_scans);
+	}

 	/*
 	 * We can bound the number of tuples by the index size in any case. Also,
@ -4786,7 +4790,9 @@ genericcostestimate(PlannerInfo *root,
 	 * evaluated once at the start of the scan to reduce them to runtime keys
 	 * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
 	 * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
-	 * indexqual operator.
+	 * indexqual operator.  Because we have numIndexTuples as a per-scan
+	 * number, we have to multiply by num_sa_scans to get the correct result
+	 * for ScalarArrayOpExpr cases.
 	 *
 	 * Note: this neglects the possible costs of rechecking lossy operators
 	 * and OR-clause expressions.  Detecting that that might be needed seems
@ -4801,7 +4807,22 @@ genericcostestimate(PlannerInfo *root,
 		qual_arg_cost = 0;
 	*indexStartupCost = qual_arg_cost;
 	*indexTotalCost += qual_arg_cost;
-	*indexTotalCost += numIndexTuples * (cpu_index_tuple_cost + qual_op_cost);
+	*indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
+
+	/*
+	 * We also add a CPU-cost component to represent the general costs of 
+	 * starting an indexscan, such as analysis of btree index keys and
+	 * initial tree descent.  This is estimated at 100x cpu_operator_cost,
+	 * which is a bit arbitrary but seems the right order of magnitude.
+	 * (As noted above, we don't charge any I/O for touching upper tree
+	 * levels, but charging nothing at all has been found too optimistic.)
+	 *
+	 * Although this is startup cost with respect to any one scan, we add
+	 * it to the "total" cost component because it's only very interesting
+	 * in the many-ScalarArrayOpExpr-scan case, and there it will be paid
+	 * over the life of the scan node.
+	 */
+	*indexTotalCost += num_sa_scans * 100.0 * cpu_operator_cost;

 	/*
 	 * Generic assumption about index correlation: there isn't any.
@ -4829,6 +4850,7 @@ btcostestimate(PG_FUNCTION_ARGS)
 	int			indexcol;
 	bool		eqQualHere;
 	bool		found_saop;
+	double		num_sa_scans;
 	ListCell   *l;

 	/*
@ -4852,6 +4874,7 @@ btcostestimate(PG_FUNCTION_ARGS)
 	indexcol = 0;
 	eqQualHere = false;
 	found_saop = false;
+	num_sa_scans = 1;
 	foreach(l, indexQuals)
 	{
 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
@ -4928,6 +4951,15 @@ btcostestimate(PG_FUNCTION_ARGS)
 		Assert(op_strategy != 0);		/* not a member of opclass?? */
 		if (op_strategy == BTEqualStrategyNumber)
 			eqQualHere = true;
+		/* count up number of SA scans induced by indexBoundQuals only */
+		if (IsA(clause, ScalarArrayOpExpr))
+		{
+			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+			int			alength = estimate_array_length(lsecond(saop->args));
+
+			if (alength > 1)
+				num_sa_scans *= alength;
+		}
 		indexBoundQuals = lappend(indexBoundQuals, rinfo);
 	}

@ -4949,6 +4981,12 @@ btcostestimate(PG_FUNCTION_ARGS)
 												  index->rel->relid,
 												  JOIN_INNER);
 		numIndexTuples = btreeSelectivity * index->rel->tuples;
+		/*
+		 * As in genericcostestimate(), we have to adjust for any
+		 * ScalarArrayOpExpr quals included in indexBoundQuals, and then
+		 * round to integer.
+		 */
+		numIndexTuples = rint(numIndexTuples / num_sa_scans);
 	}

 	genericcostestimate(root, index, indexQuals, outer_rel, numIndexTuples,