mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Allow estimate_num_groups() to pass back further details about the estimation
Here we add a new output parameter to estimate_num_groups() to allow it to inform the caller of additional, possibly useful information about the estimation. The new output parameter is a struct that currently contains just a single field with a set of flags. A struct was used, rather than a bare flags output parameter, so that further fields can be added later without having to change the signature of the function again when we want to pass back information that might not be suitable to store in the flags field. It seems reasonable that one day the planner will want to know more about the estimation. For example, how many individual sets of statistics was the estimation generated from? The planner may want to take that into account if we ever want to consider risks as well as costs when generating plans. For now, there is only one flag that we set in the flags field. It indicates whether the estimation fell back on using the hard-coded constants in any part of the estimation. Callers may like to change their behavior if this is set, and this gives them the ability to do so. Callers may pass the new output parameter as NULL if they have no interest in obtaining any additional information about the estimate. We're not adding any actual usages of these flags here. Some follow-up commits will make use of this feature. Additionally, we're also not making any changes to add support for clauselist_selectivity() and clauselist_selectivity_ext(). However, if this is required in the future then the same struct being added here should be fine to use as a new output argument for those functions too. Author: David Rowley Discussion: https://postgr.es/m/CAApHDvqQqpk=1W-G_ds7A9CsXX3BggWj_7okinzkLVhDubQzjA@mail.gmail.com
This commit is contained in:
parent
efd9d92bb3
commit
ed934d4fa3
@ -3087,7 +3087,7 @@ estimate_path_cost_size(PlannerInfo *root,
|
||||
numGroups = estimate_num_groups(root,
|
||||
get_sortgrouplist_exprs(root->parse->groupClause,
|
||||
fpinfo->grouped_tlist),
|
||||
input_rows, NULL);
|
||||
input_rows, NULL, NULL);
|
||||
|
||||
/*
|
||||
* Get the retrieved_rows and rows estimates. If there are HAVING
|
||||
|
@ -1969,7 +1969,8 @@ cost_incremental_sort(Path *path,
|
||||
|
||||
/* Estimate number of groups with equal presorted keys. */
|
||||
if (!unknown_varno)
|
||||
input_groups = estimate_num_groups(root, presortedExprs, input_tuples, NULL);
|
||||
input_groups = estimate_num_groups(root, presortedExprs, input_tuples,
|
||||
NULL, NULL);
|
||||
|
||||
group_tuples = input_tuples / input_groups;
|
||||
group_input_run_cost = input_run_cost / input_groups;
|
||||
|
@ -1990,6 +1990,7 @@ adjust_rowcount_for_semijoins(PlannerInfo *root,
|
||||
nunique = estimate_num_groups(root,
|
||||
sjinfo->semi_rhs_exprs,
|
||||
nraw,
|
||||
NULL,
|
||||
NULL);
|
||||
if (rowcount > nunique)
|
||||
rowcount = nunique;
|
||||
|
@ -3702,7 +3702,8 @@ get_number_of_groups(PlannerInfo *root,
|
||||
double numGroups = estimate_num_groups(root,
|
||||
groupExprs,
|
||||
path_rows,
|
||||
&gset);
|
||||
&gset,
|
||||
NULL);
|
||||
|
||||
gs->numGroups = numGroups;
|
||||
rollup->numGroups += numGroups;
|
||||
@ -3727,7 +3728,8 @@ get_number_of_groups(PlannerInfo *root,
|
||||
double numGroups = estimate_num_groups(root,
|
||||
groupExprs,
|
||||
path_rows,
|
||||
&gset);
|
||||
&gset,
|
||||
NULL);
|
||||
|
||||
gs->numGroups = numGroups;
|
||||
gd->dNumHashGroups += numGroups;
|
||||
@ -3743,7 +3745,7 @@ get_number_of_groups(PlannerInfo *root,
|
||||
target_list);
|
||||
|
||||
dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
|
||||
NULL);
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
else if (parse->groupingSets)
|
||||
@ -4792,7 +4794,7 @@ create_distinct_paths(PlannerInfo *root,
|
||||
parse->targetList);
|
||||
numDistinctRows = estimate_num_groups(root, distinctExprs,
|
||||
cheapest_input_path->rows,
|
||||
NULL);
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -338,6 +338,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
*pNumGroups = estimate_num_groups(subroot,
|
||||
get_tlist_exprs(subquery->targetList, false),
|
||||
subpath->rows,
|
||||
NULL,
|
||||
NULL);
|
||||
}
|
||||
}
|
||||
|
@ -1713,6 +1713,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
|
||||
pathnode->path.rows = estimate_num_groups(root,
|
||||
sjinfo->semi_rhs_exprs,
|
||||
rel->rows,
|
||||
NULL,
|
||||
NULL);
|
||||
numCols = list_length(sjinfo->semi_rhs_exprs);
|
||||
|
||||
|
@ -3241,6 +3241,7 @@ typedef struct
|
||||
Node *var; /* might be an expression, not just a Var */
|
||||
RelOptInfo *rel; /* relation it belongs to */
|
||||
double ndistinct; /* # distinct values */
|
||||
bool isdefault; /* true if DEFAULT_NUM_DISTINCT was used */
|
||||
} GroupVarInfo;
|
||||
|
||||
static List *
|
||||
@ -3287,6 +3288,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
|
||||
varinfo->var = var;
|
||||
varinfo->rel = vardata->rel;
|
||||
varinfo->ndistinct = ndistinct;
|
||||
varinfo->isdefault = isdefault;
|
||||
varinfos = lappend(varinfos, varinfo);
|
||||
return varinfos;
|
||||
}
|
||||
@ -3311,6 +3313,12 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
|
||||
* pgset - NULL, or a List** pointing to a grouping set to filter the
|
||||
* groupExprs against
|
||||
*
|
||||
* Outputs:
|
||||
* estinfo - When passed as non-NULL, the function will set bits in the
|
||||
* "flags" field in order to provide callers with additional information
|
||||
* about the estimation. Currently, we only set the SELFLAG_USED_DEFAULT
|
||||
* bit if we used any default values in the estimation.
|
||||
*
|
||||
* Given the lack of any cross-correlation statistics in the system, it's
|
||||
* impossible to do anything really trustworthy with GROUP BY conditions
|
||||
* involving multiple Vars. We should however avoid assuming the worst
|
||||
@ -3358,7 +3366,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
|
||||
*/
|
||||
double
|
||||
estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
|
||||
List **pgset)
|
||||
List **pgset, EstimationInfo *estinfo)
|
||||
{
|
||||
List *varinfos = NIL;
|
||||
double srf_multiplier = 1.0;
|
||||
@ -3366,6 +3374,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
|
||||
ListCell *l;
|
||||
int i;
|
||||
|
||||
/* Zero the estinfo output parameter, if non-NULL */
|
||||
if (estinfo != NULL)
|
||||
memset(estinfo, 0, sizeof(EstimationInfo));
|
||||
|
||||
/*
|
||||
* We don't ever want to return an estimate of zero groups, as that tends
|
||||
* to lead to division-by-zero and other unpleasantness. The input_rows
|
||||
@ -3577,6 +3589,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
|
||||
if (relmaxndistinct < varinfo2->ndistinct)
|
||||
relmaxndistinct = varinfo2->ndistinct;
|
||||
relvarcount++;
|
||||
|
||||
/*
|
||||
* When varinfo2's isdefault is set then we'd better set
|
||||
* the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
|
||||
*/
|
||||
if (estinfo != NULL && varinfo2->isdefault)
|
||||
estinfo->flags |= SELFLAG_USED_DEFAULT;
|
||||
|
||||
}
|
||||
|
||||
/* we're done with this relation */
|
||||
|
@ -68,6 +68,20 @@
|
||||
p = 1.0; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* A set of flags which some selectivity estimation functions can pass back to
|
||||
* callers to provide further details about some assumptions which were made
|
||||
* during the estimation.
|
||||
*/
|
||||
#define SELFLAG_USED_DEFAULT (1 << 0) /* Estimation fell back on one
|
||||
* of the DEFAULTs as defined
|
||||
* above. */
|
||||
|
||||
typedef struct EstimationInfo
|
||||
{
|
||||
uint32 flags; /* Flags, as defined above to mark special
|
||||
* properties of the estimation. */
|
||||
} EstimationInfo;
|
||||
|
||||
/* Return data from examine_variable and friends */
|
||||
typedef struct VariableStatData
|
||||
@ -197,7 +211,8 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause,
|
||||
Selectivity *rightstart, Selectivity *rightend);
|
||||
|
||||
extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
|
||||
double input_rows, List **pgset);
|
||||
double input_rows, List **pgset,
|
||||
EstimationInfo *estinfo);
|
||||
|
||||
extern void estimate_hash_bucket_stats(PlannerInfo *root,
|
||||
Node *hashkey, double nbuckets,
|
||||
|
Loading…
Reference in New Issue
Block a user