mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-24 18:55:04 +08:00
Fix planning of star-schema-style queries.
Part of the intent of the parameterized-path mechanism was to handle
star-schema queries efficiently, but some overly-restrictive search
limiting logic added in commit e2fa76d80b
prevented such cases from working as desired. Fix that and add a
regression test about it. Per gripe from Marc Cousin.
This is arguably a bug rather than a new feature, so back-patch to 9.2
where parameterized paths were introduced.
This commit is contained in:
parent
c4f4c7ca99
commit
b514a7460d
@ -702,17 +702,37 @@ intermediate layers of joins, for example:
|
||||
-> Index Scan using C_Z_IDX on C
|
||||
Index Condition: C.Z = A.X
|
||||
|
||||
If all joins are plain inner joins then this is unnecessary, because
|
||||
it's always possible to reorder the joins so that a parameter is used
|
||||
If all joins are plain inner joins then this is usually unnecessary,
|
||||
because it's possible to reorder the joins so that a parameter is used
|
||||
immediately below the nestloop node that provides it. But in the
|
||||
presence of outer joins, join reordering may not be possible, and then
|
||||
this option can be critical. Before version 9.2, Postgres used ad-hoc
|
||||
methods for planning and executing such queries, and those methods could
|
||||
not handle passing parameters down through multiple join levels.
|
||||
presence of outer joins, such join reordering may not be possible.
|
||||
|
||||
Also, the bottom-level scan might require parameters from more than one
|
||||
other relation. In principle we could join the other relations first
|
||||
so that all the parameters are supplied from a single nestloop level.
|
||||
But if those other relations have no join clause in common (which is
|
||||
common in star-schema queries for instance), the planner won't consider
|
||||
joining them directly to each other. In such a case we need to be able
|
||||
to create a plan like
|
||||
|
||||
NestLoop
|
||||
-> Seq Scan on SmallTable1 A
|
||||
NestLoop
|
||||
-> Seq Scan on SmallTable2 B
|
||||
NestLoop
|
||||
-> Index Scan using XYIndex on LargeTable C
|
||||
Index Condition: C.X = A.AID and C.Y = B.BID
|
||||
|
||||
so we should be willing to pass down A.AID through a join even though
|
||||
there is no join order constraint forcing the plan to look like this.
|
||||
|
||||
Before version 9.2, Postgres used ad-hoc methods for planning and
|
||||
executing nestloop queries of this kind, and those methods could not
|
||||
handle passing parameters down through multiple join levels.
|
||||
|
||||
To plan such queries, we now use a notion of a "parameterized path",
|
||||
which is a path that makes use of a join clause to a relation that's not
|
||||
scanned by the path. In the example just above, we would construct a
|
||||
scanned by the path. In the example two above, we would construct a
|
||||
path representing the possibility of doing this:
|
||||
|
||||
-> Index Scan using C_Z_IDX on C
|
||||
|
@ -117,13 +117,14 @@ add_paths_to_joinrel(PlannerInfo *root,
|
||||
/*
|
||||
* Decide whether it's sensible to generate parameterized paths for this
|
||||
* joinrel, and if so, which relations such paths should require. There
|
||||
* is no need to create a parameterized result path unless there is a join
|
||||
* order restriction that prevents joining one of our input rels directly
|
||||
* to the parameter source rel instead of joining to the other input rel.
|
||||
* This restriction reduces the number of parameterized paths we have to
|
||||
* deal with at higher join levels, without compromising the quality of
|
||||
* the resulting plan. We express the restriction as a Relids set that
|
||||
* must overlap the parameterization of any proposed join path.
|
||||
* is usually no need to create a parameterized result path unless there
|
||||
* is a join order restriction that prevents joining one of our input rels
|
||||
* directly to the parameter source rel instead of joining to the other
|
||||
* input rel. (But see exception in try_nestloop_path.) This restriction
|
||||
* reduces the number of parameterized paths we have to deal with at
|
||||
* higher join levels, without compromising the quality of the resulting
|
||||
* plan. We express the restriction as a Relids set that must overlap the
|
||||
* parameterization of any proposed join path.
|
||||
*/
|
||||
foreach(lc, root->join_info_list)
|
||||
{
|
||||
@ -291,9 +292,29 @@ try_nestloop_path(PlannerInfo *root,
|
||||
if (required_outer &&
|
||||
!bms_overlap(required_outer, param_source_rels))
|
||||
{
|
||||
/* Waste no memory when we reject a path here */
|
||||
bms_free(required_outer);
|
||||
return;
|
||||
/*
|
||||
* We override the param_source_rels heuristic to accept nestloop
|
||||
* paths in which the outer rel satisfies some but not all of the
|
||||
* inner path's parameterization. This is necessary to get good plans
|
||||
* for star-schema scenarios, in which a parameterized path for a
|
||||
* large table may require parameters from multiple small tables that
|
||||
* will not get joined directly to each other. We can handle that by
|
||||
* stacking nestloops that have the small tables on the outside; but
|
||||
* this breaks the rule the param_source_rels heuristic is based on,
|
||||
* namely that parameters should not be passed down across joins
|
||||
* unless there's a join-order-constraint-based reason to do so. So
|
||||
* ignore the param_source_rels restriction when this case applies.
|
||||
*/
|
||||
Relids outerrelids = outer_path->parent->relids;
|
||||
Relids innerparams = PATH_REQ_OUTER(inner_path);
|
||||
|
||||
if (!(bms_overlap(innerparams, outerrelids) &&
|
||||
bms_nonempty_difference(innerparams, outerrelids)))
|
||||
{
|
||||
/* Waste no memory when we reject a path here */
|
||||
bms_free(required_outer);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2780,6 +2780,25 @@ where thousand = (q1 + q2);
|
||||
-> Seq Scan on int4_tbl
|
||||
(12 rows)
|
||||
|
||||
--
|
||||
-- test ability to generate a suitable plan for a star-schema query
|
||||
--
|
||||
explain (costs off)
|
||||
select * from
|
||||
tenk1, int8_tbl a, int8_tbl b
|
||||
where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Nested Loop
|
||||
-> Seq Scan on int8_tbl b
|
||||
Filter: (q2 = 2)
|
||||
-> Nested Loop
|
||||
-> Seq Scan on int8_tbl a
|
||||
Filter: (q2 = 1)
|
||||
-> Index Scan using tenk1_thous_tenthous on tenk1
|
||||
Index Cond: ((thousand = a.q1) AND (tenthous = b.q1))
|
||||
(8 rows)
|
||||
|
||||
--
|
||||
-- test extraction of restriction OR clauses from join OR clause
|
||||
-- (we used to only do this for indexable clauses)
|
||||
|
@ -763,6 +763,15 @@ select * from
|
||||
int4(sin(0)) q2
|
||||
where thousand = (q1 + q2);
|
||||
|
||||
--
|
||||
-- test ability to generate a suitable plan for a star-schema query
|
||||
--
|
||||
|
||||
explain (costs off)
|
||||
select * from
|
||||
tenk1, int8_tbl a, int8_tbl b
|
||||
where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2;
|
||||
|
||||
--
|
||||
-- test extraction of restriction OR clauses from join OR clause
|
||||
-- (we used to only do this for indexable clauses)
|
||||
|
Loading…
Reference in New Issue
Block a user