mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Weaken the planner's tests for relevant joinclauses.
We should be willing to cross-join two small relations if that allows us to use an inner indexscan on a large relation (that is, the potential indexqual for the large table requires both smaller relations). This worked in simple cases but fell apart as soon as there was a join clause to a fourth relation, because the existence of any two-relation join clause caused the planner to not consider clauseless joins between other base relations. The added regression test shows an example case adapted from a recent complaint from Benoit Delbosc. Adjust have_relevant_joinclause, have_relevant_eclass_joinclause, and has_relevant_eclass_joinclause to consider that a join clause mentioning three or more relations is sufficient grounds for joining any subset of those relations, even if we have to do so via a cartesian join. Since such clauses are relatively uncommon, this shouldn't affect planning speed on typical queries; in fact it should help a bit, because the latter two functions in particular get significantly simpler. Although this is arguably a bug fix, I'm not going to risk back-patching it, since it might have currently-unforeseen consequences.
This commit is contained in:
parent
c0cc526e8b
commit
e3ffd05b02
@ -2035,7 +2035,7 @@ get_parent_relid(PlannerInfo *root, RelOptInfo *rel)
|
||||
/*
|
||||
* have_relevant_eclass_joinclause
|
||||
* Detect whether there is an EquivalenceClass that could produce
|
||||
* a joinclause between the two given relations.
|
||||
* a joinclause involving the two given relations.
|
||||
*
|
||||
* This is essentially a very cut-down version of
|
||||
* generate_join_implied_equalities(). Note it's OK to occasionally say "yes"
|
||||
@ -2051,9 +2051,6 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
|
||||
foreach(lc1, root->eq_classes)
|
||||
{
|
||||
EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
|
||||
bool has_rel1;
|
||||
bool has_rel2;
|
||||
ListCell *lc2;
|
||||
|
||||
/*
|
||||
* Won't generate joinclauses if single-member (this test covers the
|
||||
@ -2063,9 +2060,18 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We do not need to examine the individual members of the EC, because
|
||||
* all that we care about is whether each rel overlaps the relids of
|
||||
* at least one member, and a test on ec_relids is sufficient to prove
|
||||
* that. (As with have_relevant_joinclause(), it is not necessary
|
||||
* that the EC be able to form a joinclause relating exactly the two
|
||||
* given rels, only that it be able to form a joinclause mentioning
|
||||
* both, and this will surely be true if both of them overlap
|
||||
* ec_relids.)
|
||||
*
|
||||
* Note we don't test ec_broken; if we did, we'd need a separate code
|
||||
* path to look through ec_sources. Checking the members anyway is OK
|
||||
* as a possibly-overoptimistic heuristic.
|
||||
* path to look through ec_sources. Checking the membership anyway is
|
||||
* OK as a possibly-overoptimistic heuristic.
|
||||
*
|
||||
* We don't test ec_has_const either, even though a const eclass won't
|
||||
* generate real join clauses. This is because if we had "WHERE a.x =
|
||||
@ -2073,35 +2079,8 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
|
||||
* since the join result is likely to be small even though it'll end
|
||||
* up being an unqualified nestloop.
|
||||
*/
|
||||
|
||||
/* Needn't scan if it couldn't contain members from each rel */
|
||||
if (!bms_overlap(rel1->relids, ec->ec_relids) ||
|
||||
!bms_overlap(rel2->relids, ec->ec_relids))
|
||||
continue;
|
||||
|
||||
/* Scan the EC to see if it has member(s) in each rel */
|
||||
has_rel1 = has_rel2 = false;
|
||||
foreach(lc2, ec->ec_members)
|
||||
{
|
||||
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
|
||||
|
||||
if (cur_em->em_is_const || cur_em->em_is_child)
|
||||
continue; /* ignore consts and children here */
|
||||
if (bms_is_subset(cur_em->em_relids, rel1->relids))
|
||||
{
|
||||
has_rel1 = true;
|
||||
if (has_rel2)
|
||||
break;
|
||||
}
|
||||
if (bms_is_subset(cur_em->em_relids, rel2->relids))
|
||||
{
|
||||
has_rel2 = true;
|
||||
if (has_rel1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_rel1 && has_rel2)
|
||||
if (bms_overlap(rel1->relids, ec->ec_relids) &&
|
||||
bms_overlap(rel2->relids, ec->ec_relids))
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2112,7 +2091,7 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
|
||||
/*
|
||||
* has_relevant_eclass_joinclause
|
||||
* Detect whether there is an EquivalenceClass that could produce
|
||||
* a joinclause between the given relation and anything else.
|
||||
* a joinclause involving the given relation and anything else.
|
||||
*
|
||||
* This is the same as have_relevant_eclass_joinclause with the other rel
|
||||
* implicitly defined as "everything else in the query".
|
||||
@ -2125,9 +2104,6 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
|
||||
foreach(lc1, root->eq_classes)
|
||||
{
|
||||
EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
|
||||
bool has_rel1;
|
||||
bool has_rel2;
|
||||
ListCell *lc2;
|
||||
|
||||
/*
|
||||
* Won't generate joinclauses if single-member (this test covers the
|
||||
@ -2137,45 +2113,11 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Note we don't test ec_broken; if we did, we'd need a separate code
|
||||
* path to look through ec_sources. Checking the members anyway is OK
|
||||
* as a possibly-overoptimistic heuristic.
|
||||
*
|
||||
* We don't test ec_has_const either, even though a const eclass won't
|
||||
* generate real join clauses. This is because if we had "WHERE a.x =
|
||||
* b.y and a.x = 42", it is worth considering a join between a and b,
|
||||
* since the join result is likely to be small even though it'll end
|
||||
* up being an unqualified nestloop.
|
||||
* Per the comment in have_relevant_eclass_joinclause, it's sufficient
|
||||
* to find an EC that mentions both this rel and some other rel.
|
||||
*/
|
||||
|
||||
/* Needn't scan if it couldn't contain members from each rel */
|
||||
if (!bms_overlap(rel1->relids, ec->ec_relids) ||
|
||||
bms_is_subset(ec->ec_relids, rel1->relids))
|
||||
continue;
|
||||
|
||||
/* Scan the EC to see if it has member(s) in each rel */
|
||||
has_rel1 = has_rel2 = false;
|
||||
foreach(lc2, ec->ec_members)
|
||||
{
|
||||
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
|
||||
|
||||
if (cur_em->em_is_const || cur_em->em_is_child)
|
||||
continue; /* ignore consts and children here */
|
||||
if (bms_is_subset(cur_em->em_relids, rel1->relids))
|
||||
{
|
||||
has_rel1 = true;
|
||||
if (has_rel2)
|
||||
break;
|
||||
}
|
||||
if (!bms_overlap(cur_em->em_relids, rel1->relids))
|
||||
{
|
||||
has_rel2 = true;
|
||||
if (has_rel1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_rel1 && has_rel2)
|
||||
if (bms_overlap(rel1->relids, ec->ec_relids) &&
|
||||
!bms_is_subset(ec->ec_relids, rel1->relids))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -88,13 +88,9 @@ join_search_one_level(PlannerInfo *root, int level)
|
||||
has_join_restriction(root, old_rel))
|
||||
{
|
||||
/*
|
||||
* Note that if all available join clauses for this rel require
|
||||
* more than one other rel, we will fail to make any joins against
|
||||
* it here. In most cases that's OK; it'll be considered by
|
||||
* "bushy plan" join code in a higher-level pass where we have
|
||||
* those other rels collected into a join rel.
|
||||
*
|
||||
* See also the last-ditch case below.
|
||||
* There are relevant join clauses or join order restrictions,
|
||||
* so consider joins between this rel and (only) those rels it is
|
||||
* linked to by a clause or restriction.
|
||||
*/
|
||||
make_rels_by_clause_joins(root,
|
||||
old_rel,
|
||||
@ -160,8 +156,8 @@ join_search_one_level(PlannerInfo *root, int level)
|
||||
{
|
||||
/*
|
||||
* OK, we can build a rel of the right level from this
|
||||
* pair of rels. Do so if there is at least one usable
|
||||
* join clause or a relevant join restriction.
|
||||
* pair of rels. Do so if there is at least one relevant
|
||||
* join clause or join order restriction.
|
||||
*/
|
||||
if (have_relevant_joinclause(root, old_rel, new_rel) ||
|
||||
have_join_order_restriction(root, old_rel, new_rel))
|
||||
|
@ -21,34 +21,46 @@
|
||||
|
||||
/*
|
||||
* have_relevant_joinclause
|
||||
* Detect whether there is a joinclause that can be used to join
|
||||
* Detect whether there is a joinclause that involves
|
||||
* the two given relations.
|
||||
*
|
||||
* Note: the joinclause does not have to be evaluatable with only these two
|
||||
* relations. This is intentional. For example consider
|
||||
* SELECT * FROM a, b, c WHERE a.x = (b.y + c.z)
|
||||
* If a is much larger than the other tables, it may be worthwhile to
|
||||
* cross-join b and c and then use an inner indexscan on a.x. Therefore
|
||||
* we should consider this joinclause as reason to join b to c, even though
|
||||
* it can't be applied at that join step.
|
||||
*/
|
||||
bool
|
||||
have_relevant_joinclause(PlannerInfo *root,
|
||||
RelOptInfo *rel1, RelOptInfo *rel2)
|
||||
{
|
||||
bool result = false;
|
||||
Relids join_relids;
|
||||
List *joininfo;
|
||||
Relids other_relids;
|
||||
ListCell *l;
|
||||
|
||||
join_relids = bms_union(rel1->relids, rel2->relids);
|
||||
|
||||
/*
|
||||
* We could scan either relation's joininfo list; may as well use the
|
||||
* shorter one.
|
||||
*/
|
||||
if (list_length(rel1->joininfo) <= list_length(rel2->joininfo))
|
||||
{
|
||||
joininfo = rel1->joininfo;
|
||||
other_relids = rel2->relids;
|
||||
}
|
||||
else
|
||||
{
|
||||
joininfo = rel2->joininfo;
|
||||
other_relids = rel1->relids;
|
||||
}
|
||||
|
||||
foreach(l, joininfo)
|
||||
{
|
||||
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
|
||||
|
||||
if (bms_is_subset(rinfo->required_relids, join_relids))
|
||||
if (bms_overlap(other_relids, rinfo->required_relids))
|
||||
{
|
||||
result = true;
|
||||
break;
|
||||
@ -62,8 +74,6 @@ have_relevant_joinclause(PlannerInfo *root,
|
||||
if (!result && rel1->has_eclass_joins && rel2->has_eclass_joins)
|
||||
result = have_relevant_eclass_joinclause(root, rel1, rel2);
|
||||
|
||||
bms_free(join_relids);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -2666,6 +2666,57 @@ select * from int4_tbl a full join int4_tbl b on false;
|
||||
-2147483647 |
|
||||
(10 rows)
|
||||
|
||||
--
|
||||
-- test for ability to use a cartesian join when necessary
|
||||
--
|
||||
explain (costs off)
|
||||
select * from
|
||||
tenk1 join int4_tbl on f1 = twothousand,
|
||||
int4(sin(1)) q1,
|
||||
int4(sin(0)) q2
|
||||
where q1 = thousand or q2 = thousand;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------------
|
||||
Hash Join
|
||||
Hash Cond: (tenk1.twothousand = int4_tbl.f1)
|
||||
-> Nested Loop
|
||||
Join Filter: ((q1.q1 = tenk1.thousand) OR (q2.q2 = tenk1.thousand))
|
||||
-> Nested Loop
|
||||
-> Function Scan on q1
|
||||
-> Function Scan on q2
|
||||
-> Bitmap Heap Scan on tenk1
|
||||
Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand))
|
||||
-> BitmapOr
|
||||
-> Bitmap Index Scan on tenk1_thous_tenthous
|
||||
Index Cond: (q1.q1 = thousand)
|
||||
-> Bitmap Index Scan on tenk1_thous_tenthous
|
||||
Index Cond: (q2.q2 = thousand)
|
||||
-> Hash
|
||||
-> Seq Scan on int4_tbl
|
||||
(16 rows)
|
||||
|
||||
explain (costs off)
|
||||
select * from
|
||||
tenk1 join int4_tbl on f1 = twothousand,
|
||||
int4(sin(1)) q1,
|
||||
int4(sin(0)) q2
|
||||
where thousand = (q1 + q2);
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------
|
||||
Hash Join
|
||||
Hash Cond: (tenk1.twothousand = int4_tbl.f1)
|
||||
-> Nested Loop
|
||||
-> Nested Loop
|
||||
-> Function Scan on q1
|
||||
-> Function Scan on q2
|
||||
-> Bitmap Heap Scan on tenk1
|
||||
Recheck Cond: (thousand = (q1.q1 + q2.q2))
|
||||
-> Bitmap Index Scan on tenk1_thous_tenthous
|
||||
Index Cond: (thousand = (q1.q1 + q2.q2))
|
||||
-> Hash
|
||||
-> Seq Scan on int4_tbl
|
||||
(12 rows)
|
||||
|
||||
--
|
||||
-- test join removal
|
||||
--
|
||||
|
@ -689,6 +689,24 @@ order by 1,2;
|
||||
select * from int4_tbl a full join int4_tbl b on true;
|
||||
select * from int4_tbl a full join int4_tbl b on false;
|
||||
|
||||
--
|
||||
-- test for ability to use a cartesian join when necessary
|
||||
--
|
||||
|
||||
explain (costs off)
|
||||
select * from
|
||||
tenk1 join int4_tbl on f1 = twothousand,
|
||||
int4(sin(1)) q1,
|
||||
int4(sin(0)) q2
|
||||
where q1 = thousand or q2 = thousand;
|
||||
|
||||
explain (costs off)
|
||||
select * from
|
||||
tenk1 join int4_tbl on f1 = twothousand,
|
||||
int4(sin(1)) q1,
|
||||
int4(sin(0)) q2
|
||||
where thousand = (q1 + q2);
|
||||
|
||||
--
|
||||
-- test join removal
|
||||
--
|
||||
|
Loading…
Reference in New Issue
Block a user