From 4e97631e6a9d31acfde1057ad966e73f9d28c28d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 14 Sep 2010 23:15:29 +0000 Subject: [PATCH] Fix join-removal logic for pseudoconstant and outerjoin-delayed quals. In these cases a qual can get marked with the removable rel in its required_relids, but this is just to schedule its evaluation correctly, not because it really depends on the rel. We were assuming that, in effect, we could throw away *all* quals so marked, which is nonsense. Tighten up the logic to be a little more paranoid about which quals belong to the outer join being considered for removal, and arrange for all quals that don't belong to be updated so they will still get evaluated correctly. Also fix another problem that happened to be exposed by this test case, which was that make_join_rel() was failing to notice some cases where a constant-false qual could be used to prove a join relation empty. If it's a pushed-down constant false, then the relation is empty even if it's an outer join, because the qual applies after the outer join expansion. Per report from Nathan Grange. Back-patch into 9.0. --- src/backend/optimizer/path/joinrels.c | 40 ++++++++--- src/backend/optimizer/plan/analyzejoins.c | 83 ++++++++++++++++++----- src/backend/optimizer/util/joininfo.c | 36 +++++++++- src/include/optimizer/joininfo.h | 5 +- src/test/regress/expected/join.out | 37 ++++++++++ src/test/regress/sql/join.sql | 17 +++++ 6 files changed, 187 insertions(+), 31 deletions(-) diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index e781ad5c1a..ddecff5533 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.105 2010/02/26 02:00:45 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.106 2010/09/14 23:15:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,7 +29,8 @@ static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel); static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool is_dummy_rel(RelOptInfo *rel); static void mark_dummy_rel(RelOptInfo *rel); -static bool restriction_is_constant_false(List *restrictlist); +static bool restriction_is_constant_false(List *restrictlist, + bool only_pushed_down); /* @@ -603,7 +604,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) * * Also, a provably constant-false join restriction typically means that * we can skip evaluating one or both sides of the join. We do this by - * marking the appropriate rel as dummy. + * marking the appropriate rel as dummy. For outer joins, a constant-false + * restriction that is pushed down still means the whole join is dummy, + * while a non-pushed-down one means that no inner rows will join so we + * can treat the inner rel as dummy. * * We need only consider the jointypes that appear in join_info_list, plus * JOIN_INNER. @@ -612,7 +616,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) { case JOIN_INNER: if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist)) + restriction_is_constant_false(restrictlist, false)) { mark_dummy_rel(joinrel); break; @@ -625,12 +629,13 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) restrictlist); break; case JOIN_LEFT: - if (is_dummy_rel(rel1)) + if (is_dummy_rel(rel1) || + restriction_is_constant_false(restrictlist, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist) && + if (restriction_is_constant_false(restrictlist, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -641,7 +646,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) restrictlist); break; case JOIN_FULL: - if (is_dummy_rel(rel1) && is_dummy_rel(rel2)) + if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) || + restriction_is_constant_false(restrictlist, true)) { mark_dummy_rel(joinrel); break; @@ -665,7 +671,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) bms_is_subset(sjinfo->min_righthand, rel2->relids)) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist)) + restriction_is_constant_false(restrictlist, false)) { mark_dummy_rel(joinrel); break; @@ -687,6 +693,12 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) create_unique_path(root, rel2, rel2->cheapest_total_path, sjinfo) != NULL) { + if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || + restriction_is_constant_false(restrictlist, false)) + { + mark_dummy_rel(joinrel); + break; + } add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER, sjinfo, restrictlist); @@ -696,12 +708,13 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) } break; case JOIN_ANTI: - if (is_dummy_rel(rel1)) + if (is_dummy_rel(rel1) || + restriction_is_constant_false(restrictlist, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist) && + if (restriction_is_constant_false(restrictlist, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -947,9 +960,11 @@ mark_dummy_rel(RelOptInfo *rel) * join situations this will leave us computing cartesian products only to * decide there's no match for an outer row, which is pretty stupid. So, * we need to detect the case. + * + * If only_pushed_down is TRUE, then consider only pushed-down quals. */ static bool -restriction_is_constant_false(List *restrictlist) +restriction_is_constant_false(List *restrictlist, bool only_pushed_down) { ListCell *lc; @@ -964,6 +979,9 @@ restriction_is_constant_false(List *restrictlist) RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); Assert(IsA(rinfo, RestrictInfo)); + if (only_pushed_down && !rinfo->is_pushed_down) + continue; + if (rinfo->clause && IsA(rinfo->clause, Const)) { Const *con = (Const *) rinfo->clause; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 0d90d072ea..c0860beb3c 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -16,19 +16,21 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.3 2010/07/06 19:18:56 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.4 2010/09/14 23:15:29 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); -static void remove_rel_from_query(PlannerInfo *root, int relid); +static void remove_rel_from_query(PlannerInfo *root, int relid, + Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); @@ -67,7 +69,9 @@ restart: */ innerrelid = bms_singleton_member(sjinfo->min_righthand); - remove_rel_from_query(root, innerrelid); + remove_rel_from_query(root, innerrelid, + bms_union(sjinfo->min_lefthand, + sjinfo->min_righthand)); /* We verify that exactly one reference gets removed from joinlist */ nremoved = 0; @@ -216,19 +220,25 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - /* Ignore clauses not pertinent to this join */ - if (!bms_is_subset(restrictinfo->required_relids, joinrelids)) - continue; - /* - * If we find a pushed-down clause, it must have come from above the - * outer join and it must contain references to the inner rel. (If it - * had only outer-rel variables, it'd have been pushed down into the - * outer rel.) Therefore, we can conclude that join removal is unsafe - * without any examination of the clause contents. + * If it's not a join clause for this outer join, we can't use it. + * Note that if the clause is pushed-down, then it is logically from + * above the outer join, even if it references no other rels (it might + * be from WHERE, for example). */ - if (restrictinfo->is_pushed_down) - return false; + if (restrictinfo->is_pushed_down || + !bms_equal(restrictinfo->required_relids, joinrelids)) + { + /* + * If such a clause actually references the inner rel then + * join removal has to be disallowed. We have to check this + * despite the previous attr_needed checks because of the + * possibility of pushed-down clauses referencing the rel. + */ + if (bms_is_member(innerrelid, restrictinfo->clause_relids)) + return false; + continue; /* else, ignore; not useful here */ + } /* Ignore if it's not a mergejoinable clause */ if (!restrictinfo->can_join || @@ -299,14 +309,14 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * We are not terribly thorough here. We must make sure that the rel is * no longer treated as a baserel, and that attributes of other baserels * are no longer marked as being needed at joins involving this rel. - * In particular, we don't bother removing join quals involving the rel from - * the joininfo lists; they'll just get ignored, since we will never form a - * join relation at which they could be evaluated. + * Also, join quals involving the rel have to be removed from the joininfo + * lists, but only if they belong to the outer join identified by joinrelids. */ static void -remove_rel_from_query(PlannerInfo *root, int relid) +remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) { RelOptInfo *rel = find_base_rel(root, relid); + List *joininfos; Index rti; ListCell *l; @@ -379,6 +389,43 @@ remove_rel_from_query(PlannerInfo *root, int relid) phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); } + + /* + * Remove any joinquals referencing the rel from the joininfo lists. + * + * In some cases, a joinqual has to be put back after deleting its + * reference to the target rel. This can occur for pseudoconstant and + * outerjoin-delayed quals, which can get marked as requiring the rel in + * order to force them to be evaluated at or above the join. We can't + * just discard them, though. Only quals that logically belonged to the + * outer join being discarded should be removed from the query. + * + * We must make a copy of the rel's old joininfo list before starting the + * loop, because otherwise remove_join_clause_from_rels would destroy the + * list while we're scanning it. + */ + joininfos = list_copy(rel->joininfo); + foreach(l, joininfos) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); + + if (rinfo->is_pushed_down || + !bms_equal(rinfo->required_relids, joinrelids)) + { + /* Recheck that qual doesn't actually reference the target rel */ + Assert(!bms_is_member(relid, rinfo->clause_relids)); + /* + * The required_relids probably aren't shared with anything else, + * but let's copy them just to be sure. + */ + rinfo->required_relids = bms_copy(rinfo->required_relids); + rinfo->required_relids = bms_del_member(rinfo->required_relids, + relid); + distribute_restrictinfo_to_rels(root, rinfo); + } + } } /* diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c index b03045d657..eccff14791 100644 --- a/src/backend/optimizer/util/joininfo.c +++ b/src/backend/optimizer/util/joininfo.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.52 2010/01/02 16:57:48 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.53 2010/09/14 23:15:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -98,3 +98,37 @@ add_join_clause_to_rels(PlannerInfo *root, } bms_free(tmprelids); } + +/* + * remove_join_clause_from_rels + * Delete 'restrictinfo' from all the joininfo lists it is in + * + * This reverses the effect of add_join_clause_to_rels. It's used when we + * discover that a relation need not be joined at all. + * + * 'restrictinfo' describes the join clause + * 'join_relids' is the list of relations participating in the join clause + * (there must be more than one) + */ +void +remove_join_clause_from_rels(PlannerInfo *root, + RestrictInfo *restrictinfo, + Relids join_relids) +{ + Relids tmprelids; + int cur_relid; + + tmprelids = bms_copy(join_relids); + while ((cur_relid = bms_first_member(tmprelids)) >= 0) + { + RelOptInfo *rel = find_base_rel(root, cur_relid); + + /* + * Remove the restrictinfo from the list. Pointer comparison is + * sufficient. + */ + Assert(list_member_ptr(rel->joininfo, restrictinfo)); + rel->joininfo = list_delete_ptr(rel->joininfo, restrictinfo); + } + bms_free(tmprelids); +} diff --git a/src/include/optimizer/joininfo.h b/src/include/optimizer/joininfo.h index 58eeb8c942..0c4c8e172a 100644 --- a/src/include/optimizer/joininfo.h +++ b/src/include/optimizer/joininfo.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/joininfo.h,v 1.38 2010/01/02 16:58:07 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/joininfo.h,v 1.39 2010/09/14 23:15:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,5 +23,8 @@ extern bool have_relevant_joinclause(PlannerInfo *root, extern void add_join_clause_to_rels(PlannerInfo *root, RestrictInfo *restrictinfo, Relids join_relids); +extern void remove_join_clause_from_rels(PlannerInfo *root, + RestrictInfo *restrictinfo, + Relids join_relids); #endif /* JOININFO_H */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 6dfc710be0..5299a10ac4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2586,6 +2586,43 @@ explain (costs off) -> Seq Scan on child c (5 rows) +-- check for a 9.0rc1 bug: join removal breaks pseudoconstant qual handling +select p.* from + parent p left join child c on (p.k = c.k) + where p.k = 1 and p.k = 2; + k | pd +---+---- +(0 rows) + +explain (costs off) +select p.* from + parent p left join child c on (p.k = c.k) + where p.k = 1 and p.k = 2; + QUERY PLAN +------------------------------------------------ + Result + One-Time Filter: false + -> Index Scan using parent_pkey on parent p + Index Cond: (k = 1) +(4 rows) + +select p.* from + (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k + where p.k = 1 and p.k = 2; + k | pd +---+---- +(0 rows) + +explain (costs off) +select p.* from + (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k + where p.k = 1 and p.k = 2; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + -- bug 5255: this is not optimizable by join removal begin; CREATE TEMP TABLE a (id int PRIMARY KEY); diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 8657636757..ea237f977f 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -615,6 +615,23 @@ explain (costs off) left join (select c.*, true as linked from child c) as ss on (p.k = ss.k); +-- check for a 9.0rc1 bug: join removal breaks pseudoconstant qual handling +select p.* from + parent p left join child c on (p.k = c.k) + where p.k = 1 and p.k = 2; +explain (costs off) +select p.* from + parent p left join child c on (p.k = c.k) + where p.k = 1 and p.k = 2; + +select p.* from + (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k + where p.k = 1 and p.k = 2; +explain (costs off) +select p.* from + (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k + where p.k = 1 and p.k = 2; + -- bug 5255: this is not optimizable by join removal begin;