From 95f4e59c3286671656aff7db45b322f14a7bb8cc Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Tue, 28 Jul 2015 13:20:39 -0400 Subject: [PATCH] Remove an unsafe Assert, and explain join_clause_is_movable_into() better. join_clause_is_movable_into() is approximate, in the sense that it might sometimes return "false" when actually it would be valid to push the given join clause down to the specified level. This is okay ... but there was an Assert in get_joinrel_parampathinfo() that's only safe if the answers are always exact. Comment out the Assert, and add a bunch of commentary to clarify what's going on. Per fuzz testing by Andreas Seltenreich. The added regression test is a pretty silly query, but it's based on his crasher example. Back-patch to 9.2 where the faulty logic was introduced. --- src/backend/optimizer/util/relnode.c | 9 +++++ src/backend/optimizer/util/restrictinfo.c | 39 ++++++++++++++---- src/test/regress/expected/join.out | 48 +++++++++++++++++++++++ src/test/regress/sql/join.sql | 26 ++++++++++++ 4 files changed, 115 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index be2ef3becf..68a93a1a5b 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -982,9 +982,18 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + /* + * In principle, join_clause_is_movable_into() should accept anything + * returned by generate_join_implied_equalities(); but because its + * analysis is only approximate, sometimes it doesn't. So we + * currently cannot use this Assert; instead just assume it's okay to + * apply the joinclause at this level. + */ +#ifdef NOT_USED Assert(join_clause_is_movable_into(rinfo, joinrel->relids, join_and_req)); +#endif if (!join_clause_is_movable_into(rinfo, outer_path->parent->relids, outer_and_req) && diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index e5f7836517..65499902f6 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -464,10 +464,9 @@ extract_actual_join_clauses(List *restrictinfo_list, * outer join, as that would change the results (rows would be suppressed * rather than being null-extended). * - * Also the target relation must not be in the clause's nullable_relids, i.e., - * there must not be an outer join below the clause that would null the Vars - * coming from the target relation. Otherwise the clause might give results - * different from what it would give at its normal semantic level. + * Also there must not be an outer join below the clause that would null the + * Vars coming from the target relation. Otherwise the clause might give + * results different from what it would give at its normal semantic level. * * Also, the join clause must not use any relations that have LATERAL * references to the target relation, since we could not put such rels on @@ -516,10 +515,31 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel) * not pushing the clause into its outer-join outer side, nor down into * a lower outer join's inner side. * + * The check about pushing a clause down into a lower outer join's inner side + * is only approximate; it sometimes returns "false" when actually it would + * be safe to use the clause here because we're still above the outer join + * in question. This is okay as long as the answers at different join levels + * are consistent: it just means we might sometimes fail to push a clause as + * far down as it could safely be pushed. It's unclear whether it would be + * worthwhile to do this more precisely. (But if it's ever fixed to be + * exactly accurate, there's an Assert in get_joinrel_parampathinfo() that + * should be re-enabled.) + * * There's no check here equivalent to join_clause_is_movable_to's test on * lateral_referencers. We assume the caller wouldn't be inquiring unless * it'd verified that the proposed outer rels don't have lateral references - * to the current rel(s). + * to the current rel(s). (If we are considering join paths with the outer + * rels on the outside and the current rels on the inside, then this should + * have been checked at the outset of such consideration; see join_is_legal + * and the path parameterization checks in joinpath.c.) On the other hand, + * in join_clause_is_movable_to we are asking whether the clause could be + * moved for some valid set of outer rels, so we don't have the benefit of + * relying on prior checks for lateral-reference validity. + * + * Note: if this returns true, it means that the clause could be moved to + * this join relation, but that doesn't mean that this is the lowest join + * it could be moved to. Caller may need to make additional calls to verify + * that this doesn't succeed on either of the inputs of a proposed join. * * Note: get_joinrel_parampathinfo depends on the fact that if * current_and_outer is NULL, this function will always return false @@ -534,7 +554,7 @@ join_clause_is_movable_into(RestrictInfo *rinfo, if (!bms_is_subset(rinfo->clause_relids, current_and_outer)) return false; - /* Clause must physically reference target rel(s) */ + /* Clause must physically reference at least one target rel */ if (!bms_overlap(currentrelids, rinfo->clause_relids)) return false; @@ -542,7 +562,12 @@ join_clause_is_movable_into(RestrictInfo *rinfo, if (bms_overlap(currentrelids, rinfo->outer_relids)) return false; - /* Target rel(s) must not be nullable below the clause */ + /* + * Target rel(s) must not be nullable below the clause. This is + * approximate, in the safe direction, because the current join might be + * above the join where the nulling would happen, in which case the clause + * would work correctly here. But we don't have enough info to be sure. + */ if (bms_overlap(currentrelids, rinfo->nullable_relids)) return false; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 4ce01cbcd5..1afd0c328b 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2218,6 +2218,54 @@ order by 1, 2; 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 123 (5 rows) +-- +-- regression test: check a case where join_clause_is_movable_into() gives +-- an imprecise result +-- +analyze pg_enum; +explain (costs off) +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + QUERY PLAN +----------------------------------------------------------------------- + Nested Loop + Join Filter: (pg_enum.enumtypid = t2.oid) + -> Nested Loop Left Join + -> Hash Join + Hash Cond: ((t.typanalyze)::oid = pa.oid) + -> Seq Scan on pg_type t + -> Hash + -> Hash Join + Hash Cond: (pa.proname = pg_enum.enumlabel) + -> Seq Scan on pg_proc pa + -> Hash + -> Seq Scan on pg_enum + -> Index Scan using pg_proc_oid_index on pg_proc po + Index Cond: (oid = (t.typoutput)::oid) + -> Index Scan using pg_type_typname_nsp_index on pg_type t2 + Index Cond: (typname = COALESCE(po.proname, t.typname)) +(16 rows) + +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + anname | outname | enumtypid +--------+---------+----------- +(0 rows) + -- -- Clean up -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 3a71dbf4df..d34cefac5a 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -377,6 +377,32 @@ select * from int8_tbl i1 left join (int8_tbl i2 join (select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2 order by 1, 2; +-- +-- regression test: check a case where join_clause_is_movable_into() gives +-- an imprecise result +-- +analyze pg_enum; +explain (costs off) +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + -- -- Clean up