Fix planning of non-strict equivalence clauses above outer joins.

If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable.  (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.)  The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence.  Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.

In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs.  This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places.  (This accounts for bug #7604 from Bill MacArthur.)

Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and the functions it calls to set correct nullable_relids for
generated clauses.  Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.

Add regression tests illustrating these problems.  In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
This commit is contained in:
Tom Lane 2012-10-18 12:29:19 -04:00
parent 7bdaacfb1f
commit c29a91037d
8 changed files with 193 additions and 34 deletions

View File

@ -1427,6 +1427,7 @@ _outEquivalenceMember(StringInfo str, EquivalenceMember *node)
WRITE_NODE_FIELD(em_expr);
WRITE_BITMAPSET_FIELD(em_relids);
WRITE_BITMAPSET_FIELD(em_nullable_relids);
WRITE_BOOL_FIELD(em_is_const);
WRITE_BOOL_FIELD(em_is_child);
WRITE_OID_FIELD(em_datatype);

View File

@ -27,7 +27,7 @@
static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
Expr *expr, Relids relids,
Expr *expr, Relids relids, Relids nullable_relids,
bool is_child, Oid datatype);
static void generate_base_implied_equalities_const(PlannerInfo *root,
EquivalenceClass *ec);
@ -97,7 +97,9 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
Expr *item1;
Expr *item2;
Relids item1_relids,
item2_relids;
item2_relids,
item1_nullable_relids,
item2_nullable_relids;
List *opfamilies;
EquivalenceClass *ec1,
*ec2;
@ -139,6 +141,12 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
return false; /* RHS is non-strict but not constant */
}
/* Calculate nullable-relid sets for each side of the clause */
item1_nullable_relids = bms_intersect(item1_relids,
restrictinfo->nullable_relids);
item2_nullable_relids = bms_intersect(item2_relids,
restrictinfo->nullable_relids);
/*
* We use the declared input types of the operator, not exprType() of the
* inputs, as the nominal datatypes for opfamily lookup. This presumes
@ -273,7 +281,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
else if (ec1)
{
/* Case 3: add item2 to ec1 */
em2 = add_eq_member(ec1, item2, item2_relids, false, item2_type);
em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids,
false, item2_type);
ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
ec1->ec_below_outer_join |= below_outer_join;
/* mark the RI as usable with this pair of EMs */
@ -283,7 +292,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
else if (ec2)
{
/* Case 3: add item1 to ec2 */
em1 = add_eq_member(ec2, item1, item1_relids, false, item1_type);
em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids,
false, item1_type);
ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
ec2->ec_below_outer_join |= below_outer_join;
/* mark the RI as usable with this pair of EMs */
@ -306,8 +316,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
ec->ec_broken = false;
ec->ec_sortref = 0;
ec->ec_merged = NULL;
em1 = add_eq_member(ec, item1, item1_relids, false, item1_type);
em2 = add_eq_member(ec, item2, item2_relids, false, item2_type);
em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
false, item1_type);
em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
false, item2_type);
root->eq_classes = lappend(root->eq_classes, ec);
@ -324,12 +336,13 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
*/
static EquivalenceMember *
add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
bool is_child, Oid datatype)
Relids nullable_relids, bool is_child, Oid datatype)
{
EquivalenceMember *em = makeNode(EquivalenceMember);
em->em_expr = expr;
em->em_relids = relids;
em->em_nullable_relids = nullable_relids;
em->em_is_const = false;
em->em_is_child = is_child;
em->em_datatype = datatype;
@ -443,7 +456,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
newec->ec_sortref = sortref;
newec->ec_merged = NULL;
newem = add_eq_member(newec, expr, pull_varnos((Node *) expr),
false, expr_datatype);
NULL, false, expr_datatype);
/*
* add_eq_member doesn't check for volatile functions, set-returning
@ -621,7 +634,9 @@ generate_base_implied_equalities_const(PlannerInfo *root,
}
process_implied_equality(root, eq_op,
cur_em->em_expr, const_em->em_expr,
ec->ec_relids,
bms_copy(ec->ec_relids),
bms_union(cur_em->em_nullable_relids,
const_em->em_nullable_relids),
ec->ec_below_outer_join,
cur_em->em_is_const);
}
@ -676,7 +691,9 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
}
process_implied_equality(root, eq_op,
prev_em->em_expr, cur_em->em_expr,
ec->ec_relids,
bms_copy(ec->ec_relids),
bms_union(prev_em->em_nullable_relids,
cur_em->em_nullable_relids),
ec->ec_below_outer_join,
false);
}
@ -1077,7 +1094,9 @@ create_join_clause(PlannerInfo *root,
leftem->em_expr,
rightem->em_expr,
bms_union(leftem->em_relids,
rightem->em_relids));
rightem->em_relids),
bms_union(leftem->em_nullable_relids,
rightem->em_nullable_relids));
/* Mark the clause as redundant, or not */
rinfo->parent_ec = parent_ec;
@ -1295,7 +1314,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
left_type,
right_type,
inner_datatype;
Relids inner_relids;
Relids inner_relids,
inner_nullable_relids;
ListCell *lc1;
Assert(is_opclause(rinfo->clause));
@ -1321,6 +1341,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
inner_datatype = left_type;
inner_relids = rinfo->left_relids;
}
inner_nullable_relids = bms_intersect(inner_relids,
rinfo->nullable_relids);
/* Scan EquivalenceClasses for a match to outervar */
foreach(lc1, root->eq_classes)
@ -1375,7 +1397,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
newrinfo = build_implied_join_equality(eq_op,
innervar,
cur_em->em_expr,
inner_relids);
bms_copy(inner_relids),
bms_copy(inner_nullable_relids));
if (process_equivalence(root, newrinfo, true))
match = true;
}
@ -1408,7 +1431,9 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
left_type,
right_type;
Relids left_relids,
right_relids;
right_relids,
left_nullable_relids,
right_nullable_relids;
ListCell *lc1;
/* Can't use an outerjoin_delayed clause here */
@ -1423,6 +1448,10 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
rightvar = (Expr *) get_rightop(rinfo->clause);
left_relids = rinfo->left_relids;
right_relids = rinfo->right_relids;
left_nullable_relids = bms_intersect(left_relids,
rinfo->nullable_relids);
right_nullable_relids = bms_intersect(right_relids,
rinfo->nullable_relids);
foreach(lc1, root->eq_classes)
{
@ -1504,7 +1533,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
newrinfo = build_implied_join_equality(eq_op,
leftvar,
cur_em->em_expr,
left_relids);
bms_copy(left_relids),
bms_copy(left_nullable_relids));
if (process_equivalence(root, newrinfo, true))
matchleft = true;
}
@ -1516,7 +1546,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
newrinfo = build_implied_join_equality(eq_op,
rightvar,
cur_em->em_expr,
right_relids);
bms_copy(right_relids),
bms_copy(right_nullable_relids));
if (process_equivalence(root, newrinfo, true))
matchright = true;
}
@ -1636,11 +1667,27 @@ add_child_rel_equivalences(PlannerInfo *root,
{
/* Yes, generate transformed child version */
Expr *child_expr;
Relids new_nullable_relids;
child_expr = (Expr *)
adjust_appendrel_attrs((Node *) cur_em->em_expr,
appinfo);
(void) add_eq_member(cur_ec, child_expr, child_rel->relids,
/*
* Must translate nullable_relids. Note this code assumes
* parent and child relids are singletons.
*/
new_nullable_relids = cur_em->em_nullable_relids;
if (bms_overlap(new_nullable_relids, parent_rel->relids))
{
new_nullable_relids = bms_difference(new_nullable_relids,
parent_rel->relids);
new_nullable_relids = bms_add_members(new_nullable_relids,
child_rel->relids);
}
(void) add_eq_member(cur_ec, child_expr,
child_rel->relids, new_nullable_relids,
true, cur_em->em_datatype);
}
}

View File

@ -49,9 +49,12 @@ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
bool below_outer_join,
Relids qualscope,
Relids ojscope,
Relids outerjoin_nonnullable);
Relids outerjoin_nonnullable,
Relids deduced_nullable_relids);
static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
Relids *nullable_relids_p, bool is_pushed_down);
static bool check_equivalence_delay(PlannerInfo *root,
RestrictInfo *restrictinfo);
static void check_mergejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
@ -343,7 +346,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
foreach(l, (List *) f->quals)
distribute_qual_to_rels(root, (Node *) lfirst(l),
false, below_outer_join,
*qualscope, NULL, NULL);
*qualscope, NULL, NULL, NULL);
}
else if (IsA(jtnode, JoinExpr))
{
@ -453,7 +456,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
foreach(qual, (List *) j->quals)
distribute_qual_to_rels(root, (Node *) lfirst(qual),
false, below_outer_join,
*qualscope, ojscope, nonnullable_rels);
*qualscope,
ojscope, nonnullable_rels, NULL);
/* Now we can add the OuterJoinInfo to oj_info_list */
if (ojinfo)
@ -713,6 +717,8 @@ make_outerjoininfo(PlannerInfo *root,
* baserels appearing on the outer (nonnullable) side of the join
* (for FULL JOIN this includes both sides of the join, and must in fact
* equal qualscope)
* 'deduced_nullable_relids': if is_deduced is TRUE, the nullable relids to
* impute to the clause; otherwise NULL
*
* 'qualscope' identifies what level of JOIN the qual came from syntactically.
* 'ojscope' is needed if we decide to force the qual up to the outer-join
@ -724,7 +730,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
bool below_outer_join,
Relids qualscope,
Relids ojscope,
Relids outerjoin_nonnullable)
Relids outerjoin_nonnullable,
Relids deduced_nullable_relids)
{
Relids relids;
bool is_pushed_down;
@ -832,12 +839,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
* If the qual came from implied-equality deduction, it should not be
* outerjoin-delayed, else deducer blew it. But we can't check this
* because the ojinfo list may now contain OJs above where the qual
* belongs.
* belongs. For the same reason, we must rely on caller to supply the
* correct nullable_relids set.
*/
Assert(!ojscope);
is_pushed_down = true;
outerjoin_delayed = false;
nullable_relids = NULL;
nullable_relids = deduced_nullable_relids;
/* Don't feed it back for more deductions */
maybe_equivalence = false;
maybe_outer_join = false;
@ -990,7 +998,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
{
if (maybe_equivalence)
{
if (process_equivalence(root, restrictinfo, below_outer_join))
if (check_equivalence_delay(root, restrictinfo) &&
process_equivalence(root, restrictinfo, below_outer_join))
return;
/* EC rejected it, so pass to distribute_restrictinfo_to_rels */
}
@ -1151,6 +1160,44 @@ check_outerjoin_delay(PlannerInfo *root,
return outerjoin_delayed;
}
/*
 * check_equivalence_delay
 *	  Decide whether a candidate equivalence clause can safely be handed to
 *	  the equivalence machinery.  Returns TRUE when it is safe, FALSE when
 *	  either side of the clause would have to be outerjoin-delayed.
 *
 * distribute_qual_to_rels regards a mergejoinable clause as a potential
 * equivalence whenever the clause, taken as a whole, is not
 * outerjoin_delayed.  That is not sufficient: once a clause is treated as
 * an equivalence, its two sides get recombined with other expressions
 * independently of each other.  Hence each side must pass the
 * not-outerjoin_delayed test by itself; otherwise a derived equivalence
 * condition might be placed somewhere it is not safe to evaluate.
 */
static bool
check_equivalence_delay(PlannerInfo *root,
						RestrictInfo *restrictinfo)
{
	Relids		left_side;
	Relids		right_side;
	Relids		unused_nullable;	/* output of the check; not needed here */

	/* With no special joins recorded, nothing can force a delay */
	if (root->oj_info_list == NIL)
		return true;

	/*
	 * check_outerjoin_delay is handed the relid set by reference, so give it
	 * a private copy instead of the RestrictInfo's own set.
	 */
	left_side = bms_copy(restrictinfo->left_relids);
	if (check_outerjoin_delay(root, &left_side, &unused_nullable, true))
		return false;			/* left side would need delay */

	/* Left side is fine; the verdict now rests on the right side alone */
	right_side = bms_copy(restrictinfo->right_relids);
	return !check_outerjoin_delay(root, &right_side, &unused_nullable, true);
}
/*
* distribute_restrictinfo_to_rels
* Push a completed RestrictInfo into the proper restriction or join
@ -1223,11 +1270,20 @@ distribute_restrictinfo_to_rels(PlannerInfo *root,
* variable-free. Otherwise the qual is applied at the lowest join level
* that provides all its variables.
*
* "nullable_relids" is the set of relids used in the expressions that are
* potentially nullable below the expressions. (This has to be supplied by
* caller because this function is used after deconstruct_jointree, so we
* don't have knowledge of where the clause items came from.)
*
* "both_const" indicates whether both items are known pseudo-constant;
* in this case it is worth applying eval_const_expressions() in case we
* can produce constant TRUE or constant FALSE. (Otherwise it's not,
* because the expressions went through eval_const_expressions already.)
*
* Note: this function will copy item1 and item2, but it is caller's
* responsibility to make sure that the Relids parameters are fresh copies
* not shared with other uses.
*
* This is currently used only when an EquivalenceClass is found to
* contain pseudoconstants. See path/pathkeys.c for more details.
*/
@ -1237,6 +1293,7 @@ process_implied_equality(PlannerInfo *root,
Expr *item1,
Expr *item2,
Relids qualscope,
Relids nullable_relids,
bool below_outer_join,
bool both_const)
{
@ -1268,15 +1325,12 @@ process_implied_equality(PlannerInfo *root,
}
}
/* Make a copy of qualscope to avoid problems if source EC changes */
qualscope = bms_copy(qualscope);
/*
* Push the new clause into all the appropriate restrictinfo lists.
*/
distribute_qual_to_rels(root, (Node *) clause,
true, below_outer_join,
qualscope, NULL, NULL);
qualscope, NULL, NULL, nullable_relids);
}
/*
@ -1284,12 +1338,17 @@ process_implied_equality(PlannerInfo *root,
*
* This overlaps the functionality of process_implied_equality(), but we
* must return the RestrictInfo, not push it into the joininfo tree.
*
* Note: this function will copy item1 and item2, but it is caller's
* responsibility to make sure that the Relids parameters are fresh copies
* not shared with other uses.
*/
RestrictInfo *
build_implied_join_equality(Oid opno,
Expr *item1,
Expr *item2,
Relids qualscope)
Relids qualscope,
Relids nullable_relids)
{
RestrictInfo *restrictinfo;
Expr *clause;
@ -1304,9 +1363,6 @@ build_implied_join_equality(Oid opno,
(Expr *) copyObject(item1),
(Expr *) copyObject(item2));
/* Make a copy of qualscope to avoid problems if source EC changes */
qualscope = bms_copy(qualscope);
/*
* Build the RestrictInfo node itself.
*/
@ -1315,7 +1371,7 @@ build_implied_join_equality(Oid opno,
false, /* outerjoin_delayed */
false, /* pseudoconstant */
qualscope, /* required_relids */
NULL); /* nullable_relids */
nullable_relids); /* nullable_relids */
/* Set mergejoinability info always, and hashjoinability if enabled */
check_mergejoinable(restrictinfo);

View File

@ -509,6 +509,7 @@ typedef struct EquivalenceMember
Expr *em_expr; /* the expression represented */
Relids em_relids; /* all relids appearing in em_expr */
Relids em_nullable_relids; /* nullable by lower outer joins */
bool em_is_const; /* expression is pseudoconstant? */
bool em_is_child; /* derived version for a child relation? */
Oid em_datatype; /* the "nominal type" used by the opfamily */

View File

@ -82,12 +82,14 @@ extern void process_implied_equality(PlannerInfo *root,
Expr *item1,
Expr *item2,
Relids qualscope,
Relids nullable_relids,
bool below_outer_join,
bool both_const);
extern RestrictInfo *build_implied_join_equality(Oid opno,
Expr *item1,
Expr *item2,
Relids qualscope);
Relids qualscope,
Relids nullable_relids);
/*
* prototypes for plan/setrefs.c

View File

@ -2129,6 +2129,7 @@ on (x1 = xx1) where (xx2 is not null);
-- regression test: check for bug with propagation of implied equality
-- to outside an IN
--
analyze tenk1; -- ensure we get consistent plans here
select count(*) from tenk1 a where unique1 in
(select unique1 from tenk1 b join tenk1 c using (unique1)
where b.unique2 = 42);
@ -2348,3 +2349,21 @@ select * from a left join b on i = x and i = y and x = i;
(0 rows)
rollback;
--
-- test handling of potential equivalence clauses above outer joins
--
select q1, unique2, thousand, hundred
from int8_tbl a left join tenk1 b on q1 = unique2
where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
q1 | unique2 | thousand | hundred
----+---------+----------+---------
(0 rows)
select f1, unique2, case when unique2 is null then f1 else 0 end
from int4_tbl a left join tenk1 b on f1 = unique2
where (case when unique2 is null then f1 else 0 end) = 0;
f1 | unique2 | case
----+---------+------
0 | 0 | 0
(1 row)

View File

@ -2129,6 +2129,7 @@ on (x1 = xx1) where (xx2 is not null);
-- regression test: check for bug with propagation of implied equality
-- to outside an IN
--
analyze tenk1; -- ensure we get consistent plans here
select count(*) from tenk1 a where unique1 in
(select unique1 from tenk1 b join tenk1 c using (unique1)
where b.unique2 = 42);
@ -2348,3 +2349,21 @@ select * from a left join b on i = x and i = y and x = i;
(0 rows)
rollback;
--
-- test handling of potential equivalence clauses above outer joins
--
select q1, unique2, thousand, hundred
from int8_tbl a left join tenk1 b on q1 = unique2
where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
q1 | unique2 | thousand | hundred
----+---------+----------+---------
(0 rows)
select f1, unique2, case when unique2 is null then f1 else 0 end
from int4_tbl a left join tenk1 b on f1 = unique2
where (case when unique2 is null then f1 else 0 end) = 0;
f1 | unique2 | case
----+---------+------
0 | 0 | 0
(1 row)

View File

@ -330,6 +330,8 @@ on (x1 = xx1) where (xx2 is not null);
-- regression test: check for bug with propagation of implied equality
-- to outside an IN
--
analyze tenk1; -- ensure we get consistent plans here
select count(*) from tenk1 a where unique1 in
(select unique1 from tenk1 b join tenk1 c using (unique1)
where b.unique2 = 42);
@ -512,3 +514,15 @@ create temp table b (x integer, y integer);
select * from a left join b on i = x and i = y and x = i;
rollback;
--
-- test handling of potential equivalence clauses above outer joins
--
select q1, unique2, thousand, hundred
from int8_tbl a left join tenk1 b on q1 = unique2
where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
select f1, unique2, case when unique2 is null then f1 else 0 end
from int4_tbl a left join tenk1 b on f1 = unique2
where (case when unique2 is null then f1 else 0 end) = 0;