From f01313bc0d4c293d88c2619084b1fc41825f2cf8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 27 Feb 2009 00:06:27 +0000 Subject: [PATCH] Improve create_unique_path to not be fooled by unrelated clauses that happen to be syntactically part of a semijoin clause. For example given WHERE EXISTS(SELECT ... WHERE upper.var = lower.var AND some-condition) where some-condition is just a restriction on the lower relation, we can use unique-ification on lower.var after having applied some-condition within the scan on lower. --- src/backend/optimizer/util/pathnode.c | 68 +++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5ac0b6c260..28b2828c2d 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.149 2009/01/01 17:23:44 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.150 2009/02/27 00:06:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -765,12 +765,26 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, */ oldcontext = MemoryContextSwitchTo(root->planner_cxt); - /* + /*---------- * Look to see whether the semijoin's join quals consist of AND'ed * equality operators, with (only) RHS variables on only one side of * each one. If so, we can figure out how to enforce uniqueness for * the RHS. * + * Note that the input join_quals list is the list of quals that are + * *syntactically* associated with the semijoin, which in practice means + * the synthesized comparison list for an IN or the WHERE of an EXISTS. + * Particularly in the latter case, it might contain clauses that aren't + * *semantically* associated with the join, but refer to just one side or + * the other. We can ignore such clauses here, as they will just drop + * down to be processed within one side or the other. (It is okay to + * consider only the syntactically-associated clauses here because for a + * semijoin, no higher-level quals could refer to the RHS, and so there + * can be no other quals that are semantically associated with this join. + * We do things this way because it is useful to be able to run this test + * before we have extracted the list of quals that are actually + * semantically associated with the particular join.) + * * Note that the in_operators list consists of the joinqual operators * themselves (but commuted if needed to put the RHS value on the right). * These could be cross-type operators, in which case the operator @@ -778,6 +792,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, * We assume here that that operator will be available from the btree * or hash opclass when the time comes ... if not, create_unique_plan() * will fail. + *---------- */ in_operators = NIL; uniq_exprs = NIL; @@ -791,19 +806,52 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, Node *right_expr; Relids left_varnos; Relids right_varnos; + Relids all_varnos; - /* must be binary opclause... */ - if (!IsA(op, OpExpr)) - goto no_unique_path; - if (list_length(op->args) != 2) + /* Is it a binary opclause? */ + if (!IsA(op, OpExpr) || + list_length(op->args) != 2) + { + /* No, but does it reference both sides? */ + all_varnos = pull_varnos((Node *) op); + if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || + bms_is_subset(all_varnos, sjinfo->syn_righthand)) + { + /* + * Clause refers to only one rel, so ignore it --- unless it + * contains volatile functions, in which case we'd better + * punt. + */ + if (contain_volatile_functions((Node *) op)) + goto no_unique_path; + continue; + } + /* Non-operator clause referencing both sides, must punt */ goto no_unique_path; + } + + /* Extract data from binary opclause */ opno = op->opno; left_expr = linitial(op->args); right_expr = lsecond(op->args); - - /* check rel membership of arguments */ left_varnos = pull_varnos(left_expr); right_varnos = pull_varnos(right_expr); + all_varnos = bms_union(left_varnos, right_varnos); + + /* Does it reference both sides? */ + if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || + bms_is_subset(all_varnos, sjinfo->syn_righthand)) + { + /* + * Clause refers to only one rel, so ignore it --- unless it + * contains volatile functions, in which case we'd better punt. + */ + if (contain_volatile_functions((Node *) op)) + goto no_unique_path; + continue; + } + + /* check rel membership of arguments */ if (!bms_is_empty(right_varnos) && bms_is_subset(right_varnos, sjinfo->syn_righthand) && !bms_overlap(left_varnos, sjinfo->syn_righthand)) @@ -845,6 +893,10 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, uniq_exprs = lappend(uniq_exprs, copyObject(right_expr)); } + /* Punt if we didn't find at least one column to unique-ify */ + if (uniq_exprs == NIL) + goto no_unique_path; + /* * The expressions we'd need to unique-ify mustn't be volatile. */