diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index ba33da829a..0b9c581982 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.200 2008/10/21 20:42:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.201 2008/11/22 22:47:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2481,7 +2481,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, break; case JOIN_SEMI: nrows = outer_rel->rows * jselec; - nrows *= pselec; + /* pselec not used */ break; case JOIN_ANTI: nrows = outer_rel->rows * (1.0 - jselec); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 297c50389f..5d1e315492 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.233 2008/09/12 14:56:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.234 2008/11/22 22:47:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1647,10 +1647,10 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, switch (jointype) { case JOIN_INNER: + case JOIN_SEMI: isouterjoin = false; break; case JOIN_LEFT: - case JOIN_SEMI: case JOIN_ANTI: isouterjoin = true; break; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index eaf0ffa427..fe70df49ad 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.118 2008/10/04 21:56:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.119 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -744,12 +744,12 @@ hash_inner_and_outer(PlannerInfo *root, switch (jointype) { case JOIN_INNER: + case JOIN_SEMI: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: isouterjoin = false; break; case JOIN_LEFT: - case JOIN_SEMI: case JOIN_ANTI: isouterjoin = true; break; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 9ac0cc9743..5f49ee7e29 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.94 2008/08/17 19:40:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.95 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -419,6 +419,27 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, match_sjinfo = sjinfo; reversed = true; } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel2->relids)) + { + /* + * For a semijoin, we can join the RHS to anything else by + * unique-ifying the RHS. + */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = false; + } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel1->relids)) + { + /* Reversed semijoin case */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = true; + } else { /*---------- @@ -444,14 +465,24 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, * We assume that make_outerjoininfo() set things up correctly * so that we'll only match to some SJ if the join is valid. * Set flag here to check at bottom of loop. + * + * For a semijoin, assume it's okay if either side fully contains + * the RHS (per the unique-ification case above). *---------- */ - if (bms_overlap(rel1->relids, sjinfo->min_righthand) && + if (sjinfo->jointype != JOIN_SEMI && + bms_overlap(rel1->relids, sjinfo->min_righthand) && bms_overlap(rel2->relids, sjinfo->min_righthand)) { /* seems OK */ Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand)); } + else if (sjinfo->jointype == JOIN_SEMI && + (bms_is_subset(sjinfo->syn_righthand, rel1->relids) || + bms_is_subset(sjinfo->syn_righthand, rel2->relids))) + { + /* seems OK */ + } else is_valid_inner = false; } @@ -612,15 +643,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) restrictlist); break; case JOIN_SEMI: - if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist)) + /* + * Do these steps only if we actually have a regular semijoin, + * as opposed to a case where we should unique-ify the RHS. + */ + if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && + bms_is_subset(sjinfo->min_righthand, rel2->relids)) { - mark_dummy_rel(joinrel); - break; + if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || + restriction_is_constant_false(restrictlist)) + { + mark_dummy_rel(joinrel); + break; + } + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_SEMI, sjinfo, + restrictlist); } - add_paths_to_joinrel(root, joinrel, rel1, rel2, - JOIN_SEMI, sjinfo, - restrictlist); /* * If we know how to unique-ify the RHS and one input rel is diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index d1b590d063..1764656da3 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.144 2008/10/25 19:51:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.145 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -851,16 +851,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, maybe_equivalence = false; maybe_outer_join = false; } - else if (bms_overlap(relids, outerjoin_nonnullable) && - (jointype != JOIN_SEMI || - bms_nonempty_difference(relids, outerjoin_nonnullable))) + else if (bms_overlap(relids, outerjoin_nonnullable)) { /* * The qual is attached to an outer join and mentions (some of the) - * rels on the nonnullable side, so it's not degenerate. (For a - * JOIN_SEMI qual, we consider it non-degenerate only if it mentions - * both sides of the join --- if it mentions only one side, it can - * be pushed down.) + * rels on the nonnullable side, so it's not degenerate. * * We can't use such a clause to deduce equivalence (the left and * right sides might be unequal above the join because one of them has @@ -1062,6 +1057,7 @@ distribute_sublink_quals_to_rels(PlannerInfo *root, SpecialJoinInfo *sjinfo; Relids qualscope; Relids ojscope; + Relids outerjoin_nonnullable; ListCell *l; /* @@ -1076,17 +1072,27 @@ distribute_sublink_quals_to_rels(PlannerInfo *root, fslink->jointype, quals); + /* Treat as inner join if SEMI, outer join if ANTI */ qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand); - ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + if (fslink->jointype == JOIN_SEMI) + { + ojscope = outerjoin_nonnullable = NULL; + } + else + { + Assert(fslink->jointype == JOIN_ANTI); + ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + outerjoin_nonnullable = fslink->lefthand; + } - /* Distribute the join quals much as for a regular LEFT JOIN */ + /* Distribute the join quals much as for a regular JOIN node */ foreach(l, quals) { Node *qual = (Node *) lfirst(l); distribute_qual_to_rels(root, qual, false, below_outer_join, fslink->jointype, - qualscope, ojscope, fslink->lefthand); + qualscope, ojscope, outerjoin_nonnullable); } /* Now we can add the SpecialJoinInfo to join_info_list */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index ddd9934cf1..4161c08ec5 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.214 2008/10/21 20:42:53 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.215 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -542,15 +542,23 @@ typedef enum JoinType /* * OUTER joins are those for which pushed-down quals must behave differently - * from the join's own quals. This is in fact everything except INNER joins. - * However, this macro must also exclude the JOIN_UNIQUE symbols since those - * are temporary proxies for what will eventually be an INNER join. + * from the join's own quals. This is in fact everything except INNER and + * SEMI joins. However, this macro must also exclude the JOIN_UNIQUE symbols + * since those are temporary proxies for what will eventually be an INNER + * join. * - * Note: in some places it is preferable to treat JOIN_SEMI as not being - * an outer join, since it doesn't produce null-extended rows. Be aware - * of that distinction when deciding whether to use this macro. + * Note: semijoins are a hybrid case, but we choose to treat them as not + * being outer joins. This is okay principally because the SQL syntax makes + * it impossible to have a pushed-down qual that refers to the inner relation + * of a semijoin; so there is no strong need to distinguish join quals from + * pushed-down quals. This is convenient because for almost all purposes, + * quals attached to a semijoin can be treated the same as innerjoin quals. */ #define IS_OUTER_JOIN(jointype) \ - ((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER) + (((1 << (jointype)) & \ + ((1 << JOIN_LEFT) | \ + (1 << JOIN_FULL) | \ + (1 << JOIN_RIGHT) | \ + (1 << JOIN_ANTI))) != 0) #endif /* NODES_H */