From 0043aa6b8597809b19c420cb5e635b4d832adccc Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 31 Oct 2022 19:52:33 -0400 Subject: [PATCH] Add basic regression tests for semi/antijoin recognition. Add some simple tests that the planner recognizes all the standard idioms for SEMI and ANTI joins. Failure to optimize in this way won't necessarily cause any visible change in query results, so check the plans. We had no similar coverage before, at least for some variants of antijoin, as noted by Richard Guo. Discussion: https://postgr.es/m/CAMbWs4-mvPPCJ1W6iK6dD5HiNwoJdi6mZp=-7mE8N9Sh+cd0tQ@mail.gmail.com --- src/test/regress/expected/join.out | 63 ++++++++++++++++++++++++++++++ src/test/regress/sql/join.sql | 25 ++++++++++++ 2 files changed, 88 insertions(+) diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 9b69a8c122..b901d7299f 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2650,6 +2650,69 @@ WHERE d.f1 IS NULL; 9999 (3 rows) +-- +-- basic semijoin and antijoin recognition tests +-- +explain (costs off) +select a.* from tenk1 a +where unique1 in (select unique2 from tenk1 b); + QUERY PLAN +------------------------------------------------------------ + Hash Semi Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Index Only Scan using tenk1_unique2 on tenk1 b +(5 rows) + +-- sadly, this is not an antijoin +explain (costs off) +select a.* from tenk1 a +where unique1 not in (select unique2 from tenk1 b); + QUERY PLAN +-------------------------------------------------------- + Seq Scan on tenk1 a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Index Only Scan using tenk1_unique2 on tenk1 b +(4 rows) + +explain (costs off) +select a.* from tenk1 a +where exists (select 1 from tenk1 b where a.unique1 = b.unique2); + QUERY PLAN +------------------------------------------------------------ + Hash Semi Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Index Only Scan using tenk1_unique2 on tenk1 b +(5 rows) + +explain (costs off) +select a.* from tenk1 a +where not exists (select 1 from tenk1 b where a.unique1 = b.unique2); + QUERY PLAN +------------------------------------------------------------ + Hash Anti Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Index Only Scan using tenk1_unique2 on tenk1 b +(5 rows) + +explain (costs off) +select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2 +where b.unique2 is null; + QUERY PLAN +------------------------------------------------------------ + Hash Anti Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Index Only Scan using tenk1_unique2 on tenk1 b +(5 rows) + -- -- regression test for proper handling of outer joins within antijoins -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 27e7e741a1..ccbbe5454c 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -624,6 +624,31 @@ LEFT JOIN ( ) AS d ON (a.f1 = d.f1) WHERE d.f1 IS NULL; +-- +-- basic semijoin and antijoin recognition tests +-- + +explain (costs off) +select a.* from tenk1 a +where unique1 in (select unique2 from tenk1 b); + +-- sadly, this is not an antijoin +explain (costs off) +select a.* from tenk1 a +where unique1 not in (select unique2 from tenk1 b); + +explain (costs off) +select a.* from tenk1 a +where exists (select 1 from tenk1 b where a.unique1 = b.unique2); + +explain (costs off) +select a.* from tenk1 a +where not exists (select 1 from tenk1 b where a.unique1 = b.unique2); + +explain (costs off) +select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2 +where b.unique2 is null; + -- -- regression test for proper handling of outer joins within antijoins --