From f5983923d81d6327bdacf9d439a1536c4c8c4c15 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 16 Aug 2012 13:03:54 -0400 Subject: [PATCH] Allow create_index_paths() to consider multiple join bitmapscan paths. In the initial cut at the "parameterized paths" feature, I'd simplified create_index_paths() to the point where it would only generate a single parameterized bitmap path per relation. Experimentation with an example supplied by Josh Berkus convinces me that that's not good enough: we really need to consider a bitmap path for each possible outer relation. Otherwise we have regressions relative to pre-9.2 versions, in which the planner picks a plain indexscan where it should have used a bitmap scan in queries involving three or more tables. Indeed, after fixing this, several queries in the regression tests show improved plans as a result of using bitmap not plain indexscans. --- src/backend/optimizer/path/indxpath.c | 98 ++++++++++++++++++++++----- src/test/regress/expected/join.out | 28 +++++--- 2 files changed, 99 insertions(+), 27 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 66b68fc71d..b6efb0fb4c 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -309,26 +309,92 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) } /* - * Likewise, if we found anything usable, generate a BitmapHeapPath for - * the most promising combination of join bitmap index paths. Note there - * will be only one such path no matter how many join clauses are - * available. (XXX is that good enough, or do we need to consider even - * more paths for different subsets of possible join partners? Also, - * should we add in restriction bitmap paths as well?) + * Likewise, if we found anything usable, generate BitmapHeapPaths for the + * most promising combinations of join bitmap index paths. Our strategy + * is to generate one such path for each distinct parameterization seen + * among the available bitmap index paths. This may look pretty + * expensive, but usually there won't be very many distinct + * parameterizations. */ if (bitjoinpaths != NIL) { - Path *bitmapqual; - Relids required_outer; - double loop_count; - BitmapHeapPath *bpath; + List *path_outer; + List *all_path_outers; + ListCell *lc; - bitmapqual = choose_bitmap_and(root, rel, bitjoinpaths); - required_outer = get_bitmap_tree_required_outer(bitmapqual); - loop_count = get_loop_count(root, required_outer); - bpath = create_bitmap_heap_path(root, rel, bitmapqual, - required_outer, loop_count); - add_path(rel, (Path *) bpath); + /* + * path_outer holds the parameterization of each path in bitjoinpaths + * (to save recalculating that several times), while all_path_outers + * holds all distinct parameterization sets. + */ + path_outer = all_path_outers = NIL; + foreach(lc, bitjoinpaths) + { + Path *path = (Path *) lfirst(lc); + Relids required_outer; + bool found = false; + ListCell *lco; + + required_outer = get_bitmap_tree_required_outer(path); + path_outer = lappend(path_outer, required_outer); + + /* Have we already seen this param set? */ + foreach(lco, all_path_outers) + { + Relids existing_outers = (Relids) lfirst(lco); + + if (bms_equal(existing_outers, required_outer)) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_path_outers */ + all_path_outers = lappend(all_path_outers, required_outer); + } + } + + /* Now, for each distinct parameterization set ... */ + foreach(lc, all_path_outers) + { + Relids max_outers = (Relids) lfirst(lc); + List *this_path_set; + Path *bitmapqual; + Relids required_outer; + double loop_count; + BitmapHeapPath *bpath; + ListCell *lcp; + ListCell *lco; + + /* Identify all the bitmap join paths needing no more than that */ + this_path_set = NIL; + forboth(lcp, bitjoinpaths, lco, path_outer) + { + Path *path = (Path *) lfirst(lcp); + Relids p_outers = (Relids) lfirst(lco); + + if (bms_is_subset(p_outers, max_outers)) + this_path_set = lappend(this_path_set, path); + } + + /* + * Add in restriction bitmap paths, since they can be used + * together with any join paths. + */ + this_path_set = list_concat(this_path_set, bitindexpaths); + + /* Select best AND combination for this parameterization */ + bitmapqual = choose_bitmap_and(root, rel, this_path_set); + + /* And push that path into the mix */ + required_outer = get_bitmap_tree_required_outer(bitmapqual); + loop_count = get_loop_count(root, required_outer); + bpath = create_bitmap_heap_path(root, rel, bitmapqual, + required_outer, loop_count); + add_path(rel, (Path *) bpath); + } } } diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 6705706f02..51aeb8de7b 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2725,11 +2725,13 @@ where t1.unique1 = 1; Index Cond: (unique1 = 1) -> Nested Loop Join Filter: (t1.ten = t3.ten) - -> Index Scan using tenk1_hundred on tenk1 t2 - Index Cond: (t1.hundred = hundred) + -> Bitmap Heap Scan on tenk1 t2 + Recheck Cond: (t1.hundred = hundred) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (t1.hundred = hundred) -> Index Scan using tenk1_unique2 on tenk1 t3 Index Cond: (unique2 = t2.thousand) -(9 rows) +(11 rows) explain (costs off) select * from tenk1 t1 left join @@ -2743,32 +2745,36 @@ where t1.unique1 = 1; Index Cond: (unique1 = 1) -> Nested Loop Join Filter: ((t1.ten + t2.ten) = t3.ten) - -> Index Scan using tenk1_hundred on tenk1 t2 - Index Cond: (t1.hundred = hundred) + -> Bitmap Heap Scan on tenk1 t2 + Recheck Cond: (t1.hundred = hundred) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (t1.hundred = hundred) -> Index Scan using tenk1_unique2 on tenk1 t3 Index Cond: (unique2 = t2.thousand) -(9 rows) +(11 rows) explain (costs off) select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand join int4_tbl on b.thousand = f1; - QUERY PLAN --------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------- Aggregate -> Nested Loop Left Join Join Filter: (a.unique2 = b.unique1) -> Nested Loop -> Nested Loop -> Seq Scan on int4_tbl - -> Index Scan using tenk1_thous_tenthous on tenk1 b - Index Cond: (thousand = int4_tbl.f1) + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: (thousand = int4_tbl.f1) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = int4_tbl.f1) -> Index Scan using tenk1_unique1 on tenk1 a Index Cond: (unique1 = b.unique2) -> Index Only Scan using tenk1_thous_tenthous on tenk1 c Index Cond: (thousand = a.thousand) -(12 rows) +(14 rows) select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2