diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 3e915747da..91b6c75e8e 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_eval.c,v 1.59 2002/06/20 20:29:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.60 2002/12/16 21:30:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,6 +21,7 @@ #include "postgres.h" +#include #include #include @@ -44,6 +45,20 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) Cost fitness; List *savelist; + /* + * Because gimme_tree considers both left- and right-sided trees, + * there is no difference between a tour (a,b,c,d,...) and a tour + * (b,a,c,d,...) --- the same join orders will be considered. + * To avoid redundant cost calculations, we simply reject tours where + * tour[0] > tour[1], assigning them an artificially bad fitness. + * + * (It would be better to tweak the GEQO logic to not generate such tours + * in the first place, but I'm not sure of all the implications in the + * mutation logic.) + */ + if (num_gene >= 2 && tour[0] > tour[1]) + return DBL_MAX; + /* * Create a private memory context that will hold all temp storage * allocated inside gimme_tree(). @@ -60,11 +75,15 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) ALLOCSET_DEFAULT_MAXSIZE); oldcxt = MemoryContextSwitchTo(mycontext); - /* preserve root->join_rel_list, which gimme_tree changes */ + /* + * preserve root->join_rel_list, which gimme_tree changes; without this, + * it'll be pointing at recycled storage after the MemoryContextDelete + * below. + */ savelist = root->join_rel_list; /* construct the best path for the given combination of relations */ - joinrel = gimme_tree(root, initial_rels, tour, num_gene, 0, NULL); + joinrel = gimme_tree(root, initial_rels, tour, num_gene); /* * compute fitness @@ -86,70 +105,61 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) /* * gimme_tree - * this routine considers only LEFT-SIDED TREES! + * Form planner estimates for a join tree constructed in the specified + * order. * * 'root' is the Query * 'initial_rels' is the list of initial relations (FROM-list items) * 'tour' is the proposed join order, of length 'num_gene' - * 'rel_count' is number of initial_rels items already joined (initially 0) - * 'old_rel' is the preceding join (initially NULL) * - * Returns a new join relation incorporating all joins in a left-sided tree. + * Returns a new join relation whose cheapest path is the best plan for + * this join order. + * + * Note that at each step we consider using the next rel as both left and + * right side of a join. However, we cannot build general ("bushy") plan + * trees this way, only left-sided and right-sided trees. */ RelOptInfo * gimme_tree(Query *root, List *initial_rels, - Gene *tour, int num_gene, - int rel_count, RelOptInfo *old_rel) + Gene *tour, int num_gene) { - RelOptInfo *inner_rel; /* current relation */ - int init_rel_index; + RelOptInfo *joinrel; + int cur_rel_index; + int rel_count; - if (rel_count < num_gene) + /* + * Start with the first relation ... + */ + cur_rel_index = (int) tour[0]; + + joinrel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels); + + /* + * And add on each relation in the specified order ... + */ + for (rel_count = 1; rel_count < num_gene; rel_count++) { - /* tree not yet finished */ - init_rel_index = (int) tour[rel_count]; + RelOptInfo *inner_rel; + RelOptInfo *new_rel; - inner_rel = (RelOptInfo *) nth(init_rel_index - 1, initial_rels); + cur_rel_index = (int) tour[rel_count]; - if (rel_count == 0) - { - /* processing first join with init_rel_index = (int) tour[0] */ - rel_count++; - return gimme_tree(root, initial_rels, - tour, num_gene, - rel_count, inner_rel); - } - else - { - /* tree main part */ - List *acceptable_rels = makeList1(inner_rel); - List *new_rels; - RelOptInfo *new_rel; + inner_rel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels); - new_rels = make_rels_by_clause_joins(root, old_rel, - acceptable_rels); - /* Shouldn't get more than one result */ - Assert(length(new_rels) <= 1); - if (new_rels == NIL) - { - new_rels = make_rels_by_clauseless_joins(root, old_rel, - acceptable_rels); - Assert(length(new_rels) <= 1); - if (new_rels == NIL) - elog(ERROR, "gimme_tree: failed to construct join rel"); - } - new_rel = (RelOptInfo *) lfirst(new_rels); + /* + * Construct a RelOptInfo representing the previous joinrel joined + * to inner_rel. These are always inner joins. Note that we expect + * the joinrel not to exist in root->join_rel_list yet, and so the + * paths constructed for it will only include the ones we want. + */ + new_rel = make_join_rel(root, joinrel, inner_rel, JOIN_INNER); - /* Find and save the cheapest paths for this rel */ - set_cheapest(new_rel); + /* Find and save the cheapest paths for this rel */ + set_cheapest(new_rel); - /* and recurse... */ - rel_count++; - return gimme_tree(root, initial_rels, - tour, num_gene, - rel_count, new_rel); - } + /* and repeat... */ + joinrel = new_rel; } - return old_rel; /* tree finished ... */ + return joinrel; } diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c index fda6d268a8..c9993680b5 100644 --- a/src/backend/optimizer/geqo/geqo_main.c +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_main.c,v 1.32 2002/07/20 04:59:10 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.33 2002/12/16 21:30:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -235,8 +235,7 @@ geqo(Query *root, int number_of_rels, List *initial_rels) /* root->join_rel_list will be modified during this ! */ best_rel = gimme_tree(root, initial_rels, - best_tour, pool->string_length, - 0, NULL); + best_tour, pool->string_length); /* DBG: show the query plan print_plan(best_plan, root); diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c index 7cda46946b..7cf3a5a14d 100644 --- a/src/backend/optimizer/geqo/geqo_misc.c +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.35 2002/11/06 00:00:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.36 2002/12/16 21:30:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,19 +31,25 @@ /* * avg_pool */ -static float +static double avg_pool(Pool *pool) { int i; double cumulative = 0.0; - if (pool->size == 0) + if (pool->size <= 0) elog(ERROR, "avg_pool: pool_size of zero"); + /* + * Since the pool may contain multiple occurrences of DBL_MAX, divide + * by pool->size before summing, not after, to avoid overflow. This + * loses a little in speed and accuracy, but this routine is only used + * for debug printouts, so we don't care that much. + */ for (i = 0; i < pool->size; i++) - cumulative = cumulative + pool->data[i].worth; + cumulative += pool->data[i].worth / pool->size; - return (float) cumulative / pool->size; + return cumulative; } /* print_pool @@ -72,8 +78,10 @@ print_pool(FILE *fp, Pool *pool, int start, int stop) fprintf(fp, "%d)\t", i); for (j = 0; j < pool->string_length; j++) fprintf(fp, "%d ", pool->data[i].string[j]); - fprintf(fp, "%f\n", pool->data[i].worth); + fprintf(fp, "%g\n", pool->data[i].worth); } + + fflush(fp); } /* print_gen @@ -90,12 +98,14 @@ print_gen(FILE *fp, Pool *pool, int generation) lowest = pool->size > 1 ? pool->size - 2 : 0; fprintf(fp, - "%5d | Best: %f Worst: %f Mean: %f Avg: %f\n", + "%5d | Best: %g Worst: %g Mean: %g Avg: %g\n", generation, pool->data[0].worth, pool->data[lowest].worth, pool->data[pool->size / 2].worth, avg_pool(pool)); + + fflush(fp); } @@ -116,6 +126,8 @@ print_edge_table(FILE *fp, Edge *edge_table, int num_gene) } fprintf(fp, "\n"); + + fflush(fp); } #endif /* GEQO_DEBUG */ diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 984c930e3a..228a876f71 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.129 2002/12/15 16:17:49 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.130 2002/12/16 21:30:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1398,6 +1398,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, List *ilist; List *jlist; InnerIndexscanInfo *info; + MemoryContext oldcontext; /* * Nestloop only supports inner and left joins. @@ -1415,15 +1416,27 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, } /* * If there are no indexable joinclauses for this rel, exit quickly. - * Otherwise, intersect the given outer_relids with index_outer_relids - * to find the set of outer relids actually relevant for this index. - * If there are none, again we can fail immediately. */ if (!rel->index_outer_relids) return NULL; + /* + * Otherwise, we have to do path selection in the memory context of + * the given rel, so that any created path can be safely attached to + * the rel's cache of best inner paths. (This is not currently an + * issue for normal planning, but it is an issue for GEQO planning.) + */ + oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + /* + * Intersect the given outer_relids with index_outer_relids + * to find the set of outer relids actually relevant for this index. + * If there are none, again we can fail immediately. + */ outer_relids = set_intersecti(rel->index_outer_relids, outer_relids); if (!outer_relids) + { + MemoryContextSwitchTo(oldcontext); return NULL; + } /* * Look to see if we already computed the result for this set of * relevant outerrels. (We include the isouterjoin status in the @@ -1437,6 +1450,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, info->isouterjoin == isouterjoin) { freeList(outer_relids); + MemoryContextSwitchTo(oldcontext); return info->best_innerpath; } } @@ -1517,6 +1531,8 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, info->best_innerpath = cheapest; rel->index_inner_paths = lcons(info, rel->index_inner_paths); + MemoryContextSwitchTo(oldcontext); + return cheapest; } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 58772c0bc0..037733d5d7 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.57 2002/06/20 20:29:30 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.58 2002/12/16 21:30:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,8 +18,12 @@ #include "optimizer/paths.h" -static RelOptInfo *make_join_rel(Query *root, RelOptInfo *rel1, - RelOptInfo *rel2, JoinType jointype); +static List *make_rels_by_clause_joins(Query *root, + RelOptInfo *old_rel, + List *other_rels); +static List *make_rels_by_clauseless_joins(Query *root, + RelOptInfo *old_rel, + List *other_rels); /* @@ -246,7 +250,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels) * no extra test for overlap for initial rels, since the is_subset test can * only succeed when other_rel is not already part of old_rel.) */ -List * +static List * make_rels_by_clause_joins(Query *root, RelOptInfo *old_rel, List *other_rels) @@ -297,7 +301,7 @@ make_rels_by_clause_joins(Query *root, * Currently, this is only used with initial rels in other_rels, but it would * work for joining to joinrels too. */ -List * +static List * make_rels_by_clauseless_joins(Query *root, RelOptInfo *old_rel, List *other_rels) @@ -392,7 +396,7 @@ make_jointree_rel(Query *root, Node *jtnode) * (The join rel may already contain paths generated from other * pairs of rels that add up to the same set of base rels.) */ -static RelOptInfo * +RelOptInfo * make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype) { diff --git a/src/include/optimizer/geqo.h b/src/include/optimizer/geqo.h index 04af4d7d88..aec8236540 100644 --- a/src/include/optimizer/geqo.h +++ b/src/include/optimizer/geqo.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo.h,v 1.29 2002/07/20 04:59:10 momjian Exp $ + * $Id: geqo.h,v 1.30 2002/12/16 21:30:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -74,7 +74,6 @@ extern RelOptInfo *geqo(Query *root, int number_of_rels, List *initial_rels); extern Cost geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene); extern RelOptInfo *gimme_tree(Query *root, List *initial_rels, - Gene *tour, int num_gene, - int rel_count, RelOptInfo *old_rel); + Gene *tour, int num_gene); #endif /* GEQO_H */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 06f585f722..7ed5b403a8 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: paths.h,v 1.62 2002/12/12 15:49:41 tgl Exp $ + * $Id: paths.h,v 1.63 2002/12/16 21:30:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -74,13 +74,10 @@ extern void add_paths_to_joinrel(Query *root, RelOptInfo *joinrel, * routines to determine which relations to join */ extern List *make_rels_by_joins(Query *root, int level, List **joinrels); -extern List *make_rels_by_clause_joins(Query *root, - RelOptInfo *old_rel, - List *other_rels); -extern List *make_rels_by_clauseless_joins(Query *root, - RelOptInfo *old_rel, - List *other_rels); extern RelOptInfo *make_jointree_rel(Query *root, Node *jtnode); +extern RelOptInfo *make_join_rel(Query *root, + RelOptInfo *rel1, RelOptInfo *rel2, + JoinType jointype); /* * pathkeys.c