mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-23 19:39:53 +08:00
Allow left join removals and unique joins on partitioned tables
This allows left join removals and unique joins to work with partitioned tables. The planner just lacked sufficient proofs that a given join would not cause any row duplication. Unique indexes currently serve as that proof, so have get_relation_info() populate the indexlist for partitioned tables too. Author: Arne Roland Reviewed-by: Alvaro Herrera, Zhihong Yu, Amit Langote, David Rowley Discussion: https://postgr.es/m/c3b2408b7a39433b8230bbcd02e9f302@index.de
This commit is contained in:
parent
216a784829
commit
3c569049b7
@ -109,7 +109,9 @@ static void set_baserel_partition_constraint(Relation relation,
|
||||
* If inhparent is true, all we need to do is set up the attr arrays:
|
||||
* the RelOptInfo actually represents the appendrel formed by an inheritance
|
||||
* tree, and so the parent rel's physical size and index information isn't
|
||||
* important for it.
|
||||
* important for it, however, for partitioned tables, we do populate the
|
||||
* indexlist as the planner uses unique indexes as unique proofs for certain
|
||||
* optimizations.
|
||||
*/
|
||||
void
|
||||
get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
@ -175,10 +177,14 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
|
||||
/*
|
||||
* Make list of indexes. Ignore indexes on system catalogs if told to.
|
||||
* Don't bother with indexes for an inheritance parent, either.
|
||||
* Don't bother with indexes from traditional inheritance parents. For
|
||||
* partitioned tables, we need a list of at least unique indexes as these
|
||||
* serve as unique proofs for certain planner optimizations. However,
|
||||
* let's not discriminate here and just record all partitioned indexes
|
||||
* whether they're unique indexes or not.
|
||||
*/
|
||||
if (inhparent ||
|
||||
(IgnoreSystemIndexes && IsSystemRelation(relation)))
|
||||
if ((inhparent && relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
|
||||
|| (IgnoreSystemIndexes && IsSystemRelation(relation)))
|
||||
hasindex = false;
|
||||
else
|
||||
hasindex = relation->rd_rel->relhasindex;
|
||||
@ -231,16 +237,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore partitioned indexes, since they are not usable for
|
||||
* queries.
|
||||
*/
|
||||
if (indexRelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
|
||||
{
|
||||
index_close(indexRelation, NoLock);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the index is valid, but cannot yet be used, ignore it; but
|
||||
* mark the plan we are generating as transient. See
|
||||
@ -285,105 +281,129 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
|
||||
info->relam = indexRelation->rd_rel->relam;
|
||||
|
||||
/* We copy just the fields we need, not all of rd_indam */
|
||||
amroutine = indexRelation->rd_indam;
|
||||
info->amcanorderbyop = amroutine->amcanorderbyop;
|
||||
info->amoptionalkey = amroutine->amoptionalkey;
|
||||
info->amsearcharray = amroutine->amsearcharray;
|
||||
info->amsearchnulls = amroutine->amsearchnulls;
|
||||
info->amcanparallel = amroutine->amcanparallel;
|
||||
info->amhasgettuple = (amroutine->amgettuple != NULL);
|
||||
info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
|
||||
relation->rd_tableam->scan_bitmap_next_block != NULL;
|
||||
info->amcanmarkpos = (amroutine->ammarkpos != NULL &&
|
||||
amroutine->amrestrpos != NULL);
|
||||
info->amcostestimate = amroutine->amcostestimate;
|
||||
Assert(info->amcostestimate != NULL);
|
||||
|
||||
/* Fetch index opclass options */
|
||||
info->opclassoptions = RelationGetIndexAttOptions(indexRelation, true);
|
||||
|
||||
/*
|
||||
* Fetch the ordering information for the index, if any.
|
||||
* We don't have an AM for partitioned indexes, so we'll just
|
||||
* NULLify the AM related fields for those.
|
||||
*/
|
||||
if (info->relam == BTREE_AM_OID)
|
||||
if (indexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
|
||||
{
|
||||
/* We copy just the fields we need, not all of rd_indam */
|
||||
amroutine = indexRelation->rd_indam;
|
||||
info->amcanorderbyop = amroutine->amcanorderbyop;
|
||||
info->amoptionalkey = amroutine->amoptionalkey;
|
||||
info->amsearcharray = amroutine->amsearcharray;
|
||||
info->amsearchnulls = amroutine->amsearchnulls;
|
||||
info->amcanparallel = amroutine->amcanparallel;
|
||||
info->amhasgettuple = (amroutine->amgettuple != NULL);
|
||||
info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
|
||||
relation->rd_tableam->scan_bitmap_next_block != NULL;
|
||||
info->amcanmarkpos = (amroutine->ammarkpos != NULL &&
|
||||
amroutine->amrestrpos != NULL);
|
||||
info->amcostestimate = amroutine->amcostestimate;
|
||||
Assert(info->amcostestimate != NULL);
|
||||
|
||||
/* Fetch index opclass options */
|
||||
info->opclassoptions = RelationGetIndexAttOptions(indexRelation, true);
|
||||
|
||||
/*
|
||||
* If it's a btree index, we can use its opfamily OIDs
|
||||
* directly as the sort ordering opfamily OIDs.
|
||||
* Fetch the ordering information for the index, if any.
|
||||
*/
|
||||
Assert(amroutine->amcanorder);
|
||||
|
||||
info->sortopfamily = info->opfamily;
|
||||
info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
|
||||
for (i = 0; i < nkeycolumns; i++)
|
||||
if (info->relam == BTREE_AM_OID)
|
||||
{
|
||||
int16 opt = indexRelation->rd_indoption[i];
|
||||
/*
|
||||
* If it's a btree index, we can use its opfamily OIDs
|
||||
* directly as the sort ordering opfamily OIDs.
|
||||
*/
|
||||
Assert(amroutine->amcanorder);
|
||||
|
||||
info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
|
||||
info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
|
||||
info->sortopfamily = info->opfamily;
|
||||
info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
|
||||
for (i = 0; i < nkeycolumns; i++)
|
||||
{
|
||||
int16 opt = indexRelation->rd_indoption[i];
|
||||
|
||||
info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
|
||||
info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (amroutine->amcanorder)
|
||||
{
|
||||
/*
|
||||
* Otherwise, identify the corresponding btree opfamilies by
|
||||
* trying to map this index's "<" operators into btree. Since
|
||||
* "<" uniquely defines the behavior of a sort order, this is
|
||||
* a sufficient test.
|
||||
*
|
||||
* XXX This method is rather slow and also requires the
|
||||
* undesirable assumption that the other index AM numbers its
|
||||
* strategies the same as btree. It'd be better to have a way
|
||||
* to explicitly declare the corresponding btree opfamily for
|
||||
* each opfamily of the other index type. But given the lack
|
||||
* of current or foreseeable amcanorder index types, it's not
|
||||
* worth expending more effort on now.
|
||||
*/
|
||||
info->sortopfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns);
|
||||
info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
|
||||
for (i = 0; i < nkeycolumns; i++)
|
||||
else if (amroutine->amcanorder)
|
||||
{
|
||||
int16 opt = indexRelation->rd_indoption[i];
|
||||
Oid ltopr;
|
||||
Oid btopfamily;
|
||||
Oid btopcintype;
|
||||
int16 btstrategy;
|
||||
/*
|
||||
* Otherwise, identify the corresponding btree opfamilies
|
||||
* by trying to map this index's "<" operators into btree.
|
||||
* Since "<" uniquely defines the behavior of a sort
|
||||
* order, this is a sufficient test.
|
||||
*
|
||||
* XXX This method is rather slow and also requires the
|
||||
* undesirable assumption that the other index AM numbers
|
||||
* its strategies the same as btree. It'd be better to
|
||||
* have a way to explicitly declare the corresponding
|
||||
* btree opfamily for each opfamily of the other index
|
||||
* type. But given the lack of current or foreseeable
|
||||
* amcanorder index types, it's not worth expending more
|
||||
* effort on now.
|
||||
*/
|
||||
info->sortopfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns);
|
||||
info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
|
||||
|
||||
info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
|
||||
info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
|
||||
for (i = 0; i < nkeycolumns; i++)
|
||||
{
|
||||
int16 opt = indexRelation->rd_indoption[i];
|
||||
Oid ltopr;
|
||||
Oid btopfamily;
|
||||
Oid btopcintype;
|
||||
int16 btstrategy;
|
||||
|
||||
ltopr = get_opfamily_member(info->opfamily[i],
|
||||
info->opcintype[i],
|
||||
info->opcintype[i],
|
||||
BTLessStrategyNumber);
|
||||
if (OidIsValid(ltopr) &&
|
||||
get_ordering_op_properties(ltopr,
|
||||
&btopfamily,
|
||||
&btopcintype,
|
||||
&btstrategy) &&
|
||||
btopcintype == info->opcintype[i] &&
|
||||
btstrategy == BTLessStrategyNumber)
|
||||
{
|
||||
/* Successful mapping */
|
||||
info->sortopfamily[i] = btopfamily;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Fail ... quietly treat index as unordered */
|
||||
info->sortopfamily = NULL;
|
||||
info->reverse_sort = NULL;
|
||||
info->nulls_first = NULL;
|
||||
break;
|
||||
info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
|
||||
info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
|
||||
|
||||
ltopr = get_opfamily_member(info->opfamily[i],
|
||||
info->opcintype[i],
|
||||
info->opcintype[i],
|
||||
BTLessStrategyNumber);
|
||||
if (OidIsValid(ltopr) &&
|
||||
get_ordering_op_properties(ltopr,
|
||||
&btopfamily,
|
||||
&btopcintype,
|
||||
&btstrategy) &&
|
||||
btopcintype == info->opcintype[i] &&
|
||||
btstrategy == BTLessStrategyNumber)
|
||||
{
|
||||
/* Successful mapping */
|
||||
info->sortopfamily[i] = btopfamily;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Fail ... quietly treat index as unordered */
|
||||
info->sortopfamily = NULL;
|
||||
info->reverse_sort = NULL;
|
||||
info->nulls_first = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
info->sortopfamily = NULL;
|
||||
info->reverse_sort = NULL;
|
||||
info->nulls_first = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
info->amcanorderbyop = false;
|
||||
info->amoptionalkey = false;
|
||||
info->amsearcharray = false;
|
||||
info->amsearchnulls = false;
|
||||
info->amcanparallel = false;
|
||||
info->amhasgettuple = false;
|
||||
info->amhasgetbitmap = false;
|
||||
info->amcanmarkpos = false;
|
||||
info->amcostestimate = NULL;
|
||||
|
||||
info->sortopfamily = NULL;
|
||||
info->reverse_sort = NULL;
|
||||
info->nulls_first = NULL;
|
||||
@ -416,31 +436,45 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
* the number-of-tuples estimate to equal the parent table; if it
|
||||
* is partial then we have to use the same methods as we would for
|
||||
* a table, except we can be sure that the index is not larger
|
||||
* than the table.
|
||||
* than the table. We must ignore partitioned indexes here as as
|
||||
* there are not physical indexes.
|
||||
*/
|
||||
if (info->indpred == NIL)
|
||||
if (indexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
|
||||
{
|
||||
info->pages = RelationGetNumberOfBlocks(indexRelation);
|
||||
info->tuples = rel->tuples;
|
||||
}
|
||||
else
|
||||
{
|
||||
double allvisfrac; /* dummy */
|
||||
|
||||
estimate_rel_size(indexRelation, NULL,
|
||||
&info->pages, &info->tuples, &allvisfrac);
|
||||
if (info->tuples > rel->tuples)
|
||||
if (info->indpred == NIL)
|
||||
{
|
||||
info->pages = RelationGetNumberOfBlocks(indexRelation);
|
||||
info->tuples = rel->tuples;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
double allvisfrac; /* dummy */
|
||||
|
||||
if (info->relam == BTREE_AM_OID)
|
||||
{
|
||||
/* For btrees, get tree height while we have the index open */
|
||||
info->tree_height = _bt_getrootheight(indexRelation);
|
||||
estimate_rel_size(indexRelation, NULL,
|
||||
&info->pages, &info->tuples, &allvisfrac);
|
||||
if (info->tuples > rel->tuples)
|
||||
info->tuples = rel->tuples;
|
||||
}
|
||||
|
||||
if (info->relam == BTREE_AM_OID)
|
||||
{
|
||||
/*
|
||||
* For btrees, get tree height while we have the index
|
||||
* open
|
||||
*/
|
||||
info->tree_height = _bt_getrootheight(indexRelation);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* For other index types, just set it to "unknown" for now */
|
||||
info->tree_height = -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* For other index types, just set it to "unknown" for now */
|
||||
/* Zero these out for partitioned indexes */
|
||||
info->pages = 0;
|
||||
info->tuples = 0.0;
|
||||
info->tree_height = -1;
|
||||
}
|
||||
|
||||
|
@ -5994,6 +5994,10 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
|
||||
rte = root->simple_rte_array[rel->relid];
|
||||
Assert(rte->rtekind == RTE_RELATION);
|
||||
|
||||
/* ignore partitioned tables. Any indexes here are not real indexes */
|
||||
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
|
||||
return false;
|
||||
|
||||
/* Search through the indexes to see if any match our problem */
|
||||
foreach(lc, rel->indexlist)
|
||||
{
|
||||
|
@ -653,7 +653,7 @@ typedef struct PartitionSchemeData *PartitionScheme;
|
||||
* lateral_referencers - relids of rels that reference this one laterally
|
||||
* (includes both direct and indirect lateral references)
|
||||
* indexlist - list of IndexOptInfo nodes for relation's indexes
|
||||
* (always NIL if it's not a table)
|
||||
* (always NIL if it's not a table or partitioned table)
|
||||
* pages - number of disk pages in relation (zero if not a table)
|
||||
* tuples - number of tuples in relation (not considering restrictions)
|
||||
* allvisfrac - fraction of disk pages that are marked all-visible
|
||||
@ -1097,11 +1097,11 @@ struct IndexOptInfo
|
||||
Oid *opfamily pg_node_attr(array_size(nkeycolumns));
|
||||
/* OIDs of opclass declared input data types */
|
||||
Oid *opcintype pg_node_attr(array_size(nkeycolumns));
|
||||
/* OIDs of btree opfamilies, if orderable */
|
||||
/* OIDs of btree opfamilies, if orderable. NULL if partitioned index */
|
||||
Oid *sortopfamily pg_node_attr(array_size(nkeycolumns));
|
||||
/* is sort order descending? */
|
||||
/* is sort order descending? or NULL if partitioned index */
|
||||
bool *reverse_sort pg_node_attr(array_size(nkeycolumns));
|
||||
/* do NULLs come first in the sort order? */
|
||||
/* do NULLs come first in the sort order? or NULL if partitioned index */
|
||||
bool *nulls_first pg_node_attr(array_size(nkeycolumns));
|
||||
/* opclass-specific options for columns */
|
||||
bytea **opclassoptions pg_node_attr(read_write_ignore);
|
||||
@ -1139,7 +1139,7 @@ struct IndexOptInfo
|
||||
|
||||
/*
|
||||
* Remaining fields are copied from the index AM's API struct
|
||||
* (IndexAmRoutine).
|
||||
* (IndexAmRoutine). These fields are not set for partitioned indexes.
|
||||
*/
|
||||
bool amcanorderbyop;
|
||||
bool amoptionalkey;
|
||||
|
@ -4860,6 +4860,16 @@ select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
|
||||
Filter: (a.id = i)
|
||||
(4 rows)
|
||||
|
||||
CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
|
||||
CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
|
||||
-- test join removals on a partitioned table
|
||||
explain (costs off)
|
||||
select a.* from a left join parted_b pb on a.b_id = pb.id;
|
||||
QUERY PLAN
|
||||
---------------
|
||||
Seq Scan on a
|
||||
(1 row)
|
||||
|
||||
rollback;
|
||||
create temp table parent (k int primary key, pd int);
|
||||
create temp table child (k int unique, cd int);
|
||||
|
@ -4874,7 +4874,7 @@ ANALYZE fract_t;
|
||||
SET max_parallel_workers_per_gather = 0;
|
||||
SET enable_partitionwise_join = on;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
|
||||
SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id ASC LIMIT 10;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------
|
||||
Limit
|
||||
@ -4891,7 +4891,7 @@ SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
|
||||
(11 rows)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id DESC LIMIT 10;
|
||||
SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DESC LIMIT 10;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------
|
||||
Limit
|
||||
|
@ -1709,6 +1709,13 @@ explain (costs off)
|
||||
select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
|
||||
lateral generate_series(1, q.id) gs(i) where q.id = gs.i;
|
||||
|
||||
CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
|
||||
CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
|
||||
|
||||
-- test join removals on a partitioned table
|
||||
explain (costs off)
|
||||
select a.* from a left join parted_b pb on a.b_id = pb.id;
|
||||
|
||||
rollback;
|
||||
|
||||
create temp table parent (k int primary key, pd int);
|
||||
|
@ -1157,10 +1157,10 @@ SET max_parallel_workers_per_gather = 0;
|
||||
SET enable_partitionwise_join = on;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
|
||||
SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id ASC LIMIT 10;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id DESC LIMIT 10;
|
||||
SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DESC LIMIT 10;
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE fract_t;
|
||||
|
Loading…
Reference in New Issue
Block a user