From e4106b2528727c4b48639c0e12bf2f70a766b910 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 9 Feb 2016 14:00:50 -0500 Subject: [PATCH] postgres_fdw: Push down joins to remote servers. If we've got a relatively straightforward join between two tables, this pushes that join down to the remote server instead of fetching the rows for each table and performing the join locally. Some cases are not handled yet, such as SEMI and ANTI joins. Also, we don't yet attempt to create presorted join paths or parameterized join paths even though these options do get tried for a base relation scan. Nevertheless, this seems likely to be a very significant win in many practical cases. Shigeru Hanada and Ashutosh Bapat, reviewed by Robert Haas, with additional review at various points by Tom Lane, Etsuro Fujita, KaiGai Kohei, and Jeevan Chalke. --- contrib/postgres_fdw/deparse.c | 541 ++++++-- .../postgres_fdw/expected/postgres_fdw.out | 1094 ++++++++++++++++- contrib/postgres_fdw/postgres_fdw.c | 835 +++++++++++-- contrib/postgres_fdw/postgres_fdw.h | 46 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 213 +++- doc/src/sgml/postgres-fdw.sgml | 10 + 6 files changed, 2469 insertions(+), 270 deletions(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 7c1a619b89..27ecdcd944 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -44,10 +44,12 @@ #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "commands/defrem.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/prep.h" +#include "optimizer/tlist.h" #include "optimizer/var.h" #include "parser/parsetree.h" #include "utils/builtins.h" @@ -96,6 +98,11 @@ typedef struct deparse_expr_cxt List **params_list; /* exprs that will become remote Params */ } deparse_expr_cxt; +#define REL_ALIAS_PREFIX "r" +/* Handy macro to add relation name qualification */ +#define ADD_REL_QUALIFIER(buf, varno) \ + appendStringInfo((buf), "%s%d.", REL_ALIAS_PREFIX, (varno)) + /* * Functions to determine whether an expression can be evaluated safely on * remote server. @@ -114,14 +121,17 @@ static void deparseTargetList(StringInfo buf, Relation rel, bool is_returning, Bitmapset *attrs_used, + bool qualify_col, List **retrieved_attrs); +static void deparseExplicitTargetList(List *tlist, List **retrieved_attrs, + deparse_expr_cxt *context); static void deparseReturningList(StringInfo buf, PlannerInfo *root, Index rtindex, Relation rel, bool trig_after_row, List *returningList, List **retrieved_attrs); static void deparseColumnRef(StringInfo buf, int varno, int varattno, - PlannerInfo *root); + PlannerInfo *root, bool qualify_col); static void deparseRelation(StringInfo buf, Relation rel); static void deparseExpr(Expr *expr, deparse_expr_cxt *context); static void deparseVar(Var *node, deparse_expr_cxt *context); @@ -142,11 +152,13 @@ static void printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context); static void printRemotePlaceholder(Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context); -static void deparseSelectSql(Bitmapset *attrs_used, List **retrieved_attrs, +static void deparseSelectSql(List *tlist, List **retrieved_attrs, deparse_expr_cxt *context); static void deparseLockingClause(deparse_expr_cxt *context); -static void appendWhereClause(List *exprs, deparse_expr_cxt *context); static void appendOrderByClause(List *pathkeys, deparse_expr_cxt *context); +static void appendConditions(List *exprs, deparse_expr_cxt *context); +static void deparseFromExprForRel(StringInfo buf, PlannerInfo *root, + RelOptInfo *joinrel, bool use_alias, List **params_list); /* @@ -269,7 +281,7 @@ foreign_expr_walker(Node *node, * Param's collation, ie it's not safe for it to have a * non-default collation. */ - if (var->varno == glob_cxt->foreignrel->relid && + if (bms_is_member(var->varno, glob_cxt->foreignrel->relids) && var->varlevelsup == 0) { /* Var belongs to foreign table */ @@ -703,43 +715,80 @@ deparse_type_name(Oid type_oid, int32 typemod) return format_type_with_typemod_qualified(type_oid, typemod); } +/* + * Build the targetlist for given relation to be deparsed as SELECT clause. + * + * The output targetlist contains the columns that need to be fetched from the + * foreign server for the given relation. + */ +List * +build_tlist_to_deparse(RelOptInfo *foreignrel) +{ + List *tlist = NIL; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; + + /* + * We require columns specified in foreignrel->reltargetlist and those + * required for evaluating the local conditions. + */ + tlist = add_to_flat_tlist(tlist, foreignrel->reltargetlist); + tlist = add_to_flat_tlist(tlist, + pull_var_clause((Node *) fpinfo->local_conds, + PVC_REJECT_AGGREGATES, + PVC_RECURSE_PLACEHOLDERS)); + + return tlist; +} + /* * Deparse SELECT statement for given relation into buf. * + * tlist contains the list of desired columns to be fetched from foreign server. + * For a base relation fpinfo->attrs_used is used to construct SELECT clause, + * hence the tlist is ignored for a base relation. + * * remote_conds is the list of conditions to be deparsed as WHERE clause. * - * pathkeys is the list of pathkeys to order the result by. - * - * List of columns selected is returned in retrieved_attrs. - * * If params_list is not NULL, it receives a list of Params and other-relation * Vars used in the clauses; these values must be transmitted to the remote * server as parameter values. * * If params_list is NULL, we're generating the query for EXPLAIN purposes, * so Params and other-relation Vars should be replaced by dummy values. + * + * pathkeys is the list of pathkeys to order the result by. + * + * List of columns selected is returned in retrieved_attrs. */ extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, - List *remote_conds, List *pathkeys, + List *tlist, List *remote_conds, List *pathkeys, List **retrieved_attrs, List **params_list) { - PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) rel->fdw_private; deparse_expr_cxt context; - /* Initialize params_list if caller needs one */ - if (params_list) - *params_list = NIL; + /* We handle relations for foreign tables and joins between those */ + Assert(rel->reloptkind == RELOPT_JOINREL || + rel->reloptkind == RELOPT_BASEREL || + rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + /* Fill portions of context common to join and base relation */ context.buf = buf; context.root = root; context.foreignrel = rel; context.params_list = params_list; - deparseSelectSql(fpinfo->attrs_used, retrieved_attrs, &context); + /* Construct SELECT clause and FROM clause */ + deparseSelectSql(tlist, retrieved_attrs, &context); + /* + * Construct WHERE clause + */ if (remote_conds) - appendWhereClause(remote_conds, &context); + { + appendStringInfo(buf, " WHERE "); + appendConditions(remote_conds, &context); + } /* Add ORDER BY clause if we found any useful pathkeys */ if (pathkeys) @@ -752,41 +801,58 @@ deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, /* * Construct a simple SELECT statement that retrieves desired columns * of the specified foreign table, and append it to "buf". The output - * contains just "SELECT ... FROM tablename". + * contains just "SELECT ... FROM ....". * * We also create an integer List of the columns being retrieved, which is * returned to *retrieved_attrs. + * + * tlist is the list of desired columns. Read prologue of + * deparseSelectStmtForRel() for details. */ static void -deparseSelectSql(Bitmapset *attrs_used, List **retrieved_attrs, - deparse_expr_cxt *context) +deparseSelectSql(List *tlist, List **retrieved_attrs, deparse_expr_cxt *context) { StringInfo buf = context->buf; RelOptInfo *foreignrel = context->foreignrel; PlannerInfo *root = context->root; - RangeTblEntry *rte = planner_rt_fetch(foreignrel->relid, root); - Relation rel; - - /* - * Core code already has some lock on each rel being planned, so we can - * use NoLock here. - */ - rel = heap_open(rte->relid, NoLock); + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; /* * Construct SELECT list */ appendStringInfoString(buf, "SELECT "); - deparseTargetList(buf, root, foreignrel->relid, rel, false, attrs_used, - retrieved_attrs); + + if (foreignrel->reloptkind == RELOPT_JOINREL) + { + /* For a join relation use the input tlist */ + deparseExplicitTargetList(tlist, retrieved_attrs, context); + } + else + { + /* + * For a base relation fpinfo->attrs_used gives the list of columns + * required to be fetched from the foreign server. + */ + RangeTblEntry *rte = planner_rt_fetch(foreignrel->relid, root); + + /* + * Core code already has some lock on each rel being planned, so we + * can use NoLock here. + */ + Relation rel = heap_open(rte->relid, NoLock); + + deparseTargetList(buf, root, foreignrel->relid, rel, false, + fpinfo->attrs_used, false, retrieved_attrs); + heap_close(rel, NoLock); + } /* * Construct FROM clause */ appendStringInfoString(buf, " FROM "); - deparseRelation(buf, rel); - - heap_close(rel, NoLock); + deparseFromExprForRel(buf, root, foreignrel, + (foreignrel->reloptkind == RELOPT_JOINREL), + context->params_list); } /* @@ -796,6 +862,8 @@ deparseSelectSql(Bitmapset *attrs_used, List **retrieved_attrs, * * The tlist text is appended to buf, and we also create an integer List * of the columns being retrieved, which is returned to *retrieved_attrs. + * + * If qualify_col is true, add relation alias before the column name. */ static void deparseTargetList(StringInfo buf, @@ -804,6 +872,7 @@ deparseTargetList(StringInfo buf, Relation rel, bool is_returning, Bitmapset *attrs_used, + bool qualify_col, List **retrieved_attrs) { TupleDesc tupdesc = RelationGetDescr(rel); @@ -836,7 +905,7 @@ deparseTargetList(StringInfo buf, appendStringInfoString(buf, " RETURNING "); first = false; - deparseColumnRef(buf, rtindex, i, root); + deparseColumnRef(buf, rtindex, i, root, qualify_col); *retrieved_attrs = lappend_int(*retrieved_attrs, i); } @@ -855,6 +924,8 @@ deparseTargetList(StringInfo buf, appendStringInfoString(buf, " RETURNING "); first = false; + if (qualify_col) + ADD_REL_QUALIFIER(buf, rtindex); appendStringInfoString(buf, "ctid"); *retrieved_attrs = lappend_int(*retrieved_attrs, @@ -876,64 +947,81 @@ deparseLockingClause(deparse_expr_cxt *context) StringInfo buf = context->buf; PlannerInfo *root = context->root; RelOptInfo *rel = context->foreignrel; + int relid = -1; - /* - * Add FOR UPDATE/SHARE if appropriate. We apply locking during the - * initial row fetch, rather than later on as is done for local tables. - * The extra roundtrips involved in trying to duplicate the local - * semantics exactly don't seem worthwhile (see also comments for - * RowMarkType). - * - * Note: because we actually run the query as a cursor, this assumes that - * DECLARE CURSOR ... FOR UPDATE is supported, which it isn't before 8.3. - */ - if (rel->relid == root->parse->resultRelation && - (root->parse->commandType == CMD_UPDATE || - root->parse->commandType == CMD_DELETE)) + while ((relid = bms_next_member(rel->relids, relid)) >= 0) { - /* Relation is UPDATE/DELETE target, so use FOR UPDATE */ - appendStringInfoString(buf, " FOR UPDATE"); - } - else - { - PlanRowMark *rc = get_plan_rowmark(root->rowMarks, rel->relid); - - if (rc) + /* + * Add FOR UPDATE/SHARE if appropriate. We apply locking during the + * initial row fetch, rather than later on as is done for local + * tables. The extra roundtrips involved in trying to duplicate the + * local semantics exactly don't seem worthwhile (see also comments + * for RowMarkType). + * + * Note: because we actually run the query as a cursor, this assumes + * that DECLARE CURSOR ... FOR UPDATE is supported, which it isn't + * before 8.3. + */ + if (relid == root->parse->resultRelation && + (root->parse->commandType == CMD_UPDATE || + root->parse->commandType == CMD_DELETE)) { - /* - * Relation is specified as a FOR UPDATE/SHARE target, so handle - * that. (But we could also see LCS_NONE, meaning this isn't a - * target relation after all.) - * - * For now, just ignore any [NO] KEY specification, since (a) it's - * not clear what that means for a remote table that we don't have - * complete information about, and (b) it wouldn't work anyway on - * older remote servers. Likewise, we don't worry about NOWAIT. - */ - switch (rc->strength) + /* Relation is UPDATE/DELETE target, so use FOR UPDATE */ + appendStringInfoString(buf, " FOR UPDATE"); + + /* Add the relation alias if we are here for a join relation */ + if (rel->reloptkind == RELOPT_JOINREL) + appendStringInfo(buf, " OF %s%d", REL_ALIAS_PREFIX, relid); + } + else + { + PlanRowMark *rc = get_plan_rowmark(root->rowMarks, relid); + + if (rc) { - case LCS_NONE: - /* No locking needed */ - break; - case LCS_FORKEYSHARE: - case LCS_FORSHARE: - appendStringInfoString(buf, " FOR SHARE"); - break; - case LCS_FORNOKEYUPDATE: - case LCS_FORUPDATE: - appendStringInfoString(buf, " FOR UPDATE"); - break; + /* + * Relation is specified as a FOR UPDATE/SHARE target, so + * handle that. (But we could also see LCS_NONE, meaning this + * isn't a target relation after all.) + * + * For now, just ignore any [NO] KEY specification, since (a) + * it's not clear what that means for a remote table that we + * don't have complete information about, and (b) it wouldn't + * work anyway on older remote servers. Likewise, we don't + * worry about NOWAIT. + */ + switch (rc->strength) + { + case LCS_NONE: + /* No locking needed */ + break; + case LCS_FORKEYSHARE: + case LCS_FORSHARE: + appendStringInfoString(buf, " FOR SHARE"); + break; + case LCS_FORNOKEYUPDATE: + case LCS_FORUPDATE: + appendStringInfoString(buf, " FOR UPDATE"); + break; + } + + /* Add the relation alias if we are here for a join relation */ + if (rel->reloptkind == RELOPT_JOINREL && + rc->strength != LCS_NONE) + appendStringInfo(buf, " OF %s%d", REL_ALIAS_PREFIX, relid); } } } } /* - * Deparse WHERE clauses in given list of RestrictInfos and append them to - * context->buf. + * Deparse conditions from the provided list and append them to buf. + * + * The conditions in the list are assumed to be ANDed. This function is used to + * deparse both WHERE clauses and JOIN .. ON clauses. */ static void -appendWhereClause(List *exprs, deparse_expr_cxt *context) +appendConditions(List *exprs, deparse_expr_cxt *context) { int nestlevel; ListCell *lc; @@ -945,16 +1033,25 @@ appendWhereClause(List *exprs, deparse_expr_cxt *context) foreach(lc, exprs) { - RestrictInfo *ri = (RestrictInfo *) lfirst(lc); + Expr *expr = (Expr *) lfirst(lc); + + /* + * Extract clause from RestrictInfo, if required. See comments in + * declaration of PgFdwRelationInfo for details. + */ + if (IsA(expr, RestrictInfo)) + { + RestrictInfo *ri = (RestrictInfo *) expr; + + expr = ri->clause; + } /* Connect expressions with "AND" and parenthesize each condition. */ - if (is_first) - appendStringInfoString(buf, " WHERE "); - else + if (!is_first) appendStringInfoString(buf, " AND "); appendStringInfoChar(buf, '('); - deparseExpr(ri->clause, context); + deparseExpr(expr, context); appendStringInfoChar(buf, ')'); is_first = false; @@ -963,6 +1060,156 @@ appendWhereClause(List *exprs, deparse_expr_cxt *context) reset_transmission_modes(nestlevel); } +/* Output join name for given join type */ +extern const char * +get_jointype_name(JoinType jointype) +{ + switch (jointype) + { + case JOIN_INNER: + return "INNER"; + + case JOIN_LEFT: + return "LEFT"; + + case JOIN_RIGHT: + return "RIGHT"; + + case JOIN_FULL: + return "FULL"; + + default: + /* Shouldn't come here, but protect from buggy code. */ + elog(ERROR, "unsupported join type %d", jointype); + } + + /* Keep compiler happy */ + return NULL; +} + +/* + * Deparse given targetlist and append it to context->buf. + * + * tlist is list of TargetEntry's which in turn contain Var nodes. + * + * retrieved_attrs is the list of continuously increasing integers starting + * from 1. It has same number of entries as tlist. + */ +static void +deparseExplicitTargetList(List *tlist, List **retrieved_attrs, + deparse_expr_cxt *context) +{ + ListCell *lc; + StringInfo buf = context->buf; + int i = 0; + + *retrieved_attrs = NIL; + + foreach(lc, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Var *var; + + /* Extract expression if TargetEntry node */ + Assert(IsA(tle, TargetEntry)); + var = (Var *) tle->expr; + /* We expect only Var nodes here */ + Assert(IsA(var, Var)); + + if (i > 0) + appendStringInfoString(buf, ", "); + deparseVar(var, context); + + *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1); + + i++; + } + + if (i == 0) + appendStringInfoString(buf, "NULL"); +} + +/* + * Construct FROM clause for given relation + * + * The function constructs ... JOIN ... ON ... for join relation. For a base + * relation it just returns schema-qualified tablename, with the appropriate + * alias if so requested. + */ +void +deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, + bool use_alias, List **params_list) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; + + if (foreignrel->reloptkind == RELOPT_JOINREL) + { + RelOptInfo *rel_o = fpinfo->outerrel; + RelOptInfo *rel_i = fpinfo->innerrel; + StringInfoData join_sql_o; + StringInfoData join_sql_i; + + /* Deparse outer relation */ + initStringInfo(&join_sql_o); + deparseFromExprForRel(&join_sql_o, root, rel_o, true, params_list); + + /* Deparse inner relation */ + initStringInfo(&join_sql_i); + deparseFromExprForRel(&join_sql_i, root, rel_i, true, params_list); + + /* + * For a join relation FROM clause entry is deparsed as + * + * ((outer relation) (inner relation) ON (joinclauses) + */ + appendStringInfo(buf, "(%s %s JOIN %s ON ", join_sql_o.data, + get_jointype_name(fpinfo->jointype), join_sql_i.data); + + /* Append join clause; (TRUE) if no join clause */ + if (fpinfo->joinclauses) + { + deparse_expr_cxt context; + + context.buf = buf; + context.foreignrel = foreignrel; + context.root = root; + context.params_list = params_list; + + appendStringInfo(buf, "("); + appendConditions(fpinfo->joinclauses, &context); + appendStringInfo(buf, ")"); + } + else + appendStringInfoString(buf, "(TRUE)"); + + /* End the FROM clause entry. */ + appendStringInfo(buf, ")"); + } + else + { + RangeTblEntry *rte = planner_rt_fetch(foreignrel->relid, root); + + /* + * Core code already has some lock on each rel being planned, so we + * can use NoLock here. + */ + Relation rel = heap_open(rte->relid, NoLock); + + deparseRelation(buf, rel); + + /* + * Add a unique alias to avoid any conflict in relation names due to + * pulled up subqueries in the query being built for a pushed down + * join. + */ + if (use_alias) + appendStringInfo(buf, " %s%d", REL_ALIAS_PREFIX, foreignrel->relid); + + heap_close(rel, NoLock); + } + return; +} + /* * deparse remote INSERT statement * @@ -996,7 +1243,7 @@ deparseInsertSql(StringInfo buf, PlannerInfo *root, appendStringInfoString(buf, ", "); first = false; - deparseColumnRef(buf, rtindex, attnum, root); + deparseColumnRef(buf, rtindex, attnum, root, false); } appendStringInfoString(buf, ") VALUES ("); @@ -1057,7 +1304,7 @@ deparseUpdateSql(StringInfo buf, PlannerInfo *root, appendStringInfoString(buf, ", "); first = false; - deparseColumnRef(buf, rtindex, attnum, root); + deparseColumnRef(buf, rtindex, attnum, root, false); appendStringInfo(buf, " = $%d", pindex); pindex++; } @@ -1120,7 +1367,7 @@ deparseReturningList(StringInfo buf, PlannerInfo *root, } if (attrs_used != NULL) - deparseTargetList(buf, root, rtindex, rel, true, attrs_used, + deparseTargetList(buf, root, rtindex, rel, true, attrs_used, false, retrieved_attrs); else *retrieved_attrs = NIL; @@ -1212,45 +1459,97 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs) /* * Construct name to use for given column, and emit it into buf. * If it has a column_name FDW option, use that instead of attribute name. + * + * If qualify_col is true, qualify column name with the alias of relation. */ static void -deparseColumnRef(StringInfo buf, int varno, int varattno, PlannerInfo *root) +deparseColumnRef(StringInfo buf, int varno, int varattno, PlannerInfo *root, + bool qualify_col) { RangeTblEntry *rte; - char *colname = NULL; - List *options; - ListCell *lc; - /* varno must not be any of OUTER_VAR, INNER_VAR and INDEX_VAR. */ - Assert(!IS_SPECIAL_VARNO(varno)); - - /* Get RangeTblEntry from array in PlannerInfo. */ - rte = planner_rt_fetch(varno, root); - - /* - * If it's a column of a foreign table, and it has the column_name FDW - * option, use that value. - */ - options = GetForeignColumnOptions(rte->relid, varattno); - foreach(lc, options) + /* varattno can be a whole-row reference, ctid or a regular table column */ + if (varattno == SelfItemPointerAttributeNumber) { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "column_name") == 0) - { - colname = defGetString(def); - break; - } + if (qualify_col) + ADD_REL_QUALIFIER(buf, varno); + appendStringInfoString(buf, "ctid"); } + else if (varattno == 0) + { + /* Whole row reference */ + Relation rel; + Bitmapset *attrs_used; - /* - * If it's a column of a regular table or it doesn't have column_name FDW - * option, use attribute name. - */ - if (colname == NULL) - colname = get_relid_attribute_name(rte->relid, varattno); + /* Required only to be passed down to deparseTargetList(). */ + List *retrieved_attrs; - appendStringInfoString(buf, quote_identifier(colname)); + /* Get RangeTblEntry from array in PlannerInfo. */ + rte = planner_rt_fetch(varno, root); + + /* + * The lock on the relation will be held by upper callers, so it's + * fine to open it with no lock here. + */ + rel = heap_open(rte->relid, NoLock); + + /* + * The local name of the foreign table can not be recognized by the + * foreign server and the table it references on foreign server might + * have different column ordering or different columns than those + * declared locally. Hence we have to deparse whole-row reference as + * ROW(columns referenced locally). Construct this by deparsing a + * "whole row" attribute. + */ + attrs_used = bms_add_member(NULL, + 0 - FirstLowInvalidHeapAttributeNumber); + appendStringInfoString(buf, "ROW("); + deparseTargetList(buf, root, varno, rel, false, attrs_used, qualify_col, + &retrieved_attrs); + appendStringInfoString(buf, ")"); + heap_close(rel, NoLock); + bms_free(attrs_used); + } + else + { + char *colname = NULL; + List *options; + ListCell *lc; + + /* varno must not be any of OUTER_VAR, INNER_VAR and INDEX_VAR. */ + Assert(!IS_SPECIAL_VARNO(varno)); + + /* Get RangeTblEntry from array in PlannerInfo. */ + rte = planner_rt_fetch(varno, root); + + /* + * If it's a column of a foreign table, and it has the column_name FDW + * option, use that value. + */ + options = GetForeignColumnOptions(rte->relid, varattno); + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "column_name") == 0) + { + colname = defGetString(def); + break; + } + } + + /* + * If it's a column of a regular table or it doesn't have column_name + * FDW option, use attribute name. + */ + if (colname == NULL) + colname = get_relid_attribute_name(rte->relid, varattno); + + if (qualify_col) + ADD_REL_QUALIFIER(buf, varno); + + appendStringInfoString(buf, quote_identifier(colname)); + } } /* @@ -1395,14 +1694,12 @@ deparseExpr(Expr *node, deparse_expr_cxt *context) static void deparseVar(Var *node, deparse_expr_cxt *context) { - StringInfo buf = context->buf; + bool qualify_col = (context->foreignrel->reloptkind == RELOPT_JOINREL); - if (node->varno == context->foreignrel->relid && + if (bms_is_member(node->varno, context->foreignrel->relids) && node->varlevelsup == 0) - { - /* Var belongs to foreign table */ - deparseColumnRef(buf, node->varno, node->varattno, context->root); - } + deparseColumnRef(context->buf, node->varno, node->varattno, + context->root, qualify_col); else { /* Treat like a Param */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index f6210241c5..1c943b6ee2 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -9,11 +9,16 @@ DO $d$ OPTIONS (dbname '$$||current_database()||$$', port '$$||current_setting('port')||$$' )$$; + EXECUTE $$CREATE SERVER loopback2 FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; END; $d$; CREATE USER MAPPING FOR public SERVER testserver1 OPTIONS (user 'value', password 'value'); CREATE USER MAPPING FOR CURRENT_USER SERVER loopback; +CREATE USER MAPPING FOR CURRENT_USER SERVER loopback2; -- =================================================================== -- create objects used through FDW loopback server -- =================================================================== @@ -35,6 +40,18 @@ CREATE TABLE "S 1"."T 2" ( c2 text, CONSTRAINT t2_pkey PRIMARY KEY (c1) ); +CREATE TABLE "S 1"."T 3" ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text, + CONSTRAINT t3_pkey PRIMARY KEY (c1) +); +CREATE TABLE "S 1"."T 4" ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text, + CONSTRAINT t4_pkey PRIMARY KEY (c1) +); INSERT INTO "S 1"."T 1" SELECT id, id % 10, @@ -49,8 +66,22 @@ INSERT INTO "S 1"."T 2" SELECT id, 'AAA' || to_char(id, 'FM000') FROM generate_series(1, 100) id; +INSERT INTO "S 1"."T 3" + SELECT id, + id + 1, + 'AAA' || to_char(id, 'FM000') + FROM generate_series(1, 100) id; +DELETE FROM "S 1"."T 3" WHERE c1 % 2 != 0; -- delete for outer join tests +INSERT INTO "S 1"."T 4" + SELECT id, + id + 1, + 'AAA' || to_char(id, 'FM000') + FROM generate_series(1, 100) id; +DELETE FROM "S 1"."T 4" WHERE c1 % 3 != 0; -- delete for outer join tests ANALYZE "S 1"."T 1"; ANALYZE "S 1"."T 2"; +ANALYZE "S 1"."T 3"; +ANALYZE "S 1"."T 4"; -- =================================================================== -- create foreign tables -- =================================================================== @@ -78,6 +109,21 @@ CREATE FOREIGN TABLE ft2 ( c8 user_enum ) SERVER loopback; ALTER FOREIGN TABLE ft2 DROP COLUMN cx; +CREATE FOREIGN TABLE ft4 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 3'); +CREATE FOREIGN TABLE ft5 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 4'); +CREATE FOREIGN TABLE ft6 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback2 OPTIONS (schema_name 'S 1', table_name 'T 4'); -- =================================================================== -- tests for validator -- =================================================================== @@ -127,12 +173,15 @@ ALTER FOREIGN TABLE ft2 OPTIONS (schema_name 'S 1', table_name 'T 1'); ALTER FOREIGN TABLE ft1 ALTER COLUMN c1 OPTIONS (column_name 'C 1'); ALTER FOREIGN TABLE ft2 ALTER COLUMN c1 OPTIONS (column_name 'C 1'); \det+ - List of foreign tables - Schema | Table | Server | FDW Options | Description ---------+-------+----------+---------------------------------------+------------- - public | ft1 | loopback | (schema_name 'S 1', table_name 'T 1') | - public | ft2 | loopback | (schema_name 'S 1', table_name 'T 1') | -(2 rows) + List of foreign tables + Schema | Table | Server | FDW Options | Description +--------+-------+-----------+---------------------------------------+------------- + public | ft1 | loopback | (schema_name 'S 1', table_name 'T 1') | + public | ft2 | loopback | (schema_name 'S 1', table_name 'T 1') | + public | ft4 | loopback | (schema_name 'S 1', table_name 'T 3') | + public | ft5 | loopback | (schema_name 'S 1', table_name 'T 4') | + public | ft6 | loopback2 | (schema_name 'S 1', table_name 'T 4') | +(5 rows) -- Now we should be able to run ANALYZE. -- To exercise multiple code paths, we use local stats on ft1 @@ -281,22 +330,6 @@ SELECT COUNT(*) FROM ft1 t1; 1000 (1 row) --- join two tables -SELECT t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; - c1 ------ - 101 - 102 - 103 - 104 - 105 - 106 - 107 - 108 - 109 - 110 -(10 rows) - -- subquery SELECT * FROM ft1 t1 WHERE t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 <= 10) ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 @@ -513,16 +546,16 @@ EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c8 = 'foo'; -- can't Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" (4 rows) --- parameterized remote path +-- parameterized remote path for foreign table EXPLAIN (VERBOSE, COSTS false) - SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2; + SELECT * FROM "S 1"."T 1" a, ft2 b WHERE a."C 1" = 47 AND b.c1 = a.c2; QUERY PLAN ------------------------------------------------------------------------------------------------------------- Nested Loop - Output: a.c1, a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8, b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8 - -> Foreign Scan on public.ft2 a - Output: a.c1, a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8 - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 47)) + Output: a."C 1", a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8, b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8 + -> Index Scan using t1_pkey on "S 1"."T 1" a + Output: a."C 1", a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8 + Index Cond: (a."C 1" = 47) -> Foreign Scan on public.ft2 b Output: b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8 Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (($1::integer = "C 1")) @@ -826,23 +859,946 @@ SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2; 9 (1 row) +-- =================================================================== +-- JOIN queries +-- =================================================================== +-- Analyze ft4 and ft5 so that we have better statistics. These tables do not +-- have use_remote_estimate set. +ANALYZE ft4; +ANALYZE ft5; +-- join two tables +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1, t1.c3 + -> Sort + Output: t1.c1, t2.c1, t1.c3 + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c1, t1.c3 + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) +(9 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + c1 | c1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +-- join three tables +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c2, t3.c3, t1.c3 + -> Sort + Output: t1.c1, t2.c2, t3.c3, t1.c3 + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c2, t3.c3, t1.c3 + Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3) + Remote SQL: SELECT r1."C 1", r1.c3, r2.c2, r4.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) INNER JOIN "S 1"."T 3" r4 ON (TRUE)) WHERE ((r1."C 1" = r4.c1)) AND ((r1."C 1" = r2."C 1")) +(9 rows) + +SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10; + c1 | c2 | c3 +----+----+-------- + 22 | 2 | AAA022 + 24 | 4 | AAA024 + 26 | 6 | AAA026 + 28 | 8 | AAA028 + 30 | 0 | AAA030 + 32 | 2 | AAA032 + 34 | 4 | AAA034 + 36 | 6 | AAA036 + 38 | 8 | AAA038 + 40 | 0 | AAA040 +(10 rows) + +-- left outer join +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1 + -> Sort + Output: t1.c1, t2.c1 + Sort Key: t1.c1, t2.c1 + -> Foreign Scan + Output: t1.c1, t2.c1 + Relations: (public.ft4 t1) LEFT JOIN (public.ft5 t2) + Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) +(9 rows) + +SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; + c1 | c1 +----+---- + 22 | + 24 | 24 + 26 | + 28 | + 30 | 30 + 32 | + 34 | + 36 | 36 + 38 | + 40 | +(10 rows) + +-- left outer join + placement of clauses. +-- clauses within the nullable side are not pulled up, but top level clause on +-- non-nullable side is pushed into non-nullable side +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) WHERE t1.c1 < 10; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: t1.c1, t1.c2, ft5.c1, ft5.c2 + Relations: (public.ft4 t1) LEFT JOIN (public.ft5) + Remote SQL: SELECT r1.c1, r1.c2, r4.c1, r4.c2 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r4 ON (((r1.c1 = r4.c1)) AND ((r4.c1 < 10)))) WHERE ((r1.c1 < 10)) +(4 rows) + +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) WHERE t1.c1 < 10; + c1 | c2 | c1 | c2 +----+----+----+---- + 2 | 3 | | + 4 | 5 | | + 6 | 7 | 6 | 7 + 8 | 9 | | +(4 rows) + +-- clauses within the nullable side are not pulled up, but the top level clause +-- on nullable side is not pushed down into nullable side +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) + WHERE (t2.c1 < 10 OR t2.c1 IS NULL) AND t1.c1 < 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: t1.c1, t1.c2, ft5.c1, ft5.c2 + Relations: (public.ft4 t1) LEFT JOIN (public.ft5) + Remote SQL: SELECT r1.c1, r1.c2, r4.c1, r4.c2 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r4 ON (((r1.c1 = r4.c1)) AND ((r4.c1 < 10)))) WHERE (((r4.c1 < 10) OR (r4.c1 IS NULL))) AND ((r1.c1 < 10)) +(4 rows) + +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) + WHERE (t2.c1 < 10 OR t2.c1 IS NULL) AND t1.c1 < 10; + c1 | c2 | c1 | c2 +----+----+----+---- + 2 | 3 | | + 4 | 5 | | + 6 | 7 | 6 | 7 + 8 | 9 | | +(4 rows) + +-- right outer join +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft5 t1 RIGHT JOIN ft4 t2 ON (t1.c1 = t2.c1) ORDER BY t2.c1, t1.c1 OFFSET 10 LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1 + -> Sort + Output: t1.c1, t2.c1 + Sort Key: t2.c1, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c1 + Relations: (public.ft4 t2) LEFT JOIN (public.ft5 t1) + Remote SQL: SELECT r2.c1, r1.c1 FROM ("S 1"."T 3" r2 LEFT JOIN "S 1"."T 4" r1 ON (((r1.c1 = r2.c1)))) +(9 rows) + +SELECT t1.c1, t2.c1 FROM ft5 t1 RIGHT JOIN ft4 t2 ON (t1.c1 = t2.c1) ORDER BY t2.c1, t1.c1 OFFSET 10 LIMIT 10; + c1 | c1 +----+---- + | 22 + 24 | 24 + | 26 + | 28 + 30 | 30 + | 32 + | 34 + 36 | 36 + | 38 + | 40 +(10 rows) + +-- full outer join +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1 + -> Sort + Output: t1.c1, t2.c1 + Sort Key: t1.c1, t2.c1 + -> Foreign Scan + Output: t1.c1, t2.c1 + Relations: (public.ft4 t1) FULL JOIN (public.ft5 t2) + Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) +(9 rows) + +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10; + c1 | c1 +-----+---- + 92 | + 94 | + 96 | 96 + 98 | + 100 | + | 3 + | 9 + | 15 + | 21 + | 27 +(10 rows) + +-- full outer join + WHERE clause, only matched rows +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Limit + Output: t1.c1, t2.c1 + -> Sort + Output: t1.c1, t2.c1 + Sort Key: t1.c1, t2.c1 + -> Foreign Scan + Output: t1.c1, t2.c1 + Relations: (public.ft4 t1) FULL JOIN (public.ft5 t2) + Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) WHERE (((r1.c1 = r2.c1) OR (r1.c1 IS NULL))) +(9 rows) + +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; + c1 | c1 +----+---- + 66 | 66 + 72 | 72 + 78 | 78 + 84 | 84 + 90 | 90 + 96 | 96 + | 3 + | 9 + | 15 + | 21 +(10 rows) + +-- join two tables with FOR UPDATE clause +-- tests whole-row reference for row marks +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> LockRows + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> Sort + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) FOR UPDATE OF r1 + -> Merge Join + Output: t1.c1, t1.c3, t1.*, t2.c1, t2.* + Merge Cond: (t1.c1 = t2.c1) + -> Sort + Output: t1.c1, t1.c3, t1.* + Sort Key: t1.c1 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1, t1.c3, t1.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE + -> Sort + Output: t2.c1, t2.* + Sort Key: t2.c1 + -> Foreign Scan on public.ft2 t2 + Output: t2.c1, t2.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(26 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1; + c1 | c1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> LockRows + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> Sort + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) FOR UPDATE OF r1 FOR UPDATE OF r2 + -> Merge Join + Output: t1.c1, t1.c3, t1.*, t2.c1, t2.* + Merge Cond: (t1.c1 = t2.c1) + -> Sort + Output: t1.c1, t1.c3, t1.* + Sort Key: t1.c1 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1, t1.c3, t1.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE + -> Sort + Output: t2.c1, t2.* + Sort Key: t2.c1 + -> Foreign Scan on public.ft2 t2 + Output: t2.c1, t2.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE +(26 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE; + c1 | c1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +-- join two tables with FOR SHARE clause +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> LockRows + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> Sort + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) FOR SHARE OF r1 + -> Merge Join + Output: t1.c1, t1.c3, t1.*, t2.c1, t2.* + Merge Cond: (t1.c1 = t2.c1) + -> Sort + Output: t1.c1, t1.c3, t1.* + Sort Key: t1.c1 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1, t1.c3, t1.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE + -> Sort + Output: t2.c1, t2.* + Sort Key: t2.c1 + -> Foreign Scan on public.ft2 t2 + Output: t2.c1, t2.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(26 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1; + c1 | c1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> LockRows + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + -> Sort + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c1, t1.c3, t1.*, t2.* + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) FOR SHARE OF r1 FOR SHARE OF r2 + -> Merge Join + Output: t1.c1, t1.c3, t1.*, t2.c1, t2.* + Merge Cond: (t1.c1 = t2.c1) + -> Sort + Output: t1.c1, t1.c3, t1.* + Sort Key: t1.c1 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1, t1.c3, t1.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE + -> Sort + Output: t2.c1, t2.* + Sort Key: t2.c1 + -> Foreign Scan on public.ft2 t2 + Output: t2.c1, t2.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE +(26 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE; + c1 | c1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +-- join in CTE +EXPLAIN (COSTS false, VERBOSE) +WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ + Limit + Output: t.c1_1, t.c2_1, t.c1_3 + CTE t + -> Foreign Scan + Output: t1.c1, t1.c3, t2.c1 + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) + -> Sort + Output: t.c1_1, t.c2_1, t.c1_3 + Sort Key: t.c1_3, t.c1_1 + -> CTE Scan on t + Output: t.c1_1, t.c2_1, t.c1_3 +(12 rows) + +WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; + c1_1 | c2_1 +------+------ + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +-- ctid with whole-row reference +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3 + -> Sort + Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3 + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3 + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1.ctid, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r1."C 1", r1.c3, ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) +(9 rows) + +SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + ctid | t1 | t2 | c1 +--------+--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------+----- + (1,4) | (101,1,00101,"Fri Jan 02 00:00:00 1970 PST","Fri Jan 02 00:00:00 1970",1,"1 ",foo) | (101,1,00101,"Fri Jan 02 00:00:00 1970 PST","Fri Jan 02 00:00:00 1970",1,"1 ",foo) | 101 + (1,5) | (102,2,00102,"Sat Jan 03 00:00:00 1970 PST","Sat Jan 03 00:00:00 1970",2,"2 ",foo) | (102,2,00102,"Sat Jan 03 00:00:00 1970 PST","Sat Jan 03 00:00:00 1970",2,"2 ",foo) | 102 + (1,6) | (103,3,00103,"Sun Jan 04 00:00:00 1970 PST","Sun Jan 04 00:00:00 1970",3,"3 ",foo) | (103,3,00103,"Sun Jan 04 00:00:00 1970 PST","Sun Jan 04 00:00:00 1970",3,"3 ",foo) | 103 + (1,7) | (104,4,00104,"Mon Jan 05 00:00:00 1970 PST","Mon Jan 05 00:00:00 1970",4,"4 ",foo) | (104,4,00104,"Mon Jan 05 00:00:00 1970 PST","Mon Jan 05 00:00:00 1970",4,"4 ",foo) | 104 + (1,8) | (105,5,00105,"Tue Jan 06 00:00:00 1970 PST","Tue Jan 06 00:00:00 1970",5,"5 ",foo) | (105,5,00105,"Tue Jan 06 00:00:00 1970 PST","Tue Jan 06 00:00:00 1970",5,"5 ",foo) | 105 + (1,9) | (106,6,00106,"Wed Jan 07 00:00:00 1970 PST","Wed Jan 07 00:00:00 1970",6,"6 ",foo) | (106,6,00106,"Wed Jan 07 00:00:00 1970 PST","Wed Jan 07 00:00:00 1970",6,"6 ",foo) | 106 + (1,10) | (107,7,00107,"Thu Jan 08 00:00:00 1970 PST","Thu Jan 08 00:00:00 1970",7,"7 ",foo) | (107,7,00107,"Thu Jan 08 00:00:00 1970 PST","Thu Jan 08 00:00:00 1970",7,"7 ",foo) | 107 + (1,11) | (108,8,00108,"Fri Jan 09 00:00:00 1970 PST","Fri Jan 09 00:00:00 1970",8,"8 ",foo) | (108,8,00108,"Fri Jan 09 00:00:00 1970 PST","Fri Jan 09 00:00:00 1970",8,"8 ",foo) | 108 + (1,12) | (109,9,00109,"Sat Jan 10 00:00:00 1970 PST","Sat Jan 10 00:00:00 1970",9,"9 ",foo) | (109,9,00109,"Sat Jan 10 00:00:00 1970 PST","Sat Jan 10 00:00:00 1970",9,"9 ",foo) | 109 + (1,13) | (110,0,00110,"Sun Jan 11 00:00:00 1970 PST","Sun Jan 11 00:00:00 1970",0,"0 ",foo) | (110,0,00110,"Sun Jan 11 00:00:00 1970 PST","Sun Jan 11 00:00:00 1970",0,"0 ",foo) | 110 +(10 rows) + +-- SEMI JOIN, not pushed down +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +---------------------------------------------------------------------------------- + Limit + Output: t1.c1 + -> Merge Semi Join + Output: t1.c1 + Merge Cond: (t1.c1 = t2.c1) + -> Foreign Scan on public.ft1 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC + -> Materialize + Output: t2.c1 + -> Foreign Scan on public.ft2 t2 + Output: t2.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC +(13 rows) + +SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10; + c1 +----- + 101 + 102 + 103 + 104 + 105 + 106 + 107 + 108 + 109 + 110 +(10 rows) + +-- ANTI JOIN, not pushed down +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +---------------------------------------------------------------------------- + Limit + Output: t1.c1 + -> Merge Anti Join + Output: t1.c1 + Merge Cond: (t1.c1 = t2.c2) + -> Foreign Scan on public.ft1 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC + -> Materialize + Output: t2.c2 + -> Foreign Scan on public.ft2 t2 + Output: t2.c2 + Remote SQL: SELECT c2 FROM "S 1"."T 1" ORDER BY c2 ASC +(13 rows) + +SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10; + c1 +----- + 110 + 111 + 112 + 113 + 114 + 115 + 116 + 117 + 118 + 119 +(10 rows) + +-- CROSS JOIN, not pushed down +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1 + -> Sort + Output: t1.c1, t2.c1 + Sort Key: t1.c1, t2.c1 + -> Nested Loop + Output: t1.c1, t2.c1 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" + -> Materialize + Output: t2.c1 + -> Foreign Scan on public.ft2 t2 + Output: t2.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" +(15 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; + c1 | c1 +----+----- + 1 | 101 + 1 | 102 + 1 | 103 + 1 | 104 + 1 | 105 + 1 | 106 + 1 | 107 + 1 | 108 + 1 | 109 + 1 | 110 +(10 rows) + +-- different server, not pushed down. No result expected. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN ft6 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +---------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1 + -> Merge Join + Output: t1.c1, t2.c1 + Merge Cond: (t2.c1 = t1.c1) + -> Foreign Scan on public.ft6 t2 + Output: t2.c1, t2.c2, t2.c3 + Remote SQL: SELECT c1 FROM "S 1"."T 4" ORDER BY c1 ASC + -> Materialize + Output: t1.c1, t1.c2, t1.c3 + -> Foreign Scan on public.ft5 t1 + Output: t1.c1, t1.c2, t1.c3 + Remote SQL: SELECT c1 FROM "S 1"."T 4" ORDER BY c1 ASC +(13 rows) + +SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN ft6 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; + c1 | c1 +----+---- +(0 rows) + +-- unsafe join conditions (c8 has a UDT), not pushed down. Practically a CROSS +-- JOIN since c8 in both tables has same value. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c8 = t2.c8) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1 + -> Sort + Output: t1.c1, t2.c1 + Sort Key: t1.c1, t2.c1 + -> Merge Left Join + Output: t1.c1, t2.c1 + Merge Cond: (t1.c8 = t2.c8) + -> Sort + Output: t1.c1, t1.c8 + Sort Key: t1.c8 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1, t1.c8 + Remote SQL: SELECT "C 1", c8 FROM "S 1"."T 1" + -> Sort + Output: t2.c1, t2.c8 + Sort Key: t2.c8 + -> Foreign Scan on public.ft2 t2 + Output: t2.c1, t2.c8 + Remote SQL: SELECT "C 1", c8 FROM "S 1"."T 1" +(20 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c8 = t2.c8) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; + c1 | c1 +----+----- + 1 | 101 + 1 | 102 + 1 | 103 + 1 | 104 + 1 | 105 + 1 | 106 + 1 | 107 + 1 | 108 + 1 | 109 + 1 | 110 +(10 rows) + +-- unsafe conditions on one side (c8 has a UDT), not pushed down. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = 'foo' ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +----------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1, t1.c3 + -> Sort + Output: t1.c1, t2.c1, t1.c3 + Sort Key: t1.c3, t1.c1 + -> Hash Right Join + Output: t1.c1, t2.c1, t1.c3 + Hash Cond: (t2.c1 = t1.c1) + -> Foreign Scan on public.ft2 t2 + Output: t2.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" + -> Hash + Output: t1.c1, t1.c3 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1, t1.c3 + Filter: (t1.c8 = 'foo'::user_enum) + Remote SQL: SELECT "C 1", c3, c8 FROM "S 1"."T 1" +(17 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = 'foo' ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + c1 | c1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +-- join where unsafe to pushdown condition in WHERE clause has a column not +-- in the SELECT clause. In this test unsafe clause needs to have column +-- references from both joining sides so that the clause is not pushed down +-- into one of the joining sides. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = t2.c8 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Limit + Output: t1.c1, t2.c1, t1.c3 + -> Sort + Output: t1.c1, t2.c1, t1.c3 + Sort Key: t1.c3, t1.c1 + -> Foreign Scan + Output: t1.c1, t2.c1, t1.c3 + Filter: (t1.c8 = t2.c8) + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, r2."C 1", r1.c8, r2.c8 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) +(10 rows) + +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = t2.c8 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; + c1 | c1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +-- Aggregate after UNION, for testing setrefs +EXPLAIN (COSTS false, VERBOSE) +SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) UNION SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) AS t (t1c1, t2c1) GROUP BY t1c1 ORDER BY t1c1 OFFSET 100 LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, (avg((t1.c1 + t2.c1))) + -> Sort + Output: t1.c1, (avg((t1.c1 + t2.c1))) + Sort Key: t1.c1 + -> HashAggregate + Output: t1.c1, avg((t1.c1 + t2.c1)) + Group Key: t1.c1 + -> HashAggregate + Output: t1.c1, t2.c1 + Group Key: t1.c1, t2.c1 + -> Append + -> Foreign Scan + Output: t1.c1, t2.c1 + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) + -> Foreign Scan + Output: t1_1.c1, t2_1.c1 + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) +(20 rows) + +SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) UNION SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) AS t (t1c1, t2c1) GROUP BY t1c1 ORDER BY t1c1 OFFSET 100 LIMIT 10; + t1c1 | avg +------+---------------------- + 101 | 202.0000000000000000 + 102 | 204.0000000000000000 + 103 | 206.0000000000000000 + 104 | 208.0000000000000000 + 105 | 210.0000000000000000 + 106 | 212.0000000000000000 + 107 | 214.0000000000000000 + 108 | 216.0000000000000000 + 109 | 218.0000000000000000 + 110 | 220.0000000000000000 +(10 rows) + +-- join with lateral reference +EXPLAIN (COSTS false, VERBOSE) +SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1."C 1" + -> Nested Loop + Output: t1."C 1" + -> Index Scan using t1_pkey on "S 1"."T 1" t1 + Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8 + -> HashAggregate + Output: t2.c1, t3.c1 + Group Key: t2.c1, t3.c1 + -> Foreign Scan + Output: t2.c1, t3.c1 + Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3) + Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer)) +(13 rows) + +SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10; + C 1 +----- + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +(10 rows) + +-- create another user for permission, user mapping, effective user tests +CREATE USER view_owner; +-- grant privileges on ft4 and ft5 to view_owner +GRANT ALL ON ft4 TO view_owner; +GRANT ALL ON ft5 TO view_owner; +-- prepare statement with current session user +PREPARE join_stmt AS SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; +EXPLAIN (COSTS OFF, VERBOSE) EXECUTE join_stmt; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c1 + -> Sort + Output: t1.c1, t2.c1 + Sort Key: t1.c1, t2.c1 + -> Foreign Scan + Output: t1.c1, t2.c1 + Relations: (public.ft4 t1) LEFT JOIN (public.ft5 t2) + Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) +(9 rows) + +EXECUTE join_stmt; + c1 | c1 +----+---- + 22 | + 24 | 24 + 26 | + 28 | + 30 | 30 + 32 | + 34 | + 36 | 36 + 38 | + 40 | +(10 rows) + +-- change the session user to view_owner and execute the statement. Because of +-- change in session user, the plan should get invalidated and created again. +-- While creating the plan, it should throw error since there is no user mapping +-- available for view_owner. +SET SESSION ROLE view_owner; +EXPLAIN (COSTS OFF, VERBOSE) EXECUTE join_stmt; +ERROR: user mapping not found for "view_owner" +EXECUTE join_stmt; +ERROR: user mapping not found for "view_owner" +RESET ROLE; +DEALLOCATE join_stmt; +CREATE VIEW v_ft5 AS SELECT * FROM ft5; +-- change owner of v_ft5 to view_owner so that the effective user for scan on +-- ft5 is view_owner and not the current user. +ALTER VIEW v_ft5 OWNER TO view_owner; +-- create a public user mapping for loopback server +-- drop user mapping for current_user. +DROP USER MAPPING FOR CURRENT_USER SERVER loopback; +CREATE USER MAPPING FOR PUBLIC SERVER loopback; +-- different effective user for permission check, but same user mapping for the +-- joining sides, join pushed down, no result expected. +PREPARE join_stmt AS SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN v_ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +EXPLAIN (COSTS false, VERBOSE) EXECUTE join_stmt; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, ft5.c1 + -> Sort + Output: t1.c1, ft5.c1 + Sort Key: t1.c1 + -> Foreign Scan + Output: t1.c1, ft5.c1 + Relations: (public.ft5 t1) INNER JOIN (public.ft5) + Remote SQL: SELECT r1.c1, r6.c1 FROM ("S 1"."T 4" r1 INNER JOIN "S 1"."T 4" r6 ON (TRUE)) WHERE ((r1.c1 = r6.c1)) +(9 rows) + +EXECUTE join_stmt; + c1 | c1 +----+---- +(0 rows) + +-- create user mapping for view_owner and execute the prepared statement +-- the join should not be pushed down since joining relations now use two +-- different user mappings +CREATE USER MAPPING FOR view_owner SERVER loopback; +EXPLAIN (COSTS false, VERBOSE) EXECUTE join_stmt; + QUERY PLAN +---------------------------------------------------------------------------- + Limit + Output: t1.c1, ft5.c1 + -> Merge Join + Output: t1.c1, ft5.c1 + Merge Cond: (t1.c1 = ft5.c1) + -> Foreign Scan on public.ft5 t1 + Output: t1.c1, t1.c2, t1.c3 + Remote SQL: SELECT c1 FROM "S 1"."T 4" ORDER BY c1 ASC + -> Materialize + Output: ft5.c1, ft5.c2, ft5.c3 + -> Foreign Scan on public.ft5 + Output: ft5.c1, ft5.c2, ft5.c3 + Remote SQL: SELECT c1 FROM "S 1"."T 4" ORDER BY c1 ASC +(13 rows) + +EXECUTE join_stmt; + c1 | c1 +----+---- +(0 rows) + +-- recreate the dropped user mapping for further tests +CREATE USER MAPPING FOR CURRENT_USER SERVER loopback; +DROP USER MAPPING FOR PUBLIC SERVER loopback; -- =================================================================== -- parameterized queries -- =================================================================== -- simple join PREPARE st1(int, int) AS SELECT t1.c3, t2.c3 FROM ft1 t1, ft2 t2 WHERE t1.c1 = $1 AND t2.c1 = $2; EXPLAIN (VERBOSE, COSTS false) EXECUTE st1(1, 2); - QUERY PLAN --------------------------------------------------------------------- - Nested Loop + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan Output: t1.c3, t2.c3 - -> Foreign Scan on public.ft1 t1 - Output: t1.c3 - Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE (("C 1" = 1)) - -> Foreign Scan on public.ft2 t2 - Output: t2.c3 - Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE (("C 1" = 2)) -(8 rows) + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1.c3, r2.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r2."C 1" = 2)) AND ((r1."C 1" = 1)) +(4 rows) EXECUTE st1(1, 1); c3 | c3 @@ -1135,6 +2091,9 @@ ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE int; SELECT * FROM ft1 WHERE c1 = 1; -- ERROR ERROR: invalid input syntax for integer: "foo" CONTEXT: column "c8" of foreign table "ft1" +SELECT ft1.c1, ft2.c2, ft1.c8 FROM ft1, ft2 WHERE ft1.c1 = ft2.c1 AND ft1.c1 = 1; -- ERROR +ERROR: invalid input syntax for integer: "foo" +CONTEXT: column "c8" of foreign table "ft1" ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE user_enum; -- =================================================================== -- subtransaction @@ -1425,22 +2384,26 @@ UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING EXPLAIN (verbose, costs off) UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Update on public.ft2 Remote SQL: UPDATE "S 1"."T 1" SET c2 = $2, c3 = $3, c7 = $4 WHERE ctid = $1 - -> Hash Join + -> Foreign Scan Output: ft2.c1, (ft2.c2 + 500), NULL::integer, (ft2.c3 || '_update9'::text), ft2.c4, ft2.c5, ft2.c6, 'ft2 '::character(10), ft2.c8, ft2.ctid, ft1.* - Hash Cond: (ft2.c2 = ft1.c1) - -> Foreign Scan on public.ft2 - Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c8, ctid FROM "S 1"."T 1" FOR UPDATE - -> Hash - Output: ft1.*, ft1.c1 - -> Foreign Scan on public.ft1 + Relations: (public.ft2) INNER JOIN (public.ft1) + Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c8, r1.ctid, ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 9)) FOR UPDATE OF r1 + -> Hash Join + Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid, ft1.* + Hash Cond: (ft2.c2 = ft1.c1) + -> Foreign Scan on public.ft2 + Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c8, ctid FROM "S 1"."T 1" FOR UPDATE + -> Hash Output: ft1.*, ft1.c1 - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 9)) -(13 rows) + -> Foreign Scan on public.ft1 + Output: ft1.*, ft1.c1 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 9)) +(17 rows) UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; @@ -1566,22 +2529,26 @@ DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; EXPLAIN (verbose, costs off) DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Delete on public.ft2 Remote SQL: DELETE FROM "S 1"."T 1" WHERE ctid = $1 - -> Hash Join + -> Foreign Scan Output: ft2.ctid, ft1.* - Hash Cond: (ft2.c2 = ft1.c1) - -> Foreign Scan on public.ft2 - Output: ft2.ctid, ft2.c2 - Remote SQL: SELECT c2, ctid FROM "S 1"."T 1" FOR UPDATE - -> Hash - Output: ft1.*, ft1.c1 - -> Foreign Scan on public.ft1 + Relations: (public.ft2) INNER JOIN (public.ft1) + Remote SQL: SELECT r1.ctid, ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 2)) FOR UPDATE OF r1 + -> Hash Join + Output: ft2.ctid, ft1.* + Hash Cond: (ft2.c2 = ft1.c1) + -> Foreign Scan on public.ft2 + Output: ft2.ctid, ft2.c2 + Remote SQL: SELECT c2, ctid FROM "S 1"."T 1" FOR UPDATE + -> Hash Output: ft1.*, ft1.c1 - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 2)) -(13 rows) + -> Foreign Scan on public.ft1 + Output: ft1.*, ft1.c1 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 2)) +(17 rows) DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; SELECT c1,c2,c3,c4 FROM ft2 ORDER BY c1; @@ -4064,3 +5031,6 @@ AND ftoptions @> array['fetch_size=60000']; (1 row) ROLLBACK; +-- Cleanup +DROP OWNED BY view_owner; +DROP USER view_owner; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index d5a2af9428..14a3f9891a 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -28,9 +28,9 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" -#include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "optimizer/var.h" +#include "optimizer/tlist.h" #include "parser/parsetree.h" #include "utils/builtins.h" #include "utils/guc.h" @@ -64,7 +64,15 @@ enum FdwScanPrivateIndex /* Integer list of attribute numbers retrieved by the SELECT */ FdwScanPrivateRetrievedAttrs, /* Integer representing the desired fetch_size */ - FdwScanPrivateFetchSize + FdwScanPrivateFetchSize, + /* Oid of user mapping to be used while connecting to the foreign server */ + FdwScanPrivateUserMappingOid, + + /* + * String describing join i.e. names of relations being joined and types + * of join, added when the scan is join + */ + FdwScanPrivateRelations }; /* @@ -94,7 +102,9 @@ enum FdwModifyPrivateIndex */ typedef struct PgFdwScanState { - Relation rel; /* relcache entry for the foreign table */ + Relation rel; /* relcache entry for the foreign table. NULL + * for a foreign join scan. */ + TupleDesc tupdesc; /* tuple descriptor of scan */ AttInMetadata *attinmeta; /* attribute datatype conversion metadata */ /* extracted fdw_private data */ @@ -182,8 +192,16 @@ typedef struct PgFdwAnalyzeState */ typedef struct ConversionLocation { - Relation rel; /* foreign table's relcache entry */ + Relation rel; /* foreign table's relcache entry. */ AttrNumber cur_attno; /* attribute number being processed, or 0 */ + + /* + * In case of foreign join push down, fdw_scan_tlist is used to identify + * the Var node corresponding to the error location and + * fsstate->ss.ps.state gives access to the RTEs of corresponding relation + * to get the relation name and attribute name. + */ + ForeignScanState *fsstate; } ConversionLocation; /* Callback argument for ec_member_matches_foreign */ @@ -257,6 +275,14 @@ static bool postgresAnalyzeForeignTable(Relation relation, BlockNumber *totalpages); static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid); +static void postgresGetForeignJoinPaths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra); +static bool postgresRecheckForeignScan(ForeignScanState *node, + TupleTableSlot *slot); static List *get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel); static List *get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel); @@ -299,8 +325,12 @@ static HeapTuple make_tuple_from_result_row(PGresult *res, Relation rel, AttInMetadata *attinmeta, List *retrieved_attrs, + ForeignScanState *fsstate, MemoryContext temp_context); static void conversion_error_callback(void *arg); +static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, + JoinType jointype, RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinPathExtraData *extra); /* @@ -331,6 +361,8 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) routine->EndForeignModify = postgresEndForeignModify; routine->IsForeignRelUpdatable = postgresIsForeignRelUpdatable; + /* Function for EvalPlanQual rechecks */ + routine->RecheckForeignScan = postgresRecheckForeignScan; /* Support functions for EXPLAIN */ routine->ExplainForeignScan = postgresExplainForeignScan; routine->ExplainForeignModify = postgresExplainForeignModify; @@ -341,6 +373,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) /* Support functions for IMPORT FOREIGN SCHEMA */ routine->ImportForeignSchema = postgresImportForeignSchema; + /* Support functions for join push-down */ + routine->GetForeignJoinPaths = postgresGetForeignJoinPaths; + PG_RETURN_POINTER(routine); } @@ -358,6 +393,10 @@ postgresGetForeignRelSize(PlannerInfo *root, { PgFdwRelationInfo *fpinfo; ListCell *lc; + RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root); + const char *namespace; + const char *relname; + const char *refname; /* * We use PgFdwRelationInfo to pass various information to subsequent @@ -366,6 +405,9 @@ postgresGetForeignRelSize(PlannerInfo *root, fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo)); baserel->fdw_private = (void *) fpinfo; + /* Base foreign tables need to be push down always. */ + fpinfo->pushdown_safe = true; + /* Look up foreign-table catalog info. */ fpinfo->table = GetForeignTable(foreigntableid); fpinfo->server = GetForeignServer(fpinfo->table->serverid); @@ -414,7 +456,6 @@ postgresGetForeignRelSize(PlannerInfo *root, */ if (fpinfo->use_remote_estimate) { - RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root); Oid userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); fpinfo->user = GetUserMapping(userid, fpinfo->server->serverid); @@ -509,6 +550,23 @@ postgresGetForeignRelSize(PlannerInfo *root, &fpinfo->rows, &fpinfo->width, &fpinfo->startup_cost, &fpinfo->total_cost); } + + /* + * Set the name of relation in fpinfo, while we are constructing it here. + * It will be used to build the string describing the join relation in + * EXPLAIN output. We can't know whether VERBOSE option is specified or + * not, so always schema-qualify the foreign table name. + */ + fpinfo->relation_name = makeStringInfo(); + namespace = get_namespace_name(get_rel_namespace(foreigntableid)); + relname = get_rel_name(foreigntableid); + refname = rte->eref->aliasname; + appendStringInfo(fpinfo->relation_name, "%s.%s", + quote_identifier(namespace), + quote_identifier(relname)); + if (*refname && strcmp(refname, relname) != 0) + appendStringInfo(fpinfo->relation_name, " %s", + quote_identifier(rte->eref->aliasname)); } /* @@ -935,15 +993,15 @@ postgresGetForeignPaths(PlannerInfo *root, */ static ForeignScan * postgresGetForeignPlan(PlannerInfo *root, - RelOptInfo *baserel, + RelOptInfo *foreignrel, Oid foreigntableid, ForeignPath *best_path, List *tlist, List *scan_clauses, Plan *outer_plan) { - PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; - Index scan_relid = baserel->relid; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; + Index scan_relid; List *fdw_private; List *remote_conds = NIL; List *remote_exprs = NIL; @@ -952,6 +1010,28 @@ postgresGetForeignPlan(PlannerInfo *root, List *retrieved_attrs; StringInfoData sql; ListCell *lc; + List *fdw_scan_tlist = NIL; + + /* + * For base relations, set scan_relid as the relid of the relation. For + * other kinds of relations set it to 0. + */ + if (foreignrel->reloptkind == RELOPT_BASEREL || + foreignrel->reloptkind == RELOPT_OTHER_MEMBER_REL) + scan_relid = foreignrel->relid; + else + { + scan_relid = 0; + + /* + * create_scan_plan() and create_foreignscan_plan() pass + * rel->baserestrictinfo + parameterization clauses through + * scan_clauses. For a join rel->baserestrictinfo is NIL and we are + * not considering parameterization right now, so there should be no + * scan_clauses for a joinrel. + */ + Assert(!scan_clauses); + } /* * Separate the scan_clauses into those that can be executed remotely and @@ -989,7 +1069,7 @@ postgresGetForeignPlan(PlannerInfo *root, } else if (list_member_ptr(fpinfo->local_conds, rinfo)) local_exprs = lappend(local_exprs, rinfo->clause); - else if (is_foreign_expr(root, baserel, rinfo->clause)) + else if (is_foreign_expr(root, foreignrel, rinfo->clause)) { remote_conds = lappend(remote_conds, rinfo); remote_exprs = lappend(remote_exprs, rinfo->clause); @@ -998,26 +1078,70 @@ postgresGetForeignPlan(PlannerInfo *root, local_exprs = lappend(local_exprs, rinfo->clause); } + if (foreignrel->reloptkind == RELOPT_JOINREL) + { + /* For a join relation, get the conditions from fdw_private structure */ + remote_conds = fpinfo->remote_conds; + local_exprs = fpinfo->local_conds; + + /* Build the list of columns to be fetched from the foreign server. */ + fdw_scan_tlist = build_tlist_to_deparse(foreignrel); + + /* + * Ensure that the outer plan produces a tuple whose descriptor + * matches our scan tuple slot. This is safe because all scans and + * joins support projection, so we never need to insert a Result node. + * Also, remove the local conditions from outer plan's quals, lest + * they will be evaluated twice, once by the local plan and once by + * the scan. + */ + if (outer_plan) + { + ListCell *lc; + + outer_plan->targetlist = fdw_scan_tlist; + + foreach(lc, local_exprs) + { + Join *join_plan = (Join *) outer_plan; + Node *qual = lfirst(lc); + + outer_plan->qual = list_delete(outer_plan->qual, qual); + + /* + * For an inner join the local conditions of foreign scan plan + * can be part of the joinquals as well. + */ + if (join_plan->jointype == JOIN_INNER) + join_plan->joinqual = list_delete(join_plan->joinqual, + qual); + } + } + } + /* * Build the query string to be sent for execution, and identify * expressions to be sent as parameters. */ initStringInfo(&sql); - deparseSelectStmtForRel(&sql, root, baserel, remote_conds, - best_path->path.pathkeys, &retrieved_attrs, - ¶ms_list); + deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist, + remote_conds, best_path->path.pathkeys, + &retrieved_attrs, ¶ms_list); /* * Build the fdw_private list that will be available to the executor. - * Items in the list must match enum FdwScanPrivateIndex, above. + * Items in the list must match order in enum FdwScanPrivateIndex. */ - fdw_private = list_make3(makeString(sql.data), + fdw_private = list_make4(makeString(sql.data), retrieved_attrs, - makeInteger(fpinfo->fetch_size)); + makeInteger(fpinfo->fetch_size), + makeInteger(foreignrel->umid)); + if (foreignrel->reloptkind == RELOPT_JOINREL) + fdw_private = lappend(fdw_private, + makeString(fpinfo->relation_name->data)); /* - * Create the ForeignScan node from target list, filtering expressions, - * remote parameter expressions, and FDW private information. + * Create the ForeignScan node for the given relation. * * Note that the remote parameter expressions are stored in the fdw_exprs * field of the finished plan node; we can't keep them in private state @@ -1028,7 +1152,7 @@ postgresGetForeignPlan(PlannerInfo *root, scan_relid, params_list, fdw_private, - NIL, /* no custom tlist */ + fdw_scan_tlist, remote_exprs, outer_plan); } @@ -1043,9 +1167,6 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan; EState *estate = node->ss.ps.state; PgFdwScanState *fsstate; - RangeTblEntry *rte; - Oid userid; - ForeignTable *table; UserMapping *user; int numParams; int i; @@ -1064,16 +1185,36 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) node->fdw_state = (void *) fsstate; /* - * Identify which user to do the remote access as. This should match what - * ExecCheckRTEPerms() does. + * Obtain the foreign server where to connect and user mapping to use for + * connection. For base relations we obtain this information from + * catalogs. For join relations, this information is frozen at the time of + * planning to ensure that the join is safe to pushdown. In case the + * information goes stale between planning and execution, plan will be + * invalidated and replanned. */ - rte = rt_fetch(fsplan->scan.scanrelid, estate->es_range_table); - userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); + if (fsplan->scan.scanrelid > 0) + { + ForeignTable *table; - /* Get info about foreign table. */ - fsstate->rel = node->ss.ss_currentRelation; - table = GetForeignTable(RelationGetRelid(fsstate->rel)); - user = GetUserMapping(userid, table->serverid); + /* + * Identify which user to do the remote access as. This should match + * what ExecCheckRTEPerms() does. + */ + RangeTblEntry *rte = rt_fetch(fsplan->scan.scanrelid, estate->es_range_table); + Oid userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); + + fsstate->rel = node->ss.ss_currentRelation; + table = GetForeignTable(RelationGetRelid(fsstate->rel)); + + user = GetUserMapping(userid, table->serverid); + } + else + { + Oid umid = intVal(list_nth(fsplan->fdw_private, FdwScanPrivateUserMappingOid)); + + user = GetUserMappingById(umid); + Assert(fsplan->fs_server == user->serverid); + } /* * Get connection to the foreign server. Connection manager will @@ -1105,8 +1246,16 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) ALLOCSET_SMALL_INITSIZE, ALLOCSET_SMALL_MAXSIZE); - /* Get info we'll need for input data conversion. */ - fsstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(fsstate->rel)); + /* + * Get info we'll need for converting data fetched from the foreign server + * into local representation and error reporting during that process. + */ + if (fsplan->scan.scanrelid > 0) + fsstate->tupdesc = RelationGetDescr(fsstate->rel); + else + fsstate->tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor; + + fsstate->attinmeta = TupleDescGetAttInMetadata(fsstate->tupdesc); /* Prepare for output conversion of parameters used in remote query. */ numParams = list_length(fsplan->fdw_exprs); @@ -1824,6 +1973,34 @@ postgresIsForeignRelUpdatable(Relation rel) (1 << CMD_INSERT) | (1 << CMD_UPDATE) | (1 << CMD_DELETE) : 0; } +/* + * postgresRecheckForeignScan + * Execute a local join execution plan for a foreign join + */ +static bool +postgresRecheckForeignScan(ForeignScanState *node, TupleTableSlot *slot) +{ + Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid; + PlanState *outerPlan = outerPlanState(node); + TupleTableSlot *result; + + /* For base foreign relations, it suffices to set fdw_recheck_quals */ + if (scanrelid > 0) + return true; + + Assert(outerPlan != NULL); + + /* Execute a local join execution plan */ + result = ExecProcNode(outerPlan); + if (TupIsNull(result)) + return false; + + /* Store result in the given slot */ + ExecCopySlot(slot, result); + + return true; +} + /* * postgresExplainForeignScan * Produce extra output for EXPLAIN of a ForeignScan on a foreign table @@ -1833,10 +2010,25 @@ postgresExplainForeignScan(ForeignScanState *node, ExplainState *es) { List *fdw_private; char *sql; + char *relations; + fdw_private = ((ForeignScan *) node->ss.ps.plan)->fdw_private; + + /* + * Add names of relation handled by the foreign scan when the scan is a + * join + */ + if (list_length(fdw_private) > FdwScanPrivateRelations) + { + relations = strVal(list_nth(fdw_private, FdwScanPrivateRelations)); + ExplainPropertyText("Relations", relations, es); + } + + /* + * Add remote query, when VERBOSE option is specified. + */ if (es->verbose) { - fdw_private = ((ForeignScan *) node->ss.ps.plan)->fdw_private; sql = strVal(list_nth(fdw_private, FdwScanPrivateSelectSql)); ExplainPropertyText("Remote SQL", sql, es); } @@ -1865,26 +2057,29 @@ postgresExplainForeignModify(ModifyTableState *mtstate, /* * estimate_path_cost_size - * Get cost and size estimates for a foreign scan + * Get cost and size estimates for a foreign scan on given foreign relation + * either a base relation or a join between foreign relations. * - * We assume that all the baserestrictinfo clauses will be applied, plus - * any join clauses listed in join_conds. + * param_join_conds are the parameterization clauses with outer relations. + * pathkeys specify the expected sort order if any for given path being costed. + * + * The function returns the cost and size estimates in p_row, p_width, + * p_startup_cost and p_total_cost variables. */ static void estimate_path_cost_size(PlannerInfo *root, - RelOptInfo *baserel, - List *join_conds, + RelOptInfo *foreignrel, + List *param_join_conds, List *pathkeys, double *p_rows, int *p_width, Cost *p_startup_cost, Cost *p_total_cost) { - PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; double rows; double retrieved_rows; int width; Cost startup_cost; Cost total_cost; - Cost run_cost; Cost cpu_per_tuple; /* @@ -1896,39 +2091,49 @@ estimate_path_cost_size(PlannerInfo *root, */ if (fpinfo->use_remote_estimate) { - List *remote_join_conds; - List *local_join_conds; + List *remote_param_join_conds; + List *local_param_join_conds; StringInfoData sql; - List *retrieved_attrs; PGconn *conn; Selectivity local_sel; QualCost local_cost; + List *fdw_scan_tlist = NIL; List *remote_conds; + /* Required only to be passed to deparseSelectStmtForRel */ + List *retrieved_attrs; + /* - * join_conds might contain both clauses that are safe to send across, - * and clauses that aren't. + * param_join_conds might contain both clauses that are safe to send + * across, and clauses that aren't. */ - classifyConditions(root, baserel, join_conds, - &remote_join_conds, &local_join_conds); + classifyConditions(root, foreignrel, param_join_conds, + &remote_param_join_conds, &local_param_join_conds); + + /* Build the list of columns to be fetched from the foreign server. */ + if (foreignrel->reloptkind == RELOPT_JOINREL) + fdw_scan_tlist = build_tlist_to_deparse(foreignrel); + else + fdw_scan_tlist = NIL; /* * The complete list of remote conditions includes everything from * baserestrictinfo plus any extra join_conds relevant to this * particular path. */ - remote_conds = list_concat(list_copy(remote_join_conds), + remote_conds = list_concat(list_copy(remote_param_join_conds), fpinfo->remote_conds); /* * Construct EXPLAIN query including the desired SELECT, FROM, and - * WHERE clauses. Params and other-relation Vars are replaced by - * dummy values. + * WHERE clauses. Params and other-relation Vars are replaced by dummy + * values, so don't request params_list. */ initStringInfo(&sql); appendStringInfoString(&sql, "EXPLAIN "); - deparseSelectStmtForRel(&sql, root, baserel, remote_conds, pathkeys, - &retrieved_attrs, NULL); + deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist, + remote_conds, pathkeys, &retrieved_attrs, + NULL); /* Get the remote estimate */ conn = GetConnection(fpinfo->user, false); @@ -1940,8 +2145,8 @@ estimate_path_cost_size(PlannerInfo *root, /* Factor in the selectivity of the locally-checked quals */ local_sel = clauselist_selectivity(root, - local_join_conds, - baserel->relid, + local_param_join_conds, + foreignrel->relid, JOIN_INNER, NULL); local_sel *= fpinfo->local_conds_sel; @@ -1951,41 +2156,113 @@ estimate_path_cost_size(PlannerInfo *root, /* Add in the eval cost of the locally-checked quals */ startup_cost += fpinfo->local_conds_cost.startup; total_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows; - cost_qual_eval(&local_cost, local_join_conds, root); + cost_qual_eval(&local_cost, local_param_join_conds, root); startup_cost += local_cost.startup; total_cost += local_cost.per_tuple * retrieved_rows; } else { + Cost run_cost = 0; + /* * We don't support join conditions in this mode (hence, no * parameterized paths can be made). */ - Assert(join_conds == NIL); - - /* Use rows/width estimates made by set_baserel_size_estimates. */ - rows = baserel->rows; - width = baserel->width; + Assert(param_join_conds == NIL); /* - * Back into an estimate of the number of retrieved rows. Just in - * case this is nuts, clamp to at most baserel->tuples. + * Use rows/width estimates made by set_baserel_size_estimates() for + * base foreign relations and set_joinrel_size_estimates() for join + * between foreign relations. */ + rows = foreignrel->rows; + width = foreignrel->width; + + /* Back into an estimate of the number of retrieved rows. */ retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel); - retrieved_rows = Min(retrieved_rows, baserel->tuples); - /* - * Cost as though this were a seqscan, which is pessimistic. We - * effectively imagine the local_conds are being evaluated remotely, - * too. - */ - startup_cost = 0; - run_cost = 0; - run_cost += seq_page_cost * baserel->pages; + if (foreignrel->reloptkind != RELOPT_JOINREL) + { + /* Clamp retrieved rows estimates to at most foreignrel->tuples. */ + retrieved_rows = Min(retrieved_rows, foreignrel->tuples); - startup_cost += baserel->baserestrictcost.startup; - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; - run_cost += cpu_per_tuple * baserel->tuples; + /* + * Cost as though this were a seqscan, which is pessimistic. We + * effectively imagine the local_conds are being evaluated + * remotely, too. + */ + startup_cost = 0; + run_cost = 0; + run_cost += seq_page_cost * foreignrel->pages; + + startup_cost += foreignrel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost + foreignrel->baserestrictcost.per_tuple; + run_cost += cpu_per_tuple * foreignrel->tuples; + } + else + { + PgFdwRelationInfo *fpinfo_i; + PgFdwRelationInfo *fpinfo_o; + QualCost join_cost; + QualCost remote_conds_cost; + double nrows; + + /* For join we expect inner and outer relations set */ + Assert(fpinfo->innerrel && fpinfo->outerrel); + + fpinfo_i = (PgFdwRelationInfo *) fpinfo->innerrel->fdw_private; + fpinfo_o = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private; + + /* Estimate of number of rows in cross product */ + nrows = fpinfo_i->rows * fpinfo_o->rows; + /* Clamp retrieved rows estimate to at most size of cross product */ + retrieved_rows = Min(retrieved_rows, nrows); + + /* + * The cost of foreign join is estimated as cost of generating + * rows for the joining relations + cost for applying quals on the + * rows. + */ + + /* Calculate the cost of clauses pushed down the foreign server */ + cost_qual_eval(&remote_conds_cost, fpinfo->remote_conds, root); + /* Calculate the cost of applying join clauses */ + cost_qual_eval(&join_cost, fpinfo->joinclauses, root); + + /* + * Startup cost includes startup cost of joining relations and the + * startup cost for join and other clauses. We do not include the + * startup cost specific to join strategy (e.g. setting up hash + * tables) since we do not know what strategy the foreign server + * is going to use. + */ + startup_cost = fpinfo_i->rel_startup_cost + fpinfo_o->rel_startup_cost; + startup_cost += join_cost.startup; + startup_cost += remote_conds_cost.startup; + startup_cost += fpinfo->local_conds_cost.startup; + + /* + * Run time cost includes: + * + * 1. Run time cost (total_cost - startup_cost) of relations being + * joined + * + * 2. Run time cost of applying join clauses on the cross product + * of the joining relations. + * + * 3. Run time cost of applying pushed down other clauses on the + * result of join + * + * 4. Run time cost of applying nonpushable other clauses locally + * on the result fetched from the foreign server. + */ + run_cost = fpinfo_i->rel_total_cost - fpinfo_i->rel_startup_cost; + run_cost += fpinfo_o->rel_total_cost - fpinfo_o->rel_startup_cost; + run_cost += nrows * join_cost.per_tuple; + nrows = clamp_row_est(nrows * fpinfo->joinclause_sel); + run_cost += nrows * remote_conds_cost.per_tuple; + run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows; + } /* * Without remote estimates, we have no real way to estimate the cost @@ -2005,6 +2282,15 @@ estimate_path_cost_size(PlannerInfo *root, total_cost = startup_cost + run_cost; } + /* + * Cache the costs prior to adding the costs for transferring data from + * the foreign server. These costs are useful for costing the join between + * this relation and another foreign relation, when the cost of join can + * not be obtained from the foreign server. + */ + fpinfo->rel_startup_cost = startup_cost; + fpinfo->rel_total_cost = total_cost; + /* * Add some additional cost factors to account for connection overhead * (fdw_startup_cost), transferring data across the network @@ -2237,11 +2523,15 @@ fetch_more_data(ForeignScanState *node) for (i = 0; i < numrows; i++) { + ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan; + + Assert(IsA(fsplan, ForeignScan)); fsstate->tuples[i] = make_tuple_from_result_row(res, i, fsstate->rel, fsstate->attinmeta, fsstate->retrieved_attrs, + node, fsstate->temp_cxt); } @@ -2460,6 +2750,7 @@ store_returning_result(PgFdwModifyState *fmstate, fmstate->rel, fmstate->attinmeta, fmstate->retrieved_attrs, + NULL, fmstate->temp_cxt); /* tuple will be deleted when it is cleared from the slot */ ExecStoreTuple(newtup, slot, InvalidBuffer, true); @@ -2770,6 +3061,7 @@ analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate) astate->rel, astate->attinmeta, astate->retrieved_attrs, + NULL, astate->temp_cxt); MemoryContextSwitchTo(oldcontext); @@ -3044,6 +3336,345 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) return commands; } +/* + * Assess whether the join between inner and outer relations can be pushed down + * to the foreign server. As a side effect, save information we obtain in this + * function to PgFdwRelationInfo passed in. + * + * Joins that satisfy conditions below are safe to push down. + * + * 1) Join type is INNER or OUTER (one of LEFT/RIGHT/FULL) + * 2) Both outer and inner portions are safe to push-down + * 3) All foreign tables in the join belong to the same foreign server and use + * the same user mapping. + * 4) All join conditions are safe to push down + * 5) No relation has local filter (this can be relaxed for INNER JOIN, if we + * can move unpushable clauses upwards in the join tree). + */ +static bool +foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinPathExtraData *extra) +{ + PgFdwRelationInfo *fpinfo; + PgFdwRelationInfo *fpinfo_o; + PgFdwRelationInfo *fpinfo_i; + ListCell *lc; + List *joinclauses; + List *otherclauses; + + /* + * We support pushing down INNER, LEFT, RIGHT and FULL OUTER joins. + * Constructing queries representing SEMI and ANTI joins is hard, hence + * not considered right now. + */ + if (jointype != JOIN_INNER && jointype != JOIN_LEFT && + jointype != JOIN_RIGHT && jointype != JOIN_FULL) + return false; + + /* + * If either of the joining relations is marked as unsafe to pushdown, the + * join can not be pushed down. + */ + fpinfo = (PgFdwRelationInfo *) joinrel->fdw_private; + fpinfo_o = (PgFdwRelationInfo *) outerrel->fdw_private; + fpinfo_i = (PgFdwRelationInfo *) innerrel->fdw_private; + if (!fpinfo_o || !fpinfo_o->pushdown_safe || + !fpinfo_i || !fpinfo_i->pushdown_safe) + return false; + + /* + * If joining relations have local conditions, those conditions are + * required to be applied before joining the relations. Hence the join can + * not be pushed down. + */ + if (fpinfo_o->local_conds || fpinfo_i->local_conds) + return false; + + /* Separate restrict list into join quals and quals on join relation */ + if (IS_OUTER_JOIN(jointype)) + extract_actual_join_clauses(extra->restrictlist, &joinclauses, &otherclauses); + else + { + /* + * Unlike an outer join, for inner join, the join result contains only + * the rows which satisfy join clauses, similar to the other clause. + * Hence all clauses can be treated as other quals. This helps to push + * a join down to the foreign server even if some of its join quals + * are not safe to pushdown. + */ + otherclauses = extract_actual_clauses(extra->restrictlist, false); + joinclauses = NIL; + } + + /* Join quals must be safe to push down. */ + foreach(lc, joinclauses) + { + Expr *expr = (Expr *) lfirst(lc); + + if (!is_foreign_expr(root, joinrel, expr)) + return false; + } + + /* Save the join clauses, for later use. */ + fpinfo->joinclauses = joinclauses; + + /* + * Other clauses are applied after the join has been performed and thus + * need not be all pushable. We will push those which can be pushed to + * reduce the number of rows fetched from the foreign server. Rest of them + * will be applied locally after fetching join result. Add them to fpinfo + * so that other joins involving this joinrel will know that this joinrel + * has local clauses. + */ + foreach(lc, otherclauses) + { + Expr *expr = (Expr *) lfirst(lc); + + if (!is_foreign_expr(root, joinrel, expr)) + fpinfo->local_conds = lappend(fpinfo->local_conds, expr); + else + fpinfo->remote_conds = lappend(fpinfo->remote_conds, expr); + } + + fpinfo->outerrel = outerrel; + fpinfo->innerrel = innerrel; + fpinfo->jointype = jointype; + + /* + * If user is willing to estimate cost for a scan of either of the joining + * relations using EXPLAIN, he intends to estimate scans on that relation + * more accurately. Then, it makes sense to estimate the cost the join + * with that relation more accurately using EXPLAIN. + */ + fpinfo->use_remote_estimate = fpinfo_o->use_remote_estimate || + fpinfo_i->use_remote_estimate; + + /* + * Since both the joining relations come from the same server, the server + * level options should have same value for both the relations. Pick from + * any side. + */ + fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost; + fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost; + + /* Mark that this join can be pushed down safely */ + fpinfo->pushdown_safe = true; + + /* + * Set fetch size to maximum of the joining sides, since we are expecting + * the rows returned by the join to be proportional to the relation sizes. + */ + if (fpinfo_o->fetch_size > fpinfo_i->fetch_size) + fpinfo->fetch_size = fpinfo_o->fetch_size; + else + fpinfo->fetch_size = fpinfo_i->fetch_size; + + /* + * Pull the other remote conditions from the joining relations into join + * clauses or other remote clauses (remote_conds) of this relation. This + * avoids building subqueries at every join step. + * + * For an inner join, clauses from both the relations are added to the + * other remote clauses. For an OUTER join, the clauses from the outer + * side are added to remote_conds since those can be evaluated after the + * join is evaluated. The clauses from inner side are added to the + * joinclauses, since they need to evaluated while constructing the join. + * + * The joining sides can not have local conditions, thus no need to test + * shippability of the clauses being pulled up. + */ + switch (jointype) + { + case JOIN_INNER: + fpinfo->remote_conds = list_concat(fpinfo->remote_conds, + fpinfo_i->remote_conds); + fpinfo->remote_conds = list_concat(fpinfo->remote_conds, + fpinfo_o->remote_conds); + break; + + case JOIN_LEFT: + fpinfo->joinclauses = list_concat(fpinfo->joinclauses, + fpinfo_i->remote_conds); + fpinfo->remote_conds = list_concat(fpinfo->remote_conds, + fpinfo_o->remote_conds); + break; + + case JOIN_RIGHT: + fpinfo->joinclauses = list_concat(fpinfo->joinclauses, + fpinfo_o->remote_conds); + fpinfo->remote_conds = list_concat(fpinfo->remote_conds, + fpinfo_i->remote_conds); + break; + + case JOIN_FULL: + fpinfo->joinclauses = list_concat(fpinfo->joinclauses, + fpinfo_i->remote_conds); + fpinfo->joinclauses = list_concat(fpinfo->joinclauses, + fpinfo_o->remote_conds); + break; + + default: + /* Should not happen, we have just check this above */ + elog(ERROR, "unsupported join type %d", jointype); + } + + /* + * Set the string describing this join relation to be used in EXPLAIN + * output of corresponding ForeignScan. + */ + fpinfo->relation_name = makeStringInfo(); + appendStringInfo(fpinfo->relation_name, "(%s) %s JOIN (%s)", + fpinfo_o->relation_name->data, + get_jointype_name(fpinfo->jointype), + fpinfo_i->relation_name->data); + + return true; +} + +/* + * postgresGetForeignJoinPaths + * Add possible ForeignPath to joinrel, if join is safe to push down. + */ +static void +postgresGetForeignJoinPaths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra) +{ + PgFdwRelationInfo *fpinfo; + ForeignPath *joinpath; + double rows; + int width; + Cost startup_cost; + Cost total_cost; + Path *epq_path; /* Path to create plan to be executed when + * EvalPlanQual gets triggered. */ + + /* + * Skip if this join combination has been considered already. + */ + if (joinrel->fdw_private) + return; + + /* + * Create unfinished PgFdwRelationInfo entry which is used to indicate + * that the join relation is already considered, so that we won't waste + * time in judging safety of join pushdown and adding the same paths again + * if found safe. Once we know that this join can be pushed down, we fill + * the entry. + */ + fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo)); + fpinfo->pushdown_safe = false; + joinrel->fdw_private = fpinfo; + /* attrs_used is only for base relations. */ + fpinfo->attrs_used = NULL; + + /* + * In case there is a possibility that EvalPlanQual will be executed, we + * should be able to reconstruct the row, from base relations applying all + * the conditions. We create a local plan from a suitable local path + * available in the path list. In case such a path doesn't exist, we can + * not push the join to the foreign server since we won't be able to + * reconstruct the row for EvalPlanQual(). Find an alternative local path + * before we add ForeignPath, lest the new path would kick possibly the + * only local path. Do this before calling foreign_join_ok(), since that + * function updates fpinfo and marks it as pushable if the join is found + * to be pushable. + */ + if (root->parse->commandType == CMD_DELETE || + root->parse->commandType == CMD_UPDATE || + root->rowMarks) + { + epq_path = GetExistingLocalJoinPath(joinrel); + if (!epq_path) + { + elog(DEBUG3, "could not push down foreign join because a local path suitable for EPQ checks was not found"); + return; + } + } + else + epq_path = NULL; + + if (!foreign_join_ok(root, joinrel, jointype, outerrel, innerrel, extra)) + { + /* Free path required for EPQ if we copied one; we don't need it now */ + if (epq_path) + pfree(epq_path); + return; + } + + /* + * Compute the selectivity and cost of the local_conds, so we don't have + * to do it over again for each path. The best we can do for these + * conditions is to estimate selectivity on the basis of local statistics. + * The local conditions are applied after the join has been computed on + * the remote side like quals in WHERE clause, so pass jointype as + * JOIN_INNER. + */ + fpinfo->local_conds_sel = clauselist_selectivity(root, + fpinfo->local_conds, + 0, + JOIN_INNER, + NULL); + cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root); + + /* + * If we are going to estimate the costs using EXPLAIN, we will need + * connection information. Fill it here. + */ + if (fpinfo->use_remote_estimate) + fpinfo->user = GetUserMappingById(joinrel->umid); + else + { + fpinfo->user = NULL; + + /* + * If we are going to estimate costs locally, estimate the join clause + * selectivity here while we have special join info. + */ + fpinfo->joinclause_sel = clauselist_selectivity(root, fpinfo->joinclauses, + 0, fpinfo->jointype, + extra->sjinfo); + + } + fpinfo->server = GetForeignServer(joinrel->serverid); + + /* Estimate costs for bare join relation */ + estimate_path_cost_size(root, joinrel, NIL, NIL, &rows, + &width, &startup_cost, &total_cost); + /* Now update this information in the joinrel */ + joinrel->rows = rows; + joinrel->width = width; + fpinfo->rows = rows; + fpinfo->width = width; + fpinfo->startup_cost = startup_cost; + fpinfo->total_cost = total_cost; + + /* + * Create a new join path and add it to the joinrel which represents a + * join between foreign tables. + */ + joinpath = create_foreignscan_path(root, + joinrel, + rows, + startup_cost, + total_cost, + NIL, /* no pathkeys */ + NULL, /* no required_outer */ + epq_path, + NULL); /* no fdw_private */ + + /* Add generated path into joinrel by add_path(). */ + add_path(joinrel, (Path *) joinpath); + + /* XXX Consider pathkeys for the join relation */ + + /* XXX Consider parameterized paths for the join relation */ +} + /* * Create a tuple from the specified row of the PGresult. * @@ -3058,10 +3689,11 @@ make_tuple_from_result_row(PGresult *res, Relation rel, AttInMetadata *attinmeta, List *retrieved_attrs, + ForeignScanState *fsstate, MemoryContext temp_context) { HeapTuple tuple; - TupleDesc tupdesc = RelationGetDescr(rel); + TupleDesc tupdesc; Datum *values; bool *nulls; ItemPointer ctid = NULL; @@ -3080,6 +3712,17 @@ make_tuple_from_result_row(PGresult *res, */ oldcontext = MemoryContextSwitchTo(temp_context); + if (rel) + tupdesc = RelationGetDescr(rel); + else + { + PgFdwScanState *fdw_sstate; + + Assert(fsstate); + fdw_sstate = (PgFdwScanState *) fsstate->fdw_state; + tupdesc = fdw_sstate->tupdesc; + } + values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum)); nulls = (bool *) palloc(tupdesc->natts * sizeof(bool)); /* Initialize to nulls for any columns not present in result */ @@ -3090,6 +3733,7 @@ make_tuple_from_result_row(PGresult *res, */ errpos.rel = rel; errpos.cur_attno = 0; + errpos.fsstate = fsstate; errcallback.callback = conversion_error_callback; errcallback.arg = (void *) &errpos; errcallback.previous = error_context_stack; @@ -3178,13 +3822,46 @@ make_tuple_from_result_row(PGresult *res, static void conversion_error_callback(void *arg) { + const char *attname = NULL; + const char *relname = NULL; ConversionLocation *errpos = (ConversionLocation *) arg; - TupleDesc tupdesc = RelationGetDescr(errpos->rel); - if (errpos->cur_attno > 0 && errpos->cur_attno <= tupdesc->natts) - errcontext("column \"%s\" of foreign table \"%s\"", - NameStr(tupdesc->attrs[errpos->cur_attno - 1]->attname), - RelationGetRelationName(errpos->rel)); + if (errpos->rel) + { + /* error occurred in a scan against a foreign table */ + TupleDesc tupdesc = RelationGetDescr(errpos->rel); + + if (errpos->cur_attno > 0 && errpos->cur_attno <= tupdesc->natts) + attname = NameStr(tupdesc->attrs[errpos->cur_attno - 1]->attname); + else if (errpos->cur_attno == SelfItemPointerAttributeNumber) + attname = "ctid"; + + relname = RelationGetRelationName(errpos->rel); + } + else + { + /* error occurred in a scan against a foreign join */ + ForeignScanState *fsstate = errpos->fsstate; + ForeignScan *fsplan = (ForeignScan *) fsstate->ss.ps.plan; + EState *estate = fsstate->ss.ps.state; + TargetEntry *tle; + Var *var; + RangeTblEntry *rte; + + Assert(IsA(fsplan, ForeignScan)); + tle = (TargetEntry *) list_nth(fsplan->fdw_scan_tlist, + errpos->cur_attno - 1); + Assert(IsA(tle, TargetEntry)); + var = (Var *) tle->expr; + Assert(IsA(var, Var)); + + rte = rt_fetch(var->varno, estate->es_range_table); + relname = get_rel_name(rte->relid); + attname = get_relid_attribute_name(rte->relid, var->varattno); + } + + if (attname && relname) + errcontext("column \"%s\" of foreign table \"%s\"", attname, relname); } /* diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 2b632817fe..4c731becc6 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -26,7 +26,25 @@ */ typedef struct PgFdwRelationInfo { - /* baserestrictinfo clauses, broken down into safe and unsafe subsets. */ + /* + * True means that the relation can be pushed down. Always true for simple + * foreign scan. + */ + bool pushdown_safe; + + /* + * Restriction clauses, divided into safe and unsafe to pushdown subsets. + * + * For a base foreign relation this is a list of clauses along-with + * RestrictInfo wrapper. Keeping RestrictInfo wrapper helps while dividing + * scan_clauses in postgresGetForeignPlan into safe and unsafe subsets. + * Also it helps in estimating costs since RestrictInfo caches the + * selectivity and qual cost for the clause in it. + * + * For a join relation, however, they are part of otherclause list + * obtained from extract_actual_join_clauses, which strips RestrictInfo + * construct. So, for a join relation they are list of bare clauses. + */ List *remote_conds; List *local_conds; @@ -37,11 +55,17 @@ typedef struct PgFdwRelationInfo QualCost local_conds_cost; Selectivity local_conds_sel; - /* Estimated size and cost for a scan with baserestrictinfo quals. */ + /* Selectivity of join conditions */ + Selectivity joinclause_sel; + + /* Estimated size and cost for a scan or join. */ double rows; int width; Cost startup_cost; Cost total_cost; + /* Costs excluding costs for transferring data from the foreign server */ + Cost rel_startup_cost; + Cost rel_total_cost; /* Options extracted from catalogs. */ bool use_remote_estimate; @@ -55,6 +79,19 @@ typedef struct PgFdwRelationInfo UserMapping *user; /* only set in use_remote_estimate mode */ int fetch_size; /* fetch size for this remote table */ + + /* + * Name of the relation while EXPLAINing ForeignScan. It is used for join + * relations but is set for all relations. For join relation, the name + * indicates which foreign tables are being joined and the join type used. + */ + StringInfo relation_name; + + /* Join information */ + RelOptInfo *outerrel; + RelOptInfo *innerrel; + JoinType jointype; + List *joinclauses; } PgFdwRelationInfo; /* in postgres_fdw.c */ @@ -102,12 +139,15 @@ extern void deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs); extern void deparseStringLiteral(StringInfo buf, const char *val); extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); +extern List *build_tlist_to_deparse(RelOptInfo *foreign_rel); extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, - RelOptInfo *baserel, List *remote_conds, List *pathkeys, + RelOptInfo *foreignrel, List *tlist, + List *remote_conds, List *pathkeys, List **retrieved_attrs, List **params_list); /* in shippable.c */ extern bool is_builtin(Oid objectId); extern bool is_shippable(Oid objectId, Oid classId, PgFdwRelationInfo *fpinfo); +extern const char *get_jointype_name(JoinType jointype); #endif /* POSTGRES_FDW_H */ diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 1978e16cab..b32e45a368 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -11,12 +11,17 @@ DO $d$ OPTIONS (dbname '$$||current_database()||$$', port '$$||current_setting('port')||$$' )$$; + EXECUTE $$CREATE SERVER loopback2 FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; END; $d$; CREATE USER MAPPING FOR public SERVER testserver1 OPTIONS (user 'value', password 'value'); CREATE USER MAPPING FOR CURRENT_USER SERVER loopback; +CREATE USER MAPPING FOR CURRENT_USER SERVER loopback2; -- =================================================================== -- create objects used through FDW loopback server @@ -39,6 +44,18 @@ CREATE TABLE "S 1"."T 2" ( c2 text, CONSTRAINT t2_pkey PRIMARY KEY (c1) ); +CREATE TABLE "S 1"."T 3" ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text, + CONSTRAINT t3_pkey PRIMARY KEY (c1) +); +CREATE TABLE "S 1"."T 4" ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text, + CONSTRAINT t4_pkey PRIMARY KEY (c1) +); INSERT INTO "S 1"."T 1" SELECT id, @@ -54,9 +71,23 @@ INSERT INTO "S 1"."T 2" SELECT id, 'AAA' || to_char(id, 'FM000') FROM generate_series(1, 100) id; +INSERT INTO "S 1"."T 3" + SELECT id, + id + 1, + 'AAA' || to_char(id, 'FM000') + FROM generate_series(1, 100) id; +DELETE FROM "S 1"."T 3" WHERE c1 % 2 != 0; -- delete for outer join tests +INSERT INTO "S 1"."T 4" + SELECT id, + id + 1, + 'AAA' || to_char(id, 'FM000') + FROM generate_series(1, 100) id; +DELETE FROM "S 1"."T 4" WHERE c1 % 3 != 0; -- delete for outer join tests ANALYZE "S 1"."T 1"; ANALYZE "S 1"."T 2"; +ANALYZE "S 1"."T 3"; +ANALYZE "S 1"."T 4"; -- =================================================================== -- create foreign tables @@ -87,6 +118,24 @@ CREATE FOREIGN TABLE ft2 ( ) SERVER loopback; ALTER FOREIGN TABLE ft2 DROP COLUMN cx; +CREATE FOREIGN TABLE ft4 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 3'); + +CREATE FOREIGN TABLE ft5 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 4'); + +CREATE FOREIGN TABLE ft6 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback2 OPTIONS (schema_name 'S 1', table_name 'T 4'); + -- =================================================================== -- tests for validator -- =================================================================== @@ -168,8 +217,6 @@ EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c1 = 102 FOR SHARE; SELECT * FROM ft1 t1 WHERE c1 = 102 FOR SHARE; -- aggregate SELECT COUNT(*) FROM ft1 t1; --- join two tables -SELECT t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; -- subquery SELECT * FROM ft1 t1 WHERE t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 <= 10) ORDER BY c1; -- subquery+MAX @@ -208,10 +255,11 @@ EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c1 = (ARRAY[c1,c2,3])[1]; -- ArrayRef EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c6 = E'foo''s\\bar'; -- check special chars EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c8 = 'foo'; -- can't be sent to remote --- parameterized remote path +-- parameterized remote path for foreign table EXPLAIN (VERBOSE, COSTS false) - SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2; + SELECT * FROM "S 1"."T 1" a, ft2 b WHERE a."C 1" = 47 AND b.c1 = a.c2; SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2; + -- check both safe and unsafe join conditions EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft2 a, ft2 b @@ -270,6 +318,158 @@ EXPLAIN (VERBOSE, COSTS false) SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2; SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2; +-- =================================================================== +-- JOIN queries +-- =================================================================== +-- Analyze ft4 and ft5 so that we have better statistics. These tables do not +-- have use_remote_estimate set. +ANALYZE ft4; +ANALYZE ft5; + +-- join two tables +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +-- join three tables +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10; +SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10; +-- left outer join +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; +-- left outer join + placement of clauses. +-- clauses within the nullable side are not pulled up, but top level clause on +-- non-nullable side is pushed into non-nullable side +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) WHERE t1.c1 < 10; +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) WHERE t1.c1 < 10; +-- clauses within the nullable side are not pulled up, but the top level clause +-- on nullable side is not pushed down into nullable side +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) + WHERE (t2.c1 < 10 OR t2.c1 IS NULL) AND t1.c1 < 10; +SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) + WHERE (t2.c1 < 10 OR t2.c1 IS NULL) AND t1.c1 < 10; +-- right outer join +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft5 t1 RIGHT JOIN ft4 t2 ON (t1.c1 = t2.c1) ORDER BY t2.c1, t1.c1 OFFSET 10 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft5 t1 RIGHT JOIN ft4 t2 ON (t1.c1 = t2.c1) ORDER BY t2.c1, t1.c1 OFFSET 10 LIMIT 10; +-- full outer join +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10; +-- full outer join + WHERE clause, only matched rows +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; +-- join two tables with FOR UPDATE clause +-- tests whole-row reference for row marks +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1; +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1; +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE; +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE; +-- join two tables with FOR SHARE clause +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1; +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1; +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE; +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE; +-- join in CTE +EXPLAIN (COSTS false, VERBOSE) +WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; +WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; +-- ctid with whole-row reference +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +-- SEMI JOIN, not pushed down +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10; +-- ANTI JOIN, not pushed down +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10; +-- CROSS JOIN, not pushed down +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +-- different server, not pushed down. No result expected. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN ft6 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN ft6 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +-- unsafe join conditions (c8 has a UDT), not pushed down. Practically a CROSS +-- JOIN since c8 in both tables has same value. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c8 = t2.c8) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c8 = t2.c8) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +-- unsafe conditions on one side (c8 has a UDT), not pushed down. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = 'foo' ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = 'foo' ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +-- join where unsafe to pushdown condition in WHERE clause has a column not +-- in the SELECT clause. In this test unsafe clause needs to have column +-- references from both joining sides so that the clause is not pushed down +-- into one of the joining sides. +EXPLAIN (COSTS false, VERBOSE) +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = t2.c8 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = t2.c8 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +-- Aggregate after UNION, for testing setrefs +EXPLAIN (COSTS false, VERBOSE) +SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) UNION SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) AS t (t1c1, t2c1) GROUP BY t1c1 ORDER BY t1c1 OFFSET 100 LIMIT 10; +SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) UNION SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) AS t (t1c1, t2c1) GROUP BY t1c1 ORDER BY t1c1 OFFSET 100 LIMIT 10; +-- join with lateral reference +EXPLAIN (COSTS false, VERBOSE) +SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10; +SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10; + +-- create another user for permission, user mapping, effective user tests +CREATE USER view_owner; +-- grant privileges on ft4 and ft5 to view_owner +GRANT ALL ON ft4 TO view_owner; +GRANT ALL ON ft5 TO view_owner; +-- prepare statement with current session user +PREPARE join_stmt AS SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; +EXPLAIN (COSTS OFF, VERBOSE) EXECUTE join_stmt; +EXECUTE join_stmt; +-- change the session user to view_owner and execute the statement. Because of +-- change in session user, the plan should get invalidated and created again. +-- While creating the plan, it should throw error since there is no user mapping +-- available for view_owner. +SET SESSION ROLE view_owner; +EXPLAIN (COSTS OFF, VERBOSE) EXECUTE join_stmt; +EXECUTE join_stmt; +RESET ROLE; +DEALLOCATE join_stmt; + +CREATE VIEW v_ft5 AS SELECT * FROM ft5; +-- change owner of v_ft5 to view_owner so that the effective user for scan on +-- ft5 is view_owner and not the current user. +ALTER VIEW v_ft5 OWNER TO view_owner; +-- create a public user mapping for loopback server +-- drop user mapping for current_user. +DROP USER MAPPING FOR CURRENT_USER SERVER loopback; +CREATE USER MAPPING FOR PUBLIC SERVER loopback; +-- different effective user for permission check, but same user mapping for the +-- joining sides, join pushed down, no result expected. +PREPARE join_stmt AS SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN v_ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10; +EXPLAIN (COSTS false, VERBOSE) EXECUTE join_stmt; +EXECUTE join_stmt; +-- create user mapping for view_owner and execute the prepared statement +-- the join should not be pushed down since joining relations now use two +-- different user mappings +CREATE USER MAPPING FOR view_owner SERVER loopback; +EXPLAIN (COSTS false, VERBOSE) EXECUTE join_stmt; +EXECUTE join_stmt; + +-- recreate the dropped user mapping for further tests +CREATE USER MAPPING FOR CURRENT_USER SERVER loopback; +DROP USER MAPPING FOR PUBLIC SERVER loopback; + -- =================================================================== -- parameterized queries -- =================================================================== @@ -348,6 +548,7 @@ DROP FUNCTION f_test(int); -- =================================================================== ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE int; SELECT * FROM ft1 WHERE c1 = 1; -- ERROR +SELECT ft1.c1, ft2.c2, ft1.c8 FROM ft1, ft2 WHERE ft1.c1 = ft2.c1 AND ft1.c1 = 1; -- ERROR ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE user_enum; -- =================================================================== @@ -973,3 +1174,7 @@ WHERE ftrelid = 'table30000'::regclass AND ftoptions @> array['fetch_size=60000']; ROLLBACK; + +-- Cleanup +DROP OWNED BY view_owner; +DROP USER view_owner; diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml index a90983cb1e..c408ba659b 100644 --- a/doc/src/sgml/postgres-fdw.sgml +++ b/doc/src/sgml/postgres-fdw.sgml @@ -486,6 +486,16 @@ be IMMUTABLE as well. + + When postgres_fdw encounters a join between foreign tables on + the same foreign server, it sends the entire join to the foreign server, + unless for some reason it believes that it will be more efficient to fetch + rows from each table individually, or unless the table references involved + are subject to different user mappings. While sending the JOIN + clauses, it takes the same precautions as mentioned above for the + WHERE clauses. + + The query that is actually sent to the remote server for execution can be examined using EXPLAIN VERBOSE.