postgres_fdw: Fix incorrect handling of row movement for remote partitions.

Commit 3d956d9562 added support for update row movement in postgres_fdw.
This patch fixes the following issues introduced by that commit:

* When a remote partition chosen to insert routed rows into was also an
  UPDATE subplan target rel that would be updated later, the UPDATE that
  used a direct modification plan modified those routed rows incorrectly
  because those routed rows were visible to the later UPDATE command.
  The right fix for this would be to have some way in postgres_fdw in
  which the later UPDATE command ignores those routed rows, but it seems
  hard to do so with the current infrastructure.  For now throw an error
  in that case.

* When a remote partition chosen to insert routed rows into was also an
  UPDATE subplan target rel, fmstate created for the UPDATE that used a
  non-direct modification plan was mistakenly overridden by another
  fmstate created for inserting those routed rows into the partition.
  This caused 1) server crash when the partition would be updated later,
  and 2) resource leak when the partition had been already updated.  To
  avoid that, adjust the treatment of the fmstate for the inserting.  As
  for #1, since we would also have the incorrectness issue as mentioned
  above, error out in that case as well.

Update the docs to mention that postgres_fdw currently does not handle
the case where a remote partition chosen to insert a routed row into is
also an UPDATE subplan target rel that will be updated later.

Author: Amit Langote and Etsuro Fujita
Reviewed-by: Amit Langote
Backpatch-through: 11 where row movement in postgres_fdw was added
Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
This commit is contained in:
Etsuro Fujita 2019-04-24 18:31:50 +09:00
parent fdc7efcc30
commit 5c47049180
4 changed files with 239 additions and 2 deletions

View File

@ -7734,6 +7734,137 @@ update utrtest set a = 1 where a = 2 returning *;
(1 row)
drop trigger loct_br_insert_trigger on loct;
-- We can move rows to a foreign partition that has been updated already,
-- but can't move rows to a foreign partition that hasn't been updated yet
delete from utrtest;
insert into utrtest values (1, 'foo');
insert into utrtest values (2, 'qux');
-- Test the former case:
-- with a direct modification plan
explain (verbose, costs off)
update utrtest set a = 1 returning *;
QUERY PLAN
-----------------------------------------------------------------
Update on public.utrtest
Output: remp.a, remp.b
Foreign Update on public.remp
Update on public.locp
-> Foreign Update on public.remp
Remote SQL: UPDATE public.loct SET a = 1 RETURNING a, b
-> Seq Scan on public.locp
Output: 1, locp.b, locp.ctid
(8 rows)
update utrtest set a = 1 returning *;
a | b
---+-----
1 | foo
1 | qux
(2 rows)
delete from utrtest;
insert into utrtest values (1, 'foo');
insert into utrtest values (2, 'qux');
-- with a non-direct modification plan
explain (verbose, costs off)
update utrtest set a = 1 from (values (1), (2)) s(x) where a = s.x returning *;
QUERY PLAN
------------------------------------------------------------------------------
Update on public.utrtest
Output: remp.a, remp.b, "*VALUES*".column1
Foreign Update on public.remp
Remote SQL: UPDATE public.loct SET a = $2 WHERE ctid = $1 RETURNING a, b
Update on public.locp
-> Hash Join
Output: 1, remp.b, remp.ctid, "*VALUES*".*, "*VALUES*".column1
Hash Cond: (remp.a = "*VALUES*".column1)
-> Foreign Scan on public.remp
Output: remp.b, remp.ctid, remp.a
Remote SQL: SELECT a, b, ctid FROM public.loct FOR UPDATE
-> Hash
Output: "*VALUES*".*, "*VALUES*".column1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".*, "*VALUES*".column1
-> Hash Join
Output: 1, locp.b, locp.ctid, "*VALUES*".*, "*VALUES*".column1
Hash Cond: (locp.a = "*VALUES*".column1)
-> Seq Scan on public.locp
Output: locp.b, locp.ctid, locp.a
-> Hash
Output: "*VALUES*".*, "*VALUES*".column1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".*, "*VALUES*".column1
(24 rows)
update utrtest set a = 1 from (values (1), (2)) s(x) where a = s.x returning *;
a | b | x
---+-----+---
1 | foo | 1
1 | qux | 2
(2 rows)
-- Change the definition of utrtest so that the foreign partition get updated
-- after the local partition
delete from utrtest;
alter table utrtest detach partition remp;
drop foreign table remp;
alter table loct drop constraint loct_a_check;
alter table loct add check (a in (3));
create foreign table remp (a int check (a in (3)), b text) server loopback options (table_name 'loct');
alter table utrtest attach partition remp for values in (3);
insert into utrtest values (2, 'qux');
insert into utrtest values (3, 'xyzzy');
-- Test the latter case:
-- with a direct modification plan
explain (verbose, costs off)
update utrtest set a = 3 returning *;
QUERY PLAN
-----------------------------------------------------------------
Update on public.utrtest
Output: locp.a, locp.b
Update on public.locp
Foreign Update on public.remp
-> Seq Scan on public.locp
Output: 3, locp.b, locp.ctid
-> Foreign Update on public.remp
Remote SQL: UPDATE public.loct SET a = 3 RETURNING a, b
(8 rows)
update utrtest set a = 3 returning *; -- ERROR
ERROR: cannot route tuples into foreign table to be updated "remp"
-- with a non-direct modification plan
explain (verbose, costs off)
update utrtest set a = 3 from (values (2), (3)) s(x) where a = s.x returning *;
QUERY PLAN
------------------------------------------------------------------------------
Update on public.utrtest
Output: locp.a, locp.b, "*VALUES*".column1
Update on public.locp
Foreign Update on public.remp
Remote SQL: UPDATE public.loct SET a = $2 WHERE ctid = $1 RETURNING a, b
-> Hash Join
Output: 3, locp.b, locp.ctid, "*VALUES*".*, "*VALUES*".column1
Hash Cond: (locp.a = "*VALUES*".column1)
-> Seq Scan on public.locp
Output: locp.b, locp.ctid, locp.a
-> Hash
Output: "*VALUES*".*, "*VALUES*".column1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".*, "*VALUES*".column1
-> Hash Join
Output: 3, remp.b, remp.ctid, "*VALUES*".*, "*VALUES*".column1
Hash Cond: (remp.a = "*VALUES*".column1)
-> Foreign Scan on public.remp
Output: remp.b, remp.ctid, remp.a
Remote SQL: SELECT a, b, ctid FROM public.loct FOR UPDATE
-> Hash
Output: "*VALUES*".*, "*VALUES*".column1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".*, "*VALUES*".column1
(24 rows)
update utrtest set a = 3 from (values (2), (3)) s(x) where a = s.x returning *; -- ERROR
ERROR: cannot route tuples into foreign table to be updated "remp"
drop table utrtest;
drop table loct;
-- Test copy tuple routing

View File

@ -185,6 +185,10 @@ typedef struct PgFdwModifyState
/* working memory context */
MemoryContext temp_cxt; /* context for per-tuple temporary data */
/* for update row movement if subplan result rel */
struct PgFdwModifyState *aux_fmstate; /* foreign-insert state, if
* created */
} PgFdwModifyState;
/*
@ -1844,8 +1848,22 @@ postgresExecForeignInsert(EState *estate,
TupleTableSlot *slot,
TupleTableSlot *planSlot)
{
return execute_foreign_modify(estate, resultRelInfo, CMD_INSERT,
PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
TupleTableSlot *rslot;
/*
* If the fmstate has aux_fmstate set, use the aux_fmstate (see
* postgresBeginForeignInsert())
*/
if (fmstate->aux_fmstate)
resultRelInfo->ri_FdwState = fmstate->aux_fmstate;
rslot = execute_foreign_modify(estate, resultRelInfo, CMD_INSERT,
slot, planSlot);
/* Revert that change */
if (fmstate->aux_fmstate)
resultRelInfo->ri_FdwState = fmstate;
return rslot;
}
/*
@ -1915,6 +1933,22 @@ postgresBeginForeignInsert(ModifyTableState *mtstate,
List *retrieved_attrs = NIL;
bool doNothing = false;
/*
* If the foreign table we are about to insert routed rows into is also
* an UPDATE subplan result rel that will be updated later, proceeding
* with the INSERT will result in the later UPDATE incorrectly modifying
* those routed rows, so prevent the INSERT --- it would be nice if we
* could handle this case; but for now, throw an error for safety.
*/
if (plan && plan->operation == CMD_UPDATE &&
(resultRelInfo->ri_usesFdwDirectModify ||
resultRelInfo->ri_FdwState) &&
resultRelInfo > mtstate->resultRelInfo + mtstate->mt_whichplan)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot route tuples into foreign table to be updated \"%s\"",
RelationGetRelationName(rel))));
initStringInfo(&sql);
/* We transmit all columns that are defined in the foreign table. */
@ -1983,7 +2017,19 @@ postgresBeginForeignInsert(ModifyTableState *mtstate,
retrieved_attrs != NIL,
retrieved_attrs);
resultRelInfo->ri_FdwState = fmstate;
/*
* If the given resultRelInfo already has PgFdwModifyState set, it means
* the foreign table is an UPDATE subplan result rel; in which case, store
* the resulting state into the aux_fmstate of the PgFdwModifyState.
*/
if (resultRelInfo->ri_FdwState)
{
Assert(plan && plan->operation == CMD_UPDATE);
Assert(resultRelInfo->ri_usesFdwDirectModify == false);
((PgFdwModifyState *) resultRelInfo->ri_FdwState)->aux_fmstate = fmstate;
}
else
resultRelInfo->ri_FdwState = fmstate;
}
/*
@ -1998,6 +2044,13 @@ postgresEndForeignInsert(EState *estate,
Assert(fmstate != NULL);
/*
* If the fmstate has aux_fmstate set, get the aux_fmstate (see
* postgresBeginForeignInsert())
*/
if (fmstate->aux_fmstate)
fmstate = fmstate->aux_fmstate;
/* Destroy the execution state */
finish_foreign_modify(fmstate);
}
@ -3482,6 +3535,9 @@ create_foreign_modify(EState *estate,
Assert(fmstate->p_nums <= n_params);
/* Initialize auxiliary state */
fmstate->aux_fmstate = NULL;
return fmstate;
}

View File

@ -2015,6 +2015,51 @@ update utrtest set a = 1 where a = 2 returning *;
drop trigger loct_br_insert_trigger on loct;
-- We can move rows to a foreign partition that has been updated already,
-- but can't move rows to a foreign partition that hasn't been updated yet
delete from utrtest;
insert into utrtest values (1, 'foo');
insert into utrtest values (2, 'qux');
-- Test the former case:
-- with a direct modification plan
explain (verbose, costs off)
update utrtest set a = 1 returning *;
update utrtest set a = 1 returning *;
delete from utrtest;
insert into utrtest values (1, 'foo');
insert into utrtest values (2, 'qux');
-- with a non-direct modification plan
explain (verbose, costs off)
update utrtest set a = 1 from (values (1), (2)) s(x) where a = s.x returning *;
update utrtest set a = 1 from (values (1), (2)) s(x) where a = s.x returning *;
-- Change the definition of utrtest so that the foreign partition get updated
-- after the local partition
delete from utrtest;
alter table utrtest detach partition remp;
drop foreign table remp;
alter table loct drop constraint loct_a_check;
alter table loct add check (a in (3));
create foreign table remp (a int check (a in (3)), b text) server loopback options (table_name 'loct');
alter table utrtest attach partition remp for values in (3);
insert into utrtest values (2, 'qux');
insert into utrtest values (3, 'xyzzy');
-- Test the latter case:
-- with a direct modification plan
explain (verbose, costs off)
update utrtest set a = 3 returning *;
update utrtest set a = 3 returning *; -- ERROR
-- with a non-direct modification plan
explain (verbose, costs off)
update utrtest set a = 3 from (values (2), (3)) s(x) where a = s.x returning *;
update utrtest set a = 3 from (values (2), (3)) s(x) where a = s.x returning *; -- ERROR
drop table utrtest;
drop table loct;

View File

@ -74,6 +74,11 @@
UPDATE</literal> clause. However, the <literal>ON CONFLICT DO NOTHING</literal>
clause is supported, provided a unique index inference specification
is omitted.
Note also that <filename>postgres_fdw</filename> supports row movement
invoked by <command>UPDATE</command> statements executed on partitioned
tables, but it currently does not handle the case where a remote partition
chosen to insert a moved row into is also an <command>UPDATE</command>
target partition that will be updated later.
</para>
<para>