Fix double publish of child table's data.

We publish the child table's data twice for a publication that has both
child and parent tables and is published with publish_via_partition_root
as true. This happens because subscribers will initiate synchronization
using both parent and child tables, since it gets both as separate tables
in the initial table list.

Ensure that pg_publication_tables returns only parent tables in such
cases.

Author: Hou Zhijie
Reviewed-by: Greg Nancarrow, Amit Langote, Vignesh C, Amit Kapila
Backpatch-through: 13
Discussion: https://postgr.es/m/OS0PR01MB57167F45D481F78CDC5986F794B99@OS0PR01MB5716.jpnprd01.prod.outlook.com
This commit is contained in:
Amit Kapila 2021-12-09 08:36:59 +05:30
parent bcf60585e6
commit 5e97905a2c
4 changed files with 40 additions and 31 deletions

View File

@ -142,7 +142,7 @@ is_publishable_class(Oid relid, Form_pg_class reltuple)
* the publication.
*/
static List *
filter_partitions(List *relids, List *schemarelids)
filter_partitions(List *relids)
{
List *result = NIL;
ListCell *lc;
@ -161,16 +161,8 @@ filter_partitions(List *relids, List *schemarelids)
{
Oid ancestor = lfirst_oid(lc2);
/*
* Check if the parent table exists in the published table list.
*
* XXX As of now, we do this if the partition relation or the
* partition relation's ancestor is present in schema publication
* relations.
*/
if (list_member_oid(relids, ancestor) &&
(list_member_oid(schemarelids, relid) ||
list_member_oid(schemarelids, ancestor)))
/* Check if the parent table exists in the published table list. */
if (list_member_oid(relids, ancestor))
{
skip = true;
break;
@ -913,22 +905,17 @@ pg_get_publication_tables(PG_FUNCTION_ARGS)
PUBLICATION_PART_ROOT :
PUBLICATION_PART_LEAF);
tables = list_concat_unique_oid(relids, schemarelids);
if (schemarelids && publication->pubviaroot)
{
/*
* If the publication publishes partition changes via their
* respective root partitioned tables, we must exclude
* partitions in favor of including the root partitioned
* tables. Otherwise, the function could return both the child
* and parent tables which could cause data of the child table
* to be double-published on the subscriber side.
*
* XXX As of now, we do this when a publication has associated
* schema or for all tables publication. See
* GetAllTablesPublicationRelations().
*/
tables = filter_partitions(tables, schemarelids);
}
/*
* If the publication publishes partition changes via their
* respective root partitioned tables, we must exclude partitions
* in favor of including the root partitioned tables. Otherwise,
* the function could return both the child and parent tables
* which could cause data of the child table to be
* double-published on the subscriber side.
*/
if (publication->pubviaroot)
tables = filter_partitions(tables);
}
funcctx->user_fctx = (void *) tables;

View File

@ -829,6 +829,14 @@ SELECT * FROM pg_publication_tables;
pub | sch2 | tbl1_part1
(1 row)
-- Table publication that includes both the parent table and the child table
ALTER PUBLICATION pub ADD TABLE sch1.tbl1;
SELECT * FROM pg_publication_tables;
pubname | schemaname | tablename
---------+------------+-----------
pub | sch1 | tbl1
(1 row)
DROP PUBLICATION pub;
-- Schema publication that does not include the schema that has the parent table
CREATE PUBLICATION pub FOR ALL TABLES IN SCHEMA sch2 WITH (PUBLISH_VIA_PARTITION_ROOT=0);

View File

@ -474,6 +474,10 @@ DROP PUBLICATION pub;
CREATE PUBLICATION pub FOR TABLE sch2.tbl1_part1 WITH (PUBLISH_VIA_PARTITION_ROOT=1);
SELECT * FROM pg_publication_tables;
-- Table publication that includes both the parent table and the child table
ALTER PUBLICATION pub ADD TABLE sch1.tbl1;
SELECT * FROM pg_publication_tables;
DROP PUBLICATION pub;
-- Schema publication that does not include the schema that has the parent table
CREATE PUBLICATION pub FOR ALL TABLES IN SCHEMA sch2 WITH (PUBLISH_VIA_PARTITION_ROOT=0);

View File

@ -6,7 +6,7 @@ use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More tests => 62;
use Test::More tests => 63;
# setup
@ -412,11 +412,16 @@ $node_publisher->safe_psql('postgres',
$node_publisher->safe_psql('postgres',
"ALTER PUBLICATION pub_all SET (publish_via_partition_root = true)");
# Note: tab3_1's parent is not in the publication, in which case its
# changes are published using own identity.
# changes are published using own identity. For tab2, even though both parent
# and child tables are present but changes will be replicated via the parent's
# identity and only once.
$node_publisher->safe_psql('postgres',
"CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab3_1 WITH (publish_via_partition_root = true)"
"CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab2_1, tab3_1 WITH (publish_via_partition_root = true)"
);
# prepare data for the initial sync
$node_publisher->safe_psql('postgres', "INSERT INTO tab2 VALUES (1)");
# subscriber 1
$node_subscriber1->safe_psql('postgres', "DROP SUBSCRIPTION sub1");
$node_subscriber1->safe_psql('postgres',
@ -468,12 +473,17 @@ $node_subscriber1->poll_query_until('postgres', $synced_query)
$node_subscriber2->poll_query_until('postgres', $synced_query)
or die "Timed out while waiting for subscriber to synchronize data";
# check that data is synced correctly
$result = $node_subscriber1->safe_psql('postgres',
"SELECT c, a FROM tab2");
is( $result, qq(sub1_tab2|1), 'initial data synced for pub_viaroot');
# insert
$node_publisher->safe_psql('postgres', "INSERT INTO tab1 VALUES (1), (0)");
$node_publisher->safe_psql('postgres', "INSERT INTO tab1_1 (a) VALUES (3)");
$node_publisher->safe_psql('postgres', "INSERT INTO tab1_2 VALUES (5)");
$node_publisher->safe_psql('postgres',
"INSERT INTO tab2 VALUES (1), (0), (3), (5)");
"INSERT INTO tab2 VALUES (0), (3), (5)");
$node_publisher->safe_psql('postgres',
"INSERT INTO tab3 VALUES (1), (0), (3), (5)");