mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-23 19:39:53 +08:00
Fix old-fd issues using global barriers everywhere.
Commits4eb21763
andb74e94dc
introduced a way to force every backend to close all relation files, to fix an ancient Windows-only bug. This commit extends that behavior to all operating systems and adds a couple of extra barrier points, to fix a totally different class of bug: the reuse of relfilenodes in scenarios that have no other kind of cache invalidation to prevent file descriptor mix-ups. In all releases, data corruption could occur when you moved a database to another tablespace and then back again. Despite that, no back-patch for now as the infrastructure required is too new and invasive. In master only, since commitaa010514
, it could also happen when using CREATE DATABASE with a user-supplied OID or via pg_upgrade. Author: Andres Freund <andres@anarazel.de> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/20220209220004.kb3dgtn2x2k2gtdm%40alap3.anarazel.de
This commit is contained in:
parent
b74e94dc27
commit
e2f65f4255
@ -1687,10 +1687,8 @@ dropdb(const char *dbname, bool missing_ok, bool force)
|
||||
*/
|
||||
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
|
||||
|
||||
#if defined(USE_BARRIER_SMGRRELEASE)
|
||||
/* Close all smgr fds in all backends. */
|
||||
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Remove all tablespace subdirs belonging to the database.
|
||||
@ -1940,10 +1938,8 @@ movedb(const char *dbname, const char *tblspcname)
|
||||
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
|
||||
| CHECKPOINT_FLUSH_ALL);
|
||||
|
||||
#if defined(USE_BARRIER_SMGRRELEASE)
|
||||
/* Close all smgr fds in all backends. */
|
||||
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Now drop all buffers holding data of the target database; they should
|
||||
@ -3054,6 +3050,9 @@ dbase_redo(XLogReaderState *record)
|
||||
*/
|
||||
FlushDatabaseBuffers(xlrec->src_db_id);
|
||||
|
||||
/* Close all sgmr fds in all backends. */
|
||||
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
|
||||
|
||||
/*
|
||||
* Copy this subdirectory to the new location
|
||||
*
|
||||
@ -3111,10 +3110,8 @@ dbase_redo(XLogReaderState *record)
|
||||
/* Clean out the xlog relcache too */
|
||||
XLogDropDatabase(xlrec->db_id);
|
||||
|
||||
#if defined(USE_BARRIER_SMGRRELEASE)
|
||||
/* Close all sgmr fds in all backends. */
|
||||
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
|
||||
#endif
|
||||
|
||||
for (i = 0; i < xlrec->ntablespaces; i++)
|
||||
{
|
||||
|
@ -548,11 +548,10 @@ DropTableSpace(DropTableSpaceStmt *stmt)
|
||||
* use a global barrier to ask all backends to close all files, and
|
||||
* wait until they're finished.
|
||||
*/
|
||||
#if defined(USE_BARRIER_SMGRRELEASE)
|
||||
LWLockRelease(TablespaceCreateLock);
|
||||
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
|
||||
LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
|
||||
#endif
|
||||
|
||||
/* And now try again. */
|
||||
if (!destroy_tablespace_directories(tablespaceoid, false))
|
||||
{
|
||||
@ -1574,6 +1573,9 @@ tblspc_redo(XLogReaderState *record)
|
||||
{
|
||||
xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
|
||||
|
||||
/* Close all smgr fds in all backends. */
|
||||
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
|
||||
|
||||
/*
|
||||
* If we issued a WAL record for a drop tablespace it implies that
|
||||
* there were no files in it at all when the DROP was done. That means
|
||||
@ -1591,11 +1593,6 @@ tblspc_redo(XLogReaderState *record)
|
||||
*/
|
||||
if (!destroy_tablespace_directories(xlrec->ts_id, true))
|
||||
{
|
||||
#if defined(USE_BARRIER_SMGRRELEASE)
|
||||
/* Close all smgr fds in all backends. */
|
||||
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
|
||||
#endif
|
||||
|
||||
ResolveRecoveryConflictWithTablespace(xlrec->ts_id);
|
||||
|
||||
/*
|
||||
|
@ -152,17 +152,6 @@
|
||||
#define EXEC_BACKEND
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If USE_BARRIER_SMGRRELEASE is defined, certain code paths that unlink
|
||||
* directories will ask other backends to close all smgr file descriptors.
|
||||
* This is enabled on Windows, because otherwise unlinked but still open files
|
||||
* can prevent rmdir(containing_directory) from succeeding. On other
|
||||
* platforms, it can be defined to exercise those code paths.
|
||||
*/
|
||||
#if defined(WIN32)
|
||||
#define USE_BARRIER_SMGRRELEASE
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Define this if your operating system supports link()
|
||||
*/
|
||||
|
@ -9,7 +9,7 @@
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
EXTRA_INSTALL=contrib/test_decoding
|
||||
EXTRA_INSTALL=contrib/test_decoding contrib/pg_prewarm
|
||||
|
||||
subdir = src/test/recovery
|
||||
top_builddir = ../../..
|
||||
|
233
src/test/recovery/t/032_relfilenode_reuse.pl
Normal file
233
src/test/recovery/t/032_relfilenode_reuse.pl
Normal file
@ -0,0 +1,233 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
use File::Basename;
|
||||
|
||||
|
||||
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
|
||||
$node_primary->init(allows_streaming => 1);
|
||||
$node_primary->append_conf('postgresql.conf', q[
|
||||
allow_in_place_tablespaces = true
|
||||
log_connections=on
|
||||
# to avoid "repairing" corruption
|
||||
full_page_writes=off
|
||||
log_min_messages=debug2
|
||||
autovacuum_naptime=1s
|
||||
shared_buffers=1MB
|
||||
]);
|
||||
$node_primary->start;
|
||||
|
||||
|
||||
# Create streaming standby linking to primary
|
||||
my $backup_name = 'my_backup';
|
||||
$node_primary->backup($backup_name);
|
||||
my $node_standby = PostgreSQL::Test::Cluster->new('standby');
|
||||
$node_standby->init_from_backup($node_primary, $backup_name,
|
||||
has_streaming => 1);
|
||||
$node_standby->start;
|
||||
|
||||
# To avoid hanging while expecting some specific input from a psql
|
||||
# instance being driven by us, add a timeout high enough that it
|
||||
# should never trigger even on very slow machines, unless something
|
||||
# is really wrong.
|
||||
my $psql_timeout = IPC::Run::timer(300);
|
||||
|
||||
my %psql_primary = (stdin => '', stdout => '', stderr => '');
|
||||
$psql_primary{run} = IPC::Run::start(
|
||||
[ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ],
|
||||
'<',
|
||||
\$psql_primary{stdin},
|
||||
'>',
|
||||
\$psql_primary{stdout},
|
||||
'2>',
|
||||
\$psql_primary{stderr},
|
||||
$psql_timeout);
|
||||
|
||||
my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => '');
|
||||
$psql_standby{run} = IPC::Run::start(
|
||||
[ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ],
|
||||
'<',
|
||||
\$psql_standby{stdin},
|
||||
'>',
|
||||
\$psql_standby{stdout},
|
||||
'2>',
|
||||
\$psql_standby{stderr},
|
||||
$psql_timeout);
|
||||
|
||||
|
||||
# Create template database with a table that we'll update, to trigger dirty
|
||||
# rows. Using a template database + preexisting rows makes it a bit easier to
|
||||
# reproduce, because there's no cache invalidations generated.
|
||||
|
||||
$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db_template OID = 50000;");
|
||||
$node_primary->safe_psql('conflict_db_template', q[
|
||||
CREATE TABLE large(id serial primary key, dataa text, datab text);
|
||||
INSERT INTO large(dataa, datab) SELECT g.i::text, 1 FROM generate_series(1, 4000) g(i);]);
|
||||
$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
|
||||
|
||||
$node_primary->safe_psql('postgres', q[
|
||||
CREATE EXTENSION pg_prewarm;
|
||||
CREATE TABLE replace_sb(data text);
|
||||
INSERT INTO replace_sb(data) SELECT random()::text FROM generate_series(1, 15000);]);
|
||||
|
||||
# Use longrunning transactions, so that AtEOXact_SMgr doesn't close files
|
||||
send_query_and_wait(
|
||||
\%psql_primary,
|
||||
q[BEGIN;],
|
||||
qr/BEGIN/m);
|
||||
send_query_and_wait(
|
||||
\%psql_standby,
|
||||
q[BEGIN;],
|
||||
qr/BEGIN/m);
|
||||
|
||||
# Cause lots of dirty rows in shared_buffers
|
||||
$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 1;");
|
||||
|
||||
# Now do a bunch of work in another database. That will end up needing to
|
||||
# write back dirty data from the previous step, opening the relevant file
|
||||
# descriptors
|
||||
cause_eviction(\%psql_primary, \%psql_standby);
|
||||
|
||||
# drop and recreate database
|
||||
$node_primary->safe_psql('postgres', "DROP DATABASE conflict_db;");
|
||||
$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
|
||||
|
||||
verify($node_primary, $node_standby, 1,
|
||||
"initial contents as expected");
|
||||
|
||||
# Again cause lots of dirty rows in shared_buffers, but use a different update
|
||||
# value so we can check everything is OK
|
||||
$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 2;");
|
||||
|
||||
# Again cause a lot of IO. That'll again write back dirty data, but uses (XXX
|
||||
# adjust after bugfix) the already opened file descriptor.
|
||||
# FIXME
|
||||
cause_eviction(\%psql_primary, \%psql_standby);
|
||||
|
||||
verify($node_primary, $node_standby, 2,
|
||||
"update to reused relfilenode (due to DB oid conflict) is not lost");
|
||||
|
||||
|
||||
$node_primary->safe_psql('conflict_db', "VACUUM FULL large;");
|
||||
$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 3;");
|
||||
|
||||
verify($node_primary, $node_standby, 3,
|
||||
"restored contents as expected");
|
||||
|
||||
# Test for old filehandles after moving a database in / out of tablespace
|
||||
$node_primary->safe_psql('postgres', q[CREATE TABLESPACE test_tablespace LOCATION '']);
|
||||
|
||||
# cause dirty buffers
|
||||
$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 4;");
|
||||
# cause files to be opened in backend in other database
|
||||
cause_eviction(\%psql_primary, \%psql_standby);
|
||||
|
||||
# move database back / forth
|
||||
$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
|
||||
$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE pg_default');
|
||||
|
||||
# cause dirty buffers
|
||||
$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 5;");
|
||||
cause_eviction(\%psql_primary, \%psql_standby);
|
||||
|
||||
verify($node_primary, $node_standby, 5,
|
||||
"post move contents as expected");
|
||||
|
||||
$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
|
||||
|
||||
$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 7;");
|
||||
cause_eviction(\%psql_primary, \%psql_standby);
|
||||
$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 8;");
|
||||
$node_primary->safe_psql('postgres', 'DROP DATABASE conflict_db');
|
||||
$node_primary->safe_psql('postgres', 'DROP TABLESPACE test_tablespace');
|
||||
|
||||
$node_primary->safe_psql('postgres', 'REINDEX TABLE pg_database');
|
||||
|
||||
|
||||
# explicitly shut down psql instances gracefully - to avoid hangs
|
||||
# or worse on windows
|
||||
$psql_primary{stdin} .= "\\q\n";
|
||||
$psql_primary{run}->finish;
|
||||
$psql_standby{stdin} .= "\\q\n";
|
||||
$psql_standby{run}->finish;
|
||||
|
||||
$node_primary->stop();
|
||||
$node_standby->stop();
|
||||
|
||||
# Make sure that there weren't crashes during shutdown
|
||||
|
||||
command_like([ 'pg_controldata', $node_primary->data_dir ],
|
||||
qr/Database cluster state:\s+shut down\n/, 'primary shut down ok');
|
||||
command_like([ 'pg_controldata', $node_standby->data_dir ],
|
||||
qr/Database cluster state:\s+shut down in recovery\n/, 'standby shut down ok');
|
||||
done_testing();
|
||||
|
||||
sub verify
|
||||
{
|
||||
my ($primary, $standby, $counter, $message) = @_;
|
||||
|
||||
my $query = "SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10";
|
||||
is($primary->safe_psql('conflict_db', $query),
|
||||
"$counter|4000",
|
||||
"primary: $message");
|
||||
|
||||
$primary->wait_for_catchup($standby);
|
||||
is($standby->safe_psql('conflict_db', $query),
|
||||
"$counter|4000",
|
||||
"standby: $message");
|
||||
}
|
||||
|
||||
sub cause_eviction
|
||||
{
|
||||
my ($psql_primary, $psql_standby) = @_;
|
||||
|
||||
send_query_and_wait(
|
||||
$psql_primary,
|
||||
q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
|
||||
qr/warmed_buffers/m);
|
||||
|
||||
send_query_and_wait(
|
||||
$psql_standby,
|
||||
q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
|
||||
qr/warmed_buffers/m);
|
||||
}
|
||||
|
||||
# Send query, wait until string matches
|
||||
sub send_query_and_wait
|
||||
{
|
||||
my ($psql, $query, $untl) = @_;
|
||||
my $ret;
|
||||
|
||||
# send query
|
||||
$$psql{stdin} .= $query;
|
||||
$$psql{stdin} .= "\n";
|
||||
|
||||
# wait for query results
|
||||
$$psql{run}->pump_nb();
|
||||
while (1)
|
||||
{
|
||||
last if $$psql{stdout} =~ /$untl/;
|
||||
|
||||
if ($psql_timeout->is_expired)
|
||||
{
|
||||
BAIL_OUT("aborting wait: program timed out\n"
|
||||
. "stream contents: >>$$psql{stdout}<<\n"
|
||||
. "pattern searched for: $untl\n");
|
||||
return 0;
|
||||
}
|
||||
if (not $$psql{run}->pumpable())
|
||||
{
|
||||
BAIL_OUT("aborting wait: program died\n"
|
||||
. "stream contents: >>$$psql{stdout}<<\n"
|
||||
. "pattern searched for: $untl\n");
|
||||
return 0;
|
||||
}
|
||||
$$psql{run}->pump();
|
||||
}
|
||||
|
||||
$$psql{stdout} = '';
|
||||
|
||||
return 1;
|
||||
}
|
Loading…
Reference in New Issue
Block a user