mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-03 08:00:21 +08:00
Add test case for an archive recovery corner case.
While I was working on a patch to refactor things around xlog.c, I mixed up EndOfLogTLI and replayTLI at the end of recovery. As a result, if you recovered to a point with a lower-numbered timeline in a WAL segment that has a higher TLI in the filename, the end-of-recovery WAL record was created with invalid PrevTimeLineId. I noticed that while self-reviewing, but no tests failed. So add a test to cover that corner case. Thanks to Amul Sul who also submitted a test case for the same corner case, although this patch is different from that. Reviewed-by: Amul Sul, Michael Paquier Discussion: https://www.postgresql.org/message-id/52bc9ccd-8591-431b-0086-15d9acf25a3f@iki.fi Discussion: https://www.postgresql.org/message-id/CAAJ_b94Vjt5cXGza_1MkjLQWciNdEemsmiWuQj0d%3DM7JfjAa1g%40mail.gmail.com
This commit is contained in:
parent
1383d52faa
commit
50e5bc582a
176
src/test/recovery/t/028_pitr_timelines.pl
Normal file
176
src/test/recovery/t/028_pitr_timelines.pl
Normal file
@ -0,0 +1,176 @@
|
||||
# Copyright (c) 2022, PostgreSQL Global Development Group
|
||||
|
||||
# Test recovering to a point-in-time using WAL archive, such that the
|
||||
# target point is physically in a WAL segment with a higher TLI than
|
||||
# the target point's TLI. For example, imagine that the following WAL
|
||||
# segments exist in the WAL archive:
|
||||
#
|
||||
# 000000010000000000000001
|
||||
# 000000010000000000000002
|
||||
# 000000020000000000000003
|
||||
#
|
||||
# The timeline switch happened in the middle of WAL segment 3, but it
|
||||
# was never archived on timeline 1. The first half of
|
||||
# 000000020000000000000003 contains the WAL from timeline 1 up to the
|
||||
# point where the timeline switch happened. If you now perform
|
||||
# archive recovery with recovery target point in that first half of
|
||||
# segment 3, archive recovery will find the WAL up to that point in
|
||||
# segment 000000020000000000000003, but it will not follow the
|
||||
# timeline switch to timeline 2, and creates a timeline switching
|
||||
# end-of-recovery record with TLI 1 -> 3. That's what this test case
|
||||
# tests.
|
||||
#
|
||||
# The comments below contain lists of WAL segments at different points
|
||||
# in the tests, to make it easier to follow along. They are correct
|
||||
# as of this writing, but the exact WAL segment numbers could change
|
||||
# if the backend logic for when it switches to a new segment changes.
|
||||
# The actual checks are not sensitive to that.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
use File::Compare;
|
||||
|
||||
# Initialize and start primary node with WAL archiving
|
||||
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
|
||||
$node_primary->init(has_archiving => 1, allows_streaming => 1);
|
||||
$node_primary->start;
|
||||
|
||||
# Take a backup.
|
||||
my $backup_name = 'my_backup';
|
||||
$node_primary->backup($backup_name);
|
||||
|
||||
# Workload with some transactions, and the target restore point.
|
||||
$node_primary->psql(
|
||||
'postgres', qq{
|
||||
CREATE TABLE foo(i int);
|
||||
INSERT INTO foo VALUES(1);
|
||||
SELECT pg_create_restore_point('rp');
|
||||
INSERT INTO foo VALUES(2);
|
||||
});
|
||||
|
||||
# Contents of the WAL archive at this point:
|
||||
#
|
||||
# 000000010000000000000001
|
||||
# 000000010000000000000002
|
||||
# 000000010000000000000002.00000028.backup
|
||||
#
|
||||
# The operations on the test table and the restore point went into WAL
|
||||
# segment 3, but it hasn't been archived yet.
|
||||
|
||||
# Start a standby node, and wait for it to catch up.
|
||||
my $node_standby = PostgreSQL::Test::Cluster->new('standby');
|
||||
$node_standby->init_from_backup(
|
||||
$node_primary, $backup_name,
|
||||
standby => 1,
|
||||
has_streaming => 1,
|
||||
has_archiving => 1,
|
||||
has_restoring => 0);
|
||||
$node_standby->append_conf('postgresql.conf', 'archive_mode = always');
|
||||
$node_standby->start;
|
||||
$node_primary->wait_for_catchup($node_standby);
|
||||
|
||||
# Check that it's really caught up.
|
||||
my $result = $node_standby->safe_psql('postgres', "SELECT max(i) FROM foo;");
|
||||
is($result, qq{2}, "check table contents after archive recovery");
|
||||
|
||||
# Kill the old primary, before it archives the most recent WAL segment that
|
||||
# contains all the INSERTs.
|
||||
$node_primary->stop('immediate');
|
||||
|
||||
# Promote the standby, and switch WAL so that it archives a WAL segment
|
||||
# that contains all the INSERTs, on a new timeline.
|
||||
$node_standby->promote;
|
||||
|
||||
# Find next WAL segment to be archived.
|
||||
my $walfile_to_be_archived = $node_standby->safe_psql('postgres',
|
||||
"SELECT pg_walfile_name(pg_current_wal_lsn());");
|
||||
|
||||
# Make WAL segment eligible for archival
|
||||
$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()');
|
||||
|
||||
# Wait until the WAL segment has been archived.
|
||||
my $archive_wait_query =
|
||||
"SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
|
||||
$node_standby->poll_query_until('postgres', $archive_wait_query)
|
||||
or die "Timed out while waiting for WAL segment to be archived";
|
||||
my $last_archived_wal_file = $walfile_to_be_archived;
|
||||
|
||||
# Ok, the standby has now archived the WAL on timeline 2. We don't
|
||||
# need the standby anymore.
|
||||
$node_standby->stop;
|
||||
|
||||
# Contents of the WAL archive at this point:
|
||||
#
|
||||
# 000000010000000000000001
|
||||
# 000000010000000000000002
|
||||
# 000000010000000000000002.00000028.backup
|
||||
# 000000010000000000000003.partial
|
||||
# 000000020000000000000003
|
||||
# 00000002.history
|
||||
#
|
||||
# The operations on the test table and the restore point are in
|
||||
# segment 3. They are part of timeline 1, but were not archived by
|
||||
# the primary yet. However, they were copied into the beginning of
|
||||
# segment 000000020000000000000003, before the timeline switching
|
||||
# record. (They are also present in the
|
||||
# 000000010000000000000003.partial file, but .partial files are not
|
||||
# used automatically.)
|
||||
|
||||
# Now test PITR to the recovery target. It should find the WAL in
|
||||
# segment 000000020000000000000003, but not follow the timeline switch
|
||||
# to timeline 2.
|
||||
my $node_pitr = PostgreSQL::Test::Cluster->new('node_pitr');
|
||||
$node_pitr->init_from_backup(
|
||||
$node_primary, $backup_name,
|
||||
standby => 0,
|
||||
has_restoring => 1);
|
||||
$node_pitr->append_conf(
|
||||
'postgresql.conf', qq{
|
||||
recovery_target_name = 'rp'
|
||||
recovery_target_action = 'promote'
|
||||
});
|
||||
|
||||
$node_pitr->start;
|
||||
|
||||
# Wait until recovery finishes.
|
||||
$node_pitr->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
|
||||
or die "Timed out while waiting for PITR promotion";
|
||||
|
||||
# Check that we see the data we expect.
|
||||
$result = $node_pitr->safe_psql('postgres', "SELECT max(i) FROM foo;");
|
||||
is($result, qq{1}, "check table contents after point-in-time recovery");
|
||||
|
||||
# Insert a row so that we can check later that we successfully recover
|
||||
# back to this timeline.
|
||||
$node_pitr->safe_psql('postgres', "INSERT INTO foo VALUES(3);");
|
||||
|
||||
# Stop the node. This archives the last segment.
|
||||
$node_pitr->stop();
|
||||
|
||||
# Test archive recovery on the timeline created by the PITR. This
|
||||
# replays the end-of-recovery record that switches from timeline 1 to
|
||||
# 3.
|
||||
my $node_pitr2 = PostgreSQL::Test::Cluster->new('node_pitr2');
|
||||
$node_pitr2->init_from_backup(
|
||||
$node_primary, $backup_name,
|
||||
standby => 0,
|
||||
has_restoring => 1);
|
||||
$node_pitr2->append_conf(
|
||||
'postgresql.conf', qq{
|
||||
recovery_target_action = 'promote'
|
||||
});
|
||||
|
||||
$node_pitr2->start;
|
||||
|
||||
# Wait until recovery finishes.
|
||||
$node_pitr2->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
|
||||
or die "Timed out while waiting for PITR promotion";
|
||||
|
||||
# Verify that we can see the row inserted after the PITR.
|
||||
$result = $node_pitr2->safe_psql('postgres', "SELECT max(i) FROM foo;");
|
||||
is($result, qq{3}, "check table contents after point-in-time recovery");
|
||||
|
||||
done_testing();
|
Loading…
Reference in New Issue
Block a user