Have the server properly terminate tar archives.

Earlier versions of PostgreSQL featured a version of pg_basebackup
that wanted to edit tar archives but was too dumb to parse them
properly. The server made things easier for the client by failing
to add the two blocks of zero bytes that ought to end a tar file,
leaving it up to the client to do that.

But since commit 23a1c6578c, we
don't need this hack any more, because pg_basebackup is now smarter
and can parse tar files even if they are properly terminated! So
change the server to always properly terminate the tar files. Older
versions of pg_basebackup can't talk to new servers anyway, so
there's no compatibility break.

On the pg_basebackup side, we see still need to add the terminating
zero bytes if we're talking to an older server, but not when the
server is v15+. Hopefully at some point we'll be able to remove
some of this compatibility cruft, but it seems best to hang on to
it for now.

In passing, add a file header comment to bbstreamer_tar.c, to make
it clearer what's going on here.

Discussion: http://postgr.es/m/CA+TgmoZbNzsWwM4BE5Jb_qHncY817DYZwGf+2-7hkMQ27ZwsMQ@mail.gmail.com
This commit is contained in:
Robert Haas 2021-11-09 14:21:35 -05:00
parent ee3a1a5b63
commit 5a1007a508
3 changed files with 45 additions and 6 deletions

View File

@ -374,8 +374,17 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
Assert(lnext(state.tablespaces, lc) == NULL);
}
else
{
/* Properly terminate the tarfile. */
StaticAssertStmt(TAR_BLOCK_SIZE <= 2 * BLCKSZ,
"BLCKSZ too small for 2 tar blocks");
memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
/* OK, that's the end of the archive. */
bbsink_end_archive(sink);
}
}
basebackup_progress_wait_wal_archive(&state);
endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
@ -611,6 +620,13 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
sendFileWithContent(sink, pathbuf, "", &manifest);
}
/* Properly terminate the tar file. */
StaticAssertStmt(TAR_BLOCK_SIZE <= 2 * BLCKSZ,
"BLCKSZ too small for 2 tar blocks");
memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
/* OK, that's the end of the archive. */
bbsink_end_archive(sink);
}

View File

@ -2,6 +2,16 @@
*
* bbstreamer_tar.c
*
* This module implements three types of tar processing. A tar parser
* expects unlabelled chunks of data (e.g. BBSTREAMER_UNKNOWN) and splits
* it into labelled chunks (any other value of bbstreamer_archive_context).
* A tar archiver does the reverse: it takes a bunch of labelled chunks
* and produces a tarfile, optionally replacing member headers and trailers
* so that upstream bbstreamer objects can perform surgery on the tarfile
* contents without knowing the details of the tar format. A tar terminator
* just adds two blocks of NUL bytes to the end of the file, since older
* server versions produce files with this terminator omitted.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
* IDENTIFICATION

View File

@ -85,6 +85,12 @@ typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
*/
#define MINIMUM_VERSION_FOR_MANIFESTS 130000
/*
* Before v15, tar files received from the server will be improperly
* terminated.
*/
#define MINIMUM_VERSION_FOR_TERMINATED_TARFILE 150000
/*
* Different ways to include WAL
*/
@ -166,7 +172,8 @@ static void progress_report(int tablespacenum, bool force, bool finished);
static bbstreamer *CreateBackupStreamer(char *archive_name, char *spclocation,
bbstreamer **manifest_inject_streamer_p,
bool is_recovery_guc_supported);
bool is_recovery_guc_supported,
bool expect_unterminated_tarfile);
static void ReceiveTarFile(PGconn *conn, char *archive_name, char *spclocation,
bool tablespacenum);
static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data);
@ -965,7 +972,8 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback,
static bbstreamer *
CreateBackupStreamer(char *archive_name, char *spclocation,
bbstreamer **manifest_inject_streamer_p,
bool is_recovery_guc_supported)
bool is_recovery_guc_supported,
bool expect_unterminated_tarfile)
{
bbstreamer *streamer;
bbstreamer *manifest_inject_streamer = NULL;
@ -1074,12 +1082,13 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
/*
* If we're doing anything that involves understanding the contents of
* the archive, we'll need to parse it. If not, we can skip parsing it,
* but the tar files the server sends are not properly terminated, so
* we'll need to add the terminator here.
* but old versions of the server send improperly terminated tarfiles,
* so if we're talking to such a server we'll need to add the terminator
* here.
*/
if (must_parse_archive)
streamer = bbstreamer_tar_parser_new(streamer);
else
else if (expect_unterminated_tarfile)
streamer = bbstreamer_tar_terminator_new(streamer);
/* Return the results. */
@ -1099,14 +1108,18 @@ ReceiveTarFile(PGconn *conn, char *archive_name, char *spclocation,
WriteTarState state;
bbstreamer *manifest_inject_streamer;
bool is_recovery_guc_supported;
bool expect_unterminated_tarfile;
/* Pass all COPY data through to the backup streamer. */
memset(&state, 0, sizeof(state));
is_recovery_guc_supported =
PQserverVersion(conn) >= MINIMUM_VERSION_FOR_RECOVERY_GUC;
expect_unterminated_tarfile =
PQserverVersion(conn) < MINIMUM_VERSION_FOR_TERMINATED_TARFILE;
state.streamer = CreateBackupStreamer(archive_name, spclocation,
&manifest_inject_streamer,
is_recovery_guc_supported);
is_recovery_guc_supported,
expect_unterminated_tarfile);
state.tablespacenum = tablespacenum;
ReceiveCopyData(conn, ReceiveTarCopyChunk, &state);
progress_filename = NULL;