Extend the options of pg_basebackup to control compression

The option --compress is extended to accept a compression method and an
optional compression level, following the grammar METHOD[:LEVEL].  The
methods currently supported are "none" and "gzip", for client-side
compression.  Each of those methods uses only an integer value for the
compression level, but any method implemented in the future could use
more specific keywords if necessary.

This commit keeps the logic backward-compatible.  Hence, the following
compatibility rules apply for the new format of the option --compress:
* -z/--gzip is a synonym of --compress=gzip.
* --compress=NUM implies:
** --compress=none if NUM = 0.
** --compress=gzip:NUM if NUM > 0.

Note that there are also plans to extend this grammar further with
server-side compression.

Reviewed-by: Robert Haas, Magnus Hagander, Álvaro Herrera, David
G. Johnston, Georgios Kokolatos
Discussion: https://postgr.es/m/Yb3GEgWwcu4wZDuA@paquier.xyz
This commit is contained in:
Michael Paquier 2022-01-21 11:08:43 +09:00
parent 512fc2dd79
commit 5c649fe153
3 changed files with 179 additions and 24 deletions

View File

@ -398,15 +398,24 @@ PostgreSQL documentation
<varlistentry>
<term><option>-Z <replaceable class="parameter">level</replaceable></option></term>
<term><option>-Z <replaceable class="parameter">method</replaceable></option>[:<replaceable>level</replaceable>]</term>
<term><option>--compress=<replaceable class="parameter">level</replaceable></option></term>
<term><option>--compress=<replaceable class="parameter">method</replaceable></option>[:<replaceable>level</replaceable>]</term>
<listitem>
<para>
Enables gzip compression of tar file output, and specifies the
Enables compression of tar file output, and specifies the
compression level (0 through 9, 0 being no compression and 9 being best
compression). Compression is only available when using the tar
format, and the suffix <filename>.gz</filename> will
automatically be added to all tar filenames.
</para>
<para>
The compression method can be set to either <literal>gzip</literal>
for compression with <application>gzip</application>, or
<literal>none</literal> for no compression. A compression level
can be optionally specified, by appending the level number after a
colon (<literal>:</literal>).
</para>
</listitem>
</varlistentry>
</variablelist>
@ -942,6 +951,16 @@ PostgreSQL documentation
<screen>
<prompt>$</prompt> <userinput>pg_basebackup -D backup/data -T /opt/ts=$(pwd)/backup/ts</userinput>
</screen></para>
<para>
To create a backup of a local server with one tar file for each tablespace
compressed with <application>gzip</application> at level 9, stored in the
directory <filename>backup</filename>:
<screen>
<prompt>$</prompt> <userinput>pg_basebackup -D backup -Ft --compress=gzip:9</userinput>
</screen>
</para>
</refsect1>
<refsect1>

View File

@ -123,6 +123,7 @@ static bool showprogress = false;
static bool estimatesize = true;
static int verbose = 0;
static int compresslevel = 0;
static WalCompressionMethod compressmethod = COMPRESSION_NONE;
static IncludeWal includewal = STREAM_WAL;
static bool fastcheckpoint = false;
static bool writerecoveryconf = false;
@ -379,7 +380,8 @@ usage(void)
printf(_(" -X, --wal-method=none|fetch|stream\n"
" include required WAL files with specified method\n"));
printf(_(" -z, --gzip compress tar output\n"));
printf(_(" -Z, --compress=0-9 compress tar output with given compression level\n"));
printf(_(" -Z, --compress={gzip,none}[:LEVEL] or [LEVEL]\n"
" compress tar output with given compression method or level\n"));
printf(_("\nGeneral options:\n"));
printf(_(" -c, --checkpoint=fast|spread\n"
" set fast or spread checkpointing\n"));
@ -544,8 +546,7 @@ LogStreamerMain(logstreamer_param *param)
stream.do_sync);
else
stream.walmethod = CreateWalTarMethod(param->xlog,
(compresslevel != 0) ?
COMPRESSION_GZIP : COMPRESSION_NONE,
compressmethod,
compresslevel,
stream.do_sync);
@ -936,6 +937,81 @@ parse_max_rate(char *src)
return (int32) result;
}
/*
 * Utility wrapper to parse the values specified for -Z/--compress.
 *
 * The accepted forms of "src" are:
 *   METHOD          - "gzip" or "none" (case-insensitive)
 *   METHOD:LEVEL    - a method followed by an integer level
 *   LEVEL           - a bare integer, kept for backward compatibility;
 *                     the method is then gzip if LEVEL > 0, none otherwise
 *
 * *methodres is always assigned on return.  *levelres is assigned only
 * when a level is present in the option value; for a bare METHOD it is
 * left untouched so that later checks done by the caller can assign a
 * default level.
 *
 * Any value that cannot be parsed is reported and ends the program with
 * exit(1); this function does not return in that case.
 */
static void
parse_compress_options(char *src, WalCompressionMethod *methodres,
					   int *levelres)
{
	char	   *sep;
	int			firstlen;
	char	   *firstpart;

	/* check if the option is split in two */
	sep = strchr(src, ':');

	/*
	 * The first part of the option value could be a method name, or just a
	 * level value.  Take a NUL-terminated copy of it so that it can be
	 * compared and reported without modifying the caller's string.
	 */
	firstlen = (sep != NULL) ? (sep - src) : strlen(src);
	firstpart = pg_malloc(firstlen + 1);
	memcpy(firstpart, src, firstlen);
	firstpart[firstlen] = '\0';

	/*
	 * Check if the first part of the string matches with a supported
	 * compression method.
	 */
	if (pg_strcasecmp(firstpart, "gzip") == 0)
		*methodres = COMPRESSION_GZIP;
	else if (pg_strcasecmp(firstpart, "none") == 0)
		*methodres = COMPRESSION_NONE;
	else
	{
		/*
		 * It does not match anything known, so check for the
		 * backward-compatible case of only an integer where the implied
		 * compression method changes depending on the level value.
		 */
		if (!option_parse_int(firstpart, "-Z/--compress", 0,
							  INT_MAX, levelres))
			exit(1);

		/*
		 * A bare level cannot be followed by a colon separator: something
		 * like "5:3" matches neither METHOD[:LEVEL] nor LEVEL, so reject
		 * it rather than silently dropping what follows the colon.
		 */
		if (sep != NULL)
		{
			pg_log_error("invalid value \"%s\" for option %s",
						 src, "-Z/--compress");
			exit(1);
		}

		*methodres = (*levelres > 0) ?
			COMPRESSION_GZIP : COMPRESSION_NONE;
		free(firstpart);
		return;
	}

	if (sep == NULL)
	{
		/*
		 * The caller specified a method without a colon separator, so let any
		 * subsequent checks assign a default level.
		 */
		free(firstpart);
		return;
	}

	/* Check the contents after the colon separator. */
	sep++;
	if (*sep == '\0')
	{
		pg_log_error("no compression level defined for method %s", firstpart);
		exit(1);
	}

	/* The copy of the method name is not needed past this point. */
	free(firstpart);

	/*
	 * For any of the methods currently supported, the data after the
	 * separator can just be an integer.
	 */
	if (!option_parse_int(sep, "-Z/--compress", 0, INT_MAX,
						  levelres))
		exit(1);
}
/*
* Read a stream of COPY data and invoke the provided callback for each
* chunk.
@ -996,7 +1072,7 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
bool is_recovery_guc_supported,
bool expect_unterminated_tarfile)
{
bbstreamer *streamer;
bbstreamer *streamer = NULL;
bbstreamer *manifest_inject_streamer = NULL;
bool inject_manifest;
bool must_parse_archive;
@ -1055,19 +1131,22 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
archive_file = NULL;
}
if (compressmethod == COMPRESSION_NONE)
streamer = bbstreamer_plain_writer_new(archive_filename,
archive_file);
#ifdef HAVE_LIBZ
if (compresslevel != 0)
else if (compressmethod == COMPRESSION_GZIP)
{
strlcat(archive_filename, ".gz", sizeof(archive_filename));
streamer = bbstreamer_gzip_writer_new(archive_filename,
archive_file,
compresslevel);
}
else
#endif
streamer = bbstreamer_plain_writer_new(archive_filename,
archive_file);
else
{
Assert(false); /* not reachable */
}
/*
* If we need to parse the archive for whatever reason, then we'll
@ -2279,11 +2358,11 @@ main(int argc, char **argv)
#else
compresslevel = 1; /* will be rejected below */
#endif
compressmethod = COMPRESSION_GZIP;
break;
case 'Z':
if (!option_parse_int(optarg, "-Z/--compress", 0, 9,
&compresslevel))
exit(1);
parse_compress_options(optarg, &compressmethod,
&compresslevel);
break;
case 'c':
if (pg_strcasecmp(optarg, "fast") == 0)
@ -2412,7 +2491,7 @@ main(int argc, char **argv)
/*
* Compression doesn't make sense unless tar format is in use.
*/
if (format == 'p' && compresslevel != 0)
if (format == 'p' && compressmethod != COMPRESSION_NONE)
{
if (backup_target == NULL)
pg_log_error("only tar mode backups can be compressed");
@ -2516,14 +2595,43 @@ main(int argc, char **argv)
}
}
#ifndef HAVE_LIBZ
/* Sanity checks for compression level. */
/* Sanity checks for compression-related options. */
switch (compressmethod)
{
case COMPRESSION_NONE:
if (compresslevel != 0)
{
pg_log_error("this build does not support compression");
pg_log_error("cannot use compression level with method %s",
"none");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
exit(1);
}
break;
case COMPRESSION_GZIP:
#ifdef HAVE_LIBZ
if (compresslevel == 0)
{
pg_log_info("no value specified for compression level, switching to default");
compresslevel = Z_DEFAULT_COMPRESSION;
}
if (compresslevel > 9)
{
pg_log_error("compression level %d of method %s higher than maximum of 9",
compresslevel, "gzip");
exit(1);
}
#else
pg_log_error("this build does not support compression with %s",
"gzip");
exit(1);
#endif
break;
case COMPRESSION_LZ4:
/* option not supported */
Assert(false);
break;
}
/*
* Sanity checks for progress reporting options.

View File

@ -10,7 +10,7 @@ use File::Path qw(rmtree);
use Fcntl qw(:seek);
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More tests => 135;
use Test::More tests => 143;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@ -38,6 +38,20 @@ my $pgdata = $node->data_dir;
$node->command_fails(['pg_basebackup'],
'pg_basebackup needs target directory specified');
# Sanity checks for options
$node->command_fails_like(
[ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none:1' ],
qr/\Qpg_basebackup: error: cannot use compression level with method none/,
'failure if method "none" specified with compression level');
$node->command_fails_like(
[ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none+' ],
qr/\Qpg_basebackup: error: invalid value "none+" for option/,
'failure on incorrect separator to define compression level');
$node->command_fails_like(
[ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none:' ],
qr/\Qpg_basebackup: error: no compression level defined for method none/,
'failure on missing compression level value');
# Some Windows ANSI code pages may reject this filename, in which case we
# quietly proceed without this bit of test coverage.
if (open my $badchars, '>>', "$tempdir/pgdata/FOO\xe0\xe0\xe0BAR")
@ -699,7 +713,7 @@ note "Testing pg_basebackup with compression methods";
# Check ZLIB compression if available.
SKIP:
{
skip "postgres was not built with ZLIB support", 5
skip "postgres was not built with ZLIB support", 7
if (!check_pg_config("#define HAVE_LIBZ 1"));
$node->command_ok(
@ -717,15 +731,28 @@ SKIP:
'--format', 't'
],
'pg_basebackup with --gzip');
$node->command_ok(
[
@pg_basebackup_defs, '-D',
"$tempdir/backup_gzip3", '--compress',
'gzip:1', '--format',
't'
],
'pg_basebackup with --compress=gzip:1');
# Verify that the stored files are generated with their expected
# names.
my @zlib_files = glob "$tempdir/backup_gzip/*.tar.gz";
is(scalar(@zlib_files), 2,
"two files created with --compress (base.tar.gz and pg_wal.tar.gz)");
"two files created with --compress=NUM (base.tar.gz and pg_wal.tar.gz)"
);
my @zlib_files2 = glob "$tempdir/backup_gzip2/*.tar.gz";
is(scalar(@zlib_files2), 2,
"two files created with --gzip (base.tar.gz and pg_wal.tar.gz)");
my @zlib_files3 = glob "$tempdir/backup_gzip3/*.tar.gz";
is(scalar(@zlib_files3), 2,
"two files created with --compress=gzip:NUM (base.tar.gz and pg_wal.tar.gz)"
);
# Check the integrity of the files generated.
my $gzip = $ENV{GZIP_PROGRAM};
@ -735,8 +762,9 @@ SKIP:
|| system_log($gzip, '--version') != 0);
my $gzip_is_valid =
system_log($gzip, '--test', @zlib_files, @zlib_files2);
system_log($gzip, '--test', @zlib_files, @zlib_files2, @zlib_files3);
is($gzip_is_valid, 0, "gzip verified the integrity of compressed data");
rmtree("$tempdir/backup_gzip");
rmtree("$tempdir/backup_gzip2");
rmtree("$tempdir/backup_gzip3");
}