From 5c649fe153367cdab278738ee4aebbfd158e0546 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 21 Jan 2022 11:08:43 +0900 Subject: [PATCH] Extend the options of pg_basebackup to control compression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The option --compress is extended to accept a compression method and an optional compression level, following the grammar METHOD[:LEVEL]. The methods currently supported are "none" and "gzip", for client-side compression. Each of these methods uses only an integer value for the compression level, but any method implemented in the future could use more specific keywords if necessary. This commit keeps the logic backward-compatible. Hence, the following compatibility rules apply for the new format of the option --compress: * -z/--gzip is a synonym of --compress=gzip. * --compress=NUM implies: ** --compress=none if NUM = 0. ** --compress=gzip:NUM if NUM > 0. Note that there are also plans to extend this grammar further with server-side compression. Reviewed-by: Robert Haas, Magnus Hagander, Álvaro Herrera, David G. Johnston, Georgios Kokolatos Discussion: https://postgr.es/m/Yb3GEgWwcu4wZDuA@paquier.xyz --- doc/src/sgml/ref/pg_basebackup.sgml | 21 ++- src/bin/pg_basebackup/pg_basebackup.c | 146 ++++++++++++++++--- src/bin/pg_basebackup/t/010_pg_basebackup.pl | 36 ++++- 3 files changed, 179 insertions(+), 24 deletions(-) diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 8422cd4304..47d11289be 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -398,15 +398,24 @@ PostgreSQL documentation + [:level] + [:level] - Enables gzip compression of tar file output, and specifies the + Enables compression of tar file output, and specifies the compression level (0 through 9, 0 being no compression and 9 being best compression). 
Compression is only available when using the tar format, and the suffix .gz will automatically be added to all tar filenames. + + The compression method can be set to either gzip + for compression with gzip, or + none for no compression. A compression level + can be optionally specified, by appending the level number after a + colon (:). + @@ -942,6 +951,16 @@ PostgreSQL documentation $ pg_basebackup -D backup/data -T /opt/ts=$(pwd)/backup/ts + + + To create a backup of a local server with one tar file for each tablespace + compressed with gzip at level 9, stored in the + directory backup: + +$ pg_basebackup -D backup -Ft --compress=gzip:9 + + + diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index ec3b4f3c17..d5b0ade10d 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -123,6 +123,7 @@ static bool showprogress = false; static bool estimatesize = true; static int verbose = 0; static int compresslevel = 0; +static WalCompressionMethod compressmethod = COMPRESSION_NONE; static IncludeWal includewal = STREAM_WAL; static bool fastcheckpoint = false; static bool writerecoveryconf = false; @@ -379,7 +380,8 @@ usage(void) printf(_(" -X, --wal-method=none|fetch|stream\n" " include required WAL files with specified method\n")); printf(_(" -z, --gzip compress tar output\n")); - printf(_(" -Z, --compress=0-9 compress tar output with given compression level\n")); + printf(_(" -Z, --compress={gzip,none}[:LEVEL] or [LEVEL]\n" + " compress tar output with given compression method or level\n")); printf(_("\nGeneral options:\n")); printf(_(" -c, --checkpoint=fast|spread\n" " set fast or spread checkpointing\n")); @@ -544,8 +546,7 @@ LogStreamerMain(logstreamer_param *param) stream.do_sync); else stream.walmethod = CreateWalTarMethod(param->xlog, - (compresslevel != 0) ? 
- COMPRESSION_GZIP : COMPRESSION_NONE, + compressmethod, compresslevel, stream.do_sync); @@ -936,6 +937,81 @@ parse_max_rate(char *src) return (int32) result; } +/* + * Utility wrapper to parse the values specified for -Z/--compress. + * *methodres and *levelres will be optionally filled with values coming + * from the parsed results. + */ +static void +parse_compress_options(char *src, WalCompressionMethod *methodres, + int *levelres) +{ + char *sep; + int firstlen; + char *firstpart = NULL; + + /* check if the option is split in two */ + sep = strchr(src, ':'); + + /* + * The first part of the option value could be a method name, or just a + * level value. + */ + firstlen = (sep != NULL) ? (sep - src) : strlen(src); + firstpart = pg_malloc(firstlen + 1); + strncpy(firstpart, src, firstlen); + firstpart[firstlen] = '\0'; + + /* + * Check if the first part of the string matches with a supported + * compression method. + */ + if (pg_strcasecmp(firstpart, "gzip") == 0) + *methodres = COMPRESSION_GZIP; + else if (pg_strcasecmp(firstpart, "none") == 0) + *methodres = COMPRESSION_NONE; + else + { + /* + * It does not match anything known, so check for the + * backward-compatible case of only an integer where the implied + * compression method changes depending on the level value. + */ + if (!option_parse_int(firstpart, "-Z/--compress", 0, + INT_MAX, levelres)) + exit(1); + + *methodres = (*levelres > 0) ? + COMPRESSION_GZIP : COMPRESSION_NONE; + return; + } + + if (sep == NULL) + { + /* + * The caller specified a method without a colon separator, so let any + * subsequent checks assign a default level. + */ + return; + } + + /* Check the contents after the colon separator. */ + sep++; + if (*sep == '\0') + { + pg_log_error("no compression level defined for method %s", firstpart); + exit(1); + } + + /* + * For any of the methods currently supported, the data after the + * separator can just be an integer. 
+ */ + if (!option_parse_int(sep, "-Z/--compress", 0, INT_MAX, + levelres)) + exit(1); +} + /* * Read a stream of COPY data and invoke the provided callback for each * chunk. @@ -996,7 +1072,7 @@ CreateBackupStreamer(char *archive_name, char *spclocation, bool is_recovery_guc_supported, bool expect_unterminated_tarfile) { - bbstreamer *streamer; + bbstreamer *streamer = NULL; bbstreamer *manifest_inject_streamer = NULL; bool inject_manifest; bool must_parse_archive; @@ -1055,19 +1131,22 @@ CreateBackupStreamer(char *archive_name, char *spclocation, archive_file = NULL; } + if (compressmethod == COMPRESSION_NONE) + streamer = bbstreamer_plain_writer_new(archive_filename, + archive_file); #ifdef HAVE_LIBZ - if (compresslevel != 0) + else if (compressmethod == COMPRESSION_GZIP) { strlcat(archive_filename, ".gz", sizeof(archive_filename)); streamer = bbstreamer_gzip_writer_new(archive_filename, archive_file, compresslevel); } - else #endif - streamer = bbstreamer_plain_writer_new(archive_filename, - archive_file); - + else + { + Assert(false); /* not reachable */ + } /* * If we need to parse the archive for whatever reason, then we'll @@ -2279,11 +2358,11 @@ main(int argc, char **argv) #else compresslevel = 1; /* will be rejected below */ #endif + compressmethod = COMPRESSION_GZIP; break; case 'Z': - if (!option_parse_int(optarg, "-Z/--compress", 0, 9, - &compresslevel)) - exit(1); + parse_compress_options(optarg, &compressmethod, + &compresslevel); break; case 'c': if (pg_strcasecmp(optarg, "fast") == 0) @@ -2412,7 +2491,7 @@ main(int argc, char **argv) /* * Compression doesn't make sense unless tar format is in use. */ - if (format == 'p' && compresslevel != 0) + if (format == 'p' && compressmethod != COMPRESSION_NONE) { if (backup_target == NULL) pg_log_error("only tar mode backups can be compressed"); @@ -2516,14 +2595,43 @@ main(int argc, char **argv) } } -#ifndef HAVE_LIBZ - /* Sanity checks for compression level. 
*/ - if (compresslevel != 0) + /* Sanity checks for compression-related options. */ + switch (compressmethod) { - pg_log_error("this build does not support compression"); - exit(1); - } + case COMPRESSION_NONE: + if (compresslevel != 0) + { + pg_log_error("cannot use compression level with method %s", + "none"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + break; + case COMPRESSION_GZIP: +#ifdef HAVE_LIBZ + if (compresslevel == 0) + { + pg_log_info("no value specified for compression level, switching to default"); + compresslevel = Z_DEFAULT_COMPRESSION; + } + if (compresslevel > 9) + { + pg_log_error("compression level %d of method %s higher than maximum of 9", + compresslevel, "gzip"); + exit(1); + } +#else + pg_log_error("this build does not support compression with %s", + "gzip"); + exit(1); #endif + break; + case COMPRESSION_LZ4: + /* option not supported */ + Assert(false); + break; + } /* * Sanity checks for progress reporting options. 
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index f7e21941eb..95a6bd6778 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -10,7 +10,7 @@ use File::Path qw(rmtree); use Fcntl qw(:seek); use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 135; +use Test::More tests => 143; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -38,6 +38,20 @@ my $pgdata = $node->data_dir; $node->command_fails(['pg_basebackup'], 'pg_basebackup needs target directory specified'); +# Sanity checks for options +$node->command_fails_like( + [ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none:1' ], + qr/\Qpg_basebackup: error: cannot use compression level with method none/, + 'failure if method "none" specified with compression level'); +$node->command_fails_like( + [ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none+' ], + qr/\Qpg_basebackup: error: invalid value "none+" for option/, + 'failure on incorrect separator to define compression level'); +$node->command_fails_like( + [ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none:' ], + qr/\Qpg_basebackup: error: no compression level defined for method none/, + 'failure on missing compression level value'); + # Some Windows ANSI code pages may reject this filename, in which case we # quietly proceed without this bit of test coverage. if (open my $badchars, '>>', "$tempdir/pgdata/FOO\xe0\xe0\xe0BAR") @@ -699,7 +713,7 @@ note "Testing pg_basebackup with compression methods"; # Check ZLIB compression if available. 
SKIP: { - skip "postgres was not built with ZLIB support", 5 + skip "postgres was not built with ZLIB support", 7 if (!check_pg_config("#define HAVE_LIBZ 1")); $node->command_ok( @@ -717,15 +731,28 @@ SKIP: '--format', 't' ], 'pg_basebackup with --gzip'); + $node->command_ok( + [ + @pg_basebackup_defs, '-D', + "$tempdir/backup_gzip3", '--compress', + 'gzip:1', '--format', + 't' + ], + 'pg_basebackup with --compress=gzip:1'); # Verify that the stored files are generated with their expected # names. my @zlib_files = glob "$tempdir/backup_gzip/*.tar.gz"; is(scalar(@zlib_files), 2, - "two files created with --compress (base.tar.gz and pg_wal.tar.gz)"); + "two files created with --compress=NUM (base.tar.gz and pg_wal.tar.gz)" + ); my @zlib_files2 = glob "$tempdir/backup_gzip2/*.tar.gz"; is(scalar(@zlib_files2), 2, "two files created with --gzip (base.tar.gz and pg_wal.tar.gz)"); + my @zlib_files3 = glob "$tempdir/backup_gzip3/*.tar.gz"; + is(scalar(@zlib_files3), 2, + "two files created with --compress=gzip:NUM (base.tar.gz and pg_wal.tar.gz)" + ); # Check the integrity of the files generated. my $gzip = $ENV{GZIP_PROGRAM}; @@ -735,8 +762,9 @@ SKIP: || system_log($gzip, '--version') != 0); my $gzip_is_valid = - system_log($gzip, '--test', @zlib_files, @zlib_files2); + system_log($gzip, '--test', @zlib_files, @zlib_files2, @zlib_files3); is($gzip_is_valid, 0, "gzip verified the integrity of compressed data"); rmtree("$tempdir/backup_gzip"); rmtree("$tempdir/backup_gzip2"); + rmtree("$tempdir/backup_gzip3"); }