diff --git a/configure b/configure
index 9049ad015e..78a10c8c88 100755
--- a/configure
+++ b/configure
@@ -19254,7 +19254,8 @@ fi
-for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink setproctitle setsid sigprocmask symlink towlower utime utimes wcstombs wcstombs_l
+
+for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
do
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
diff --git a/configure.in b/configure.in
index a362cfdf5e..8ed09f58ab 100644
--- a/configure.in
+++ b/configure.in
@@ -1207,7 +1207,7 @@ PGAC_VAR_INT_TIMEZONE
AC_FUNC_ACCEPT_ARGTYPES
PGAC_FUNC_GETTIMEOFDAY_1ARG
-AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink setproctitle setsid sigprocmask symlink towlower utime utimes wcstombs wcstombs_l])
+AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
AC_REPLACE_FUNCS(fseeko)
case $host_os in
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 11484468dd..08ee37e7d8 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -219,6 +219,21 @@ PostgreSQL documentation
+
+
+
+
+
+ By default, initdb will wait for all files to be
+ written safely to disk. This option causes initdb
+ to return without waiting, which is faster, but means that a
+ subsequent operating system crash can leave the data directory
+ corrupt. Generally, this option is useful for testing, but should not
+ be used when creating a production installation.
+
+
+
+
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f79f4c6a36..9724f481dc 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -336,12 +336,15 @@ pg_fdatasync(int fd)
/*
* pg_flush_data --- advise OS that the data described won't be needed soon
*
- * Not all platforms have posix_fadvise; treat as noop if not available.
+ * Not all platforms have sync_file_range or posix_fadvise; treat as no-op
+ * if not available.
*/
int
pg_flush_data(int fd, off_t offset, off_t amount)
{
-#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
+#if defined(HAVE_SYNC_FILE_RANGE)
+ return sync_file_range(fd, offset, amount, SYNC_FILE_RANGE_WRITE);
+#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
return posix_fadvise(fd, offset, amount, POSIX_FADV_DONTNEED);
#else
return 0;
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 982d864cb3..4292231d0d 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -49,6 +49,7 @@
#include "postgres_fe.h"
#include
+#include
#include
#include
#include
@@ -116,6 +117,7 @@ static const char *authmethodhost = "";
static const char *authmethodlocal = "";
static bool debug = false;
static bool noclean = false;
+static bool do_sync = true;
static bool show_setting = false;
static char *xlog_dir = "";
@@ -160,6 +162,9 @@ static char *authwarning = NULL;
/*
* Centralized knowledge of switches to pass to backend
*
+ * Note: we run the backend with -F (fsync disabled) and then do a single
+ * pass of fsync'ing at the end. This is faster than fsync'ing each step.
+ *
* Note: in the shell-script version, we also passed PGDATA as a -D switch,
* but here it is more convenient to pass it as an environment variable
* (no quoting to worry about).
@@ -182,6 +187,9 @@ static char **filter_lines_with_token(char **lines, const char *token);
#endif
static char **readfile(const char *path);
static void writefile(char *path, char **lines);
+static void walkdir(char *path, void (*action)(char *fname, bool isdir));
+static void pre_sync_fname(char *fname, bool isdir);
+static void fsync_fname(char *fname, bool isdir);
static FILE *popen_check(const char *command, const char *mode);
static void exit_nicely(void);
static char *get_id(void);
@@ -209,6 +217,7 @@ static void load_plpgsql(void);
static void vacuum_db(void);
static void make_template0(void);
static void make_postgres(void);
+static void perform_fsync(void);
static void trapsig(int signum);
static void check_ok(void);
static char *escape_quotes(const char *src);
@@ -489,6 +498,174 @@ writefile(char *path, char **lines)
}
}
+/*
+ * walkdir: visit every regular file and directory below "path".
+ *
+ * "action" is called with isdir = false for each regular file, and with
+ * isdir = true for each directory (the starting directory included) once
+ * all of that directory's entries have been handled.  Entries that lstat()
+ * reports as neither a directory nor a regular file are skipped.
+ *
+ * A failure to open, read or stat anything is reported on stderr and then
+ * handed to exit_nicely().
+ *
+ * Adapted from copydir() in copydir.c.
+ */
+static void
+walkdir(char *path, void (*action) (char *fname, bool isdir))
+{
+	DIR		   *dirhandle;
+	struct dirent *entry;
+	char		child[MAXPGPATH];
+
+	dirhandle = opendir(path);
+	if (dirhandle == NULL)
+	{
+		fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+				progname, path, strerror(errno));
+		exit_nicely();
+	}
+
+	for (;;)
+	{
+		struct stat st;
+
+		/* reset errno so end-of-directory can be told apart from an error */
+		errno = 0;
+		entry = readdir(dirhandle);
+		if (entry == NULL)
+			break;
+
+		/* never follow the self and parent links */
+		if (strcmp(entry->d_name, ".") == 0)
+			continue;
+		if (strcmp(entry->d_name, "..") == 0)
+			continue;
+
+		snprintf(child, MAXPGPATH, "%s/%s", path, entry->d_name);
+
+		if (lstat(child, &st) < 0)
+		{
+			fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
+					progname, child, strerror(errno));
+			exit_nicely();
+		}
+
+		if (S_ISDIR(st.st_mode))
+			walkdir(child, action);
+		else if (S_ISREG(st.st_mode))
+			action(child, false);
+	}
+
+#ifdef WIN32
+	/*
+	 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
+	 * released version
+	 */
+	if (GetLastError() == ERROR_NO_MORE_FILES)
+		errno = 0;
+#endif
+
+	/* a nonzero errno here means readdir() stopped because of an error */
+	if (errno != 0)
+	{
+		fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
+				progname, path, strerror(errno));
+		exit_nicely();
+	}
+
+	closedir(dirhandle);
+
+	/*
+	 * Finish with the directory itself.  Syncing the individual files does
+	 * not guarantee that their directory entries are synced too.  Recent
+	 * versions of ext4 have made the window much wider but it's been an
+	 * issue for ext3 and other filesystems in the past.
+	 */
+	action(path, true);
+}
+
+/*
+ * pre_sync_fname: hint to the OS that this file is about to be fsync()ed.
+ *
+ * This compiles to a no-op unless sync_file_range, or posix_fadvise with
+ * POSIX_FADV_DONTNEED, is available.  The outcome of the hint call itself
+ * is ignored.  Failing to open the file is still reported and handed to
+ * exit_nicely(), except for directories on systems that refuse to open
+ * them.
+ */
+static void
+pre_sync_fname(char *fname, bool isdir)
+{
+#if defined(HAVE_SYNC_FILE_RANGE) || \
+	(defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED))
+	int			hint_fd = open(fname, O_RDONLY | PG_BINARY);
+
+	if (hint_fd < 0)
+	{
+		/*
+		 * Some OSs don't allow us to open directories at all (Windows
+		 * returns EACCES); quietly skip the hint in that case.
+		 */
+		if (isdir && (errno == EISDIR || errno == EACCES))
+			return;
+
+		fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
+				progname, fname, strerror(errno));
+		exit_nicely();
+	}
+
+	/*
+	 * sync_file_range is preferred, with posix_fadvise as the fallback.
+	 * Errors are not checked because the call is only advisory.
+	 */
+#if defined(HAVE_SYNC_FILE_RANGE)
+	sync_file_range(hint_fd, 0, 0, SYNC_FILE_RANGE_WRITE);
+#else
+	posix_fadvise(hint_fd, 0, 0, POSIX_FADV_DONTNEED);
+#endif
+
+	close(hint_fd);
+#endif
+}
+
+/*
+ * fsync_fname: fsync a single file or directory.
+ *
+ * fname is the path to sync; isdir tells whether it names a directory.
+ *
+ * Directories are synced on a best-effort basis: errors that merely mean
+ * the OS doesn't allow/require opening or fsyncing a directory are
+ * ignored.  Any other failure is reported on stderr and handed to
+ * exit_nicely().
+ *
+ * Adapted from fsync_fname() in copydir.c.
+ */
+static void
+fsync_fname(char *fname, bool isdir)
+{
+	int			fd;
+	int			returncode;
+
+	/*
+	 * Some OSs require directories to be opened read-only whereas other
+	 * systems don't allow us to fsync files opened read-only; so we need both
+	 * cases here
+	 */
+	if (!isdir)
+		fd = open(fname, O_RDWR | PG_BINARY);
+	else
+		fd = open(fname, O_RDONLY | PG_BINARY);
+
+	if (fd < 0)
+	{
+		/*
+		 * Some OSs don't allow us to open directories at all (Windows
+		 * returns EACCES)
+		 */
+		if (isdir && (errno == EISDIR || errno == EACCES))
+			return;
+
+		fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
+				progname, fname, strerror(errno));
+		exit_nicely();
+	}
+
+	returncode = fsync(fd);
+
+	/*
+	 * Some OSs don't allow us to fsync directories at all.  Depending on the
+	 * platform and filesystem this is reported as either EBADF or EINVAL,
+	 * so tolerate both; treating EINVAL as fatal would make initdb fail on
+	 * systems where directory fsync is simply unsupported.
+	 */
+	if (returncode != 0 && isdir && (errno == EBADF || errno == EINVAL))
+	{
+		close(fd);
+		return;
+	}
+
+	if (returncode != 0)
+	{
+		fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
+				progname, fname, strerror(errno));
+		exit_nicely();
+	}
+
+	close(fd);
+}
+
/*
* Open a subcommand with suitable error messaging
*/
@@ -2092,6 +2269,47 @@ make_postgres(void)
check_ok();
}
+/*
+ * perform_fsync: fsync everything initdb created down to disk.
+ *
+ * Two passes are made over the same set of files.  The first only hints
+ * to the OS that the files are about to be synced (pre_sync_fname); the
+ * second performs the actual fsync()s (fsync_fname).  Each pass covers
+ * the parent directory of PGDATA, the PGDATA tree itself, and the
+ * transaction log directory when one was given separately with
+ * -X/--xlogdir.
+ */
+static void
+perform_fsync(void)
+{
+	char		pdir[MAXPGPATH];
+	bool		separate_xlog;
+
+	fputs(_("syncing data to disk ... "), stdout);
+	fflush(stdout);
+
+	/*
+	 * We need to name the parent of PGDATA. get_parent_directory() isn't
+	 * enough here, because it can result in an empty string.
+	 */
+	snprintf(pdir, MAXPGPATH, "%s/..", pg_data);
+	canonicalize_path(pdir);
+
+	/*
+	 * walkdir() classifies entries with lstat() and only acts on directories
+	 * and regular files, so it never descends through a symbolic link.  A
+	 * transaction log directory placed outside PGDATA with -X is reached
+	 * from PGDATA only via such a link, so it has to be walked explicitly or
+	 * its contents would be left unsynced.
+	 */
+	separate_xlog = (strcmp(xlog_dir, "") != 0);
+
+	/*
+	 * Hint to the OS that we're going to fsync each of these files soon.
+	 */
+
+	/* first the parent of the PGDATA directory */
+	pre_sync_fname(pdir, true);
+
+	/* then recursively through the directory */
+	walkdir(pg_data, pre_sync_fname);
+
+	/* and through the separately located transaction log directory */
+	if (separate_xlog)
+		walkdir(xlog_dir, pre_sync_fname);
+
+	/*
+	 * Now, do the fsync()s in the same order.
+	 */
+
+	/* first the parent of the PGDATA directory */
+	fsync_fname(pdir, true);
+
+	/* then recursively through the directory */
+	walkdir(pg_data, fsync_fname);
+
+	/* and through the separately located transaction log directory */
+	if (separate_xlog)
+		walkdir(xlog_dir, fsync_fname);
+
+	check_ok();
+}
+
/*
* signal handler in case we are interrupted.
@@ -2532,6 +2750,7 @@ usage(const char *progname)
printf(_(" -d, --debug generate lots of debugging output\n"));
printf(_(" -L DIRECTORY where to find the input files\n"));
printf(_(" -n, --noclean do not clean up after errors\n"));
+ printf(_(" -N, --nosync do not wait for changes to be written safely to disk\n"));
printf(_(" -s, --show show internal settings\n"));
printf(_("\nOther options:\n"));
printf(_(" -V, --version output version information, then exit\n"));
@@ -2621,6 +2840,7 @@ main(int argc, char *argv[])
{"debug", no_argument, NULL, 'd'},
{"show", no_argument, NULL, 's'},
{"noclean", no_argument, NULL, 'n'},
+ {"nosync", no_argument, NULL, 'N'},
{"xlogdir", required_argument, NULL, 'X'},
{NULL, 0, NULL, 0}
};
@@ -2676,7 +2896,7 @@ main(int argc, char *argv[])
/* process command-line options */
- while ((c = getopt_long(argc, argv, "dD:E:L:nU:WA:sT:X:", long_options, &option_index)) != -1)
+ while ((c = getopt_long(argc, argv, "dD:E:L:nNU:WA:sT:X:", long_options, &option_index)) != -1)
{
switch (c)
{
@@ -2719,6 +2939,9 @@ main(int argc, char *argv[])
noclean = true;
printf(_("Running in noclean mode. Mistakes will not be cleaned up.\n"));
break;
+ case 'N':
+ do_sync = false;
+ break;
case 'L':
share_path = xstrdup(optarg);
break;
@@ -3310,6 +3533,11 @@ main(int argc, char *argv[])
make_postgres();
+ if (do_sync)
+ perform_fsync();
+ else
+ printf(_("\nSync to disk skipped.\nThe data directory might become corrupt if the operating system crashes.\n"));
+
if (authwarning != NULL)
fprintf(stderr, "%s", authwarning);
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 6521c6d5b9..915c318bd2 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -511,6 +511,9 @@
/* Define to 1 if you have the `symlink' function. */
#undef HAVE_SYMLINK
+/* Define to 1 if you have the `sync_file_range' function. */
+#undef HAVE_SYNC_FILE_RANGE
+
/* Define to 1 if you have the syslog interface. */
#undef HAVE_SYSLOG
diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32
index 8c232f67a0..e6fc482534 100644
--- a/src/include/pg_config.h.win32
+++ b/src/include/pg_config.h.win32
@@ -420,6 +420,9 @@
/* Define to 1 if you have the `symlink' function. */
#define HAVE_SYMLINK 1
+/* Define to 1 if you have the `sync_file_range' function. */
+/* #undef HAVE_SYNC_FILE_RANGE */
+
/* Define to 1 if you have the `sysconf' function. */
/* #undef HAVE_SYSCONF */