diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 2d33510930..7a007a6ba5 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -577,6 +577,13 @@ ShutdownCLOG(void) /* Flush dirty CLOG pages to disk */ TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false); SimpleLruFlush(ClogCtl, false); + + /* + * fsync pg_xact to ensure that any files flushed previously are durably + * on disk. + */ + fsync_fname("pg_xact", true); + TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false); } @@ -589,6 +596,13 @@ CheckPointCLOG(void) /* Flush dirty CLOG pages to disk */ TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true); SimpleLruFlush(ClogCtl, true); + + /* + * fsync pg_xact to ensure that any files flushed previously are durably + * on disk. + */ + fsync_fname("pg_xact", true); + TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true); } diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 8e1df6e0ea..03ffa20908 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -746,6 +746,12 @@ ShutdownCommitTs(void) { /* Flush dirty CommitTs pages to disk */ SimpleLruFlush(CommitTsCtl, false); + + /* + * fsync pg_commit_ts to ensure that any files flushed previously are durably + * on disk. + */ + fsync_fname("pg_commit_ts", true); } /* @@ -756,6 +762,12 @@ CheckPointCommitTs(void) { /* Flush dirty CommitTs pages to disk */ SimpleLruFlush(CommitTsCtl, true); + + /* + * fsync pg_commit_ts to ensure that any files flushed previously are durably + * on disk. + */ + fsync_fname("pg_commit_ts", true); } /* diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 4b4999fd7b..83169cccc3 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1650,6 +1650,14 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) } LWLockRelease(TwoPhaseStateLock); + /* + * Flush unconditionally the parent directory to make any information + * durable on disk. Two-phase files could have been removed and those + * removals need to be made persistent as well as any files newly created + * previously since the last checkpoint. + */ + fsync_fname(TWOPHASE_DIR, true); + TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE(); if (log_checkpoints && serialized_xacts > 0) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 58790e0e96..61ca81d1d2 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3475,7 +3475,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, if (!find_free) { /* Force installation: get rid of any pre-existing segment file */ - unlink(path); + durable_unlink(path, DEBUG1); } else { @@ -4026,16 +4026,13 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) path))); return; } - rc = unlink(newpath); + rc = durable_unlink(newpath, LOG); #else - rc = unlink(path); + rc = durable_unlink(path, LOG); #endif if (rc != 0) { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not remove old transaction log file \"%s\": %m", - path))); + /* Message already logged by durable_unlink() */ return; } CheckpointStats.ckpt_segs_removed++; @@ -10771,17 +10768,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", BACKUP_LABEL_FILE))); - if (unlink(BACKUP_LABEL_FILE) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", - BACKUP_LABEL_FILE))); + durable_unlink(BACKUP_LABEL_FILE, ERROR); /* * Remove tablespace_map file if present, it is created only if there * are tablespaces. */ - unlink(TABLESPACE_MAP); + durable_unlink(TABLESPACE_MAP, DEBUG1); } PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive)); } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index f0ed2e9b5f..b14979496c 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -657,6 +657,43 @@ durable_rename(const char *oldfile, const char *newfile, int elevel) return 0; } +/* + * durable_unlink -- remove a file in a durable manner + * + * This routine ensures that, after returning, the effect of removing file + * persists in case of a crash. A crash while this routine is running will + * leave the system in no mixed state. + * + * It does so by using fsync on the parent directory of the file after the + * actual removal is done. + * + * Log errors with the severity specified by caller. + * + * Returns 0 if the operation succeeded, -1 otherwise. Note that errno is not + * valid upon return. + */ +int +durable_unlink(const char *fname, int elevel) +{ + if (unlink(fname) < 0) + { + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", + fname))); + return -1; + } + + /* + * To guarantee that the removal of the file is persistent, fsync + * its parent directory. + */ + if (fsync_parent_path(fname, elevel) != 0) + return -1; + + return 0; +} + /* * durable_link_or_rename -- rename a file in a durable manner. * diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index ac37502928..05680499e4 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -119,6 +119,7 @@ extern int pg_fdatasync(int fd); extern void pg_flush_data(int fd, off_t offset, off_t amount); extern void fsync_fname(const char *fname, bool isdir); extern int durable_rename(const char *oldfile, const char *newfile, int loglevel); +extern int durable_unlink(const char *fname, int loglevel); extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel); extern void SyncDataDirectory(void);