mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Don't error out if recycling or removing an old WAL segment fails at the end
of checkpoint. Although the checkpoint has been written to WAL at that point already, so that all data is safe, and we'll retry removing the WAL segment at the next checkpoint, if such a failure persists we won't be able to remove any other old WAL segments either and will eventually run out of disk space. It's better to treat the failure as non-fatal, and move on to clean any other WAL segment and continue with any other end-of-checkpoint cleanup. We don't normally expect any such failures, but on Windows it can happen with some anti-virus or backup software that lock files without FILE_SHARE_DELETE flag. Also, the loop in pgrename() to retry when the file is locked was broken. If a file is locked on Windows, you get ERROR_SHARE_VIOLATION, not ERROR_ACCESS_DENIED, at least on modern versions. Fix that, although I left the check for ERROR_ACCESS_DENIED in there as well (presumably it was correct in some environment), and added ERROR_LOCK_VIOLATION to be consistent with similar checks in pgwin32_open(). Reduce the timeout on the loop from 30s to 10s, on the grounds that since it's been broken, we've effectively had a timeout of 0s and no-one has complained, so a smaller timeout is actually closer to the old behavior. A longer timeout would mean that if recycling a WAL file fails because it's locked for some reason, InstallXLogFileSegment() will hold ControlFileLock for longer, potentially blocking other backends, so a long timeout isn't totally harmless. While we're at it, set errno correctly in pgrename(). Backpatch to 8.2, which is the oldest version supported on Windows. The xlog.c changes would make sense on other platforms and thus on older versions as well, but since there's no such locking issues on other platforms, it's not worth it.
This commit is contained in:
parent
fbee791d05
commit
227771d10e
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.258.2.6 2009/09/10 09:43:32 heikki Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.258.2.7 2009/09/13 18:32:34 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -1996,7 +1996,11 @@ XLogFileInit(uint32 log, uint32 seg,
|
||||
*use_existent, &max_advance,
|
||||
use_lock))
|
||||
{
|
||||
/* No need for any more future segments... */
|
||||
/*
|
||||
* No need for any more future segments, or InstallXLogFileSegment()
|
||||
* failed to rename the file into place. If the rename failed, opening
|
||||
* the file below will fail.
|
||||
*/
|
||||
unlink(tmppath);
|
||||
}
|
||||
|
||||
@ -2141,10 +2145,9 @@ XLogFileCopy(uint32 log, uint32 seg,
|
||||
* place. This should be TRUE except during bootstrap log creation. The
|
||||
* caller must *not* hold the lock at call.
|
||||
*
|
||||
* Returns TRUE if file installed, FALSE if not installed because of
|
||||
* exceeding max_advance limit. On Windows, we also return FALSE if we
|
||||
* can't rename the file into place because someone's got it open.
|
||||
* (Any other kind of failure causes ereport().)
|
||||
* Returns TRUE if the file was installed successfully. FALSE indicates that
|
||||
* max_advance limit was exceeded, or an error occurred while renaming the
|
||||
* file into place.
|
||||
*/
|
||||
static bool
|
||||
InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
|
||||
@ -2192,31 +2195,26 @@ InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
|
||||
*/
|
||||
#if HAVE_WORKING_LINK
|
||||
if (link(tmppath, path) < 0)
|
||||
ereport(ERROR,
|
||||
{
|
||||
if (use_lock)
|
||||
LWLockRelease(ControlFileLock);
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
|
||||
tmppath, path, *log, *seg)));
|
||||
return false;
|
||||
}
|
||||
unlink(tmppath);
|
||||
#else
|
||||
if (rename(tmppath, path) < 0)
|
||||
{
|
||||
#ifdef WIN32
|
||||
#if !defined(__CYGWIN__)
|
||||
if (GetLastError() == ERROR_ACCESS_DENIED)
|
||||
#else
|
||||
if (errno == EACCES)
|
||||
#endif
|
||||
{
|
||||
if (use_lock)
|
||||
LWLockRelease(ControlFileLock);
|
||||
return false;
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
ereport(ERROR,
|
||||
if (use_lock)
|
||||
LWLockRelease(ControlFileLock);
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
|
||||
tmppath, path, *log, *seg)));
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2686,19 +2684,25 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr,
|
||||
*/
|
||||
snprintf(newpath, MAXPGPATH, "%s.deleted", path);
|
||||
if (rename(path, newpath) != 0)
|
||||
ereport(ERROR,
|
||||
{
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not rename old transaction log file \"%s\"",
|
||||
path)));
|
||||
continue;
|
||||
}
|
||||
rc = unlink(newpath);
|
||||
#else
|
||||
rc = unlink(path);
|
||||
#endif
|
||||
if (rc != 0)
|
||||
ereport(ERROR,
|
||||
{
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not remove old transaction log file \"%s\": %m",
|
||||
path)));
|
||||
continue;
|
||||
}
|
||||
(*nsegsremoved)++;
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Win32 (NT, Win2k, XP). replace() doesn't work on Win95/98/Me.
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/port/dirmod.c,v 1.44.2.3 2008/04/12 00:00:08 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/port/dirmod.c,v 1.44.2.4 2009/09/13 18:32:35 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -117,10 +117,11 @@ pgrename(const char *from, const char *to)
|
||||
int loops = 0;
|
||||
|
||||
/*
|
||||
* We need to loop because even though PostgreSQL uses flags that
|
||||
* allow rename while the file is open, other applications might have
|
||||
* the file open without those flags. However, we won't wait
|
||||
* indefinitely for someone else to close the file.
|
||||
* We need to loop because even though PostgreSQL uses flags that allow
|
||||
* rename while the file is open, other applications might have the file
|
||||
* open without those flags. However, we won't wait indefinitely for
|
||||
* someone else to close the file, as the caller might be holding locks
|
||||
* and blocking other backends.
|
||||
*/
|
||||
#if defined(WIN32) && !defined(__CYGWIN__)
|
||||
while (!MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING))
|
||||
@ -129,13 +130,28 @@ pgrename(const char *from, const char *to)
|
||||
#endif
|
||||
{
|
||||
#if defined(WIN32) && !defined(__CYGWIN__)
|
||||
if (GetLastError() != ERROR_ACCESS_DENIED)
|
||||
DWORD err = GetLastError();
|
||||
|
||||
_dosmaperr(err);
|
||||
|
||||
/*
|
||||
* Modern NT-based Windows versions return ERROR_SHARING_VIOLATION
|
||||
* if another process has the file open without FILE_SHARE_DELETE.
|
||||
* ERROR_LOCK_VIOLATION has also been seen with some anti-virus
|
||||
* software. This used to check for just ERROR_ACCESS_DENIED, so
|
||||
* presumably you can get that too with some OS versions. We don't
|
||||
* expect real permission errors where we currently use rename().
|
||||
*/
|
||||
if (err != ERROR_ACCESS_DENIED &&
|
||||
err != ERROR_SHARING_VIOLATION &&
|
||||
err != ERROR_LOCK_VIOLATION)
|
||||
return -1;
|
||||
#else
|
||||
if (errno != EACCES)
|
||||
#endif
|
||||
/* set errno? */
|
||||
return -1;
|
||||
if (++loops > 300) /* time out after 30 sec */
|
||||
#endif
|
||||
|
||||
if (++loops > 100) /* time out after 10 sec */
|
||||
return -1;
|
||||
pg_usleep(100000); /* us */
|
||||
}
|
||||
@ -152,17 +168,17 @@ pgunlink(const char *path)
|
||||
int loops = 0;
|
||||
|
||||
/*
|
||||
* We need to loop because even though PostgreSQL uses flags that
|
||||
* allow unlink while the file is open, other applications might have
|
||||
* the file open without those flags. However, we won't wait
|
||||
* indefinitely for someone else to close the file.
|
||||
* We need to loop because even though PostgreSQL uses flags that allow
|
||||
* unlink while the file is open, other applications might have the file
|
||||
* open without those flags. However, we won't wait indefinitely for
|
||||
* someone else to close the file, as the caller might be holding locks
|
||||
* and blocking other backends.
|
||||
*/
|
||||
while (unlink(path))
|
||||
{
|
||||
if (errno != EACCES)
|
||||
/* set errno? */
|
||||
return -1;
|
||||
if (++loops > 300) /* time out after 30 sec */
|
||||
if (++loops > 100) /* time out after 10 sec */
|
||||
return -1;
|
||||
pg_usleep(100000); /* us */
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user