mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-24 18:55:04 +08:00
Treat directory open failures as hard errors in ResetUnloggedRelations().
Previously, this code just reported such problems at LOG level and kept going. The problem with this approach is that transient failures (e.g., ENFILE) could prevent us from resetting unlogged relations to empty, yet allow recovery to appear to complete successfully. That seems like a data corruption hazard large enough to treat such problems as reasons to fail startup. For the same reason, treat unlink failures for unlogged files as hard errors not just LOG messages. It's a little odd that we did it like that when file-level errors in other steps (copy_file, fsync_fname) are ERRORs. The sole case that I left alone is that ENOENT failure on a tablespace (not database) directory is not an error, though it will now be logged rather than just silently ignored. This is to cover the scenario where a previous DROP TABLESPACE removed the tablespace directory but failed before removing the pg_tblspc symlink. I'm not sure that that's very likely in practice, but that seems like the only real excuse for the old behavior here, so let's allow for it. (As coded, this will also allow ENOENT on $PGDATA/base/. But since we'll fail soon enough if that's gone, I don't think we need to complicate this code by distinguishing that from a true tablespace case.) Discussion: https://postgr.es/m/21040.1512418508@sss.pgh.pa.us
This commit is contained in:
parent
e7cfb26fbc
commit
8dc3c971a9
@ -98,7 +98,9 @@ ResetUnloggedRelations(int op)
|
||||
MemoryContextDelete(tmpctx);
|
||||
}
|
||||
|
||||
/* Process one tablespace directory for ResetUnloggedRelations */
|
||||
/*
|
||||
* Process one tablespace directory for ResetUnloggedRelations
|
||||
*/
|
||||
static void
|
||||
ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
|
||||
{
|
||||
@ -107,29 +109,32 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
|
||||
char dbspace_path[MAXPGPATH * 2];
|
||||
|
||||
ts_dir = AllocateDir(tsdirname);
|
||||
if (ts_dir == NULL)
|
||||
|
||||
/*
|
||||
* If we get ENOENT on a tablespace directory, log it and return. This
|
||||
* can happen if a previous DROP TABLESPACE crashed between removing the
|
||||
* tablespace directory and removing the symlink in pg_tblspc. We don't
|
||||
* really want to prevent database startup in that scenario, so let it
|
||||
* pass instead. Any other type of error will be reported by ReadDir
|
||||
* (causing a startup failure).
|
||||
*/
|
||||
if (ts_dir == NULL && errno == ENOENT)
|
||||
{
|
||||
/* anything except ENOENT is fishy */
|
||||
if (errno != ENOENT)
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory \"%s\": %m",
|
||||
tsdirname)));
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory \"%s\": %m",
|
||||
tsdirname)));
|
||||
return;
|
||||
}
|
||||
|
||||
while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
/*
|
||||
* We're only interested in the per-database directories, which have
|
||||
* numeric names. Note that this code will also (properly) ignore "."
|
||||
* and "..".
|
||||
*/
|
||||
while (isdigit((unsigned char) de->d_name[i]))
|
||||
++i;
|
||||
if (de->d_name[i] != '\0' || i == 0)
|
||||
if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
|
||||
continue;
|
||||
|
||||
snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
|
||||
@ -140,7 +145,9 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
|
||||
FreeDir(ts_dir);
|
||||
}
|
||||
|
||||
/* Process one per-dbspace directory for ResetUnloggedRelations */
|
||||
/*
|
||||
* Process one per-dbspace directory for ResetUnloggedRelations
|
||||
*/
|
||||
static void
|
||||
ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
{
|
||||
@ -158,20 +165,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
*/
|
||||
if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
|
||||
{
|
||||
HTAB *hash = NULL;
|
||||
HTAB *hash;
|
||||
HASHCTL ctl;
|
||||
|
||||
/* Open the directory. */
|
||||
dbspace_dir = AllocateDir(dbspacedirname);
|
||||
if (dbspace_dir == NULL)
|
||||
{
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory \"%s\": %m",
|
||||
dbspacedirname)));
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* It's possible that someone could create a ton of unlogged relations
|
||||
* in the same database & tablespace, so we'd better use a hash table
|
||||
@ -179,11 +175,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
* need to be reset. Otherwise, this cleanup operation would be
|
||||
* O(n^2).
|
||||
*/
|
||||
memset(&ctl, 0, sizeof(ctl));
|
||||
ctl.keysize = sizeof(unlogged_relation_entry);
|
||||
ctl.entrysize = sizeof(unlogged_relation_entry);
|
||||
hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
|
||||
|
||||
/* Scan the directory. */
|
||||
dbspace_dir = AllocateDir(dbspacedirname);
|
||||
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
|
||||
{
|
||||
ForkNumber forkNum;
|
||||
@ -222,21 +220,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
}
|
||||
|
||||
/*
|
||||
* Now, make a second pass and remove anything that matches. First,
|
||||
* reopen the directory.
|
||||
* Now, make a second pass and remove anything that matches.
|
||||
*/
|
||||
dbspace_dir = AllocateDir(dbspacedirname);
|
||||
if (dbspace_dir == NULL)
|
||||
{
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory \"%s\": %m",
|
||||
dbspacedirname)));
|
||||
hash_destroy(hash);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Scan the directory. */
|
||||
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
|
||||
{
|
||||
ForkNumber forkNum;
|
||||
@ -266,15 +252,11 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
{
|
||||
snprintf(rm_path, sizeof(rm_path), "%s/%s",
|
||||
dbspacedirname, de->d_name);
|
||||
|
||||
/*
|
||||
* It's tempting to actually throw an error here, but since
|
||||
* this code gets run during database startup, that could
|
||||
* result in the database failing to start. (XXX Should we do
|
||||
* it anyway?)
|
||||
*/
|
||||
if (unlink(rm_path))
|
||||
elog(LOG, "could not unlink file \"%s\": %m", rm_path);
|
||||
if (unlink(rm_path) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not remove file \"%s\": %m",
|
||||
rm_path)));
|
||||
else
|
||||
elog(DEBUG2, "unlinked file \"%s\"", rm_path);
|
||||
}
|
||||
@ -294,19 +276,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
*/
|
||||
if ((op & UNLOGGED_RELATION_INIT) != 0)
|
||||
{
|
||||
/* Open the directory. */
|
||||
dbspace_dir = AllocateDir(dbspacedirname);
|
||||
if (dbspace_dir == NULL)
|
||||
{
|
||||
/* we just saw this directory, so it really ought to be there */
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory \"%s\": %m",
|
||||
dbspacedirname)));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Scan the directory. */
|
||||
dbspace_dir = AllocateDir(dbspacedirname);
|
||||
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
|
||||
{
|
||||
ForkNumber forkNum;
|
||||
@ -350,16 +321,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
* (especially the metadata ones) at once.
|
||||
*/
|
||||
dbspace_dir = AllocateDir(dbspacedirname);
|
||||
if (dbspace_dir == NULL)
|
||||
{
|
||||
/* we just saw this directory, so it really ought to be there */
|
||||
ereport(LOG,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory \"%s\": %m",
|
||||
dbspacedirname)));
|
||||
return;
|
||||
}
|
||||
|
||||
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
|
||||
{
|
||||
ForkNumber forkNum;
|
||||
@ -388,6 +349,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
|
||||
|
||||
FreeDir(dbspace_dir);
|
||||
|
||||
/*
|
||||
* Lastly, fsync the database directory itself, ensuring the
|
||||
* filesystem remembers the file creations and deletions we've done.
|
||||
* We don't bother with this during a call that does only
|
||||
* UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
|
||||
* get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
|
||||
* too at the next startup attempt.
|
||||
*/
|
||||
fsync_fname(dbspacedirname, true);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user