Fix longstanding problems in VACUUM caused by untimely interruptions

In VACUUM FULL, an interrupt after the initial transaction has been recorded
as committed can cause postmaster to restart with the following error message:
PANIC: cannot abort transaction NNNN, it was already committed
This problem has been reported many times.

In lazy VACUUM, an interrupt after the table has been truncated by
lazy_truncate_heap causes other backends' relcache to still point to the
removed pages; this can cause future INSERT and UPDATE queries to error out
with the following error message:
could not read block XX of relation 1663/NNN/MMMM: read only 0 of 8192 bytes
The window to this race condition is extremely narrow, but it has been seen in
the wild involving a cancelled autovacuum process.

The solution for both problems is to inhibit interrupts in both operations
until after the respective transactions have been committed.  It's not a
complete solution, because the transaction could theoretically be aborted by
some other error, but at least fixes the most common causes of both problems.
This commit is contained in:
Alvaro Herrera 2009-11-10 18:00:57 +00:00
parent db925b1a3c
commit cf46701aa3
3 changed files with 53 additions and 13 deletions

View File

@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.342.2.6 2008/09/11 14:01:16 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.342.2.7 2009/11/10 18:00:57 alvherre Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -204,10 +204,10 @@ static List *get_rel_oids(List *relids, const RangeVar *vacrel,
const char *stmttype); const char *stmttype);
static void vac_truncate_clog(TransactionId frozenXID); static void vac_truncate_clog(TransactionId frozenXID);
static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind); static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind);
static void full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt); static bool full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
static void scan_heap(VRelStats *vacrelstats, Relation onerel, static void scan_heap(VRelStats *vacrelstats, Relation onerel,
VacPageList vacuum_pages, VacPageList fraged_pages); VacPageList vacuum_pages, VacPageList fraged_pages);
static void repair_frag(VRelStats *vacrelstats, Relation onerel, static bool repair_frag(VRelStats *vacrelstats, Relation onerel,
VacPageList vacuum_pages, VacPageList fraged_pages, VacPageList vacuum_pages, VacPageList fraged_pages,
int nindexes, Relation *Irel); int nindexes, Relation *Irel);
static void move_chain_tuple(Relation rel, static void move_chain_tuple(Relation rel,
@ -959,6 +959,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
Oid toast_relid; Oid toast_relid;
Oid save_userid; Oid save_userid;
bool save_secdefcxt; bool save_secdefcxt;
bool heldoff;
/* Begin a transaction for vacuuming this relation */ /* Begin a transaction for vacuuming this relation */
StartTransactionCommand(); StartTransactionCommand();
@ -1102,9 +1103,9 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
* Do the actual work --- either FULL or "lazy" vacuum * Do the actual work --- either FULL or "lazy" vacuum
*/ */
if (vacstmt->full) if (vacstmt->full)
full_vacuum_rel(onerel, vacstmt); heldoff = full_vacuum_rel(onerel, vacstmt);
else else
lazy_vacuum_rel(onerel, vacstmt); heldoff = lazy_vacuum_rel(onerel, vacstmt);
StrategyHintVacuum(false); StrategyHintVacuum(false);
@ -1119,6 +1120,10 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
*/ */
CommitTransactionCommand(); CommitTransactionCommand();
/* now we can allow interrupts again, if disabled */
if (heldoff)
RESUME_INTERRUPTS();
/* /*
* If the relation has a secondary toast rel, vacuum that too while we * If the relation has a secondary toast rel, vacuum that too while we
* still hold the session lock on the master table. Note however that * still hold the session lock on the master table. Note however that
@ -1152,8 +1157,11 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
* *
* At entry, we have already established a transaction and opened * At entry, we have already established a transaction and opened
* and locked the relation. * and locked the relation.
*
* The return value indicates whether this function has held off
* interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
*/ */
static void static bool
full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
{ {
VacPageListData vacuum_pages; /* List of pages to vacuum and/or VacPageListData vacuum_pages; /* List of pages to vacuum and/or
@ -1164,6 +1172,7 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
int nindexes, int nindexes,
i; i;
VRelStats *vacrelstats; VRelStats *vacrelstats;
bool heldoff = false;
vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
&OldestXmin, &FreezeLimit); &OldestXmin, &FreezeLimit);
@ -1205,8 +1214,8 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
if (fraged_pages.num_pages > 0) if (fraged_pages.num_pages > 0)
{ {
/* Try to shrink heap */ /* Try to shrink heap */
repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages, heldoff = repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
nindexes, Irel); nindexes, Irel);
vac_close_indexes(nindexes, Irel, NoLock); vac_close_indexes(nindexes, Irel, NoLock);
} }
else else
@ -1230,6 +1239,8 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
/* report results to the stats collector, too */ /* report results to the stats collector, too */
pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
vacstmt->analyze, vacrelstats->rel_tuples); vacstmt->analyze, vacrelstats->rel_tuples);
return heldoff;
} }
@ -1655,8 +1666,11 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
* for them after committing (in hack-manner - without losing locks * for them after committing (in hack-manner - without losing locks
* and freeing memory!) current transaction. It truncates relation * and freeing memory!) current transaction. It truncates relation
* if some end-blocks are gone away. * if some end-blocks are gone away.
*
* The return value indicates whether this function has held off
* interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
*/ */
static void static bool
repair_frag(VRelStats *vacrelstats, Relation onerel, repair_frag(VRelStats *vacrelstats, Relation onerel,
VacPageList vacuum_pages, VacPageList fraged_pages, VacPageList vacuum_pages, VacPageList fraged_pages,
int nindexes, Relation *Irel) int nindexes, Relation *Irel)
@ -1680,6 +1694,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
vacuumed_pages; vacuumed_pages;
int keep_tuples = 0; int keep_tuples = 0;
PGRUsage ru0; PGRUsage ru0;
bool heldoff = false;
pg_rusage_init(&ru0); pg_rusage_init(&ru0);
@ -2380,7 +2395,13 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
* lot of extra code to close and re-open the relation, indexes, etc. * lot of extra code to close and re-open the relation, indexes, etc.
* For now, a quick hack: record status of current transaction as * For now, a quick hack: record status of current transaction as
* committed, and continue. * committed, and continue.
*
* We prevent cancel interrupts after this point to mitigate the
* problem that you can't abort the transaction now; caller is
* responsible for re-enabling them after committing the transaction.
*/ */
HOLD_INTERRUPTS();
heldoff = true;
RecordTransactionCommit(); RecordTransactionCommit();
} }
@ -2570,6 +2591,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
pfree(vacrelstats->vtlinks); pfree(vacrelstats->vtlinks);
ExecContext_Finish(&ec); ExecContext_Finish(&ec);
return heldoff;
} }
/* /*

View File

@ -38,7 +38,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.81.2.5 2009/01/06 14:55:50 heikki Exp $ * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.81.2.6 2009/11/10 18:00:57 alvherre Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -141,14 +141,18 @@ static int vac_cmp_page_spaces(const void *left, const void *right);
* *
* At entry, we have already established a transaction and opened * At entry, we have already established a transaction and opened
* and locked the relation. * and locked the relation.
*
* The return value indicates whether this function has held off
* interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
*/ */
void bool
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
{ {
LVRelStats *vacrelstats; LVRelStats *vacrelstats;
Relation *Irel; Relation *Irel;
int nindexes; int nindexes;
BlockNumber possibly_freeable; BlockNumber possibly_freeable;
bool heldoff = false;
if (vacstmt->verbose) if (vacstmt->verbose)
elevel = INFO; elevel = INFO;
@ -179,12 +183,23 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
* *
* Don't even think about it unless we have a shot at releasing a goodly * Don't even think about it unless we have a shot at releasing a goodly
* number of pages. Otherwise, the time taken isn't worth it. * number of pages. Otherwise, the time taken isn't worth it.
*
*
* Note that after we've truncated the heap, it's too late to abort the
* transaction; doing so would lose the sinval messages needed to tell
* the other backends about the table being shrunk. We prevent interrupts
* in that case; caller is responsible for re-enabling them after
* committing the transaction.
*/ */
possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
if (possibly_freeable > 0 && if (possibly_freeable > 0 &&
(possibly_freeable >= REL_TRUNCATE_MINIMUM || (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)) possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
{
HOLD_INTERRUPTS();
heldoff = true;
lazy_truncate_heap(onerel, vacrelstats); lazy_truncate_heap(onerel, vacrelstats);
}
/* Update shared free space map with final free space info */ /* Update shared free space map with final free space info */
lazy_update_fsm(onerel, vacrelstats); lazy_update_fsm(onerel, vacrelstats);
@ -199,6 +214,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
/* report results to the stats collector, too */ /* report results to the stats collector, too */
pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
vacstmt->analyze, vacrelstats->rel_tuples); vacstmt->analyze, vacrelstats->rel_tuples);
return heldoff;
} }

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.68 2006/11/05 22:42:10 tgl Exp $ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.68.2.1 2009/11/10 18:00:57 alvherre Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -127,7 +127,7 @@ extern bool vac_is_partial_index(Relation indrel);
extern void vacuum_delay_point(void); extern void vacuum_delay_point(void);
/* in commands/vacuumlazy.c */ /* in commands/vacuumlazy.c */
extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt); extern bool lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
/* in commands/analyze.c */ /* in commands/analyze.c */
extern void analyze_rel(Oid relid, VacuumStmt *vacstmt); extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);