From 78e1220104227c86b4b49d0fc123db7fa596d43d Mon Sep 17 00:00:00 2001
From: Alvaro Herrera
Date: Mon, 19 Aug 2013 12:33:07 -0400
Subject: [PATCH] Fix pg_upgrade failure from servers older than 9.3

When upgrading from servers of versions 9.2 and older, and MultiXactIds
have been used in the old server beyond the first page (that is, 2048
multis or more in the default 8kB-page build), pg_upgrade would set the
next multixact offset to use beyond what has been allocated in the new
cluster. This would cause a failure the first time the new cluster
needs to use this value, because the pg_multixact/offsets/ file wouldn't
exist or wouldn't be large enough. To fix, ensure that the transient
server instances launched by pg_upgrade extend the file as necessary.

Per report from Jesse Denardo in
CANiVXAj4c88YqipsyFQPboqMudnjcNTdB3pqe8ReXqAFQ=HXyA@mail.gmail.com
---
 src/backend/access/transam/multixact.c | 47 ++++++++++++++++++++++++++
 src/backend/access/transam/slru.c      | 44 ++++++++++++++++++++++++
 src/include/access/slru.h              |  1 +
 3 files changed, 92 insertions(+)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index b553518bab..745b1f1d89 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1722,6 +1722,46 @@ ZeroMultiXactMemberPage(int pageno, bool writeXlog)
 	return slotno;
 }
 
+/*
+ * MaybeExtendOffsetSlru
+ *		Extend the offsets SLRU area, if necessary
+ *
+ * After a binary upgrade from <= 9.2, the pg_multixact/offsets SLRU area might
+ * contain files that are shorter than necessary; this would occur if the old
+ * installation had used multixacts beyond the first page (files cannot be
+ * copied, because the on-disk representation is different).  pg_upgrade would
+ * update pg_control to set the next offset value to be at that position, so
+ * that tuples marked as locked by such MultiXacts would be seen as visible
+ * without having to consult multixact.  However, trying to create and use a
+ * new MultiXactId would result in an error because the page on which the new
+ * value would reside does not exist.  This routine is in charge of creating
+ * such pages.
+ */
+static void
+MaybeExtendOffsetSlru(void)
+{
+	int			pageno;
+
+	pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
+
+	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
+
+	if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
+	{
+		int			slotno;
+
+		/*
+		 * Fortunately for us, SimpleLruWritePage is already prepared to deal
+		 * with creating a new segment file even if the page we're writing is
+		 * not the first in it, so this is enough.
+		 */
+		slotno = ZeroMultiXactOffsetPage(pageno, false);
+		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
+	}
+
+	LWLockRelease(MultiXactOffsetControlLock);
+}
+
 /*
  * This must be called ONCE during postmaster or standalone-backend startup.
  *
@@ -1742,6 +1782,13 @@ StartupMultiXact(void)
 	int			entryno;
 	int			flagsoff;
 
+	/*
+	 * During a binary upgrade, make sure that the offsets SLRU is large
+	 * enough to contain the next value that would be created.
+	 */
+	if (IsBinaryUpgrade)
+		MaybeExtendOffsetSlru();
+
 	/* Clean up offsets state */
 	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
 
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 5a8f654fb7..5e53593a8f 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -563,6 +563,50 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
 	SlruInternalWritePage(ctl, slotno, NULL);
 }
 
+/*
+ * Return whether the given page exists on disk.
+ *
+ * A false return means that either the file does not exist, or that it's not
+ * large enough to contain the given page.
+ */
+bool
+SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
+{
+	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
+	int			offset = rpageno * BLCKSZ;	/* byte offset of page in segment */
+	char		path[MAXPGPATH];
+	int			fd;
+	bool		result;
+	off_t		endpos;		/* segment file size, as returned by lseek */
+
+	SlruFileName(ctl, path, segno);
+
+	fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
+	if (fd < 0)
+	{
+		/* expected: file doesn't exist */
+		if (errno == ENOENT)
+			return false;
+
+		/* any other open failure is reported as an I/O error */
+		slru_errcause = SLRU_OPEN_FAILED;
+		slru_errno = errno;
+		SlruReportIOError(ctl, pageno, 0);
+	}
+
+	if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
+	{
+		slru_errcause = SLRU_SEEK_FAILED;	/* the seek failed, not the open */
+		slru_errno = errno;
+		SlruReportIOError(ctl, pageno, 0);
+	}
+
+	result = endpos >= (off_t) (offset + BLCKSZ);	/* whole page must fit */
+
+	CloseTransientFile(fd);
+	return result;
+}
 
 /*
  * Physical read of a (previously existing) page into a buffer slot
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 29ae9e0e5c..7e81e0f113 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -145,6 +145,7 @@ extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
 extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
 extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint);
 extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
+extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
 
 typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
 											  void *data);