mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-03-07 19:47:50 +08:00
Fix and enhance the assertion of no palloc's in a critical section.
The assertion failed if WAL_DEBUG or LWLOCK_STATS was enabled; fix that by using separate memory contexts for the allocations made within those code blocks. This patch introduces a mechanism for marking any memory context as allowed in a critical section. Previously ErrorContext was exempt as a special case. Instead of a blanket exception of the checkpointer process, only exempt the memory context used for the pending ops hash table.
This commit is contained in:
parent
a749a23d7a
commit
1c6821be31
@ -60,6 +60,7 @@
|
||||
#include "storage/spin.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/ps_status.h"
|
||||
#include "utils/relmapper.h"
|
||||
#include "utils/snapmgr.h"
|
||||
@ -736,6 +737,10 @@ static bool bgwriterLaunched = false;
|
||||
static int MyLockNo = 0;
|
||||
static bool holdingAllLocks = false;
|
||||
|
||||
#ifdef WAL_DEBUG
|
||||
static MemoryContext walDebugCxt = NULL;
|
||||
#endif
|
||||
|
||||
static void readRecoveryCommandFile(void);
|
||||
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
|
||||
static bool recoveryStopsBefore(XLogRecord *record);
|
||||
@ -1258,6 +1263,7 @@ begin:;
|
||||
if (XLOG_DEBUG)
|
||||
{
|
||||
StringInfoData buf;
|
||||
MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
|
||||
|
||||
initStringInfo(&buf);
|
||||
appendStringInfo(&buf, "INSERT @ %X/%X: ",
|
||||
@ -1282,10 +1288,11 @@ begin:;
|
||||
|
||||
appendStringInfoString(&buf, " - ");
|
||||
RmgrTable[rechdr->xl_rmid].rm_desc(&buf, (XLogRecord *) recordbuf.data);
|
||||
pfree(recordbuf.data);
|
||||
}
|
||||
elog(LOG, "%s", buf.data);
|
||||
pfree(buf.data);
|
||||
|
||||
MemoryContextSwitchTo(oldCxt);
|
||||
MemoryContextReset(walDebugCxt);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -4807,6 +4814,24 @@ XLOGShmemInit(void)
|
||||
char *allocptr;
|
||||
int i;
|
||||
|
||||
#ifdef WAL_DEBUG
|
||||
/*
|
||||
* Create a memory context for WAL debugging that's exempt from the
|
||||
* normal "no pallocs in critical section" rule. Yes, that can lead to a
|
||||
* PANIC if an allocation fails, but wal_debug is not for production use
|
||||
* anyway.
|
||||
*/
|
||||
if (walDebugCxt == NULL)
|
||||
{
|
||||
walDebugCxt = AllocSetContextCreate(TopMemoryContext,
|
||||
"WAL Debug",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
MemoryContextAllowInCriticalSection(walDebugCxt, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
ControlFile = (ControlFileData *)
|
||||
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
|
||||
XLogCtl = (XLogCtlData *)
|
||||
|
@ -1305,19 +1305,6 @@ AbsorbFsyncRequests(void)
|
||||
if (!AmCheckpointerProcess())
|
||||
return;
|
||||
|
||||
/*
|
||||
* We have to PANIC if we fail to absorb all the pending requests (eg,
|
||||
* because our hashtable runs out of memory). This is because the system
|
||||
* cannot run safely if we are unable to fsync what we have been told to
|
||||
* fsync. Fortunately, the hashtable is so small that the problem is
|
||||
* quite unlikely to arise in practice.
|
||||
*/
|
||||
START_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
* We try to avoid holding the lock for a long time by copying the request
|
||||
* array.
|
||||
*/
|
||||
LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
|
||||
|
||||
/* Transfer stats counts into pending pgstats message */
|
||||
@ -1327,12 +1314,25 @@ AbsorbFsyncRequests(void)
|
||||
CheckpointerShmem->num_backend_writes = 0;
|
||||
CheckpointerShmem->num_backend_fsync = 0;
|
||||
|
||||
/*
|
||||
* We try to avoid holding the lock for a long time by copying the request
|
||||
* array, and processing the requests after releasing the lock.
|
||||
*
|
||||
* Once we have cleared the requests from shared memory, we have to PANIC
|
||||
* if we then fail to absorb them (eg, because our hashtable runs out of
|
||||
* memory). This is because the system cannot run safely if we are unable
|
||||
* to fsync what we have been told to fsync. Fortunately, the hashtable
|
||||
* is so small that the problem is quite unlikely to arise in practice.
|
||||
*/
|
||||
n = CheckpointerShmem->num_requests;
|
||||
if (n > 0)
|
||||
{
|
||||
requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
|
||||
memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
|
||||
}
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
CheckpointerShmem->num_requests = 0;
|
||||
|
||||
LWLockRelease(CheckpointerCommLock);
|
||||
@ -1340,10 +1340,10 @@ AbsorbFsyncRequests(void)
|
||||
for (request = requests; n > 0; request++, n--)
|
||||
RememberFsyncRequest(request->rnode, request->forknum, request->segno);
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
if (requests)
|
||||
pfree(requests);
|
||||
|
||||
END_CRIT_SECTION();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -104,8 +104,8 @@ typedef struct lwlock_stats
|
||||
int spin_delay_count;
|
||||
} lwlock_stats;
|
||||
|
||||
static int counts_for_pid = 0;
|
||||
static HTAB *lwlock_stats_htab;
|
||||
static lwlock_stats lwlock_stats_dummy;
|
||||
#endif
|
||||
|
||||
#ifdef LOCK_DEBUG
|
||||
@ -142,21 +142,39 @@ static void
|
||||
init_lwlock_stats(void)
|
||||
{
|
||||
HASHCTL ctl;
|
||||
static MemoryContext lwlock_stats_cxt = NULL;
|
||||
static bool exit_registered = false;
|
||||
|
||||
if (lwlock_stats_htab != NULL)
|
||||
{
|
||||
hash_destroy(lwlock_stats_htab);
|
||||
lwlock_stats_htab = NULL;
|
||||
}
|
||||
if (lwlock_stats_cxt != NULL)
|
||||
MemoryContextDelete(lwlock_stats_cxt);
|
||||
|
||||
/*
|
||||
* The LWLock stats will be updated within a critical section, which
|
||||
* requires allocating new hash entries. Allocations within a critical
|
||||
* section are normally not allowed because running out of memory would
|
||||
* lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
|
||||
* turned on in production, so that's an acceptable risk. The hash entries
|
||||
* are small, so the risk of running out of memory is minimal in practice.
|
||||
*/
|
||||
lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
|
||||
"LWLock stats",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
|
||||
|
||||
MemSet(&ctl, 0, sizeof(ctl));
|
||||
ctl.keysize = sizeof(lwlock_stats_key);
|
||||
ctl.entrysize = sizeof(lwlock_stats);
|
||||
ctl.hash = tag_hash;
|
||||
ctl.hcxt = lwlock_stats_cxt;
|
||||
lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
|
||||
HASH_ELEM | HASH_FUNCTION);
|
||||
counts_for_pid = MyProcPid;
|
||||
on_shmem_exit(print_lwlock_stats, 0);
|
||||
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
|
||||
if (!exit_registered)
|
||||
{
|
||||
on_shmem_exit(print_lwlock_stats, 0);
|
||||
exit_registered = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -190,9 +208,13 @@ get_lwlock_stats_entry(LWLock *lock)
|
||||
lwlock_stats *lwstats;
|
||||
bool found;
|
||||
|
||||
/* Set up local count state first time through in a given process */
|
||||
if (counts_for_pid != MyProcPid)
|
||||
init_lwlock_stats();
|
||||
/*
|
||||
* During shared memory initialization, the hash table doesn't exist yet.
|
||||
* Stats of that phase aren't very interesting, so just collect operations
|
||||
* on all locks in a single dummy entry.
|
||||
*/
|
||||
if (lwlock_stats_htab == NULL)
|
||||
return &lwlock_stats_dummy;
|
||||
|
||||
/* Fetch or create the entry. */
|
||||
key.tranche = lock->tranche;
|
||||
@ -361,6 +383,16 @@ CreateLWLocks(void)
|
||||
LWLockRegisterTranche(0, &MainLWLockTranche);
|
||||
}
|
||||
|
||||
/*
|
||||
* InitLWLockAccess - initialize backend-local state needed to hold LWLocks
|
||||
*/
|
||||
void
|
||||
InitLWLockAccess(void)
|
||||
{
|
||||
#ifdef LWLOCK_STATS
|
||||
init_lwlock_stats();
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* LWLockAssign - assign a dynamically-allocated LWLock number
|
||||
|
@ -411,8 +411,9 @@ InitProcess(void)
|
||||
|
||||
/*
|
||||
* Now that we have a PGPROC, we could try to acquire locks, so initialize
|
||||
* the deadlock checker.
|
||||
* local state needed for LWLocks, and the deadlock checker.
|
||||
*/
|
||||
InitLWLockAccess();
|
||||
InitDeadLockChecking();
|
||||
}
|
||||
|
||||
|
@ -115,7 +115,7 @@ typedef struct _MdfdVec
|
||||
struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
|
||||
} MdfdVec;
|
||||
|
||||
static MemoryContext MdCxt; /* context for all md.c allocations */
|
||||
static MemoryContext MdCxt; /* context for all MdfdVec objects */
|
||||
|
||||
|
||||
/*
|
||||
@ -157,6 +157,7 @@ typedef struct
|
||||
|
||||
static HTAB *pendingOpsTable = NULL;
|
||||
static List *pendingUnlinks = NIL;
|
||||
static MemoryContext pendingOpsCxt; /* context for the above */
|
||||
|
||||
static CycleCtr mdsync_cycle_ctr = 0;
|
||||
static CycleCtr mdckpt_cycle_ctr = 0;
|
||||
@ -209,11 +210,27 @@ mdinit(void)
|
||||
{
|
||||
HASHCTL hash_ctl;
|
||||
|
||||
/*
|
||||
* XXX: The checkpointer needs to add entries to the pending ops table
|
||||
* when absorbing fsync requests. That is done within a critical
|
||||
* section, which isn't usually allowed, but we make an exception.
|
||||
* It means that there's a theoretical possibility that you run out of
|
||||
* memory while absorbing fsync requests, which leads to a PANIC.
|
||||
* Fortunately the hash table is small so that's unlikely to happen in
|
||||
* practice.
|
||||
*/
|
||||
pendingOpsCxt = AllocSetContextCreate(MdCxt,
|
||||
"Pending Ops Context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
|
||||
|
||||
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
|
||||
hash_ctl.keysize = sizeof(RelFileNode);
|
||||
hash_ctl.entrysize = sizeof(PendingOperationEntry);
|
||||
hash_ctl.hash = tag_hash;
|
||||
hash_ctl.hcxt = MdCxt;
|
||||
hash_ctl.hcxt = pendingOpsCxt;
|
||||
pendingOpsTable = hash_create("Pending Ops Table",
|
||||
100L,
|
||||
&hash_ctl,
|
||||
@ -1516,7 +1533,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
else if (segno == UNLINK_RELATION_REQUEST)
|
||||
{
|
||||
/* Unlink request: put it in the linked list */
|
||||
MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||
MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
|
||||
PendingUnlinkEntry *entry;
|
||||
|
||||
/* PendingUnlinkEntry doesn't store forknum, since it's always MAIN */
|
||||
@ -1533,7 +1550,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
else
|
||||
{
|
||||
/* Normal case: enter a request to fsync this segment */
|
||||
MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||
MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
|
||||
PendingOperationEntry *entry;
|
||||
bool found;
|
||||
|
||||
|
@ -60,15 +60,9 @@ static void MemoryContextStatsInternal(MemoryContext context, int level);
|
||||
* You should not do memory allocations within a critical section, because
|
||||
* an out-of-memory error will be escalated to a PANIC. To enforce that
|
||||
* rule, the allocation functions Assert that.
|
||||
*
|
||||
* There are a two exceptions: 1) error recovery uses ErrorContext, which
|
||||
* has some memory set aside so that you don't run out. And 2) checkpointer
|
||||
* currently just hopes for the best, which is wrong and ought to be fixed,
|
||||
* but it's a known issue so let's not complain about in the meanwhile.
|
||||
*/
|
||||
#define AssertNotInCriticalSection(context) \
|
||||
Assert(CritSectionCount == 0 || (context) == ErrorContext || \
|
||||
AmCheckpointerProcess())
|
||||
Assert(CritSectionCount == 0 || (context)->allowInCritSection)
|
||||
|
||||
/*****************************************************************************
|
||||
* EXPORTED ROUTINES *
|
||||
@ -120,7 +114,10 @@ MemoryContextInit(void)
|
||||
* require it to contain at least 8K at all times. This is the only case
|
||||
* where retained memory in a context is *essential* --- we want to be
|
||||
* sure ErrorContext still has some memory even if we've run out
|
||||
* elsewhere!
|
||||
* elsewhere! Also, allow allocations in ErrorContext within a critical
|
||||
* section. Otherwise a PANIC will cause an assertion failure in the
|
||||
* error reporting code, before printing out the real cause of the
|
||||
* failure.
|
||||
*
|
||||
* This should be the last step in this function, as elog.c assumes memory
|
||||
* management works once ErrorContext is non-null.
|
||||
@ -130,6 +127,7 @@ MemoryContextInit(void)
|
||||
8 * 1024,
|
||||
8 * 1024,
|
||||
8 * 1024);
|
||||
MemoryContextAllowInCriticalSection(ErrorContext, true);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -305,6 +303,26 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* MemoryContextAllowInCriticalSection
|
||||
* Allow/disallow allocations in this memory context within a critical
|
||||
* section.
|
||||
*
|
||||
* Normally, memory allocations are not allowed within a critical section,
|
||||
* because a failure would lead to PANIC. There are a few exceptions to
|
||||
* that, like allocations related to debugging code that is not supposed to
|
||||
* be enabled in production. This function can be used to exempt specific
|
||||
* memory contexts from the assertion in palloc().
|
||||
*/
|
||||
void
|
||||
MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
|
||||
{
|
||||
AssertArg(MemoryContextIsValid(context));
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
context->allowInCritSection = allow;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* GetMemoryChunkSpace
|
||||
* Given a currently-allocated chunk, determine the total space
|
||||
@ -533,6 +551,7 @@ MemoryContextCreate(NodeTag tag, Size size,
|
||||
MemoryContext node;
|
||||
Size needed = size + strlen(name) + 1;
|
||||
|
||||
/* creating new memory contexts is not allowed in a critical section */
|
||||
Assert(CritSectionCount == 0);
|
||||
|
||||
/* Get space for node and name */
|
||||
@ -570,6 +589,11 @@ MemoryContextCreate(NodeTag tag, Size size,
|
||||
node->parent = parent;
|
||||
node->nextchild = parent->firstchild;
|
||||
parent->firstchild = node;
|
||||
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
/* inherit allowInCritSection flag from parent */
|
||||
node->allowInCritSection = parent->allowInCritSection;
|
||||
#endif
|
||||
}
|
||||
|
||||
VALGRIND_CREATE_MEMPOOL(node, 0, false);
|
||||
|
@ -60,6 +60,9 @@ typedef struct MemoryContextData
|
||||
MemoryContext nextchild; /* next child of same parent */
|
||||
char *name; /* context name (just for debugging) */
|
||||
bool isReset; /* T = no space alloced since last reset */
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
bool allowInCritSection; /* allow palloc in critical section */
|
||||
#endif
|
||||
} MemoryContextData;
|
||||
|
||||
/* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */
|
||||
|
@ -182,6 +182,7 @@ extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
|
||||
|
||||
extern Size LWLockShmemSize(void);
|
||||
extern void CreateLWLocks(void);
|
||||
extern void InitLWLockAccess(void);
|
||||
|
||||
/*
|
||||
* The traditional method for obtaining an lwlock for use by an extension is
|
||||
|
@ -101,6 +101,8 @@ extern MemoryContext GetMemoryChunkContext(void *pointer);
|
||||
extern MemoryContext MemoryContextGetParent(MemoryContext context);
|
||||
extern bool MemoryContextIsEmpty(MemoryContext context);
|
||||
extern void MemoryContextStats(MemoryContext context);
|
||||
extern void MemoryContextAllowInCriticalSection(MemoryContext context,
|
||||
bool allow);
|
||||
|
||||
#ifdef MEMORY_CONTEXT_CHECKING
|
||||
extern void MemoryContextCheck(MemoryContext context);
|
||||
|
Loading…
Reference in New Issue
Block a user