From 11a65eed1637a05b03e174700799b024e104bfb4 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 8 Apr 2014 11:39:55 -0400 Subject: [PATCH] Get rid of the dynamic shared memory state file. Instead of storing the ID of the dynamic shared memory control segment in a file within the data directory, store it in the main control segment. This avoids a number of nasty corner cases, most seriously that doing an online backup and then using it on the same machine (e.g. to fire up a standby) would result in the standby clobbering all of the master's dynamic shared memory segments. Per complaints from Heikki Linnakangas, Fujii Masao, and Tom Lane. --- src/backend/port/sysv_shmem.c | 12 +- src/backend/port/win32_shmem.c | 6 +- src/backend/storage/ipc/dsm.c | 200 +++++++-------------------------- src/backend/storage/ipc/ipci.c | 6 +- src/include/storage/dsm.h | 8 +- src/include/storage/pg_shmem.h | 5 +- 6 files changed, 71 insertions(+), 166 deletions(-) diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 51c1a2b71f..5e3850b024 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -30,6 +30,7 @@ #include "miscadmin.h" #include "portability/mem.h" +#include "storage/dsm.h" #include "storage/ipc.h" #include "storage/pg_shmem.h" #include "utils/guc.h" @@ -421,7 +422,8 @@ CreateAnonymousSegment(Size *size) * zero will be passed. */ PGShmemHeader * -PGSharedMemoryCreate(Size size, bool makePrivate, int port) +PGSharedMemoryCreate(Size size, bool makePrivate, int port, + PGShmemHeader **shim) { IpcMemoryKey NextShmemSegID; void *memAddress; @@ -509,10 +511,13 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) /* * The segment appears to be from a dead Postgres process, or from a - * previous cycle of life in this same process. Zap it, if possible. + * previous cycle of life in this same process. Zap it, if possible, + * and any associated dynamic shared memory segments, as well. * This probably shouldn't fail, but if it does, assume the segment * belongs to someone else after all, and continue quietly. */ + if (hdr->dsm_control != 0) + dsm_cleanup_using_control_segment(hdr->dsm_control); shmdt(memAddress); if (shmctl(shmid, IPC_RMID, NULL) < 0) continue; @@ -539,6 +544,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) hdr = (PGShmemHeader *) memAddress; hdr->creatorPID = getpid(); hdr->magic = PGShmemMagic; + hdr->dsm_control = 0; /* Fill in the data directory ID info, too */ if (stat(DataDir, &statbuf) < 0) @@ -554,6 +560,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) */ hdr->totalsize = size; hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); + *shim = hdr; /* Save info for possible future use */ UsedShmemSegAddr = memAddress; @@ -608,6 +615,7 @@ PGSharedMemoryReAttach(void) if (hdr != origUsedShmemSegAddr) elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)", hdr, origUsedShmemSegAddr); + dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control); UsedShmemSegAddr = hdr; /* probably redundant */ } diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c index dca371cce6..3a0ded4865 100644 --- a/src/backend/port/win32_shmem.c +++ b/src/backend/port/win32_shmem.c @@ -117,7 +117,8 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) * */ PGShmemHeader * -PGSharedMemoryCreate(Size size, bool makePrivate, int port) +PGSharedMemoryCreate(Size size, bool makePrivate, int port, + PGShmemHeader **shim) { void *memAddress; PGShmemHeader *hdr; @@ -245,12 +246,14 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) */ hdr->totalsize = size; hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); + hdr->dsm_control = 0; /* Save info for possible future use */ UsedShmemSegAddr = memAddress; UsedShmemSegSize = size; UsedShmemSegID = hmap2; + *shim = NULL; return hdr; } @@ -289,6 +292,7 @@ PGSharedMemoryReAttach(void) hdr, origUsedShmemSegAddr); if (hdr->magic != PGShmemMagic) elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory"); + dsm_set_control_handle(hdr->dsm_control); UsedShmemSegAddr = hdr; /* probably redundant */ } diff --git a/src/backend/storage/ipc/dsm.c b/src/backend/storage/ipc/dsm.c index c967177c4b..6c410f77d9 100644 --- a/src/backend/storage/ipc/dsm.c +++ b/src/backend/storage/ipc/dsm.c @@ -39,13 +39,11 @@ #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/lwlock.h" +#include "storage/pg_shmem.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/resowner_private.h" -#define PG_DYNSHMEM_STATE_FILE PG_DYNSHMEM_DIR "/state" -#define PG_DYNSHMEM_NEW_STATE_FILE PG_DYNSHMEM_DIR "/state.new" -#define PG_DYNSHMEM_STATE_BUFSIZ 512 #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32 /* @@ -95,10 +93,7 @@ typedef struct dsm_control_header dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]; } dsm_control_header; -static void dsm_cleanup_using_control_segment(void); static void dsm_cleanup_for_mmap(void); -static bool dsm_read_state_file(dsm_handle *h); -static void dsm_write_state_file(dsm_handle h); static void dsm_postmaster_shutdown(int code, Datum arg); static dsm_segment *dsm_create_descriptor(void); static bool dsm_control_segment_sane(dsm_control_header *control, @@ -146,7 +141,7 @@ static void *dsm_control_impl_private = NULL; * startup time. */ void -dsm_postmaster_startup(void) +dsm_postmaster_startup(PGShmemHeader *shim) { void *dsm_control_address = NULL; uint32 maxitems; @@ -159,26 +154,13 @@ dsm_postmaster_startup(void) return; /* - * Check for, and remove, shared memory segments left behind by a dead - * postmaster. This isn't necessary on Windows, which always removes them - * when the last reference is gone. + * If we're using the mmap implementations, clean up any leftovers. + * Cleanup isn't needed on Windows, and happens earlier in startup for + * POSIX and System V shared memory, via a direct call to + * dsm_cleanup_using_control_segment. */ - switch (dynamic_shared_memory_type) - { - case DSM_IMPL_POSIX: - case DSM_IMPL_SYSV: - dsm_cleanup_using_control_segment(); - break; - case DSM_IMPL_MMAP: - dsm_cleanup_for_mmap(); - break; - case DSM_IMPL_WINDOWS: - /* Nothing to do. */ - break; - default: - elog(ERROR, "unknown dynamic shared memory type: %d", - dynamic_shared_memory_type); - } + if (dynamic_shared_memory_type == DSM_IMPL_MMAP) + dsm_cleanup_for_mmap(); /* Determine size for new control segment. */ maxitems = PG_DYNSHMEM_FIXED_SLOTS @@ -187,23 +169,30 @@ dsm_postmaster_startup(void) maxitems); segsize = dsm_control_bytes_needed(maxitems); - /* Loop until we find an unused identifier for the new control segment. */ + /* + * Loop until we find an unused identifier for the new control segment. + * We sometimes use 0 as a sentinel value indicating that no control + * segment is known to exist, so avoid using that value for a real + * control segment. + */ for (;;) { Assert(dsm_control_address == NULL); Assert(dsm_control_mapped_size == 0); dsm_control_handle = random(); + if (dsm_control_handle == 0) + continue; if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize, &dsm_control_impl_private, &dsm_control_address, &dsm_control_mapped_size, ERROR)) break; } dsm_control = dsm_control_address; - on_shmem_exit(dsm_postmaster_shutdown, 0); + on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim)); elog(DEBUG2, "created dynamic shared memory control segment %u (%zu bytes)", dsm_control_handle, segsize); - dsm_write_state_file(dsm_control_handle); + shim->dsm_control = dsm_control_handle; /* Initialize control segment. */ dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC; @@ -216,8 +205,8 @@ dsm_postmaster_startup(void) * invocation still exists. If so, remove the dynamic shared memory * segments to which it refers, and then the control segment itself. */ -static void -dsm_cleanup_using_control_segment(void) +void +dsm_cleanup_using_control_segment(dsm_handle old_control_handle) { void *mapped_address = NULL; void *junk_mapped_address = NULL; @@ -227,14 +216,10 @@ dsm_cleanup_using_control_segment(void) Size junk_mapped_size = 0; uint32 nitems; uint32 i; - dsm_handle old_control_handle; dsm_control_header *old_control; - /* - * Read the state file. If it doesn't exist or is empty, there's nothing - * more to do. - */ - if (!dsm_read_state_file(&old_control_handle)) + /* If dynamic shared memory is disabled, there's nothing to do. */ + if (dynamic_shared_memory_type == DSM_IMPL_NONE) return; /* @@ -346,111 +331,6 @@ dsm_cleanup_for_mmap(void) FreeDir(dir); } -/* - * Read and parse the state file. - * - * If the state file is empty or the contents are garbled, it probably means - * that the operating system rebooted before the data written by the previous - * postmaster made it to disk. In that case, we can just ignore it; any shared - * memory from before the reboot should be gone anyway. - */ -static bool -dsm_read_state_file(dsm_handle *h) -{ - int statefd; - char statebuf[PG_DYNSHMEM_STATE_BUFSIZ]; - int nbytes = 0; - char *endptr, - *s; - dsm_handle handle; - - /* Read the state file to get the ID of the old control segment. */ - statefd = BasicOpenFile(PG_DYNSHMEM_STATE_FILE, O_RDONLY | PG_BINARY, 0); - if (statefd < 0) - { - if (errno == ENOENT) - return false; - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", - PG_DYNSHMEM_STATE_FILE))); - } - nbytes = read(statefd, statebuf, PG_DYNSHMEM_STATE_BUFSIZ - 1); - if (nbytes < 0) - { - close(statefd); - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - PG_DYNSHMEM_STATE_FILE))); - } - /* make sure buffer is NUL terminated */ - statebuf[nbytes] = '\0'; - close(statefd); - - /* - * We expect to find the handle of the old control segment here, - * on a line by itself. - */ - handle = strtoul(statebuf, &endptr, 10); - for (s = endptr; *s == ' ' || *s == '\t'; ++s) - ; - if (*s != '\n' && *s != '\0') - return false; - - /* Looks good. */ - *h = handle; - return true; -} - -/* - * Write our control segment handle to the state file, so that if the - * postmaster is killed without running it's on_shmem_exit hooks, the - * next postmaster can clean things up after restart. - */ -static void -dsm_write_state_file(dsm_handle h) -{ - int statefd; - char statebuf[PG_DYNSHMEM_STATE_BUFSIZ]; - int nbytes; - - /* Create or truncate the file. */ - statefd = open(PG_DYNSHMEM_NEW_STATE_FILE, - O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, 0600); - if (statefd < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create file \"%s\": %m", - PG_DYNSHMEM_NEW_STATE_FILE))); - - /* Write contents. */ - snprintf(statebuf, PG_DYNSHMEM_STATE_BUFSIZ, "%u\n", dsm_control_handle); - nbytes = strlen(statebuf); - if (write(statefd, statebuf, nbytes) != nbytes) - { - if (errno == 0) - errno = ENOSPC; /* if no error signalled, assume no space */ - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - PG_DYNSHMEM_NEW_STATE_FILE))); - } - - /* Close file. */ - close(statefd); - - /* - * Atomically rename file into place, so that no one ever sees a partially - * written state file. - */ - if (rename(PG_DYNSHMEM_NEW_STATE_FILE, PG_DYNSHMEM_STATE_FILE) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not rename file \"%s\": %m", - PG_DYNSHMEM_NEW_STATE_FILE))); -} - /* * At shutdown time, we iterate over the control segment and remove all * remaining dynamic shared memory segments. We avoid throwing errors here; @@ -466,6 +346,7 @@ dsm_postmaster_shutdown(int code, Datum arg) void *junk_mapped_address = NULL; void *junk_impl_private = NULL; Size junk_mapped_size = 0; + PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg); /* * If some other backend exited uncleanly, it might have corrupted the @@ -510,13 +391,7 @@ dsm_postmaster_shutdown(int code, Datum arg) &dsm_control_impl_private, &dsm_control_address, &dsm_control_mapped_size, LOG); dsm_control = dsm_control_address; - - /* And, finally, remove the state file. */ - if (unlink(PG_DYNSHMEM_STATE_FILE) < 0) - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not unlink file \"%s\": %m", - PG_DYNSHMEM_STATE_FILE))); + shim->dsm_control = 0; } /* @@ -536,25 +411,18 @@ dsm_backend_startup(void) #ifdef EXEC_BACKEND { - dsm_handle control_handle; void *control_address = NULL; - /* Read the control segment information from the state file. */ - if (!dsm_read_state_file(&control_handle)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("could not parse dynamic shared memory state file"))); - /* Attach control segment. */ - dsm_impl_op(DSM_OP_ATTACH, control_handle, 0, + Assert(dsm_control_handle != 0); + dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0, &dsm_control_impl_private, &control_address, &dsm_control_mapped_size, ERROR); - dsm_control_handle = control_handle; dsm_control = control_address; /* If control segment doesn't look sane, something is badly wrong. */ if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size)) { - dsm_impl_op(DSM_OP_DETACH, control_handle, 0, + dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0, &dsm_control_impl_private, &control_address, &dsm_control_mapped_size, WARNING); ereport(FATAL, @@ -567,6 +435,20 @@ dsm_backend_startup(void) dsm_init_done = true; } +#ifdef EXEC_BACKEND +/* + * When running under EXEC_BACKEND, we get a callback here when the main + * shared memory segment is re-attached, so that we can record the control + * handle retrieved from it. + */ +void +dsm_set_control_handle(dsm_handle h) +{ + Assert(dsm_control_handle == 0 && h != 0); + dsm_control_handle = h; +} +#endif + /* * Create a new dynamic shared memory segment. */ diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index c392d4fa22..4290d2dc81 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -90,6 +90,8 @@ RequestAddinShmemSpace(Size size) void CreateSharedMemoryAndSemaphores(bool makePrivate, int port) { + PGShmemHeader *shim = NULL; + if (!IsUnderPostmaster) { PGShmemHeader *seghdr; @@ -149,7 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) /* * Create the shmem segment */ - seghdr = PGSharedMemoryCreate(size, makePrivate, port); + seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim); InitShmemAccess(seghdr); @@ -254,7 +256,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) /* Initialize dynamic shared memory facilities. */ if (!IsUnderPostmaster) - dsm_postmaster_startup(); + dsm_postmaster_startup(shim); /* * Now give loadable modules a chance to set up their shmem allocations diff --git a/src/include/storage/dsm.h b/src/include/storage/dsm.h index e4669a1e3c..272787adc6 100644 --- a/src/include/storage/dsm.h +++ b/src/include/storage/dsm.h @@ -18,10 +18,16 @@ typedef struct dsm_segment dsm_segment; /* Startup and shutdown functions. */ -extern void dsm_postmaster_startup(void); +struct PGShmemHeader; /* avoid including pg_shmem.h */ +extern void dsm_cleanup_using_control_segment(dsm_handle old_control_handle); +extern void dsm_postmaster_startup(struct PGShmemHeader *); extern void dsm_backend_shutdown(void); extern void dsm_detach_all(void); +#ifdef EXEC_BACKEND +extern void dsm_set_control_handle(dsm_handle h); +#endif + /* Functions that create, update, or remove mappings. */ extern dsm_segment *dsm_create(Size size); extern dsm_segment *dsm_attach(dsm_handle h); diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index 0dc960b597..ab28ebee84 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -24,6 +24,8 @@ #ifndef PG_SHMEM_H #define PG_SHMEM_H +#include "storage/dsm_impl.h" + typedef struct PGShmemHeader /* standard header for all Postgres shmem */ { int32 magic; /* magic # to identify Postgres segments */ @@ -31,6 +33,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */ pid_t creatorPID; /* PID of creating process */ Size totalsize; /* total size of segment */ Size freeoffset; /* offset to first free space */ + dsm_handle dsm_control; /* ID of dynamic shared memory control seg */ void *index; /* pointer to ShmemIndex table */ #ifndef WIN32 /* Windows doesn't have useful inode#s */ dev_t device; /* device data directory is on */ @@ -61,7 +64,7 @@ extern void PGSharedMemoryReAttach(void); #endif extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate, - int port); + int port, PGShmemHeader **shim); extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2); extern void PGSharedMemoryDetach(void);