Get rid of the dynamic shared memory state file.

Instead of storing the ID of the dynamic shared memory control
segment in a file within the data directory, store it in the main
control segment.  This avoids a number of nasty corner cases,
most seriously that doing an online backup and then using it on
the same machine (e.g. to fire up a standby) would result in the
standby clobbering all of the master's dynamic shared memory
segments.

Per complaints from Heikki Linnakangas, Fujii Masao, and Tom
Lane.
This commit is contained in:
Robert Haas 2014-04-08 11:39:55 -04:00
parent 0886fc6a5c
commit 11a65eed16
6 changed files with 71 additions and 166 deletions

View File

@ -30,6 +30,7 @@
#include "miscadmin.h"
#include "portability/mem.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "utils/guc.h"
@ -421,7 +422,8 @@ CreateAnonymousSegment(Size *size)
* zero will be passed.
*/
PGShmemHeader *
PGSharedMemoryCreate(Size size, bool makePrivate, int port)
PGSharedMemoryCreate(Size size, bool makePrivate, int port,
PGShmemHeader **shim)
{
IpcMemoryKey NextShmemSegID;
void *memAddress;
@ -509,10 +511,13 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
/*
* The segment appears to be from a dead Postgres process, or from a
* previous cycle of life in this same process. Zap it, if possible.
* previous cycle of life in this same process. Zap it, if possible,
* and any associated dynamic shared memory segments, as well.
* This probably shouldn't fail, but if it does, assume the segment
* belongs to someone else after all, and continue quietly.
*/
if (hdr->dsm_control != 0)
dsm_cleanup_using_control_segment(hdr->dsm_control);
shmdt(memAddress);
if (shmctl(shmid, IPC_RMID, NULL) < 0)
continue;
@ -539,6 +544,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
hdr = (PGShmemHeader *) memAddress;
hdr->creatorPID = getpid();
hdr->magic = PGShmemMagic;
hdr->dsm_control = 0;
/* Fill in the data directory ID info, too */
if (stat(DataDir, &statbuf) < 0)
@ -554,6 +560,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
*/
hdr->totalsize = size;
hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
*shim = hdr;
/* Save info for possible future use */
UsedShmemSegAddr = memAddress;
@ -608,6 +615,7 @@ PGSharedMemoryReAttach(void)
if (hdr != origUsedShmemSegAddr)
elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
hdr, origUsedShmemSegAddr);
dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control);
UsedShmemSegAddr = hdr; /* probably redundant */
}

View File

@ -117,7 +117,8 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
*
*/
PGShmemHeader *
PGSharedMemoryCreate(Size size, bool makePrivate, int port)
PGSharedMemoryCreate(Size size, bool makePrivate, int port,
PGShmemHeader **shim)
{
void *memAddress;
PGShmemHeader *hdr;
@ -245,12 +246,14 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
*/
hdr->totalsize = size;
hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
hdr->dsm_control = 0;
/* Save info for possible future use */
UsedShmemSegAddr = memAddress;
UsedShmemSegSize = size;
UsedShmemSegID = hmap2;
*shim = NULL;
return hdr;
}
@ -289,6 +292,7 @@ PGSharedMemoryReAttach(void)
hdr, origUsedShmemSegAddr);
if (hdr->magic != PGShmemMagic)
elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
dsm_set_control_handle(hdr->dsm_control);
UsedShmemSegAddr = hdr; /* probably redundant */
}

View File

@ -39,13 +39,11 @@
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pg_shmem.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner_private.h"
#define PG_DYNSHMEM_STATE_FILE PG_DYNSHMEM_DIR "/state"
#define PG_DYNSHMEM_NEW_STATE_FILE PG_DYNSHMEM_DIR "/state.new"
#define PG_DYNSHMEM_STATE_BUFSIZ 512
#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
/*
@ -95,10 +93,7 @@ typedef struct dsm_control_header
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
} dsm_control_header;
static void dsm_cleanup_using_control_segment(void);
static void dsm_cleanup_for_mmap(void);
static bool dsm_read_state_file(dsm_handle *h);
static void dsm_write_state_file(dsm_handle h);
static void dsm_postmaster_shutdown(int code, Datum arg);
static dsm_segment *dsm_create_descriptor(void);
static bool dsm_control_segment_sane(dsm_control_header *control,
@ -146,7 +141,7 @@ static void *dsm_control_impl_private = NULL;
* startup time.
*/
void
dsm_postmaster_startup(void)
dsm_postmaster_startup(PGShmemHeader *shim)
{
void *dsm_control_address = NULL;
uint32 maxitems;
@ -159,26 +154,13 @@ dsm_postmaster_startup(void)
return;
/*
* Check for, and remove, shared memory segments left behind by a dead
* postmaster. This isn't necessary on Windows, which always removes them
* when the last reference is gone.
* If we're using the mmap implementations, clean up any leftovers.
* Cleanup isn't needed on Windows, and happens earlier in startup for
* POSIX and System V shared memory, via a direct call to
* dsm_cleanup_using_control_segment.
*/
switch (dynamic_shared_memory_type)
{
case DSM_IMPL_POSIX:
case DSM_IMPL_SYSV:
dsm_cleanup_using_control_segment();
break;
case DSM_IMPL_MMAP:
dsm_cleanup_for_mmap();
break;
case DSM_IMPL_WINDOWS:
/* Nothing to do. */
break;
default:
elog(ERROR, "unknown dynamic shared memory type: %d",
dynamic_shared_memory_type);
}
if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
dsm_cleanup_for_mmap();
/* Determine size for new control segment. */
maxitems = PG_DYNSHMEM_FIXED_SLOTS
@ -187,23 +169,30 @@ dsm_postmaster_startup(void)
maxitems);
segsize = dsm_control_bytes_needed(maxitems);
/* Loop until we find an unused identifier for the new control segment. */
/*
* Loop until we find an unused identifier for the new control segment.
* We sometimes use 0 as a sentinel value indicating that no control
* segment is known to exist, so avoid using that value for a real
* control segment.
*/
for (;;)
{
Assert(dsm_control_address == NULL);
Assert(dsm_control_mapped_size == 0);
dsm_control_handle = random();
if (dsm_control_handle == 0)
continue;
if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
&dsm_control_impl_private, &dsm_control_address,
&dsm_control_mapped_size, ERROR))
break;
}
dsm_control = dsm_control_address;
on_shmem_exit(dsm_postmaster_shutdown, 0);
on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
elog(DEBUG2,
"created dynamic shared memory control segment %u (%zu bytes)",
dsm_control_handle, segsize);
dsm_write_state_file(dsm_control_handle);
shim->dsm_control = dsm_control_handle;
/* Initialize control segment. */
dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
@ -216,8 +205,8 @@ dsm_postmaster_startup(void)
* invocation still exists. If so, remove the dynamic shared memory
* segments to which it refers, and then the control segment itself.
*/
static void
dsm_cleanup_using_control_segment(void)
void
dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
{
void *mapped_address = NULL;
void *junk_mapped_address = NULL;
@ -227,14 +216,10 @@ dsm_cleanup_using_control_segment(void)
Size junk_mapped_size = 0;
uint32 nitems;
uint32 i;
dsm_handle old_control_handle;
dsm_control_header *old_control;
/*
* Read the state file. If it doesn't exist or is empty, there's nothing
* more to do.
*/
if (!dsm_read_state_file(&old_control_handle))
/* If dynamic shared memory is disabled, there's nothing to do. */
if (dynamic_shared_memory_type == DSM_IMPL_NONE)
return;
/*
@ -346,111 +331,6 @@ dsm_cleanup_for_mmap(void)
FreeDir(dir);
}
/*
* Read and parse the state file.
*
* If the state file is empty or the contents are garbled, it probably means
* that the operating system rebooted before the data written by the previous
* postmaster made it to disk. In that case, we can just ignore it; any shared
* memory from before the reboot should be gone anyway.
*/
static bool
dsm_read_state_file(dsm_handle *h)
{
int statefd;
char statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
int nbytes = 0;
char *endptr,
*s;
dsm_handle handle;
/* Read the state file to get the ID of the old control segment. */
statefd = BasicOpenFile(PG_DYNSHMEM_STATE_FILE, O_RDONLY | PG_BINARY, 0);
if (statefd < 0)
{
if (errno == ENOENT)
return false;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
PG_DYNSHMEM_STATE_FILE)));
}
nbytes = read(statefd, statebuf, PG_DYNSHMEM_STATE_BUFSIZ - 1);
if (nbytes < 0)
{
close(statefd);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
PG_DYNSHMEM_STATE_FILE)));
}
/* make sure buffer is NUL terminated */
statebuf[nbytes] = '\0';
close(statefd);
/*
* We expect to find the handle of the old control segment here,
* on a line by itself.
*/
handle = strtoul(statebuf, &endptr, 10);
for (s = endptr; *s == ' ' || *s == '\t'; ++s)
;
if (*s != '\n' && *s != '\0')
return false;
/* Looks good. */
*h = handle;
return true;
}
/*
* Write our control segment handle to the state file, so that if the
* postmaster is killed without running it's on_shmem_exit hooks, the
* next postmaster can clean things up after restart.
*/
static void
dsm_write_state_file(dsm_handle h)
{
int statefd;
char statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
int nbytes;
/* Create or truncate the file. */
statefd = open(PG_DYNSHMEM_NEW_STATE_FILE,
O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, 0600);
if (statefd < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not create file \"%s\": %m",
PG_DYNSHMEM_NEW_STATE_FILE)));
/* Write contents. */
snprintf(statebuf, PG_DYNSHMEM_STATE_BUFSIZ, "%u\n", dsm_control_handle);
nbytes = strlen(statebuf);
if (write(statefd, statebuf, nbytes) != nbytes)
{
if (errno == 0)
errno = ENOSPC; /* if no error signalled, assume no space */
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write file \"%s\": %m",
PG_DYNSHMEM_NEW_STATE_FILE)));
}
/* Close file. */
close(statefd);
/*
* Atomically rename file into place, so that no one ever sees a partially
* written state file.
*/
if (rename(PG_DYNSHMEM_NEW_STATE_FILE, PG_DYNSHMEM_STATE_FILE) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\": %m",
PG_DYNSHMEM_NEW_STATE_FILE)));
}
/*
* At shutdown time, we iterate over the control segment and remove all
* remaining dynamic shared memory segments. We avoid throwing errors here;
@ -466,6 +346,7 @@ dsm_postmaster_shutdown(int code, Datum arg)
void *junk_mapped_address = NULL;
void *junk_impl_private = NULL;
Size junk_mapped_size = 0;
PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
/*
* If some other backend exited uncleanly, it might have corrupted the
@ -510,13 +391,7 @@ dsm_postmaster_shutdown(int code, Datum arg)
&dsm_control_impl_private, &dsm_control_address,
&dsm_control_mapped_size, LOG);
dsm_control = dsm_control_address;
/* And, finally, remove the state file. */
if (unlink(PG_DYNSHMEM_STATE_FILE) < 0)
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not unlink file \"%s\": %m",
PG_DYNSHMEM_STATE_FILE)));
shim->dsm_control = 0;
}
/*
@ -536,25 +411,18 @@ dsm_backend_startup(void)
#ifdef EXEC_BACKEND
{
dsm_handle control_handle;
void *control_address = NULL;
/* Read the control segment information from the state file. */
if (!dsm_read_state_file(&control_handle))
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("could not parse dynamic shared memory state file")));
/* Attach control segment. */
dsm_impl_op(DSM_OP_ATTACH, control_handle, 0,
Assert(dsm_control_handle != 0);
dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
&dsm_control_impl_private, &control_address,
&dsm_control_mapped_size, ERROR);
dsm_control_handle = control_handle;
dsm_control = control_address;
/* If control segment doesn't look sane, something is badly wrong. */
if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
{
dsm_impl_op(DSM_OP_DETACH, control_handle, 0,
dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
&dsm_control_impl_private, &control_address,
&dsm_control_mapped_size, WARNING);
ereport(FATAL,
@ -567,6 +435,20 @@ dsm_backend_startup(void)
dsm_init_done = true;
}
#ifdef EXEC_BACKEND
/*
* When running under EXEC_BACKEND, we get a callback here when the main
* shared memory segment is re-attached, so that we can record the control
* handle retrieved from it.
*/
void
dsm_set_control_handle(dsm_handle h)
{
Assert(dsm_control_handle == 0 && h != 0);
dsm_control_handle = h;
}
#endif
/*
* Create a new dynamic shared memory segment.
*/

View File

@ -90,6 +90,8 @@ RequestAddinShmemSpace(Size size)
void
CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
{
PGShmemHeader *shim = NULL;
if (!IsUnderPostmaster)
{
PGShmemHeader *seghdr;
@ -149,7 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
/*
* Create the shmem segment
*/
seghdr = PGSharedMemoryCreate(size, makePrivate, port);
seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim);
InitShmemAccess(seghdr);
@ -254,7 +256,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
/* Initialize dynamic shared memory facilities. */
if (!IsUnderPostmaster)
dsm_postmaster_startup();
dsm_postmaster_startup(shim);
/*
* Now give loadable modules a chance to set up their shmem allocations

View File

@ -18,10 +18,16 @@
typedef struct dsm_segment dsm_segment;
/* Startup and shutdown functions. */
extern void dsm_postmaster_startup(void);
struct PGShmemHeader; /* avoid including pg_shmem.h */
extern void dsm_cleanup_using_control_segment(dsm_handle old_control_handle);
extern void dsm_postmaster_startup(struct PGShmemHeader *);
extern void dsm_backend_shutdown(void);
extern void dsm_detach_all(void);
#ifdef EXEC_BACKEND
extern void dsm_set_control_handle(dsm_handle h);
#endif
/* Functions that create, update, or remove mappings. */
extern dsm_segment *dsm_create(Size size);
extern dsm_segment *dsm_attach(dsm_handle h);

View File

@ -24,6 +24,8 @@
#ifndef PG_SHMEM_H
#define PG_SHMEM_H
#include "storage/dsm_impl.h"
typedef struct PGShmemHeader /* standard header for all Postgres shmem */
{
int32 magic; /* magic # to identify Postgres segments */
@ -31,6 +33,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */
pid_t creatorPID; /* PID of creating process */
Size totalsize; /* total size of segment */
Size freeoffset; /* offset to first free space */
dsm_handle dsm_control; /* ID of dynamic shared memory control seg */
void *index; /* pointer to ShmemIndex table */
#ifndef WIN32 /* Windows doesn't have useful inode#s */
dev_t device; /* device data directory is on */
@ -61,7 +64,7 @@ extern void PGSharedMemoryReAttach(void);
#endif
extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate,
int port);
int port, PGShmemHeader **shim);
extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
extern void PGSharedMemoryDetach(void);