From b0fc0df9364d2d2d17c0162cf3b8b59f6cb09f67 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 28 Jun 2012 11:05:16 -0400 Subject: [PATCH] Dramatically reduce System V shared memory consumption. Except when compiling with EXEC_BACKEND, we'll now allocate only a tiny amount of System V shared memory (as an interlock to protect the data directory) and allocate the rest as anonymous shared memory via mmap. This will hopefully spare most users the hassle of adjusting operating system parameters before being able to start PostgreSQL with a reasonable value for shared_buffers. There are a bunch of documentation updates needed here, and we might need to adjust some of the HINT messages related to shared memory as well. But it's not 100% clear how portable this is, so before we write the documentation, let's give it a spin on the buildfarm and see what turns red. --- src/backend/port/sysv_shmem.c | 89 +++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index b28766c652..4bbd0ec649 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #ifdef HAVE_SYS_IPC_H #include @@ -43,9 +44,22 @@ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ #define PG_SHMAT_FLAGS 0 #endif +/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */ +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */ +#ifndef MAP_HASSEMAPHORE +#define MAP_HASSEMAPHORE 0 +#endif + +#define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE) unsigned long UsedShmemSegID = 0; void *UsedShmemSegAddr = NULL; +static Size AnonymousShmemSize; +static PGShmemHeader *AnonymousShmem; static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size); static void IpcMemoryDetach(int status, Datum shmaddr); @@ -218,8 +232,13 @@ InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size) static void IpcMemoryDetach(int status, Datum shmaddr) { + /* Detach System V shared memory block. */ if (shmdt(DatumGetPointer(shmaddr)) < 0) elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr)); + /* Release anonymous shared memory block, if any. */ + if (AnonymousShmem != NULL + && munmap(AnonymousShmem, AnonymousShmemSize) < 0) + elog(LOG, "munmap(%p) failed: %m", AnonymousShmem); } /****************************************************************************/ @@ -357,10 +376,59 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) PGShmemHeader *hdr; IpcMemoryId shmid; struct stat statbuf; + Size allocsize = size; /* Room for a header? */ Assert(size > MAXALIGN(sizeof(PGShmemHeader))); + /* + * As of PostgreSQL 9.3, we normally allocate only a very small amount of + * System V shared memory, and only for the purposes of providing an + * interlock to protect the data directory. The real shared memory block + * is allocated using mmap(). This works around the problem that many + * systems have very low limits on the amount of System V shared memory + * that can be allocated. Even a limit of a few megabytes will be enough + * to run many copies of PostgreSQL without needing to adjust system + * settings. + * + * However, we disable this logic in the EXEC_BACKEND case, and fall back + * to the old method of allocating the entire segment using System V shared + * memory, because there's no way to attach an mmap'd segment to a process + * after exec(). Since EXEC_BACKEND is intended only for developer use, + * this shouldn't be a big problem. + */ +#ifndef EXEC_BACKEND + { + long pagesize = sysconf(_SC_PAGE_SIZE); + + /* + * pagesize will, for practical purposes, always be a power of two. + * But just in case it isn't, we do it this way instead of using + * TYPEALIGN(). + */ + AnonymousShmemSize = size; + if (size % pagesize != 0) + AnonymousShmemSize += pagesize - (size % pagesize); + + /* + * We assume that no one will attempt to run PostgreSQL 9.3 or later + * on systems that are ancient enough that anonymous shared memory is + * not supported, such as pre-2.4 versions of Linux. If that turns out + * to be false, we might need to add a run-time test here and do this + * only if the running kernel supports it. + */ + AnonymousShmem = mmap(NULL, size, PROT_READ|PROT_WRITE, PG_MMAP_FLAGS, + -1, 0); + if (AnonymousShmem == NULL) + ereport(FATAL, + (errmsg("could not map %lu bytes of anonymous shared memory: %m", + (unsigned long) AnonymousShmemSize))); + + /* Now we can allocate a minimal SHM block. */ + allocsize = sizeof(PGShmemHeader); + } +#endif + /* Make sure PGSharedMemoryAttach doesn't fail without need */ UsedShmemSegAddr = NULL; @@ -370,7 +438,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) for (NextShmemSegID++;; NextShmemSegID++) { /* Try to create new segment */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); + memAddress = InternalIpcMemoryCreate(NextShmemSegID, allocsize); if (memAddress) break; /* successful create and attach */ @@ -409,7 +477,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) /* * Now try again to create the segment. */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); + memAddress = InternalIpcMemoryCreate(NextShmemSegID, allocsize); if (memAddress) break; /* successful create and attach */ @@ -448,7 +516,17 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) UsedShmemSegAddr = memAddress; UsedShmemSegID = (unsigned long) NextShmemSegID; - return hdr; + /* + * If AnonymousShmem is NULL here, then we're not using anonymous shared + * memory, and should return a pointer to the System V shared memory block. + * Otherwise, the System V shared memory block is only a shim, and we must + * return a pointer to the real block. + */ + if (AnonymousShmem == NULL) + return hdr; + memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader)); + return AnonymousShmem; + } #ifdef EXEC_BACKEND @@ -516,6 +594,11 @@ PGSharedMemoryDetach(void) elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr); UsedShmemSegAddr = NULL; } + + /* Release anonymous shared memory block, if any. */ + if (AnonymousShmem != NULL + && munmap(AnonymousShmem, AnonymousShmemSize) < 0) + elog(LOG, "munmap(%p) failed: %m", AnonymousShmem); }