From 2b1e36c7c0f294c70595bddaae0caecbcf8b50c6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 26 Apr 2003 02:57:14 +0000 Subject: [PATCH] Tweak stats collector start logic so that we will not try to spawn a new stats collector oftener than once a minute. Per gripe from Erik Walthinsen 4/25/03. --- src/backend/postmaster/pgstat.c | 92 ++++++++++++++++++----------- src/backend/postmaster/postmaster.c | 19 +++--- src/include/pgstat.h | 32 ++++++---- 3 files changed, 89 insertions(+), 54 deletions(-) diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index c5835226e6..0534974352 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -10,13 +10,10 @@ * * - Add a pgstat config column to pg_database, so this * entire thing can be enabled/disabled on a per db base. - * Not to be done before 7.2 - requires catalog change and - * thus an initdb and we might want to provide this as a - * patch for 7.1. * - * Copyright (c) 2001, PostgreSQL Global Development Group + * Copyright (c) 2001-2003, PostgreSQL Global Development Group * - * $Header: /cvsroot/pgsql/src/backend/postmaster/pgstat.c,v 1.33 2003/04/25 01:24:00 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/postmaster/pgstat.c,v 1.34 2003/04/26 02:57:14 tgl Exp $ * ---------- */ #include "postgres.h" @@ -31,6 +28,8 @@ #include #include +#include "pgstat.h" + #include "access/xact.h" #include "access/heapam.h" #include "catalog/catname.h" @@ -47,8 +46,6 @@ #include "utils/ps_status.h" #include "utils/syscache.h" -#include "pgstat.h" - /* ---------- * GUC parameters @@ -60,6 +57,12 @@ bool pgstat_collect_querystring = false; bool pgstat_collect_tuplelevel = false; bool pgstat_collect_blocklevel = false; +/* ---------- + * Other global variables + * ---------- + */ +bool pgstat_is_running = false; + /* ---------- * Local data * ---------- @@ -69,8 +72,8 @@ static int pgStatPipe[2]; static struct sockaddr_in pgStatAddr; static int pgStatPmPipe[2] = {-1, -1}; -static int pgStatRunning = 0; static int pgStatPid; +static time_t last_pgstat_start_time; static long pgStatNumMessages = 0; @@ -130,14 +133,12 @@ static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len); * pgstat_init() - * * Called from postmaster at startup. Create the resources required - * by the statistics collector process. - * - * NOTE: failure exit from this routine causes the postmaster to abort. - * This is unfriendly and should not be done except in dire straits. - * Better to let the postmaster start with stats collection disabled. + * by the statistics collector process. If unable to do so, do not + * fail --- better to let the postmaster start with stats collection + * disabled. * ---------- */ -int +void pgstat_init(void) { int alen; @@ -168,7 +169,7 @@ pgstat_init(void) * Nothing else required if collector will not get started */ if (!pgstat_collect_startcollector) - return 0; + return; /* * Create the UDP socket for sending and receiving statistic messages @@ -231,7 +232,7 @@ pgstat_init(void) goto startup_failed; } - return 0; + return; startup_failed: if (pgStatSock >= 0) @@ -239,11 +240,10 @@ startup_failed: pgStatSock = -1; /* Adjust GUC variables to suppress useless activity */ + pgstat_collect_startcollector = false; pgstat_collect_querystring = false; pgstat_collect_tuplelevel = false; pgstat_collect_blocklevel = false; - - return 0; } @@ -251,31 +251,51 @@ startup_failed: * pgstat_start() - * * Called from postmaster at startup or after an existing collector - * died. Fire up a fresh statistics collector. + * died. Attempt to fire up a fresh statistics collector. * - * NOTE: failure exit from this routine causes the postmaster to abort. + * Note: if fail, we will be called again from the postmaster main loop. * ---------- */ -int +void pgstat_start(void) { + time_t curtime; + /* * Do nothing if no collector needed */ - if (!pgstat_collect_startcollector) - return 0; + if (pgstat_is_running || !pgstat_collect_startcollector) + return; /* - * Check that the socket is there, else pgstat_init failed + * Do nothing if too soon since last collector start. This is a + * safety valve to protect against continuous respawn attempts if + * the collector is dying immediately at launch. Note that since + * we will be re-called from the postmaster main loop, we will get + * another chance later. + */ + curtime = time(NULL); + if ((unsigned int) (curtime - last_pgstat_start_time) < + (unsigned int) PGSTAT_RESTART_INTERVAL) + return; + last_pgstat_start_time = curtime; + + /* + * Check that the socket is there, else pgstat_init failed. */ if (pgStatSock < 0) { elog(LOG, "PGSTAT: statistics collector startup skipped"); - return 0; + /* + * We can only get here if someone tries to manually turn + * pgstat_collect_startcollector on after it had been off. + */ + pgstat_collect_startcollector = false; + return; } /* - * Then fork off the collector. Remember its PID for pgstat_ispgstat. + * Okay, fork off the collector. Remember its PID for pgstat_ispgstat. */ fflush(stdout); @@ -294,15 +314,14 @@ pgstat_start(void) beos_backend_startup_failed(); #endif elog(LOG, "PGSTAT: fork() failed: %m"); - pgStatRunning = 0; - return 0; + return; case 0: break; default: - pgStatRunning = 1; - return 0; + pgstat_is_running = true; + return; } /* in postmaster child ... */ @@ -335,16 +354,19 @@ pgstat_start(void) * was the statistics collector. * ---------- */ -int +bool pgstat_ispgstat(int pid) { - if (pgStatRunning == 0) - return 0; + if (!pgstat_is_running) + return false; if (pgStatPid != pid) - return 0; + return false; - return 1; + /* Oh dear ... */ + pgstat_is_running = false; + + return true; } diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 834b03ab62..e9a7d86f3d 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.314 2003/04/22 00:08:06 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.315 2003/04/26 02:57:14 tgl Exp $ * * NOTES * @@ -790,12 +790,10 @@ PostmasterMain(int argc, char *argv[]) } /* - * Initialize and startup the statistics collector process + * Initialize and try to startup the statistics collector process */ - if (pgstat_init() < 0) - ExitPostmaster(1); - if (pgstat_start() < 0) - ExitPostmaster(1); + pgstat_init(); + pgstat_start(); /* * Load cached files for client authentication. @@ -1058,6 +1056,10 @@ ServerLoop(void) ConnFree(port); } } + + /* If we have lost the stats collector, try to start a new one */ + if (!pgstat_is_running) + pgstat_start(); } } @@ -1720,8 +1722,9 @@ reaper(SIGNAL_ARGS) #endif /* - * Check if this child was the statistics collector. If so, start - * a new one. + * Check if this child was the statistics collector. If so, + * try to start a new one. (If fail, we'll try again in + * future cycles of the main loop.) */ if (pgstat_ispgstat(pid)) { diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 533c2e44f5..e85f6ec3e9 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -3,15 +3,18 @@ * * Definitions for the PostgreSQL statistics collector daemon. * - * Copyright (c) 2001, PostgreSQL Global Development Group + * Copyright (c) 2001-2003, PostgreSQL Global Development Group * - * $Id: pgstat.h,v 1.13 2003/03/20 03:34:56 momjian Exp $ + * $Id: pgstat.h,v 1.14 2003/04/26 02:57:14 tgl Exp $ * ---------- */ #ifndef PGSTAT_H #define PGSTAT_H +#include "utils/hsearch.h" #include "utils/nabstime.h" +#include "utils/rel.h" + /* ---------- * Paths for the statistics files. The %s is replaced with the @@ -26,16 +29,17 @@ * ---------- */ #define PGSTAT_STAT_INTERVAL 500 /* How often to write the status */ - /* file, in milliseconds. */ + /* file; in milliseconds. */ #define PGSTAT_DESTROY_DELAY 10000 /* How long to keep destroyed */ - /* objects known to give delayed */ - /* UDP packets time to arrive, */ + /* objects known, to give delayed */ + /* UDP packets time to arrive; */ /* in milliseconds. */ -#define PGSTAT_DESTROY_COUNT (PGSTAT_DESTROY_DELAY \ - / PGSTAT_STAT_INTERVAL) +#define PGSTAT_DESTROY_COUNT (PGSTAT_DESTROY_DELAY / PGSTAT_STAT_INTERVAL) +#define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart */ + /* a failed statistics collector; in seconds. */ /* ---------- * How much of the actual query string to send to the collector. @@ -323,7 +327,7 @@ typedef union PgStat_Msg /* ---------- - * Global variables + * GUC parameters * ---------- */ extern bool pgstat_collect_startcollector; @@ -332,13 +336,19 @@ extern bool pgstat_collect_querystring; extern bool pgstat_collect_tuplelevel; extern bool pgstat_collect_blocklevel; +/* ---------- + * Other global variables + * ---------- + */ +extern bool pgstat_is_running; + /* ---------- * Functions called from postmaster * ---------- */ -extern int pgstat_init(void); -extern int pgstat_start(void); -extern int pgstat_ispgstat(int pid); +extern void pgstat_init(void); +extern void pgstat_start(void); +extern bool pgstat_ispgstat(int pid); extern void pgstat_close_sockets(void); extern void pgstat_beterm(int pid);