From 96c5389342967aa6bacc19b8bdcf00481b61ba12 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 16 Jul 2006 18:17:35 +0000 Subject: [PATCH] Ensure that we retry rather than erroring out when send() or recv() return EINTR; the stats code was failing to do this and so were a couple of places in the postmaster. The stats code assumed that recv() could not return EINTR if a preceding select() showed the socket to be read-ready, but this is demonstrably false with our Windows implementation of recv(), and it may not be the case on all Unix variants either. I think this explains the intermittent stats regression test failures we've been seeing, as well as reports of stats collector instability under high load on Windows. Backpatch as far as 8.0. --- src/backend/postmaster/pgstat.c | 28 +++++++++++++++++++++++++--- src/backend/postmaster/postmaster.c | 13 +++++++++++-- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index ef826918995..765ee1fe203 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -13,7 +13,7 @@ * * Copyright (c) 2001-2005, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.87.4.2 2005/03/31 23:21:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.87.4.3 2006/07/16 18:17:35 tgl Exp $ * ---------- */ #include "postgres.h" @@ -337,8 +337,12 @@ pgstat_init(void) * packet filtering rules prevent it). */ test_byte = TESTBYTEVAL; + +retry1: if (send(pgStatSock, &test_byte, 1, 0) != 1) { + if (errno == EINTR) + goto retry1; /* if interrupted, just retry */ ereport(LOG, (errcode_for_socket_access(), errmsg("could not send test message on socket for statistics collector: %m"))); @@ -389,8 +393,11 @@ pgstat_init(void) test_byte++; /* just make sure variable is changed */ +retry2: if (recv(pgStatSock, &test_byte, 1, 0) != 1) { + if (errno == EINTR) + goto retry2; /* if interrupted, just retry */ ereport(LOG, (errcode_for_socket_access(), errmsg("could not receive test message on socket for statistics collector: %m"))); @@ -1329,13 +1336,24 @@ pgstat_setheader(PgStat_MsgHdr *hdr, int mtype) static void pgstat_send(void *msg, int len) { + int rc; + if (pgStatSock < 0) return; ((PgStat_MsgHdr *) msg)->m_size = len; - send(pgStatSock, msg, len, 0); - /* We deliberately ignore any error from send() */ + /* We'll retry after EINTR, but ignore all other failures */ + do + { + rc = send(pgStatSock, msg, len, 0); + } while (rc < 0 && errno == EINTR); + +#ifdef USE_ASSERT_CHECKING + /* In debug builds, log send failures ... */ + if (rc < 0) + elog(LOG, "could not send to statistics collector: %m"); +#endif } @@ -1863,9 +1881,13 @@ pgstat_recvbuffer(void) len = recv(pgStatSock, (char *) &input_buffer, sizeof(PgStat_Msg), 0); if (len < 0) + { + if (errno == EINTR) + continue; ereport(ERROR, (errcode_for_socket_access(), errmsg("could not read statistics message: %m"))); + } /* * We ignore messages that are smaller than our common header diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 0cf734fff72..69a2b5bbcf7 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.443.4.7 2006/03/18 22:10:14 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.443.4.8 2006/07/16 18:17:35 tgl Exp $ * * NOTES * @@ -1399,8 +1399,12 @@ ProcessStartupPacket(Port *port, bool SSLdone) #else SSLok = 'N'; /* No support for SSL */ #endif + +retry1: if (send(port->sock, &SSLok, 1, 0) != 1) { + if (errno == EINTR) + goto retry1; /* if interrupted, just retry */ ereport(COMMERROR, (errcode_for_socket_access(), errmsg("failed to send SSL negotiation response: %m"))); @@ -2532,6 +2536,7 @@ static void report_fork_failure_to_client(Port *port, int errnum) { char buffer[1000]; + int rc; /* Format the error message packet (always V2 protocol) */ snprintf(buffer, sizeof(buffer), "E%s%s\n", @@ -2542,7 +2547,11 @@ report_fork_failure_to_client(Port *port, int errnum) if (!pg_set_noblock(port->sock)) return; - send(port->sock, buffer, strlen(buffer) + 1, 0); + /* We'll retry after EINTR, but ignore all other failures */ + do + { + rc = send(port->sock, buffer, strlen(buffer) + 1, 0); + } while (rc < 0 && errno == EINTR); }