mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
Try to log the current query string when a backend crashes.
To minimize risk inside the postmaster, we subject this feature to a number of significant limitations. We very much wish to avoid doing any complex processing inside the postmaster, due to the possibility that the crashed backend has completely corrupted shared memory. To that end, no encoding conversion is done; instead, we just replace anything that doesn't look like an ASCII character with a question mark. We limit the amount of data copied to 1024 characters, and carefully sanity check the source of that data. While these restrictions would doubtless be unacceptable in a general-purpose logging facility, even this limited facility seems like an improvement over the status quo ante. Marti Raudsepp, reviewed by PDXPUG and myself
This commit is contained in:
parent
980261929f
commit
c8e8b5a6e2
@ -58,6 +58,7 @@
|
||||
#include "storage/pg_shmem.h"
|
||||
#include "storage/pmsignal.h"
|
||||
#include "storage/procsignal.h"
|
||||
#include "utils/ascii.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/ps_status.h"
|
||||
@ -2228,6 +2229,7 @@ static PgBackendStatus *MyBEEntry = NULL;
|
||||
static char *BackendClientHostnameBuffer = NULL;
|
||||
static char *BackendAppnameBuffer = NULL;
|
||||
static char *BackendActivityBuffer = NULL;
|
||||
static Size BackendActivityBufferSize = 0;
|
||||
|
||||
|
||||
/*
|
||||
@ -2310,9 +2312,12 @@ CreateSharedBackendStatus(void)
|
||||
}
|
||||
|
||||
/* Create or attach to the shared activity buffer */
|
||||
size = mul_size(pgstat_track_activity_query_size, MaxBackends);
|
||||
BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
|
||||
MaxBackends);
|
||||
BackendActivityBuffer = (char *)
|
||||
ShmemInitStruct("Backend Activity Buffer", size, &found);
|
||||
ShmemInitStruct("Backend Activity Buffer",
|
||||
BackendActivityBufferSize,
|
||||
&found);
|
||||
|
||||
if (!found)
|
||||
{
|
||||
@ -2751,6 +2756,70 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
|
||||
return "<backend information not available>";
|
||||
}
|
||||
|
||||
/* ----------
|
||||
* pgstat_get_crashed_backend_activity() -
|
||||
*
|
||||
* Return a string representing the current activity of the backend with
|
||||
* the specified PID. Like the function above, but reads shared memory with
|
||||
* the expectation that it may be corrupt. Returns either a pointer to a
|
||||
* constant string, or writes into the 'buffer' argument and returns it.
|
||||
*
|
||||
* This function is only intended to be used by postmaster to report the
|
||||
* query that crashed the backend. In particular, no attempt is made to
|
||||
* follow the correct concurrency protocol when accessing the
|
||||
* BackendStatusArray. But that's OK, in the worst case we'll return a
|
||||
* corrupted message. We also must take care not to trip on ereport(ERROR).
|
||||
*
|
||||
* Note: return strings for special cases match pg_stat_get_backend_activity.
|
||||
* ----------
|
||||
*/
|
||||
const char *
|
||||
pgstat_get_crashed_backend_activity(int pid, char *buffer,
|
||||
int len)
|
||||
{
|
||||
volatile PgBackendStatus *beentry;
|
||||
int i;
|
||||
|
||||
beentry = BackendStatusArray;
|
||||
for (i = 1; i <= MaxBackends; i++)
|
||||
{
|
||||
if (beentry->st_procpid == pid)
|
||||
{
|
||||
/* Read pointer just once, so it can't change after validation */
|
||||
const char *activity = beentry->st_activity;
|
||||
const char *activity_last;
|
||||
|
||||
/*
|
||||
* We can't access activity pointer before we verify that it
|
||||
* falls into BackendActivityBuffer. To make sure that the entire
|
||||
* string including its ending is contained within the buffer,
|
||||
* we subtract one activity length from it.
|
||||
*/
|
||||
activity_last = BackendActivityBuffer + BackendActivityBufferSize
|
||||
- pgstat_track_activity_query_size;
|
||||
|
||||
if (activity < BackendActivityBuffer ||
|
||||
activity > activity_last)
|
||||
return "<command string corrupt>";
|
||||
|
||||
if (*(activity) == '\0')
|
||||
return "<command string empty>";
|
||||
|
||||
/*
|
||||
* Copy only ASCII-safe characters so we don't run into encoding
|
||||
* problems when reporting the message.
|
||||
*/
|
||||
ascii_safe_strncpy(buffer, activity, len);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
beentry++;
|
||||
}
|
||||
|
||||
/* PID not found */
|
||||
return "<backend information not available>";
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
* Local support functions follow
|
||||
|
@ -2777,6 +2777,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
||||
static void
|
||||
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
|
||||
{
|
||||
char activity_buffer[1024]; /* default track_activity_query_size */
|
||||
const char *activity;
|
||||
|
||||
activity = pgstat_get_crashed_backend_activity(pid,
|
||||
activity_buffer,
|
||||
sizeof(activity_buffer));
|
||||
|
||||
if (WIFEXITED(exitstatus))
|
||||
ereport(lev,
|
||||
|
||||
@ -2784,7 +2791,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
|
||||
translator: %s is a noun phrase describing a child process, such as
|
||||
"server process" */
|
||||
(errmsg("%s (PID %d) exited with exit code %d",
|
||||
procname, pid, WEXITSTATUS(exitstatus))));
|
||||
procname, pid, WEXITSTATUS(exitstatus)),
|
||||
errdetail("Running query: %s", activity)));
|
||||
else if (WIFSIGNALED(exitstatus))
|
||||
#if defined(WIN32)
|
||||
ereport(lev,
|
||||
@ -2794,7 +2802,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
|
||||
"server process" */
|
||||
(errmsg("%s (PID %d) was terminated by exception 0x%X",
|
||||
procname, pid, WTERMSIG(exitstatus)),
|
||||
errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value.")));
|
||||
errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
|
||||
errdetail("Running query: %s", activity)));
|
||||
#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
|
||||
ereport(lev,
|
||||
|
||||
@ -2804,7 +2813,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
|
||||
(errmsg("%s (PID %d) was terminated by signal %d: %s",
|
||||
procname, pid, WTERMSIG(exitstatus),
|
||||
WTERMSIG(exitstatus) < NSIG ?
|
||||
sys_siglist[WTERMSIG(exitstatus)] : "(unknown)")));
|
||||
sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
|
||||
errdetail("Running query: %s", activity)));
|
||||
#else
|
||||
ereport(lev,
|
||||
|
||||
@ -2812,7 +2822,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
|
||||
translator: %s is a noun phrase describing a child process, such as
|
||||
"server process" */
|
||||
(errmsg("%s (PID %d) was terminated by signal %d",
|
||||
procname, pid, WTERMSIG(exitstatus))));
|
||||
procname, pid, WTERMSIG(exitstatus)),
|
||||
errdetail("Running query: %s", activity)));
|
||||
#endif
|
||||
else
|
||||
ereport(lev,
|
||||
@ -2821,7 +2832,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
|
||||
translator: %s is a noun phrase describing a child process, such as
|
||||
"server process" */
|
||||
(errmsg("%s (PID %d) exited with unrecognized status %d",
|
||||
procname, pid, exitstatus)));
|
||||
procname, pid, exitstatus),
|
||||
errdetail("Running query: %s", activity)));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -158,3 +158,37 @@ to_ascii_default(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
|
||||
}
|
||||
|
||||
/* ----------
 * "Escape" a string in unknown encoding to a valid ASCII string.
 * Bytes that are neither printable ASCII (space through '~') nor one of
 * newline, carriage return or tab are replaced with '?'.
 * This must not trigger ereport(ERROR), as it is called from postmaster.
 *
 * dest: output buffer of at least 'len' bytes.
 * src:	 input string; reading stops at its first null byte or after
 *		 len - 1 bytes, whichever comes first.
 * len:	 size of 'dest' in bytes, including room for the terminator.
 *
 * Unlike C strncpy(), the result is always terminated with exactly one null
 * byte, provided len is at least 1.  If len is zero or negative there is no
 * room even for the terminator, so 'dest' is left untouched.
 * ----------
 */
void
ascii_safe_strncpy(char *dest, const char *src, int len)
{
	int			i;

	/* No room for the terminating null byte: writing dest[0] would overrun */
	if (len <= 0)
		return;

	for (i = 0; i < (len - 1); i++)
	{
		unsigned char ch = src[i];	/* use unsigned char here to avoid
									 * compiler warning */

		if (ch == '\0')
			break;
		/* Keep printable ASCII characters; 127 (DEL) is a control character */
		if (32 <= ch && ch <= 126)
			dest[i] = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			dest[i] = ch;
		/* Everything else is replaced with '?' */
		else
			dest[i] = '?';
	}

	dest[i] = '\0';
}
|
||||
|
@ -720,6 +720,8 @@ extern void pgstat_report_appname(const char *appname);
|
||||
extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
|
||||
extern void pgstat_report_waiting(bool waiting);
|
||||
extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
|
||||
extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
|
||||
int len);
|
||||
|
||||
extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
|
||||
extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
|
||||
|
@ -16,5 +16,6 @@
|
||||
extern Datum to_ascii_encname(PG_FUNCTION_ARGS);
|
||||
extern Datum to_ascii_enc(PG_FUNCTION_ARGS);
|
||||
extern Datum to_ascii_default(PG_FUNCTION_ARGS);
|
||||
extern void ascii_safe_strncpy(char *dest, const char *src, int len);
|
||||
|
||||
#endif /* _ASCII_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user