mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Add memory barriers for PgBackendStatus.st_changecount protocol.
st_changecount protocol needs the memory barriers to ensure that the apparent order of execution is as it desires. Otherwise, for example, the CPU might rearrange the code so that st_changecount is incremented twice before the modification on a machine with weak memory ordering. This surprising result can lead to bugs. This commit introduces the macros to load and store st_changecount with the memory barriers. These are called before and after PgBackendStatus entries are modified or copied into private memory, in order to prevent CPU from reordering PgBackendStatus access. Per discussion on pgsql-hackers, we decided not to back-patch this to 9.4 or before until we get an actual bug report about this. Patch by me. Review by Robert Haas.
This commit is contained in:
parent
19e065c049
commit
38628db8d8
@ -2563,7 +2563,7 @@ pgstat_bestart(void)
|
|||||||
beentry = MyBEEntry;
|
beentry = MyBEEntry;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_before(beentry);
|
||||||
} while ((beentry->st_changecount & 1) == 0);
|
} while ((beentry->st_changecount & 1) == 0);
|
||||||
|
|
||||||
beentry->st_procpid = MyProcPid;
|
beentry->st_procpid = MyProcPid;
|
||||||
@ -2588,8 +2588,7 @@ pgstat_bestart(void)
|
|||||||
beentry->st_appname[NAMEDATALEN - 1] = '\0';
|
beentry->st_appname[NAMEDATALEN - 1] = '\0';
|
||||||
beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
|
beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
|
||||||
|
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_after(beentry);
|
||||||
Assert((beentry->st_changecount & 1) == 0);
|
|
||||||
|
|
||||||
/* Update app name to current GUC setting */
|
/* Update app name to current GUC setting */
|
||||||
if (application_name)
|
if (application_name)
|
||||||
@ -2624,12 +2623,11 @@ pgstat_beshutdown_hook(int code, Datum arg)
|
|||||||
* before and after. We use a volatile pointer here to ensure the
|
* before and after. We use a volatile pointer here to ensure the
|
||||||
* compiler doesn't try to get cute.
|
* compiler doesn't try to get cute.
|
||||||
*/
|
*/
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_before(beentry);
|
||||||
|
|
||||||
beentry->st_procpid = 0; /* mark invalid */
|
beentry->st_procpid = 0; /* mark invalid */
|
||||||
|
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_after(beentry);
|
||||||
Assert((beentry->st_changecount & 1) == 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2666,7 +2664,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
|
|||||||
* non-disabled state. As our final update, change the state and
|
* non-disabled state. As our final update, change the state and
|
||||||
* clear fields we will not be updating anymore.
|
* clear fields we will not be updating anymore.
|
||||||
*/
|
*/
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_before(beentry);
|
||||||
beentry->st_state = STATE_DISABLED;
|
beentry->st_state = STATE_DISABLED;
|
||||||
beentry->st_state_start_timestamp = 0;
|
beentry->st_state_start_timestamp = 0;
|
||||||
beentry->st_activity[0] = '\0';
|
beentry->st_activity[0] = '\0';
|
||||||
@ -2674,8 +2672,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
|
|||||||
/* st_xact_start_timestamp and st_waiting are also disabled */
|
/* st_xact_start_timestamp and st_waiting are also disabled */
|
||||||
beentry->st_xact_start_timestamp = 0;
|
beentry->st_xact_start_timestamp = 0;
|
||||||
beentry->st_waiting = false;
|
beentry->st_waiting = false;
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_after(beentry);
|
||||||
Assert((beentry->st_changecount & 1) == 0);
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -2695,7 +2692,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
|
|||||||
/*
|
/*
|
||||||
* Now update the status entry
|
* Now update the status entry
|
||||||
*/
|
*/
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_before(beentry);
|
||||||
|
|
||||||
beentry->st_state = state;
|
beentry->st_state = state;
|
||||||
beentry->st_state_start_timestamp = current_timestamp;
|
beentry->st_state_start_timestamp = current_timestamp;
|
||||||
@ -2707,8 +2704,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
|
|||||||
beentry->st_activity_start_timestamp = start_timestamp;
|
beentry->st_activity_start_timestamp = start_timestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_after(beentry);
|
||||||
Assert((beentry->st_changecount & 1) == 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------
|
/* ----------
|
||||||
@ -2734,13 +2730,12 @@ pgstat_report_appname(const char *appname)
|
|||||||
* st_changecount before and after. We use a volatile pointer here to
|
* st_changecount before and after. We use a volatile pointer here to
|
||||||
* ensure the compiler doesn't try to get cute.
|
* ensure the compiler doesn't try to get cute.
|
||||||
*/
|
*/
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_before(beentry);
|
||||||
|
|
||||||
memcpy((char *) beentry->st_appname, appname, len);
|
memcpy((char *) beentry->st_appname, appname, len);
|
||||||
beentry->st_appname[len] = '\0';
|
beentry->st_appname[len] = '\0';
|
||||||
|
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_after(beentry);
|
||||||
Assert((beentry->st_changecount & 1) == 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2760,10 +2755,9 @@ pgstat_report_xact_timestamp(TimestampTz tstamp)
|
|||||||
* st_changecount before and after. We use a volatile pointer here to
|
* st_changecount before and after. We use a volatile pointer here to
|
||||||
* ensure the compiler doesn't try to get cute.
|
* ensure the compiler doesn't try to get cute.
|
||||||
*/
|
*/
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_before(beentry);
|
||||||
beentry->st_xact_start_timestamp = tstamp;
|
beentry->st_xact_start_timestamp = tstamp;
|
||||||
beentry->st_changecount++;
|
pgstat_increment_changecount_after(beentry);
|
||||||
Assert((beentry->st_changecount & 1) == 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------
|
/* ----------
|
||||||
@ -2839,7 +2833,10 @@ pgstat_read_current_status(void)
|
|||||||
*/
|
*/
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
int save_changecount = beentry->st_changecount;
|
int before_changecount;
|
||||||
|
int after_changecount;
|
||||||
|
|
||||||
|
pgstat_save_changecount_before(beentry, before_changecount);
|
||||||
|
|
||||||
localentry->backendStatus.st_procpid = beentry->st_procpid;
|
localentry->backendStatus.st_procpid = beentry->st_procpid;
|
||||||
if (localentry->backendStatus.st_procpid > 0)
|
if (localentry->backendStatus.st_procpid > 0)
|
||||||
@ -2856,8 +2853,9 @@ pgstat_read_current_status(void)
|
|||||||
localentry->backendStatus.st_activity = localactivity;
|
localentry->backendStatus.st_activity = localactivity;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (save_changecount == beentry->st_changecount &&
|
pgstat_save_changecount_after(beentry, after_changecount);
|
||||||
(save_changecount & 1) == 0)
|
if (before_changecount == after_changecount &&
|
||||||
|
(before_changecount & 1) == 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Make sure we can break out of loop if stuck... */
|
/* Make sure we can break out of loop if stuck... */
|
||||||
@ -2927,12 +2925,17 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
|
|||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
int save_changecount = vbeentry->st_changecount;
|
int before_changecount;
|
||||||
|
int after_changecount;
|
||||||
|
|
||||||
|
pgstat_save_changecount_before(vbeentry, before_changecount);
|
||||||
|
|
||||||
found = (vbeentry->st_procpid == pid);
|
found = (vbeentry->st_procpid == pid);
|
||||||
|
|
||||||
if (save_changecount == vbeentry->st_changecount &&
|
pgstat_save_changecount_after(vbeentry, after_changecount);
|
||||||
(save_changecount & 1) == 0)
|
|
||||||
|
if (before_changecount == after_changecount &&
|
||||||
|
(before_changecount & 1) == 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Make sure we can break out of loop if stuck... */
|
/* Make sure we can break out of loop if stuck... */
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "libpq/pqcomm.h"
|
#include "libpq/pqcomm.h"
|
||||||
#include "portability/instr_time.h"
|
#include "portability/instr_time.h"
|
||||||
#include "postmaster/pgarch.h"
|
#include "postmaster/pgarch.h"
|
||||||
|
#include "storage/barrier.h"
|
||||||
#include "utils/hsearch.h"
|
#include "utils/hsearch.h"
|
||||||
#include "utils/relcache.h"
|
#include "utils/relcache.h"
|
||||||
|
|
||||||
@ -714,6 +715,12 @@ typedef struct PgBackendStatus
|
|||||||
* st_changecount again. If the value hasn't changed, and if it's even,
|
* st_changecount again. If the value hasn't changed, and if it's even,
|
||||||
* the copy is valid; otherwise start over. This makes updates cheap
|
* the copy is valid; otherwise start over. This makes updates cheap
|
||||||
* while reads are potentially expensive, but that's the tradeoff we want.
|
* while reads are potentially expensive, but that's the tradeoff we want.
|
||||||
|
*
|
||||||
|
* The above protocol needs the memory barriers to ensure that
|
||||||
|
* the apparent order of execution is as it desires. Otherwise,
|
||||||
|
* for example, the CPU might rearrange the code so that st_changecount
|
||||||
|
* is incremented twice before the modification on a machine with
|
||||||
|
* weak memory ordering. This surprising result can lead to bugs.
|
||||||
*/
|
*/
|
||||||
int st_changecount;
|
int st_changecount;
|
||||||
|
|
||||||
@ -745,6 +752,43 @@ typedef struct PgBackendStatus
|
|||||||
char *st_activity;
|
char *st_activity;
|
||||||
} PgBackendStatus;
|
} PgBackendStatus;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Macros to load and store st_changecount with the memory barriers.
|
||||||
|
*
|
||||||
|
* pgstat_increment_changecount_before() and
|
||||||
|
* pgstat_increment_changecount_after() need to be called before and after
|
||||||
|
* PgBackendStatus entries are modified, respectively. This makes sure that
|
||||||
|
* st_changecount is incremented around the modification.
|
||||||
|
*
|
||||||
|
* Also pgstat_save_changecount_before() and pgstat_save_changecount_after()
|
||||||
|
* need to be called before and after PgBackendStatus entries are copied into
|
||||||
|
* private memory, respectively.
|
||||||
|
*/
|
||||||
|
#define pgstat_increment_changecount_before(beentry) \
|
||||||
|
do { \
|
||||||
|
beentry->st_changecount++; \
|
||||||
|
pg_write_barrier(); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define pgstat_increment_changecount_after(beentry) \
|
||||||
|
do { \
|
||||||
|
pg_write_barrier(); \
|
||||||
|
beentry->st_changecount++; \
|
||||||
|
Assert((beentry->st_changecount & 1) == 0); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define pgstat_save_changecount_before(beentry, save_changecount) \
|
||||||
|
do { \
|
||||||
|
save_changecount = beentry->st_changecount; \
|
||||||
|
pg_read_barrier(); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define pgstat_save_changecount_after(beentry, save_changecount) \
|
||||||
|
do { \
|
||||||
|
pg_read_barrier(); \
|
||||||
|
save_changecount = beentry->st_changecount; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
/* ----------
|
/* ----------
|
||||||
* LocalPgBackendStatus
|
* LocalPgBackendStatus
|
||||||
*
|
*
|
||||||
|
Loading…
Reference in New Issue
Block a user