mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-09 08:10:09 +08:00
Fix signal handling in logical replication workers
The logical replication worker processes now use the normal die() handler for SIGTERM and CHECK_FOR_INTERRUPTS() instead of custom code. One problem before was that the apply worker would not exit promptly when a subscription was dropped, which could lead to deadlocks. Author: Petr Jelinek <petr.jelinek@2ndquadrant.com> Reported-by: Masahiko Sawada <sawada.mshk@gmail.com>
This commit is contained in:
parent
acbd8375e9
commit
9fcf670c2e
@ -80,8 +80,8 @@ static void logicalrep_worker_detach(void);
|
||||
static void logicalrep_worker_cleanup(LogicalRepWorker *worker);
|
||||
|
||||
/* Flags set by signal handlers */
|
||||
volatile sig_atomic_t got_SIGHUP = false;
|
||||
volatile sig_atomic_t got_SIGTERM = false;
|
||||
static volatile sig_atomic_t got_SIGHUP = false;
|
||||
static volatile sig_atomic_t got_SIGTERM = false;
|
||||
|
||||
static bool on_commit_launcher_wakeup = false;
|
||||
|
||||
@ -624,8 +624,8 @@ logicalrep_worker_onexit(int code, Datum arg)
|
||||
}
|
||||
|
||||
/* SIGTERM: set flag to exit at next convenient time */
|
||||
void
|
||||
logicalrep_worker_sigterm(SIGNAL_ARGS)
|
||||
static void
|
||||
logicalrep_launcher_sigterm(SIGNAL_ARGS)
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
@ -638,8 +638,8 @@ logicalrep_worker_sigterm(SIGNAL_ARGS)
|
||||
}
|
||||
|
||||
/* SIGHUP: set flag to reload configuration at next convenient time */
|
||||
void
|
||||
logicalrep_worker_sighup(SIGNAL_ARGS)
|
||||
static void
|
||||
logicalrep_launcher_sighup(SIGNAL_ARGS)
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
@ -799,8 +799,8 @@ ApplyLauncherMain(Datum main_arg)
|
||||
before_shmem_exit(logicalrep_launcher_onexit, (Datum) 0);
|
||||
|
||||
/* Establish signal handlers. */
|
||||
pqsignal(SIGHUP, logicalrep_worker_sighup);
|
||||
pqsignal(SIGTERM, logicalrep_worker_sigterm);
|
||||
pqsignal(SIGHUP, logicalrep_launcher_sighup);
|
||||
pqsignal(SIGTERM, logicalrep_launcher_sigterm);
|
||||
BackgroundWorkerUnblockSignals();
|
||||
|
||||
/* Make it easy to identify our processes. */
|
||||
|
@ -154,10 +154,12 @@ wait_for_sync_status_change(Oid relid, char origstate)
|
||||
int rc;
|
||||
char state = origstate;
|
||||
|
||||
while (!got_SIGTERM)
|
||||
for (;;)
|
||||
{
|
||||
LogicalRepWorker *worker;
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
|
||||
worker = logicalrep_worker_find(MyLogicalRepWorker->subid,
|
||||
relid, false);
|
||||
@ -525,7 +527,7 @@ copy_read_data(void *outbuf, int minread, int maxread)
|
||||
bytesread += avail;
|
||||
}
|
||||
|
||||
while (!got_SIGTERM && maxread > 0 && bytesread < minread)
|
||||
while (maxread > 0 && bytesread < minread)
|
||||
{
|
||||
pgsocket fd = PGINVALID_SOCKET;
|
||||
int rc;
|
||||
@ -579,10 +581,6 @@ copy_read_data(void *outbuf, int minread, int maxread)
|
||||
ResetLatch(&MyProc->procLatch);
|
||||
}
|
||||
|
||||
/* Check for exit condition. */
|
||||
if (got_SIGTERM)
|
||||
proc_exit(0);
|
||||
|
||||
return bytesread;
|
||||
}
|
||||
|
||||
|
@ -72,6 +72,8 @@
|
||||
#include "storage/proc.h"
|
||||
#include "storage/procarray.h"
|
||||
|
||||
#include "tcop/tcopprot.h"
|
||||
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/catcache.h"
|
||||
#include "utils/datum.h"
|
||||
@ -118,6 +120,9 @@ static void store_flush_position(XLogRecPtr remote_lsn);
|
||||
|
||||
static void reread_subscription(void);
|
||||
|
||||
/* Flags set by signal handlers */
|
||||
static volatile sig_atomic_t got_SIGHUP = false;
|
||||
|
||||
/*
|
||||
* Should this worker apply changes for given relation.
|
||||
*
|
||||
@ -1005,7 +1010,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received)
|
||||
/* mark as idle, before starting to loop */
|
||||
pgstat_report_activity(STATE_IDLE, NULL);
|
||||
|
||||
while (!got_SIGTERM)
|
||||
for (;;)
|
||||
{
|
||||
pgsocket fd = PGINVALID_SOCKET;
|
||||
int rc;
|
||||
@ -1015,6 +1020,8 @@ LogicalRepApplyLoop(XLogRecPtr last_received)
|
||||
TimestampTz last_recv_timestamp = GetCurrentTimestamp();
|
||||
bool ping_sent = false;
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
MemoryContextSwitchTo(ApplyMessageContext);
|
||||
|
||||
len = walrcv_receive(wrconn, &buf, &fd);
|
||||
@ -1437,6 +1444,19 @@ subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
|
||||
MySubscriptionValid = false;
|
||||
}
|
||||
|
||||
/* SIGHUP: set flag to reload configuration at next convenient time */
|
||||
static void
|
||||
logicalrep_worker_sighup(SIGNAL_ARGS)
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
got_SIGHUP = true;
|
||||
|
||||
/* Waken anything waiting on the process latch */
|
||||
SetLatch(MyLatch);
|
||||
|
||||
errno = save_errno;
|
||||
}
|
||||
|
||||
/* Logical Replication Apply worker entry point */
|
||||
void
|
||||
@ -1454,7 +1474,7 @@ ApplyWorkerMain(Datum main_arg)
|
||||
|
||||
/* Setup signal handling */
|
||||
pqsignal(SIGHUP, logicalrep_worker_sighup);
|
||||
pqsignal(SIGTERM, logicalrep_worker_sigterm);
|
||||
pqsignal(SIGTERM, die);
|
||||
BackgroundWorkerUnblockSignals();
|
||||
|
||||
/* Initialise stats to a sanish value */
|
||||
@ -1604,6 +1624,14 @@ ApplyWorkerMain(Datum main_arg)
|
||||
/* Run the main loop. */
|
||||
LogicalRepApplyLoop(origin_startpos);
|
||||
|
||||
/* We should only get here if we received SIGTERM */
|
||||
proc_exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Is current process a logical replication worker?
|
||||
*/
|
||||
bool
|
||||
IsLogicalWorker(void)
|
||||
{
|
||||
return MyLogicalRepWorker != NULL;
|
||||
}
|
||||
|
@ -55,6 +55,7 @@
|
||||
#include "pg_getopt.h"
|
||||
#include "postmaster/autovacuum.h"
|
||||
#include "postmaster/postmaster.h"
|
||||
#include "replication/logicalworker.h"
|
||||
#include "replication/slot.h"
|
||||
#include "replication/walsender.h"
|
||||
#include "rewrite/rewriteHandler.h"
|
||||
@ -2845,6 +2846,10 @@ ProcessInterrupts(void)
|
||||
ereport(FATAL,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating autovacuum process due to administrator command")));
|
||||
else if (IsLogicalWorker())
|
||||
ereport(FATAL,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating logical replication worker due to administrator command")));
|
||||
else if (RecoveryConflictPending && RecoveryConflictRetryable)
|
||||
{
|
||||
pgstat_report_recovery_conflict(RecoveryConflictReason);
|
||||
|
@ -14,4 +14,6 @@
|
||||
|
||||
extern void ApplyWorkerMain(Datum main_arg);
|
||||
|
||||
extern bool IsLogicalWorker(void);
|
||||
|
||||
#endif /* LOGICALWORKER_H */
|
||||
|
@ -67,8 +67,6 @@ extern Subscription *MySubscription;
|
||||
extern LogicalRepWorker *MyLogicalRepWorker;
|
||||
|
||||
extern bool in_remote_transaction;
|
||||
extern volatile sig_atomic_t got_SIGHUP;
|
||||
extern volatile sig_atomic_t got_SIGTERM;
|
||||
|
||||
extern void logicalrep_worker_attach(int slot);
|
||||
extern LogicalRepWorker *logicalrep_worker_find(Oid subid, Oid relid,
|
||||
@ -81,8 +79,6 @@ extern void logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker);
|
||||
|
||||
extern int logicalrep_sync_worker_count(Oid subid);
|
||||
|
||||
extern void logicalrep_worker_sighup(SIGNAL_ARGS);
|
||||
extern void logicalrep_worker_sigterm(SIGNAL_ARGS);
|
||||
extern char *LogicalRepSyncTableStart(XLogRecPtr *origin_startpos);
|
||||
void process_syncing_tables(XLogRecPtr current_lsn);
|
||||
void invalidate_syncing_table_states(Datum arg, int cacheid,
|
||||
|
Loading…
Reference in New Issue
Block a user