diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 00f0c459ac..e706ee271c 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -1,4 +1,4 @@
-
+
Write-Ahead Logging (WAL)
@@ -295,10 +295,13 @@
record to the log with LogInsert but before
performing a LogFlush. This delay allows other
backends to add their commit records to the log so as to have all
- of them flushed with a single log sync. Unfortunately, this
- mechanism is not fully implemented at release 7.1, so there is at
- present usually no benefit to be gained from increasing this parameter
- above its default value of zero.
+ of them flushed with a single log sync. No sleep will occur if fsync
+ is not enabled or if fewer than COMMIT_SIBLINGS
+ other backends are currently in active transactions; this avoids
+ sleeping when it's unlikely that any other backend will commit soon.
+ Note that on most platforms, the resolution of a sleep request is
+ ten milliseconds, so that any nonzero COMMIT_DELAY
+ setting between 1 and 10000 microseconds will have the same effect.
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 37eee5ebfd..0af2582658 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.97 2001/02/18 04:50:43 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.98 2001/02/26 00:50:07 tgl Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
@@ -157,6 +157,7 @@
#include
#include "access/nbtree.h"
+#include "access/xact.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "commands/async.h"
@@ -177,8 +178,6 @@
extern bool SharedBufferChanged;
-void RecordTransactionCommit(void);
-
static void AbortTransaction(void);
static void AtAbort_Cache(void);
static void AtAbort_Locks(void);
@@ -216,12 +215,14 @@ TransactionStateData CurrentTransactionStateData = {
TransactionState CurrentTransactionState = &CurrentTransactionStateData;
+/*
+ * User-tweakable parameters
+ */
int DefaultXactIsoLevel = XACT_READ_COMMITTED;
int XactIsoLevel;
-#include "access/xlogutils.h"
-
-int CommitDelay = 0; /* in microseconds */
+int CommitDelay = 0; /* precommit delay in microseconds */
+int CommitSiblings = 5; /* number of concurrent xacts needed to sleep */
static void (*_RollbackFunc)(void*) = NULL;
static void *_RollbackData = NULL;
@@ -687,10 +688,15 @@ RecordTransactionCommit()
* Sleep before commit! So we can flush more than one
* commit records per single fsync. (The idea is some other
* backend may do the XLogFlush while we're sleeping. This
- * needs work however, because on most Unixen, the minimum
+ * needs work still, because on most Unixen, the minimum
* select() delay is 10msec or more, which is way too long.)
+ *
+ * We do not sleep if enableFsync is not turned on, nor if there
+ * are fewer than CommitSiblings other backends with active
+ * transactions.
*/
- if (CommitDelay > 0)
+ if (CommitDelay > 0 && enableFsync &&
+ CountActiveBackends() >= CommitSiblings)
{
struct timeval delay;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index fbc4223034..fc861aba12 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.54 2001/02/18 04:39:42 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.55 2001/02/26 00:50:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2096,8 +2096,6 @@ ShutdownXLOG()
elog(LOG, "database system is shut down");
}
-extern XLogRecPtr GetUndoRecPtr(void);
-
void
CreateCheckPoint(bool shutdown)
{
diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c
index 2907fca1a1..44b8d11625 100644
--- a/src/backend/storage/ipc/sinval.c
+++ b/src/backend/storage/ipc/sinval.c
@@ -8,16 +8,14 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.25 2001/01/24 19:43:07 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.26 2001/02/26 00:50:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-/* #define INVALIDDEBUG 1 */
+#include "postgres.h"
#include
-#include "postgres.h"
-
#include "storage/backendid.h"
#include "storage/proc.h"
#include "storage/sinval.h"
@@ -347,11 +345,54 @@ GetSnapshotData(bool serializable)
return snapshot;
}
+/*
+ * CountActiveBackends --- return the number of backends (other than
+ * myself) that are in active transactions. This is used as a heuristic
+ * to decide if a pre-XLOG-flush delay is worthwhile during commit.
+ *
+ * An active transaction is one that has written at least one XLOG record
+ * (its logRec.xrecoff is nonzero); read-only transactions don't count.
+ * Also, do not count backends that are blocked waiting for locks, since
+ * they are not going to get to run until someone else commits.
+ */
+int
+CountActiveBackends(void)
+{
+ SISeg *segP = shmInvalBuffer;
+ ProcState *stateP = segP->procState;
+ int count = 0;
+ int index;
+
+ /*
+ * Note: for speed, we don't acquire SInvalLock. This is a little bit
+ * bogus, but since we are only testing xrecoff for zero or nonzero,
+ * it should be OK. The result is only used for heuristic purposes
+ * anyway, so it need not be exact.
+ */
+ for (index = 0; index < segP->lastBackend; index++)
+ {
+ SHMEM_OFFSET pOffset = stateP[index].procStruct;
+
+ if (pOffset != INVALID_OFFSET)
+ {
+ PROC *proc = (PROC *) MAKE_PTR(pOffset);
+
+ if (proc == MyProc)
+ continue; /* do not count myself */
+ if (proc->logRec.xrecoff == 0)
+ continue; /* no XLOG record written, so not active */
+ if (proc->waitLock != NULL)
+ continue; /* do not count if blocked on a lock */
+ count++;
+ }
+ }
+
+ return count;
+}
+
/*
* GetUndoRecPtr -- returns oldest PROC->logRec.
*/
-XLogRecPtr GetUndoRecPtr(void);
-
XLogRecPtr
GetUndoRecPtr(void)
{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8c89fa56af..b2853917ac 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -4,7 +4,7 @@
* Support for grand unified configuration scheme, including SET
* command, configuration file, and command line options.
*
- * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.30 2001/02/18 04:50:43 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.31 2001/02/26 00:50:07 tgl Exp $
*
* Copyright 2000 by PostgreSQL Global Development Group
* Written by Peter Eisentraut .
@@ -41,6 +41,7 @@ extern int XLOGbuffers;
extern int XLOGfiles;
extern int XLOG_DEBUG;
extern int CommitDelay;
+extern int CommitSiblings;
extern bool FixBTree;
@@ -181,7 +182,7 @@ ConfigureNamesBool[] =
{"tcpip_socket", PGC_POSTMASTER, &NetServer, false},
{"ssl", PGC_POSTMASTER, &EnableSSL, false},
- {"fsync", PGC_USERSET, &enableFsync, true},
+ {"fsync", PGC_SIGHUP, &enableFsync, true},
{"silent_mode", PGC_POSTMASTER, &SilentMode, false},
{"log_connections", PGC_SIGHUP, &Log_connections, false},
@@ -279,7 +280,7 @@ ConfigureNamesInt[] =
0777, 0000, 0777},
{"checkpoint_timeout", PGC_POSTMASTER, &CheckPointTimeout,
- 300, 30, 1800},
+ 300, 30, 3600},
{"wal_buffers", PGC_POSTMASTER, &XLOGbuffers,
8, 4, INT_MAX},
@@ -293,6 +294,9 @@ ConfigureNamesInt[] =
{"commit_delay", PGC_USERSET, &CommitDelay,
0, 0, 100000},
+ {"commit_siblings", PGC_USERSET, &CommitSiblings,
+ 5, 1, 1000},
+
{NULL, 0, NULL, 0, 0, 0}
};
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 7670fdffab..f599d97cff 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -109,7 +109,8 @@
#wal_files = 0 # range 0-64
#wal_debug = 0 # range 0-16
#commit_delay = 0 # range 0-100000
-#checkpoint_timeout = 300 # range 30-1800
+#commit_siblings = 5 # range 1-1000
+#checkpoint_timeout = 300 # in seconds, range 30-3600
#
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 8e73b57b66..8528310b18 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -3,7 +3,7 @@
*
* Copyright 2000 by PostgreSQL Global Development Group
*
- * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.26 2001/02/10 02:31:28 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.27 2001/02/26 00:50:07 tgl Exp $
*/
/*----------------------------------------------------------------------
@@ -241,6 +241,7 @@ psql_completion(char *text, int start, int end)
"debug_level",
"max_expr_depth",
"commit_delay",
+ "commit_siblings",
"effective_cache_size",
"random_page_cost",
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 47e14c3a82..c17bf32cc5 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -3,7 +3,7 @@
*
* PostgreSQL transaction log manager
*
- * $Header: /cvsroot/pgsql/src/include/access/xlog.h,v 1.17 2001/01/14 05:08:16 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/include/access/xlog.h,v 1.18 2001/02/26 00:50:07 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
@@ -146,4 +146,9 @@ extern void ShutdownXLOG(void);
extern void CreateCheckPoint(bool shutdown);
extern void SetThisStartUpID(void);
+/* in storage/ipc/sinval.c, but don't want to declare in sinval.h because
+ * we'd have to include xlog.h into that ...
+ */
+extern XLogRecPtr GetUndoRecPtr(void);
+
#endif /* XLOG_H */
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 4dd5a8c2a6..6290e83d50 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: proc.h,v 1.39 2001/01/25 03:31:16 tgl Exp $
+ * $Id: proc.h,v 1.40 2001/02/26 00:50:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -50,6 +50,10 @@ struct proc
* were starting our xact: vacuum must not
* remove tuples deleted by xid >= xmin ! */
+ /* XLOG location of first XLOG record written by this backend's current
+ * transaction. If backend is not in a transaction or hasn't yet modified
+ * anything, logRec.xrecoff is zero.
+ */
XLogRecPtr logRec;
/* Info about lock the process is currently waiting for, if any. */
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h
index 1b04bcec90..93a10e72fe 100644
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: sinval.h,v 1.17 2001/01/24 19:43:28 momjian Exp $
+ * $Id: sinval.h,v 1.18 2001/02/26 00:50:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -30,6 +30,6 @@ extern void InvalidateSharedInvalid(void (*invalFunction) (),
extern bool DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself);
extern bool TransactionIdIsInProgress(TransactionId xid);
extern void GetXmaxRecent(TransactionId *XmaxRecent);
-
+extern int CountActiveBackends(void);
#endif /* SINVAL_H */