diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile index 3a712191a49..ac2401232bb 100644 --- a/src/backend/access/heap/Makefile +++ b/src/backend/access/heap/Makefile @@ -4,7 +4,7 @@ # Makefile for access/heap # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/access/heap/Makefile,v 1.15 2007/04/08 01:26:27 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/access/heap/Makefile,v 1.16 2007/06/08 18:23:52 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/access/heap top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = heapam.o hio.o rewriteheap.o tuptoaster.o +OBJS = heapam.o hio.o rewriteheap.o syncscan.o tuptoaster.o all: SUBSYS.o diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0b20e5e9a8d..a0b561c209e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.235 2007/06/08 18:23:52 tgl Exp $ * * * INTERFACE ROUTINES @@ -78,29 +78,44 @@ initscan(HeapScanDesc scan, ScanKey key) * Determine the number of blocks we have to scan. * * It is sufficient to do this once at scan start, since any tuples added - * while the scan is in progress will be invisible to my transaction - * anyway... + * while the scan is in progress will be invisible to my snapshot + * anyway. (That is not true when using a non-MVCC snapshot. However, + * we couldn't guarantee to return tuples added after scan start anyway, + * since they might go into pages we already scanned. To guarantee + * consistent results for a non-MVCC snapshot, the caller must hold some + * higher-level lock that ensures the interesting tuple(s) won't change.) */ scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd); /* * If the table is large relative to NBuffers, use a bulk-read access - * strategy, else use the default random-access strategy. During a - * rescan, don't make a new strategy object if we don't have to. + * strategy and enable synchronized scanning (see syncscan.c). Although + * the thresholds for these features could be different, we make them the + * same so that there are only two behaviors to tune rather than four. + * + * During a rescan, don't make a new strategy object if we don't have to. 
*/ if (scan->rs_nblocks > NBuffers / 4 && !scan->rs_rd->rd_istemp) { if (scan->rs_strategy == NULL) scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD); + + scan->rs_syncscan = true; + scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks); } else { if (scan->rs_strategy != NULL) FreeAccessStrategy(scan->rs_strategy); scan->rs_strategy = NULL; + + scan->rs_syncscan = false; + scan->rs_startblock = 0; } + /* rs_pageatatime was set when the snapshot was filled in */ + scan->rs_inited = false; scan->rs_ctup.t_data = NULL; ItemPointerSetInvalid(&scan->rs_ctup.t_self); @@ -229,6 +244,7 @@ heapgettup(HeapScanDesc scan, Snapshot snapshot = scan->rs_snapshot; bool backward = ScanDirectionIsBackward(dir); BlockNumber page; + bool finished; Page dp; int lines; OffsetNumber lineoff; @@ -251,7 +267,7 @@ heapgettup(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = 0; /* first page */ + page = scan->rs_startblock; /* first page */ heapgetpage(scan, page); lineoff = FirstOffsetNumber; /* first offnum */ scan->rs_inited = true; @@ -285,7 +301,18 @@ heapgettup(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = scan->rs_nblocks - 1; /* final page */ + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. + */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; heapgetpage(scan, page); } else @@ -397,10 +424,43 @@ heapgettup(HeapScanDesc scan, */ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + /* + * advance to next/prior page and detect end of scan + */ + if (backward) + { + finished = (page == scan->rs_startblock); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock); + + /* + * Report our new scan position for synchronization purposes. + * We don't do that when moving backwards, however. That would + * just mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } + /* * return NULL if we've exhausted all the pages */ - if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks)) + if (finished) { if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); @@ -411,8 +471,6 @@ heapgettup(HeapScanDesc scan, return; } - page = backward ? 
(page - 1) : (page + 1); - heapgetpage(scan, page); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); @@ -455,6 +513,7 @@ heapgettup_pagemode(HeapScanDesc scan, HeapTuple tuple = &(scan->rs_ctup); bool backward = ScanDirectionIsBackward(dir); BlockNumber page; + bool finished; Page dp; int lines; int lineindex; @@ -478,7 +537,7 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = 0; /* first page */ + page = scan->rs_startblock; /* first page */ heapgetpage(scan, page); lineindex = 0; scan->rs_inited = true; @@ -509,7 +568,18 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = scan->rs_nblocks - 1; /* final page */ + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. + */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; heapgetpage(scan, page); } else @@ -616,11 +686,40 @@ heapgettup_pagemode(HeapScanDesc scan, * if we get here, it means we've exhausted the items on this page and * it's time to move to the next. */ + if (backward) + { + finished = (page == scan->rs_startblock); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock); + + /* + * Report our new scan position for synchronization purposes. + * We don't do that when moving backwards, however. That would + * just mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } /* * return NULL if we've exhausted all the pages */ - if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks)) + if (finished) { if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); @@ -631,7 +730,6 @@ heapgettup_pagemode(HeapScanDesc scan, return; } - page = backward ? (page - 1) : (page + 1); heapgetpage(scan, page); dp = (Page) BufferGetPage(scan->rs_cbuf); diff --git a/src/backend/access/heap/syncscan.c b/src/backend/access/heap/syncscan.c new file mode 100644 index 00000000000..795efccc090 --- /dev/null +++ b/src/backend/access/heap/syncscan.c @@ -0,0 +1,318 @@ +/*------------------------------------------------------------------------- + * + * syncscan.c + * heap scan synchronization support + * + * When multiple backends run a sequential scan on the same table, we try + * to keep them synchronized to reduce the overall I/O needed. The goal is + * to read each page into shared buffer cache only once, and let all backends + * that take part in the shared scan process the page before it falls out of + * the cache. + * + * Since the "leader" in a pack of backends doing a seqscan will have to wait + * for I/O, while the "followers" don't, there is a strong self-synchronizing + * effect once we can get the backends examining approximately the same part + * of the table at the same time. 
Hence all that is really needed is to get + * a new backend beginning a seqscan to begin it close to where other backends + * are reading. We can scan the table circularly, from block X up to the + * end and then from block 0 to X-1, to ensure we visit all rows while still + * participating in the common scan. + * + * To accomplish that, we keep track of the scan position of each table, and + * start new scans close to where the previous scan(s) are. We don't try to + * do any extra synchronization to keep the scans together afterwards; some + * scans might progress much more slowly than others, for example if the + * results need to be transferred to the client over a slow network, and we + * don't want such queries to slow down others. + * + * There can realistically only be a few large sequential scans on different + * tables in progress at any time. Therefore we just keep the scan positions + * in a small LRU list which we scan every time we need to look up or update a + * scan position. The whole mechanism is only applied for tables exceeding + * a threshold size (but that is not the concern of this module). + * + * INTERFACE ROUTINES + * ss_get_location - return current scan location of a relation + * ss_report_location - update current scan location + * + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/access/heap/syncscan.c,v 1.1 2007/06/08 18:23:52 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "miscadmin.h" + + +/* GUC variables */ +#ifdef TRACE_SYNCSCAN +bool trace_syncscan = false; +#endif + + +/* + * Size of the LRU list. + * + * Note: the code assumes that SYNC_SCAN_NELEM > 1. + * + * XXX: What's a good value? It should be large enough to hold the + * maximum number of large tables scanned simultaneously. But a larger value + * means more traversing of the LRU list when starting a new scan. + */ +#define SYNC_SCAN_NELEM 20 + +/* + * Interval between reports of the location of the current scan, in pages. + * + * Note: This should be smaller than the ring size (see buffer/freelist.c) + * we use for bulk reads. Otherwise a scan joining other scans might start + * from a page that's no longer in the buffer cache. This is a bit fuzzy; + * there's no guarantee that the new scan will read the page before it leaves + * the buffer cache anyway, and on the other hand the page is most likely + * still in the OS cache. + */ +#define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ) + + +/* + * The scan locations structure is essentially a doubly-linked LRU with head + * and tail pointer, but designed to hold a fixed maximum number of elements in + * fixed-size shared memory. 
+ */ +typedef struct ss_scan_location_t +{ + RelFileNode relfilenode; /* identity of a relation */ + BlockNumber location; /* last-reported location in the relation */ +} ss_scan_location_t; + +typedef struct ss_lru_item_t +{ + struct ss_lru_item_t *prev; + struct ss_lru_item_t *next; + ss_scan_location_t location; +} ss_lru_item_t; + +typedef struct ss_scan_locations_t +{ + ss_lru_item_t *head; + ss_lru_item_t *tail; + ss_lru_item_t items[1]; /* SYNC_SCAN_NELEM items */ +} ss_scan_locations_t; + +#define SizeOfScanLocations(N) offsetof(ss_scan_locations_t, items[N]) + +/* Pointer to struct in shared memory */ +static ss_scan_locations_t *scan_locations; + +/* prototypes for internal functions */ +static BlockNumber ss_search(RelFileNode relfilenode, + BlockNumber location, bool set); + + +/* + * SyncScanShmemSize --- report amount of shared memory space needed + */ +Size +SyncScanShmemSize(void) +{ + return SizeOfScanLocations(SYNC_SCAN_NELEM); +} + +/* + * SyncScanShmemInit --- initialize this module's shared memory + */ +void +SyncScanShmemInit(void) +{ + int i; + bool found; + + scan_locations = (ss_scan_locations_t *) + ShmemInitStruct("Sync Scan Locations List", + SizeOfScanLocations(SYNC_SCAN_NELEM), + &found); + + if (!IsUnderPostmaster) + { + /* Initialize shared memory area */ + Assert(!found); + + scan_locations->head = &scan_locations->items[0]; + scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1]; + + for (i = 0; i < SYNC_SCAN_NELEM; i++) + { + ss_lru_item_t *item = &scan_locations->items[i]; + + /* + * Initialize all slots with invalid values. As scans are started, + * these invalid entries will fall off the LRU list and get + * replaced with real entries. + */ + item->location.relfilenode.spcNode = InvalidOid; + item->location.relfilenode.dbNode = InvalidOid; + item->location.relfilenode.relNode = InvalidOid; + item->location.location = InvalidBlockNumber; + + item->prev = (i > 0) ? + (&scan_locations->items[i - 1]) : NULL; + item->next = (i < SYNC_SCAN_NELEM - 1) ? + (&scan_locations->items[i + 1]) : NULL; + } + } + else + Assert(found); +} + +/* + * ss_search --- search the scan_locations structure for an entry with the + * given relfilenode. + * + * If "set" is true, the location is updated to the given location. If no + * entry for the given relfilenode is found, it will be created at the head + * of the list with the given location, even if "set" is false. + * + * In any case, the location after possible update is returned. + * + * Caller is responsible for having acquired suitable lock on the shared + * data structure. 
+ */ +static BlockNumber +ss_search(RelFileNode relfilenode, BlockNumber location, bool set) +{ + ss_lru_item_t *item; + + item = scan_locations->head; + for (;;) + { + bool match; + + match = RelFileNodeEquals(item->location.relfilenode, relfilenode); + + if (match || item->next == NULL) + { + /* + * If we reached the end of list and no match was found, + * take over the last entry + */ + if (!match) + { + item->location.relfilenode = relfilenode; + item->location.location = location; + } + else if (set) + item->location.location = location; + + /* Move the entry to the front of the LRU list */ + if (item != scan_locations->head) + { + /* unlink */ + if (item == scan_locations->tail) + scan_locations->tail = item->prev; + item->prev->next = item->next; + if (item->next) + item->next->prev = item->prev; + + /* link */ + item->prev = NULL; + item->next = scan_locations->head; + scan_locations->head->prev = item; + scan_locations->head = item; + } + + return item->location.location; + } + + item = item->next; + } + + /* not reached */ +} + +/* + * ss_get_location --- get the optimal starting location for scan + * + * Returns the last-reported location of a sequential scan on the + * relation, or 0 if no valid location is found. + * + * We expect the caller has just done RelationGetNumberOfBlocks(), and + * so that number is passed in rather than computing it again. The result + * is guaranteed less than relnblocks (assuming that's > 0). + */ +BlockNumber +ss_get_location(Relation rel, BlockNumber relnblocks) +{ + BlockNumber startloc; + + LWLockAcquire(SyncScanLock, LW_EXCLUSIVE); + startloc = ss_search(rel->rd_node, 0, false); + LWLockRelease(SyncScanLock); + + /* + * If the location is not a valid block number for this scan, start at 0. + * + * This can happen if for instance a VACUUM truncated the table + * since the location was saved. + */ + if (startloc >= relnblocks) + startloc = 0; + +#ifdef TRACE_SYNCSCAN + if (trace_syncscan) + elog(LOG, + "SYNC_SCAN: start \"%s\" (size %u) at %u", + RelationGetRelationName(rel), relnblocks, startloc); +#endif + + return startloc; +} + +/* + * ss_report_location --- update the current scan location + * + * Writes an entry into the shared Sync Scan state of the form + * (relfilenode, blocknumber), overwriting any existing entry for the + * same relfilenode. + */ +void +ss_report_location(Relation rel, BlockNumber location) +{ +#ifdef TRACE_SYNCSCAN + if (trace_syncscan) + { + if ((location % 1024) == 0) + elog(LOG, + "SYNC_SCAN: scanning \"%s\" at %u", + RelationGetRelationName(rel), location); + } +#endif + + /* + * To reduce lock contention, only report scan progress every N pages. + * For the same reason, don't block if the lock isn't immediately + * available. Missing a few updates isn't critical, it just means that a + * new scan that wants to join the pack will start a little bit behind the + * head of the scan. Hopefully the pages are still in OS cache and the + * scan catches up quickly. 
+ */ + if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0) + { + if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE)) + { + (void) ss_search(rel->rd_node, location, true); + LWLockRelease(SyncScanLock); + } +#ifdef TRACE_SYNCSCAN + else if (trace_syncscan) + elog(LOG, + "SYNC_SCAN: missed update for \"%s\" at %u", + RelationGetRelationName(rel), location); +#endif + } +} diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index d8eec0f8231..a3e2c7c4422 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.59 2007/05/30 20:11:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.60 2007/06/08 18:23:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -340,6 +340,9 @@ GetAccessStrategy(BufferAccessStrategyType btype) * Select ring size to use. See buffer/README for rationales. * (Currently all cases are the same size, but keep this code * structure for flexibility.) + * + * Note: if you change the ring size for BAS_BULKREAD, see also + * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c. */ switch (btype) { diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 0296cbbcfc4..86c54448e4c 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -8,13 +8,14 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.91 2007/02/15 23:23:23 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.92 2007/06/08 18:23:52 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/clog.h" +#include "access/heapam.h" #include "access/multixact.h" #include "access/nbtree.h" #include "access/subtrans.h" @@ -112,6 +113,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, BgWriterShmemSize()); size = add_size(size, AutoVacuumShmemSize()); size = add_size(size, BTreeShmemSize()); + size = add_size(size, SyncScanShmemSize()); #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -216,6 +218,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) * Set up other modules that need some shared memory space */ BTreeShmemInit(); + SyncScanShmemInit(); #ifdef EXEC_BACKEND diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 6e412e328a1..387c4ae1531 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . 
* * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.395 2007/06/05 21:50:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.396 2007/06/08 18:23:52 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -109,6 +109,9 @@ extern bool fullPageWrites; #ifdef TRACE_SORT extern bool trace_sort; #endif +#ifdef TRACE_SYNCSCAN +extern bool trace_syncscan; +#endif #ifdef DEBUG_BOUNDED_SORT extern bool optimize_bounded_sort; #endif @@ -970,6 +973,19 @@ static struct config_bool ConfigureNamesBool[] = }, #endif +#ifdef TRACE_SYNCSCAN + /* this is undocumented because not exposed in a standard build */ + { + {"trace_syncscan", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Generate debugging output for synchronized scanning."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &trace_syncscan, + false, NULL, NULL + }, +#endif + #ifdef DEBUG_BOUNDED_SORT /* this is undocumented because not exposed in a standard build */ { diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index ebb2e984c24..206159bdad7 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.124 2007/05/27 03:50:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.125 2007/06/08 18:23:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -112,6 +112,13 @@ extern Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, ) +typedef enum +{ + LockTupleShared, + LockTupleExclusive +} LockTupleMode; + + /* ---------------- * function prototypes for heap access method * @@ -120,14 +127,7 @@ extern Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, * ---------------- */ -/* heapam.c */ - -typedef enum -{ - LockTupleShared, - LockTupleExclusive -} LockTupleMode; - +/* in heap/heapam.c */ extern Relation relation_open(Oid relationId, LOCKMODE lockmode); extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode); extern Relation relation_open_nowait(Oid relationId, LOCKMODE lockmode); @@ -240,4 +240,10 @@ extern HeapTuple heap_addheader(int natts, bool withoid, extern void heap_sync(Relation relation); +/* in heap/syncscan.c */ +extern void ss_report_location(Relation rel, BlockNumber location); +extern BlockNumber ss_get_location(Relation rel, BlockNumber relnblocks); +extern void SyncScanShmemInit(void); +extern Size SyncScanShmemSize(void); + #endif /* HEAPAM_H */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 200b45713e7..b45b2caabf1 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.54 2007/05/30 20:12:02 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.55 2007/06/08 18:23:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,9 +26,13 @@ typedef struct HeapScanDescData Snapshot rs_snapshot; /* snapshot to see */ int rs_nkeys; /* number of scan keys */ ScanKey rs_key; /* array of scan key descriptors */ + + /* state set up at initscan time */ BlockNumber rs_nblocks; /* number of blocks to scan */ + BlockNumber 
rs_startblock; /* block # to start at */ BufferAccessStrategy rs_strategy; /* access strategy for reads */ bool rs_pageatatime; /* verify visibility page-at-a-time? */ + bool rs_syncscan; /* report location to syncscan logic? */ /* scan current state */ bool rs_inited; /* false = scan not init'd yet */ diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index a5b3f98a8eb..354a3d370eb 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -6,7 +6,7 @@ * for developers. If you edit any of these, be sure to do a *full* * rebuild (and an initdb if noted). * - * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.26 2007/02/23 21:36:19 momjian Exp $ + * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.27 2007/06/08 18:23:53 tgl Exp $ *------------------------------------------------------------------------ */ @@ -249,6 +249,11 @@ */ #define TRACE_SORT 1 +/* + * Enable tracing of syncscan operations (see also the trace_syncscan GUC var). + */ +/* #define TRACE_SYNCSCAN */ + /* * Other debug #defines (documentation, anyone?) */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 477284b7d1d..046064cdc1f 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.36 2007/04/16 18:30:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.37 2007/06/08 18:23:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,6 +62,7 @@ typedef enum LWLockId AddinShmemInitLock, AutovacuumLock, AutovacuumScheduleLock, + SyncScanLock, /* Individual lock IDs end here */ FirstBufMappingLock, FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
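
The page-advance hunks in heapgettup() and heapgettup_pagemode() above replace the old linear walk (block 0 through rs_nblocks - 1) with a circular walk that starts at rs_startblock, wraps past the last block to block 0, and finishes when the advance lands back on the start block. The following standalone sketch (hypothetical names; printf() stands in for heapgetpage(); assumes nblocks > 0 and startblock < nblocks) shows that the advance-and-test order visits every page exactly once in either direction:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int BlockNumber;

/*
 * Sketch of the circular page advance added to heapgettup() and
 * heapgettup_pagemode().  Visits each of the 'nblocks' pages exactly
 * once, starting at 'startblock' and wrapping around; the scan is done
 * when the advance arrives back at the start block.
 */
static void
scan_circularly(BlockNumber nblocks, BlockNumber startblock, bool backward)
{
    BlockNumber page;
    bool        finished = false;

    /* initial page, as in the patch's !rs_inited branches */
    if (backward)
        page = (startblock > 0) ? startblock - 1 : nblocks - 1;
    else
        page = startblock;

    while (!finished)
    {
        printf("reading page %u\n", (unsigned) page);

        /* advance to next/prior page and detect end of scan */
        if (backward)
        {
            finished = (page == startblock);
            if (page == 0)
                page = nblocks;
            page--;
        }
        else
        {
            page++;
            if (page >= nblocks)
                page = 0;
            finished = (page == startblock);
        }
    }
}

int
main(void)
{
    scan_circularly(5, 3, false);   /* visits pages 3 4 0 1 2 */
    scan_circularly(5, 3, true);    /* visits pages 2 1 0 4 3 */
    return 0;
}

Note that the forward path computes the wrapped position before the finished test, which is why the patch can report that position to ss_report_location() first: the hint ends up back at the scan's own start block, so re-running the same query starts in roughly the same place.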
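
ss_search() keeps the per-table scan positions in a fixed-size, move-to-front doubly-linked list rather than a hash table, on the theory that only a handful of large scans are active at once. Here is a simplified single-threaded sketch of that LRU discipline; plain int keys stand in for RelFileNode, static storage for the shared-memory segment, and there is no SyncScanLock. All names are hypothetical:

#include <stdio.h>

#define NELEM 4                 /* cf. SYNC_SCAN_NELEM; must be > 1 */

typedef struct item_t
{
    struct item_t *prev;
    struct item_t *next;
    int         key;            /* -1 = invalid slot */
    int         value;
} item_t;

static item_t items[NELEM];
static item_t *head;
static item_t *tail;

static void
lru_init(void)
{
    int         i;

    for (i = 0; i < NELEM; i++)
    {
        items[i].key = -1;
        items[i].value = 0;
        items[i].prev = (i > 0) ? &items[i - 1] : NULL;
        items[i].next = (i < NELEM - 1) ? &items[i + 1] : NULL;
    }
    head = &items[0];
    tail = &items[NELEM - 1];
}

/*
 * Find 'key', recycling the tail slot if it is absent; update the value
 * when 'set' is true; move the entry to the front; return its value.
 */
static int
lru_search(int key, int value, int set)
{
    item_t     *item = head;

    for (;;)
    {
        int         match = (item->key == key);

        if (match || item->next == NULL)
        {
            if (!match)
            {
                item->key = key;        /* take over the LRU (tail) slot */
                item->value = value;
            }
            else if (set)
                item->value = value;

            if (item != head)
            {
                /* unlink */
                if (item == tail)
                    tail = item->prev;
                item->prev->next = item->next;
                if (item->next)
                    item->next->prev = item->prev;
                /* relink at front */
                item->prev = NULL;
                item->next = head;
                head->prev = item;
                head = item;
            }
            return item->value;
        }
        item = item->next;
    }
}

int
main(void)
{
    lru_init();
    lru_search(42, 100, 1);                 /* report a scan position */
    printf("%d\n", lru_search(42, 0, 0));   /* look it up: prints 100 */
    return 0;
}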
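
SizeOfScanLocations(N) sizes the shared structure with the offsetof-into-a-flexible-array idiom: the allocation covers everything up to the start of items[] plus N elements, even though the struct declares items[1]. A minimal standalone illustration of the same trick, using malloc in place of ShmemInitStruct and hypothetical names:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for ss_scan_locations_t */
typedef struct varstruct
{
    int         head;
    int         tail;
    int         items[1];       /* really N items, as in the patch */
} varstruct;

#define SizeOfVarStruct(N) offsetof(varstruct, items[N])

int
main(void)
{
    int         n = 20;         /* cf. SYNC_SCAN_NELEM */
    varstruct  *p = malloc(SizeOfVarStruct(n));
    int         i;

    if (p == NULL)
        return 1;
    /* the allocation covers n elements despite the items[1] declaration */
    for (i = 0; i < n; i++)
        p->items[i] = i;
    printf("allocated %zu bytes for %d items\n", SizeOfVarStruct(n), n);
    free(p);
    return 0;
}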
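
ss_report_location() avoids becoming a contention point in two ways: it only attempts an update every SYNC_SCAN_REPORT_INTERVAL pages (128 * 1024 / BLCKSZ = 16 pages at the default 8 kB BLCKSZ, i.e. one report per 128 kB scanned, below the bulk-read ring size noted in freelist.c), and it uses LWLockConditionalAcquire so a busy lock means a skipped report rather than a stall. A sketch of the same pattern, with a POSIX try-lock standing in for the lwlock and hypothetical names throughout:

#include <pthread.h>
#include <stdio.h>

#define BLCKSZ 8192             /* default PostgreSQL block size */
#define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)     /* = 16 pages */

static pthread_mutex_t sync_scan_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned scan_position;  /* the shared location hint */

/*
 * Report only on every SYNC_SCAN_REPORT_INTERVAL'th page, and skip
 * (rather than wait for) a contended lock; a missed update merely
 * leaves the hint a little stale.
 */
static void
report_location(unsigned location)
{
    if (location % SYNC_SCAN_REPORT_INTERVAL != 0)
        return;

    if (pthread_mutex_trylock(&sync_scan_lock) == 0)
    {
        scan_position = location;
        pthread_mutex_unlock(&sync_scan_lock);
    }
}

int
main(void)
{
    unsigned    page;

    for (page = 0; page < 64; page++)
        report_location(page);  /* updates land at pages 0, 16, 32, 48 */
    printf("last reported position: %u\n", scan_position);
    return 0;
}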