diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile index 3a712191a49..ac2401232bb 100644 --- a/src/backend/access/heap/Makefile +++ b/src/backend/access/heap/Makefile @@ -4,7 +4,7 @@ # Makefile for access/heap # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/access/heap/Makefile,v 1.15 2007/04/08 01:26:27 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/access/heap/Makefile,v 1.16 2007/06/08 18:23:52 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/access/heap top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = heapam.o hio.o rewriteheap.o tuptoaster.o +OBJS = heapam.o hio.o rewriteheap.o syncscan.o tuptoaster.o all: SUBSYS.o diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0b20e5e9a8d..a0b561c209e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.235 2007/06/08 18:23:52 tgl Exp $ * * * INTERFACE ROUTINES @@ -78,29 +78,44 @@ initscan(HeapScanDesc scan, ScanKey key) * Determine the number of blocks we have to scan. * * It is sufficient to do this once at scan start, since any tuples added - * while the scan is in progress will be invisible to my transaction - * anyway... + * while the scan is in progress will be invisible to my snapshot + * anyway. (That is not true when using a non-MVCC snapshot. However, + * we couldn't guarantee to return tuples added after scan start anyway, + * since they might go into pages we already scanned. To guarantee + * consistent results for a non-MVCC snapshot, the caller must hold some + * higher-level lock that ensures the interesting tuple(s) won't change.) */ scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd); /* * If the table is large relative to NBuffers, use a bulk-read access - * strategy, else use the default random-access strategy. During a - * rescan, don't make a new strategy object if we don't have to. + * strategy and enable synchronized scanning (see syncscan.c). Although + * the thresholds for these features could be different, we make them the + * same so that there are only two behaviors to tune rather than four. + * + * During a rescan, don't make a new strategy object if we don't have to. 
*/ if (scan->rs_nblocks > NBuffers / 4 && !scan->rs_rd->rd_istemp) { if (scan->rs_strategy == NULL) scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD); + + scan->rs_syncscan = true; + scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks); } else { if (scan->rs_strategy != NULL) FreeAccessStrategy(scan->rs_strategy); scan->rs_strategy = NULL; + + scan->rs_syncscan = false; + scan->rs_startblock = 0; } + /* rs_pageatatime was set when the snapshot was filled in */ + scan->rs_inited = false; scan->rs_ctup.t_data = NULL; ItemPointerSetInvalid(&scan->rs_ctup.t_self); @@ -229,6 +244,7 @@ heapgettup(HeapScanDesc scan, Snapshot snapshot = scan->rs_snapshot; bool backward = ScanDirectionIsBackward(dir); BlockNumber page; + bool finished; Page dp; int lines; OffsetNumber lineoff; @@ -251,7 +267,7 @@ heapgettup(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = 0; /* first page */ + page = scan->rs_startblock; /* first page */ heapgetpage(scan, page); lineoff = FirstOffsetNumber; /* first offnum */ scan->rs_inited = true; @@ -285,7 +301,18 @@ heapgettup(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = scan->rs_nblocks - 1; /* final page */ + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. + */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; heapgetpage(scan, page); } else @@ -397,10 +424,43 @@ heapgettup(HeapScanDesc scan, */ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + /* + * advance to next/prior page and detect end of scan + */ + if (backward) + { + finished = (page == scan->rs_startblock); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock); + + /* + * Report our new scan position for synchronization purposes. + * We don't do that when moving backwards, however. That would + * just mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } + /* * return NULL if we've exhausted all the pages */ - if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks)) + if (finished) { if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); @@ -411,8 +471,6 @@ heapgettup(HeapScanDesc scan, return; } - page = backward ? 
(page - 1) : (page + 1); - heapgetpage(scan, page); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); @@ -455,6 +513,7 @@ heapgettup_pagemode(HeapScanDesc scan, HeapTuple tuple = &(scan->rs_ctup); bool backward = ScanDirectionIsBackward(dir); BlockNumber page; + bool finished; Page dp; int lines; int lineindex; @@ -478,7 +537,7 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = 0; /* first page */ + page = scan->rs_startblock; /* first page */ heapgetpage(scan, page); lineindex = 0; scan->rs_inited = true; @@ -509,7 +568,18 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = scan->rs_nblocks - 1; /* final page */ + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. + */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; heapgetpage(scan, page); } else @@ -616,11 +686,40 @@ heapgettup_pagemode(HeapScanDesc scan, * if we get here, it means we've exhausted the items on this page and * it's time to move to the next. */ + if (backward) + { + finished = (page == scan->rs_startblock); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock); + + /* + * Report our new scan position for synchronization purposes. + * We don't do that when moving backwards, however. That would + * just mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } /* * return NULL if we've exhausted all the pages */ - if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks)) + if (finished) { if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); @@ -631,7 +730,6 @@ heapgettup_pagemode(HeapScanDesc scan, return; } - page = backward ? (page - 1) : (page + 1); heapgetpage(scan, page); dp = (Page) BufferGetPage(scan->rs_cbuf); diff --git a/src/backend/access/heap/syncscan.c b/src/backend/access/heap/syncscan.c new file mode 100644 index 00000000000..795efccc090 --- /dev/null +++ b/src/backend/access/heap/syncscan.c @@ -0,0 +1,318 @@ +/*------------------------------------------------------------------------- + * + * syncscan.c + * heap scan synchronization support + * + * When multiple backends run a sequential scan on the same table, we try + * to keep them synchronized to reduce the overall I/O needed. The goal is + * to read each page into shared buffer cache only once, and let all backends + * that take part in the shared scan process the page before it falls out of + * the cache. + * + * Since the "leader" in a pack of backends doing a seqscan will have to wait + * for I/O, while the "followers" don't, there is a strong self-synchronizing + * effect once we can get the backends examining approximately the same part + * of the table at the same time. 
Hence all that is really needed is to get + * a new backend beginning a seqscan to begin it close to where other backends + * are reading. We can scan the table circularly, from block X up to the + * end and then from block 0 to X-1, to ensure we visit all rows while still + * participating in the common scan. + * + * To accomplish that, we keep track of the scan position of each table, and + * start new scans close to where the previous scan(s) are. We don't try to + * do any extra synchronization to keep the scans together afterwards; some + * scans might progress much more slowly than others, for example if the + * results need to be transferred to the client over a slow network, and we + * don't want such queries to slow down others. + * + * There can realistically only be a few large sequential scans on different + * tables in progress at any time. Therefore we just keep the scan positions + * in a small LRU list which we scan every time we need to look up or update a + * scan position. The whole mechanism is only applied for tables exceeding + * a threshold size (but that is not the concern of this module). + * + * INTERFACE ROUTINES + * ss_get_location - return current scan location of a relation + * ss_report_location - update current scan location + * + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/access/heap/syncscan.c,v 1.1 2007/06/08 18:23:52 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "miscadmin.h" + + +/* GUC variables */ +#ifdef TRACE_SYNCSCAN +bool trace_syncscan = false; +#endif + + +/* + * Size of the LRU list. + * + * Note: the code assumes that SYNC_SCAN_NELEM > 1. + * + * XXX: What's a good value? It should be large enough to hold the + * maximum number of large tables scanned simultaneously. But a larger value + * means more traversing of the LRU list when starting a new scan. + */ +#define SYNC_SCAN_NELEM 20 + +/* + * Interval between reports of the location of the current scan, in pages. + * + * Note: This should be smaller than the ring size (see buffer/freelist.c) + * we use for bulk reads. Otherwise a scan joining other scans might start + * from a page that's no longer in the buffer cache. This is a bit fuzzy; + * there's no guarantee that the new scan will read the page before it leaves + * the buffer cache anyway, and on the other hand the page is most likely + * still in the OS cache. + */ +#define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ) + + +/* + * The scan locations structure is essentially a doubly-linked LRU with head + * and tail pointer, but designed to hold a fixed maximum number of elements in + * fixed-size shared memory. 
+ */ +typedef struct ss_scan_location_t +{ + RelFileNode relfilenode; /* identity of a relation */ + BlockNumber location; /* last-reported location in the relation */ +} ss_scan_location_t; + +typedef struct ss_lru_item_t +{ + struct ss_lru_item_t *prev; + struct ss_lru_item_t *next; + ss_scan_location_t location; +} ss_lru_item_t; + +typedef struct ss_scan_locations_t +{ + ss_lru_item_t *head; + ss_lru_item_t *tail; + ss_lru_item_t items[1]; /* SYNC_SCAN_NELEM items */ +} ss_scan_locations_t; + +#define SizeOfScanLocations(N) offsetof(ss_scan_locations_t, items[N]) + +/* Pointer to struct in shared memory */ +static ss_scan_locations_t *scan_locations; + +/* prototypes for internal functions */ +static BlockNumber ss_search(RelFileNode relfilenode, + BlockNumber location, bool set); + + +/* + * SyncScanShmemSize --- report amount of shared memory space needed + */ +Size +SyncScanShmemSize(void) +{ + return SizeOfScanLocations(SYNC_SCAN_NELEM); +} + +/* + * SyncScanShmemInit --- initialize this module's shared memory + */ +void +SyncScanShmemInit(void) +{ + int i; + bool found; + + scan_locations = (ss_scan_locations_t *) + ShmemInitStruct("Sync Scan Locations List", + SizeOfScanLocations(SYNC_SCAN_NELEM), + &found); + + if (!IsUnderPostmaster) + { + /* Initialize shared memory area */ + Assert(!found); + + scan_locations->head = &scan_locations->items[0]; + scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1]; + + for (i = 0; i < SYNC_SCAN_NELEM; i++) + { + ss_lru_item_t *item = &scan_locations->items[i]; + + /* + * Initialize all slots with invalid values. As scans are started, + * these invalid entries will fall off the LRU list and get + * replaced with real entries. + */ + item->location.relfilenode.spcNode = InvalidOid; + item->location.relfilenode.dbNode = InvalidOid; + item->location.relfilenode.relNode = InvalidOid; + item->location.location = InvalidBlockNumber; + + item->prev = (i > 0) ? + (&scan_locations->items[i - 1]) : NULL; + item->next = (i < SYNC_SCAN_NELEM - 1) ? + (&scan_locations->items[i + 1]) : NULL; + } + } + else + Assert(found); +} + +/* + * ss_search --- search the scan_locations structure for an entry with the + * given relfilenode. + * + * If "set" is true, the location is updated to the given location. If no + * entry for the given relfilenode is found, it will be created at the head + * of the list with the given location, even if "set" is false. + * + * In any case, the location after possible update is returned. + * + * Caller is responsible for having acquired suitable lock on the shared + * data structure. 
+ */ +static BlockNumber +ss_search(RelFileNode relfilenode, BlockNumber location, bool set) +{ + ss_lru_item_t *item; + + item = scan_locations->head; + for (;;) + { + bool match; + + match = RelFileNodeEquals(item->location.relfilenode, relfilenode); + + if (match || item->next == NULL) + { + /* + * If we reached the end of list and no match was found, + * take over the last entry + */ + if (!match) + { + item->location.relfilenode = relfilenode; + item->location.location = location; + } + else if (set) + item->location.location = location; + + /* Move the entry to the front of the LRU list */ + if (item != scan_locations->head) + { + /* unlink */ + if (item == scan_locations->tail) + scan_locations->tail = item->prev; + item->prev->next = item->next; + if (item->next) + item->next->prev = item->prev; + + /* link */ + item->prev = NULL; + item->next = scan_locations->head; + scan_locations->head->prev = item; + scan_locations->head = item; + } + + return item->location.location; + } + + item = item->next; + } + + /* not reached */ +} + +/* + * ss_get_location --- get the optimal starting location for scan + * + * Returns the last-reported location of a sequential scan on the + * relation, or 0 if no valid location is found. + * + * We expect the caller has just done RelationGetNumberOfBlocks(), and + * so that number is passed in rather than computing it again. The result + * is guaranteed less than relnblocks (assuming that's > 0). + */ +BlockNumber +ss_get_location(Relation rel, BlockNumber relnblocks) +{ + BlockNumber startloc; + + LWLockAcquire(SyncScanLock, LW_EXCLUSIVE); + startloc = ss_search(rel->rd_node, 0, false); + LWLockRelease(SyncScanLock); + + /* + * If the location is not a valid block number for this scan, start at 0. + * + * This can happen if for instance a VACUUM truncated the table + * since the location was saved. + */ + if (startloc >= relnblocks) + startloc = 0; + +#ifdef TRACE_SYNCSCAN + if (trace_syncscan) + elog(LOG, + "SYNC_SCAN: start \"%s\" (size %u) at %u", + RelationGetRelationName(rel), relnblocks, startloc); +#endif + + return startloc; +} + +/* + * ss_report_location --- update the current scan location + * + * Writes an entry into the shared Sync Scan state of the form + * (relfilenode, blocknumber), overwriting any existing entry for the + * same relfilenode. + */ +void +ss_report_location(Relation rel, BlockNumber location) +{ +#ifdef TRACE_SYNCSCAN + if (trace_syncscan) + { + if ((location % 1024) == 0) + elog(LOG, + "SYNC_SCAN: scanning \"%s\" at %u", + RelationGetRelationName(rel), location); + } +#endif + + /* + * To reduce lock contention, only report scan progress every N pages. + * For the same reason, don't block if the lock isn't immediately + * available. Missing a few updates isn't critical, it just means that a + * new scan that wants to join the pack will start a little bit behind the + * head of the scan. Hopefully the pages are still in OS cache and the + * scan catches up quickly. 
+ */ + if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0) + { + if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE)) + { + (void) ss_search(rel->rd_node, location, true); + LWLockRelease(SyncScanLock); + } +#ifdef TRACE_SYNCSCAN + else if (trace_syncscan) + elog(LOG, + "SYNC_SCAN: missed update for \"%s\" at %u", + RelationGetRelationName(rel), location); +#endif + } +} diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index d8eec0f8231..a3e2c7c4422 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.59 2007/05/30 20:11:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.60 2007/06/08 18:23:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -340,6 +340,9 @@ GetAccessStrategy(BufferAccessStrategyType btype) * Select ring size to use. See buffer/README for rationales. * (Currently all cases are the same size, but keep this code * structure for flexibility.) + * + * Note: if you change the ring size for BAS_BULKREAD, see also + * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c. */ switch (btype) { diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 0296cbbcfc4..86c54448e4c 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -8,13 +8,14 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.91 2007/02/15 23:23:23 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.92 2007/06/08 18:23:52 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/clog.h" +#include "access/heapam.h" #include "access/multixact.h" #include "access/nbtree.h" #include "access/subtrans.h" @@ -112,6 +113,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, BgWriterShmemSize()); size = add_size(size, AutoVacuumShmemSize()); size = add_size(size, BTreeShmemSize()); + size = add_size(size, SyncScanShmemSize()); #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -216,6 +218,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) * Set up other modules that need some shared memory space */ BTreeShmemInit(); + SyncScanShmemInit(); #ifdef EXEC_BACKEND diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 6e412e328a1..387c4ae1531 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . 
* * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.395 2007/06/05 21:50:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.396 2007/06/08 18:23:52 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -109,6 +109,9 @@ extern bool fullPageWrites; #ifdef TRACE_SORT extern bool trace_sort; #endif +#ifdef TRACE_SYNCSCAN +extern bool trace_syncscan; +#endif #ifdef DEBUG_BOUNDED_SORT extern bool optimize_bounded_sort; #endif @@ -970,6 +973,19 @@ static struct config_bool ConfigureNamesBool[] = }, #endif +#ifdef TRACE_SYNCSCAN + /* this is undocumented because not exposed in a standard build */ + { + {"trace_syncscan", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Generate debugging output for synchronized scanning."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &trace_syncscan, + false, NULL, NULL + }, +#endif + #ifdef DEBUG_BOUNDED_SORT /* this is undocumented because not exposed in a standard build */ { diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index ebb2e984c24..206159bdad7 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.124 2007/05/27 03:50:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.125 2007/06/08 18:23:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -112,6 +112,13 @@ extern Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, ) +typedef enum +{ + LockTupleShared, + LockTupleExclusive +} LockTupleMode; + + /* ---------------- * function prototypes for heap access method * @@ -120,14 +127,7 @@ extern Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, * ---------------- */ -/* heapam.c */ - -typedef enum -{ - LockTupleShared, - LockTupleExclusive -} LockTupleMode; - +/* in heap/heapam.c */ extern Relation relation_open(Oid relationId, LOCKMODE lockmode); extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode); extern Relation relation_open_nowait(Oid relationId, LOCKMODE lockmode); @@ -240,4 +240,10 @@ extern HeapTuple heap_addheader(int natts, bool withoid, extern void heap_sync(Relation relation); +/* in heap/syncscan.c */ +extern void ss_report_location(Relation rel, BlockNumber location); +extern BlockNumber ss_get_location(Relation rel, BlockNumber relnblocks); +extern void SyncScanShmemInit(void); +extern Size SyncScanShmemSize(void); + #endif /* HEAPAM_H */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 200b45713e7..b45b2caabf1 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.54 2007/05/30 20:12:02 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.55 2007/06/08 18:23:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,9 +26,13 @@ typedef struct HeapScanDescData Snapshot rs_snapshot; /* snapshot to see */ int rs_nkeys; /* number of scan keys */ ScanKey rs_key; /* array of scan key descriptors */ + + /* state set up at initscan time */ BlockNumber rs_nblocks; /* number of blocks to scan */ + BlockNumber 
rs_startblock; /* block # to start at */ BufferAccessStrategy rs_strategy; /* access strategy for reads */ bool rs_pageatatime; /* verify visibility page-at-a-time? */ + bool rs_syncscan; /* report location to syncscan logic? */ /* scan current state */ bool rs_inited; /* false = scan not init'd yet */ diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index a5b3f98a8eb..354a3d370eb 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -6,7 +6,7 @@ * for developers. If you edit any of these, be sure to do a *full* * rebuild (and an initdb if noted). * - * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.26 2007/02/23 21:36:19 momjian Exp $ + * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.27 2007/06/08 18:23:53 tgl Exp $ *------------------------------------------------------------------------ */ @@ -249,6 +249,11 @@ */ #define TRACE_SORT 1 +/* + * Enable tracing of syncscan operations (see also the trace_syncscan GUC var). + */ +/* #define TRACE_SYNCSCAN */ + /* * Other debug #defines (documentation, anyone?) */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 477284b7d1d..046064cdc1f 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.36 2007/04/16 18:30:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.37 2007/06/08 18:23:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,6 +62,7 @@ typedef enum LWLockId AddinShmemInitLock, AutovacuumLock, AutovacuumScheduleLock, + SyncScanLock, /* Individual lock IDs end here */ FirstBufMappingLock, FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
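
The page-advance hunks in heapgettup() and heapgettup_pagemode() above replace the old linear walk (block 0 through rs_nblocks - 1) with a circular walk that starts at rs_startblock, wraps past the last block to block 0, and finishes when the advance lands back on the start block. The following standalone sketch (hypothetical names; printf() stands in for heapgetpage(); assumes nblocks > 0 and startblock < nblocks) shows that the advance-and-test order visits every page exactly once in either direction:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int BlockNumber;

/*
 * Sketch of the circular page advance added to heapgettup() and
 * heapgettup_pagemode().  Visits each of the 'nblocks' pages exactly
 * once, starting at 'startblock' and wrapping around; the scan is done
 * when the advance arrives back at the start block.
 */
static void
scan_circularly(BlockNumber nblocks, BlockNumber startblock, bool backward)
{
    BlockNumber page;
    bool        finished = false;

    /* initial page, as in the patch's !rs_inited branches */
    if (backward)
        page = (startblock > 0) ? startblock - 1 : nblocks - 1;
    else
        page = startblock;

    while (!finished)
    {
        printf("reading page %u\n", (unsigned) page);

        /* advance to next/prior page and detect end of scan */
        if (backward)
        {
            finished = (page == startblock);
            if (page == 0)
                page = nblocks;
            page--;
        }
        else
        {
            page++;
            if (page >= nblocks)
                page = 0;
            finished = (page == startblock);
        }
    }
}

int
main(void)
{
    scan_circularly(5, 3, false);   /* visits pages 3 4 0 1 2 */
    scan_circularly(5, 3, true);    /* visits pages 2 1 0 4 3 */
    return 0;
}

Note that the forward path computes the wrapped position before the finished test, which is why the patch can report that position to ss_report_location() first: the hint ends up back at the scan's own start block, so re-running the same query starts in roughly the same place.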
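
ss_search() keeps the per-table scan positions in a fixed-size, move-to-front doubly-linked list rather than a hash table, on the theory that only a handful of large scans are active at once. Here is a simplified single-threaded sketch of that LRU discipline; plain int keys stand in for RelFileNode, static storage for the shared-memory segment, and there is no SyncScanLock. All names are hypothetical:

#include <stdio.h>

#define NELEM 4                 /* cf. SYNC_SCAN_NELEM; must be > 1 */

typedef struct item_t
{
    struct item_t *prev;
    struct item_t *next;
    int         key;            /* -1 = invalid slot */
    int         value;
} item_t;

static item_t items[NELEM];
static item_t *head;
static item_t *tail;

static void
lru_init(void)
{
    int         i;

    for (i = 0; i < NELEM; i++)
    {
        items[i].key = -1;
        items[i].value = 0;
        items[i].prev = (i > 0) ? &items[i - 1] : NULL;
        items[i].next = (i < NELEM - 1) ? &items[i + 1] : NULL;
    }
    head = &items[0];
    tail = &items[NELEM - 1];
}

/*
 * Find 'key', recycling the tail slot if it is absent; update the value
 * when 'set' is true; move the entry to the front; return its value.
 */
static int
lru_search(int key, int value, int set)
{
    item_t     *item = head;

    for (;;)
    {
        int         match = (item->key == key);

        if (match || item->next == NULL)
        {
            if (!match)
            {
                item->key = key;        /* take over the LRU (tail) slot */
                item->value = value;
            }
            else if (set)
                item->value = value;

            if (item != head)
            {
                /* unlink */
                if (item == tail)
                    tail = item->prev;
                item->prev->next = item->next;
                if (item->next)
                    item->next->prev = item->prev;
                /* relink at front */
                item->prev = NULL;
                item->next = head;
                head->prev = item;
                head = item;
            }
            return item->value;
        }
        item = item->next;
    }
}

int
main(void)
{
    lru_init();
    lru_search(42, 100, 1);                 /* report a scan position */
    printf("%d\n", lru_search(42, 0, 0));   /* look it up: prints 100 */
    return 0;
}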
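
SizeOfScanLocations(N) sizes the shared structure with the offsetof-into-a-flexible-array idiom: the allocation covers everything up to the start of items[] plus N elements, even though the struct declares items[1]. A minimal standalone illustration of the same trick, using malloc in place of ShmemInitStruct and hypothetical names:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for ss_scan_locations_t */
typedef struct varstruct
{
    int         head;
    int         tail;
    int         items[1];       /* really N items, as in the patch */
} varstruct;

#define SizeOfVarStruct(N) offsetof(varstruct, items[N])

int
main(void)
{
    int         n = 20;         /* cf. SYNC_SCAN_NELEM */
    varstruct  *p = malloc(SizeOfVarStruct(n));
    int         i;

    if (p == NULL)
        return 1;
    /* the allocation covers n elements despite the items[1] declaration */
    for (i = 0; i < n; i++)
        p->items[i] = i;
    printf("allocated %zu bytes for %d items\n", SizeOfVarStruct(n), n);
    free(p);
    return 0;
}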
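
ss_report_location() avoids becoming a contention point in two ways: it only attempts an update every SYNC_SCAN_REPORT_INTERVAL pages (128 * 1024 / BLCKSZ = 16 pages at the default 8 kB BLCKSZ, i.e. one report per 128 kB scanned, below the bulk-read ring size noted in freelist.c), and it uses LWLockConditionalAcquire so a busy lock means a skipped report rather than a stall. A sketch of the same pattern, with a POSIX try-lock standing in for the lwlock and hypothetical names throughout:

#include <pthread.h>
#include <stdio.h>

#define BLCKSZ 8192             /* default PostgreSQL block size */
#define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)     /* = 16 pages */

static pthread_mutex_t sync_scan_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned scan_position;  /* the shared location hint */

/*
 * Report only on every SYNC_SCAN_REPORT_INTERVAL'th page, and skip
 * (rather than wait for) a contended lock; a missed update merely
 * leaves the hint a little stale.
 */
static void
report_location(unsigned location)
{
    if (location % SYNC_SCAN_REPORT_INTERVAL != 0)
        return;

    if (pthread_mutex_trylock(&sync_scan_lock) == 0)
    {
        scan_position = location;
        pthread_mutex_unlock(&sync_scan_lock);
    }
}

int
main(void)
{
    unsigned    page;

    for (page = 0; page < 64; page++)
        report_location(page);  /* updates land at pages 0, 16, 32, 48 */
    printf("last reported position: %u\n", scan_position);
    return 0;
}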