mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-30 19:00:29 +08:00
Arrange to cache btree metapage data in the relcache entry for the index,
thereby saving a visit to the metapage in most index searches/updates. This doesn't actually save any I/O (since in the old regime the metapage generally stayed in cache anyway), but it does provide a useful decrease in bufmgr traffic in high-contention scenarios. Per my recent proposal.
This commit is contained in:
parent
89083876c9
commit
d2896a9ed1
@ -1,4 +1,4 @@
|
||||
$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.9 2006/01/17 00:09:00 tgl Exp $
|
||||
$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.10 2006/04/25 22:46:05 tgl Exp $
|
||||
|
||||
This directory contains a correct implementation of Lehman and Yao's
|
||||
high-concurrency B-tree management algorithm (P. Lehman and S. Yao,
|
||||
@ -316,7 +316,17 @@ Other things that are handy to know
|
||||
|
||||
Page zero of every btree is a meta-data page. This page stores the
|
||||
location of the root page --- both the true root and the current effective
|
||||
root ("fast" root).
|
||||
root ("fast" root). To avoid fetching the metapage for every single index
|
||||
search, we cache a copy of the meta-data information in the index's
|
||||
relcache entry (rd_amcache). This is a bit ticklish since using the cache
|
||||
implies following a root page pointer that could be stale. We require
|
||||
every metapage update to send out a shared-invalidation (SI) "relcache inval" message on the
|
||||
index relation. That ensures that each backend will flush its cached copy
|
||||
not later than the start of its next transaction. Therefore, stale
|
||||
pointers cannot be used for longer than the current transaction, which
|
||||
reduces the problem to the same one already dealt with for concurrent
|
||||
VACUUM --- we can just imagine that each open transaction is potentially
|
||||
"already in flight" to the old root.
|
||||
|
||||
The algorithm assumes we can fit at least three items per page
|
||||
(a "high key" and two real data items). Therefore it's unsafe
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.135 2006/04/13 03:53:05 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.136 2006/04/25 22:46:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -18,6 +18,7 @@
|
||||
#include "access/heapam.h"
|
||||
#include "access/nbtree.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/inval.h"
|
||||
|
||||
|
||||
typedef struct
|
||||
@ -638,9 +639,12 @@ _bt_insertonpg(Relation rel,
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/* release pin/lock */
|
||||
/* release buffers; send out relcache inval if metapage changed */
|
||||
if (BufferIsValid(metabuf))
|
||||
{
|
||||
CacheInvalidateRelcache(rel);
|
||||
_bt_relbuf(rel, metabuf);
|
||||
}
|
||||
|
||||
_bt_relbuf(rel, buf);
|
||||
}
|
||||
@ -1526,6 +1530,9 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/* send out relcache inval for metapage change */
|
||||
CacheInvalidateRelcache(rel);
|
||||
|
||||
/* done with metapage */
|
||||
_bt_relbuf(rel, metabuf);
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.95 2006/04/01 03:03:36 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.96 2006/04/25 22:46:05 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@ -26,6 +26,7 @@
|
||||
#include "miscadmin.h"
|
||||
#include "storage/freespace.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/inval.h"
|
||||
|
||||
|
||||
/*
|
||||
@ -99,6 +100,49 @@ _bt_getroot(Relation rel, int access)
|
||||
uint32 rootlevel;
|
||||
BTMetaPageData *metad;
|
||||
|
||||
/*
|
||||
* Try to use previously-cached metapage data to find the root. This
|
||||
* normally saves one buffer access per index search, which is a very
|
||||
* helpful savings in bufmgr traffic and hence contention.
|
||||
*/
|
||||
if (rel->rd_amcache != NULL)
|
||||
{
|
||||
metad = (BTMetaPageData *) rel->rd_amcache;
|
||||
/* We shouldn't have cached it if any of these fail */
|
||||
Assert(metad->btm_magic == BTREE_MAGIC);
|
||||
Assert(metad->btm_version == BTREE_VERSION);
|
||||
Assert(metad->btm_root != P_NONE);
|
||||
|
||||
rootblkno = metad->btm_fastroot;
|
||||
Assert(rootblkno != P_NONE);
|
||||
rootlevel = metad->btm_fastlevel;
|
||||
|
||||
rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
|
||||
rootpage = BufferGetPage(rootbuf);
|
||||
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
|
||||
|
||||
/*
|
||||
* Since the cache might be stale, we check the page more carefully
|
||||
* here than normal. We *must* check that it's not deleted.
|
||||
* If it's not alone on its level, then we reject too --- this
|
||||
* may be overly paranoid but better safe than sorry. Note we
|
||||
* don't check P_ISROOT, because that's not set in a "fast root".
|
||||
*/
|
||||
if (!P_IGNORE(rootopaque) &&
|
||||
rootopaque->btpo.level == rootlevel &&
|
||||
P_LEFTMOST(rootopaque) &&
|
||||
P_RIGHTMOST(rootopaque))
|
||||
{
|
||||
/* OK, accept cached page as the root */
|
||||
return rootbuf;
|
||||
}
|
||||
_bt_relbuf(rel, rootbuf);
|
||||
/* Cache is stale, throw it away */
|
||||
if (rel->rd_amcache)
|
||||
pfree(rel->rd_amcache);
|
||||
rel->rd_amcache = NULL;
|
||||
}
|
||||
|
||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
|
||||
metapg = BufferGetPage(metabuf);
|
||||
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
|
||||
@ -200,6 +244,12 @@ _bt_getroot(Relation rel, int access)
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
* Send out relcache inval for metapage change (probably unnecessary
|
||||
* here, but let's be safe).
|
||||
*/
|
||||
CacheInvalidateRelcache(rel);
|
||||
|
||||
/*
|
||||
* swap root write lock for read lock. There is no danger of anyone
|
||||
* else accessing the new root page while it's unlocked, since no one
|
||||
@ -217,6 +267,13 @@ _bt_getroot(Relation rel, int access)
|
||||
Assert(rootblkno != P_NONE);
|
||||
rootlevel = metad->btm_fastlevel;
|
||||
|
||||
/*
|
||||
* Cache the metapage data for next time
|
||||
*/
|
||||
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
|
||||
sizeof(BTMetaPageData));
|
||||
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
|
||||
|
||||
/*
|
||||
* We are done with the metapage; arrange to release it via first
|
||||
* _bt_relandgetbuf call
|
||||
@ -280,6 +337,16 @@ _bt_gettrueroot(Relation rel)
|
||||
uint32 rootlevel;
|
||||
BTMetaPageData *metad;
|
||||
|
||||
/*
|
||||
* We don't try to use cached metapage data here, since (a) this path is
|
||||
* not performance-critical, and (b) if we are here it suggests our cache
|
||||
* is out-of-date anyway. In light of point (b), it's probably safest to
|
||||
* actively flush any cached metapage info.
|
||||
*/
|
||||
if (rel->rd_amcache)
|
||||
pfree(rel->rd_amcache);
|
||||
rel->rd_amcache = NULL;
|
||||
|
||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
|
||||
metapg = BufferGetPage(metabuf);
|
||||
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
|
||||
@ -1052,9 +1119,12 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/* release buffers */
|
||||
/* release buffers; send out relcache inval if metapage changed */
|
||||
if (BufferIsValid(metabuf))
|
||||
{
|
||||
CacheInvalidateRelcache(rel);
|
||||
_bt_relbuf(rel, metabuf);
|
||||
}
|
||||
_bt_relbuf(rel, pbuf);
|
||||
_bt_relbuf(rel, rbuf);
|
||||
_bt_relbuf(rel, buf);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.144 2006/04/01 03:03:37 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.145 2006/04/25 22:46:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -26,6 +26,7 @@
|
||||
#include "miscadmin.h"
|
||||
#include "storage/freespace.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/inval.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
@ -127,6 +128,17 @@ btbuild(PG_FUNCTION_ARGS)
|
||||
}
|
||||
#endif /* BTREE_BUILD_STATS */
|
||||
|
||||
/*
|
||||
* If we are reindexing a pre-existing index, it is critical to send out
|
||||
* a relcache invalidation SI message to ensure all backends re-read the
|
||||
* index metapage. In most circumstances the update-stats operation will
|
||||
* cause that to happen, but at the moment there are corner cases where
|
||||
* no pg_class update will occur, so force an inval here. XXX FIXME:
|
||||
* the upper levels of CREATE INDEX should handle the stats update as
|
||||
* well as guaranteeing relcache inval.
|
||||
*/
|
||||
CacheInvalidateRelcache(index);
|
||||
|
||||
/* since we just counted the # of tuples, may as well update stats */
|
||||
IndexCloseAndUpdateStats(heap, reltuples, index, buildstate.indtuples);
|
||||
|
||||
|
8
src/backend/utils/cache/relcache.c
vendored
8
src/backend/utils/cache/relcache.c
vendored
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.238 2006/03/05 15:58:45 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.239 2006/04/25 22:46:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -948,6 +948,7 @@ RelationInitIndexAccessInfo(Relation relation)
|
||||
*/
|
||||
relation->rd_indexprs = NIL;
|
||||
relation->rd_indpred = NIL;
|
||||
relation->rd_amcache = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1481,6 +1482,10 @@ RelationReloadClassinfo(Relation relation)
|
||||
RelationInitPhysicalAddr(relation);
|
||||
/* Make sure targblock is reset in case rel was truncated */
|
||||
relation->rd_targblock = InvalidBlockNumber;
|
||||
/* Must free any AM cached data, too */
|
||||
if (relation->rd_amcache)
|
||||
pfree(relation->rd_amcache);
|
||||
relation->rd_amcache = NULL;
|
||||
/* Okay, now it's valid again */
|
||||
relation->rd_isvalid = true;
|
||||
}
|
||||
@ -3141,6 +3146,7 @@ load_relcache_init_file(void)
|
||||
rel->rd_indexlist = NIL;
|
||||
rel->rd_oidindex = InvalidOid;
|
||||
rel->rd_createSubid = InvalidSubTransactionId;
|
||||
rel->rd_amcache = NULL;
|
||||
MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
|
||||
|
||||
/*
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.88 2006/03/05 15:59:07 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.89 2006/04/25 22:46:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -167,6 +167,13 @@ typedef struct RelationData
|
||||
* cached, namely those with subtype zero. The arrays are indexed by
|
||||
* strategy or support number, which is a sufficient identifier given that
|
||||
* restriction.
|
||||
*
|
||||
* Note: rd_amcache is available for index AMs to cache private data about
|
||||
* an index. This must be just a cache since it may get reset at any time
|
||||
* (in particular, it will get reset by a relcache inval message for the
|
||||
* index). If used, it must point to a single memory chunk palloc'd in
|
||||
* rd_indexcxt. A relcache reset will include freeing that chunk and
|
||||
* setting rd_amcache = NULL.
|
||||
*/
|
||||
MemoryContext rd_indexcxt; /* private memory cxt for this stuff */
|
||||
RelationAmInfo *rd_aminfo; /* lookup info for funcs found in pg_am */
|
||||
@ -175,6 +182,7 @@ typedef struct RelationData
|
||||
FmgrInfo *rd_supportinfo; /* lookup info for support procedures */
|
||||
List *rd_indexprs; /* index expression trees, if any */
|
||||
List *rd_indpred; /* index predicate tree, if any */
|
||||
void *rd_amcache; /* available for use by index AM */
|
||||
|
||||
/* statistics collection area */
|
||||
PgStat_Info pgstat_info;
|
||||
|
Loading…
Reference in New Issue
Block a user