mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-18 18:44:06 +08:00
Improve handling of prune/no-prune decisions by storing a page's oldest
unpruned XMAX in its header. At the cost of 4 bytes per page, this keeps us from performing heap_page_prune when there's no chance of pruning anything. Seems to be necessary per Heikki's preliminary performance testing.
This commit is contained in:
parent
386a5d4268
commit
cc59049daf
@ -23,14 +23,14 @@ only by superusers.
|
||||
|
||||
A page image obtained with get_raw_page should be passed as argument:
|
||||
|
||||
test=# SELECT * FROM page_header(get_raw_page('pg_class',0));
|
||||
lsn | tli | flags | lower | upper | special | pagesize | version
|
||||
----------+-----+-------+-------+-------+---------+----------+---------
|
||||
0/3C5614 | 1 | 1 | 216 | 256 | 8192 | 8192 | 4
|
||||
regression=# SELECT * FROM page_header(get_raw_page('pg_class',0));
|
||||
lsn | tli | flags | lower | upper | special | pagesize | version | prune_xid
|
||||
-----------+-----+-------+-------+-------+---------+----------+---------+-----------
|
||||
0/24A1B50 | 1 | 1 | 232 | 368 | 8192 | 8192 | 4 | 0
|
||||
(1 row)
|
||||
|
||||
The returned columns correspond to the fields in the PageHeaderData-struct,
|
||||
see src/include/storage/bufpage.h for more details.
|
||||
The returned columns correspond to the fields in the PageHeaderData struct.
|
||||
See src/include/storage/bufpage.h for details.
|
||||
|
||||
heap_page_items
|
||||
---------------
|
||||
|
@ -20,7 +20,8 @@ CREATE OR REPLACE FUNCTION page_header(IN page bytea,
|
||||
OUT upper smallint,
|
||||
OUT special smallint,
|
||||
OUT pagesize smallint,
|
||||
OUT version smallint)
|
||||
OUT version smallint,
|
||||
OUT prune_xid xid)
|
||||
AS 'MODULE_PATHNAME', 'page_header'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
* Copyright (c) 2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/pageinspect/rawpage.c,v 1.1 2007/05/17 19:11:24 momjian Exp $
|
||||
* $PostgreSQL: pgsql/contrib/pageinspect/rawpage.c,v 1.2 2007/09/21 21:25:42 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -110,8 +110,8 @@ page_header(PG_FUNCTION_ARGS)
|
||||
|
||||
Datum result;
|
||||
HeapTuple tuple;
|
||||
Datum values[8];
|
||||
bool nulls[8];
|
||||
Datum values[9];
|
||||
bool nulls[9];
|
||||
|
||||
PageHeader page;
|
||||
XLogRecPtr lsn;
|
||||
@ -152,6 +152,7 @@ page_header(PG_FUNCTION_ARGS)
|
||||
values[5] = UInt16GetDatum(page->pd_special);
|
||||
values[6] = UInt16GetDatum(PageGetPageSize(page));
|
||||
values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page));
|
||||
values[8] = TransactionIdGetDatum(page->pd_prune_xid);
|
||||
|
||||
/* Build and return the tuple. */
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.18 2007/06/03 17:05:53 tgl Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.19 2007/09/21 21:25:42 tgl Exp $ -->
|
||||
|
||||
<chapter id="storage">
|
||||
|
||||
@ -416,7 +416,7 @@ Item
|
||||
|
||||
<row>
|
||||
<entry>PageHeaderData</entry>
|
||||
<entry>20 bytes long. Contains general information about the page, including
|
||||
<entry>24 bytes long. Contains general information about the page, including
|
||||
free space pointers.</entry>
|
||||
</row>
|
||||
|
||||
@ -449,7 +449,7 @@ data. Empty in ordinary tables.</entry>
|
||||
|
||||
<para>
|
||||
|
||||
The first 20 bytes of each page consists of a page header
|
||||
The first 24 bytes of each page consists of a page header
|
||||
(PageHeaderData). Its format is detailed in <xref
|
||||
linkend="pageheaderdata-table">. The first two fields track the most
|
||||
recent WAL entry related to this page. Next is a 2-byte field
|
||||
@ -459,7 +459,7 @@ data. Empty in ordinary tables.</entry>
|
||||
from the page start to the start
|
||||
of unallocated space, to the end of unallocated space, and to the start of
|
||||
the special space.
|
||||
The last 2 bytes of the page header,
|
||||
The next 2 bytes of the page header,
|
||||
<structfield>pd_pagesize_version</structfield>, store both the page size
|
||||
and a version indicator. Beginning with
|
||||
<productname>PostgreSQL</productname> 8.3 the version number is 4;
|
||||
@ -471,6 +471,8 @@ data. Empty in ordinary tables.</entry>
|
||||
versions, but the layout of heap row headers has.) The page size
|
||||
is basically only present as a cross-check; there is no support for having
|
||||
more than one page size in an installation.
|
||||
The last field is a hint that shows whether pruning the page is likely
|
||||
to be profitable: it tracks the oldest un-pruned XMAX on the page.
|
||||
|
||||
</para>
|
||||
|
||||
@ -530,6 +532,12 @@ data. Empty in ordinary tables.</entry>
|
||||
<entry>2 bytes</entry>
|
||||
<entry>Page size and layout version number information</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>pd_prune_xid</entry>
|
||||
<entry>TransactionId</entry>
|
||||
<entry>4 bytes</entry>
|
||||
<entry>Oldest unpruned XMAX on page, or zero if none</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
@ -1,4 +1,4 @@
|
||||
$PostgreSQL: pgsql/src/backend/access/heap/README.HOT,v 1.1 2007/09/20 17:56:30 tgl Exp $
|
||||
$PostgreSQL: pgsql/src/backend/access/heap/README.HOT,v 1.2 2007/09/21 21:25:42 tgl Exp $
|
||||
|
||||
Heap Only Tuples (HOT)
|
||||
|
||||
@ -233,8 +233,8 @@ subsequent index searches. However it is unclear that this gain is
|
||||
large enough to accept any extra maintenance burden for.
|
||||
|
||||
The currently planned heuristic is to prune and defrag when first accessing
|
||||
a page that potentially has prunable tuples (flagged by the PD_PRUNABLE
|
||||
page hint bit) and that either has free space less than MAX(fillfactor
|
||||
a page that potentially has prunable tuples (as flagged by the pd_prune_xid
|
||||
page hint field) and that either has free space less than MAX(fillfactor
|
||||
target free space, BLCKSZ/10) *or* has recently had an UPDATE fail to
|
||||
find enough free space to store an updated tuple version. (These rules
|
||||
are subject to change.)
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.241 2007/09/20 17:56:30 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.242 2007/09/21 21:25:42 tgl Exp $
|
||||
*
|
||||
*
|
||||
* INTERFACE ROUTINES
|
||||
@ -2068,11 +2068,12 @@ l1:
|
||||
|
||||
/*
|
||||
* If this transaction commits, the tuple will become DEAD sooner or
|
||||
* later. Set hint bit that this page is a candidate for pruning. If
|
||||
* the transaction finally aborts, the subsequent page pruning will be
|
||||
* a no-op and the hint will be cleared.
|
||||
* later. Set flag that this page is a candidate for pruning once our xid
|
||||
* falls below the OldestXmin horizon. If the transaction finally aborts,
|
||||
* the subsequent page pruning will be a no-op and the hint will be
|
||||
* cleared.
|
||||
*/
|
||||
PageSetPrunable((Page) dp);
|
||||
PageSetPrunable(dp, xid);
|
||||
|
||||
/* store transaction information of xact deleting the tuple */
|
||||
tp.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
|
||||
@ -2571,16 +2572,17 @@ l2:
|
||||
|
||||
/*
|
||||
* If this transaction commits, the old tuple will become DEAD sooner or
|
||||
* later. Set hint bit that this page is a candidate for pruning. If
|
||||
* the transaction finally aborts, the subsequent page pruning will be
|
||||
* a no-op and the hint will be cleared.
|
||||
* later. Set flag that this page is a candidate for pruning once our xid
|
||||
* falls below the OldestXmin horizon. If the transaction finally aborts,
|
||||
* the subsequent page pruning will be a no-op and the hint will be
|
||||
* cleared.
|
||||
*
|
||||
* XXX Should we set hint on newbuf as well? If the transaction
|
||||
* aborts, there would be a prunable tuple in the newbuf; but for now
|
||||
* we choose not to optimize for aborts. Note that heap_xlog_update
|
||||
* must be kept in sync if this changes.
|
||||
* must be kept in sync if this decision changes.
|
||||
*/
|
||||
PageSetPrunable(dp);
|
||||
PageSetPrunable(dp, xid);
|
||||
|
||||
if (use_hot_update)
|
||||
{
|
||||
@ -4108,7 +4110,7 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
||||
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
|
||||
|
||||
/* Mark the page as a candidate for pruning */
|
||||
PageSetPrunable(page);
|
||||
PageSetPrunable(page, record->xl_xid);
|
||||
|
||||
/* Make sure there is no forward chain link in t_ctid */
|
||||
htup->t_ctid = xlrec->target.tid;
|
||||
@ -4284,7 +4286,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update)
|
||||
}
|
||||
|
||||
/* Mark the page as a candidate for pruning */
|
||||
PageSetPrunable(page);
|
||||
PageSetPrunable(page, record->xl_xid);
|
||||
|
||||
/*
|
||||
* this test is ugly, but necessary to avoid thinking that insert change
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.1 2007/09/20 17:56:30 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.2 2007/09/21 21:25:42 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -63,9 +63,10 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
|
||||
/*
|
||||
* Let's see if we really need pruning.
|
||||
*
|
||||
* Forget it if page is not hinted to contain something prunable
|
||||
* Forget it if page is not hinted to contain something prunable that's
|
||||
* older than OldestXmin.
|
||||
*/
|
||||
if (!PageIsPrunable(dp))
|
||||
if (!PageIsPrunable(dp, OldestXmin))
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -93,6 +94,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
|
||||
/*
|
||||
* Now that we have buffer lock, get accurate information about the
|
||||
* page's free space, and recheck the heuristic about whether to prune.
|
||||
* (We needn't recheck PageIsPrunable, since no one else could have
|
||||
* pruned while we hold pin.)
|
||||
*/
|
||||
if (PageIsFull(dp) || PageGetHeapFreeSpace((Page) dp) < minfree)
|
||||
{
|
||||
@ -147,7 +150,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
|
||||
START_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
* Mark the page as clear of prunable tuples. If we find a tuple which
|
||||
* Mark the page as clear of prunable tuples. If we find a tuple which
|
||||
* may soon become prunable, we shall set the hint again. Also clear
|
||||
* the "page is full" flag, since there's no point in repeating the
|
||||
* prune/defrag process until something else happens to the page.
|
||||
@ -203,6 +206,14 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
|
||||
PageSetLSN(BufferGetPage(buffer), recptr);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* If we didn't prune anything, we have nonetheless updated the
|
||||
* pd_prune_xid field; treat this as a non-WAL-logged hint.
|
||||
*/
|
||||
SetBufferCommitInfoNeedsSave(buffer);
|
||||
}
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
@ -392,18 +403,18 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
|
||||
case HEAPTUPLE_RECENTLY_DEAD:
|
||||
recent_dead = true;
|
||||
/*
|
||||
* This tuple may soon become DEAD. Re-set the hint bit so
|
||||
* This tuple may soon become DEAD. Update the hint field so
|
||||
* that the page is reconsidered for pruning in future.
|
||||
*/
|
||||
PageSetPrunable(dp);
|
||||
PageSetPrunable(dp, HeapTupleHeaderGetXmax(htup));
|
||||
break;
|
||||
|
||||
case HEAPTUPLE_DELETE_IN_PROGRESS:
|
||||
/*
|
||||
* This tuple may soon become DEAD. Re-set the hint bit so
|
||||
* This tuple may soon become DEAD. Update the hint field so
|
||||
* that the page is reconsidered for pruning in future.
|
||||
*/
|
||||
PageSetPrunable(dp);
|
||||
PageSetPrunable(dp, HeapTupleHeaderGetXmax(htup));
|
||||
break;
|
||||
|
||||
case HEAPTUPLE_LIVE:
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.74 2007/09/20 17:56:31 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.75 2007/09/21 21:25:42 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -40,11 +40,12 @@ PageInit(Page page, Size pageSize, Size specialSize)
|
||||
/* Make sure all fields of page are zero, as well as unused space */
|
||||
MemSet(p, 0, pageSize);
|
||||
|
||||
/* p->pd_flags = 0; done by above MemSet */
|
||||
/* p->pd_flags = 0; done by above MemSet */
|
||||
p->pd_lower = SizeOfPageHeaderData;
|
||||
p->pd_upper = pageSize - specialSize;
|
||||
p->pd_special = pageSize - specialSize;
|
||||
PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
|
||||
/* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
|
||||
}
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.426 2007/09/20 17:56:32 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.427 2007/09/21 21:25:42 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 200709201
|
||||
#define CATALOG_VERSION_NO 200709211
|
||||
|
||||
#endif
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.74 2007/09/20 17:56:32 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.75 2007/09/21 21:25:42 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -48,7 +48,7 @@
|
||||
*
|
||||
* EXCEPTIONS:
|
||||
*
|
||||
* obviously, a page is not formatted before it is initialized with by
|
||||
* obviously, a page is not formatted before it is initialized by
|
||||
* a call to PageInit.
|
||||
*
|
||||
* NOTES:
|
||||
@ -95,6 +95,7 @@ typedef uint16 LocationIndex;
|
||||
* pd_upper - offset to end of free space.
|
||||
* pd_special - offset to start of special space.
|
||||
* pd_pagesize_version - size in bytes and page layout version number.
|
||||
* pd_prune_xid - oldest XID among potentially prunable tuples on page.
|
||||
*
|
||||
* The LSN is used by the buffer manager to enforce the basic rule of WAL:
|
||||
* "thou shalt write xlog before data". A dirty buffer cannot be dumped
|
||||
@ -103,6 +104,9 @@ typedef uint16 LocationIndex;
|
||||
* purposes (it is not clear that this is actually necessary, but it seems
|
||||
* like a good idea).
|
||||
*
|
||||
* pd_prune_xid is a hint field that helps determine whether pruning will be
|
||||
* useful. It is currently unused in index pages.
|
||||
*
|
||||
* The page version number and page size are packed together into a single
|
||||
* uint16 field. This is for historical reasons: before PostgreSQL 7.3,
|
||||
* there was no concept of a page version number, and doing it this way
|
||||
@ -128,6 +132,7 @@ typedef struct PageHeaderData
|
||||
LocationIndex pd_upper; /* offset to end of free space */
|
||||
LocationIndex pd_special; /* offset to start of special space */
|
||||
uint16 pd_pagesize_version;
|
||||
TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
|
||||
ItemIdData pd_linp[1]; /* beginning of line pointer array */
|
||||
} PageHeaderData;
|
||||
|
||||
@ -141,20 +146,14 @@ typedef PageHeaderData *PageHeader;
|
||||
* pd_lower. This should be considered a hint rather than the truth, since
|
||||
* changes to it are not WAL-logged.
|
||||
*
|
||||
* PD_PRUNABLE is set if there are any prunable tuples in the page.
|
||||
* This should be considered a hint rather than the truth, since
|
||||
* the transaction which generates a prunable tuple may or may not commit.
|
||||
* Also there is a lag before a tuple is declared dead.
|
||||
*
|
||||
* PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
|
||||
* page for its new tuple version; this suggests that a prune is needed.
|
||||
* Again, this is just a hint.
|
||||
*/
|
||||
#define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
|
||||
#define PD_PRUNABLE 0x0002 /* are there any prunable tuples? */
|
||||
#define PD_PAGE_FULL 0x0004 /* not enough free space for new tuple? */
|
||||
#define PD_PAGE_FULL 0x0002 /* not enough free space for new tuple? */
|
||||
|
||||
#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
|
||||
#define PD_VALID_FLAG_BITS 0x0003 /* OR of all valid pd_flags bits */
|
||||
|
||||
/*
|
||||
* Page layout version number 0 is for pre-7.3 Postgres releases.
|
||||
@ -162,7 +161,8 @@ typedef PageHeaderData *PageHeader;
|
||||
* Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
|
||||
* Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
|
||||
* Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
|
||||
* added the pd_flags field (by stealing some bits from pd_tli).
|
||||
* added the pd_flags field (by stealing some bits from pd_tli),
|
||||
* as well as adding the pd_prune_xid field (which enlarges the header).
|
||||
*/
|
||||
#define PG_PAGE_LAYOUT_VERSION 4
|
||||
|
||||
@ -348,13 +348,6 @@ typedef PageHeaderData *PageHeader;
|
||||
#define PageClearHasFreeLinePointers(page) \
|
||||
(((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)
|
||||
|
||||
#define PageIsPrunable(page) \
|
||||
(((PageHeader) (page))->pd_flags & PD_PRUNABLE)
|
||||
#define PageSetPrunable(page) \
|
||||
(((PageHeader) (page))->pd_flags |= PD_PRUNABLE)
|
||||
#define PageClearPrunable(page) \
|
||||
(((PageHeader) (page))->pd_flags &= ~PD_PRUNABLE)
|
||||
|
||||
#define PageIsFull(page) \
|
||||
(((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
|
||||
#define PageSetFull(page) \
|
||||
@ -362,6 +355,22 @@ typedef PageHeaderData *PageHeader;
|
||||
#define PageClearFull(page) \
|
||||
(((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)
|
||||
|
||||
#define PageIsPrunable(page, oldestxmin) \
|
||||
( \
|
||||
AssertMacro(TransactionIdIsNormal(oldestxmin)), \
|
||||
TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
|
||||
TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
|
||||
)
|
||||
#define PageSetPrunable(page, xid) \
|
||||
do { \
|
||||
Assert(TransactionIdIsNormal(xid)); \
|
||||
if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
|
||||
TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
|
||||
((PageHeader) (page))->pd_prune_xid = (xid); \
|
||||
} while (0)
|
||||
#define PageClearPrunable(page) \
|
||||
(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* extern declarations
|
||||
|
Loading…
Reference in New Issue
Block a user