mirror of https://git.postgresql.org/git/postgresql.git
Reimplement free-space-map management as per recent discussions.
Adjustable threshold is gone in favor of keeping track of total requested page storage and doling out proportional fractions to each relation (with a minimum amount per relation, and some quantization of the results to avoid thrashing with small changes in page counts). Provide special-case code for indexes so as not to waste space storing useless page free space counts. Restructure internal data storage to be a flat array instead of list-of-chunks; this may cost a little more work in data copying when reorganizing, but allows binary search to be used during lookup_fsm_page_entry().
parent a455c94257
commit 391eb5e5b6
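The doling-out rule described in the commit message can be pictured with a small standalone sketch. This is illustrative only, not code from the commit: the function name proportional_slots and the constants FSM_MIN_SLOTS_PER_REL and FSM_SLOT_CHUNK are hypothetical, and the real logic lives in the freespace.c rewrite whose diff is suppressed further down.

/*
 * Illustrative sketch only -- not code from this commit.  Each relation
 * receives a share of the shared page-slot pool proportional to the page
 * storage it requested, subject to a per-relation minimum, and the result
 * is quantized so small changes in page counts do not cause thrashing.
 */
#define FSM_MIN_SLOTS_PER_REL	10		/* hypothetical minimum per relation */
#define FSM_SLOT_CHUNK			32		/* hypothetical quantization step */

static int
proportional_slots(double rel_requested, double total_requested, int total_slots)
{
	int		target;

	if (total_requested <= 0.0)
		return FSM_MIN_SLOTS_PER_REL;

	/* proportional fraction of the shared slot pool */
	target = (int) (total_slots * (rel_requested / total_requested));

	/* quantize to a chunk boundary to damp small fluctuations */
	target -= target % FSM_SLOT_CHUNK;

	/* never go below the per-relation minimum */
	if (target < FSM_MIN_SLOTS_PER_REL)
		target = FSM_MIN_SLOTS_PER_REL;

	return target;
}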
doc/src/sgml/runtime.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.169 2003/02/19 04:06:28 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.170 2003/03/04 21:51:19 tgl Exp $
 -->
 
 <Chapter Id="runtime">
@@ -1725,7 +1725,9 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir'
      <listitem>
       <para>
        Sets the maximum number of disk pages for which free space will
-       be tracked in the shared free-space map. The default is 10000.
+       be tracked in the shared free-space map. Six bytes of shared memory
+       are consumed for each page slot. This setting must be more than
+       16 * <varname>max_fsm_relations</varname>. The default is 20000.
        This option can only be set at server start.
       </para>
      </listitem>
@@ -1735,9 +1737,11 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir'
      <term><varname>MAX_FSM_RELATIONS</varname> (<type>integer</type>)</term>
      <listitem>
       <para>
-       Sets the maximum number of relations (tables) for which free
-       space will be tracked in the shared free-space map. The default
-       is 1000. This option can only be set at server start.
+       Sets the maximum number of relations (tables and indexes) for which
+       free space will be tracked in the shared free-space map. Roughly
+       fifty bytes of shared memory are consumed for each slot.
+       The default is 1000.
+       This option can only be set at server start.
       </para>
      </listitem>
     </varlistentry>
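A quick worked example from the documented defaults above: 20000 page slots at six bytes each is roughly 120 kB of shared memory, and 1000 relation slots at roughly fifty bytes each add about another 50 kB. The stated constraint is also satisfied by the defaults, since 16 * max_fsm_relations = 16 * 1000 = 16000, which is below max_fsm_pages = 20000.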
src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
-*	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.63 2003/02/23 23:20:52 tgl Exp $
+*	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.64 2003/03/04 21:51:20 tgl Exp $
 *
 * NOTES
 *	  Postgres btree pages look like ordinary relation pages. The opaque
@@ -401,15 +401,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 		 * that the page is still free. (For example, an already-free page
 		 * could have been re-used between the time the last VACUUM scanned
 		 * it and the time the VACUUM made its FSM updates.)
-		 *
-		 * The request size should be more than half of what btvacuumcleanup
-		 * logs as the per-page free space. We use BLCKSZ/2 and BLCKSZ-1
-		 * to try to get some use out of FSM's space management algorithm.
-		 * XXX this needs some more thought...
 		 */
 		for (;;)
 		{
-			blkno = GetPageWithFreeSpace(&rel->rd_node, BLCKSZ/2);
+			blkno = GetFreeIndexPage(&rel->rd_node);
 			if (blkno == InvalidBlockNumber)
 				break;
 			buf = ReadBuffer(rel, blkno);
src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
-*	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.100 2003/02/24 00:57:17 tgl Exp $
+*	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.101 2003/03/04 21:51:20 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -697,7 +697,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
 	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
 	BlockNumber num_pages;
 	BlockNumber blkno;
-	PageFreeSpaceInfo *pageSpaces;
+	BlockNumber *freePages;
 	int			nFreePages,
 				maxFreePages;
 	BlockNumber pages_deleted = 0;
@@ -712,7 +712,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
 	maxFreePages = MaxFSMPages;
 	if ((BlockNumber) maxFreePages > num_pages)
 		maxFreePages = (int) num_pages + 1;		/* +1 to avoid palloc(0) */
-	pageSpaces = (PageFreeSpaceInfo *) palloc(maxFreePages * sizeof(PageFreeSpaceInfo));
+	freePages = (BlockNumber *) palloc(maxFreePages * sizeof(BlockNumber));
 	nFreePages = 0;

 	/* Create a temporary memory context to run _bt_pagedel in */
@@ -740,12 +740,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
 		{
 			/* Okay to recycle this page */
 			if (nFreePages < maxFreePages)
-			{
-				pageSpaces[nFreePages].blkno = blkno;
-				/* claimed avail-space must be < BLCKSZ */
-				pageSpaces[nFreePages].avail = BLCKSZ-1;
-				nFreePages++;
-			}
+				freePages[nFreePages++] = blkno;
 			pages_deleted++;
 		}
 		else if (P_ISDELETED(opaque))
@@ -781,12 +776,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
 		if (ndel && info->vacuum_full)
 		{
 			if (nFreePages < maxFreePages)
-			{
-				pageSpaces[nFreePages].blkno = blkno;
-				/* claimed avail-space must be < BLCKSZ */
-				pageSpaces[nFreePages].avail = BLCKSZ-1;
-				nFreePages++;
-			}
+				freePages[nFreePages++] = blkno;
 		}

 		MemoryContextSwitchTo(oldcontext);
@@ -805,8 +795,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
 	{
 		BlockNumber new_pages = num_pages;

-		while (nFreePages > 0 &&
-			   pageSpaces[nFreePages-1].blkno == new_pages-1)
+		while (nFreePages > 0 && freePages[nFreePages-1] == new_pages-1)
 		{
 			new_pages--;
 			pages_deleted--;
@@ -841,12 +830,12 @@ btvacuumcleanup(PG_FUNCTION_ARGS)

 	/*
 	 * Update the shared Free Space Map with the info we now have about
-	 * free space in the index, discarding any old info the map may have.
+	 * free pages in the index, discarding any old info the map may have.
+	 * We do not need to sort the page numbers; they're in order already.
 	 */
-	MultiRecordFreeSpace(&rel->rd_node, 0, nFreePages, pageSpaces);
+	RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);

-	pfree(pageSpaces);
+	pfree(freePages);

 	MemoryContextDelete(mycontext);
src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
 *
 *
 * IDENTIFICATION
-*	  $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.250 2003/02/24 00:57:17 tgl Exp $
+*	  $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.251 2003/03/04 21:51:20 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -336,6 +336,13 @@ vacuum(VacuumStmt *vacstmt)
 	 */
 	StartTransactionCommand(true);

+	/*
+	 * If it was a database-wide VACUUM, print FSM usage statistics
+	 * (we don't make you be superuser to see these).
+	 */
+	if (vacstmt->relation == NULL)
+		PrintFreeSpaceMapStatistics(elevel);
+
 	/*
 	 * If we completed a database-wide VACUUM without skipping any
 	 * relations, update the database's pg_database row with info
@@ -2781,31 +2788,48 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages,
 			   BlockNumber rel_pages)
 {
 	int			nPages = fraged_pages->num_pages;
-	int			i;
+	VacPage    *pagedesc = fraged_pages->pagedesc;
+	Size		threshold;
 	PageFreeSpaceInfo *pageSpaces;
+	int			outPages;
+	int			i;

+	/*
+	 * We only report pages with free space at least equal to the average
+	 * request size --- this avoids cluttering FSM with uselessly-small bits
+	 * of space. Although FSM would discard pages with little free space
+	 * anyway, it's important to do this prefiltering because (a) it reduces
+	 * the time spent holding the FSM lock in RecordRelationFreeSpace, and
+	 * (b) FSM uses the number of pages reported as a statistic for guiding
+	 * space management. If we didn't threshold our reports the same way
+	 * vacuumlazy.c does, we'd be skewing that statistic.
+	 */
+	threshold = GetAvgFSMRequestSize(&onerel->rd_node);
+
 	/* +1 to avoid palloc(0) */
 	pageSpaces = (PageFreeSpaceInfo *)
 		palloc((nPages + 1) * sizeof(PageFreeSpaceInfo));
+	outPages = 0;

 	for (i = 0; i < nPages; i++)
 	{
-		pageSpaces[i].blkno = fraged_pages->pagedesc[i]->blkno;
-		pageSpaces[i].avail = fraged_pages->pagedesc[i]->free;
-
 		/*
 		 * fraged_pages may contain entries for pages that we later
 		 * decided to truncate from the relation; don't enter them into
 		 * the free space map!
 		 */
-		if (pageSpaces[i].blkno >= rel_pages)
-		{
-			nPages = i;
+		if (pagedesc[i]->blkno >= rel_pages)
 			break;
-		}
+
+		if (pagedesc[i]->free >= threshold)
+		{
+			pageSpaces[outPages].blkno = pagedesc[i]->blkno;
+			pageSpaces[outPages].avail = pagedesc[i]->free;
+			outPages++;
+		}
 	}

-	MultiRecordFreeSpace(&onerel->rd_node, 0, nPages, pageSpaces);
+	RecordRelationFreeSpace(&onerel->rd_node, outPages, pageSpaces);

 	pfree(pageSpaces);
 }
src/backend/commands/vacuumlazy.c
@@ -31,7 +31,7 @@
 *
 *
 * IDENTIFICATION
-*	  $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.26 2003/02/24 00:57:17 tgl Exp $
+*	  $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.27 2003/03/04 21:51:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -51,21 +51,11 @@
 /*
  * Space/time tradeoff parameters: do these need to be user-tunable?
  *
- * A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten
- * immediately, and not even passed to the free space map. Removing the
- * uselessly small entries early saves cycles, and in particular reduces
- * the amount of time we spend holding the FSM lock when we finally call
- * MultiRecordFreeSpace. Since the FSM will ignore pages below its own
- * runtime threshold anyway, there's no point in making this really small.
- * XXX Is it worth trying to measure average tuple size, and using that to
- * set the threshold? Problem is we don't know average tuple size very
- * accurately for the first few pages...
- *
  * To consider truncating the relation, we want there to be at least
- * relsize / REL_TRUNCATE_FRACTION potentially-freeable pages.
+ * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
+ * is less) potentially-freeable pages.
 */
-#define PAGE_SPACE_THRESHOLD	((Size) (BLCKSZ / 32))
-
+#define REL_TRUNCATE_MINIMUM	1000
 #define REL_TRUNCATE_FRACTION	16

 /* MAX_TUPLES_PER_PAGE can be a conservative upper limit */
@@ -78,6 +68,7 @@ typedef struct LVRelStats
 	BlockNumber rel_pages;
 	double		rel_tuples;
 	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
+	Size		threshold;		/* minimum interesting free space */
 	/* List of TIDs of tuples we intend to delete */
 	/* NB: this list is ordered by TID address */
 	int			num_dead_tuples;	/* current # of entries */
@@ -149,6 +140,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)

 	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

+	/* Set threshold for interesting free space = average request size */
+	/* XXX should we scale it up or down? Adjust vacuum.c too, if so */
+	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);
+
 	/* Open all indexes of the relation */
 	vac_open_indexes(onerel, &nindexes, &Irel);
 	hasindex = (nindexes > 0);
@@ -166,7 +161,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
 	 * number of pages. Otherwise, the time taken isn't worth it.
 	 */
 	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
-	if (possibly_freeable > vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
+	if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
+		possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
 		lazy_truncate_heap(onerel, vacrelstats);

 	/* Update shared free space map with final free space info */
@@ -943,8 +939,21 @@ lazy_record_free_space(LVRelStats *vacrelstats,
 	PageFreeSpaceInfo *pageSpaces;
 	int			n;

-	/* Ignore pages with little free space */
-	if (avail < PAGE_SPACE_THRESHOLD)
+	/*
+	 * A page with less than stats->threshold free space will be forgotten
+	 * immediately, and never passed to the free space map. Removing the
+	 * uselessly small entries early saves cycles, and in particular reduces
+	 * the amount of time we spend holding the FSM lock when we finally call
+	 * RecordRelationFreeSpace. Since the FSM will probably drop pages with
+	 * little free space anyway, there's no point in making this really small.
+	 *
+	 * XXX Is it worth trying to measure average tuple size, and using that to
+	 * adjust the threshold? Would be worthwhile if FSM has no stats yet
+	 * for this relation. But changing the threshold as we scan the rel
+	 * might lead to bizarre behavior, too. Also, it's probably better if
+	 * vacuum.c has the same thresholding behavior as we do here.
+	 */
+	if (avail < vacrelstats->threshold)
 		return;

 	/* Copy pointers to local variables for notational simplicity */
@@ -1079,13 +1088,13 @@ lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
 	int			nPages = vacrelstats->num_free_pages;

 	/*
-	 * Sort data into order, as required by MultiRecordFreeSpace.
+	 * Sort data into order, as required by RecordRelationFreeSpace.
 	 */
 	if (nPages > 1)
 		qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo),
 			  vac_cmp_page_spaces);

-	MultiRecordFreeSpace(&onerel->rd_node, 0, nPages, pageSpaces);
+	RecordRelationFreeSpace(&onerel->rd_node, nPages, pageSpaces);
 }

 /*
File diff suppressed because it is too large
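The suppressed diff is presumably the freespace.c rewrite itself. The commit message says page entries are now kept in a flat array so that lookup_fsm_page_entry() can use binary search; the sketch below illustrates that idea only, with a simplified stand-in struct and function rather than the actual freespace.c definitions.

typedef unsigned int BlockNumber;	/* stand-in for the backend typedef */

typedef struct
{
	BlockNumber page;			/* block number within the relation */
	unsigned int avail;			/* recorded free bytes on that block */
} FSMPageEntrySketch;

/*
 * Binary search over a flat, block-number-ordered array of entries.
 * Returns the index of 'page', or -1 if no entry is stored for it.
 */
static int
lookup_fsm_page_entry_sketch(const FSMPageEntrySketch *entries, int nentries,
							 BlockNumber page)
{
	int		low = 0;
	int		high = nentries - 1;

	while (low <= high)
	{
		int		mid = low + (high - low) / 2;

		if (entries[mid].page == page)
			return mid;
		if (entries[mid].page < page)
			low = mid + 1;
		else
			high = mid - 1;
	}
	return -1;
}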
src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
 *
 *
 * IDENTIFICATION
-*	  $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.61 2002/09/20 19:56:01 tgl Exp $
+*	  $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.62 2003/03/04 21:51:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -410,7 +410,7 @@ smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
 		 * for the about-to-be-deleted blocks. We want to be sure it
 		 * won't return bogus block numbers later on.
 		 */
-		MultiRecordFreeSpace(&reln->rd_node, nblocks, 0, NULL);
+		FreeSpaceMapTruncateRel(&reln->rd_node, nblocks);

 		newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks);
 		if (newblks == InvalidBlockNumber)
src/backend/utils/misc/guc.c
@@ -5,7 +5,7 @@
 * command, configuration file, and command line options.
 * See src/backend/utils/misc/README for more information.
 *
-* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.115 2003/02/23 23:27:21 tgl Exp $
+* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.116 2003/03/04 21:51:21 tgl Exp $
 *
 * Copyright 2000 by PostgreSQL Global Development Group
 * Written by Peter Eisentraut <peter_e@gmx.net>.
@@ -644,11 +644,11 @@ static struct config_int

 	{
 		{"max_fsm_relations", PGC_POSTMASTER}, &MaxFSMRelations,
-		1000, 10, INT_MAX, NULL, NULL
+		1000, 100, INT_MAX, NULL, NULL
 	},
 	{
 		{"max_fsm_pages", PGC_POSTMASTER}, &MaxFSMPages,
-		10000, 1000, INT_MAX, NULL, NULL
+		20000, 1000, INT_MAX, NULL, NULL
 	},

 	{
src/backend/utils/misc/postgresql.conf.sample
@@ -48,10 +48,11 @@
 # Shared Memory Size
 #
 #shared_buffers = 64            # min max_connections*2 or 16, 8KB each
-#max_fsm_relations = 1000       # min 10, fsm is free space map, ~40 bytes
-#max_fsm_pages = 10000          # min 1000, fsm is free space map, ~6 bytes
 #max_locks_per_transaction = 64 # min 10
 #wal_buffers = 8                # min 4, typically 8KB each
+# fsm = free space map
+#max_fsm_relations = 1000       # min 100, ~50 bytes each
+#max_fsm_pages = 20000          # min max_fsm_relations*16, 6 bytes each

 #
 # Non-shared Memory Sizes
src/include/storage/freespace.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
-* $Id: freespace.h,v 1.8 2002/09/20 19:56:01 tgl Exp $
+* $Id: freespace.h,v 1.9 2003/03/04 21:51:22 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -28,6 +28,7 @@ typedef struct PageFreeSpaceInfo
 } PageFreeSpaceInfo;

+
 /* GUC variables */
 extern int	MaxFSMRelations;
 extern int	MaxFSMPages;

@@ -39,19 +40,26 @@ extern void InitFreeSpaceMap(void);
 extern int	FreeSpaceShmemSize(void);

 extern BlockNumber GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded);
 extern void RecordFreeSpace(RelFileNode *rel, BlockNumber page,
				Size spaceAvail);
 extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel,
							  BlockNumber oldPage,
							  Size oldSpaceAvail,
							  Size spaceNeeded);
-extern void MultiRecordFreeSpace(RelFileNode *rel,
-					 BlockNumber minPage,
-					 int nPages,
-					 PageFreeSpaceInfo *pageSpaces);
+extern Size GetAvgFSMRequestSize(RelFileNode *rel);
+extern void RecordRelationFreeSpace(RelFileNode *rel,
+						int nPages,
+						PageFreeSpaceInfo *pageSpaces);
+
+extern BlockNumber GetFreeIndexPage(RelFileNode *rel);
+extern void RecordIndexFreeSpace(RelFileNode *rel,
+					 int nPages,
+					 BlockNumber *pages);
+
+extern void FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks);
 extern void FreeSpaceMapForgetRel(RelFileNode *rel);
 extern void FreeSpaceMapForgetDatabase(Oid dbid);

 extern void PrintFreeSpaceMapStatistics(int elevel);

 #ifdef FREESPACE_DEBUG
 extern void DumpFreeSpace(void);
 #endif
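Taken together with the call sites changed above, the header shows how the old API maps onto the new one: heap vacuum code that called MultiRecordFreeSpace(rel, 0, nPages, pageSpaces) now sorts its entries and calls RecordRelationFreeSpace(rel, nPages, pageSpaces), with GetAvgFSMRequestSize() supplying the reporting threshold; index vacuum code records bare block numbers through RecordIndexFreeSpace() and retrieves them with GetFreeIndexPage() instead of GetPageWithFreeSpace(rel, BLCKSZ/2); and the old MultiRecordFreeSpace(rel, nblocks, 0, NULL) idiom in smgrtruncate() is replaced by the explicit FreeSpaceMapTruncateRel(rel, nblocks).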