mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-15 08:20:16 +08:00
Create a routine PageIndexMultiDelete() that replaces a loop around
PageIndexTupleDelete() with a single pass of compactification --- logic mostly lifted from PageRepairFragmentation. I noticed while profiling that a VACUUM that's cleaning up a whole lot of deleted tuples would spend as much as a third of its CPU time in PageIndexTupleDelete; not too surprising considering the loop method was roughly O(N^2) in the number of tuples involved.
This commit is contained in:
parent
775d28302c
commit
94e03330cb
@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.81 2004/12/31 21:59:22 pgsql Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.82 2005/03/22 06:17:03 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@ -639,17 +639,12 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||
OffsetNumber *itemnos, int nitems)
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
int i;
|
||||
|
||||
/* No ereport(ERROR) until changes are logged */
|
||||
START_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
* Delete the items in reverse order so we don't have to think about
|
||||
* adjusting item numbers for previous deletions.
|
||||
*/
|
||||
for (i = nitems - 1; i >= 0; i--)
|
||||
PageIndexTupleDelete(page, itemnos[i]);
|
||||
/* Fix the page */
|
||||
PageIndexMultiDelete(page, itemnos, nitems);
|
||||
|
||||
/* XLOG stuff */
|
||||
if (!rel->rd_istemp)
|
||||
|
@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.19 2004/12/31 21:59:22 pgsql Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -411,12 +411,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
|
||||
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
|
||||
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
|
||||
|
||||
/* be careful to delete from back to front */
|
||||
while (unused < unend)
|
||||
{
|
||||
unend--;
|
||||
PageIndexTupleDelete(page, *unend);
|
||||
}
|
||||
PageIndexMultiDelete(page, unused, unend - unused);
|
||||
}
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.62 2004/12/31 22:01:10 pgsql Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -274,13 +274,14 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
|
||||
}
|
||||
|
||||
/*
|
||||
* sorting support for PageRepairFragmentation
|
||||
* sorting support for PageRepairFragmentation and PageIndexMultiDelete
|
||||
*/
|
||||
typedef struct itemIdSortData
|
||||
{
|
||||
int offsetindex; /* linp array index */
|
||||
int itemoff; /* page offset of item data */
|
||||
Size alignedlen; /* MAXALIGN(item data len) */
|
||||
ItemIdData olditemid; /* used only in PageIndexMultiDelete */
|
||||
} itemIdSortData;
|
||||
typedef itemIdSortData *itemIdSort;
|
||||
|
||||
@ -297,7 +298,8 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
|
||||
*
|
||||
* Frees fragmented space on a page.
|
||||
* It doesn't remove unused line pointers! Please don't change this.
|
||||
* This routine is usable for heap pages only.
|
||||
*
|
||||
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
|
||||
*
|
||||
* Returns number of unused line pointers on page. If "unused" is not NULL
|
||||
* then the unused[] array is filled with indexes of unused line pointers.
|
||||
@ -543,3 +545,135 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PageIndexMultiDelete
|
||||
*
|
||||
* This routine handles the case of deleting multiple tuples from an
|
||||
* index page at once. It is considerably faster than a loop around
|
||||
* PageIndexTupleDelete ... however, the caller *must* supply the array
|
||||
* of item numbers to be deleted in item number order!
|
||||
*/
|
||||
void
|
||||
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
|
||||
{
|
||||
PageHeader phdr = (PageHeader) page;
|
||||
Offset pd_lower = phdr->pd_lower;
|
||||
Offset pd_upper = phdr->pd_upper;
|
||||
Offset pd_special = phdr->pd_special;
|
||||
itemIdSort itemidbase,
|
||||
itemidptr;
|
||||
ItemId lp;
|
||||
int nline,
|
||||
nused;
|
||||
int i;
|
||||
Size totallen;
|
||||
Offset upper;
|
||||
Size size;
|
||||
unsigned offset;
|
||||
int nextitm;
|
||||
OffsetNumber offnum;
|
||||
|
||||
/*
|
||||
* If there aren't very many items to delete, then retail
|
||||
* PageIndexTupleDelete is the best way. Delete the items in reverse
|
||||
* order so we don't have to think about adjusting item numbers for
|
||||
* previous deletions.
|
||||
*
|
||||
* TODO: tune the magic number here
|
||||
*/
|
||||
if (nitems <= 2)
|
||||
{
|
||||
while (--nitems >= 0)
|
||||
PageIndexTupleDelete(page, itemnos[nitems]);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* As with PageRepairFragmentation, paranoia seems justified.
|
||||
*/
|
||||
if (pd_lower < SizeOfPageHeaderData ||
|
||||
pd_lower > pd_upper ||
|
||||
pd_upper > pd_special ||
|
||||
pd_special > BLCKSZ ||
|
||||
pd_special != MAXALIGN(pd_special))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
|
||||
pd_lower, pd_upper, pd_special)));
|
||||
|
||||
/*
|
||||
* Scan the item pointer array and build a list of just the ones we
|
||||
* are going to keep. Notice we do not modify the page yet, since
|
||||
* we are still validity-checking.
|
||||
*/
|
||||
nline = PageGetMaxOffsetNumber(page);
|
||||
itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nline);
|
||||
itemidptr = itemidbase;
|
||||
totallen = 0;
|
||||
nused = 0;
|
||||
nextitm = 0;
|
||||
for (offnum = 1; offnum <= nline; offnum++)
|
||||
{
|
||||
lp = PageGetItemId(page, offnum);
|
||||
size = ItemIdGetLength(lp);
|
||||
offset = ItemIdGetOffset(lp);
|
||||
if (offset < pd_upper ||
|
||||
(offset + size) > pd_special ||
|
||||
offset != MAXALIGN(offset))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("corrupted item pointer: offset = %u, size = %u",
|
||||
offset, (unsigned int) size)));
|
||||
|
||||
if (nextitm < nitems && offnum == itemnos[nextitm])
|
||||
{
|
||||
/* skip item to be deleted */
|
||||
nextitm++;
|
||||
}
|
||||
else
|
||||
{
|
||||
itemidptr->offsetindex = nused; /* where it will go */
|
||||
itemidptr->itemoff = offset;
|
||||
itemidptr->olditemid = *lp;
|
||||
itemidptr->alignedlen = MAXALIGN(size);
|
||||
totallen += itemidptr->alignedlen;
|
||||
itemidptr++;
|
||||
nused++;
|
||||
}
|
||||
}
|
||||
|
||||
/* this will catch invalid or out-of-order itemnos[] */
|
||||
if (nextitm != nitems)
|
||||
elog(ERROR, "incorrect index offsets supplied");
|
||||
|
||||
if (totallen > (Size) (pd_special - pd_lower))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("corrupted item lengths: total %u, available space %u",
|
||||
(unsigned int) totallen, pd_special - pd_lower)));
|
||||
|
||||
/* sort itemIdSortData array into decreasing itemoff order */
|
||||
qsort((char *) itemidbase, nused, sizeof(itemIdSortData),
|
||||
itemoffcompare);
|
||||
|
||||
/* compactify page and install new itemids */
|
||||
upper = pd_special;
|
||||
|
||||
for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++)
|
||||
{
|
||||
lp = PageGetItemId(page, itemidptr->offsetindex + 1);
|
||||
upper -= itemidptr->alignedlen;
|
||||
memmove((char *) page + upper,
|
||||
(char *) page + itemidptr->itemoff,
|
||||
itemidptr->alignedlen);
|
||||
*lp = itemidptr->olditemid;
|
||||
lp->lp_off = upper;
|
||||
}
|
||||
|
||||
phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
|
||||
phdr->pd_upper = upper;
|
||||
|
||||
pfree(itemidbase);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.63 2004/12/31 22:03:42 pgsql Exp $
|
||||
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.64 2005/03/22 06:17:03 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -323,5 +323,6 @@ extern void PageRestoreTempPage(Page tempPage, Page oldPage);
|
||||
extern int PageRepairFragmentation(Page page, OffsetNumber *unused);
|
||||
extern Size PageGetFreeSpace(Page page);
|
||||
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
|
||||
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
|
||||
|
||||
#endif /* BUFPAGE_H */
|
||||
|
Loading…
Reference in New Issue
Block a user