From 94e03330cbd163378e43094388f87fcba4801ba8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 22 Mar 2005 06:17:03 +0000 Subject: [PATCH] Create a routine PageIndexMultiDelete() that replaces a loop around PageIndexTupleDelete() with a single pass of compactification --- logic mostly lifted from PageRepairFragmentation. I noticed while profiling that a VACUUM that's cleaning up a whole lot of deleted tuples would spend as much as a third of its CPU time in PageIndexTupleDelete; not too surprising considering the loop method was roughly O(N^2) in the number of tuples involved. --- src/backend/access/nbtree/nbtpage.c | 11 +-- src/backend/access/nbtree/nbtxlog.c | 9 +- src/backend/storage/page/bufpage.c | 140 +++++++++++++++++++++++++++- src/include/storage/bufpage.h | 3 +- 4 files changed, 144 insertions(+), 19 deletions(-) diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 3e2a9010a4..2b82a87a1d 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.81 2004/12/31 21:59:22 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.82 2005/03/22 06:17:03 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -639,17 +639,12 @@ _bt_delitems(Relation rel, Buffer buf, OffsetNumber *itemnos, int nitems) { Page page = BufferGetPage(buf); - int i; /* No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); - /* - * Delete the items in reverse order so we don't have to think about - * adjusting item numbers for previous deletions. - */ - for (i = nitems - 1; i >= 0; i--) - PageIndexTupleDelete(page, itemnos[i]); + /* Fix the page */ + PageIndexMultiDelete(page, itemnos, nitems); /* XLOG stuff */ if (!rel->rd_istemp) diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 0bd2c418f0..ade60619a3 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.19 2004/12/31 21:59:22 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -411,12 +411,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record) unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete); unend = (OffsetNumber *) ((char *) xlrec + record->xl_len); - /* be careful to delete from back to front */ - while (unused < unend) - { - unend--; - PageIndexTupleDelete(page, *unend); - } + PageIndexMultiDelete(page, unused, unend - unused); } PageSetLSN(page, lsn); diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 6d6957e279..c33a0011e6 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.62 2004/12/31 22:01:10 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -274,13 +274,14 @@ PageRestoreTempPage(Page tempPage, Page oldPage) } /* - * sorting support for PageRepairFragmentation + * sorting support for PageRepairFragmentation and PageIndexMultiDelete */ typedef struct itemIdSortData { int offsetindex; /* linp array index */ int itemoff; /* page offset of item data */ Size alignedlen; /* MAXALIGN(item data len) */ + ItemIdData olditemid; /* used only in PageIndexMultiDelete */ } itemIdSortData; typedef itemIdSortData *itemIdSort; @@ -297,7 +298,8 @@ itemoffcompare(const void *itemidp1, const void *itemidp2) * * Frees fragmented space on a page. * It doesn't remove unused line pointers! Please don't change this. - * This routine is usable for heap pages only. + * + * This routine is usable for heap pages only, but see PageIndexMultiDelete. * * Returns number of unused line pointers on page. If "unused" is not NULL * then the unused[] array is filled with indexes of unused line pointers. @@ -543,3 +545,135 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum) } } } + + +/* + * PageIndexMultiDelete + * + * This routine handles the case of deleting multiple tuples from an + * index page at once. It is considerably faster than a loop around + * PageIndexTupleDelete ... however, the caller *must* supply the array + * of item numbers to be deleted in item number order! + */ +void +PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) +{ + PageHeader phdr = (PageHeader) page; + Offset pd_lower = phdr->pd_lower; + Offset pd_upper = phdr->pd_upper; + Offset pd_special = phdr->pd_special; + itemIdSort itemidbase, + itemidptr; + ItemId lp; + int nline, + nused; + int i; + Size totallen; + Offset upper; + Size size; + unsigned offset; + int nextitm; + OffsetNumber offnum; + + /* + * If there aren't very many items to delete, then retail + * PageIndexTupleDelete is the best way. Delete the items in reverse + * order so we don't have to think about adjusting item numbers for + * previous deletions. + * + * TODO: tune the magic number here + */ + if (nitems <= 2) + { + while (--nitems >= 0) + PageIndexTupleDelete(page, itemnos[nitems]); + return; + } + + /* + * As with PageRepairFragmentation, paranoia seems justified. + */ + if (pd_lower < SizeOfPageHeaderData || + pd_lower > pd_upper || + pd_upper > pd_special || + pd_special > BLCKSZ || + pd_special != MAXALIGN(pd_special)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u", + pd_lower, pd_upper, pd_special))); + + /* + * Scan the item pointer array and build a list of just the ones we + * are going to keep. Notice we do not modify the page yet, since + * we are still validity-checking. + */ + nline = PageGetMaxOffsetNumber(page); + itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nline); + itemidptr = itemidbase; + totallen = 0; + nused = 0; + nextitm = 0; + for (offnum = 1; offnum <= nline; offnum++) + { + lp = PageGetItemId(page, offnum); + size = ItemIdGetLength(lp); + offset = ItemIdGetOffset(lp); + if (offset < pd_upper || + (offset + size) > pd_special || + offset != MAXALIGN(offset)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted item pointer: offset = %u, size = %u", + offset, (unsigned int) size))); + + if (nextitm < nitems && offnum == itemnos[nextitm]) + { + /* skip item to be deleted */ + nextitm++; + } + else + { + itemidptr->offsetindex = nused; /* where it will go */ + itemidptr->itemoff = offset; + itemidptr->olditemid = *lp; + itemidptr->alignedlen = MAXALIGN(size); + totallen += itemidptr->alignedlen; + itemidptr++; + nused++; + } + } + + /* this will catch invalid or out-of-order itemnos[] */ + if (nextitm != nitems) + elog(ERROR, "incorrect index offsets supplied"); + + if (totallen > (Size) (pd_special - pd_lower)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted item lengths: total %u, available space %u", + (unsigned int) totallen, pd_special - pd_lower))); + + /* sort itemIdSortData array into decreasing itemoff order */ + qsort((char *) itemidbase, nused, sizeof(itemIdSortData), + itemoffcompare); + + /* compactify page and install new itemids */ + upper = pd_special; + + for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++) + { + lp = PageGetItemId(page, itemidptr->offsetindex + 1); + upper -= itemidptr->alignedlen; + memmove((char *) page + upper, + (char *) page + itemidptr->itemoff, + itemidptr->alignedlen); + *lp = itemidptr->olditemid; + lp->lp_off = upper; + } + + phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData); + phdr->pd_upper = upper; + + pfree(itemidbase); +} diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 71d043cd7f..8b195132cb 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.63 2004/12/31 22:03:42 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.64 2005/03/22 06:17:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -323,5 +323,6 @@ extern void PageRestoreTempPage(Page tempPage, Page oldPage); extern int PageRepairFragmentation(Page page, OffsetNumber *unused); extern Size PageGetFreeSpace(Page page); extern void PageIndexTupleDelete(Page page, OffsetNumber offset); +extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems); #endif /* BUFPAGE_H */