From 37e9992214442ac485eee31d05506d430a89019f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 24 Mar 2009 22:06:32 +0000 Subject: [PATCH] Install a search tree depth limit in GIN bulk-insert operations, to prevent them from degrading badly when the input is sorted or nearly so. In this scenario the tree is unbalanced to the point of becoming a mere linked list, so insertions become O(N^2). The easiest and most safely back-patchable solution is to stop growing the tree sooner, i.e., limit the growth of N. We might later consider a rebalancing tree algorithm, but it's not clear that the benefit would be worth the cost and complexity. Per report from Sergey Burladyan and an earlier complaint from Heikki. Back-patch to 8.2; older versions didn't have GIN indexes. --- src/backend/access/gin/gininsert.c | 5 +++-- src/include/access/gin.h | 17 +++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index ed79a8f3de..e511f8ca1c 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.5.2.1 2007/06/05 12:48:21 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.5.2.2 2009/03/24 22:06:32 tgl Exp $ *------------------------------------------------------------------------- */ @@ -241,7 +241,8 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values, * we use only half maintenance_work_mem, because there is some leaks * during insertion and extract values */ - if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L / 2L) + if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L / 2L || + buildstate->accum.maxdepth > GIN_MAX_TREE_DEPTH) { ItemPointerData *list; Datum entry; diff --git a/src/include/access/gin.h 
b/src/include/access/gin.h index cee40175b9..557ac1c79e 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -3,7 +3,7 @@ * header file for postgres inverted index access method implementation. * * Copyright (c) 2006, PostgreSQL Global Development Group - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.9.2.2 2008/04/22 17:53:41 teodor Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.9.2.3 2009/03/24 22:06:32 tgl Exp $ *-------------------------------------------------------------------------- */ @@ -33,6 +33,14 @@ typedef XLogRecPtr GinNSN; +/* + * Max depth allowed in search tree during bulk inserts. This is to keep from + * degenerating to O(N^2) behavior when the tree is unbalanced due to sorted + * or nearly-sorted input. (Perhaps it would be better to use a balanced-tree + * algorithm, but in common cases that would only add useless overhead.) + */ +#define GIN_MAX_TREE_DEPTH 100 + /* * Page opaque data in a inverted index page. */ @@ -314,12 +322,9 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf); /* gindatapage.c */ extern int compareItemPointers(ItemPointer a, ItemPointer b); -extern void -MergeItemPointers( - ItemPointerData *dst, +extern void MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, - ItemPointerData *b, uint32 nb -); + ItemPointerData *b, uint32 nb); extern void GinDataPageAddItem(Page page, void *data, OffsetNumber offset); extern void PageDeletePostingItem(Page page, OffsetNumber offset);