mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-23 19:39:53 +08:00
GIN improvements
- Replace sorted array of entries in maintenance_work_mem to binary tree, this should improve create performance. - More precisely calculate allocated memory, eliminate leaks with user-defined extractValue() - Improve wordings in tsearch2
This commit is contained in:
parent
fa601357fb
commit
234163649e
@ -313,12 +313,12 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
|
||||
#ifdef IGNORE_LONGLEXEME
|
||||
ereport(NOTICE,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long")));
|
||||
errmsg("A word you are indexing is too long. It will be ignored.")));
|
||||
continue;
|
||||
#else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long")));
|
||||
errmsg("A word you are indexing is too long")));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -470,12 +470,12 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
|
||||
#ifdef IGNORE_LONGLEXEME
|
||||
ereport(NOTICE,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long")));
|
||||
errmsg("A word you are indexing is too long. It will be ignored.")));
|
||||
continue;
|
||||
#else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long")));
|
||||
errmsg("A word you are indexing is too long")));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.2 2006/07/11 16:55:34 teodor Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@ -22,17 +22,29 @@
|
||||
#include "utils/memutils.h"
|
||||
#include "access/tuptoaster.h"
|
||||
|
||||
#define DEF_NENTRY 128
|
||||
#define DEF_NENTRY 2048
|
||||
#define DEF_NPTR 4
|
||||
|
||||
void
|
||||
ginInitBA(BuildAccumulator *accum) {
|
||||
|
||||
accum->number = 0;
|
||||
accum->curget = 0;
|
||||
accum->length = DEF_NENTRY;
|
||||
accum->entries = (EntryAccumulator*)palloc0( sizeof(EntryAccumulator) * DEF_NENTRY );
|
||||
accum->allocatedMemory = sizeof(EntryAccumulator) * DEF_NENTRY;
|
||||
accum->maxdepth = 1;
|
||||
accum->stackpos = 0;
|
||||
accum->entries = NULL;
|
||||
accum->stack = NULL;
|
||||
accum->allocatedMemory = 0;
|
||||
accum->entryallocator = NULL;
|
||||
}
|
||||
|
||||
static EntryAccumulator*
|
||||
EAAllocate( BuildAccumulator *accum ) {
|
||||
if ( accum->entryallocator == NULL || accum->length>=DEF_NENTRY ) {
|
||||
accum->entryallocator = palloc(sizeof(EntryAccumulator)*DEF_NENTRY);
|
||||
accum->allocatedMemory += sizeof(EntryAccumulator)*DEF_NENTRY;
|
||||
accum->length = 0;
|
||||
}
|
||||
|
||||
accum->length++;
|
||||
return accum->entryallocator + accum->length - 1;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -61,64 +73,133 @@ ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heap
|
||||
entry->number++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find/store one entry from indexed value.
|
||||
* It supposes, that entry should be located between low and end of array of
|
||||
* entries. Returns position of found/inserted entry
|
||||
*/
|
||||
static uint32
|
||||
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry, uint32 low) {
|
||||
uint32 high = accum->number, mid;
|
||||
int res;
|
||||
static Datum
|
||||
getDatumCopy(BuildAccumulator *accum, Datum value) {
|
||||
Form_pg_attribute *att = accum->ginstate->tupdesc->attrs;
|
||||
Datum newvalue;
|
||||
int data_length = 0;
|
||||
void *ptr;
|
||||
|
||||
while(high>low) {
|
||||
mid = low + ((high - low) / 2);
|
||||
if ( att[0]->attbyval ) {
|
||||
store_att_byval(&newvalue, value, att[0]->attlen);
|
||||
} else {
|
||||
/* pass-by-reference */
|
||||
if (att[0]->attlen == -1) {
|
||||
/* varlena */
|
||||
data_length = VARATT_SIZE(DatumGetPointer(value));
|
||||
} else if (att[0]->attlen == -2) {
|
||||
/* c-string */
|
||||
data_length = strlen(DatumGetCString(value)) + 1;
|
||||
} else {
|
||||
/* fixed-length pass-by-reference */
|
||||
Assert(att[0]->attlen > 0);
|
||||
data_length = att[0]->attlen;
|
||||
}
|
||||
|
||||
res = compareEntries(accum->ginstate, entry, accum->entries[mid].value);
|
||||
|
||||
if ( res == 0 ) {
|
||||
ginInsertData( accum, accum->entries+mid, heapptr );
|
||||
return mid;
|
||||
} else if ( res > 0 )
|
||||
low = mid + 1;
|
||||
else
|
||||
high = mid;
|
||||
}
|
||||
|
||||
/* did not find an entry, insert */
|
||||
if ( accum->number >= accum->length ) {
|
||||
accum->allocatedMemory += sizeof(EntryAccumulator) * accum->length;
|
||||
accum->length *= 2;
|
||||
accum->entries = (EntryAccumulator*)repalloc( accum->entries,
|
||||
sizeof(EntryAccumulator) * accum->length );
|
||||
ptr = palloc( data_length );
|
||||
memcpy(ptr, DatumGetPointer(value), data_length);
|
||||
newvalue = PointerGetDatum(ptr);
|
||||
}
|
||||
|
||||
if ( high != accum->number )
|
||||
memmove( accum->entries+high+1, accum->entries+high, sizeof(EntryAccumulator) * (accum->number-high) );
|
||||
accum->allocatedMemory+=data_length;
|
||||
|
||||
accum->entries[high].value = entry;
|
||||
accum->entries[high].length = DEF_NPTR;
|
||||
accum->entries[high].number = 1;
|
||||
accum->entries[high].shouldSort = FALSE;
|
||||
accum->entries[high].list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
|
||||
accum->entries[high].list[0] = *heapptr;
|
||||
|
||||
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
|
||||
accum->number++;
|
||||
|
||||
return high;
|
||||
return newvalue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find/store one entry from indexed value.
|
||||
*/
|
||||
static void
|
||||
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry) {
|
||||
EntryAccumulator *ea = accum->entries, *pea = NULL;
|
||||
int res = 0;
|
||||
uint32 depth = 1;
|
||||
|
||||
while( ea ) {
|
||||
res = compareEntries(accum->ginstate, entry, ea->value);
|
||||
if ( res == 0 )
|
||||
break; /* found */
|
||||
else {
|
||||
pea = ea;
|
||||
if ( res < 0 )
|
||||
ea = ea->left;
|
||||
else
|
||||
ea = ea->right;
|
||||
}
|
||||
depth++;
|
||||
}
|
||||
|
||||
if ( depth > accum->maxdepth )
|
||||
accum->maxdepth = depth;
|
||||
|
||||
if ( ea == NULL ) {
|
||||
ea = EAAllocate(accum);
|
||||
|
||||
ea->left = ea->right = NULL;
|
||||
ea->value = getDatumCopy(accum, entry);
|
||||
ea->length = DEF_NPTR;
|
||||
ea->number = 1;
|
||||
ea->shouldSort = FALSE;
|
||||
ea->list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
|
||||
ea->list[0] = *heapptr;
|
||||
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
|
||||
|
||||
if ( pea == NULL )
|
||||
accum->entries = ea;
|
||||
else {
|
||||
Assert( res != 0 );
|
||||
if ( res < 0 )
|
||||
pea->left = ea;
|
||||
else
|
||||
pea->right = ea;
|
||||
}
|
||||
} else
|
||||
ginInsertData( accum, ea, heapptr );
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert one heap pointer. Requires entries to be sorted!
|
||||
* insert middle of left part the middle of right one,
|
||||
* then calls itself for each parts
|
||||
*/
|
||||
static void
|
||||
ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry,
|
||||
uint32 low, uint32 high, uint32 offset) {
|
||||
uint32 pos;
|
||||
uint32 middle = (low+high)>>1;
|
||||
|
||||
pos = (low+middle)>>1;
|
||||
if ( low!=middle && pos>=offset && pos-offset < nentry )
|
||||
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
|
||||
pos = (high+middle+1)>>1;
|
||||
if ( middle+1 != high && pos>=offset && pos-offset < nentry )
|
||||
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
|
||||
|
||||
if ( low!=middle )
|
||||
ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset );
|
||||
if ( high!=middle+1 )
|
||||
ginChooseElem(accum, heapptr, entries, nentry, middle+1, high, offset );
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert one heap pointer. Suppose entries is sorted.
|
||||
* Insertion order trys to get binary tree balanced: first insert middle value,
|
||||
* next middle on left part and middle of right part.
|
||||
*/
|
||||
void
|
||||
ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) {
|
||||
uint32 start=0,i;
|
||||
uint32 i, nbit=0, offset;
|
||||
|
||||
for(i=0;i<nentry;i++)
|
||||
start = ginInsertEntry( accum, heapptr, entries[i], start);
|
||||
if (nentry==0)
|
||||
return;
|
||||
|
||||
i=nentry-1;
|
||||
for(;i>0;i>>=1) nbit++;
|
||||
|
||||
nbit = 1<<nbit;
|
||||
offset = (nbit-nentry)/2;
|
||||
|
||||
ginInsertEntry( accum, heapptr, entries[ (nbit>>1)-offset ]);
|
||||
ginChooseElem(accum, heapptr, entries, nentry, 0, nbit, offset);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -128,28 +209,79 @@ qsortCompareItemPointers( const void *a, const void *b ) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* walk on binary tree and returns ordered nodes
|
||||
*/
|
||||
static EntryAccumulator*
|
||||
walkTree( BuildAccumulator *accum ) {
|
||||
EntryAccumulator *entry = accum->stack[ accum->stackpos ];
|
||||
|
||||
if ( entry->list != NULL ) {
|
||||
/* return entry itself: we already was at left sublink */
|
||||
return entry;
|
||||
} else if ( entry->right && entry->right != accum->stack[ accum->stackpos+1 ] ) {
|
||||
/* go on right sublink */
|
||||
accum->stackpos++;
|
||||
entry = entry->right;
|
||||
|
||||
/* find most-left value */
|
||||
for(;;) {
|
||||
accum->stack[ accum->stackpos ] = entry;
|
||||
if ( entry->left ) {
|
||||
accum->stackpos++;
|
||||
entry = entry->left;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* we already return all left subtree, itself and right subtree */
|
||||
if ( accum->stackpos == 0 )
|
||||
return 0;
|
||||
accum->stackpos--;
|
||||
return walkTree(accum);
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
ItemPointerData*
|
||||
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) {
|
||||
EntryAccumulator *entry;
|
||||
|
||||
ItemPointerData *list;
|
||||
if ( accum->curget >= accum->number )
|
||||
return NULL;
|
||||
else if ( accum->curget > 0 )
|
||||
pfree( accum->entries[ accum->curget-1 ].list );
|
||||
|
||||
entry = accum->entries + accum->curget;
|
||||
|
||||
if ( accum->stack == NULL ) {
|
||||
/* first call */
|
||||
accum->stack = palloc0(sizeof(EntryAccumulator*)*(accum->maxdepth+1));
|
||||
entry = accum->entries;
|
||||
|
||||
/* find most-left value */
|
||||
for(;;) {
|
||||
accum->stack[ accum->stackpos ] = entry;
|
||||
if ( entry->left ) {
|
||||
accum->stackpos++;
|
||||
entry = entry->left;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
pfree( accum->stack[ accum->stackpos ]->list );
|
||||
accum->stack[ accum->stackpos ]->list = NULL;
|
||||
entry = walkTree( accum );
|
||||
}
|
||||
|
||||
if ( entry == NULL )
|
||||
return NULL;
|
||||
|
||||
*n = entry->number;
|
||||
*value = entry->value;
|
||||
list = entry->list;
|
||||
accum->curget++;
|
||||
|
||||
Assert(list != NULL);
|
||||
|
||||
if ( entry->shouldSort && entry->number > 1 )
|
||||
qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers);
|
||||
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.2 2006/05/10 23:18:38 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.3 2006/07/11 16:55:34 teodor Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@ -26,6 +26,7 @@ typedef struct {
|
||||
GinState ginstate;
|
||||
double indtuples;
|
||||
MemoryContext tmpCtx;
|
||||
MemoryContext funcCtx;
|
||||
BuildAccumulator accum;
|
||||
} GinBuildState;
|
||||
|
||||
@ -189,19 +190,22 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData
|
||||
* Function isnt use during normal insert
|
||||
*/
|
||||
static uint32
|
||||
ginHeapTupleBulkInsert(BuildAccumulator *accum, Datum value, ItemPointer heapptr) {
|
||||
ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr) {
|
||||
Datum *entries;
|
||||
uint32 nentries;
|
||||
MemoryContext oldCtx;
|
||||
|
||||
entries = extractEntriesSU( accum->ginstate, value, &nentries);
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
|
||||
entries = extractEntriesSU( buildstate->accum.ginstate, value, &nentries);
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
if ( nentries==0 )
|
||||
/* nothing to insert */
|
||||
return 0;
|
||||
|
||||
ginInsertRecordBA( accum, heapptr, entries, nentries);
|
||||
ginInsertRecordBA( &buildstate->accum, heapptr, entries, nentries);
|
||||
|
||||
pfree( entries );
|
||||
MemoryContextReset(buildstate->funcCtx);
|
||||
|
||||
return nentries;
|
||||
}
|
||||
@ -218,7 +222,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
|
||||
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
|
||||
|
||||
buildstate->indtuples += ginHeapTupleBulkInsert(&buildstate->accum, *values, &htup->t_self);
|
||||
buildstate->indtuples += ginHeapTupleBulkInsert(buildstate, *values, &htup->t_self);
|
||||
|
||||
/* we use only half maintenance_work_mem, because there is some leaks
|
||||
during insertion and extract values */
|
||||
@ -297,6 +301,12 @@ ginbuild(PG_FUNCTION_ARGS) {
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
|
||||
buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
|
||||
"Gin build temporary context for user-defined function",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
|
||||
buildstate.accum.ginstate = &buildstate.ginstate;
|
||||
ginInitBA( &buildstate.accum );
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
* header file for postgres inverted index access method implementation.
|
||||
*
|
||||
* Copyright (c) 2006, PostgreSQL Global Development Group
|
||||
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.4 2006/07/11 13:54:24 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.5 2006/07/11 16:55:34 teodor Exp $
|
||||
*--------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@ -414,21 +414,26 @@ extern Datum arraycontains(PG_FUNCTION_ARGS);
|
||||
extern Datum arraycontained(PG_FUNCTION_ARGS);
|
||||
|
||||
/* ginbulk.c */
|
||||
typedef struct {
|
||||
typedef struct EntryAccumulator {
|
||||
Datum value;
|
||||
uint32 length;
|
||||
uint32 number;
|
||||
ItemPointerData *list;
|
||||
bool shouldSort;
|
||||
struct EntryAccumulator *left;
|
||||
struct EntryAccumulator *right;
|
||||
} EntryAccumulator;
|
||||
|
||||
typedef struct {
|
||||
GinState *ginstate;
|
||||
EntryAccumulator *entries;
|
||||
uint32 length;
|
||||
uint32 number;
|
||||
uint32 curget;
|
||||
uint32 maxdepth;
|
||||
EntryAccumulator **stack;
|
||||
uint32 stackpos;
|
||||
uint32 allocatedMemory;
|
||||
|
||||
uint32 length;
|
||||
EntryAccumulator *entryallocator;
|
||||
} BuildAccumulator;
|
||||
|
||||
extern void ginInitBA(BuildAccumulator *accum);
|
||||
|
Loading…
Reference in New Issue
Block a user