Repair some problems in GIST-index contrib modules. Patch from

Teodor Sigaev <teodor@stack.net>.
This commit is contained in:
Tom Lane 2002-02-07 22:11:43 +00:00
parent e206ff5946
commit fe1a9c3362
4 changed files with 39 additions and 33 deletions

View File

@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec,
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
datum_l = copy_intArrayType(datum_alpha);

View File

@ -198,23 +198,6 @@ Don't forget to do
make clean; make; make install
2.
As it was mentioned above we don't use explicitly ID of lexems
as in OpenFTS but use hash function (crc32) instead to map lexem to
integer. Our experiments show that probability of collision is quite small:
for english text it's about 10**(-6) and 10**(-5) for russian collection.
Default installation doesn't check for collisions but if your application
does need to guarantee an exact (no collisions) search, you need
to update system table to mark index islossy:
update pg_amop set amopreqcheck = true where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
If you don't bother about collisions :
update pg_amop set amopreqcheck = false where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
3.
txtidx doesn't preserve words ordering (this is not critical for searching)
for performance reason, for example:
@ -224,7 +207,7 @@ test=# select 'page two'::txtidx;
'two' 'page'
(1 row)
4.
3.
Indexed access provided by txtidx data type isn't always good
because of internal data structure we use (RD-Tree). Particularly,
queries like '!gist' will be slower than just a sequential scan,
@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt );
These two queries will be processed by scanning of full index !
Very slow !
5.
4.
Following selects produce the same result
select title from titles where titleidx @@ 'patch&gist';

View File

@ -10,6 +10,7 @@
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "access/tuptoaster.h"
#include "txtidx.h"
#include "query.h"
@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) {
return res + 1 - a;
}
static void
makesign( BITVECP sign, GISTTYPE *a) {
int4 k,len = ARRNELEM( a );
int4 *ptr = GETARR( a );
MemSet( (void*)sign, 0, sizeof(BITVEC) );
for(k=0;k<len;k++)
HASH( sign, ptr[k] );
}
Datum
gtxtidx_compress(PG_FUNCTION_ARGS) {
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
*arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
arr++; ptr++;
}
if ( val != toastedval )
pfree(val);
len = uniqueint( GETARR(res), val->size );
if ( len != val->size ) {
@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
len = CALCGTSIZE( ARRKEY, len );
res = (GISTTYPE*)repalloc( (void*)res, len );
res->len = len;
}
}
if ( val != toastedval )
pfree(val);
/* make signature, if array is too long */
if ( res->len > TOAST_INDEX_TARGET ) {
GISTTYPE *ressign;
len = CALCGTSIZE( SIGNKEY, 0 );
ressign = (GISTTYPE*)palloc( len );
ressign->len = len;
ressign->flag = SIGNKEY;
makesign( GETSIGN(ressign), res );
pfree(res);
res = ressign;
}
retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER( penalty );
}
static void
makesign( BITVECP sign, GISTTYPE *a) {
int4 k,len = ARRNELEM( a );
int4 *ptr = GETARR( a );
MemSet( (void*)sign, 0, sizeof(BITVEC) );
for(k=0;k<len;k++)
HASH( sign, ptr[k] );
}
typedef struct {
bool allistrue;
BITVEC sign;
@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
right = v->spl_right;
v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */
if ( cache[seed_1].allistrue ) {
datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );

View File

@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 1, false, c.opoid
SELECT opcl.oid, 1, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
and c.oprname = '@@';
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 2, false, c.opoid
SELECT opcl.oid, 2, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')