mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-11-27 07:21:09 +08:00
Handle equality operator in contrib/pg_trgm
Obviously, in order to equality operator be satisfiable, target string must contain all the trigrams of the search string. On this base, we implement equality operator in GiST/GIN indexes with recheck. Discussion: https://postgr.es/m/CAOBaU_YWwtT7tdggtROacjdOdeYHCz-tmSwuC-j-TOG-g97J0w%40mail.gmail.com Author: Julien Rouhaud Reviewed-by: Tom Lane, Alexander Korotkov, Georgios Kokolatos, Erik Rijkers
This commit is contained in:
parent
92bf7e2d02
commit
935f666650
@ -9,7 +9,7 @@ OBJS = \
|
||||
trgm_regexp.o
|
||||
|
||||
EXTENSION = pg_trgm
|
||||
DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
|
||||
DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
|
||||
pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
|
||||
pg_trgm--1.0--1.1.sql
|
||||
PGFILEDESC = "pg_trgm - trigram matching"
|
||||
|
@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef');
|
||||
insert into test2 values ('quark');
|
||||
insert into test2 values (' z foo bar');
|
||||
insert into test2 values ('/123/-45/');
|
||||
insert into test2 values ('line 1');
|
||||
insert into test2 values ('%line 2');
|
||||
insert into test2 values ('line 3%');
|
||||
insert into test2 values ('%line 4%');
|
||||
insert into test2 values ('%li%ne 5%');
|
||||
insert into test2 values ('li_e 6');
|
||||
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
||||
set enable_seqscan=off;
|
||||
explain (costs off)
|
||||
@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$';
|
||||
quark
|
||||
z foo bar
|
||||
/123/-45/
|
||||
(4 rows)
|
||||
line 1
|
||||
%line 2
|
||||
line 3%
|
||||
%line 4%
|
||||
%li%ne 5%
|
||||
li_e 6
|
||||
(10 rows)
|
||||
|
||||
select * from test2 where t ~* 'DEF';
|
||||
t
|
||||
@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}';
|
||||
abcdef
|
||||
quark
|
||||
z foo bar
|
||||
(3 rows)
|
||||
line 1
|
||||
%line 2
|
||||
line 3%
|
||||
%line 4%
|
||||
(7 rows)
|
||||
|
||||
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
||||
t
|
||||
@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d';
|
||||
/123/-45/
|
||||
(1 row)
|
||||
|
||||
-- test = operator
|
||||
explain (costs off)
|
||||
select * from test2 where t = 'abcdef';
|
||||
QUERY PLAN
|
||||
------------------------------------------
|
||||
Bitmap Heap Scan on test2
|
||||
Recheck Cond: (t = 'abcdef'::text)
|
||||
-> Bitmap Index Scan on test2_idx_gin
|
||||
Index Cond: (t = 'abcdef'::text)
|
||||
(4 rows)
|
||||
|
||||
select * from test2 where t = 'abcdef';
|
||||
t
|
||||
--------
|
||||
abcdef
|
||||
(1 row)
|
||||
|
||||
explain (costs off)
|
||||
select * from test2 where t = '%line%';
|
||||
QUERY PLAN
|
||||
------------------------------------------
|
||||
Bitmap Heap Scan on test2
|
||||
Recheck Cond: (t = '%line%'::text)
|
||||
-> Bitmap Index Scan on test2_idx_gin
|
||||
Index Cond: (t = '%line%'::text)
|
||||
(4 rows)
|
||||
|
||||
select * from test2 where t = '%line%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = 'li_e 1';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%line 2';
|
||||
t
|
||||
---------
|
||||
%line 2
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = 'line 3%';
|
||||
t
|
||||
---------
|
||||
line 3%
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = '%line 3%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%line 4%';
|
||||
t
|
||||
----------
|
||||
%line 4%
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = '%line 5%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%li_ne 5%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%li%ne 5%';
|
||||
t
|
||||
-----------
|
||||
%li%ne 5%
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = 'line 6';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = 'li_e 6';
|
||||
t
|
||||
--------
|
||||
li_e 6
|
||||
(1 row)
|
||||
|
||||
drop index test2_idx_gin;
|
||||
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
||||
set enable_seqscan=off;
|
||||
@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$';
|
||||
quark
|
||||
z foo bar
|
||||
/123/-45/
|
||||
(4 rows)
|
||||
line 1
|
||||
%line 2
|
||||
line 3%
|
||||
%line 4%
|
||||
%li%ne 5%
|
||||
li_e 6
|
||||
(10 rows)
|
||||
|
||||
select * from test2 where t ~* 'DEF';
|
||||
t
|
||||
@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}';
|
||||
abcdef
|
||||
quark
|
||||
z foo bar
|
||||
(3 rows)
|
||||
line 1
|
||||
%line 2
|
||||
line 3%
|
||||
%line 4%
|
||||
(7 rows)
|
||||
|
||||
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
||||
t
|
||||
@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d';
|
||||
/123/-45/
|
||||
(1 row)
|
||||
|
||||
-- test = operator
|
||||
explain (costs off)
|
||||
select * from test2 where t = 'abcdef';
|
||||
QUERY PLAN
|
||||
------------------------------------------
|
||||
Index Scan using test2_idx_gist on test2
|
||||
Index Cond: (t = 'abcdef'::text)
|
||||
(2 rows)
|
||||
|
||||
select * from test2 where t = 'abcdef';
|
||||
t
|
||||
--------
|
||||
abcdef
|
||||
(1 row)
|
||||
|
||||
explain (costs off)
|
||||
select * from test2 where t = '%line%';
|
||||
QUERY PLAN
|
||||
------------------------------------------
|
||||
Index Scan using test2_idx_gist on test2
|
||||
Index Cond: (t = '%line%'::text)
|
||||
(2 rows)
|
||||
|
||||
select * from test2 where t = '%line%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = 'li_e 1';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%line 2';
|
||||
t
|
||||
---------
|
||||
%line 2
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = 'line 3%';
|
||||
t
|
||||
---------
|
||||
line 3%
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = '%line 3%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%line 4%';
|
||||
t
|
||||
----------
|
||||
%line 4%
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = '%line 5%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%li_ne 5%';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = '%li%ne 5%';
|
||||
t
|
||||
-----------
|
||||
%li%ne 5%
|
||||
(1 row)
|
||||
|
||||
select * from test2 where t = 'line 6';
|
||||
t
|
||||
---
|
||||
(0 rows)
|
||||
|
||||
select * from test2 where t = 'li_e 6';
|
||||
t
|
||||
--------
|
||||
li_e 6
|
||||
(1 row)
|
||||
|
||||
-- Check similarity threshold (bug #14202)
|
||||
CREATE TEMP TABLE restaurants (city text);
|
||||
INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
|
||||
|
10
contrib/pg_trgm/pg_trgm--1.5--1.6.sql
Normal file
10
contrib/pg_trgm/pg_trgm--1.5--1.6.sql
Normal file
@ -0,0 +1,10 @@
|
||||
/* contrib/pg_trgm/pg_trgm--1.5--1.6.sql */
|
||||
|
||||
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
|
||||
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.6'" to load this file. \quit
|
||||
|
||||
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
|
||||
OPERATOR 11 pg_catalog.= (text, text);
|
||||
|
||||
ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
|
||||
OPERATOR 11 pg_catalog.= (text, text);
|
@ -1,6 +1,6 @@
|
||||
# pg_trgm extension
|
||||
comment = 'text similarity measurement and index searching based on trigrams'
|
||||
default_version = '1.5'
|
||||
default_version = '1.6'
|
||||
module_pathname = '$libdir/pg_trgm'
|
||||
relocatable = true
|
||||
trusted = true
|
||||
|
@ -101,6 +101,12 @@ insert into test2 values ('abcdef');
|
||||
insert into test2 values ('quark');
|
||||
insert into test2 values (' z foo bar');
|
||||
insert into test2 values ('/123/-45/');
|
||||
insert into test2 values ('line 1');
|
||||
insert into test2 values ('%line 2');
|
||||
insert into test2 values ('line 3%');
|
||||
insert into test2 values ('%line 4%');
|
||||
insert into test2 values ('%li%ne 5%');
|
||||
insert into test2 values ('li_e 6');
|
||||
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
||||
set enable_seqscan=off;
|
||||
explain (costs off)
|
||||
@ -137,6 +143,23 @@ select * from test2 where t ~ ' z foo bar';
|
||||
select * from test2 where t ~ ' z foo';
|
||||
select * from test2 where t ~ 'qua(?!foo)';
|
||||
select * from test2 where t ~ '/\d+/-\d';
|
||||
-- test = operator
|
||||
explain (costs off)
|
||||
select * from test2 where t = 'abcdef';
|
||||
select * from test2 where t = 'abcdef';
|
||||
explain (costs off)
|
||||
select * from test2 where t = '%line%';
|
||||
select * from test2 where t = '%line%';
|
||||
select * from test2 where t = 'li_e 1';
|
||||
select * from test2 where t = '%line 2';
|
||||
select * from test2 where t = 'line 3%';
|
||||
select * from test2 where t = '%line 3%';
|
||||
select * from test2 where t = '%line 4%';
|
||||
select * from test2 where t = '%line 5%';
|
||||
select * from test2 where t = '%li_ne 5%';
|
||||
select * from test2 where t = '%li%ne 5%';
|
||||
select * from test2 where t = 'line 6';
|
||||
select * from test2 where t = 'li_e 6';
|
||||
drop index test2_idx_gin;
|
||||
|
||||
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
||||
@ -175,6 +198,23 @@ select * from test2 where t ~ ' z foo bar';
|
||||
select * from test2 where t ~ ' z foo';
|
||||
select * from test2 where t ~ 'qua(?!foo)';
|
||||
select * from test2 where t ~ '/\d+/-\d';
|
||||
-- test = operator
|
||||
explain (costs off)
|
||||
select * from test2 where t = 'abcdef';
|
||||
select * from test2 where t = 'abcdef';
|
||||
explain (costs off)
|
||||
select * from test2 where t = '%line%';
|
||||
select * from test2 where t = '%line%';
|
||||
select * from test2 where t = 'li_e 1';
|
||||
select * from test2 where t = '%line 2';
|
||||
select * from test2 where t = 'line 3%';
|
||||
select * from test2 where t = '%line 3%';
|
||||
select * from test2 where t = '%line 4%';
|
||||
select * from test2 where t = '%line 5%';
|
||||
select * from test2 where t = '%li_ne 5%';
|
||||
select * from test2 where t = '%li%ne 5%';
|
||||
select * from test2 where t = 'line 6';
|
||||
select * from test2 where t = 'li_e 6';
|
||||
|
||||
-- Check similarity threshold (bug #14202)
|
||||
|
||||
|
@ -37,6 +37,7 @@
|
||||
#define WordDistanceStrategyNumber 8
|
||||
#define StrictWordSimilarityStrategyNumber 9
|
||||
#define StrictWordDistanceStrategyNumber 10
|
||||
#define EqualStrategyNumber 11
|
||||
|
||||
typedef char trgm[3];
|
||||
|
||||
|
@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
|
||||
case SimilarityStrategyNumber:
|
||||
case WordSimilarityStrategyNumber:
|
||||
case StrictWordSimilarityStrategyNumber:
|
||||
case EqualStrategyNumber:
|
||||
trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
|
||||
break;
|
||||
case ILikeStrategyNumber:
|
||||
@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
|
||||
#endif
|
||||
/* FALL THRU */
|
||||
case LikeStrategyNumber:
|
||||
case EqualStrategyNumber:
|
||||
/* Check if all extracted trigrams are presented. */
|
||||
res = true;
|
||||
for (i = 0; i < nkeys; i++)
|
||||
@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
|
||||
#endif
|
||||
/* FALL THRU */
|
||||
case LikeStrategyNumber:
|
||||
case EqualStrategyNumber:
|
||||
/* Check if all extracted trigrams are presented. */
|
||||
res = GIN_MAYBE;
|
||||
for (i = 0; i < nkeys; i++)
|
||||
|
@ -232,6 +232,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
||||
case SimilarityStrategyNumber:
|
||||
case WordSimilarityStrategyNumber:
|
||||
case StrictWordSimilarityStrategyNumber:
|
||||
case EqualStrategyNumber:
|
||||
qtrg = generate_trgm(VARDATA(query),
|
||||
querysize - VARHDRSZ);
|
||||
break;
|
||||
@ -338,7 +339,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
||||
#endif
|
||||
/* FALL THRU */
|
||||
case LikeStrategyNumber:
|
||||
/* Wildcard search is inexact */
|
||||
case EqualStrategyNumber:
|
||||
/* Wildcard and equal search are inexact */
|
||||
*recheck = true;
|
||||
|
||||
/*
|
||||
|
@ -419,9 +419,11 @@
|
||||
the purpose of very fast similarity searches. These index types support
|
||||
the above-described similarity operators, and additionally support
|
||||
trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>,
|
||||
<literal>~</literal> and <literal>~*</literal> queries. (These indexes do not
|
||||
support equality nor simple comparison operators, so you may need a
|
||||
regular B-tree index too.)
|
||||
<literal>~</literal> and <literal>~*</literal> queries. Beginning in
|
||||
<productname>PostgreSQL</productname> 14, these indexes also support
|
||||
equality operator (simple comparison operators are not supported).
|
||||
Note that those indexes may not be as efficient as regular B-tree indexes
|
||||
for equality operator.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
Loading…
Reference in New Issue
Block a user