postgresql/contrib/pg_trgm/sql/pg_word_trgm.sql
Tom Lane bc436e4a91 Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex.
If the regex compiler can see that a regex is unsatisfiable
(for example, '$foo') then it may emit an NFA having no arcs.
pg_trgm's packGraph function did the wrong thing in this case;
it would access off the end of a work array, and with bad luck
could produce a corrupted output data structure causing more
problems later.  This could end with wrong answers or crashes
in queries using a pg_trgm GIN or GiST index with such a regex.

Fix by not trying to de-duplicate if there aren't at least 2 arcs.

Per bug #17830 from Alexander Lakhin.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org
2023-03-11 12:15:41 -05:00

49 lines
2.8 KiB
SQL

CREATE TABLE test_trgm2(t text COLLATE "C");
\copy test_trgm2 from 'data/trgm2.data'
-- reduce noise
set extra_float_digits = 0;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7;
create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops);
set enable_seqscan=off;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
explain (costs off)
select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7;
select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7;
drop index trgm_idx2;
create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops);
set enable_seqscan=off;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
set "pg_trgm.word_similarity_threshold" to 0.5;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
set "pg_trgm.word_similarity_threshold" to 0.3;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
-- test unsatisfiable pattern
select * from test_trgm2 where t ~ '.*$x';