mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
Disallow making an empty lexeme via array_to_tsvector().
The tsvector data type has always forbidden lexemes to be empty. However, array_to_tsvector() didn't get that memo, and would allow an empty-string array element to become an empty lexeme. This could result in dump/restore failures later, not to mention whatever semantic issues might be behind the original prohibition.

However, other functions that take a plain text input directly as a lexeme value do not need a similar restriction, because they only match the string against existing tsvector entries. In particular it'd be a bad idea to make ts_delete() reject empty strings, since that is the most convenient way to clean up any bad data that might have gotten into a tsvector column via this bug.

Reflecting on that, let's also remove the prohibition against NULL array elements in tsvector_delete_arr and tsvector_setweight_by_filter. It seems more consistent to ignore them, as an empty-string element would be ignored.

There's a case for back-patching this, since it's clearly a bug fix. On balance though, it doesn't seem like something to change in a minor release.

Jean-Christophe Arnu

Discussion: https://postgr.es/m/CAHZmTm1YVndPgUVRoag2WL0w900XcoiivDDj-gTTYBsG25c65A@mail.gmail.com
This commit is contained in:
parent
1241fcbd7e
commit
cbe25dcff7
@ -12920,8 +12920,10 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
|
||||
<returnvalue>tsvector</returnvalue>
|
||||
</para>
|
||||
<para>
|
||||
Converts an array of lexemes to a <type>tsvector</type>.
|
||||
The given strings are used as-is without further processing.
|
||||
Converts an array of text strings to a <type>tsvector</type>.
|
||||
The given strings are used as lexemes as-is, without further
|
||||
processing. Array elements must not be empty strings
|
||||
or <literal>NULL</literal>.
|
||||
</para>
|
||||
<para>
|
||||
<literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal>
|
||||
@ -13104,6 +13106,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
|
||||
Assigns the specified <parameter>weight</parameter> to elements
|
||||
of the <parameter>vector</parameter> that are listed
|
||||
in <parameter>lexemes</parameter>.
|
||||
The strings in <parameter>lexemes</parameter> are taken as lexemes
|
||||
as-is, without further processing. Strings that do not match any
|
||||
lexeme in <parameter>vector</parameter> are ignored.
|
||||
</para>
|
||||
<para>
|
||||
<literal>setweight('fat:2,4 cat:3 rat:5,6B'::tsvector, 'A', '{cat,rat}')</literal>
|
||||
@ -13265,6 +13270,8 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
|
||||
<para>
|
||||
Removes any occurrence of the given <parameter>lexeme</parameter>
|
||||
from the <parameter>vector</parameter>.
|
||||
The <parameter>lexeme</parameter> string is treated as a lexeme as-is,
|
||||
without further processing.
|
||||
</para>
|
||||
<para>
|
||||
<literal>ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat')</literal>
|
||||
@ -13281,6 +13288,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
|
||||
Removes any occurrences of the lexemes
|
||||
in <parameter>lexemes</parameter>
|
||||
from the <parameter>vector</parameter>.
|
||||
The strings in <parameter>lexemes</parameter> are taken as lexemes
|
||||
as-is, without further processing. Strings that do not match any
|
||||
lexeme in <parameter>vector</parameter> are ignored.
|
||||
</para>
|
||||
<para>
|
||||
<literal>ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat'])</literal>
|
||||
|
@ -322,10 +322,9 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
|
||||
int lex_len,
|
||||
lex_pos;
|
||||
|
||||
/* Ignore null array elements, they surely don't match */
|
||||
if (nulls[i])
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||||
errmsg("lexeme array may not contain nulls")));
|
||||
continue;
|
||||
|
||||
lex = VARDATA(dlexemes[i]);
|
||||
lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
|
||||
@ -602,10 +601,9 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
|
||||
int lex_len,
|
||||
lex_pos;
|
||||
|
||||
/* Ignore null array elements, they surely don't match */
|
||||
if (nulls[i])
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||||
errmsg("lexeme array may not contain nulls")));
|
||||
continue;
|
||||
|
||||
lex = VARDATA(dlexemes[i]);
|
||||
lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
|
||||
@ -761,13 +759,21 @@ array_to_tsvector(PG_FUNCTION_ARGS)
|
||||
|
||||
deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
|
||||
|
||||
/* Reject nulls (maybe we should just ignore them, instead?) */
|
||||
/*
|
||||
* Reject nulls and zero length strings (maybe we should just ignore them,
|
||||
* instead?)
|
||||
*/
|
||||
for (i = 0; i < nitems; i++)
|
||||
{
|
||||
if (nulls[i])
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||||
errmsg("lexeme array may not contain nulls")));
|
||||
|
||||
if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
|
||||
errmsg("lexeme array may not contain empty strings")));
|
||||
}
|
||||
|
||||
/* Sort and de-dup, because this is required for a valid tsvector. */
|
||||
|
@ -85,6 +85,10 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
||||
'a':3A,4B 'b':2A 'ba':1237
|
||||
(1 row)
|
||||
|
||||
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
|
||||
ERROR: syntax error in tsvector: "'' '1' '2'"
|
||||
LINE 1: SELECT $$'' '1' '2'$$::tsvector;
|
||||
^
|
||||
--Base tsquery test
|
||||
SELECT '1'::tsquery;
|
||||
tsquery
|
||||
@ -1258,8 +1262,12 @@ SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceshi
|
||||
'base' 'hidden' 'strike'
|
||||
(1 row)
|
||||
|
||||
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
|
||||
ERROR: lexeme array may not contain nulls
|
||||
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '', NULL]);
|
||||
ts_delete
|
||||
--------------------------
|
||||
'base' 'hidden' 'strike'
|
||||
(1 row)
|
||||
|
||||
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||
unnest
|
||||
---------------------------------------------
|
||||
@ -1328,8 +1336,11 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
|
||||
'base' 'hidden' 'rebel' 'spaceship' 'strike'
|
||||
(1 row)
|
||||
|
||||
-- null and empty string are disallowed, since we mustn't make an empty lexeme
|
||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
||||
ERROR: lexeme array may not contain nulls
|
||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
|
||||
ERROR: lexeme array may not contain empty strings
|
||||
-- array_to_tsvector must sort and de-dup
|
||||
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
|
||||
array_to_tsvector
|
||||
@ -1367,14 +1378,12 @@ SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '
|
||||
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81C,222C,567C
|
||||
(1 row)
|
||||
|
||||
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
|
||||
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '', NULL]);
|
||||
setweight
|
||||
---------------------------------
|
||||
'a' 'asd' 'w':5,6,12B,13A 'zxc'
|
||||
(1 row)
|
||||
|
||||
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
|
||||
ERROR: lexeme array may not contain nulls
|
||||
SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
|
||||
ts_filter
|
||||
-------------------------------------------------------------
|
||||
|
@ -17,6 +17,7 @@ SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
|
||||
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
|
||||
SELECT '''w'':4A,3B,2C,1D,5 a:8';
|
||||
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
||||
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
|
||||
|
||||
--Base tsquery test
|
||||
SELECT '1'::tsquery;
|
||||
@ -239,7 +240,7 @@ SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3':
|
||||
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
|
||||
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
|
||||
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']);
|
||||
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
|
||||
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '', NULL]);
|
||||
|
||||
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||
SELECT unnest('base hidden rebel spaceship strike'::tsvector);
|
||||
@ -251,7 +252,9 @@ SELECT tsvector_to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D st
|
||||
SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
|
||||
|
||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
|
||||
-- null and empty string are disallowed, since we mustn't make an empty lexeme
|
||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
|
||||
-- array_to_tsvector must sort and de-dup
|
||||
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
|
||||
|
||||
@ -260,8 +263,7 @@ SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
|
||||
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
|
||||
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
|
||||
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
|
||||
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
|
||||
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
|
||||
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '', NULL]);
|
||||
|
||||
SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
|
||||
SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a}');
|
||||
|
Loading…
Reference in New Issue
Block a user