Fix ts_rank_cd() to ignore stripped lexemes

Previously, stripped lexemes were assigned a default position and could
therefore contribute to the rank when mixed with non-stripped lexemes.

BACKWARD-INCOMPATIBLE CHANGE
This commit is contained in:
Bruce Momjian 2014-03-24 14:36:36 -04:00
parent bb42e21be2
commit 1420f3a982
4 changed files with 30 additions and 5 deletions

View File

@ -889,9 +889,13 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C');
 </para>
 <para>
-This function requires positional information in its input.
-Therefore it will not work on <quote>stripped</> <type>tsvector</>
-values &mdash; it will always return zero.
+This function requires lexeme positional information to perform
+its calculation. Therefore, it ignores any <quote>stripped</>
+lexemes in the <type>tsvector</>. If there are no unstripped
+lexemes in the input, the result will be zero. (See <xref
+linkend="textsearch-manipulate-tsvector"> for more information
+about the <function>strip</> function and positional information
+in <type>tsvector</>s.)
 </para>
 </listitem>
 </varlistentry>

View File

@ -658,8 +658,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 }
 else
 {
-dimt = POSNULL.npos;
-post = POSNULL.pos;
+/* ignore words without positions */
+entry++;
+continue;
 }
 while (cur + dimt >= len)

View File

@ -596,6 +596,20 @@ S. T. Coleridge (1772-1834)
 0.1
 (1 row)
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+to_tsquery('both & stripped'));
+ts_rank_cd
+------------
+0
+(1 row)
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+to_tsquery('unstripped & stripped'));
+ts_rank_cd
+------------
+0
+(1 row)
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,

View File

@ -165,6 +165,12 @@ Water, water, every where,
 S. T. Coleridge (1772-1834)
 '), to_tsquery('english', 'ocean'));
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+to_tsquery('both & stripped'));
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+to_tsquery('unstripped & stripped'));
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,