Fix ts_rank_cd() to ignore stripped lexemes

Previously, stripped lexemes got a default location and could be considered if mixed with non-stripped lexemes.

BACKWARD INCOMPATIBILITY CHANGE
2025-01-06 15:24:56 +08:00 · 2014-03-24 14:36:36 -04:00 · 2014-03-24 14:36:36 -04:00 · 1420f3a982
commit 1420f3a982
parent bb42e21be2
4 changed files with 30 additions and 5 deletions
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -889,9 +889,13 @@ SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
       </para>
       <para>
-        This function requires positional information in its input.
+        This function requires lexeme positional information to perform
-        Therefore it will not work on <quote>stripped</> <type>tsvector</>
+        its calculation.  Therefore, it ignores any <quote>stripped</>
-        values &mdash; it will always return zero.
+        lexemes in the <type>tsvector</>.  If there are no unstripped
+        lexemes in the input, the result will be zero.  (See <xref
+        linkend="textsearch-manipulate-tsvector"> for more information
+        about the <function>strip</> function and positional information
+        in <type>tsvector</>s.)
       </para>
      </listitem>
     </varlistentry>
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -658,8 +658,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 			}
 			else
 			{
-				dimt = POSNULL.npos;
+				/* ignore words without positions */
-				post = POSNULL.pos;
+				entry++;
+				continue;
 			}
 			while (cur + dimt >= len)
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -596,6 +596,20 @@ S. T. Coleridge (1772-1834)
        0.1
 (1 row)
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -165,6 +165,12 @@ Water, water, every where,
 S. T. Coleridge (1772-1834)
 '), to_tsquery('english', 'ocean'));
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,