Fix bug in the tsvector stats collection function, which caused a crash if

the sample contains just one tsvector, containing only one lexeme.
2024-12-27 08:39:28 +08:00 · 2008-11-27 21:17:39 +00:00 · 2008-11-27 21:17:39 +00:00 · a93b3b98cd
commit a93b3b98cd
parent fb645f6426
1 changed files with 22 additions and 21 deletions
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.2 2008/09/19 19:03:40 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.3 2008/11/27 21:17:39 heikki Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -290,26 +290,6 @@ compute_tsvector_stats(VacAttrStats *stats,
 		if (num_mcelem > track_len)
 			num_mcelem = track_len;

-		/* Grab the minimal and maximal frequencies that will get stored */
-		minfreq = sort_table[num_mcelem - 1]->frequency;
-		maxfreq = sort_table[0]->frequency;
-
-		/*
-		 * We want to store statistics sorted on the lexeme value using first
-		 * length, then byte-for-byte comparison. The reason for doing length
-		 * comparison first is that we don't care about the ordering so long
-		 * as it's consistent, and comparing lengths first gives us a chance
-		 * to avoid a strncmp() call.
-		 *
-		 * This is different from what we do with scalar statistics -- they get
-		 * sorted on frequencies. The rationale is that we usually search
-		 * through most common elements looking for a specific value, so we can
-		 * grab its frequency.  When values are presorted we can employ binary
-		 * search for that.  See ts_selfuncs.c for a real usage scenario.
-		 */
-		qsort(sort_table, num_mcelem, sizeof(TrackItem *),
-			  trackitem_compare_lexemes);
-
 		/* Generate MCELEM slot entry */
 		if (num_mcelem > 0)
 		{
@@ -317,6 +297,27 @@ compute_tsvector_stats(VacAttrStats *stats,
 			Datum			*mcelem_values;
 			float4			*mcelem_freqs;

+			/* Grab the minimal and maximal frequencies that will get stored */
+			minfreq = sort_table[num_mcelem - 1]->frequency;
+			maxfreq = sort_table[0]->frequency;
+
+			/*
+			 * We want to store statistics sorted on the lexeme value using
+			 * first length, then byte-for-byte comparison. The reason for
+			 * doing length comparison first is that we don't care about the
+			 * ordering so long as it's consistent, and comparing lengths first
+			 * gives us a chance to avoid a strncmp() call.
+			 *
+			 * This is different from what we do with scalar statistics -- they
+			 * get sorted on frequencies. The rationale is that we usually
+			 * search through most common elements looking for a specific
+			 * value, so we can grab its frequency.  When values are presorted
+			 * we can employ binary search for that.  See ts_selfuncs.c for a
+			 * real usage scenario.
+			 */
+			qsort(sort_table, num_mcelem, sizeof(TrackItem *),
+				  trackitem_compare_lexemes);
+
 			/* Must copy the target values into anl_context */
 			old_context = MemoryContextSwitchTo(stats->anl_context);