diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c index 8ce9fb46aa..7f33c16a24 100644 --- a/src/backend/tsearch/ts_selfuncs.c +++ b/src/backend/tsearch/ts_selfuncs.c @@ -189,11 +189,17 @@ tsquerysel(VariableStatData *vardata, Datum constval) /* No most-common-elements info, so do without */ selec = tsquery_opr_selec_no_stats(query); } + + /* + * MCE stats count only non-null rows, so adjust for null rows. + */ + selec *= (1.0 - stats->stanullfrac); } else { /* No stats at all, so do without */ selec = tsquery_opr_selec_no_stats(query); + /* we assume no nulls here, so no stanullfrac correction */ } return selec; diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h index f38921f1c6..927cd0b047 100644 --- a/src/include/catalog/pg_statistic.h +++ b/src/include/catalog/pg_statistic.h @@ -246,6 +246,8 @@ typedef FormData_pg_statistic *Form_pg_statistic; * type with identifiable elements (for instance, tsvector). staop contains * the equality operator appropriate to the element type. stavalues contains * the most common element values, and stanumbers their frequencies. Unlike + * MCV slots, frequencies are measured as the fraction of non-null rows the + * element value appears in, not the fraction of all rows. Also unlike * MCV slots, the values are sorted into order (to support binary search * for a particular value). Since this puts the minimum and maximum * frequencies at unpredictable spots in stanumbers, there are two extra