After further thought about support for gathering stats on functional
indexes, it seems like we ought to put another layer of indirection
between the compute_stats functions and the actual data storage.  This
would allow us to compute the values on-the-fly, for example.
This commit is contained in:
Tom Lane 2004-02-13 06:39:49 +00:00
parent 1a4652333b
commit 8787bc8ef3
2 changed files with 81 additions and 52 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.68 2004/02/12 23:41:02 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.69 2004/02/13 06:39:49 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -52,6 +52,7 @@ static double init_selection_state(int n);
static double select_next_random_record(double t, int n, double *stateptr); static double select_next_random_record(double t, int n, double *stateptr);
static int compare_rows(const void *a, const void *b); static int compare_rows(const void *a, const void *b);
static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats); static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static bool std_typanalyze(VacAttrStats *stats); static bool std_typanalyze(VacAttrStats *stats);
@ -259,12 +260,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
old_context = MemoryContextSwitchTo(col_context); old_context = MemoryContextSwitchTo(col_context);
for (i = 0; i < attr_cnt; i++) for (i = 0; i < attr_cnt; i++)
{ {
(*vacattrstats[i]->compute_stats) (vacattrstats[i], VacAttrStats *stats = vacattrstats[i];
vacattrstats[i]->tupattnum,
onerel->rd_att, stats->rows = rows;
totalrows, stats->tupDesc = onerel->rd_att;
rows, (*stats->compute_stats) (stats,
numrows); std_fetch_func,
numrows,
totalrows);
MemoryContextResetAndDeleteChildren(col_context); MemoryContextResetAndDeleteChildren(col_context);
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
@ -861,6 +864,22 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
heap_close(sd, RowExclusiveLock); heap_close(sd, RowExclusiveLock);
} }
/*
 * std_fetch_func -- default fetch callback handed to compute_stats routines.
 *
 * Keeps compute_stats code decoupled from how the sample is stored: the
 * caller asks for sample row "rownum" and this function extracts attribute
 * stats->tupattnum from the heap tuple stats->rows[rownum], using
 * stats->tupDesc as the tuple descriptor.
 *
 * Returns the attribute value as a Datum; the null flag is reported
 * through *isNull by heap_getattr.
 */
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
	return heap_getattr(stats->rows[rownum],
						stats->tupattnum,
						stats->tupDesc,
						isNull);
}
/*========================================================================== /*==========================================================================
* *
@ -915,12 +934,14 @@ static SortFunctionKind datumCmpFnKind;
static int *datumCmpTupnoLink; static int *datumCmpTupnoLink;
static void compute_minimal_stats(VacAttrStats *stats, int attnum, static void compute_minimal_stats(VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows); int samplerows,
static void compute_scalar_stats(VacAttrStats *stats, int attnum, double totalrows);
TupleDesc tupDesc, double totalrows, static void compute_scalar_stats(VacAttrStatsP stats,
HeapTuple *rows, int numrows); AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
static int compare_scalars(const void *a, const void *b); static int compare_scalars(const void *a, const void *b);
static int compare_mcvs(const void *a, const void *b); static int compare_mcvs(const void *a, const void *b);
@ -1024,9 +1045,10 @@ std_typanalyze(VacAttrStats *stats)
* depend mainly on the length of the list we are willing to keep. * depend mainly on the length of the list we are willing to keep.
*/ */
static void static void
compute_minimal_stats(VacAttrStats *stats, int attnum, compute_minimal_stats(VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows) int samplerows,
double totalrows)
{ {
int i; int i;
int null_cnt = 0; int null_cnt = 0;
@ -1061,9 +1083,8 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
fmgr_info(mystats->eqfunc, &f_cmpeq); fmgr_info(mystats->eqfunc, &f_cmpeq);
for (i = 0; i < numrows; i++) for (i = 0; i < samplerows; i++)
{ {
HeapTuple tuple = rows[i];
Datum value; Datum value;
bool isnull; bool isnull;
bool match; bool match;
@ -1072,7 +1093,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
vacuum_delay_point(); vacuum_delay_point();
value = heap_getattr(tuple, attnum, tupDesc, &isnull); value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */ /* Check for null/nonnull */
if (isnull) if (isnull)
@ -1166,7 +1187,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
stats->stats_valid = true; stats->stats_valid = true;
/* Do the simple null-frac and width stats */ /* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) numrows; stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth) if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt; stats->stawidth = total_width / (double) nonnull_cnt;
else else
@ -1222,10 +1243,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
denom, denom,
stadistinct; stadistinct;
numer = (double) numrows *(double) d; numer = (double) samplerows *(double) d;
denom = (double) (numrows - f1) + denom = (double) (samplerows - f1) +
(double) f1 *(double) numrows / totalrows; (double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom; stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */ /* Clamp to sane range in case of roundoff error */
@ -1270,7 +1291,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
if (ndistinct < 0) if (ndistinct < 0)
ndistinct = -ndistinct * totalrows; ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */ /* estimate # of occurrences in sample of a typical value */
avgcount = (double) numrows / ndistinct; avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */ /* set minimum threshold count to store a value */
mincount = avgcount * 1.25; mincount = avgcount * 1.25;
if (mincount < 2) if (mincount < 2)
@ -1303,7 +1324,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
mcv_values[i] = datumCopy(track[i].value, mcv_values[i] = datumCopy(track[i].value,
stats->attr->attbyval, stats->attr->attbyval,
stats->attr->attlen); stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) numrows; mcv_freqs[i] = (double) track[i].count / (double) samplerows;
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
@ -1333,9 +1354,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
* data values into order. * data values into order.
*/ */
static void static void
compute_scalar_stats(VacAttrStats *stats, int attnum, compute_scalar_stats(VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows) int samplerows,
double totalrows)
{ {
int i; int i;
int null_cnt = 0; int null_cnt = 0;
@ -1359,23 +1381,22 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
int num_bins = stats->attr->attstattarget; int num_bins = stats->attr->attstattarget;
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem)); values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
tupnoLink = (int *) palloc(numrows * sizeof(int)); tupnoLink = (int *) palloc(samplerows * sizeof(int));
track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind); SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind);
fmgr_info(cmpFn, &f_cmpfn); fmgr_info(cmpFn, &f_cmpfn);
/* Initial scan to find sortable values */ /* Initial scan to find sortable values */
for (i = 0; i < numrows; i++) for (i = 0; i < samplerows; i++)
{ {
HeapTuple tuple = rows[i];
Datum value; Datum value;
bool isnull; bool isnull;
vacuum_delay_point(); vacuum_delay_point();
value = heap_getattr(tuple, attnum, tupDesc, &isnull); value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */ /* Check for null/nonnull */
if (isnull) if (isnull)
@ -1505,7 +1526,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
stats->stats_valid = true; stats->stats_valid = true;
/* Do the simple null-frac and width stats */ /* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) numrows; stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth) if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt; stats->stawidth = total_width / (double) nonnull_cnt;
else else
@ -1546,10 +1567,10 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
denom, denom,
stadistinct; stadistinct;
numer = (double) numrows *(double) d; numer = (double) samplerows *(double) d;
denom = (double) (numrows - f1) + denom = (double) (samplerows - f1) +
(double) f1 *(double) numrows / totalrows; (double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom; stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */ /* Clamp to sane range in case of roundoff error */
@ -1599,13 +1620,13 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
if (ndistinct < 0) if (ndistinct < 0)
ndistinct = -ndistinct * totalrows; ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */ /* estimate # of occurrences in sample of a typical value */
avgcount = (double) numrows / ndistinct; avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */ /* set minimum threshold count to store a value */
mincount = avgcount * 1.25; mincount = avgcount * 1.25;
if (mincount < 2) if (mincount < 2)
mincount = 2; mincount = 2;
/* don't let threshold exceed 1/K, however */ /* don't let threshold exceed 1/K, however */
maxmincount = (double) numrows / (double) num_bins; maxmincount = (double) samplerows / (double) num_bins;
if (mincount > maxmincount) if (mincount > maxmincount)
mincount = maxmincount; mincount = maxmincount;
if (num_mcv > track_cnt) if (num_mcv > track_cnt)
@ -1636,7 +1657,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
mcv_values[i] = datumCopy(values[track[i].first].value, mcv_values[i] = datumCopy(values[track[i].first].value,
stats->attr->attbyval, stats->attr->attbyval,
stats->attr->attlen); stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) numrows; mcv_freqs[i] = (double) track[i].count / (double) samplerows;
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.49 2004/02/12 23:41:04 tgl Exp $ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.50 2004/02/13 06:39:49 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -40,18 +40,18 @@
* and must return TRUE to continue analysis, FALSE to skip analysis of this * and must return TRUE to continue analysis, FALSE to skip analysis of this
* column. In the TRUE case it must set the compute_stats and minrows fields, * column. In the TRUE case it must set the compute_stats and minrows fields,
* and can optionally set extra_data to pass additional info to compute_stats. * and can optionally set extra_data to pass additional info to compute_stats.
* minrows is its request for the minimum number of sample rows to be gathered
* (but note this request might not be honored, eg if there are fewer rows
* than that in the table).
* *
* The compute_stats routine will be called after sample rows have been * The compute_stats routine will be called after sample rows have been
* gathered. Aside from this struct, it is passed: * gathered. Aside from this struct, it is passed:
* attnum: attribute number within the supplied tuples * fetchfunc: a function for accessing the column values from the
* tupDesc: tuple descriptor for the supplied tuples * sample rows
* samplerows: the number of sample tuples
* totalrows: estimated total number of rows in relation * totalrows: estimated total number of rows in relation
* rows: an array of the sample tuples * The fetchfunc may be called with rownum running from 0 to samplerows-1.
* numrows: the number of sample tuples * It returns a Datum and an isNull flag.
* Note that the passed attnum and tupDesc could possibly be different from
* what one would expect by looking at the pg_attribute row. It is important
* to use these values for extracting attribute values from the given rows
* (and not for any other purpose).
* *
* compute_stats should set stats_valid TRUE if it is able to compute * compute_stats should set stats_valid TRUE if it is able to compute
* any useful statistics. If it does, the remainder of the struct holds * any useful statistics. If it does, the remainder of the struct holds
@ -60,6 +60,11 @@
* be CurrentMemoryContext when compute_stats is called. * be CurrentMemoryContext when compute_stats is called.
*---------- *----------
*/ */
/*
 * Pointer-to-VacAttrStats typedef, declared ahead of the struct body so
 * that declarations appearing before the struct is complete can refer to it.
 */
typedef struct VacAttrStats *VacAttrStatsP;
/*
 * Signature of the fetch function passed to compute_stats: given the stats
 * struct and a sample row number (0 .. samplerows-1), it returns that row's
 * column value as a Datum and reports nullness through *isNull.
 */
typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
bool *isNull);
typedef struct VacAttrStats typedef struct VacAttrStats
{ {
/* /*
@ -74,9 +79,10 @@ typedef struct VacAttrStats
* These fields must be filled in by the typanalyze routine, * These fields must be filled in by the typanalyze routine,
* unless it returns FALSE. * unless it returns FALSE.
*/ */
void (*compute_stats) (struct VacAttrStats *stats, int attnum, void (*compute_stats) (VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows); int samplerows,
double totalrows);
int minrows; /* Minimum # of rows wanted for stats */ int minrows; /* Minimum # of rows wanted for stats */
void *extra_data; /* for extra type-specific data */ void *extra_data; /* for extra type-specific data */
@ -100,6 +106,8 @@ typedef struct VacAttrStats
* be looked at by type-specific functions. * be looked at by type-specific functions.
*/ */
int tupattnum; /* attribute number within tuples */ int tupattnum; /* attribute number within tuples */
HeapTuple *rows; /* access info for fetch function */
TupleDesc tupDesc;
} VacAttrStats; } VacAttrStats;