mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-15 08:20:16 +08:00
After further thought about support for gathering stats on functional indexes, it seems like we ought to put another layer of indirection between the compute_stats functions and the actual data storage. This would allow us to compute the values on the fly, for example.
This commit is contained in:
parent
1a4652333b
commit
8787bc8ef3
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.68 2004/02/12 23:41:02 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.69 2004/02/13 06:39:49 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -52,6 +52,7 @@ static double init_selection_state(int n);
|
|||||||
static double select_next_random_record(double t, int n, double *stateptr);
|
static double select_next_random_record(double t, int n, double *stateptr);
|
||||||
static int compare_rows(const void *a, const void *b);
|
static int compare_rows(const void *a, const void *b);
|
||||||
static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
|
static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
|
||||||
|
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
|
||||||
|
|
||||||
static bool std_typanalyze(VacAttrStats *stats);
|
static bool std_typanalyze(VacAttrStats *stats);
|
||||||
|
|
||||||
@ -259,12 +260,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
|
|||||||
old_context = MemoryContextSwitchTo(col_context);
|
old_context = MemoryContextSwitchTo(col_context);
|
||||||
for (i = 0; i < attr_cnt; i++)
|
for (i = 0; i < attr_cnt; i++)
|
||||||
{
|
{
|
||||||
(*vacattrstats[i]->compute_stats) (vacattrstats[i],
|
VacAttrStats *stats = vacattrstats[i];
|
||||||
vacattrstats[i]->tupattnum,
|
|
||||||
onerel->rd_att,
|
stats->rows = rows;
|
||||||
totalrows,
|
stats->tupDesc = onerel->rd_att;
|
||||||
rows,
|
(*stats->compute_stats) (stats,
|
||||||
numrows);
|
std_fetch_func,
|
||||||
|
numrows,
|
||||||
|
totalrows);
|
||||||
MemoryContextResetAndDeleteChildren(col_context);
|
MemoryContextResetAndDeleteChildren(col_context);
|
||||||
}
|
}
|
||||||
MemoryContextSwitchTo(old_context);
|
MemoryContextSwitchTo(old_context);
|
||||||
@ -861,6 +864,22 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
|
|||||||
heap_close(sd, RowExclusiveLock);
|
heap_close(sd, RowExclusiveLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Standard fetch function for use by compute_stats subroutines.
|
||||||
|
*
|
||||||
|
* This exists to provide some insulation between compute_stats routines
|
||||||
|
* and the actual storage of the sample data.
|
||||||
|
*/
|
||||||
|
static Datum
|
||||||
|
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
|
||||||
|
{
|
||||||
|
int attnum = stats->tupattnum;
|
||||||
|
HeapTuple tuple = stats->rows[rownum];
|
||||||
|
TupleDesc tupDesc = stats->tupDesc;
|
||||||
|
|
||||||
|
return heap_getattr(tuple, attnum, tupDesc, isNull);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*==========================================================================
|
/*==========================================================================
|
||||||
*
|
*
|
||||||
@ -915,12 +934,14 @@ static SortFunctionKind datumCmpFnKind;
|
|||||||
static int *datumCmpTupnoLink;
|
static int *datumCmpTupnoLink;
|
||||||
|
|
||||||
|
|
||||||
static void compute_minimal_stats(VacAttrStats *stats, int attnum,
|
static void compute_minimal_stats(VacAttrStatsP stats,
|
||||||
TupleDesc tupDesc, double totalrows,
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
HeapTuple *rows, int numrows);
|
int samplerows,
|
||||||
static void compute_scalar_stats(VacAttrStats *stats, int attnum,
|
double totalrows);
|
||||||
TupleDesc tupDesc, double totalrows,
|
static void compute_scalar_stats(VacAttrStatsP stats,
|
||||||
HeapTuple *rows, int numrows);
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
|
int samplerows,
|
||||||
|
double totalrows);
|
||||||
static int compare_scalars(const void *a, const void *b);
|
static int compare_scalars(const void *a, const void *b);
|
||||||
static int compare_mcvs(const void *a, const void *b);
|
static int compare_mcvs(const void *a, const void *b);
|
||||||
|
|
||||||
@ -1024,9 +1045,10 @@ std_typanalyze(VacAttrStats *stats)
|
|||||||
* depend mainly on the length of the list we are willing to keep.
|
* depend mainly on the length of the list we are willing to keep.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
compute_minimal_stats(VacAttrStats *stats, int attnum,
|
compute_minimal_stats(VacAttrStatsP stats,
|
||||||
TupleDesc tupDesc, double totalrows,
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
HeapTuple *rows, int numrows)
|
int samplerows,
|
||||||
|
double totalrows)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int null_cnt = 0;
|
int null_cnt = 0;
|
||||||
@ -1061,9 +1083,8 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
|
|||||||
|
|
||||||
fmgr_info(mystats->eqfunc, &f_cmpeq);
|
fmgr_info(mystats->eqfunc, &f_cmpeq);
|
||||||
|
|
||||||
for (i = 0; i < numrows; i++)
|
for (i = 0; i < samplerows; i++)
|
||||||
{
|
{
|
||||||
HeapTuple tuple = rows[i];
|
|
||||||
Datum value;
|
Datum value;
|
||||||
bool isnull;
|
bool isnull;
|
||||||
bool match;
|
bool match;
|
||||||
@ -1072,7 +1093,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
|
|||||||
|
|
||||||
vacuum_delay_point();
|
vacuum_delay_point();
|
||||||
|
|
||||||
value = heap_getattr(tuple, attnum, tupDesc, &isnull);
|
value = fetchfunc(stats, i, &isnull);
|
||||||
|
|
||||||
/* Check for null/nonnull */
|
/* Check for null/nonnull */
|
||||||
if (isnull)
|
if (isnull)
|
||||||
@ -1166,7 +1187,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
|
|||||||
|
|
||||||
stats->stats_valid = true;
|
stats->stats_valid = true;
|
||||||
/* Do the simple null-frac and width stats */
|
/* Do the simple null-frac and width stats */
|
||||||
stats->stanullfrac = (double) null_cnt / (double) numrows;
|
stats->stanullfrac = (double) null_cnt / (double) samplerows;
|
||||||
if (is_varwidth)
|
if (is_varwidth)
|
||||||
stats->stawidth = total_width / (double) nonnull_cnt;
|
stats->stawidth = total_width / (double) nonnull_cnt;
|
||||||
else
|
else
|
||||||
@ -1222,10 +1243,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
|
|||||||
denom,
|
denom,
|
||||||
stadistinct;
|
stadistinct;
|
||||||
|
|
||||||
numer = (double) numrows *(double) d;
|
numer = (double) samplerows *(double) d;
|
||||||
|
|
||||||
denom = (double) (numrows - f1) +
|
denom = (double) (samplerows - f1) +
|
||||||
(double) f1 *(double) numrows / totalrows;
|
(double) f1 *(double) samplerows / totalrows;
|
||||||
|
|
||||||
stadistinct = numer / denom;
|
stadistinct = numer / denom;
|
||||||
/* Clamp to sane range in case of roundoff error */
|
/* Clamp to sane range in case of roundoff error */
|
||||||
@ -1270,7 +1291,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
|
|||||||
if (ndistinct < 0)
|
if (ndistinct < 0)
|
||||||
ndistinct = -ndistinct * totalrows;
|
ndistinct = -ndistinct * totalrows;
|
||||||
/* estimate # of occurrences in sample of a typical value */
|
/* estimate # of occurrences in sample of a typical value */
|
||||||
avgcount = (double) numrows / ndistinct;
|
avgcount = (double) samplerows / ndistinct;
|
||||||
/* set minimum threshold count to store a value */
|
/* set minimum threshold count to store a value */
|
||||||
mincount = avgcount * 1.25;
|
mincount = avgcount * 1.25;
|
||||||
if (mincount < 2)
|
if (mincount < 2)
|
||||||
@ -1303,7 +1324,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
|
|||||||
mcv_values[i] = datumCopy(track[i].value,
|
mcv_values[i] = datumCopy(track[i].value,
|
||||||
stats->attr->attbyval,
|
stats->attr->attbyval,
|
||||||
stats->attr->attlen);
|
stats->attr->attlen);
|
||||||
mcv_freqs[i] = (double) track[i].count / (double) numrows;
|
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
|
||||||
}
|
}
|
||||||
MemoryContextSwitchTo(old_context);
|
MemoryContextSwitchTo(old_context);
|
||||||
|
|
||||||
@ -1333,9 +1354,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
|
|||||||
* data values into order.
|
* data values into order.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
compute_scalar_stats(VacAttrStats *stats, int attnum,
|
compute_scalar_stats(VacAttrStatsP stats,
|
||||||
TupleDesc tupDesc, double totalrows,
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
HeapTuple *rows, int numrows)
|
int samplerows,
|
||||||
|
double totalrows)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int null_cnt = 0;
|
int null_cnt = 0;
|
||||||
@ -1359,23 +1381,22 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
|
|||||||
int num_bins = stats->attr->attstattarget;
|
int num_bins = stats->attr->attstattarget;
|
||||||
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
|
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
|
||||||
|
|
||||||
values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
|
values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
|
||||||
tupnoLink = (int *) palloc(numrows * sizeof(int));
|
tupnoLink = (int *) palloc(samplerows * sizeof(int));
|
||||||
track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
|
track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
|
||||||
|
|
||||||
SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind);
|
SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind);
|
||||||
fmgr_info(cmpFn, &f_cmpfn);
|
fmgr_info(cmpFn, &f_cmpfn);
|
||||||
|
|
||||||
/* Initial scan to find sortable values */
|
/* Initial scan to find sortable values */
|
||||||
for (i = 0; i < numrows; i++)
|
for (i = 0; i < samplerows; i++)
|
||||||
{
|
{
|
||||||
HeapTuple tuple = rows[i];
|
|
||||||
Datum value;
|
Datum value;
|
||||||
bool isnull;
|
bool isnull;
|
||||||
|
|
||||||
vacuum_delay_point();
|
vacuum_delay_point();
|
||||||
|
|
||||||
value = heap_getattr(tuple, attnum, tupDesc, &isnull);
|
value = fetchfunc(stats, i, &isnull);
|
||||||
|
|
||||||
/* Check for null/nonnull */
|
/* Check for null/nonnull */
|
||||||
if (isnull)
|
if (isnull)
|
||||||
@ -1505,7 +1526,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
|
|||||||
|
|
||||||
stats->stats_valid = true;
|
stats->stats_valid = true;
|
||||||
/* Do the simple null-frac and width stats */
|
/* Do the simple null-frac and width stats */
|
||||||
stats->stanullfrac = (double) null_cnt / (double) numrows;
|
stats->stanullfrac = (double) null_cnt / (double) samplerows;
|
||||||
if (is_varwidth)
|
if (is_varwidth)
|
||||||
stats->stawidth = total_width / (double) nonnull_cnt;
|
stats->stawidth = total_width / (double) nonnull_cnt;
|
||||||
else
|
else
|
||||||
@ -1546,10 +1567,10 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
|
|||||||
denom,
|
denom,
|
||||||
stadistinct;
|
stadistinct;
|
||||||
|
|
||||||
numer = (double) numrows *(double) d;
|
numer = (double) samplerows *(double) d;
|
||||||
|
|
||||||
denom = (double) (numrows - f1) +
|
denom = (double) (samplerows - f1) +
|
||||||
(double) f1 *(double) numrows / totalrows;
|
(double) f1 *(double) samplerows / totalrows;
|
||||||
|
|
||||||
stadistinct = numer / denom;
|
stadistinct = numer / denom;
|
||||||
/* Clamp to sane range in case of roundoff error */
|
/* Clamp to sane range in case of roundoff error */
|
||||||
@ -1599,13 +1620,13 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
|
|||||||
if (ndistinct < 0)
|
if (ndistinct < 0)
|
||||||
ndistinct = -ndistinct * totalrows;
|
ndistinct = -ndistinct * totalrows;
|
||||||
/* estimate # of occurrences in sample of a typical value */
|
/* estimate # of occurrences in sample of a typical value */
|
||||||
avgcount = (double) numrows / ndistinct;
|
avgcount = (double) samplerows / ndistinct;
|
||||||
/* set minimum threshold count to store a value */
|
/* set minimum threshold count to store a value */
|
||||||
mincount = avgcount * 1.25;
|
mincount = avgcount * 1.25;
|
||||||
if (mincount < 2)
|
if (mincount < 2)
|
||||||
mincount = 2;
|
mincount = 2;
|
||||||
/* don't let threshold exceed 1/K, however */
|
/* don't let threshold exceed 1/K, however */
|
||||||
maxmincount = (double) numrows / (double) num_bins;
|
maxmincount = (double) samplerows / (double) num_bins;
|
||||||
if (mincount > maxmincount)
|
if (mincount > maxmincount)
|
||||||
mincount = maxmincount;
|
mincount = maxmincount;
|
||||||
if (num_mcv > track_cnt)
|
if (num_mcv > track_cnt)
|
||||||
@ -1636,7 +1657,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
|
|||||||
mcv_values[i] = datumCopy(values[track[i].first].value,
|
mcv_values[i] = datumCopy(values[track[i].first].value,
|
||||||
stats->attr->attbyval,
|
stats->attr->attbyval,
|
||||||
stats->attr->attlen);
|
stats->attr->attlen);
|
||||||
mcv_freqs[i] = (double) track[i].count / (double) numrows;
|
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
|
||||||
}
|
}
|
||||||
MemoryContextSwitchTo(old_context);
|
MemoryContextSwitchTo(old_context);
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.49 2004/02/12 23:41:04 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.50 2004/02/13 06:39:49 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -40,18 +40,18 @@
|
|||||||
* and must return TRUE to continue analysis, FALSE to skip analysis of this
|
* and must return TRUE to continue analysis, FALSE to skip analysis of this
|
||||||
* column. In the TRUE case it must set the compute_stats and minrows fields,
|
* column. In the TRUE case it must set the compute_stats and minrows fields,
|
||||||
* and can optionally set extra_data to pass additional info to compute_stats.
|
* and can optionally set extra_data to pass additional info to compute_stats.
|
||||||
|
* minrows is its request for the minimum number of sample rows to be gathered
|
||||||
|
* (but note this request might not be honored, eg if there are fewer rows
|
||||||
|
* than that in the table).
|
||||||
*
|
*
|
||||||
* The compute_stats routine will be called after sample rows have been
|
* The compute_stats routine will be called after sample rows have been
|
||||||
* gathered. Aside from this struct, it is passed:
|
* gathered. Aside from this struct, it is passed:
|
||||||
* attnum: attribute number within the supplied tuples
|
* fetchfunc: a function for accessing the column values from the
|
||||||
* tupDesc: tuple descriptor for the supplied tuples
|
* sample rows
|
||||||
|
* samplerows: the number of sample tuples
|
||||||
* totalrows: estimated total number of rows in relation
|
* totalrows: estimated total number of rows in relation
|
||||||
* rows: an array of the sample tuples
|
* The fetchfunc may be called with rownum running from 0 to samplerows-1.
|
||||||
* numrows: the number of sample tuples
|
* It returns a Datum and an isNull flag.
|
||||||
* Note that the passed attnum and tupDesc could possibly be different from
|
|
||||||
* what one would expect by looking at the pg_attribute row. It is important
|
|
||||||
* to use these values for extracting attribute values from the given rows
|
|
||||||
* (and not for any other purpose).
|
|
||||||
*
|
*
|
||||||
* compute_stats should set stats_valid TRUE if it is able to compute
|
* compute_stats should set stats_valid TRUE if it is able to compute
|
||||||
* any useful statistics. If it does, the remainder of the struct holds
|
* any useful statistics. If it does, the remainder of the struct holds
|
||||||
@ -60,6 +60,11 @@
|
|||||||
* be CurrentMemoryContext when compute_stats is called.
|
* be CurrentMemoryContext when compute_stats is called.
|
||||||
*----------
|
*----------
|
||||||
*/
|
*/
|
||||||
|
typedef struct VacAttrStats *VacAttrStatsP;
|
||||||
|
|
||||||
|
typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
|
||||||
|
bool *isNull);
|
||||||
|
|
||||||
typedef struct VacAttrStats
|
typedef struct VacAttrStats
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -74,9 +79,10 @@ typedef struct VacAttrStats
|
|||||||
* These fields must be filled in by the typanalyze routine,
|
* These fields must be filled in by the typanalyze routine,
|
||||||
* unless it returns FALSE.
|
* unless it returns FALSE.
|
||||||
*/
|
*/
|
||||||
void (*compute_stats) (struct VacAttrStats *stats, int attnum,
|
void (*compute_stats) (VacAttrStatsP stats,
|
||||||
TupleDesc tupDesc, double totalrows,
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
HeapTuple *rows, int numrows);
|
int samplerows,
|
||||||
|
double totalrows);
|
||||||
int minrows; /* Minimum # of rows wanted for stats */
|
int minrows; /* Minimum # of rows wanted for stats */
|
||||||
void *extra_data; /* for extra type-specific data */
|
void *extra_data; /* for extra type-specific data */
|
||||||
|
|
||||||
@ -100,6 +106,8 @@ typedef struct VacAttrStats
|
|||||||
* be looked at by type-specific functions.
|
* be looked at by type-specific functions.
|
||||||
*/
|
*/
|
||||||
int tupattnum; /* attribute number within tuples */
|
int tupattnum; /* attribute number within tuples */
|
||||||
|
HeapTuple *rows; /* access info for fetch function */
|
||||||
|
TupleDesc tupDesc;
|
||||||
} VacAttrStats;
|
} VacAttrStats;
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user