Fix extended statistics with partial analyzes

Either because of a previous ALTER TABLE .. SET STATISTICS 0 or because
of being invoked with a partial column list, ANALYZE could fail to
acquire sufficient data to build extended statistics.  Previously, this
would draw an ERROR and fail to collect any statistics at all (extended
and regular).  Change things so that we raise a WARNING instead, and
remove a hint that was wrong in half the cases.

Reported by: David Rowley
Discussion: https://postgr.es/m/CAKJS1f9Kk0NF6Fg7TA=JUXsjpS9kX6NVu27pb5QDCpOYAvb-Og@mail.gmail.com
This commit is contained in:
Alvaro Herrera 2017-04-17 14:00:47 -03:00
parent 419a23b478
commit bf2a691e02
3 changed files with 60 additions and 31 deletions

View File

@ -23,11 +23,13 @@
#include "catalog/pg_collation.h" #include "catalog/pg_collation.h"
#include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext.h"
#include "nodes/relation.h" #include "nodes/relation.h"
#include "postmaster/autovacuum.h"
#include "statistics/extended_stats_internal.h" #include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h" #include "statistics/statistics.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/fmgroids.h" #include "utils/fmgroids.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h" #include "utils/rel.h"
#include "utils/syscache.h" #include "utils/syscache.h"
@ -38,6 +40,8 @@
typedef struct StatExtEntry typedef struct StatExtEntry
{ {
Oid statOid; /* OID of pg_statistic_ext entry */ Oid statOid; /* OID of pg_statistic_ext entry */
char *schema; /* statistics schema */
char *name; /* statistics name */
Bitmapset *columns; /* attribute numbers covered by the statistics */ Bitmapset *columns; /* attribute numbers covered by the statistics */
List *types; /* 'char' list of enabled statistic kinds */ List *types; /* 'char' list of enabled statistic kinds */
} StatExtEntry; } StatExtEntry;
@ -45,7 +49,7 @@ typedef struct StatExtEntry
static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
int natts, VacAttrStats **vacattrstats); int nvacatts, VacAttrStats **vacatts);
static void statext_store(Relation pg_stext, Oid relid, static void statext_store(Relation pg_stext, Oid relid,
MVNDistinct *ndistinct, MVDependencies *dependencies, MVNDistinct *ndistinct, MVDependencies *dependencies,
VacAttrStats **stats); VacAttrStats **stats);
@ -66,6 +70,12 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
Relation pg_stext; Relation pg_stext;
ListCell *lc; ListCell *lc;
List *stats; List *stats;
MemoryContext cxt;
MemoryContext oldcxt;
cxt = AllocSetContextCreate(CurrentMemoryContext, "stats ext",
ALLOCSET_DEFAULT_SIZES);
oldcxt = MemoryContextSwitchTo(cxt);
pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock); pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
@ -78,9 +88,23 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
VacAttrStats **stats; VacAttrStats **stats;
ListCell *lc2; ListCell *lc2;
/* filter only the interesting vacattrstats records */ /*
* Check if we can build these stats based on the column analyzed.
* If not, report this fact (except in autovacuum) and move on.
*/
stats = lookup_var_attr_stats(onerel, stat->columns, stats = lookup_var_attr_stats(onerel, stat->columns,
natts, vacattrstats); natts, vacattrstats);
if (!stats && !IsAutoVacuumWorkerProcess())
{
ereport(WARNING,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("extended statistics \"%s.%s\" could not be collected for relation %s.%s",
stat->schema, stat->name,
get_namespace_name(onerel->rd_rel->relnamespace),
RelationGetRelationName(onerel)),
errtable(onerel)));
continue;
}
/* check allowed number of dimensions */ /* check allowed number of dimensions */
Assert(bms_num_members(stat->columns) >= 2 && Assert(bms_num_members(stat->columns) >= 2 &&
@ -104,6 +128,9 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
} }
heap_close(pg_stext, RowExclusiveLock); heap_close(pg_stext, RowExclusiveLock);
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(cxt);
} }
/* /*
@ -168,6 +195,8 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
entry = palloc0(sizeof(StatExtEntry)); entry = palloc0(sizeof(StatExtEntry));
entry->statOid = HeapTupleGetOid(htup); entry->statOid = HeapTupleGetOid(htup);
staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
entry->schema = get_namespace_name(staForm->stanamespace);
entry->name = pstrdup(NameStr(staForm->staname));
for (i = 0; i < staForm->stakeys.dim1; i++) for (i = 0; i < staForm->stakeys.dim1; i++)
{ {
entry->columns = bms_add_member(entry->columns, entry->columns = bms_add_member(entry->columns,
@ -200,18 +229,19 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
} }
/* /*
* Using 'vacattrstats' of size 'natts' as input data, return a newly built * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
* VacAttrStats array which includes only the items corresponding to attributes * VacAttrStats array which includes only the items corresponding to
* indicated by 'attrs'. * attributes indicated by 'stakeys'. If we don't have all of the per column
* stats available to compute the extended stats, then we return NULL to indicate
* to the caller that the stats should not be built.
*/ */
static VacAttrStats ** static VacAttrStats **
lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts, lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
VacAttrStats **vacattrstats) int nvacatts, VacAttrStats **vacatts)
{ {
int i = 0; int i = 0;
int x = -1; int x = -1;
VacAttrStats **stats; VacAttrStats **stats;
Bitmapset *matched = NULL;
stats = (VacAttrStats **) stats = (VacAttrStats **)
palloc(bms_num_members(attrs) * sizeof(VacAttrStats *)); palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
@ -222,39 +252,34 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
int j; int j;
stats[i] = NULL; stats[i] = NULL;
for (j = 0; j < natts; j++) for (j = 0; j < nvacatts; j++)
{ {
if (x == vacattrstats[j]->tupattnum) if (x == vacatts[j]->tupattnum)
{ {
stats[i] = vacattrstats[j]; stats[i] = vacatts[j];
break; break;
} }
} }
if (!stats[i]) if (!stats[i])
ereport(ERROR, {
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), /*
errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s", * Looks like stats were not gathered for one of the columns
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname), * required. We'll be unable to build the extended stats without
get_namespace_name(rel->rd_rel->relnamespace), * this column.
RelationGetRelationName(rel)), */
errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1", pfree(stats);
get_namespace_name(rel->rd_rel->relnamespace), return NULL;
RelationGetRelationName(rel), }
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname))));
/* /*
* Check that we found a non-dropped column and that the attnum * Sanity check that the column is not dropped - stats should have
* matches. * been removed in this case.
*/ */
Assert(!stats[i]->attr->attisdropped); Assert(!stats[i]->attr->attisdropped);
matched = bms_add_member(matched, stats[i]->tupattnum);
i++; i++;
} }
if (bms_subset_compare(matched, attrs) != BMS_EQUAL)
elog(ERROR, "could not find all attributes in attribute stats array");
bms_free(matched);
return stats; return stats;
} }

View File

@ -40,9 +40,11 @@ ALTER TABLE ab1 ALTER a SET STATISTICS 0;
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1; ANALYZE ab1;
ERROR: extended statistics could not be collected for column "a" of relation public.ab1 WARNING: extended statistics "public.ab1_a_b_stats" could not be collected for relation public.ab1
HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
ALTER TABLE ab1 ALTER a SET STATISTICS -1; ALTER TABLE ab1 ALTER a SET STATISTICS -1;
-- partial analyze doesn't build stats either
ANALYZE ab1 (a);
WARNING: extended statistics "public.ab1_a_b_stats" could not be collected for relation public.ab1
ANALYZE ab1; ANALYZE ab1;
DROP TABLE ab1; DROP TABLE ab1;
-- n-distinct tests -- n-distinct tests

View File

@ -35,6 +35,8 @@ INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1; ANALYZE ab1;
ALTER TABLE ab1 ALTER a SET STATISTICS -1; ALTER TABLE ab1 ALTER a SET STATISTICS -1;
-- partial analyze doesn't build stats either
ANALYZE ab1 (a);
ANALYZE ab1; ANALYZE ab1;
DROP TABLE ab1; DROP TABLE ab1;