mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-12 18:34:36 +08:00
Fix relcache reload mechanism to be more robust in the face of errors
occurring during a reload, such as query-cancel. Instead of zeroing out an existing relcache entry and rebuilding it in place, build a new relcache entry, then swap its contents with the old one, then free the new entry. This avoids problems with code believing that a previously obtained pointer to a cache entry must still reference a valid entry, as seen in recent failures on buildfarm member jaguar. (jaguar is using CLOBBER_CACHE_ALWAYS which raises the probability of failure substantially, but the problem could occur in the field without that.) The previous design was okay when it was made, but subtransactions and the ResourceOwner mechanism make it unsafe now. Also, make more use of the already existing rd_isvalid flag, so that we remember that the entry requires rebuilding even if the first attempt fails. Back-patch as far as 8.2. Prior versions have enough issues around relcache reload anyway (due to inadequate locking) that fixing this one doesn't seem worthwhile.
This commit is contained in:
parent
26098d8f86
commit
483b17119a
276
src/backend/utils/cache/relcache.c
vendored
276
src/backend/utils/cache/relcache.c
vendored
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.250.2.3 2009/09/26 18:25:12 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.250.2.4 2010/01/12 18:12:40 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -176,6 +176,7 @@ static HTAB *OpClassCache = NULL;
|
||||
|
||||
/* non-export function prototypes */
|
||||
|
||||
static void RelationDestroyRelation(Relation relation);
|
||||
static void RelationClearRelation(Relation relation, bool rebuild);
|
||||
|
||||
static void RelationReloadClassinfo(Relation relation);
|
||||
@ -188,10 +189,10 @@ static void formrdesc(const char *relationName, Oid relationReltype,
|
||||
bool hasoids, int natts, FormData_pg_attribute *att);
|
||||
|
||||
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
|
||||
static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
|
||||
static Relation AllocateRelationDesc(Form_pg_class relp);
|
||||
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
|
||||
static void RelationBuildTupleDesc(Relation relation);
|
||||
static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
|
||||
static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
|
||||
static void RelationInitPhysicalAddr(Relation relation);
|
||||
static TupleDesc GetPgClassDescriptor(void);
|
||||
static TupleDesc GetPgIndexDescriptor(void);
|
||||
@ -268,15 +269,12 @@ ScanPgRelation(Oid targetRelId, bool indexOK)
|
||||
* AllocateRelationDesc
|
||||
*
|
||||
* This is used to allocate memory for a new relation descriptor
|
||||
* and initialize the rd_rel field.
|
||||
*
|
||||
* If 'relation' is NULL, allocate a new RelationData object.
|
||||
* If not, reuse the given object (that path is taken only when
|
||||
* we have to rebuild a relcache entry during RelationClearRelation).
|
||||
* and initialize the rd_rel field from the given pg_class tuple.
|
||||
*/
|
||||
static Relation
|
||||
AllocateRelationDesc(Relation relation, Form_pg_class relp)
|
||||
AllocateRelationDesc(Form_pg_class relp)
|
||||
{
|
||||
Relation relation;
|
||||
MemoryContext oldcxt;
|
||||
Form_pg_class relationForm;
|
||||
|
||||
@ -284,15 +282,13 @@ AllocateRelationDesc(Relation relation, Form_pg_class relp)
|
||||
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
|
||||
|
||||
/*
|
||||
* allocate space for new relation descriptor, if needed
|
||||
* allocate and zero space for new relation descriptor
|
||||
*/
|
||||
if (relation == NULL)
|
||||
relation = (Relation) palloc(sizeof(RelationData));
|
||||
relation = (Relation) palloc0(sizeof(RelationData));
|
||||
|
||||
/*
|
||||
* clear all fields of reldesc
|
||||
* clear fields of reldesc that should initialize to something non-zero
|
||||
*/
|
||||
MemSet(relation, 0, sizeof(RelationData));
|
||||
relation->rd_targblock = InvalidBlockNumber;
|
||||
|
||||
/* make sure relation is marked as having no open file yet */
|
||||
@ -350,7 +346,6 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
|
||||
{
|
||||
case RELKIND_RELATION:
|
||||
case RELKIND_TOASTVALUE:
|
||||
case RELKIND_UNCATALOGED:
|
||||
case RELKIND_INDEX:
|
||||
break;
|
||||
default:
|
||||
@ -374,7 +369,6 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
|
||||
{
|
||||
case RELKIND_RELATION:
|
||||
case RELKIND_TOASTVALUE:
|
||||
case RELKIND_UNCATALOGED:
|
||||
options = heap_reloptions(relation->rd_rel->relkind, datum,
|
||||
false);
|
||||
break;
|
||||
@ -394,6 +388,7 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
|
||||
relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
|
||||
VARSIZE(options));
|
||||
memcpy(relation->rd_options, options, VARSIZE(options));
|
||||
pfree(options);
|
||||
}
|
||||
}
|
||||
|
||||
@ -779,24 +774,22 @@ equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
|
||||
/*
|
||||
* RelationBuildDesc
|
||||
*
|
||||
* Build a relation descriptor --- either a new one, or by
|
||||
* recycling the given old relation object. The latter case
|
||||
* supports rebuilding a relcache entry without invalidating
|
||||
* pointers to it. The caller must hold at least
|
||||
* Build a relation descriptor. The caller must hold at least
|
||||
* AccessShareLock on the target relid.
|
||||
*
|
||||
* The new descriptor is inserted into the hash table if insertIt is true.
|
||||
*
|
||||
* Returns NULL if no pg_class row could be found for the given relid
|
||||
* (suggesting we are trying to access a just-deleted relation).
|
||||
* Any other error is reported via elog.
|
||||
*/
|
||||
static Relation
|
||||
RelationBuildDesc(Oid targetRelId, Relation oldrelation)
|
||||
RelationBuildDesc(Oid targetRelId, bool insertIt)
|
||||
{
|
||||
Relation relation;
|
||||
Oid relid;
|
||||
HeapTuple pg_class_tuple;
|
||||
Form_pg_class relp;
|
||||
MemoryContext oldcxt;
|
||||
|
||||
/*
|
||||
* find the tuple in pg_class corresponding to the given relation id
|
||||
@ -819,7 +812,7 @@ RelationBuildDesc(Oid targetRelId, Relation oldrelation)
|
||||
* allocate storage for the relation descriptor, and copy pg_class_tuple
|
||||
* to relation->rd_rel.
|
||||
*/
|
||||
relation = AllocateRelationDesc(oldrelation, relp);
|
||||
relation = AllocateRelationDesc(relp);
|
||||
|
||||
/*
|
||||
* initialize the relation's relation id (relation->rd_id)
|
||||
@ -885,11 +878,10 @@ RelationBuildDesc(Oid targetRelId, Relation oldrelation)
|
||||
heap_freetuple(pg_class_tuple);
|
||||
|
||||
/*
|
||||
* Insert newly created relation into relcache hash tables.
|
||||
* Insert newly created relation into relcache hash table, if requested.
|
||||
*/
|
||||
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
|
||||
RelationCacheInsert(relation);
|
||||
MemoryContextSwitchTo(oldcxt);
|
||||
if (insertIt)
|
||||
RelationCacheInsert(relation);
|
||||
|
||||
/* It's fully valid */
|
||||
relation->rd_isvalid = true;
|
||||
@ -1442,9 +1434,19 @@ RelationIdGetRelation(Oid relationId)
|
||||
if (RelationIsValid(rd))
|
||||
{
|
||||
RelationIncrementReferenceCount(rd);
|
||||
/* revalidate nailed index if necessary */
|
||||
/* revalidate cache entry if necessary */
|
||||
if (!rd->rd_isvalid)
|
||||
RelationReloadClassinfo(rd);
|
||||
{
|
||||
/*
|
||||
* Indexes only have a limited number of possible schema changes,
|
||||
* and we don't want to use the full-blown procedure because it's
|
||||
* a headache for indexes that reload itself depends on.
|
||||
*/
|
||||
if (rd->rd_rel->relkind == RELKIND_INDEX)
|
||||
RelationReloadClassinfo(rd);
|
||||
else
|
||||
RelationClearRelation(rd, true);
|
||||
}
|
||||
return rd;
|
||||
}
|
||||
|
||||
@ -1452,7 +1454,7 @@ RelationIdGetRelation(Oid relationId)
|
||||
* no reldesc in the cache, so have RelationBuildDesc() build one and add
|
||||
* it.
|
||||
*/
|
||||
rd = RelationBuildDesc(relationId, NULL);
|
||||
rd = RelationBuildDesc(relationId, true);
|
||||
if (RelationIsValid(rd))
|
||||
RelationIncrementReferenceCount(rd);
|
||||
return rd;
|
||||
@ -1582,6 +1584,49 @@ RelationReloadClassinfo(Relation relation)
|
||||
relation->rd_isvalid = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* RelationDestroyRelation
|
||||
*
|
||||
* Physically delete a relation cache entry and all subsidiary data.
|
||||
* Caller must already have unhooked the entry from the hash table.
|
||||
*/
|
||||
static void
|
||||
RelationDestroyRelation(Relation relation)
|
||||
{
|
||||
Assert(RelationHasReferenceCountZero(relation));
|
||||
|
||||
/*
|
||||
* Make sure smgr and lower levels close the relation's files, if they
|
||||
* weren't closed already. (This was probably done by caller, but let's
|
||||
* just be real sure.)
|
||||
*/
|
||||
RelationCloseSmgr(relation);
|
||||
|
||||
/*
|
||||
* Free all the subsidiary data structures of the relcache entry,
|
||||
* then the entry itself.
|
||||
*/
|
||||
if (relation->rd_rel)
|
||||
pfree(relation->rd_rel);
|
||||
/* can't use DecrTupleDescRefCount here */
|
||||
Assert(relation->rd_att->tdrefcount > 0);
|
||||
if (--relation->rd_att->tdrefcount == 0)
|
||||
FreeTupleDesc(relation->rd_att);
|
||||
list_free(relation->rd_indexlist);
|
||||
FreeTriggerDesc(relation->trigdesc);
|
||||
if (relation->rd_options)
|
||||
pfree(relation->rd_options);
|
||||
if (relation->rd_indextuple)
|
||||
pfree(relation->rd_indextuple);
|
||||
if (relation->rd_am)
|
||||
pfree(relation->rd_am);
|
||||
if (relation->rd_indexcxt)
|
||||
MemoryContextDelete(relation->rd_indexcxt);
|
||||
if (relation->rd_rulescxt)
|
||||
MemoryContextDelete(relation->rd_rulescxt);
|
||||
pfree(relation);
|
||||
}
|
||||
|
||||
/*
|
||||
* RelationClearRelation
|
||||
*
|
||||
@ -1598,7 +1643,6 @@ static void
|
||||
RelationClearRelation(Relation relation, bool rebuild)
|
||||
{
|
||||
Oid old_reltype = relation->rd_rel->reltype;
|
||||
MemoryContext oldcxt;
|
||||
|
||||
/*
|
||||
* Make sure smgr and lower levels close the relation's files, if they
|
||||
@ -1650,40 +1694,15 @@ RelationClearRelation(Relation relation, bool rebuild)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove relation from hash tables
|
||||
*
|
||||
* Note: we might be reinserting it momentarily, but we must not have it
|
||||
* visible in the hash tables until it's valid again, so don't try to
|
||||
* optimize this away...
|
||||
*/
|
||||
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
|
||||
RelationCacheDelete(relation);
|
||||
MemoryContextSwitchTo(oldcxt);
|
||||
/* Mark it invalid until we've finished rebuild */
|
||||
relation->rd_isvalid = false;
|
||||
|
||||
/* Clear out catcache's entries for this relation */
|
||||
/*
|
||||
* Clear out catcache's entries for this relation. This is a bit of
|
||||
* a hack, but it's a convenient place to do it.
|
||||
*/
|
||||
CatalogCacheFlushRelation(RelationGetRelid(relation));
|
||||
|
||||
/*
|
||||
* Free all the subsidiary data structures of the relcache entry. We
|
||||
* cannot free rd_att if we are trying to rebuild the entry, however,
|
||||
* because pointers to it may be cached in various places. The rule
|
||||
* manager might also have pointers into the rewrite rules. So to begin
|
||||
* with, we can only get rid of these fields:
|
||||
*/
|
||||
FreeTriggerDesc(relation->trigdesc);
|
||||
if (relation->rd_indextuple)
|
||||
pfree(relation->rd_indextuple);
|
||||
if (relation->rd_am)
|
||||
pfree(relation->rd_am);
|
||||
if (relation->rd_rel)
|
||||
pfree(relation->rd_rel);
|
||||
if (relation->rd_options)
|
||||
pfree(relation->rd_options);
|
||||
list_free(relation->rd_indexlist);
|
||||
if (relation->rd_indexcxt)
|
||||
MemoryContextDelete(relation->rd_indexcxt);
|
||||
|
||||
/*
|
||||
* If we're really done with the relcache entry, blow it away. But if
|
||||
* someone is still using it, reconstruct the whole deal without moving
|
||||
@ -1692,19 +1711,34 @@ RelationClearRelation(Relation relation, bool rebuild)
|
||||
*/
|
||||
if (!rebuild)
|
||||
{
|
||||
/* ok to zap remaining substructure */
|
||||
/* Flush any rowtype cache entry */
|
||||
flush_rowtype_cache(old_reltype);
|
||||
/* can't use DecrTupleDescRefCount here */
|
||||
Assert(relation->rd_att->tdrefcount > 0);
|
||||
if (--relation->rd_att->tdrefcount == 0)
|
||||
FreeTupleDesc(relation->rd_att);
|
||||
if (relation->rd_rulescxt)
|
||||
MemoryContextDelete(relation->rd_rulescxt);
|
||||
pfree(relation);
|
||||
|
||||
/* Remove it from the hash table */
|
||||
RelationCacheDelete(relation);
|
||||
|
||||
/* And release storage */
|
||||
RelationDestroyRelation(relation);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Our strategy for rebuilding an open relcache entry is to build
|
||||
* a new entry from scratch, swap its contents with the old entry,
|
||||
* and finally delete the new entry (along with any infrastructure
|
||||
* swapped over from the old entry). This is to avoid trouble in case
|
||||
* an error causes us to lose control partway through. The old entry
|
||||
* will still be marked !rd_isvalid, so we'll try to rebuild it again
|
||||
* on next access. Meanwhile it's not any less valid than it was
|
||||
* before, so any code that might expect to continue accessing it
|
||||
* isn't hurt by the rebuild failure. (Consider for example a
|
||||
* subtransaction that ALTERs a table and then gets cancelled partway
|
||||
* through the cache entry rebuild. The outer transaction should
|
||||
* still see the not-modified cache entry as valid.) The worst
|
||||
* consequence of an error is leaking the necessarily-unreferenced
|
||||
* new entry, and this shouldn't happen often enough for that to be
|
||||
* a big problem.
|
||||
*
|
||||
* When rebuilding an open relcache entry, must preserve ref count and
|
||||
* rd_createSubid state. Also attempt to preserve the tupledesc and
|
||||
* rewrite-rule substructures in place. (Note: the refcount mechanism
|
||||
@ -1716,54 +1750,78 @@ RelationClearRelation(Relation relation, bool rebuild)
|
||||
* is good because whatever ref counts the entry may have do not
|
||||
* necessarily belong to that resource owner.
|
||||
*/
|
||||
Relation newrel;
|
||||
Oid save_relid = RelationGetRelid(relation);
|
||||
int old_refcnt = relation->rd_refcnt;
|
||||
SubTransactionId old_createSubid = relation->rd_createSubid;
|
||||
TupleDesc old_att = relation->rd_att;
|
||||
RuleLock *old_rules = relation->rd_rules;
|
||||
MemoryContext old_rulescxt = relation->rd_rulescxt;
|
||||
bool keep_tupdesc;
|
||||
bool keep_rules;
|
||||
|
||||
if (RelationBuildDesc(save_relid, relation) != relation)
|
||||
/* Build temporary entry, but don't link it into hashtable */
|
||||
newrel = RelationBuildDesc(save_relid, false);
|
||||
if (newrel == NULL)
|
||||
{
|
||||
/* Should only get here if relation was deleted */
|
||||
flush_rowtype_cache(old_reltype);
|
||||
Assert(old_att->tdrefcount > 0);
|
||||
if (--old_att->tdrefcount == 0)
|
||||
FreeTupleDesc(old_att);
|
||||
if (old_rulescxt)
|
||||
MemoryContextDelete(old_rulescxt);
|
||||
pfree(relation);
|
||||
RelationCacheDelete(relation);
|
||||
RelationDestroyRelation(relation);
|
||||
elog(ERROR, "relation %u deleted while still in use", save_relid);
|
||||
}
|
||||
relation->rd_refcnt = old_refcnt;
|
||||
relation->rd_createSubid = old_createSubid;
|
||||
if (equalTupleDescs(old_att, relation->rd_att))
|
||||
{
|
||||
/* needn't flush typcache here */
|
||||
Assert(relation->rd_att->tdrefcount == 1);
|
||||
if (--relation->rd_att->tdrefcount == 0)
|
||||
FreeTupleDesc(relation->rd_att);
|
||||
relation->rd_att = old_att;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
|
||||
keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
|
||||
if (!keep_tupdesc)
|
||||
flush_rowtype_cache(old_reltype);
|
||||
Assert(old_att->tdrefcount > 0);
|
||||
if (--old_att->tdrefcount == 0)
|
||||
FreeTupleDesc(old_att);
|
||||
}
|
||||
if (equalRuleLocks(old_rules, relation->rd_rules))
|
||||
|
||||
/*
|
||||
* Perform swapping of the relcache entry contents. Within this
|
||||
* process the old entry is momentarily invalid, so there *must*
|
||||
* be no possibility of CHECK_FOR_INTERRUPTS within this sequence.
|
||||
* Do it in all-in-line code for safety.
|
||||
*
|
||||
* Since the vast majority of fields should be swapped, our method
|
||||
* is to swap the whole structures and then re-swap those few fields
|
||||
* we didn't want swapped.
|
||||
*/
|
||||
#define SWAPFIELD(fldtype, fldname) \
|
||||
do { \
|
||||
fldtype _tmp = newrel->fldname; \
|
||||
newrel->fldname = relation->fldname; \
|
||||
relation->fldname = _tmp; \
|
||||
} while (0)
|
||||
|
||||
/* swap all Relation struct fields */
|
||||
{
|
||||
if (relation->rd_rulescxt)
|
||||
MemoryContextDelete(relation->rd_rulescxt);
|
||||
relation->rd_rules = old_rules;
|
||||
relation->rd_rulescxt = old_rulescxt;
|
||||
RelationData tmpstruct;
|
||||
|
||||
memcpy(&tmpstruct, newrel, sizeof(RelationData));
|
||||
memcpy(newrel, relation, sizeof(RelationData));
|
||||
memcpy(relation, &tmpstruct, sizeof(RelationData));
|
||||
}
|
||||
else
|
||||
|
||||
/* rd_smgr must not be swapped, due to back-links from smgr level */
|
||||
SWAPFIELD(SMgrRelation, rd_smgr);
|
||||
/* rd_refcnt must be preserved */
|
||||
SWAPFIELD(int, rd_refcnt);
|
||||
/* isnailed shouldn't change */
|
||||
Assert(newrel->rd_isnailed == relation->rd_isnailed);
|
||||
/* creation sub-XIDs must be preserved */
|
||||
SWAPFIELD(SubTransactionId, rd_createSubid);
|
||||
/* un-swap rd_rel pointers, swap contents instead */
|
||||
SWAPFIELD(Form_pg_class, rd_rel);
|
||||
/* ... but actually, we don't have to update newrel->rd_rel */
|
||||
memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
|
||||
/* preserve old tupledesc and rules if no logical change */
|
||||
if (keep_tupdesc)
|
||||
SWAPFIELD(TupleDesc, rd_att);
|
||||
if (keep_rules)
|
||||
{
|
||||
if (old_rulescxt)
|
||||
MemoryContextDelete(old_rulescxt);
|
||||
SWAPFIELD(RuleLock *, rd_rules);
|
||||
SWAPFIELD(MemoryContext, rd_rulescxt);
|
||||
}
|
||||
|
||||
#undef SWAPFIELD
|
||||
|
||||
/* And now we can throw away the temporary entry */
|
||||
RelationDestroyRelation(newrel);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2398,7 +2456,7 @@ RelationCacheInitializePhase2(void)
|
||||
#define LOAD_CRIT_INDEX(indexoid) \
|
||||
do { \
|
||||
LockRelationOid(indexoid, AccessShareLock); \
|
||||
ird = RelationBuildDesc(indexoid, NULL); \
|
||||
ird = RelationBuildDesc(indexoid, true); \
|
||||
if (ird == NULL) \
|
||||
elog(PANIC, "could not open critical system index %u", \
|
||||
indexoid); \
|
||||
@ -2985,7 +3043,7 @@ RelationGetIndexExpressions(Relation relation)
|
||||
fix_opfuncids((Node *) result);
|
||||
|
||||
/* Now save a copy of the completed tree in the relcache entry. */
|
||||
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
|
||||
oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
|
||||
relation->rd_indexprs = (List *) copyObject(result);
|
||||
MemoryContextSwitchTo(oldcxt);
|
||||
|
||||
@ -3060,7 +3118,7 @@ RelationGetIndexPredicate(Relation relation)
|
||||
fix_opfuncids((Node *) result);
|
||||
|
||||
/* Now save a copy of the completed tree in the relcache entry. */
|
||||
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
|
||||
oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
|
||||
relation->rd_indpred = (List *) copyObject(result);
|
||||
MemoryContextSwitchTo(oldcxt);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user