Fix RelationCacheInitializePhase2 (Phase3, in HEAD) to cope with the

possibility of shared-inval messages causing a relcache flush while it tries to fill in missing data in preloaded relcache entries. There are actually two distinct failure modes here: 1. The flush could delete the next-to-be-processed cache entry, causing the subsequent hash_seq_search calls to go off into the weeds. This is the problem reported by Michael Brown, and I believe it also accounts for bug #5074. The simplest fix is to restart the hashtable scan after we've read any new data from the catalogs. It appears that pre-8.4 branches have not suffered from this failure, because by chance there were no other catalogs sharing the same hash chains with the catalogs that RelationCacheInitializePhase2 had work to do for. However that's obviously pretty fragile, and it seems possible that derivative versions with additional system catalogs might be vulnerable, so I'm back-patching this part of the fix anyway. 2. The flush could delete the *current* cache entry, in which case the pointer to the newly-loaded data would end up being stored into an already-deleted Relation struct. As long as it was still deleted, the only consequence would be some leaked space in CacheMemoryContext. But it seems possible that the Relation struct could already have been recycled, in which case this represents a hard-to-reproduce clobber of cached data structures, with unforeseeable consequences. The fix here is to pin the entry while we work on it. In passing, also change RelationCacheInitializePhase2 to Assert that formrdesc() set up the relation's cached TupleDesc (rd_att) with the correct type OID and hasoids values. This is more appropriate than silently updating the values, because the original tupdesc might already have been copied into the catcache. However this part of the patch is not in HEAD because it fails due to some questionable recent changes in formrdesc :-(. That will be cleaned up in a subsequent patch.
2025-01-12 18:34:36 +08:00 · 2009-09-26 18:25:12 +00:00 · 2009-09-26 18:25:12 +00:00 · b3eb458a61
commit b3eb458a61
parent f0f4a67029
1 changed files with 61 additions and 11 deletions
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.250.2.2 2008/04/16 18:23:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.250.2.3 2009/09/26 18:25:12 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -128,8 +128,7 @@ do { \
 	RelIdCacheEnt *idhentry; bool found; \
 	idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
 										   (void *) &(RELATION->rd_id), \
-										   HASH_ENTER, \
-										   &found); \
+										   HASH_ENTER, &found); \
 	/* used to give notice if found -- now just keep quiet */ \
 	idhentry->reldesc = RELATION; \
 } while(0)
@ -138,7 +137,8 @@ do { \
 do { \
 	RelIdCacheEnt *hentry; \
 	hentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
-										 (void *) &(ID), HASH_FIND,NULL); \
+										 (void *) &(ID), \
+										 HASH_FIND, NULL); \
 	if (hentry) \
 		RELATION = hentry->reldesc; \
 	else \
@ -1306,7 +1306,9 @@ formrdesc(const char *relationName, Oid relationReltype,
 	 *
 	 * The data we insert here is pretty incomplete/bogus, but it'll serve to
 	 * get us launched.  RelationCacheInitializePhase2() will read the real
-	 * data from pg_class and replace what we've done here.
+	 * data from pg_class and replace what we've done here.  Note in particular
+	 * that relowner is left as zero; this cues RelationCacheInitializePhase2
+	 * that the real data isn't there yet.
 	 */
 	relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);

@ -2426,17 +2428,31 @@ RelationCacheInitializePhase2(void)
 	 * rows and replace the fake entries with them. Also, if any of the
 	 * relcache entries have rules or triggers, load that info the hard way
 	 * since it isn't recorded in the cache file.
+	 *
+	 * Whenever we access the catalogs to read data, there is a possibility
+	 * of a shared-inval cache flush causing relcache entries to be removed.
+	 * Since hash_seq_search only guarantees to still work after the *current*
+	 * entry is removed, it's unsafe to continue the hashtable scan afterward.
+	 * We handle this by restarting the scan from scratch after each access.
+	 * This is theoretically O(N^2), but the number of entries that actually
+	 * need to be fixed is small enough that it doesn't matter.
 	 */
 	hash_seq_init(&status, RelationIdCache);

 	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
 	{
 		Relation	relation = idhentry->reldesc;
+		bool		restart = false;
+
+		/*
+		 * Make sure *this* entry doesn't get flushed while we work with it.
+		 */
+		RelationIncrementReferenceCount(relation);

 		/*
 		 * If it's a faked-up entry, read the real pg_class tuple.
 		 */
-		if (needNewCacheFile && relation->rd_isnailed)
+		if (relation->rd_rel->relowner == InvalidOid)
 		{
 			HeapTuple	htup;
 			Form_pg_class relp;
@ -2453,7 +2469,6 @@ RelationCacheInitializePhase2(void)
 			 * Copy tuple to relation->rd_rel. (See notes in
 			 * AllocateRelationDesc())
 			 */
-			Assert(relation->rd_rel != NULL);
 			memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);

 			/* Update rd_options while we have the tuple */
@ -2462,22 +2477,57 @@ RelationCacheInitializePhase2(void)
 			RelationParseRelOptions(relation, htup);

 			/*
-			 * Also update the derived fields in rd_att.
+			 * Check the values in rd_att were set up correctly.  (We cannot
+			 * just copy them over now: formrdesc must have set up the
+			 * rd_att data correctly to start with, because it may already
+			 * have been copied into one or more catcache entries.)
 			 */
-			relation->rd_att->tdtypeid = relp->reltype;
-			relation->rd_att->tdtypmod = -1;	/* unnecessary, but... */
-			relation->rd_att->tdhasoid = relp->relhasoids;
+			Assert(relation->rd_att->tdtypeid == relp->reltype);
+			Assert(relation->rd_att->tdtypmod == -1);
+			Assert(relation->rd_att->tdhasoid == relp->relhasoids);

 			ReleaseSysCache(htup);
+
+			/* relowner had better be OK now, else we'll loop forever */
+			if (relation->rd_rel->relowner == InvalidOid)
+				elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
+					 RelationGetRelationName(relation));
+
+			restart = true;
 		}

 		/*
 		 * Fix data that isn't saved in relcache cache file.
+		 *
+		 * relhasrules or reltriggers could possibly be wrong or out of
+		 * date.  If we don't actually find any rules or triggers, clear the
+		 * local copy of the flag so that we don't get into an infinite loop
+		 * here.  We don't make any attempt to fix the pg_class entry, though.
 		 */
 		if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
+		{
 			RelationBuildRuleLock(relation);
+			if (relation->rd_rules == NULL)
+				relation->rd_rel->relhasrules = false;
+			restart = true;
+		}
 		if (relation->rd_rel->reltriggers > 0 && relation->trigdesc == NULL)
+		{
 			RelationBuildTriggers(relation);
+			if (relation->trigdesc == NULL)
+				relation->rd_rel->reltriggers = 0;
+			restart = true;
+		}
+
+		/* Release hold on the relation */
+		RelationDecrementReferenceCount(relation);
+
+		/* Now, restart the hashtable scan if needed */
+		if (restart)
+		{
+			hash_seq_term(&status);
+			hash_seq_init(&status, RelationIdCache);
+		}
 	}

 	/*