diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 24426f391f..dd6218cbfe 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.96 2010/01/02 16:57:35 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.97 2010/02/04 00:09:13 tgl Exp $ * * * INTERFACE ROUTINES @@ -1185,10 +1185,25 @@ toast_save_datum(Relation rel, Datum value, int options) toast_pointer.va_extsize = data_todo; } + /* + * Insert the correct table OID into the result TOAST pointer. + * + * Normally this is the actual OID of the target toast table, but during + * table-rewriting operations such as CLUSTER, we have to insert the OID + * of the table's real permanent toast table instead. rd_toastoid is + * set if we have to substitute such an OID. + */ + if (OidIsValid(rel->rd_toastoid)) + toast_pointer.va_toastrelid = rel->rd_toastoid; + else + toast_pointer.va_toastrelid = RelationGetRelid(toastrel); + + /* + * Choose an unused OID within the toast table for this toast value. + */ toast_pointer.va_valueid = GetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber) 1); - toast_pointer.va_toastrelid = rel->rd_rel->reltoastrelid; /* * Initialize constant parts of the tuple data diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 2d2ac3e894..cf2ac19d53 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.196 2010/02/02 19:12:29 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.197 2010/02/04 00:09:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,8 +63,9 @@ typedef struct static void rebuild_relation(Relation OldHeap, Oid indexOid, int freeze_min_age, int freeze_table_age); -static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, - Oid OIDOldIndex, int freeze_min_age, int freeze_table_age); +static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, + int freeze_min_age, int freeze_table_age, + bool *pSwapToastByContent, TransactionId *pFreezeXid); static List *get_tables_to_cluster(MemoryContext cluster_context); @@ -584,10 +585,8 @@ rebuild_relation(Relation OldHeap, Oid indexOid, Oid tableOid = RelationGetRelid(OldHeap); Oid tableSpace = OldHeap->rd_rel->reltablespace; Oid OIDNewHeap; - char NewHeapName[NAMEDATALEN]; + bool swap_toast_by_content; TransactionId frozenXid; - ObjectAddress object; - Relation newrel; /* Mark the correct index as clustered */ if (OidIsValid(indexOid)) @@ -596,98 +595,39 @@ rebuild_relation(Relation OldHeap, Oid indexOid, /* Close relcache entry, but keep lock until transaction commit */ heap_close(OldHeap, NoLock); - /* - * Create the new heap, using a temporary name in the same namespace as - * the existing table. NOTE: there is some risk of collision with user - * relnames. Working around this seems more trouble than it's worth; in - * particular, we can't create the new heap in a different namespace from - * the old, or we will have problems with the TEMP status of temp tables. - */ - snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", tableOid); + /* Create the transient table that will receive the re-ordered data */ + OIDNewHeap = make_new_heap(tableOid, tableSpace); - OIDNewHeap = make_new_heap(tableOid, NewHeapName, tableSpace); + /* Copy the heap data into the new table in the desired order */ + copy_heap_data(OIDNewHeap, tableOid, indexOid, + freeze_min_age, freeze_table_age, + &swap_toast_by_content, &frozenXid); - /* - * We don't need CommandCounterIncrement() because make_new_heap did it. - */ + /* Swap the physical files of the old and new heaps */ + swap_relation_files(tableOid, OIDNewHeap, + swap_toast_by_content, frozenXid); - /* - * Copy the heap data into the new table in the desired order. - */ - frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid, - freeze_min_age, freeze_table_age); - - /* To make the new heap's data visible (probably not needed?). */ - CommandCounterIncrement(); - - /* Swap the physical files of the old and new heaps. */ - swap_relation_files(tableOid, OIDNewHeap, frozenXid); - - CommandCounterIncrement(); - - /* Destroy new heap with old filenode */ - object.classId = RelationRelationId; - object.objectId = OIDNewHeap; - object.objectSubId = 0; - - /* - * The new relation is local to our transaction and we know nothing - * depends on it, so DROP_RESTRICT should be OK. - */ - performDeletion(&object, DROP_RESTRICT); - - /* performDeletion does CommandCounterIncrement at end */ - - /* - * Rebuild each index on the relation (but not the toast table, which is - * all-new at this point). We do not need CommandCounterIncrement() - * because reindex_relation does it. - */ - reindex_relation(tableOid, false); - - /* - * At this point, everything is kosher except that the toast table's name - * corresponds to the temporary table. The name is irrelevant to the - * backend because it's referenced by OID, but users looking at the - * catalogs could be confused. Rename it to prevent this problem. - * - * Note no lock required on the relation, because we already hold an - * exclusive lock on it. - */ - newrel = heap_open(tableOid, NoLock); - if (OidIsValid(newrel->rd_rel->reltoastrelid)) - { - Relation toastrel; - Oid toastidx; - Oid toastnamespace; - char NewToastName[NAMEDATALEN]; - - toastrel = relation_open(newrel->rd_rel->reltoastrelid, AccessShareLock); - toastidx = toastrel->rd_rel->reltoastidxid; - toastnamespace = toastrel->rd_rel->relnamespace; - relation_close(toastrel, AccessShareLock); - - /* rename the toast table ... */ - snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u", tableOid); - RenameRelationInternal(newrel->rd_rel->reltoastrelid, NewToastName, - toastnamespace); - - /* ... and its index too */ - snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index", tableOid); - RenameRelationInternal(toastidx, NewToastName, - toastnamespace); - } - relation_close(newrel, NoLock); + /* Destroy the new heap, removing the old data along with it */ + cleanup_heap_swap(tableOid, OIDNewHeap, swap_toast_by_content); } + /* - * Create the new table that we will fill with correctly-ordered data. + * Create the transient table that will be filled with new data during + * CLUSTER, ALTER TABLE, and similar operations. The transient table + * duplicates the logical structure of the OldHeap, but is placed in + * NewTableSpace which might be different from OldHeap's. + * + * After this, the caller should load the new heap with transferred/modified + * data, then call swap_relation_files, and finally call cleanup_heap_swap to + * remove the debris. */ Oid -make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace) +make_new_heap(Oid OIDOldHeap, Oid NewTableSpace) { TupleDesc OldHeapDesc, tupdesc; + char NewHeapName[NAMEDATALEN]; Oid OIDNewHeap; Oid toastid; Relation OldHeap; @@ -708,7 +648,7 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace) tupdesc = CreateTupleDescCopy(OldHeapDesc); /* - * Use options of the old heap for new heap. + * But we do want to use reloptions of the old heap for new heap. */ tuple = SearchSysCache(RELOID, ObjectIdGetDatum(OIDOldHeap), @@ -720,7 +660,16 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace) if (isNull) reloptions = (Datum) 0; - OIDNewHeap = heap_create_with_catalog(NewName, + /* + * Create the new heap, using a temporary name in the same namespace as + * the existing table. NOTE: there is some risk of collision with user + * relnames. Working around this seems more trouble than it's worth; in + * particular, we can't create the new heap in a different namespace from + * the old, or we will have problems with the TEMP status of temp tables. + */ + snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap); + + OIDNewHeap = heap_create_with_catalog(NewHeapName, RelationGetNamespace(OldHeap), NewTableSpace, InvalidOid, @@ -776,12 +725,16 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace) } /* - * Do the physical copying of heap data. Returns the TransactionId used as - * freeze cutoff point for the tuples. + * Do the physical copying of heap data. + * + * There are two output parameters: + * *pSwapToastByContent is set true if toast tables must be swapped by content. + * *pFreezeXid receives the TransactionId used as freeze cutoff point. */ -static TransactionId +static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, - int freeze_min_age, int freeze_table_age) + int freeze_min_age, int freeze_table_age, + bool *pSwapToastByContent, TransactionId *pFreezeXid) { Relation NewHeap, OldHeap, @@ -842,13 +795,41 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, /* use_wal off requires rd_targblock be initially invalid */ Assert(NewHeap->rd_targblock == InvalidBlockNumber); + /* + * If both tables have TOAST tables, perform toast swap by content. It is + * possible that the old table has a toast table but the new one doesn't, + * if toastable columns have been dropped. In that case we have to do + * swap by links. This is okay because swap by content is only essential + * for system catalogs, and we don't support schema changes for them. + */ + if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid) + { + *pSwapToastByContent = true; + + /* + * When doing swap by content, any toast pointers written into NewHeap + * must use the old toast table's OID, because that's where the toast + * data will eventually be found. Set this up by setting rd_toastoid. + * Note that we must hold NewHeap open until we are done writing data, + * since the relcache will not guarantee to remember this setting once + * the relation is closed. Also, this technique depends on the fact + * that no one will try to read from the NewHeap until after we've + * finished writing it and swapping the rels --- otherwise they could + * follow the toast pointers to the wrong place. + */ + NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid; + } + else + *pSwapToastByContent = false; + /* * compute xids used to freeze and weed out dead tuples. We use -1 * freeze_min_age to avoid having CLUSTER freeze tuples earlier than a * plain VACUUM would. */ vacuum_set_xid_limits(freeze_min_age, freeze_table_age, - OldHeap->rd_rel->relisshared, &OldestXmin, &FreezeXid, NULL); + OldHeap->rd_rel->relisshared, + &OldestXmin, &FreezeXid, NULL); /* * FreezeXid will become the table's new relfrozenxid, and that mustn't go @@ -857,20 +838,23 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid)) FreezeXid = OldHeap->rd_rel->relfrozenxid; + /* return selected value to caller */ + *pFreezeXid = FreezeXid; + /* Initialize the rewrite operation */ rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal); /* - * Scan through the OldHeap in OldIndex order and copy each tuple into the - * NewHeap. To ensure we see recently-dead tuples that still need to be - * copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum for - * the visibility test. + * Scan through the OldHeap, either in OldIndex order or sequentially, + * and copy each tuple into the NewHeap. To ensure we see recently-dead + * tuples that still need to be copied, we scan with SnapshotAny and use + * HeapTupleSatisfiesVacuum for the visibility test. */ if (OldIndex != NULL) { heapScan = NULL; indexScan = index_beginscan(OldHeap, OldIndex, - SnapshotAny, 0, (ScanKey) NULL); + SnapshotAny, 0, (ScanKey) NULL); } else { @@ -1005,6 +989,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, /* Write out any remaining tuples, and fsync if needed */ end_heap_rewrite(rwstate); + /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */ + NewHeap->rd_toastoid = InvalidOid; + + /* Clean up */ pfree(values); pfree(isnull); @@ -1012,8 +1000,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, index_close(OldIndex, NoLock); heap_close(OldHeap, NoLock); heap_close(NewHeap, NoLock); - - return FreezeXid; } /* @@ -1022,18 +1008,23 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, * We swap the physical identity (reltablespace and relfilenode) while * keeping the same logical identities of the two relations. * - * Also swap any TOAST links, so that the toast data moves along with - * the main-table data. + * We can swap associated TOAST data in either of two ways: recursively swap + * the physical content of the toast tables (and their indexes), or swap the + * TOAST links in the given relations' pg_class entries. The former is needed + * to manage rewrites of shared catalogs (where we cannot change the pg_class + * links) while the latter is the only way to handle cases in which a toast + * table is added or removed altogether. * * Additionally, the first relation is marked with relfrozenxid set to * frozenXid. It seems a bit ugly to have this here, but all callers would - * have to do it anyway, so having it here saves a heap_update. Note: the - * TOAST table needs no special handling, because since we swapped the links, - * the entry for the TOAST table will now contain RecentXmin in relfrozenxid, - * which is the correct value. + * have to do it anyway, so having it here saves a heap_update. Note: in + * the swap-toast-links case, we assume we don't need to change the toast + * table's relfrozenxid: the new version of the toast table should already + * have relfrozenxid set to RecentXmin, which is good enough. */ void -swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid) +swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, + TransactionId frozenXid) { Relation relRelation; HeapTuple reltup1, @@ -1071,15 +1062,26 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid) relform1->reltablespace = relform2->reltablespace; relform2->reltablespace = swaptemp; - swaptemp = relform1->reltoastrelid; - relform1->reltoastrelid = relform2->reltoastrelid; - relform2->reltoastrelid = swaptemp; + if (!swap_toast_by_content) + { + swaptemp = relform1->reltoastrelid; + relform1->reltoastrelid = relform2->reltoastrelid; + relform2->reltoastrelid = swaptemp; - /* we should not swap reltoastidxid */ + /* we should not swap reltoastidxid */ + } + + /* + * In the case of a shared catalog, these next few steps only affect our + * own database's pg_class row; but that's okay. + */ /* set rel1's frozen Xid */ - Assert(TransactionIdIsNormal(frozenXid)); - relform1->relfrozenxid = frozenXid; + if (relform1->relkind != RELKIND_INDEX) + { + Assert(TransactionIdIsNormal(frozenXid)); + relform1->relfrozenxid = frozenXid; + } /* swap size statistics too, since new rel has freshly-updated stats */ { @@ -1107,63 +1109,96 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid) /* * If we have toast tables associated with the relations being swapped, - * change their dependency links to re-associate them with their new - * owning relations. Otherwise the wrong one will get dropped ... - * - * NOTE: it is possible that only one table has a toast table; this can - * happen in CLUSTER if there were dropped columns in the old table, and - * in ALTER TABLE when adding or changing type of columns. - * - * NOTE: at present, a TOAST table's only dependency is the one on its - * owning table. If more are ever created, we'd need to use something - * more selective than deleteDependencyRecordsFor() to get rid of only the - * link we want. + * deal with them too. */ if (relform1->reltoastrelid || relform2->reltoastrelid) { - ObjectAddress baseobject, - toastobject; - long count; - - /* Delete old dependencies */ - if (relform1->reltoastrelid) + if (swap_toast_by_content) { - count = deleteDependencyRecordsFor(RelationRelationId, - relform1->reltoastrelid); - if (count != 1) - elog(ERROR, "expected one dependency record for TOAST table, found %ld", - count); + if (relform1->reltoastrelid && relform2->reltoastrelid) + { + /* Recursively swap the contents of the toast tables */ + swap_relation_files(relform1->reltoastrelid, + relform2->reltoastrelid, + true, + frozenXid); + } + else + { + /* caller messed up */ + elog(ERROR, "cannot swap toast files by content when there's only one"); + } } - if (relform2->reltoastrelid) + else { - count = deleteDependencyRecordsFor(RelationRelationId, - relform2->reltoastrelid); - if (count != 1) - elog(ERROR, "expected one dependency record for TOAST table, found %ld", - count); - } + /* + * We swapped the ownership links, so we need to change dependency + * data to match. + * + * NOTE: it is possible that only one table has a toast table. + * + * NOTE: at present, a TOAST table's only dependency is the one on + * its owning table. If more are ever created, we'd need to use + * something more selective than deleteDependencyRecordsFor() to + * get rid of just the link we want. + */ + ObjectAddress baseobject, + toastobject; + long count; - /* Register new dependencies */ - baseobject.classId = RelationRelationId; - baseobject.objectSubId = 0; - toastobject.classId = RelationRelationId; - toastobject.objectSubId = 0; + /* Delete old dependencies */ + if (relform1->reltoastrelid) + { + count = deleteDependencyRecordsFor(RelationRelationId, + relform1->reltoastrelid); + if (count != 1) + elog(ERROR, "expected one dependency record for TOAST table, found %ld", + count); + } + if (relform2->reltoastrelid) + { + count = deleteDependencyRecordsFor(RelationRelationId, + relform2->reltoastrelid); + if (count != 1) + elog(ERROR, "expected one dependency record for TOAST table, found %ld", + count); + } - if (relform1->reltoastrelid) - { - baseobject.objectId = r1; - toastobject.objectId = relform1->reltoastrelid; - recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); - } + /* Register new dependencies */ + baseobject.classId = RelationRelationId; + baseobject.objectSubId = 0; + toastobject.classId = RelationRelationId; + toastobject.objectSubId = 0; - if (relform2->reltoastrelid) - { - baseobject.objectId = r2; - toastobject.objectId = relform2->reltoastrelid; - recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); + if (relform1->reltoastrelid) + { + baseobject.objectId = r1; + toastobject.objectId = relform1->reltoastrelid; + recordDependencyOn(&toastobject, &baseobject, + DEPENDENCY_INTERNAL); + } + + if (relform2->reltoastrelid) + { + baseobject.objectId = r2; + toastobject.objectId = relform2->reltoastrelid; + recordDependencyOn(&toastobject, &baseobject, + DEPENDENCY_INTERNAL); + } } } + /* + * If we're swapping two toast tables by content, do the same for their + * indexes. + */ + if (swap_toast_by_content && + relform1->reltoastidxid && relform2->reltoastidxid) + swap_relation_files(relform1->reltoastidxid, + relform2->reltoastidxid, + true, + InvalidTransactionId); + /* * Blow away the old relcache entries now. We need this kluge because * relcache.c keeps a link to the smgr relation for the physical file, and @@ -1187,6 +1222,85 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid) heap_close(relRelation, RowExclusiveLock); } +/* + * Remove the transient table that was built by make_new_heap, and finish + * cleaning up (including rebuilding all indexes on the old heap). + */ +void +cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool swap_toast_by_content) +{ + ObjectAddress object; + + /* Make swap_relation_files' changes visible in the catalogs. */ + CommandCounterIncrement(); + + /* Destroy new heap with old filenode */ + object.classId = RelationRelationId; + object.objectId = OIDNewHeap; + object.objectSubId = 0; + + /* + * The new relation is local to our transaction and we know nothing + * depends on it, so DROP_RESTRICT should be OK. + */ + performDeletion(&object, DROP_RESTRICT); + + /* performDeletion does CommandCounterIncrement at end */ + + /* + * Rebuild each index on the relation (but not the toast table, which is + * all-new at this point). We do not need CommandCounterIncrement() + * because reindex_relation does it. + */ + reindex_relation(OIDOldHeap, false); + + /* + * At this point, everything is kosher except that, if we did toast swap + * by links, the toast table's name corresponds to the transient table. + * The name is irrelevant to the backend because it's referenced by OID, + * but users looking at the catalogs could be confused. Rename it to + * prevent this problem. + * + * Note no lock required on the relation, because we already hold an + * exclusive lock on it. + */ + if (!swap_toast_by_content) + { + Relation newrel; + + newrel = heap_open(OIDOldHeap, NoLock); + if (OidIsValid(newrel->rd_rel->reltoastrelid)) + { + Relation toastrel; + Oid toastidx; + Oid toastnamespace; + char NewToastName[NAMEDATALEN]; + + toastrel = relation_open(newrel->rd_rel->reltoastrelid, + AccessShareLock); + toastidx = toastrel->rd_rel->reltoastidxid; + toastnamespace = toastrel->rd_rel->relnamespace; + relation_close(toastrel, AccessShareLock); + + /* rename the toast table ... */ + snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u", + OIDOldHeap); + RenameRelationInternal(newrel->rd_rel->reltoastrelid, + NewToastName, + toastnamespace); + + /* ... and its index too */ + snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index", + OIDOldHeap); + RenameRelationInternal(toastidx, + NewToastName, + toastnamespace); + } + relation_close(newrel, NoLock); + } +} + + /* * Get a list of tables that the current user owns and * have indisclustered set. Return the list in a List * of rvsToCluster diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index a729adb055..dba5f29d66 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.323 2010/02/03 10:01:29 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.324 2010/02/04 00:09:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2866,11 +2866,9 @@ ATRewriteTables(List **wqueue) if (tab->newvals != NIL || tab->new_changeoids) { /* Build a temporary relation and copy data */ - Oid OIDNewHeap; - char NewHeapName[NAMEDATALEN]; - Oid NewTableSpace; Relation OldHeap; - ObjectAddress object; + Oid OIDNewHeap; + Oid NewTableSpace; OldHeap = heap_open(tab->relid, NoLock); @@ -2905,18 +2903,8 @@ ATRewriteTables(List **wqueue) heap_close(OldHeap, NoLock); - /* - * Create the new heap, using a temporary name in the same - * namespace as the existing table. NOTE: there is some risk of - * collision with user relnames. Working around this seems more - * trouble than it's worth; in particular, we can't create the new - * heap in a different namespace from the old, or we will have - * problems with the TEMP status of temp tables. - */ - snprintf(NewHeapName, sizeof(NewHeapName), - "pg_temp_%u", tab->relid); - - OIDNewHeap = make_new_heap(tab->relid, NewHeapName, NewTableSpace); + /* Create transient table that will receive the modified data */ + OIDNewHeap = make_new_heap(tab->relid, NewTableSpace); /* * Copy the heap data into the new table with the desired @@ -2929,30 +2917,14 @@ ATRewriteTables(List **wqueue) * Swap the physical files of the old and new heaps. Since we are * generating a new heap, we can use RecentXmin for the table's * new relfrozenxid because we rewrote all the tuples on - * ATRewriteTable, so no older Xid remains on the table. + * ATRewriteTable, so no older Xid remains in the table. Also, + * we never try to swap toast tables by content, since we have + * no interest in letting this code work on system catalogs. */ - swap_relation_files(tab->relid, OIDNewHeap, RecentXmin); + swap_relation_files(tab->relid, OIDNewHeap, false, RecentXmin); - CommandCounterIncrement(); - - /* Destroy new heap with old filenode */ - object.classId = RelationRelationId; - object.objectId = OIDNewHeap; - object.objectSubId = 0; - - /* - * The new relation is local to our transaction and we know - * nothing depends on it, so DROP_RESTRICT should be OK. - */ - performDeletion(&object, DROP_RESTRICT); - /* performDeletion does CommandCounterIncrement at end */ - - /* - * Rebuild each index on the relation (but not the toast table, - * which is all-new anyway). We do not need - * CommandCounterIncrement() because reindex_relation does it. - */ - reindex_relation(tab->relid, false); + /* Destroy the new heap, removing the old data along with it. */ + cleanup_heap_swap(tab->relid, OIDNewHeap, false); } else { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index ba09331aba..e71416c0f7 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.301 2010/02/03 01:14:17 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.302 2010/02/04 00:09:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1925,13 +1925,13 @@ RelationClearRelation(Relation relation, bool rebuild) * new entry, and this shouldn't happen often enough for that to be * a big problem. * - * When rebuilding an open relcache entry, we must preserve ref count - * and rd_createSubid/rd_newRelfilenodeSubid state. Also attempt to - * preserve the pg_class entry (rd_rel), tupledesc, and rewrite-rule - * substructures in place, because various places assume that these - * structures won't move while they are working with an open relcache - * entry. (Note: the refcount mechanism for tupledescs might someday - * allow us to remove this hack for the tupledesc.) + * When rebuilding an open relcache entry, we must preserve ref count, + * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also + * attempt to preserve the pg_class entry (rd_rel), tupledesc, and + * rewrite-rule substructures in place, because various places assume + * that these structures won't move while they are working with an + * open relcache entry. (Note: the refcount mechanism for tupledescs + * might someday allow us to remove this hack for the tupledesc.) * * Note that this process does not touch CurrentResourceOwner; which * is good because whatever ref counts the entry may have do not @@ -2005,6 +2005,8 @@ RelationClearRelation(Relation relation, bool rebuild) SWAPFIELD(RuleLock *, rd_rules); SWAPFIELD(MemoryContext, rd_rulescxt); } + /* toast OID override must be preserved */ + SWAPFIELD(Oid, rd_toastoid); /* pgstat_info must be preserved */ SWAPFIELD(struct PgStat_TableStatus *, pgstat_info); diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h index c58013c640..f535781436 100644 --- a/src/include/commands/cluster.h +++ b/src/include/commands/cluster.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994-5, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/commands/cluster.h,v 1.38 2010/01/06 05:31:14 itagaki Exp $ + * $PostgreSQL: pgsql/src/include/commands/cluster.h,v 1.39 2010/02/04 00:09:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,8 +23,11 @@ extern void cluster_rel(Oid tableOid, Oid indexOid, bool recheck, extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck); extern void mark_index_clustered(Relation rel, Oid indexOid); -extern Oid make_new_heap(Oid OIDOldHeap, const char *NewName, - Oid NewTableSpace); -extern void swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid); + +extern Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace); +extern void swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, + TransactionId frozenXid); +extern void cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, + bool swap_toast_by_content); #endif /* CLUSTER_H */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 344f37a687..3f5795d0ea 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.120 2010/01/17 22:56:23 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.121 2010/02/04 00:09:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -202,6 +202,16 @@ typedef struct RelationData uint16 *rd_exclstrats; /* exclusion ops' strategy numbers, if any */ void *rd_amcache; /* available for use by index AM */ + /* + * Hack for CLUSTER, rewriting ALTER TABLE, etc: when writing a new + * version of a table, we need to make any toast pointers inserted into it + * have the existing toast table's OID, not the OID of the transient toast + * table. If rd_toastoid isn't InvalidOid, it is the OID to place in + * toast pointers inserted into this rel. (Note it's set on the new + * version of the main heap, not the toast table itself.) + */ + Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */ + /* * sizes of the free space and visibility map forks, or InvalidBlockNumber * if not known yet