Fix TOAST access failure in RETURNING queries.

Discussion of commit 3e2f3c2e4 exposed a problem that is of longer
standing: since we don't detoast data while sticking it into a portal's
holdStore for PORTAL_ONE_RETURNING and PORTAL_UTIL_SELECT queries, and we
release the query's snapshot as soon as we're done loading the holdStore,
later readout of the holdStore can do TOAST fetches against data that can
no longer be seen by any of the session's live snapshots.  This means that
a concurrent VACUUM could remove the TOAST data before we can fetch it.
Commit 3e2f3c2e4 exposed the problem by showing that sometimes we had *no*
live snapshots while fetching TOAST data, but we'd be at risk anyway.

I believe this code was all right when written, because our management of a
session's exposed xmin was such that the TOAST references were safe until
end of transaction.  But that's no longer true now that we can advance or
clear our PGXACT.xmin intra-transaction.

To fix, copy the query's snapshot during FillPortalStore() and save it in
the Portal; release it only when the portal is dropped.  This essentially
implements a policy that we must hold a relevant snapshot whenever we
access potentially-toasted data.  We had already come to that conclusion
in other places, cf commits 08e261cbc9 and ec543db77b.

I'd have liked to add a regression test case for this, but I didn't see
a way to make one that's not unreasonably bloated; it seems to require
returning a toasted value to the client, and those will be big.

In passing, improve PortalRunUtility() so that it positively verifies
that its ending PopActiveSnapshot() call will pop the expected snapshot,
removing a rather shaky assumption about which utility commands might
do their own PopActiveSnapshot().  There's no known bug here, but now
that we're actively referencing the snapshot it's almost free to make
this code a bit more bulletproof.

We might want to consider back-patching something like this into older
branches, but it would be prudent to let it prove itself more in HEAD
beforehand.

Discussion: <87vazemeda.fsf@credativ.de>
This commit is contained in:
Tom Lane 2016-08-07 17:46:08 -04:00
parent 07a601eeda
commit 9ee1cf04ab
4 changed files with 80 additions and 22 deletions

View File

@ -317,10 +317,11 @@ PersistHoldablePortal(Portal portal)
Assert(queryDesc != NULL);
/*
* Caller must have created the tuplestore already.
* Caller must have created the tuplestore already ... but not a snapshot.
*/
Assert(portal->holdContext != NULL);
Assert(portal->holdStore != NULL);
Assert(portal->holdSnapshot == NULL);
/*
* Before closing down the executor, we must copy the tupdesc into
@ -362,7 +363,8 @@ PersistHoldablePortal(Portal portal)
/*
* Change the destination to output to the tuplestore. Note we tell
* the tuplestore receiver to detoast all data passed through it.
* the tuplestore receiver to detoast all data passed through it; this
* makes it safe to not keep a snapshot associated with the data.
*/
queryDesc->dest = CreateDestReceiver(DestTuplestore);
SetTuplestoreDestReceiverParams(queryDesc->dest,

View File

@ -43,9 +43,11 @@ static uint64 RunFromStore(Portal portal, ScanDirection direction, uint64 count,
DestReceiver *dest);
static uint64 PortalRunSelect(Portal portal, bool forward, long count,
DestReceiver *dest);
static void PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
static void PortalRunUtility(Portal portal, Node *utilityStmt,
bool isTopLevel, bool setHoldSnapshot,
DestReceiver *dest, char *completionTag);
static void PortalRunMulti(Portal portal, bool isTopLevel,
static void PortalRunMulti(Portal portal,
bool isTopLevel, bool setHoldSnapshot,
DestReceiver *dest, DestReceiver *altdest,
char *completionTag);
static uint64 DoPortalRunFetch(Portal portal,
@ -810,7 +812,7 @@ PortalRun(Portal portal, long count, bool isTopLevel,
break;
case PORTAL_MULTI_QUERY:
PortalRunMulti(portal, isTopLevel,
PortalRunMulti(portal, isTopLevel, false,
dest, altdest, completionTag);
/* Prevent portal's commands from being re-executed */
@ -1039,15 +1041,16 @@ FillPortalStore(Portal portal, bool isTopLevel)
/*
* Run the portal to completion just as for the default
* MULTI_QUERY case, but send the primary query's output to the
* tuplestore. Auxiliary query outputs are discarded.
* tuplestore. Auxiliary query outputs are discarded. Set the
* portal's holdSnapshot to the snapshot used (or a copy of it).
*/
PortalRunMulti(portal, isTopLevel,
PortalRunMulti(portal, isTopLevel, true,
treceiver, None_Receiver, completionTag);
break;
case PORTAL_UTIL_SELECT:
PortalRunUtility(portal, (Node *) linitial(portal->stmts),
isTopLevel, treceiver, completionTag);
isTopLevel, true, treceiver, completionTag);
break;
default:
@ -1142,10 +1145,11 @@ RunFromStore(Portal portal, ScanDirection direction, uint64 count,
* Execute a utility statement inside a portal.
*/
static void
PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
PortalRunUtility(Portal portal, Node *utilityStmt,
bool isTopLevel, bool setHoldSnapshot,
DestReceiver *dest, char *completionTag)
{
bool active_snapshot_set;
Snapshot snapshot;
elog(DEBUG3, "ProcessUtility");
@ -1172,11 +1176,19 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
IsA(utilityStmt, UnlistenStmt) ||
IsA(utilityStmt, CheckPointStmt)))
{
PushActiveSnapshot(GetTransactionSnapshot());
active_snapshot_set = true;
snapshot = GetTransactionSnapshot();
/* If told to, register the snapshot we're using and save in portal */
if (setHoldSnapshot)
{
snapshot = RegisterSnapshot(snapshot);
portal->holdSnapshot = snapshot;
}
PushActiveSnapshot(snapshot);
/* PushActiveSnapshot might have copied the snapshot */
snapshot = GetActiveSnapshot();
}
else
active_snapshot_set = false;
snapshot = NULL;
ProcessUtility(utilityStmt,
portal->sourceText,
@ -1190,12 +1202,11 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
/*
* Some utility commands may pop the ActiveSnapshot stack from under us,
* so we only pop the stack if we actually see a snapshot set. Note that
* the set of utility commands that do this must be the same set
* disallowed to run inside a transaction; otherwise, we could be popping
* a snapshot that belongs to some other operation.
* so be careful to only pop the stack if our snapshot is still at the
* top.
*/
if (active_snapshot_set && ActiveSnapshotSet())
if (snapshot != NULL && ActiveSnapshotSet() &&
snapshot == GetActiveSnapshot())
PopActiveSnapshot();
}
@ -1205,7 +1216,8 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
* or non-SELECT-like queries)
*/
static void
PortalRunMulti(Portal portal, bool isTopLevel,
PortalRunMulti(Portal portal,
bool isTopLevel, bool setHoldSnapshot,
DestReceiver *dest, DestReceiver *altdest,
char *completionTag)
{
@ -1261,7 +1273,25 @@ PortalRunMulti(Portal portal, bool isTopLevel,
*/
if (!active_snapshot_set)
{
PushActiveSnapshot(GetTransactionSnapshot());
Snapshot snapshot = GetTransactionSnapshot();
/* If told to, register the snapshot and save in portal */
if (setHoldSnapshot)
{
snapshot = RegisterSnapshot(snapshot);
portal->holdSnapshot = snapshot;
}
/*
* We can't have the holdSnapshot also be the active one,
* because UpdateActiveSnapshotCommandId would complain. So
* force an extra snapshot copy. Plain PushActiveSnapshot
* would have copied the transaction snapshot anyway, so this
* only adds a copy step when setHoldSnapshot is true. (It's
* okay for the command ID of the active snapshot to diverge
* from what holdSnapshot has.)
*/
PushCopiedSnapshot(snapshot);
active_snapshot_set = true;
}
else
@ -1309,14 +1339,14 @@ PortalRunMulti(Portal portal, bool isTopLevel,
{
Assert(!active_snapshot_set);
/* statement can set tag string */
PortalRunUtility(portal, stmt, isTopLevel,
PortalRunUtility(portal, stmt, isTopLevel, false,
dest, completionTag);
}
else
{
Assert(IsA(stmt, NotifyStmt));
/* stmt added by rewrite cannot set tag */
PortalRunUtility(portal, stmt, isTopLevel,
PortalRunUtility(portal, stmt, isTopLevel, false,
altdest, NULL);
}
}

View File

@ -24,6 +24,7 @@
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
#include "utils/timestamp.h"
/*
@ -351,6 +352,7 @@ PortalCreateHoldStore(Portal portal)
Assert(portal->holdContext == NULL);
Assert(portal->holdStore == NULL);
Assert(portal->holdSnapshot == NULL);
/*
* Create the memory context that is used for storage of the tuple set.
@ -526,6 +528,20 @@ PortalDrop(Portal portal, bool isTopCommit)
/* drop cached plan reference, if any */
PortalReleaseCachedPlan(portal);
/*
* If portal has a snapshot protecting its data, release that. This needs
* a little care since the registration will be attached to the portal's
* resowner; if the portal failed, we will already have released the
* resowner (and the snapshot) during transaction abort.
*/
if (portal->holdSnapshot)
{
if (portal->resowner)
UnregisterSnapshotFromOwner(portal->holdSnapshot,
portal->resowner);
portal->holdSnapshot = NULL;
}
/*
* Release any resources still attached to the portal. There are several
* cases being covered here:

View File

@ -162,6 +162,16 @@ typedef struct PortalData
Tuplestorestate *holdStore; /* store for holdable cursors */
MemoryContext holdContext; /* memory containing holdStore */
/*
* Snapshot under which tuples in the holdStore were read. We must keep a
* reference to this snapshot if there is any possibility that the tuples
* contain TOAST references, because releasing the snapshot could allow
* recently-dead rows to be vacuumed away, along with any toast data
* belonging to them. In the case of a held cursor, we avoid needing to
* keep such a snapshot by forcibly detoasting the data.
*/
Snapshot holdSnapshot; /* registered snapshot, or NULL if none */
/*
* atStart, atEnd and portalPos indicate the current cursor position.
* portalPos is zero before the first row, N after fetching N'th row of