ITS#9378 Add explicit replay logging

Logs essential ops so they can be replayed.
Ignores read ops for now.
This commit is contained in:
Howard Chu 2023-12-22 13:14:32 +00:00
parent 31ba06e1da
commit 4a19b804cf

View File

@ -481,18 +481,26 @@ typedef MDB_ID txnid_t;
#define MDB_DEBUG 0
#endif
#define MDB_DBG_INFO 1
#define MDB_DBG_TRACE 2
#if MDB_DEBUG
static int mdb_debug;
static int mdb_debug = MDB_DBG_TRACE;
static txnid_t mdb_debug_start;
/** Print a debug message with printf formatting.
* Requires double parenthesis around 2 or more args.
*/
# define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args))
# define DPRINTF(args) ((void) ((mdb_debug & MDB_DBG_INFO) && DPRINTF0 args))
# define DPRINTF0(fmt, ...) \
fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__)
/** Trace info for replaying */
# define MDB_TRACE(args) ((void) ((mdb_debug & MDB_DBG_TRACE) && DPRINTF1 args))
# define DPRINTF1(fmt, ...) \
fprintf(stderr, ">%d:%s: " fmt "\n", getpid(), mdb_func_, __VA_ARGS__)
#else
# define DPRINTF(args) ((void) 0)
# define MDB_TRACE(args) ((void) 0)
#endif
/** Print a debug string.
* The string is printed literally, with no format processing.
@ -589,6 +597,11 @@ static txnid_t mdb_debug_start;
* This is used for printing a hex dump of a key's contents.
*/
#define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1]
/** A data value buffer.
* @ingroup debug
* This is used for printing a hex dump of a #MDB_DUPSORT value's contents.
*/
#define DDBUF char dbuf[DKBUF_MAXKEYSIZE*2+1+2]
/** Display a key in hex.
* @ingroup debug
* Invoke a function to display a key in hex.
@ -596,6 +609,7 @@ static txnid_t mdb_debug_start;
#define DKEY(x) mdb_dkey(x, kbuf)
#else
#define DKBUF
#define DDBUF
#define DKEY(x) 0
#endif
@ -1423,6 +1437,9 @@ static int mdb_update_key(MDB_cursor *mc, MDB_val *key);
static void mdb_cursor_pop(MDB_cursor *mc);
static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp);
static int _mdb_cursor_del(MDB_cursor *mc, unsigned int flags);
static int _mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned int flags);
static int mdb_cursor_del0(MDB_cursor *mc);
static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags);
static int mdb_cursor_sibling(MDB_cursor *mc, int move_right);
@ -1614,6 +1631,18 @@ mdb_dkey(MDB_val *key, char *buf)
return buf;
}
static char *
mdb_dval(MDB_txn *txn, MDB_dbi dbi, MDB_val *data, char *buf)
{
if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
mdb_dkey(data, buf+1);
*buf = '[';
strcpy(buf + data->mv_size * 2 + 1, "]");
} else
*buf = '\0';
return buf;
}
static const char *
mdb_leafnode_type(MDB_node *n)
{
@ -2786,7 +2815,7 @@ mdb_txn_renew0(MDB_txn *txn)
txn->mt_txnid++;
#if MDB_DEBUG
if (txn->mt_txnid == mdb_debug_start)
mdb_debug = 1;
mdb_debug = MDB_DBG_INFO;
#endif
txn->mt_child = NULL;
txn->mt_loose_pgs = NULL;
@ -2945,6 +2974,7 @@ renew:
txn->mt_txnid, (flags & MDB_RDONLY) ? 'r' : 'w',
(void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root));
}
MDB_TRACE(("%p, %p, %u = %p", env, parent, flags, txn));
return rc;
}
@ -3080,18 +3110,25 @@ mdb_txn_reset(MDB_txn *txn)
mdb_txn_end(txn, MDB_END_RESET);
}
void
mdb_txn_abort(MDB_txn *txn)
static void
_mdb_txn_abort(MDB_txn *txn)
{
if (txn == NULL)
return;
if (txn->mt_child)
mdb_txn_abort(txn->mt_child);
_mdb_txn_abort(txn->mt_child);
mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE);
}
void
mdb_txn_abort(MDB_txn *txn)
{
MDB_TRACE(("%p", txn));
_mdb_txn_abort(txn);
}
/** Save the freelist as of this transaction to the freeDB.
* This changes the freelist. Keep trying until it stabilizes.
*/
@ -3182,7 +3219,7 @@ mdb_freelist_save(MDB_txn *txn)
pglast = head_id = *(txnid_t *)key.mv_data;
total_room = head_room = 0;
mdb_tassert(txn, pglast <= env->me_pglast);
rc = mdb_cursor_del(&mc, 0);
rc = _mdb_cursor_del(&mc, 0);
if (rc)
return rc;
}
@ -3202,7 +3239,7 @@ mdb_freelist_save(MDB_txn *txn)
do {
freecnt = free_pgs[0];
data.mv_size = MDB_IDL_SIZEOF(free_pgs);
rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
rc = _mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
if (rc)
return rc;
/* Retry if mt_free_pgs[] grew during the Put() */
@ -3251,7 +3288,7 @@ mdb_freelist_save(MDB_txn *txn)
key.mv_size = sizeof(head_id);
key.mv_data = &head_id;
data.mv_size = (head_room + 1) * sizeof(pgno_t);
rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
rc = _mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
if (rc)
return rc;
/* IDL is initially empty, zero out at least the length */
@ -3306,7 +3343,7 @@ mdb_freelist_save(MDB_txn *txn)
data.mv_data = mop -= len;
save = mop[0];
mop[0] = len;
rc = mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
rc = _mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
mop[0] = save;
if (rc || !(mop_len -= len))
break;
@ -3467,8 +3504,8 @@ done:
return MDB_SUCCESS;
}
int
mdb_txn_commit(MDB_txn *txn)
static int
_mdb_txn_commit(MDB_txn *txn)
{
int rc;
unsigned int i, end_mode;
@ -3481,7 +3518,7 @@ mdb_txn_commit(MDB_txn *txn)
end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE;
if (txn->mt_child) {
rc = mdb_txn_commit(txn->mt_child);
rc = _mdb_txn_commit(txn->mt_child);
if (rc)
goto fail;
}
@ -3661,7 +3698,7 @@ mdb_txn_commit(MDB_txn *txn)
goto fail;
}
data.mv_data = &txn->mt_dbs[i];
rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data,
rc = _mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data,
F_SUBDATA);
if (rc)
goto fail;
@ -3692,10 +3729,17 @@ done:
return MDB_SUCCESS;
fail:
mdb_txn_abort(txn);
_mdb_txn_abort(txn);
return rc;
}
int
mdb_txn_commit(MDB_txn *txn)
{
MDB_TRACE(("%p", txn));
return _mdb_txn_commit(txn);
}
/** Read the environment parameters of a DB environment before
* mapping it into memory.
* @param[in] env the environment handle
@ -3992,6 +4036,7 @@ mdb_env_create(MDB_env **env)
GET_PAGESIZE(e->me_os_psize);
VGMEMP_CREATE(e,0,0);
*env = e;
MDB_TRACE(("%p", e));
return MDB_SUCCESS;
}
@ -4115,6 +4160,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
env->me_mapsize = size;
if (env->me_psize)
env->me_maxpg = env->me_mapsize / env->me_psize;
MDB_TRACE(("%p, %"Yu"", env, size));
return MDB_SUCCESS;
}
@ -4124,6 +4170,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
if (env->me_map)
return EINVAL;
env->me_maxdbs = dbs + CORE_DBS;
MDB_TRACE(("%p, %u", env, dbs));
return MDB_SUCCESS;
}
@ -4133,6 +4180,7 @@ mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
if (env->me_map || readers < 1)
return EINVAL;
env->me_maxreaders = readers;
MDB_TRACE(("%p, %u", env, readers));
return MDB_SUCCESS;
}
@ -5076,6 +5124,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
}
leave:
MDB_TRACE(("%p, %s, %u, %04o", env, path, flags & (CHANGEABLE|CHANGELESS), mode));
if (rc) {
mdb_env_close0(env, excl);
}
@ -5188,6 +5237,7 @@ mdb_env_close(MDB_env *env)
if (env == NULL)
return;
MDB_TRACE(("%p", env));
VGMEMP_DESTROY(env);
while ((dp = env->me_dpages) != NULL) {
VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
@ -6559,8 +6609,8 @@ mdb_cursor_touch(MDB_cursor *mc)
/** Do not spill pages to disk if txn is getting full, may fail instead */
#define MDB_NOSPILL 0x8000
int
mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
static int
_mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
unsigned int flags)
{
MDB_env *env;
@ -7023,7 +7073,7 @@ put_sub:
new_dupdata = (int)dkey.mv_size;
/* converted, write the original data first */
if (dkey.mv_size) {
rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
rc = _mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
if (rc)
goto bad_sub;
/* we've done our job */
@ -7051,7 +7101,7 @@ put_sub:
ecount = mc->mc_xcursor->mx_db.md_entries;
if (flags & MDB_APPENDDUP)
xflags |= MDB_APPEND;
rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
rc = _mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
if (flags & F_SUBDATA) {
void *db = NODEDATA(leaf);
memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
@ -7092,7 +7142,20 @@ bad_sub:
}
int
mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
unsigned int flags)
{
DKBUF;
DDBUF;
int rc = _mdb_cursor_put(mc, key, data, flags);
MDB_TRACE(("%p, %"Z"u[%s], %"Z"u%s, %u",
mc, key ? key->mv_size:0, DKEY(key), data ? data->mv_size:0,
data ? mdb_dval(mc->mc_txn, mc->mc_dbi, data, dbuf):"", flags));
return rc;
}
static int
_mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
{
MDB_node *leaf;
MDB_page *mp;
@ -7130,7 +7193,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
}
rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
rc = _mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
if (rc)
return rc;
/* If sub-DB still has entries, we're done */
@ -7194,6 +7257,14 @@ fail:
return rc;
}
int
mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
{
MDB_TRACE(("%p, %u",
mc, flags));
return _mdb_cursor_del(mc, flags);
}
/** Allocate and initialize new pages for a database.
* Set #MDB_TXN_ERROR on failure.
* @param[in] mc a cursor on the database being added to.
@ -7687,6 +7758,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
return ENOMEM;
}
MDB_TRACE(("%p, %u = %p", txn, dbi, mc));
*ret = mc;
return MDB_SUCCESS;
@ -7750,6 +7822,7 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp)
void
mdb_cursor_close(MDB_cursor *mc)
{
MDB_TRACE(("%p", mc));
if (mc && !mc->mc_backup) {
/* remove from txn, if tracked */
if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) {
@ -8559,6 +8632,8 @@ int
mdb_del(MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data)
{
DKBUF;
DDBUF;
if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID))
return EINVAL;
@ -8570,6 +8645,9 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi,
data = NULL;
}
MDB_TRACE(("%p, %u, %"Z"u[%s], %"Z"u%s",
txn, dbi, key ? key->mv_size:0, DKEY(key), data ? data->mv_size:0,
data ? mdb_dval(txn, dbi, data, dbuf):""));
return mdb_del0(txn, dbi, key, data, 0);
}
@ -8610,7 +8688,7 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi,
mc.mc_flags |= C_UNTRACK;
mc.mc_next = txn->mt_cursors[dbi];
txn->mt_cursors[dbi] = &mc;
rc = mdb_cursor_del(&mc, flags);
rc = _mdb_cursor_del(&mc, flags);
txn->mt_cursors[dbi] = mc.mc_next;
}
return rc;
@ -9052,6 +9130,8 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
MDB_cursor mc;
MDB_xcursor mx;
int rc;
DKBUF;
DDBUF;
if (!key || !data || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID))
return EINVAL;
@ -9062,10 +9142,12 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))
return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
MDB_TRACE(("%p, %u, %"Z"u[%s], %"Z"u%s, %u",
txn, dbi, key ? key->mv_size:0, DKEY(key), data->mv_size, mdb_dval(txn, dbi, data, dbuf), flags));
mdb_cursor_init(&mc, txn, dbi, &mx);
mc.mc_next = txn->mt_cursors[dbi];
txn->mt_cursors[dbi] = &mc;
rc = mdb_cursor_put(&mc, key, data, flags);
rc = _mdb_cursor_put(&mc, key, data, flags);
txn->mt_cursors[dbi] = mc.mc_next;
return rc;
}
@ -9468,7 +9550,7 @@ finish:
my.mc_error = rc;
mdb_env_cthr_toggle(&my, 1 | MDB_EOF);
rc = THREAD_FINISH(thr);
mdb_txn_abort(txn);
_mdb_txn_abort(txn);
done:
#ifdef _WIN32
@ -9580,7 +9662,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
}
leave:
mdb_txn_abort(txn);
_mdb_txn_abort(txn);
return rc;
}
@ -9795,6 +9877,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
}
}
mdb_default_cmp(txn, MAIN_DBI);
MDB_TRACE(("%p, (null), %u = %u", txn, flags, MAIN_DBI));
return MDB_SUCCESS;
}
@ -9856,7 +9939,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
dummy.md_root = P_INVALID;
dummy.md_flags = flags & PERSISTENT_FLAGS;
WITH_CURSOR_TRACKING(mc,
rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA));
rc = _mdb_cursor_put(&mc, &key, &data, F_SUBDATA));
dbflag |= DB_DIRTY;
}
@ -9881,6 +9964,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
if (!unused) {
txn->mt_numdbs++;
}
MDB_TRACE(("%p, %s, %u = %u", txn, name, flags, slot));
}
return rc;
@ -9912,6 +9996,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
ptr = env->me_dbxs[dbi].md_name.mv_data;
/* If there was no name, this was already closed */
if (ptr) {
MDB_TRACE(("%p, %u", env, dbi));
env->me_dbxs[dbi].md_name.mv_data = NULL;
env->me_dbxs[dbi].md_name.mv_size = 0;
env->me_dbflags[dbi] = 0;
@ -10046,6 +10131,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
if (rc)
return rc;
MDB_TRACE(("%u, %d", dbi, del));
rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
/* Invalidate the dropped DB's cursors */
for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)