From 09006ccec7928c9cf53bca6abe741e8d4d466c98 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 18 Sep 2011 10:38:08 -0700 Subject: [PATCH 1/2] API change, consistency check mdb_close() takes an env, not a txn. Also, add check for stale DB records (in case some other process wrote to the DB last). --- libraries/libmdb/mdb.c | 142 ++++++++++++++++++++++++++---------- libraries/libmdb/mdb.h | 25 ++++++- libraries/libmdb/mdb_stat.c | 2 +- libraries/libmdb/mtest.c | 2 +- libraries/libmdb/mtest2.c | 2 +- libraries/libmdb/mtest3.c | 2 +- libraries/libmdb/mtest4.c | 2 +- libraries/libmdb/mtest5.c | 2 +- 8 files changed, 132 insertions(+), 47 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 8a877c744f..9496914154 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -691,9 +691,6 @@ typedef struct MDB_meta { /** Auxiliary DB info. * The information here is mostly static/read-only. There is * only a single copy of this record in the environment. - * The \b md_dirty flag is not read-only, but only a write - * transaction can ever update it, and only write transactions - * need to worry about it. */ typedef struct MDB_dbx { MDB_val md_name; /**< name of the database */ @@ -701,8 +698,6 @@ typedef struct MDB_dbx { MDB_cmp_func *md_dcmp; /**< function for comparing data items */ MDB_rel_func *md_rel; /**< user relocate function */ void *md_relctx; /**< user-provided context for md_rel */ - MDB_dbi md_parent; /**< parent DB of a sub-DB */ - unsigned int md_dirty; /**< TRUE if DB was written in this txn */ } MDB_dbx; /** A database transaction. 
@@ -728,6 +723,15 @@ struct MDB_txn { MDB_dbx *mt_dbxs; /** Array of MDB_db records for each known DB */ MDB_db *mt_dbs; +/** @defgroup mt_dbflag Transaction DB Flags + * @ingroup internal + * @{ + */ +#define DB_DIRTY 0x01 /**< DB was written in this txn */ +#define DB_STALE 0x02 /**< DB record is older than txnID */ +/** @} */ + /** Array of flags for each DB */ + unsigned char *mt_dbflags; /** Number of DB records in use. This number only ever increments; * we don't decrement it when individual DB handles are closed. */ @@ -767,6 +771,8 @@ struct MDB_cursor { MDB_db *mc_db; /** The database auxiliary record for this cursor */ MDB_dbx *mc_dbx; + /** The @ref mt_dbflag for this database */ + unsigned char *mc_dbflag; unsigned short mc_snum; /**< number of pushed pages */ unsigned short mc_top; /**< index of top page, mc_snum-1 */ /** @defgroup mdb_cursor Cursor Flags @@ -794,6 +800,8 @@ typedef struct MDB_xcursor { MDB_db mx_db; /** The auxiliary DB record for this Dup DB */ MDB_dbx mx_dbx; + /** The @ref mt_dbflag for this Dup DB */ + unsigned char mx_dbflag; } MDB_xcursor; /** A set of pages freed by an earlier transaction. 
*/ @@ -830,6 +838,7 @@ struct MDB_env { pgno_t me_maxpg; /**< me_mapsize / me_psize */ unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */ unsigned int me_db_toggle; /**< which DB table is current */ + txnid_t me_wtxnid; /**< ID of last txn we committed */ MDB_dbx *me_dbxs; /**< array of static DB info */ MDB_db *me_dbs[2]; /**< two arrays of MDB_db info */ MDB_oldpages *me_pghead; /**< list of old page records */ @@ -1167,6 +1176,7 @@ static inline int mdb_txn_renew0(MDB_txn *txn) { MDB_env *env = txn->mt_env; + char mt_dbflag = 0; if (txn->mt_flags & MDB_TXN_RDONLY) { MDB_reader *r = pthread_getspecific(env->me_txkey); @@ -1191,14 +1201,22 @@ mdb_txn_renew0(MDB_txn *txn) r = &env->me_txns->mti_readers[i]; pthread_setspecific(env->me_txkey, r); } - txn->mt_txnid = env->me_txns->mti_txnid; txn->mt_toggle = env->me_txns->mti_me_toggle; + txn->mt_txnid = env->me_txns->mti_txnid; + /* This happens if a different process was the + * last writer to the DB. + */ + if (env->me_wtxnid < txn->mt_txnid) + mt_dbflag = DB_STALE; r->mr_txnid = txn->mt_txnid; txn->mt_u.reader = r; } else { LOCK_MUTEX_W(env); - txn->mt_txnid = env->me_txns->mti_txnid+1; + txn->mt_txnid = env->me_txns->mti_txnid; + if (env->me_wtxnid < txn->mt_txnid) + mt_dbflag = DB_STALE; + txn->mt_txnid++; txn->mt_toggle = env->me_txns->mti_me_toggle; txn->mt_u.dirty_list = env->me_dirty_list; txn->mt_u.dirty_list[0].mid = 0; @@ -1218,6 +1236,8 @@ mdb_txn_renew0(MDB_txn *txn) (txn->mt_numdbs - 2) * sizeof(MDB_db)); LAZY_RWLOCK_UNLOCK(&env->me_dblock); + memset(txn->mt_dbflags, mt_dbflag, env->me_numdbs); + return MDB_SUCCESS; } @@ -1253,7 +1273,8 @@ mdb_txn_begin(MDB_env *env, unsigned int flags, MDB_txn **ret) DPUTS("environment had fatal error, must shutdown!"); return MDB_PANIC; } - if ((txn = calloc(1, sizeof(MDB_txn) + env->me_maxdbs * sizeof(MDB_db))) == NULL) { + if ((txn = calloc(1, sizeof(MDB_txn) + + env->me_maxdbs * (sizeof(MDB_db)+1))) == NULL) { DPRINTF("calloc: %s", 
strerror(ErrCode())); return ENOMEM; } @@ -1261,6 +1282,7 @@ mdb_txn_begin(MDB_env *env, unsigned int flags, MDB_txn **ret) if (flags & MDB_RDONLY) { txn->mt_flags |= MDB_TXN_RDONLY; } + txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs); txn->mt_env = env; rc = mdb_txn_renew0(txn); @@ -1289,7 +1311,6 @@ mdb_txn_reset0(MDB_txn *txn) } else { MDB_oldpages *mop; MDB_page *dp; - MDB_dbi dbi; unsigned int i; /* return all dirty pages to dpage list */ @@ -1310,8 +1331,6 @@ mdb_txn_reset0(MDB_txn *txn) } env->me_txn = NULL; - for (dbi=2; dbime_numdbs; dbi++) - env->me_dbxs[dbi].md_dirty = 0; /* The writer mutex was locked in mdb_txn_begin. */ UNLOCK_MUTEX_W(env); } @@ -1362,6 +1381,23 @@ mdb_txn_commit(MDB_txn *txn) env = txn->mt_env; if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + if (txn->mt_numdbs > env->me_numdbs) { + /* update the DB tables */ + int toggle = !env->me_db_toggle; + MDB_db *ip, *jp; + MDB_dbi i; + + ip = &env->me_dbs[toggle][env->me_numdbs]; + jp = &txn->mt_dbs[env->me_numdbs]; + LAZY_RWLOCK_WRLOCK(&env->me_dblock); + for (i = env->me_numdbs; i < txn->mt_numdbs; i++) { + *ip++ = *jp++; + } + + env->me_db_toggle = toggle; + env->me_numdbs = txn->mt_numdbs; + LAZY_RWLOCK_UNLOCK(&env->me_dblock); + } mdb_txn_abort(txn); return MDB_SUCCESS; } @@ -1456,7 +1492,7 @@ mdb_txn_commit(MDB_txn *txn) mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); for (i = 2; i < txn->mt_numdbs; i++) { - if (txn->mt_dbxs[i].md_dirty) { + if (txn->mt_dbflags[i] & DB_DIRTY) { data.mv_data = &txn->mt_dbs[i]; mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0); } @@ -1578,6 +1614,7 @@ mdb_txn_commit(MDB_txn *txn) } done: + env->me_wtxnid = txn->mt_txnid; env->me_txn = NULL; /* update the DB tables */ { @@ -1594,10 +1631,6 @@ done: ip++; jp++; } - for (i = 2; i < txn->mt_numdbs; i++) { - if (txn->mt_dbxs[i].md_dirty) - txn->mt_dbxs[i].md_dirty = 0; - } env->me_db_toggle = toggle; env->me_numdbs = txn->mt_numdbs; LAZY_RWLOCK_UNLOCK(&env->me_dblock); @@ -2825,16 +2858,32 @@ 
mdb_page_search(MDB_cursor *mc, MDB_val *key, int modify) DPRINTF("db %u root page %zu has flags 0x%X", mc->mc_dbi, root, mc->mc_pg[0]->mp_flags); - if (modify) { - /* For sub-databases, update main root first */ - if (mc->mc_dbi > MAIN_DBI && !mc->mc_dbx->md_dirty) { + /* For sub-databases, update main root first */ + if (mc->mc_dbi > MAIN_DBI) { + if ((*mc->mc_dbflag & DB_STALE) || + (modify && !(*mc->mc_dbflag & DB_DIRTY))) { MDB_cursor mc2; + unsigned char dbflag = 0; mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); - rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, 1); + rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, modify); if (rc) return rc; - mc->mc_dbx->md_dirty = 1; + if (*mc->mc_dbflag & DB_STALE) { + MDB_val data; + int exact = 0; + MDB_node *leaf = mdb_node_search(&mc2, + &mc->mc_dbx->md_name, &exact); + if (!exact) + return MDB_NOTFOUND; + mdb_node_read(mc->mc_txn, leaf, &data); + memcpy(mc->mc_db, data.mv_data, sizeof(MDB_db)); + } + if (modify) + dbflag = DB_DIRTY; + *mc->mc_dbflag = dbflag; } + } + if (modify) { if (!F_ISSET(mc->mc_pg[0]->mp_flags, P_DIRTY)) { if ((rc = mdb_page_touch(mc))) return rc; @@ -3475,13 +3524,13 @@ mdb_cursor_touch(MDB_cursor *mc) { int rc; - if (mc->mc_dbi > MAIN_DBI && !mc->mc_dbx->md_dirty) { + if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) { MDB_cursor mc2; mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, 1); if (rc) return rc; - mc->mc_dbx->md_dirty = 1; + *mc->mc_dbflag = DB_DIRTY; } for (mc->mc_top = 0; mc->mc_top < mc->mc_snum; mc->mc_top++) { if (!F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY)) { @@ -3502,7 +3551,7 @@ int mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned int flags) { - MDB_node *leaf; + MDB_node *leaf = NULL; MDB_val xdata, *rdata, dkey; MDB_page *fp; MDB_db dummy; @@ -3536,7 +3585,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, mdb_cursor_push(mc, np); mc->mc_db->md_root = np->mp_pgno; 
mc->mc_db->md_depth++; - mc->mc_dbx->md_dirty = 1; + *mc->mc_dbflag = DB_DIRTY; if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) == MDB_DUPFIXED) np->mp_flags |= P_LEAF2; @@ -4181,11 +4230,10 @@ mdb_xcursor_init0(MDB_cursor *mc) mx->mx_cursor.mc_db = &mx->mx_db; mx->mx_cursor.mc_dbx = &mx->mx_dbx; mx->mx_cursor.mc_dbi = mc->mc_dbi+1; - mx->mx_dbx.md_parent = mc->mc_dbi; + mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; mx->mx_dbx.md_dcmp = NULL; mx->mx_dbx.md_rel = mc->mc_dbx->md_rel; - mx->mx_dbx.md_dirty = 0; } /** Final setup of a sorted-dups cursor. @@ -4228,8 +4276,8 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) } DPRINTF("Sub-db %u for db %u root page %zu", mx->mx_cursor.mc_dbi, mc->mc_dbi, mx->mx_db.md_root); - if (F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY)) - mx->mx_dbx.md_dirty = 1; + mx->mx_dbflag = (F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY)) ? + DB_DIRTY : 0; mx->mx_dbx.md_name.mv_data = NODEKEY(node); mx->mx_dbx.md_name.mv_size = node->mn_ksize; if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) @@ -4244,6 +4292,7 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) mc->mc_txn = txn; mc->mc_db = &txn->mt_dbs[dbi]; mc->mc_dbx = &txn->mt_dbxs[dbi]; + mc->mc_dbflag = &txn->mt_dbflags[dbi]; mc->mc_snum = 0; mc->mc_flags = 0; if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { @@ -5152,7 +5201,7 @@ int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) { MDB_val key, data; MDB_dbi i; - int rc, dirty = 0; + int rc, dbflag = 0; size_t len; if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) { @@ -5201,7 +5250,7 @@ int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) dummy.md_flags = flags & 0xffff; mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); - dirty = 1; + dbflag = DB_DIRTY; } /* OK, got info, add to table */ @@ -5209,8 +5258,7 @@ int mdb_open(MDB_txn *txn, const 
char *name, unsigned int flags, MDB_dbi *dbi) txn->mt_dbxs[txn->mt_numdbs].md_name.mv_data = strdup(name); txn->mt_dbxs[txn->mt_numdbs].md_name.mv_size = len; txn->mt_dbxs[txn->mt_numdbs].md_rel = NULL; - txn->mt_dbxs[txn->mt_numdbs].md_parent = MAIN_DBI; - txn->mt_dbxs[txn->mt_numdbs].md_dirty = dirty; + txn->mt_dbflags[txn->mt_numdbs] = dbflag; memcpy(&txn->mt_dbs[txn->mt_numdbs], data.mv_data, sizeof(MDB_db)); *dbi = txn->mt_numdbs; txn->mt_env->me_dbs[0][txn->mt_numdbs] = txn->mt_dbs[txn->mt_numdbs]; @@ -5230,17 +5278,35 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); } -void mdb_close(MDB_txn *txn, MDB_dbi dbi) +void mdb_close(MDB_env *env, MDB_dbi dbi) { char *ptr; - if (dbi <= MAIN_DBI || dbi >= txn->mt_numdbs) + if (dbi <= MAIN_DBI || dbi >= env->me_numdbs) return; - ptr = txn->mt_dbxs[dbi].md_name.mv_data; - txn->mt_dbxs[dbi].md_name.mv_data = NULL; - txn->mt_dbxs[dbi].md_name.mv_size = 0; + ptr = env->me_dbxs[dbi].md_name.mv_data; + env->me_dbxs[dbi].md_name.mv_data = NULL; + env->me_dbxs[dbi].md_name.mv_size = 0; free(ptr); } +int mdb_drop(MDB_env *env, MDB_dbi dbi) +{ + MDB_txn *txn; + int rc; + + if (!env || !dbi || dbi >= env->me_numdbs) + return EINVAL; + + if (env->me_txn) + return EBUSY; + + rc = mdb_txn_begin(env, 0, &txn); + if (rc) + return rc; + + +} + int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) { if (txn == NULL || !dbi || dbi >= txn->mt_numdbs) diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index b99ece7c95..04293d4f28 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -592,15 +592,34 @@ int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi); int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); /** Close a database handle. - * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * This call is not mutex protected. 
Handles should only be closed by + * a single thread, and only if no other threads are going to reference + * the database handle any further. + * @param[in] env An environment handle returned by #mdb_env_create() * @param[in] dbi A database handle returned by #mdb_open() */ -void mdb_close(MDB_txn *txn, MDB_dbi dbi); +void mdb_close(MDB_env *env, MDB_dbi dbi); + + /** Delete a database and free all its pages. + * This function acquires its own write transaction to perform its + * work, so it must not be called if there is already an open write + * transaction in the environment. The database handle will also be + * closed as part of this operation. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbi A database handle returned by #mdb_open() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_drop(MDB_env *env, MDB_dbi dbi); /** Set a custom key comparison function for a database. * The comparison function is called whenever it is necessary to compare a * key specified by the application with a key currently stored in the database. - * If no comparison function is specified, and no special key flags were specified + * If no comparison function is specified, and no special key flags were specified * with #mdb_open(), the keys are compared lexically, with shorter keys collating * before longer keys. 
* @warning This function must be called before any data access functions are used, diff --git a/libraries/libmdb/mdb_stat.c b/libraries/libmdb/mdb_stat.c index 4bcdb92fa6..931ec61a58 100644 --- a/libraries/libmdb/mdb_stat.c +++ b/libraries/libmdb/mdb_stat.c @@ -56,7 +56,7 @@ int main(int argc,char * argv[]) printf("Leaf pages: %zu\n", mst.ms_leaf_pages); printf("Overflow pages: %zu\n", mst.ms_overflow_pages); printf("Entries: %zu\n", mst.ms_entries); - mdb_close(txn, dbi); + mdb_close(env, dbi); mdb_txn_abort(txn); mdb_env_close(env); diff --git a/libraries/libmdb/mtest.c b/libraries/libmdb/mtest.c index 8f2ecbf0e9..89f0ac4643 100644 --- a/libraries/libmdb/mtest.c +++ b/libraries/libmdb/mtest.c @@ -146,7 +146,7 @@ int main(int argc,char * argv[]) } #endif mdb_cursor_close(cursor); - mdb_close(txn, dbi); + mdb_close(env, dbi); mdb_txn_abort(txn); mdb_env_close(env); diff --git a/libraries/libmdb/mtest2.c b/libraries/libmdb/mtest2.c index 5aeb97d3d4..17c1ecfe59 100644 --- a/libraries/libmdb/mtest2.c +++ b/libraries/libmdb/mtest2.c @@ -108,7 +108,7 @@ int main(int argc,char * argv[]) (int) data.mv_size, (char *) data.mv_data); } mdb_cursor_close(cursor); - mdb_close(txn, dbi); + mdb_close(env, dbi); mdb_txn_abort(txn); mdb_env_close(env); diff --git a/libraries/libmdb/mtest3.c b/libraries/libmdb/mtest3.c index 467a12352d..43e34817b9 100644 --- a/libraries/libmdb/mtest3.c +++ b/libraries/libmdb/mtest3.c @@ -118,7 +118,7 @@ int main(int argc,char * argv[]) (int) data.mv_size, (char *) data.mv_data); } mdb_cursor_close(cursor); - mdb_close(txn, dbi); + mdb_close(env, dbi); mdb_txn_abort(txn); mdb_env_close(env); diff --git a/libraries/libmdb/mtest4.c b/libraries/libmdb/mtest4.c index d3a7877417..821055ad7c 100644 --- a/libraries/libmdb/mtest4.c +++ b/libraries/libmdb/mtest4.c @@ -152,7 +152,7 @@ int main(int argc,char * argv[]) (int) data.mv_size, (char *) data.mv_data); } mdb_cursor_close(cursor); - mdb_close(txn, dbi); + mdb_close(env, dbi); mdb_txn_abort(txn); 
mdb_env_close(env); diff --git a/libraries/libmdb/mtest5.c b/libraries/libmdb/mtest5.c index 472b100756..5fb6bb0618 100644 --- a/libraries/libmdb/mtest5.c +++ b/libraries/libmdb/mtest5.c @@ -120,7 +120,7 @@ int main(int argc,char * argv[]) (int) data.mv_size, (char *) data.mv_data); } mdb_cursor_close(cursor); - mdb_close(txn, dbi); + mdb_close(env, dbi); mdb_txn_abort(txn); mdb_env_close(env); From 57b5fbea491c956ed7a4ab7d12d802de2b29455d Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 18 Sep 2011 12:37:02 -0700 Subject: [PATCH 2/2] Add mdb_drop() for deleting a DB Allow MIDLs to grow arbitrarily, to allow unlimited number of pages to be freed in one txn. --- libraries/libmdb/mdb.c | 148 ++++++++++++++++++++++++++-------------- libraries/libmdb/mdb.h | 25 +++---- libraries/libmdb/midl.c | 40 ++++++++++- libraries/libmdb/midl.h | 22 +++++- 4 files changed, 164 insertions(+), 71 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 9496914154..a6df703c37 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -712,9 +712,8 @@ struct MDB_txn { txnid_t mt_txnid; MDB_env *mt_env; /**< the DB environment */ /** The list of pages that became unused during this transaction. - * This is an #IDL. 
*/ - pgno_t *mt_free_pgs; + IDL mt_free_pgs; union { ID2L dirty_list; /**< modified pages */ MDB_reader *reader; /**< this thread's slot in the reader table */ @@ -845,7 +844,7 @@ struct MDB_env { pthread_key_t me_txkey; /**< thread-key for readers */ MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */ /** IDL of pages that became unused in a write txn */ - pgno_t me_free_pgs[MDB_IDL_UM_SIZE]; + IDL me_free_pgs; /** ID2L of pages that were written during a write txn */ ID2 me_dirty_list[MDB_IDL_UM_SIZE]; /** rwlock for the DB tables, if #LAZY_LOCKS is false */ @@ -908,6 +907,7 @@ static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcurs static void mdb_xcursor_init0(MDB_cursor *mc); static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); +static int mdb_drop0(MDB_cursor *mc, int subs); static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); /** @cond */ @@ -1135,7 +1135,7 @@ mdb_page_touch(MDB_cursor *mc) return ENOMEM; DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi, mp->mp_pgno, np->mp_pgno); assert(mp->mp_pgno != np->mp_pgno); - mdb_midl_append(mc->mc_txn->mt_free_pgs, mp->mp_pgno); + mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); pgno = np->mp_pgno; memcpy(np, mp, mc->mc_txn->mt_env->me_psize); mp = np; @@ -1313,6 +1313,9 @@ mdb_txn_reset0(MDB_txn *txn) MDB_page *dp; unsigned int i; + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; + /* return all dirty pages to dpage list */ for (i=1; i<=txn->mt_u.dirty_list[0].mid; i++) { dp = txn->mt_u.dirty_list[i].mptr; @@ -1466,6 +1469,8 @@ mdb_txn_commit(MDB_txn *txn) return rc; } } while (i != txn->mt_free_pgs[0]); + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; } /* should only be one record now */ if (env->me_pghead) { @@ -1867,6 +1872,11 @@ mdb_env_create(MDB_env **env) if (!e) return ENOMEM; + e->me_free_pgs = mdb_midl_alloc(); + if (!e->me_free_pgs) { + free(e); + return ENOMEM; + } 
e->me_maxreaders = DEFAULT_READERS; e->me_maxdbs = 2; e->me_fd = INVALID_HANDLE_VALUE; @@ -2490,6 +2500,7 @@ mdb_env_close(MDB_env *env) munmap(env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); } close(env->me_lfd); + mdb_midl_free(env->me_free_pgs); free(env); } @@ -3848,45 +3859,10 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) if (leaf->mn_flags & F_SUBDATA) { /* add all the child DB's pages to the free list */ - rc = mdb_page_search(&mc->mc_xcursor->mx_cursor, NULL, 0); + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); if (rc == MDB_SUCCESS) { - MDB_node *ni; - MDB_cursor *mx; - unsigned int i; - - mx = &mc->mc_xcursor->mx_cursor; mc->mc_db->md_entries -= - mx->mc_db->md_entries; - - mdb_cursor_pop(mx); - while (mx->mc_snum > 0) { - for (i=0; imc_pg[mx->mc_top]); i++) { - pgno_t pg; - ni = NODEPTR(mx->mc_pg[mx->mc_top], i); - pg = NODEPGNO(ni); - /* free it */ - mdb_midl_append(mc->mc_txn->mt_free_pgs, pg); - } - if (!mx->mc_top) - break; - rc = mdb_cursor_sibling(mx, 1); - if (rc) { - /* no more siblings, go back to beginning - * of previous level. 
(stack was already popped - * by mdb_cursor_sibling) - */ - for (i=1; imc_top; i++) { - pgno_t pg; - ni = NODEPTR(mx->mc_pg[i-1],0); - pg = NODEPGNO(ni); - if ((rc = mdb_page_get(mc->mc_txn, pg, &mx->mc_pg[i]))) - break; - } - } - } - /* free it */ - mdb_midl_append(mc->mc_txn->mt_free_pgs, - mx->mc_db->md_root); + mc->mc_xcursor->mx_db.md_entries; } } } @@ -4584,7 +4560,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) return rc; } - mdb_midl_append(csrc->mc_txn->mt_free_pgs, csrc->mc_pg[csrc->mc_top]->mp_pgno); + mdb_midl_append(&csrc->mc_txn->mt_free_pgs, csrc->mc_pg[csrc->mc_top]->mp_pgno); if (IS_LEAF(csrc->mc_pg[csrc->mc_top])) csrc->mc_db->md_leaf_pages--; else @@ -4646,11 +4622,11 @@ mdb_rebalance(MDB_cursor *mc) mc->mc_db->md_root = P_INVALID; mc->mc_db->md_depth = 0; mc->mc_db->md_leaf_pages = 0; - mdb_midl_append(mc->mc_txn->mt_free_pgs, mc->mc_pg[mc->mc_top]->mp_pgno); + mdb_midl_append(&mc->mc_txn->mt_free_pgs, mc->mc_pg[mc->mc_top]->mp_pgno); mc->mc_snum = 0; } else if (IS_BRANCH(mc->mc_pg[mc->mc_top]) && NUMKEYS(mc->mc_pg[mc->mc_top]) == 1) { DPUTS("collapsing root page!"); - mdb_midl_append(mc->mc_txn->mt_free_pgs, mc->mc_pg[mc->mc_top]->mp_pgno); + mdb_midl_append(&mc->mc_txn->mt_free_pgs, mc->mc_pg[mc->mc_top]->mp_pgno); mc->mc_db->md_root = NODEPGNO(NODEPTR(mc->mc_pg[mc->mc_top], 0)); if ((rc = mdb_page_get(mc->mc_txn, mc->mc_db->md_root, &mc->mc_pg[mc->mc_top]))) @@ -4734,7 +4710,7 @@ mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf) ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize); for (i=0; imc_txn->mt_free_pgs, pg); + mdb_midl_append(&mc->mc_txn->mt_free_pgs, pg); pg++; } } @@ -5289,22 +5265,90 @@ void mdb_close(MDB_env *env, MDB_dbi dbi) free(ptr); } -int mdb_drop(MDB_env *env, MDB_dbi dbi) +/** Add all the DB's pages to the free list. + * @param[in] mc Cursor on the DB to free. + * @param[in] subs non-Zero to check for sub-DBs in this DB. + * @return 0 on success, non-zero on failure. 
+ */ +static int +mdb_drop0(MDB_cursor *mc, int subs) { - MDB_txn *txn; int rc; - if (!env || !dbi || dbi >= env->me_numdbs) + rc = mdb_page_search(mc, NULL, 0); + if (rc == MDB_SUCCESS) { + MDB_node *ni; + MDB_cursor mx; + unsigned int i; + + /* LEAF2 pages have no nodes, cannot have sub-DBs */ + if (!subs || IS_LEAF2(mc->mc_pg[mc->mc_top])) + mdb_cursor_pop(mc); + + mdb_cursor_copy(mc, &mx); + while (mc->mc_snum > 0) { + if (IS_LEAF(mc->mc_pg[mc->mc_top])) { + for (i=0; imc_pg[mc->mc_top]); i++) { + ni = NODEPTR(mc->mc_pg[mc->mc_top], i); + if (ni->mn_flags & F_SUBDATA) { + mdb_xcursor_init1(mc, ni); + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); + if (rc) + return rc; + } + } + } else { + for (i=0; imc_pg[mc->mc_top]); i++) { + pgno_t pg; + ni = NODEPTR(mc->mc_pg[mc->mc_top], i); + pg = NODEPGNO(ni); + /* free it */ + mdb_midl_append(&mc->mc_txn->mt_free_pgs, pg); + } + } + if (!mc->mc_top) + break; + rc = mdb_cursor_sibling(mc, 1); + if (rc) { + /* no more siblings, go back to beginning + * of previous level. 
(stack was already popped + * by mdb_cursor_sibling) + */ + for (i=1; imc_top; i++) + mc->mc_pg[i] = mx.mc_pg[i]; + } + } + /* free it */ + mdb_midl_append(&mc->mc_txn->mt_free_pgs, + mc->mc_db->md_root); + } + return 0; +} + +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) +{ + MDB_cursor *mc; + int rc; + + if (!txn || !dbi || dbi >= txn->mt_numdbs) return EINVAL; - if (env->me_txn) - return EBUSY; - - rc = mdb_txn_begin(env, 0, &txn); + rc = mdb_cursor_open(txn, dbi, &mc); if (rc) return rc; + rc = mdb_drop0(mc, 1); + if (rc) + mdb_cursor_close(mc); + return rc; + if (del) { + rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL); + if (!rc) + mdb_close(txn->mt_env, dbi); + } + mdb_cursor_close(mc); + return rc; } int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 04293d4f28..18d73681ea 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -531,7 +531,9 @@ void mdb_txn_reset(MDB_txn *txn); int mdb_txn_renew(MDB_txn *txn); /** Open a database in the environment. - * The database handle may be discarded by calling #mdb_close(). + * The database handle may be discarded by calling #mdb_close(). Only + * one thread should call this function; it is not mutex-protected in + * a read-only transaction. * @param[in] txn A transaction handle returned by #mdb_txn_begin() * @param[in] name The name of the database to open. If only a single * database is needed in the enviroment, this value may be NULL. @@ -600,21 +602,16 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); */ void mdb_close(MDB_env *env, MDB_dbi dbi); - /** Delete a database and free all its pages. - * This function acquires its own write transaction to perform its - * work, so it must not be called if there is already an open write - * transaction in the environment. The database handle will also be - * closed as part of this operation. 
- * @param[in] env An environment handle returned by #mdb_env_create() + /** Delete a database and/or free all its pages. + * If the \b del parameter is non-zero the DB handle will be closed + * and the DB will be deleted. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() * @param[in] dbi A database handle returned by #mdb_open() - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - *
 <ul> - *
 <li>EINVAL - an invalid parameter was specified. - *
 <li>EBUSY - there is already a write transaction. - * </ul>
+ * @param[in] del non-zero to delete the DB from the environment, + * otherwise just free its pages. + * @return A non-zero error value on failure and 0 on success. */ -int mdb_drop(MDB_env *env, MDB_dbi dbi); +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del); /** Set a custom key comparison function for a database. * The comparison function is called whenever it is necessary to compare a diff --git a/libraries/libmdb/midl.c b/libraries/libmdb/midl.c index 7a7b59c7c0..70487aa3d9 100644 --- a/libraries/libmdb/midl.c +++ b/libraries/libmdb/midl.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include "midl.h" @@ -117,11 +118,44 @@ int mdb_midl_insert( IDL ids, ID id ) } #endif -int mdb_midl_append( IDL ids, ID id ) +IDL mdb_midl_alloc() { + IDL ids = malloc((MDB_IDL_UM_MAX+1) * sizeof(ID)); + *ids++ = MDB_IDL_UM_MAX; + return ids; +} + +void mdb_midl_free(IDL ids) +{ + free(ids-1); +} + +int mdb_midl_shrink( IDL *idp ) +{ + IDL ids = *idp; + if (ids[-1] > MDB_IDL_UM_MAX) { + ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(ID)); + *ids++ = MDB_IDL_UM_MAX; + *idp = ids; + return 1; + } + return 0; +} + +int mdb_midl_append( IDL *idp, ID id ) +{ + IDL ids = *idp; /* Too big? */ - if (ids[0] >= MDB_IDL_UM_MAX) - return -1; + if (ids[0] >= ids[-1]) { + IDL idn = ids-1; + /* grow it */ + idn = realloc(idn, (*idn + MDB_IDL_UM_MAX + 1) * sizeof(ID)); + if (!idn) + return -1; + *idn++ += MDB_IDL_UM_MAX; + ids = idn; + *idp = ids; + } ids[0]++; ids[ids[0]] = id; return 0; diff --git a/libraries/libmdb/midl.h b/libraries/libmdb/midl.h index 041aa03b3a..0ab89e0333 100644 --- a/libraries/libmdb/midl.h +++ b/libraries/libmdb/midl.h @@ -111,12 +111,30 @@ typedef ID *IDL; int mdb_midl_insert( IDL ids, ID id ); #endif + /** Allocate an IDL. + * Allocates memory for an IDL of a default size. + * @return IDL on success, NULL on failure. + */ +IDL mdb_midl_alloc(); + + /** Free an IDL. + * @param[in] ids The IDL to free. 
+ */ +void mdb_midl_free(IDL ids); + + /** Shrink an IDL. + * Return the IDL to the default size if it has grown larger. + * @param[in,out] idp Address of the IDL to shrink. + * @return 0 on no change, non-zero if shrunk. + */ +int mdb_midl_shrink(IDL *idp); + /** Append an ID onto an IDL. - * @param[in,out] ids The IDL to append to. + * @param[in,out] idp Address of the IDL to append to. * @param[in] id The ID to append. - * @return 0 on success, -2 if the IDL is too large. + * @return 0 on success, -1 if the IDL could not be grown. */ -int mdb_midl_append( IDL ids, ID id ); +int mdb_midl_append( IDL *idp, ID id ); /** Sort an IDL. * @param[in,out] ids The IDL to sort.