diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 9bc59f5118..fdd298346b 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -354,8 +354,10 @@ struct MDB_cursor { MDB_ppage mc_stack[CURSOR_STACK]; /* stack of parent pages */ unsigned int mc_snum; /* number of pushed pages */ MDB_dbi mc_dbi; - short mc_initialized; /* 1 if initialized */ - short mc_eof; /* 1 if end is reached */ + unsigned int mc_flags; +#define C_INITIALIZED 0x01 +#define C_EOF 0x02 +#define C_XDIRTY 0x04 struct MDB_xcursor *mc_xcursor; }; @@ -519,7 +521,7 @@ static int mdb_cursor_last(MDB_cursor *cursor, static void mdb_xcursor_init0(MDB_cursor *mc); static void mdb_xcursor_init1(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx, MDB_page *mp, MDB_node *node); -static void mdb_xcursor_fini(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); +static void mdb_xcursor_fini(MDB_cursor *mc); static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); @@ -659,13 +661,18 @@ mdb_alloc_page(MDB_txn *txn, MDB_dbi dbi, MDB_page *parent, unsigned int parent_ if (!txn->mt_env->me_pghead && dbi != FREE_DBI && txn->mt_dbs[FREE_DBI].md_root != P_INVALID) { /* See if there's anything in the free DB */ + MDB_cursor mc; MDB_pageparent mpp; MDB_node *leaf; ULONG *kptr, oldest; mpp.mp_parent = NULL; mpp.mp_pi = 0; - mdb_search_page(txn, FREE_DBI, NULL, NULL, 0, &mpp); + mc.mc_txn = txn; + mc.mc_dbi = dbi; + mc.mc_snum = 0; + mc.mc_flags = 0; + mdb_search_page(txn, FREE_DBI, NULL, &mc, 0, &mpp); leaf = NODEPTR(mpp.mp_page, 0); kptr = (ULONG *)NODEKEY(leaf); @@ -683,6 +690,7 @@ mdb_alloc_page(MDB_txn *txn, MDB_dbi dbi, MDB_page *parent, unsigned int parent_ /* It's usable, grab it. */ MDB_oldpages *mop; + MDB_ppage *top; MDB_val data; pgno_t *idl; @@ -705,11 +713,10 @@ mdb_alloc_page(MDB_txn *txn, MDB_dbi dbi, MDB_page *parent, unsigned int parent_ } #endif /* drop this IDL from the DB */ - mpp.mp_parent = NULL; - mpp.mp_pi = 0; - mdb_search_page(txn, FREE_DBI, NULL, NULL, 1, &mpp); - leaf = NODEPTR(mpp.mp_page, 0); - mdb_del0(txn, FREE_DBI, 0, &mpp, leaf); + top = CURSOR_TOP(&mc); + top->mp_ki = 0; + mc.mc_flags = C_INITIALIZED; + mdb_cursor_del(&mc, 0); } } if (txn->mt_env->me_pghead) { @@ -2268,7 +2275,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, mc.mc_txn = txn; mc.mc_dbi = dbi; - mc.mc_initialized = 0; + mc.mc_flags = 0; if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { mc.mc_xcursor = &mx; mdb_xcursor_init0(&mc); @@ -2333,11 +2340,11 @@ mdb_cursor_next(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op o MDB_node *leaf; int rc; - if (cursor->mc_eof) { + if (cursor->mc_flags & C_EOF) { return MDB_NOTFOUND; } - assert(cursor->mc_initialized); + assert(cursor->mc_flags & C_INITIALIZED); top = CURSOR_TOP(cursor); mp = top->mp_page; @@ -2351,7 +2358,7 @@ mdb_cursor_next(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op o return rc; } } else { - cursor->mc_xcursor->mx_cursor.mc_initialized = 0; + cursor->mc_xcursor->mx_cursor.mc_flags = 0; if (op == MDB_NEXT_DUP) return MDB_NOTFOUND; } @@ -2362,7 +2369,7 @@ mdb_cursor_next(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op o if (top->mp_ki + 1 >= NUMKEYS(mp)) { DPUTS("=====> move to next sibling page"); if (mdb_sibling(cursor, 1) != MDB_SUCCESS) { - cursor->mc_eof = 1; + cursor->mc_flags |= C_EOF; return MDB_NOTFOUND; } top = CURSOR_TOP(cursor); @@ -2409,7 +2416,7 @@ mdb_cursor_prev(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op o MDB_node *leaf; int rc; - assert(cursor->mc_initialized); + assert(cursor->mc_flags & C_INITIALIZED); top = CURSOR_TOP(cursor); mp = top->mp_page; @@ -2422,7 +2429,7 @@ mdb_cursor_prev(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op o if (op != MDB_PREV || rc == MDB_SUCCESS) return rc; } else { - cursor->mc_xcursor->mx_cursor.mc_initialized = 0; + cursor->mc_xcursor->mx_cursor.mc_flags = 0; if (op == MDB_PREV_DUP) return MDB_NOTFOUND; } @@ -2434,7 +2441,7 @@ mdb_cursor_prev(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op o if (top->mp_ki == 0) { DPUTS("=====> move to prev sibling page"); if (mdb_sibling(cursor, 0) != MDB_SUCCESS) { - cursor->mc_initialized = 0; + cursor->mc_flags &= ~C_INITIALIZED; return MDB_NOTFOUND; } top = CURSOR_TOP(cursor); @@ -2444,7 +2451,7 @@ mdb_cursor_prev(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op o } else top->mp_ki--; - cursor->mc_eof = 0; + cursor->mc_flags &= ~C_EOF; DPRINTF("==> cursor points to page %lu with %u keys, key index %u", mp->mp_pgno, NUMKEYS(mp), top->mp_ki); @@ -2491,7 +2498,7 @@ mdb_cursor_set(MDB_cursor *cursor, MDB_val *key, MDB_val *data, assert(key->mv_size > 0); /* See if we're already on the right page */ - if (cursor->mc_initialized) { + if (cursor->mc_flags & C_INITIALIZED) { MDB_val nodekey; top = CURSOR_TOP(cursor); /* Don't try this for LEAF2 pages. Maybe support that later. */ @@ -2539,8 +2546,8 @@ set2: leaf = NODEPTR(mpp.mp_page, 0); } - cursor->mc_initialized = 1; - cursor->mc_eof = 0; + cursor->mc_flags |= C_INITIALIZED; + cursor->mc_flags &= ~C_EOF; if (IS_LEAF2(mpp.mp_page)) { key->mv_size = cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_pad; @@ -2606,8 +2613,8 @@ mdb_cursor_first(MDB_cursor *cursor, MDB_val *key, MDB_val *data) assert(IS_LEAF(mpp.mp_page)); leaf = NODEPTR(mpp.mp_page, 0); - cursor->mc_initialized = 1; - cursor->mc_eof = 0; + cursor->mc_flags |= C_INITIALIZED; + cursor->mc_flags &= ~C_EOF; if (IS_LEAF2(mpp.mp_page)) { key->mv_size = cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_pad; @@ -2623,7 +2630,7 @@ mdb_cursor_first(MDB_cursor *cursor, MDB_val *key, MDB_val *data) return rc; } else { if (cursor->mc_xcursor) - cursor->mc_xcursor->mx_cursor.mc_initialized = 0; + cursor->mc_xcursor->mx_cursor.mc_flags = 0; if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS) return rc; } @@ -2652,8 +2659,8 @@ mdb_cursor_last(MDB_cursor *cursor, MDB_val *key, MDB_val *data) assert(IS_LEAF(mpp.mp_page)); leaf = NODEPTR(mpp.mp_page, NUMKEYS(mpp.mp_page)-1); - cursor->mc_initialized = 1; - cursor->mc_eof = 0; + cursor->mc_flags |= C_INITIALIZED; + cursor->mc_flags &= ~C_EOF; top = CURSOR_TOP(cursor); top->mp_ki = NUMKEYS(top->mp_page) - 1; @@ -2709,12 +2716,13 @@ mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, case MDB_GET_MULTIPLE: if (data == NULL || !(cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPFIXED) || - !cursor->mc_initialized) { + !(cursor->mc_flags & C_INITIALIZED)) { rc = EINVAL; break; } rc = MDB_SUCCESS; - if (!cursor->mc_xcursor->mx_cursor.mc_initialized || cursor->mc_xcursor->mx_cursor.mc_eof) + if (!(cursor->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) || + (cursor->mc_xcursor->mx_cursor.mc_flags & C_EOF)) break; goto fetchm; case MDB_NEXT_MULTIPLE: @@ -2723,12 +2731,12 @@ mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, rc = EINVAL; break; } - if (!cursor->mc_initialized) + if (!(cursor->mc_flags & C_INITIALIZED)) rc = mdb_cursor_first(cursor, key, data); else rc = mdb_cursor_next(cursor, key, data, MDB_NEXT_DUP); if (rc == MDB_SUCCESS) { - if (cursor->mc_xcursor->mx_cursor.mc_initialized) { + if (cursor->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { MDB_ppage *top; fetchm: top = CURSOR_TOP(&cursor->mc_xcursor->mx_cursor); @@ -2744,7 +2752,7 @@ fetchm: case MDB_NEXT: case MDB_NEXT_DUP: case MDB_NEXT_NODUP: - if (!cursor->mc_initialized) + if (!(cursor->mc_flags & C_INITIALIZED)) rc = mdb_cursor_first(cursor, key, data); else rc = mdb_cursor_next(cursor, key, data, op); @@ -2752,7 +2760,7 @@ fetchm: case MDB_PREV: case MDB_PREV_DUP: case MDB_PREV_NODUP: - if (!cursor->mc_initialized || cursor->mc_eof) + if (!(cursor->mc_flags & C_INITIALIZED) || (cursor->mc_flags & C_EOF)) rc = mdb_cursor_last(cursor, key, data); else rc = mdb_cursor_prev(cursor, key, data, op); @@ -2763,8 +2771,8 @@ fetchm: case MDB_FIRST_DUP: if (data == NULL || !(cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) || - !cursor->mc_initialized || - !cursor->mc_xcursor->mx_cursor.mc_initialized) { + !(cursor->mc_flags & C_INITIALIZED) || + !(cursor->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { rc = EINVAL; break; } @@ -2776,8 +2784,8 @@ fetchm: case MDB_LAST_DUP: if (data == NULL || !(cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) || - !cursor->mc_initialized || - !cursor->mc_xcursor->mx_cursor.mc_initialized) { + !(cursor->mc_flags & C_INITIALIZED) || + !(cursor->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { rc = EINVAL; break; } @@ -2811,8 +2819,6 @@ mdb_cursor_touch(MDB_cursor *cursor) mpp.mp_parent = mpp.mp_page; mpp.mp_pi = cursor->mc_stack[i].mp_ki; } - if (cursor->mc_xcursor) - return mdb_cursor_touch(&cursor->mc_xcursor->mx_cursor); return MDB_SUCCESS; } @@ -2824,6 +2830,9 @@ mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_node *leaf; int rc, exact; + if (F_ISSET(cursor->mc_txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; + if (flags == MDB_NODUPDATA) { rc = mdb_cursor_set(cursor, key, data, MDB_GET_BOTH, &exact); if (rc == 0) @@ -2836,15 +2845,98 @@ mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, return MDB_KEYEXIST; if (rc != MDB_NOTFOUND) return rc; - } else if (!cursor->mc_initialized) { + } else if (!(cursor->mc_flags & C_INITIALIZED)) { return EINVAL; } /* Cursor is positioned, now make sure all pages are writable */ rc = mdb_cursor_touch(cursor); if (rc) return rc; + if (cursor->mc_xcursor) { + rc = mdb_cursor_touch(&cursor->mc_xcursor->mx_cursor); + if (rc) return rc; + cursor->mc_flags |= C_XDIRTY; + } top = CURSOR_TOP(cursor); + return rc; +} + +int +mdb_cursor_del(MDB_cursor *mc, unsigned int flags) +{ + MDB_pageparent mpp; + MDB_ppage *top; + MDB_node *leaf; + int rc; + + if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; + + if (!mc->mc_flags & C_INITIALIZED) + return EINVAL; + + rc = mdb_cursor_touch(mc); + if (rc) return rc; + if (mc->mc_xcursor) { + rc = mdb_cursor_touch(&mc->mc_xcursor->mx_cursor); + if (rc) return rc; + } + + top = CURSOR_TOP(mc); + leaf = NODEPTR(top->mp_page, top->mp_ki); + + if (!IS_LEAF2(top->mp_page) && F_ISSET(leaf->mn_flags, F_DUPDATA)) { + MDB_pageparent mp2; + + if (flags != MDB_NODUPDATA) { + rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, 0); + mdb_xcursor_fini(mc); + /* If sub-DB still has entries, we're done */ + if (mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi].md_root + != P_INVALID) { + memcpy(NODEDATA(leaf), + &mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi], + sizeof(MDB_db)); + mc->mc_txn->mt_dbs[mc->mc_dbi].md_entries--; + return rc; + } + /* otherwise fall thru and delete the sub-DB */ + } else { + /* add all the child DB's pages to the free list */ + rc = mdb_search_page(&mx.mx_txn, mx.mx_cursor.mc_dbi, + NULL, &mx.mx_cursor, 0, &mp2); + if (rc == MDB_SUCCESS) { + MDB_ppage *top, *parent; + MDB_node *ni; + unsigned int i; + + cursor_pop_page(&mx.mx_cursor); + if (mx.mx_cursor.mc_snum) { + top = CURSOR_TOP(&mx.mx_cursor); + while (mx.mx_cursor.mc_snum > 1) { + parent = CURSOR_PARENT(&mx.mx_cursor); + for (i=0; imp_page); i++) { + ni = NODEPTR(top->mp_page, i); + mdb_midl_insert(txn->mt_free_pgs, NODEPGNO(ni)); + } + parent->mp_ki++; + if (parent->mp_ki >= NUMKEYS(parent->mp_page)) { + cursor_pop_page(&mx.mx_cursor); + top = parent; + } else { + ni = NODEPTR(parent->mp_page, parent->mp_ki); + rc = mdb_get_page(&mx.mx_txn, NODEPGNO(ni), &top->mp_page); + } + } + } + mdb_midl_insert(txn->mt_free_pgs, mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi].md_root); + } + } + } + + return mdb_del0(txn, dbi, ki, &mpp, leaf); + return rc; } /* Allocate a page and initialize it @@ -3109,22 +3201,22 @@ mdb_xcursor_init1(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx, MDB_page *mp, MDB_ mx->mx_dbxs[dbn].md_name.mv_size = node->mn_ksize; mx->mx_txn.mt_next_pgno = txn->mt_next_pgno; mx->mx_txn.mt_u = txn->mt_u; - mx->mx_cursor.mc_initialized = 0; - mx->mx_cursor.mc_eof = 0; + mx->mx_cursor.mc_flags = 0; } static void -mdb_xcursor_fini(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) +mdb_xcursor_fini(MDB_cursor *mc) { - txn->mt_next_pgno = mx->mx_txn.mt_next_pgno; - txn->mt_u = mx->mx_txn.mt_u; - txn->mt_dbs[0] = mx->mx_dbs[0]; - txn->mt_dbs[1] = mx->mx_dbs[1]; - txn->mt_dbxs[0].md_dirty = mx->mx_dbxs[0].md_dirty; - txn->mt_dbxs[1].md_dirty = mx->mx_dbxs[1].md_dirty; - if (dbi > 1) { - txn->mt_dbs[dbi] = mx->mx_dbs[2]; - txn->mt_dbxs[dbi].md_dirty = mx->mx_dbxs[2].md_dirty; + MDB_xcursor *mx = mc->mc_xcursor; + mc->mc_txn->mt_next_pgno = mx->mx_txn.mt_next_pgno; + mc->mc_txn->mt_u = mx->mx_txn.mt_u; + mc->mc_txn->mt_dbs[0] = mx->mx_dbs[0]; + mc->mc_txn->mt_dbs[1] = mx->mx_dbs[1]; + mc->mc_txn->mt_dbxs[0].md_dirty = mx->mx_dbxs[0].md_dirty; + mc->mc_txn->mt_dbxs[1].md_dirty = mx->mx_dbxs[1].md_dirty; + if (mc->mc_dbi > 1) { + mc->mc_txn->mt_dbs[mc->mc_dbi] = mx->mx_dbs[2]; + mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty = mx->mx_dbxs[2].md_dirty; } } @@ -3175,7 +3267,7 @@ mdb_cursor_count(MDB_cursor *mc, unsigned long *countp) if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { *countp = 1; } else { - if (!mc->mc_xcursor->mx_cursor.mc_initialized) + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) return EINVAL; *countp = mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi].md_entries; @@ -3563,13 +3655,13 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi, mc.mc_txn = txn; mc.mc_dbi = dbi; - mc.mc_initialized = 0; + mc.mc_flags = 0; mc.mc_xcursor = &mx; mdb_xcursor_init0(&mc); mdb_xcursor_init1(txn, dbi, &mx, mpp.mp_page, leaf); if (data) { rc = mdb_del(&mx.mx_txn, mx.mx_cursor.mc_dbi, data, NULL); - mdb_xcursor_fini(txn, dbi, &mx); + mdb_xcursor_fini(&mc); /* If sub-DB still has entries, we're done */ if (mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi].md_root != P_INVALID) { memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi], @@ -3993,7 +4085,7 @@ new_sub: put_sub: mc.mc_txn = txn; mc.mc_dbi = dbi; - mc.mc_initialized = 0; + mc.mc_flags = 0; mc.mc_xcursor = &mx; mdb_xcursor_init0(&mc); mdb_xcursor_init1(txn, dbi, &mx, mpp.mp_page, leaf); @@ -4008,7 +4100,7 @@ put_sub: leaf->mn_flags |= F_DUPDATA; } rc = mdb_put0(&mx.mx_txn, mx.mx_cursor.mc_dbi, data, &xdata, flags); - mdb_xcursor_fini(txn, dbi, &mx); + mdb_xcursor_fini(&mc); memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi], sizeof(MDB_db)); } @@ -4047,8 +4139,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, mc.mc_txn = txn; mc.mc_dbi = dbi; mc.mc_snum = 0; - mc.mc_initialized = 0; - mc.mc_eof = 0; + mc.mc_flags = 0; if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { mc.mc_xcursor = &mx; mdb_xcursor_init0(&mc); @@ -4059,7 +4150,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, if (mc.mc_xcursor) { MDB_ppage *top; MDB_node *leaf; - mdb_xcursor_fini(txn, dbi, &mx); + mdb_xcursor_fini(&mc); top = CURSOR_TOP(&mc); leaf = NODEPTR(top->mp_page, top->mp_ki); memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi],