mirror of
https://git.openldap.org/openldap/openldap.git
synced 2025-01-06 10:46:21 +08:00
Freelist cleanup/streamlining
Drop unneeded definitions, redundant code.
This commit is contained in:
parent
65c053a6e7
commit
1b8bfc5756
@ -911,18 +911,6 @@ typedef struct MDB_xcursor {
|
||||
unsigned char mx_dbflag;
|
||||
} MDB_xcursor;
|
||||
|
||||
/** A set of pages freed by an earlier transaction. */
|
||||
typedef struct MDB_oldpages {
|
||||
/** Usually we only read one record from the FREEDB at a time, but
|
||||
* in case we read more, this will chain them together.
|
||||
*/
|
||||
struct MDB_oldpages *mo_next;
|
||||
/** The ID of the transaction in which these pages were freed. */
|
||||
txnid_t mo_txnid;
|
||||
/** An #MDB_IDL of the pages */
|
||||
pgno_t mo_pages[1]; /* dynamic */
|
||||
} MDB_oldpages;
|
||||
|
||||
/** The database environment. */
|
||||
struct MDB_env {
|
||||
HANDLE me_fd; /**< The main data file */
|
||||
@ -949,12 +937,10 @@ struct MDB_env {
|
||||
size_t me_mapsize; /**< size of the data memory map */
|
||||
off_t me_size; /**< current file size */
|
||||
pgno_t me_maxpg; /**< me_mapsize / me_psize */
|
||||
txnid_t me_pgfirst; /**< ID of first old page record we used */
|
||||
txnid_t me_pglast; /**< ID of last old page record we used */
|
||||
MDB_dbx *me_dbxs; /**< array of static DB info */
|
||||
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
|
||||
MDB_oldpages *me_pghead; /**< list of old page records */
|
||||
MDB_oldpages *me_pgfree; /**< list of page records to free */
|
||||
pgno_t *me_pghead; /**< old pages reclaimed from freelist */
|
||||
pthread_key_t me_txkey; /**< thread-key for readers */
|
||||
MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */
|
||||
/** IDL of pages that became unused in a write txn */
|
||||
@ -1287,7 +1273,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
||||
* after txn 3 commits, and so will be safe to re-use in txn 4.
|
||||
*/
|
||||
if (txn->mt_txnid > 3) {
|
||||
|
||||
if (!txn->mt_env->me_pghead &&
|
||||
txn->mt_dbs[FREE_DBI].md_root != P_INVALID) {
|
||||
/* See if there's anything in the free DB */
|
||||
@ -1298,7 +1283,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
||||
txnid_t *kptr;
|
||||
|
||||
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
|
||||
if (!txn->mt_env->me_pgfirst) {
|
||||
if (!txn->mt_env->me_pglast) {
|
||||
mdb_page_search(&m2, NULL, 0);
|
||||
leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0);
|
||||
kptr = (txnid_t *)NODEKEY(leaf);
|
||||
@ -1335,10 +1320,9 @@ again:
|
||||
if (oldest > last) {
|
||||
/* It's usable, grab it.
|
||||
*/
|
||||
MDB_oldpages *mop;
|
||||
pgno_t *idl;
|
||||
pgno_t *idl, *mop;
|
||||
|
||||
if (!txn->mt_env->me_pgfirst) {
|
||||
if (!txn->mt_env->me_pglast) {
|
||||
mdb_node_read(txn, leaf, &data);
|
||||
}
|
||||
idl = (MDB_ID *) data.mv_data;
|
||||
@ -1347,26 +1331,20 @@ again:
|
||||
*/
|
||||
if (!idl[0]) {
|
||||
txn->mt_env->me_pglast = last;
|
||||
if (!txn->mt_env->me_pgfirst)
|
||||
txn->mt_env->me_pgfirst = last;
|
||||
goto again;
|
||||
}
|
||||
mop = malloc(sizeof(MDB_oldpages) + MDB_IDL_SIZEOF(idl) - sizeof(pgno_t));
|
||||
mop = malloc(MDB_IDL_SIZEOF(idl));
|
||||
if (!mop)
|
||||
return ENOMEM;
|
||||
mop->mo_next = txn->mt_env->me_pghead;
|
||||
mop->mo_txnid = last;
|
||||
txn->mt_env->me_pglast = last;
|
||||
if (!txn->mt_env->me_pgfirst)
|
||||
txn->mt_env->me_pgfirst = last;
|
||||
txn->mt_env->me_pghead = mop;
|
||||
memcpy(mop->mo_pages, idl, MDB_IDL_SIZEOF(idl));
|
||||
memcpy(mop, idl, MDB_IDL_SIZEOF(idl));
|
||||
|
||||
#if MDB_DEBUG > 1
|
||||
{
|
||||
unsigned int i;
|
||||
DPRINTF("IDL read txn %zu root %zu num %zu",
|
||||
mop->mo_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
|
||||
last, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
|
||||
for (i=0; i<idl[0]; i++) {
|
||||
DPRINTF("IDL %zu", idl[i+1]);
|
||||
}
|
||||
@ -1376,14 +1354,14 @@ again:
|
||||
}
|
||||
none:
|
||||
if (txn->mt_env->me_pghead) {
|
||||
MDB_oldpages *mop = txn->mt_env->me_pghead;
|
||||
pgno_t *mop = txn->mt_env->me_pghead;
|
||||
if (num > 1) {
|
||||
MDB_cursor m2;
|
||||
int retry = 500, readit = 0, n2 = num-1;
|
||||
unsigned int i, j, k;
|
||||
|
||||
/* If current list is too short, must fetch more and coalesce */
|
||||
if (mop->mo_pages[0] < (unsigned)num)
|
||||
if (mop[0] < (unsigned)num)
|
||||
readit = 1;
|
||||
|
||||
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
|
||||
@ -1398,11 +1376,10 @@ none:
|
||||
}
|
||||
if (readit) {
|
||||
MDB_val key, data;
|
||||
MDB_oldpages *mop2;
|
||||
pgno_t *idl;
|
||||
pgno_t *idl, *mop2;
|
||||
int exact;
|
||||
|
||||
last = mop->mo_txnid + 1;
|
||||
last = txn->mt_env->me_pglast + 1;
|
||||
|
||||
/* We haven't hit the readers list yet? */
|
||||
if (!oldest) {
|
||||
@ -1432,39 +1409,37 @@ none:
|
||||
if (rc)
|
||||
return rc;
|
||||
idl = (MDB_ID *) data.mv_data;
|
||||
mop2 = malloc(sizeof(MDB_oldpages) + MDB_IDL_SIZEOF(idl) - 2*sizeof(pgno_t) + MDB_IDL_SIZEOF(mop->mo_pages));
|
||||
mop2 = malloc(MDB_IDL_SIZEOF(idl) + MDB_IDL_SIZEOF(mop));
|
||||
if (!mop2)
|
||||
return ENOMEM;
|
||||
/* merge in sorted order */
|
||||
i = idl[0]; j = mop->mo_pages[0]; mop2->mo_pages[0] = k = i+j;
|
||||
mop->mo_pages[0] = P_INVALID;
|
||||
i = idl[0]; j = mop[0]; mop2[0] = k = i+j;
|
||||
mop[0] = P_INVALID;
|
||||
while (i>0 || j>0) {
|
||||
if (i && idl[i] < mop->mo_pages[j])
|
||||
mop2->mo_pages[k--] = idl[i--];
|
||||
if (i && idl[i] < mop[j])
|
||||
mop2[k--] = idl[i--];
|
||||
else
|
||||
mop2->mo_pages[k--] = mop->mo_pages[j--];
|
||||
mop2[k--] = mop[j--];
|
||||
}
|
||||
txn->mt_env->me_pglast = last;
|
||||
mop2->mo_txnid = last;
|
||||
mop2->mo_next = mop->mo_next;
|
||||
txn->mt_env->me_pghead = mop2;
|
||||
free(mop);
|
||||
mop = mop2;
|
||||
/* Keep trying to read until we have enough */
|
||||
if (mop->mo_pages[0] < (unsigned)num) {
|
||||
if (mop[0] < (unsigned)num) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* current list has enough pages, but are they contiguous? */
|
||||
for (i=mop->mo_pages[0]; i>=(unsigned)num; i--) {
|
||||
if (mop->mo_pages[i-n2] == mop->mo_pages[i] + n2) {
|
||||
pgno = mop->mo_pages[i];
|
||||
for (i=mop[0]; i>=(unsigned)num; i--) {
|
||||
if (mop[i-n2] == mop[i] + n2) {
|
||||
pgno = mop[i];
|
||||
i -= n2;
|
||||
/* move any stragglers down */
|
||||
for (j=i+num; j<=mop->mo_pages[0]; j++)
|
||||
mop->mo_pages[i++] = mop->mo_pages[j];
|
||||
mop->mo_pages[0] -= num;
|
||||
for (j=i+num; j<=mop[0]; j++)
|
||||
mop[i++] = mop[j];
|
||||
mop[0] -= num;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1478,17 +1453,12 @@ none:
|
||||
} while (1);
|
||||
} else {
|
||||
/* peel pages off tail, so we only have to truncate the list */
|
||||
pgno = MDB_IDL_LAST(mop->mo_pages);
|
||||
mop->mo_pages[0]--;
|
||||
pgno = MDB_IDL_LAST(mop);
|
||||
mop[0]--;
|
||||
}
|
||||
if (MDB_IDL_IS_ZERO(mop->mo_pages)) {
|
||||
txn->mt_env->me_pghead = mop->mo_next;
|
||||
if (mc->mc_dbi == FREE_DBI) {
|
||||
mop->mo_next = txn->mt_env->me_pgfree;
|
||||
txn->mt_env->me_pgfree = mop;
|
||||
} else {
|
||||
free(mop);
|
||||
}
|
||||
if (MDB_IDL_IS_ZERO(mop)) {
|
||||
txn->mt_env->me_pghead = NULL;
|
||||
free(mop);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1961,7 +1931,7 @@ mdb_txn_reset0(MDB_txn *txn)
|
||||
if (!(env->me_flags & MDB_ROFS))
|
||||
txn->mt_u.reader->mr_txnid = (txnid_t)-1;
|
||||
} else {
|
||||
MDB_oldpages *mop;
|
||||
pgno_t *mop;
|
||||
MDB_page *dp;
|
||||
unsigned int i;
|
||||
|
||||
@ -2001,11 +1971,10 @@ mdb_txn_reset0(MDB_txn *txn)
|
||||
env->me_free_pgs = txn->mt_free_pgs;
|
||||
}
|
||||
|
||||
while ((mop = txn->mt_env->me_pghead)) {
|
||||
txn->mt_env->me_pghead = mop->mo_next;
|
||||
if ((mop = txn->mt_env->me_pghead) != NULL) {
|
||||
txn->mt_env->me_pghead = NULL;
|
||||
free(mop);
|
||||
}
|
||||
txn->mt_env->me_pgfirst = 0;
|
||||
txn->mt_env->me_pglast = 0;
|
||||
|
||||
env->me_txn = NULL;
|
||||
@ -2054,6 +2023,7 @@ mdb_txn_commit(MDB_txn *txn)
|
||||
MDB_page *dp;
|
||||
MDB_env *env;
|
||||
pgno_t next, freecnt;
|
||||
txnid_t oldpg_txnid, id;
|
||||
MDB_cursor mc;
|
||||
|
||||
assert(txn != NULL);
|
||||
@ -2165,10 +2135,21 @@ mdb_txn_commit(MDB_txn *txn)
|
||||
}
|
||||
}
|
||||
|
||||
/* Save the freelist as of this transaction to the freeDB. This
|
||||
* can change the freelist, so keep trying until it stabilizes.
|
||||
*
|
||||
* env->me_pglast and the length of txn->mt_free_pgs cannot decrease.
|
||||
* Page numbers cannot disappear from txn->mt_free_pgs. New pages
|
||||
* can only appear in env->me_pghead when env->me_pglast increases.
|
||||
* Until then, the me_pghead pointer won't move but can become NULL.
|
||||
*/
|
||||
|
||||
mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
|
||||
oldpg_txnid = id = 0;
|
||||
freecnt = 0;
|
||||
|
||||
/* should only be one record now */
|
||||
if (env->me_pghead || env->me_pgfirst) {
|
||||
if (env->me_pghead || env->me_pglast) {
|
||||
/* make sure first page of freeDB is touched and on freelist */
|
||||
rc = mdb_page_search(&mc, NULL, MDB_PS_MODIFY);
|
||||
if (rc && rc != MDB_NOTFOUND) {
|
||||
@ -2179,28 +2160,27 @@ fail:
|
||||
}
|
||||
|
||||
/* Delete IDLs we used from the free list */
|
||||
if (env->me_pgfirst) {
|
||||
txnid_t cur;
|
||||
if (env->me_pglast) {
|
||||
MDB_val key;
|
||||
int exact = 0;
|
||||
|
||||
key.mv_size = sizeof(cur);
|
||||
for (cur = env->me_pgfirst; cur <= env->me_pglast; cur++) {
|
||||
key.mv_data = &cur;
|
||||
|
||||
mdb_cursor_set(&mc, &key, NULL, MDB_SET, &exact);
|
||||
do {
|
||||
free_pgfirst:
|
||||
rc = mdb_cursor_first(&mc, &key, NULL);
|
||||
if (rc)
|
||||
goto fail;
|
||||
oldpg_txnid = *(txnid_t *)key.mv_data;
|
||||
again:
|
||||
assert(oldpg_txnid <= env->me_pglast);
|
||||
id = 0;
|
||||
rc = mdb_cursor_del(&mc, 0);
|
||||
if (rc)
|
||||
goto fail;
|
||||
}
|
||||
env->me_pgfirst = 0;
|
||||
env->me_pglast = 0;
|
||||
} while (oldpg_txnid < env->me_pglast);
|
||||
}
|
||||
|
||||
/* save to free list */
|
||||
/* Save IDL of pages freed by this txn, to freeDB */
|
||||
free2:
|
||||
freecnt = txn->mt_free_pgs[0];
|
||||
if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) {
|
||||
if (freecnt != txn->mt_free_pgs[0]) {
|
||||
MDB_val key, data;
|
||||
|
||||
/* make sure last page of freeDB is touched and on freelist */
|
||||
@ -2225,61 +2205,50 @@ free2:
|
||||
/* write to last page of freeDB */
|
||||
key.mv_size = sizeof(pgno_t);
|
||||
key.mv_data = &txn->mt_txnid;
|
||||
data.mv_data = txn->mt_free_pgs;
|
||||
/* The free list can still grow during this call,
|
||||
* despite the pre-emptive touches above. So check
|
||||
* and make sure the entire thing got written.
|
||||
* despite the pre-emptive touches above. So retry
|
||||
* until the reserved space remains big enough.
|
||||
*/
|
||||
do {
|
||||
assert(freecnt < txn->mt_free_pgs[0]);
|
||||
freecnt = txn->mt_free_pgs[0];
|
||||
data.mv_size = MDB_IDL_SIZEOF(txn->mt_free_pgs);
|
||||
mdb_midl_sort(txn->mt_free_pgs);
|
||||
rc = mdb_cursor_put(&mc, &key, &data, 0);
|
||||
rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
|
||||
if (rc)
|
||||
goto fail;
|
||||
} while (freecnt != txn->mt_free_pgs[0]);
|
||||
mdb_midl_sort(txn->mt_free_pgs);
|
||||
memcpy(data.mv_data, txn->mt_free_pgs, data.mv_size);
|
||||
if (oldpg_txnid < env->me_pglast || (!env->me_pghead && id))
|
||||
goto free_pgfirst; /* used up freeDB[oldpg_txnid] */
|
||||
}
|
||||
/* should only be one record now */
|
||||
again:
|
||||
|
||||
/* Put back page numbers we took from freeDB but did not use */
|
||||
if (env->me_pghead) {
|
||||
MDB_val key, data;
|
||||
MDB_oldpages *mop;
|
||||
pgno_t orig;
|
||||
txnid_t id;
|
||||
pgno_t orig, *mop;
|
||||
|
||||
mop = env->me_pghead;
|
||||
id = mop->mo_txnid;
|
||||
id = env->me_pglast;
|
||||
key.mv_size = sizeof(id);
|
||||
key.mv_data = &id;
|
||||
data.mv_size = MDB_IDL_SIZEOF(mop->mo_pages);
|
||||
data.mv_data = mop->mo_pages;
|
||||
orig = mop->mo_pages[0];
|
||||
/* These steps may grow the freelist again
|
||||
* due to freed overflow pages...
|
||||
*/
|
||||
rc = mdb_cursor_put(&mc, &key, &data, 0);
|
||||
if (rc)
|
||||
goto fail;
|
||||
if (mop == env->me_pghead && env->me_pghead->mo_txnid == id) {
|
||||
/* could have been used again here */
|
||||
if (mop->mo_pages[0] != orig) {
|
||||
data.mv_size = MDB_IDL_SIZEOF(mop->mo_pages);
|
||||
data.mv_data = mop->mo_pages;
|
||||
id = mop->mo_txnid;
|
||||
rc = mdb_cursor_put(&mc, &key, &data, 0);
|
||||
if (rc)
|
||||
goto fail;
|
||||
}
|
||||
} else {
|
||||
/* was completely used up */
|
||||
rc = mdb_cursor_del(&mc, 0);
|
||||
i = 2;
|
||||
do {
|
||||
orig = mop[0];
|
||||
data.mv_size = MDB_IDL_SIZEOF(mop);
|
||||
rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
|
||||
if (rc)
|
||||
goto fail;
|
||||
if (env->me_pghead)
|
||||
goto again;
|
||||
}
|
||||
env->me_pgfirst = 0;
|
||||
env->me_pglast = 0;
|
||||
assert(!env->me_pghead || env->me_pglast);
|
||||
/* mop could have been used again here */
|
||||
if (id != env->me_pglast || env->me_pghead == NULL)
|
||||
goto again; /* was completely used up */
|
||||
assert(mop == env->me_pghead && mop[0] <= orig);
|
||||
} while (mop[0] != orig && --i);
|
||||
memcpy(data.mv_data, mop, data.mv_size);
|
||||
}
|
||||
|
||||
/* Check for growth of freelist again */
|
||||
@ -2291,12 +2260,6 @@ again:
|
||||
env->me_pghead = NULL;
|
||||
}
|
||||
|
||||
while (env->me_pgfree) {
|
||||
MDB_oldpages *mop = env->me_pgfree;
|
||||
env->me_pgfree = mop->mo_next;
|
||||
free(mop);
|
||||
}
|
||||
|
||||
if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) {
|
||||
if (mdb_midl_shrink(&txn->mt_free_pgs))
|
||||
env->me_free_pgs = txn->mt_free_pgs;
|
||||
@ -2431,6 +2394,7 @@ sync:
|
||||
}
|
||||
|
||||
done:
|
||||
env->me_pglast = 0;
|
||||
env->me_txn = NULL;
|
||||
if (txn->mt_numdbs > env->me_numdbs) {
|
||||
/* update the DB flags */
|
||||
|
Loading…
Reference in New Issue
Block a user