diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile index 7c3903bdff..25c52ada8e 100644 --- a/libraries/liblmdb/Makefile +++ b/libraries/liblmdb/Makefile @@ -1,3 +1,34 @@ +# Makefile for liblmdb (Lightning memory-mapped database library). + +######################################################################## +# Configuration. The compiler options must enable threaded compilation. +# +# Preprocessor macros (for CPPFLAGS) of interest: +# +# To compile successfully if the default does not: +# - MDB_USE_POSIX_SEM (enabled by default on BSD, Apple) +# Define if shared mutexes are unsupported. Note that Posix +# semaphores and shared mutexes have different behaviors and +# different problems, see the Caveats section in lmdb.h. +# +# For best performence or to compile successfully: +# - MDB_DSYNC = "O_DSYNC" (default) or "O_SYNC" (less efficient) +# If O_DSYNC is undefined but exists in /usr/include, +# preferably set some compiler flag to get the definition. +# - MDB_FDATASYNC = "fdatasync" or "fsync" +# Function for flushing the data of a file. Define this to +# "fsync" if fdatasync() is not supported. fdatasync is +# default except on BSD, Apple, Android which use fsync. +# - MDB_USE_PWRITEV +# Define if the pwritev() function is supported. +# +# Data format: +# - MDB_MAXKEYSIZE +# Controls data packing and limits, see mdb.c. +# +# Debugging: +# - MDB_DEBUG, MDB_PARANOID. +# CC = gcc W = -W -Wall -Wno-unused-parameter -Wbad-function-cast OPT = -O2 -g @@ -6,6 +37,8 @@ LDLIBS = SOLIBS = prefix = /usr/local +######################################################################## + IHDRS = lmdb.h ILIBS = liblmdb.a liblmdb.so IPROGS = mdb_stat mdb_copy diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index 954ffde1cb..2076eb35fa 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -144,6 +144,16 @@ typedef int mdb_mode_t; typedef mode_t mdb_mode_t; #endif +/** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#ifdef _WIN32 +typedef void *mdb_filehandle_t; +#else +typedef int mdb_filehandle_t; +#endif + /** @defgroup mdb MDB API * @{ * @brief OpenLDAP Lightning Memory-Mapped Database Manager @@ -325,13 +335,11 @@ typedef enum MDB_cursor_op { Only for #MDB_DUPSORT */ MDB_NEXT_MULTIPLE, /**< Return all duplicate data items at the next cursor position. Only for #MDB_DUPFIXED */ - MDB_NEXT_NODUP, /**< Position at first data item of next key. - Only for #MDB_DUPSORT */ + MDB_NEXT_NODUP, /**< Position at first data item of next key */ MDB_PREV, /**< Position at previous data item */ MDB_PREV_DUP, /**< Position at previous data item of current key. Only for #MDB_DUPSORT */ - MDB_PREV_NODUP, /**< Position at last data item of previous key. - Only for #MDB_DUPSORT */ + MDB_PREV_NODUP, /**< Position at last data item of previous key */ MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ @@ -535,6 +543,17 @@ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t */ int mdb_env_copy(MDB_env *env, const char *path); + /** @brief Copy an MDB environment to the specified file descriptor. + * + * This function may be used to make a backup of an existing environment. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. 
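A minimal usage sketch for the mdb_env_copyfd() function being documented here; the destination path, the open() flags and the error handling are illustrative assumptions, not part of this patch (POSIX build, needs <fcntl.h>, <unistd.h>, <stdio.h> and lmdb.h; env is an already-opened MDB_env):

	mdb_filehandle_t fd = open("backup.mdb", O_WRONLY | O_CREAT | O_TRUNC, 0666);
	if (fd >= 0) {
		int rc = mdb_env_copyfd(env, fd);	/* write a consistent snapshot */
		if (rc != MDB_SUCCESS)
			fprintf(stderr, "mdb_env_copyfd: %s\n", mdb_strerror(rc));
		close(fd);
	}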
+ * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + /** @brief Return statistics about the MDB environment. * * @param[in] env An environment handle returned by #mdb_env_create() diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 8415ccb26c..84405e8963 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -32,7 +32,9 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE 1 +#endif #include #include #include @@ -114,7 +116,7 @@ #define BIG_ENDIAN __BIG_ENDIAN #endif -#if defined(__i386) || defined(__x86_64) +#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) #define MISALIGNED_OK 1 #endif @@ -158,7 +160,7 @@ #define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) #define ErrCode() GetLastError() #define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} -#define close(fd) CloseHandle(fd) +#define close(fd) (CloseHandle(fd) ? 0 : -1) #define munmap(ptr,len) UnmapViewOfFile(ptr) #else @@ -346,6 +348,9 @@ static txnid_t mdb_debug_start; #define MDB_VERSION 1 /** @brief The maximum size of a key in the database. + * + * The library rejects bigger keys, and cannot deal with records + * with bigger keys stored by a library with bigger max keysize. * * We require that keys all fit onto a regular page. This limit * could be raised a bit further if needed; to something just @@ -926,9 +931,8 @@ typedef struct MDB_xcursor { /** State of FreeDB old pages, stored in the MDB_env */ typedef struct MDB_pgstate { - txnid_t mf_pglast; /**< ID of last old page record we used */ - pgno_t *mf_pghead; /**< old pages reclaimed from freelist */ - pgno_t *mf_pgfree; /**< memory to free when dropping me_pghead */ + pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */ + txnid_t mf_pglast; /**< ID of last used record, or 0 if !mf_pghead */ } MDB_pgstate; /** The database environment. */ @@ -963,14 +967,13 @@ struct MDB_env { MDB_pgstate me_pgstate; /**< state of old pages from freeDB */ # define me_pglast me_pgstate.mf_pglast # define me_pghead me_pgstate.mf_pghead -# define me_pgfree me_pgstate.mf_pgfree MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */ /** IDL of pages that became unused in a write txn */ MDB_IDL me_free_pgs; /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. 
*/ MDB_ID2L me_dirty_list; /** Max number of freelist items that can fit in a single overflow page */ - unsigned int me_maxfree_1pg; + int me_maxfree_1pg; /** Max size of a node on a page */ unsigned int me_nodemax; #ifdef _WIN32 @@ -995,11 +998,14 @@ typedef struct MDB_ntxn { #define MDB_COMMIT_PAGES IOV_MAX #endif + /* max bytes to write in one call */ +#define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4)) + static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp); static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp); static int mdb_page_touch(MDB_cursor *mc); -static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp); +static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); static int mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int modify); #define MDB_PS_MODIFY 1 @@ -1252,19 +1258,27 @@ mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) return txn->mt_dbxs[dbi].md_dcmp(a, b); } -/** Allocate a single page. - * Re-use old malloc'd pages first, otherwise just malloc. +/** Allocate a page. + * Re-use old malloc'd pages first for singletons, otherwise just malloc. */ static MDB_page * -mdb_page_malloc(MDB_cursor *mc) { - MDB_page *ret; - size_t sz = mc->mc_txn->mt_env->me_psize; - if ((ret = mc->mc_txn->mt_env->me_dpages) != NULL) { - VGMEMP_ALLOC(mc->mc_txn->mt_env, ret, sz); - VGMEMP_DEFINED(ret, sizeof(ret->mp_next)); - mc->mc_txn->mt_env->me_dpages = ret->mp_next; - } else if ((ret = malloc(sz)) != NULL) { - VGMEMP_ALLOC(mc->mc_txn->mt_env, ret, sz); +mdb_page_malloc(MDB_txn *txn, unsigned num) +{ + MDB_env *env = txn->mt_env; + MDB_page *ret = env->me_dpages; + size_t sz = env->me_psize; + if (num == 1) { + if (ret) { + VGMEMP_ALLOC(env, ret, sz); + VGMEMP_DEFINED(ret, sizeof(ret->mp_next)); + env->me_dpages = ret->mp_next; + return ret; + } + } else { + sz *= num; + } + if ((ret = malloc(sz)) != NULL) { + VGMEMP_ALLOC(env, ret, sz); } return ret; } @@ -1281,6 +1295,50 @@ mdb_page_free(MDB_env *env, MDB_page *mp) env->me_dpages = mp; } +/* Free a dirty page */ +static void +mdb_dpage_free(MDB_env *env, MDB_page *dp) +{ + if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) { + mdb_page_free(env, dp); + } else { + /* large pages just get freed directly */ + VGMEMP_FREE(env, dp); + free(dp); + } +} + +/* Return all dirty pages to dpage list */ +static void +mdb_dlist_free(MDB_txn *txn) +{ + MDB_env *env = txn->mt_env; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned i, n = dl[0].mid; + + for (i = 1; i <= n; i++) { + mdb_dpage_free(env, dl[i].mptr); + } + dl[0].mid = 0; +} + +/** Find oldest txnid still referenced. Expects txn->mt_txnid > 0. */ +static txnid_t +mdb_find_oldest(MDB_txn *txn) +{ + int i; + txnid_t mr, oldest = txn->mt_txnid - 1; + MDB_reader *r = txn->mt_env->me_txns->mti_readers; + for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) { + if (r[i].mr_pid) { + mr = r[i].mr_txnid; + if (oldest > mr) + oldest = mr; + } + } + return oldest; +} + /** Allocate pages for writing. * If there are free pages available from older transactions, they * will be re-used first. Otherwise a new page will be allocated. @@ -1294,12 +1352,28 @@ mdb_page_free(MDB_env *env, MDB_page *mp) static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) { +#ifdef MDB_PARANOID /* Seems like we can ignore this now */ + /* Get at most more freeDB records once me_pghead + * has enough pages. If not enough, use new pages from the map. 
+ * If and mc is updating the freeDB, only get new + * records if me_pghead is empty. Then the freelist cannot play + * catch-up with itself by growing while trying to save it. + */ + enum { Paranoid = 1, Max_retries = 500 }; +#else + enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ }; +#endif + int rc, n2 = num-1, retry = Max_retries; MDB_txn *txn = mc->mc_txn; + MDB_env *env = txn->mt_env; + pgno_t pgno, *mop = env->me_pghead; + unsigned i, j, k, mop_len = mop ? mop[0] : 0; MDB_page *np; - pgno_t pgno = P_INVALID; MDB_ID2 mid; txnid_t oldest = 0, last; - int rc; + MDB_cursor_op op; + MDB_cursor m2; + int (*insert)(MDB_ID2L, MDB_ID2 *); *mp = NULL; @@ -1307,251 +1381,125 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) if (txn->mt_dirty_room == 0) return MDB_TXN_FULL; - /* The free list won't have any content at all until txn 2 has - * committed. The pages freed by txn 2 will be unreferenced - * after txn 3 commits, and so will be safe to re-use in txn 4. - */ - if (txn->mt_txnid > 3) { - if (!txn->mt_env->me_pghead && - txn->mt_dbs[FREE_DBI].md_root != P_INVALID) { - /* See if there's anything in the free DB */ - MDB_reader *r; - MDB_cursor m2; - MDB_node *leaf; - MDB_val data; - txnid_t *kptr; + for (op = MDB_FIRST;; op = MDB_NEXT) { + MDB_val key, data; + MDB_node *leaf; + pgno_t *idl, old_id, new_id; + /* Seek a big enough contiguous page range. Prefer + * pages at the tail, just truncating the list. + */ + if (mop_len >= (unsigned)num) { + i = mop_len; + do { + pgno = mop[i]; + if (mop[i-n2] == pgno+n2) + goto search_done; + } while (--i >= (unsigned)num); + if (Max_retries < INT_MAX && --retry < 0) + break; + } + + if (op == MDB_FIRST) { /* 1st iteration */ + /* Prepare to fetch more and coalesce */ + oldest = mdb_find_oldest(txn); + last = env->me_pglast; mdb_cursor_init(&m2, txn, FREE_DBI, NULL); - if (!txn->mt_env->me_pglast) { - mdb_page_search(&m2, NULL, 0); - leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0); - kptr = (txnid_t *)NODEKEY(leaf); - last = *kptr; - } else { - MDB_val key; -again: - last = txn->mt_env->me_pglast + 1; - leaf = NULL; - key.mv_data = &last; + if (last) { + op = MDB_SET_RANGE; + key.mv_data = &last; /* will loop up last+1 */ key.mv_size = sizeof(last); - rc = mdb_cursor_set(&m2, &key, &data, MDB_SET_RANGE, NULL); - if (rc) - goto none; - last = *(txnid_t *)key.mv_data; } + if (Paranoid && mc->mc_dbi == FREE_DBI) + retry = -1; + } + if (Paranoid && retry < 0 && mop_len) + break; - { - unsigned int i, nr; - txnid_t mr; - oldest = txn->mt_txnid - 1; - nr = txn->mt_env->me_txns->mti_numreaders; - r = txn->mt_env->me_txns->mti_readers; - for (i=0; i last) { - /* It's usable, grab it. 
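A note on the contiguity test in the new allocation loop above: because the reclaimed-page list is kept sorted, mop[i-n2] == pgno + n2 implies the entries between the two indices are exactly the intervening page numbers, i.e. a run of num consecutive pages. For example (made-up page numbers), with num == 3 the test matches when indices i, i-1 and i-2 hold pages 17, 18 and 19, and the allocator jumps to search_done with pgno = 17, consuming pages 17 through 19. This is an illustrative reading of the code, not text from the patch.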
- */ - pgno_t *idl, *mop; - - if (!txn->mt_env->me_pglast) { - mdb_node_read(txn, leaf, &data); - } - idl = (MDB_ID *) data.mv_data; - /* We might have a zero-length IDL due to freelist growth - * during a prior commit - */ - if (!idl[0]) { - txn->mt_env->me_pglast = last; - goto again; - } - mop = malloc(MDB_IDL_SIZEOF(idl)); - if (!mop) - return ENOMEM; - txn->mt_env->me_pglast = last; - txn->mt_env->me_pghead = txn->mt_env->me_pgfree = mop; - memcpy(mop, idl, MDB_IDL_SIZEOF(idl)); + last++; + /* Do not fetch more if the record will be too recent */ + if (oldest <= last) + break; + rc = mdb_cursor_get(&m2, &key, NULL, op); + if (rc) { + if (rc == MDB_NOTFOUND) + break; + return rc; + } + last = *(txnid_t*)key.mv_data; + if (oldest <= last) + break; + np = m2.mc_pg[m2.mc_top]; + leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); + if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS) + return rc; + idl = (MDB_ID *) data.mv_data; + i = idl[0]; + if (!mop) { + if (!(env->me_pghead = mop = mdb_midl_alloc(i))) + return ENOMEM; + } else { + if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0) + return rc; + mop = env->me_pghead; + } + env->me_pglast = last; #if MDB_DEBUG > 1 - { - unsigned int i; - DPRINTF("IDL read txn %zu root %zu num %zu", - last, txn->mt_dbs[FREE_DBI].md_root, idl[0]); - for (i=0; imt_dbs[FREE_DBI].md_root, i); + for (k = i; k; k--) + DPRINTF("IDL %zu", idl[k]); #endif - } - } -none: - if (txn->mt_env->me_pghead) { - pgno_t *mop = txn->mt_env->me_pghead; - if (num > 1) { - MDB_cursor m2; - int retry = 1, readit = 0, n2 = num-1; - unsigned int i, j, k; - - /* If current list is too short, must fetch more and coalesce */ - if (mop[0] < (unsigned)num) - readit = 1; - - mdb_cursor_init(&m2, txn, FREE_DBI, NULL); - do { -#ifdef MDB_PARANOID /* Seems like we can ignore this now */ - /* If on freelist, don't try to read more. If what we have - * right now isn't enough just use new pages. - * TODO: get all of this working. Many circular dependencies... - */ - if (mc->mc_dbi == FREE_DBI) { - retry = 0; - readit = 0; - } -#endif - if (readit) { - MDB_val key, data; - pgno_t *idl, *mop2; - - last = txn->mt_env->me_pglast + 1; - - /* We haven't hit the readers list yet? */ - if (!oldest) { - MDB_reader *r; - unsigned int nr; - txnid_t mr; - - oldest = txn->mt_txnid - 1; - nr = txn->mt_env->me_txns->mti_numreaders; - r = txn->mt_env->me_txns->mti_readers; - for (i=0; i0 || j>0) { - if (i && idl[i] < mop[j]) - mop2[k--] = idl[i--]; - else - mop2[k--] = mop[j--]; - } - txn->mt_env->me_pglast = last; - free(txn->mt_env->me_pgfree); - txn->mt_env->me_pghead = txn->mt_env->me_pgfree = mop2; - mop = mop2; - /* Keep trying to read until we have enough */ - if (mop[0] < (unsigned)num) { - continue; - } - } - - /* current list has enough pages, but are they contiguous? 
*/ - for (i=mop[0]; i>=(unsigned)num; i--) { - if (mop[i-n2] == mop[i] + n2) { - pgno = mop[i]; - i -= n2; - /* move any stragglers down */ - for (j=i+num; j<=mop[0]; j++) - mop[i++] = mop[j]; - mop[0] -= num; - break; - } - } - - /* Stop if we succeeded, or no retries */ - if (!retry || pgno != P_INVALID) - break; - readit = 1; - - } while (1); - } else { - /* peel pages off tail, so we only have to truncate the list */ - pgno = MDB_IDL_LAST(mop); - mop[0]--; - } - if (MDB_IDL_IS_ZERO(mop)) { - free(txn->mt_env->me_pgfree); - txn->mt_env->me_pghead = txn->mt_env->me_pgfree = NULL; - } + /* Merge in descending sorted order */ + j = mop_len; + k = mop_len += i; + mop[0] = (pgno_t)-1; + old_id = mop[j]; + while (i) { + new_id = idl[i--]; + for (; old_id < new_id; old_id = mop[--j]) + mop[k--] = old_id; + mop[k--] = new_id; } + mop[0] = mop_len; } - if (pgno == P_INVALID) { - /* DB size is maxed out */ - if (txn->mt_next_pgno + num >= txn->mt_env->me_maxpg) { + /* Use new pages from the map when nothing suitable in the freeDB */ + i = 0; + pgno = txn->mt_next_pgno; + if (pgno + num >= env->me_maxpg) { DPUTS("DB size maxed out"); return MDB_MAP_FULL; - } } - if (txn->mt_env->me_flags & MDB_WRITEMAP) { - if (pgno == P_INVALID) { - pgno = txn->mt_next_pgno; - txn->mt_next_pgno += num; - } - np = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno); - np->mp_pgno = pgno; + +search_done: + if (env->me_flags & MDB_WRITEMAP) { + np = (MDB_page *)(env->me_map + env->me_psize * pgno); + insert = mdb_mid2l_append; } else { - if (txn->mt_env->me_dpages && num == 1) { - np = txn->mt_env->me_dpages; - VGMEMP_ALLOC(txn->mt_env, np, txn->mt_env->me_psize); - VGMEMP_DEFINED(np, sizeof(np->mp_next)); - txn->mt_env->me_dpages = np->mp_next; - } else { - size_t sz = txn->mt_env->me_psize * num; - if ((np = malloc(sz)) == NULL) - return ENOMEM; - VGMEMP_ALLOC(txn->mt_env, np, sz); - } - if (pgno == P_INVALID) { - np->mp_pgno = txn->mt_next_pgno; - txn->mt_next_pgno += num; - } else { - np->mp_pgno = pgno; - } + if (!(np = mdb_page_malloc(txn, num))) + return ENOMEM; + insert = mdb_mid2l_insert; } - mid.mid = np->mp_pgno; + if (i) { + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i-num; j < mop_len; ) + mop[++j] = mop[++i]; + } else { + txn->mt_next_pgno = pgno + num; + } + mid.mid = np->mp_pgno = pgno; mid.mptr = np; - if (txn->mt_env->me_flags & MDB_WRITEMAP) { - mdb_mid2l_append(txn->mt_u.dirty_list, &mid); - } else { - mdb_mid2l_insert(txn->mt_u.dirty_list, &mid); - } + insert(txn->mt_u.dirty_list, &mid); txn->mt_dirty_room--; *mp = np; return MDB_SUCCESS; } -/** Copy a page: avoid copying unused portions of the page. +/** Copy the used portions of a non-overflow page. * @param[in] dst page to copy into * @param[in] src page to copy from * @param[in] psize size of a page @@ -1559,17 +1507,19 @@ none: static void mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) { - dst->mp_flags = src->mp_flags | P_DIRTY; - dst->mp_pages = src->mp_pages; + enum { Align = sizeof(pgno_t) }; + indx_t upper = src->mp_upper, lower = src->mp_lower, unused = upper-lower; - if (IS_LEAF2(src)) { - memcpy(dst->mp_ptrs, src->mp_ptrs, psize - PAGEHDRSZ - SIZELEFT(src)); + /* If page isn't full, just copy the used portion. Adjust + * alignment so memcpy may copy words instead of bytes. 
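As a worked example of the alignment arithmetic below (illustrative numbers, assuming a build where sizeof(pgno_t) == 8 and psize == 4096): for lower == 70 and upper == 1802, unused == 1732 rounds down to 1728 and stays nonzero, so the split copy is taken; the header memcpy length becomes (70 + 7) & -8 == 72 bytes, upper rounds down to 1800, and the tail memcpy moves 4096 - 1800 == 2296 bytes, so both copies start on word boundaries. A page whose masked unused count drops to zero, or a LEAF2 page, falls through to the single memcpy of psize - unused bytes from the start of the page.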
+ */ + if ((unused &= -Align) && !IS_LEAF2(src)) { + upper &= -Align; + memcpy(dst, src, (lower + (Align-1)) & -Align); + memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), + psize - upper); } else { - unsigned int i, nkeys = NUMKEYS(src); - for (i=0; imp_ptrs[i] = src->mp_ptrs[i]; - memcpy((char *)dst+src->mp_upper, (char *)src+src->mp_upper, - psize - src->mp_upper); + memcpy(dst, src, psize - unused); } } @@ -1580,88 +1530,85 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) static int mdb_page_touch(MDB_cursor *mc) { - MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_page *mp = mc->mc_pg[mc->mc_top], *np; + MDB_txn *txn = mc->mc_txn; + MDB_cursor *m2, *m3; + MDB_dbi dbi; pgno_t pgno; int rc; if (!F_ISSET(mp->mp_flags, P_DIRTY)) { - MDB_page *np; - if ((rc = mdb_page_alloc(mc, 1, &np))) + if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) || + (rc = mdb_page_alloc(mc, 1, &np))) return rc; - DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi, mp->mp_pgno, np->mp_pgno); - assert(mp->mp_pgno != np->mp_pgno); - mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); - if (SIZELEFT(mp)) { - /* If page isn't full, just copy the used portion */ - mdb_page_copy(np, mp, mc->mc_txn->mt_env->me_psize); + pgno = np->mp_pgno; + DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi,mp->mp_pgno,pgno); + assert(mp->mp_pgno != pgno); + mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + /* Update the parent page, if any, to point to the new page */ + if (mc->mc_top) { + MDB_page *parent = mc->mc_pg[mc->mc_top-1]; + MDB_node *node = NODEPTR(parent, mc->mc_ki[mc->mc_top-1]); + SETPGNO(node, pgno); } else { - pgno = np->mp_pgno; - memcpy(np, mp, mc->mc_txn->mt_env->me_psize); - np->mp_pgno = pgno; - np->mp_flags |= P_DIRTY; + mc->mc_db->md_root = pgno; } - mp = np; - -finish: - /* Adjust other cursors pointing to mp */ - if (mc->mc_flags & C_SUB) { - MDB_cursor *m2, *m3; - MDB_dbi dbi = mc->mc_dbi-1; - - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (m2 == mc) continue; - m3 = &m2->mc_xcursor->mx_cursor; - if (m3->mc_snum < mc->mc_snum) continue; - if (m3->mc_pg[mc->mc_top] == mc->mc_pg[mc->mc_top]) { - m3->mc_pg[mc->mc_top] = mp; - } - } - } else { - MDB_cursor *m2; - - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { - if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; - if (m2->mc_pg[mc->mc_top] == mc->mc_pg[mc->mc_top]) { - m2->mc_pg[mc->mc_top] = mp; - } - } - } - mc->mc_pg[mc->mc_top] = mp; - /** If this page has a parent, update the parent to point to - * this new page. - */ - if (mc->mc_top) - SETPGNO(NODEPTR(mc->mc_pg[mc->mc_top-1], mc->mc_ki[mc->mc_top-1]), mp->mp_pgno); - else - mc->mc_db->md_root = mp->mp_pgno; - } else if (mc->mc_txn->mt_parent) { - MDB_page *np; - MDB_ID2 mid; + } else if (txn->mt_parent && !IS_SUBP(mp)) { + MDB_ID2 mid, *dl = txn->mt_u.dirty_list; + pgno = mp->mp_pgno; /* If txn has a parent, make sure the page is in our * dirty list. 
*/ - if (mc->mc_txn->mt_u.dirty_list[0].mid) { - unsigned x = mdb_mid2l_search(mc->mc_txn->mt_u.dirty_list, mp->mp_pgno); - if (x <= mc->mc_txn->mt_u.dirty_list[0].mid && - mc->mc_txn->mt_u.dirty_list[x].mid == mp->mp_pgno) { - if (mc->mc_txn->mt_u.dirty_list[x].mptr != mp) { - mp = mc->mc_txn->mt_u.dirty_list[x].mptr; - mc->mc_pg[mc->mc_top] = mp; - } + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + np = dl[x].mptr; + if (mp != np) + mc->mc_pg[mc->mc_top] = np; return 0; } } - assert(mc->mc_txn->mt_u.dirty_list[0].mid < MDB_IDL_UM_MAX); + assert(dl[0].mid < MDB_IDL_UM_MAX); /* No - copy it */ - np = mdb_page_malloc(mc); + np = mdb_page_malloc(txn, 1); if (!np) return ENOMEM; - memcpy(np, mp, mc->mc_txn->mt_env->me_psize); - mid.mid = np->mp_pgno; + mid.mid = pgno; mid.mptr = np; - mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &mid); - mp = np; - goto finish; + mdb_mid2l_insert(dl, &mid); + } else { + return 0; + } + + mdb_page_copy(np, mp, txn->mt_env->me_psize); + np->mp_pgno = pgno; + np->mp_flags |= P_DIRTY; + + /* Adjust cursors pointing to mp */ + mc->mc_pg[mc->mc_top] = np; + dbi = mc->mc_dbi; + if (mc->mc_flags & C_SUB) { + dbi--; + for (m2 = txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + m3 = &m2->mc_xcursor->mx_cursor; + if (m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[mc->mc_top] == mp) + m3->mc_pg[mc->mc_top] = np; + } + } else { + for (m2 = txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mp) { + m2->mc_pg[mc->mc_top] = np; + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + { + MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]); + if (!(leaf->mn_flags & F_SUBDATA)) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + } + } } return 0; } @@ -1779,8 +1726,11 @@ mdb_cursors_close(MDB_txn *txn, unsigned merge) } } +#ifdef MDB_DEBUG_SKIP +#define mdb_txn_reset0(txn, act) mdb_txn_reset0(txn) +#endif static void -mdb_txn_reset0(MDB_txn *txn); +mdb_txn_reset0(MDB_txn *txn, const char *act); /** Common code for #mdb_txn_begin() and #mdb_txn_renew(). * @param[in] txn the transaction handle to initialize @@ -1793,6 +1743,7 @@ mdb_txn_renew0(MDB_txn *txn) unsigned int i; uint16_t x; int rc, new_notls = 0; + pgno_t lastpg2; /* Setup db info */ txn->mt_numdbs = env->me_numdbs; @@ -1861,6 +1812,17 @@ mdb_txn_renew0(MDB_txn *txn) /* Copy the DB info and flags */ memcpy(txn->mt_dbs, env->me_metas[txn->mt_toggle]->mm_dbs, 2 * sizeof(MDB_db)); + /* In a read txn, there is a data race here. Make sure our + * last_pg/next_pg are up to date. + */ + lastpg2 = env->me_metas[txn->mt_toggle]->mm_last_pg+1; + if (lastpg2 != txn->mt_next_pgno) { + txn->mt_next_pgno = lastpg2; + /* When this situation occurs, the txnid will certainly also + * be out of date. But as noted before, we don't care about having + * up to date read txn IDs. 
+ */ + } for (i=2; imt_numdbs; i++) { x = env->me_dbflags[i]; txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS; @@ -1869,7 +1831,7 @@ mdb_txn_renew0(MDB_txn *txn) txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID; if (env->me_maxpg < txn->mt_next_pgno) { - mdb_txn_reset0(txn); + mdb_txn_reset0(txn, "renew0-mapfail"); if (new_notls) { txn->mt_u.reader->mr_pid = 0; txn->mt_u.reader = NULL; @@ -1957,11 +1919,11 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) txn->mt_toggle = parent->mt_toggle; txn->mt_dirty_room = parent->mt_dirty_room; txn->mt_u.dirty_list[0].mid = 0; - txn->mt_free_pgs[0] = 0; txn->mt_next_pgno = parent->mt_next_pgno; parent->mt_child = txn; txn->mt_parent = parent; txn->mt_numdbs = parent->mt_numdbs; + txn->mt_flags = parent->mt_flags; txn->mt_dbxs = parent->mt_dbxs; memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); /* Copy parent's mt_dbflags, but clear DB_NEW */ @@ -1972,17 +1934,16 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) ntxn->mnt_pgstate = env->me_pgstate; /* save parent me_pghead & co */ if (env->me_pghead) { size = MDB_IDL_SIZEOF(env->me_pghead); - env->me_pghead = malloc(size); + env->me_pghead = mdb_midl_alloc(env->me_pghead[0]); if (env->me_pghead) memcpy(env->me_pghead, ntxn->mnt_pgstate.mf_pghead, size); else rc = ENOMEM; } - env->me_pgfree = env->me_pghead; if (!rc) rc = mdb_cursor_shadow(parent, txn); if (rc) - mdb_txn_reset0(txn); + mdb_txn_reset0(txn, "beginchild-fail"); } else { rc = mdb_txn_renew0(txn); } @@ -2029,14 +1990,17 @@ mdb_dbis_update(MDB_txn *txn, int keep) * @param[in] txn the transaction handle to reset */ static void -mdb_txn_reset0(MDB_txn *txn) +mdb_txn_reset0(MDB_txn *txn, const char *act) { MDB_env *env = txn->mt_env; - unsigned int i; /* Close any DBI handles opened in this txn */ mdb_dbis_update(txn, 0); + DPRINTF("%s txn %zu%c %p on mdbenv %p, root page %zu", + act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { if (txn->mt_u.reader) { txn->mt_u.reader->mr_txnid = (txnid_t)-1; @@ -2046,25 +2010,12 @@ mdb_txn_reset0(MDB_txn *txn) txn->mt_numdbs = 0; /* close nothing if called again */ txn->mt_dbxs = NULL; /* mark txn as reset */ } else { - MDB_page *dp; - mdb_cursors_close(txn, 0); if (!(env->me_flags & MDB_WRITEMAP)) { - /* return all dirty pages to dpage list */ - for (i=1; i<=txn->mt_u.dirty_list[0].mid; i++) { - dp = txn->mt_u.dirty_list[i].mptr; - if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) { - mdb_page_free(txn->mt_env, dp); - } else { - /* large pages just get freed directly */ - VGMEMP_FREE(txn->mt_env, dp); - free(dp); - } - } + mdb_dlist_free(txn); } - - free(env->me_pgfree); + mdb_midl_free(env->me_pghead); if (txn->mt_parent) { txn->mt_parent->mt_child = NULL; @@ -2072,13 +2023,12 @@ mdb_txn_reset0(MDB_txn *txn) mdb_midl_free(txn->mt_free_pgs); free(txn->mt_u.dirty_list); return; - } else { - if (mdb_midl_shrink(&txn->mt_free_pgs)) - env->me_free_pgs = txn->mt_free_pgs; } - txn->mt_env->me_pghead = txn->mt_env->me_pgfree = NULL; - txn->mt_env->me_pglast = 0; + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; + env->me_pghead = NULL; + env->me_pglast = 0; env->me_txn = NULL; /* The writer mutex was locked in mdb_txn_begin. 
*/ @@ -2092,15 +2042,11 @@ mdb_txn_reset(MDB_txn *txn) if (txn == NULL) return; - DPRINTF("reset txn %zu%c %p on mdbenv %p, root page %zu", - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', - (void *) txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); - /* This call is only valid for read-only txns */ if (!(txn->mt_flags & MDB_TXN_RDONLY)) return; - mdb_txn_reset0(txn); + mdb_txn_reset0(txn, "reset"); } void @@ -2109,14 +2055,10 @@ mdb_txn_abort(MDB_txn *txn) if (txn == NULL) return; - DPRINTF("abort txn %zu%c %p on mdbenv %p, root page %zu", - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', - (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); - if (txn->mt_child) mdb_txn_abort(txn->mt_child); - mdb_txn_reset0(txn); + mdb_txn_reset0(txn, "abort"); /* Free reader slot tied to this txn (if MDB_NOTLS && writable FS) */ if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader) txn->mt_u.reader->mr_pid = 0; @@ -2124,18 +2066,266 @@ mdb_txn_abort(MDB_txn *txn) free(txn); } +/** Save the freelist as of this transaction to the freeDB. + * This changes the freelist. Keep trying until it stabilizes. + */ +static int +mdb_freelist_save(MDB_txn *txn) +{ + /* env->me_pghead[] can grow and shrink during this call. + * env->me_pglast and txn->mt_free_pgs[] can only grow. + * Page numbers cannot disappear from txn->mt_free_pgs[]. + */ + MDB_cursor mc; + MDB_env *env = txn->mt_env; + int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1; + txnid_t pglast = 0, head_id = 0; + pgno_t freecnt = 0, *free_pgs, *mop; + ssize_t head_room = 0, total_room = 0, mop_len; + + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + + if (env->me_pghead) { + /* Make sure first page of freeDB is touched and on freelist */ + rc = mdb_page_search(&mc, NULL, MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + + for (;;) { + /* Come back here after each Put() in case freelist changed */ + MDB_val key, data; + + /* If using records from freeDB which we have not yet + * deleted, delete them and any we reserved for me_pghead. + */ + while (pglast < env->me_pglast) { + rc = mdb_cursor_first(&mc, &key, NULL); + if (rc) + return rc; + pglast = head_id = *(txnid_t *)key.mv_data; + total_room = head_room = 0; + assert(pglast <= env->me_pglast); + rc = mdb_cursor_del(&mc, 0); + if (rc) + return rc; + } + + /* Save the IDL of pages freed by this txn, to a single record */ + if (freecnt < txn->mt_free_pgs[0]) { + if (!freecnt) { + /* Make sure last page of freeDB is touched and on freelist */ + key.mv_size = MDB_MAXKEYSIZE+1; + key.mv_data = NULL; + rc = mdb_page_search(&mc, &key, MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + free_pgs = txn->mt_free_pgs; + /* Write to last page of freeDB */ + key.mv_size = sizeof(txn->mt_txnid); + key.mv_data = &txn->mt_txnid; + do { + freecnt = free_pgs[0]; + data.mv_size = MDB_IDL_SIZEOF(free_pgs); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + /* Retry if mt_free_pgs[] grew during the Put() */ + free_pgs = txn->mt_free_pgs; + } while (freecnt < free_pgs[0]); + mdb_midl_sort(free_pgs); + memcpy(data.mv_data, free_pgs, data.mv_size); +#if MDB_DEBUG > 1 + { + unsigned int i = free_pgs[0]; + DPRINTF("IDL write txn %zu root %zu num %u", + txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i); + for (; i; i--) + DPRINTF("IDL %zu", free_pgs[i]); + } +#endif + continue; + } + + mop = env->me_pghead; + mop_len = mop ? mop[0] : 0; + + /* Reserve records for me_pghead[]. 
Split it if multi-page, + * to avoid searching freeDB for a page range. Use keys in + * range [1,me_pglast]: Smaller than txnid of oldest reader. + */ + if (total_room >= mop_len) { + if (total_room == mop_len || --more < 0) + break; + } else if (head_room >= maxfree_1pg && head_id > 1) { + /* Keep current record (overflow page), add a new one */ + head_id--; + head_room = 0; + } + /* (Re)write {key = head_id, IDL length = head_room} */ + total_room -= head_room; + head_room = mop_len - total_room; + if (head_room > maxfree_1pg && head_id > 1) { + /* Overflow multi-page for part of me_pghead */ + head_room /= head_id; /* amortize page sizes */ + head_room += maxfree_1pg - head_room % (maxfree_1pg + 1); + } else if (head_room < 0) { + /* Rare case, not bothering to delete this record */ + head_room = 0; + } + key.mv_size = sizeof(head_id); + key.mv_data = &head_id; + data.mv_size = (head_room + 1) * sizeof(pgno_t); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + *(MDB_ID *)data.mv_data = 0; /* IDL is initially empty */ + total_room += head_room; + } + + /* Fill in the reserved, touched me_pghead records. Avoid write ops + * so they cannot rearrange anything, just read the destinations. + */ + rc = MDB_SUCCESS; + if (mop_len) { + MDB_val key, data; + + mop += mop_len + 1; + rc = mdb_cursor_first(&mc, &key, &data); + for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) { + MDB_IDL dest = data.mv_data; + ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1; + + assert(len >= 0 && *(txnid_t*)key.mv_data <= env->me_pglast); + if (len > mop_len) + len = mop_len; + *dest++ = len; + memcpy(dest, mop -= len, len * sizeof(MDB_ID)); + if (! (mop_len -= len)) + break; + } + } + return rc; +} + +/** Flush dirty pages to the map, after clearing their dirty flag. + */ +static int +mdb_page_flush(MDB_txn *txn) +{ + MDB_env *env = txn->mt_env; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned psize = env->me_psize; + int i, pagecount = dl[0].mid, rc; + size_t size = 0, pos = 0; + pgno_t pgno; + MDB_page *dp = NULL; +#ifdef _WIN32 + OVERLAPPED ov; +#else + struct iovec iov[MDB_COMMIT_PAGES]; + ssize_t wpos, wsize = 0, wres; + size_t next_pos = 1; /* impossible pos, so pos != next_pos */ + int n = 0; +#endif + + if (env->me_flags & MDB_WRITEMAP) { + /* Clear dirty flags */ + for (i = pagecount; i; i--) { + dp = dl[i].mptr; + dp->mp_flags &= ~P_DIRTY; + } + dl[0].mid = 0; + return MDB_SUCCESS; + } + + /* Write the pages */ + for (i = 1;; i++) { + if (i <= pagecount) { + dp = dl[i].mptr; + pgno = dl[i].mid; + /* clear dirty flag */ + dp->mp_flags &= ~P_DIRTY; + pos = pgno * psize; + size = psize; + if (IS_OVERFLOW(dp)) size *= dp->mp_pages; + } +#ifdef _WIN32 + else break; + + /* Windows actually supports scatter/gather I/O, but only on + * unbuffered file handles. Since we're relying on the OS page + * cache for all our data, that's self-defeating. So we just + * write pages one at a time. We use the ov structure to set + * the write offset, to at least save the overhead of a Seek + * system call. + */ + DPRINTF("committing page %zu", pgno); + memset(&ov, 0, sizeof(ov)); + ov.Offset = pos & 0xffffffff; + ov.OffsetHigh = pos >> 16 >> 16; + if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) { + rc = ErrCode(); + DPRINTF("WriteFile: %d", rc); + return rc; + } +#else + /* Write up to MDB_COMMIT_PAGES dirty pages at a time. 
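The flush condition just below batches physically contiguous dirty pages into one vectored write, flushing whenever a gap appears (pos != next_pos), the iovec array is full (n == MDB_COMMIT_PAGES), or the batch would exceed MAX_WRITE (2 GiB with an 8-byte ssize_t, 1 GiB with a 4-byte one). A stripped-down sketch of the same gather-write pattern, independent of LMDB's structures; the buffer layout, the iovec limit of 16 and the lack of short-write handling are assumptions for illustration only:

	#include <sys/types.h>
	#include <sys/uio.h>
	#include <unistd.h>

	/* Write n_pages page-sized buffers at ascending page numbers pgnos[],
	 * coalescing each run of adjacent pages into a single writev(). */
	static int flush_pages(int fd, char *const bufs[], const size_t pgnos[],
	                       int n_pages, size_t psize)
	{
		struct iovec iov[16];		/* stand-in for MDB_COMMIT_PAGES */
		int i, n = 0;
		off_t wpos = 0;
		size_t next = (size_t)-1;	/* impossible value: forces first batch */

		for (i = 0; i <= n_pages; i++) {
			if (i == n_pages || pgnos[i] != next || n == 16) {
				/* Flush the previous run, if any */
				if (n && (lseek(fd, wpos, SEEK_SET) < 0 ||
				          writev(fd, iov, n) < 0))
					return -1;	/* caller checks errno */
				n = 0;
				if (i == n_pages)
					break;
				wpos = (off_t)(pgnos[i] * psize);
				next = pgnos[i];
			}
			iov[n].iov_base = bufs[i];
			iov[n].iov_len = psize;
			n++;
			next++;
		}
		return 0;
	}

Unlike this sketch, the code in the patch checks the byte count of each write (so short writes are reported), can use pwritev() when MDB_USE_PWRITEV is defined, clears each page's dirty flag as it is queued, and recycles the buffers afterwards.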
*/ + if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) { + if (n) { + /* Write previous page(s) */ +#ifdef MDB_USE_PWRITEV + wres = pwritev(env->me_fd, iov, n, wpos); +#else + if (n == 1) { + wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos); + } else { + if (lseek(env->me_fd, wpos, SEEK_SET) == -1) { + rc = ErrCode(); + DPRINTF("lseek: %s", strerror(rc)); + return rc; + } + wres = writev(env->me_fd, iov, n); + } +#endif + if (wres != wsize) { + if (wres < 0) { + rc = ErrCode(); + DPRINTF("Write error: %s", strerror(rc)); + } else { + rc = EIO; /* TODO: Use which error code? */ + DPUTS("short write, filesystem full?"); + } + return rc; + } + n = 0; + } + if (i > pagecount) + break; + wpos = pos; + wsize = 0; + } + DPRINTF("committing page %zu", pgno); + next_pos = pos + size; + iov[n].iov_len = size; + iov[n].iov_base = (char *)dp; + wsize += size; + n++; +#endif /* _WIN32 */ + } + + mdb_dlist_free(txn); + + return MDB_SUCCESS; +} + int mdb_txn_commit(MDB_txn *txn) { - int n, done; + int rc; unsigned int i; - ssize_t rc; - off_t size; - MDB_page *dp; MDB_env *env; - pgno_t next, freecnt; - txnid_t oldpg_txnid, id; - MDB_cursor mc; assert(txn != NULL); assert(txn->mt_env != NULL); @@ -2143,10 +2333,8 @@ mdb_txn_commit(MDB_txn *txn) if (txn->mt_child) { rc = mdb_txn_commit(txn->mt_child); txn->mt_child = NULL; - if (rc) { - mdb_txn_abort(txn); - return rc; - } + if (rc) + goto fail; } env = txn->mt_env; @@ -2162,8 +2350,8 @@ mdb_txn_commit(MDB_txn *txn) DPUTS("error flag is set, can't commit"); if (txn->mt_parent) txn->mt_parent->mt_flags |= MDB_TXN_ERROR; - mdb_txn_abort(txn); - return EINVAL; + rc = EINVAL; + goto fail; } if (txn->mt_parent) { @@ -2172,10 +2360,9 @@ mdb_txn_commit(MDB_txn *txn) MDB_ID2L dst, src; /* Append our free list to parent's */ - if (mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs)) { - mdb_txn_abort(txn); - return ENOMEM; - } + rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs); + if (rc) + goto fail; mdb_midl_free(txn->mt_free_pgs); parent->mt_next_pgno = txn->mt_next_pgno; @@ -2230,15 +2417,15 @@ mdb_txn_commit(MDB_txn *txn) parent->mt_dirty_room = txn->mt_dirty_room; txn->mt_parent->mt_child = NULL; - free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pgfree); + mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead); free(txn); return MDB_SUCCESS; } if (txn != env->me_txn) { DPUTS("attempt to commit unknown transaction"); - mdb_txn_abort(txn); - return EINVAL; + rc = EINVAL; + goto fail; } mdb_cursors_close(txn, 0); @@ -2251,6 +2438,7 @@ mdb_txn_commit(MDB_txn *txn) /* Update DB root pointers */ if (txn->mt_numdbs > 2) { + MDB_cursor mc; MDB_dbi i; MDB_val data; data.mv_size = sizeof(MDB_db); @@ -2266,274 +2454,23 @@ mdb_txn_commit(MDB_txn *txn) } } - /* Save the freelist as of this transaction to the freeDB. This - * can change the freelist, so keep trying until it stabilizes. - * - * env->me_pglast and the length of txn->mt_free_pgs cannot decrease, - * except the code below can decrease env->me_pglast to split pghead. - * Page numbers cannot disappear from txn->mt_free_pgs. New pages - * can only appear in env->me_pghead when env->me_pglast increases. - * Until then, the me_pghead pointer won't move but can become NULL. 
- */ + rc = mdb_freelist_save(txn); + if (rc) + goto fail; - mdb_cursor_init(&mc, txn, FREE_DBI, NULL); - oldpg_txnid = id = 0; - freecnt = 0; - - /* should only be one record now */ - if (env->me_pghead || env->me_pglast) { - /* make sure first page of freeDB is touched and on freelist */ - rc = mdb_page_search(&mc, NULL, MDB_PS_MODIFY); - if (rc && rc != MDB_NOTFOUND) { -fail: - mdb_txn_abort(txn); - return rc; - } - } - - /* Delete IDLs we used from the free list */ - if (env->me_pglast) { - MDB_val key; - - do { -free_pgfirst: - rc = mdb_cursor_first(&mc, &key, NULL); - if (rc) - goto fail; - oldpg_txnid = *(txnid_t *)key.mv_data; -again: - assert(oldpg_txnid <= env->me_pglast); - id = 0; - rc = mdb_cursor_del(&mc, 0); - if (rc) - goto fail; - } while (oldpg_txnid < env->me_pglast); - } - - /* Save IDL of pages freed by this txn, to freeDB */ -free2: - if (freecnt != txn->mt_free_pgs[0]) { - MDB_val key, data; - - /* make sure last page of freeDB is touched and on freelist */ - key.mv_size = MDB_MAXKEYSIZE+1; - key.mv_data = NULL; - rc = mdb_page_search(&mc, &key, MDB_PS_MODIFY); - if (rc && rc != MDB_NOTFOUND) - goto fail; - -#if MDB_DEBUG > 1 - { - unsigned int i; - MDB_IDL idl = txn->mt_free_pgs; - mdb_midl_sort(txn->mt_free_pgs); - DPRINTF("IDL write txn %zu root %zu num %zu", - txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]); - for (i=1; i<=idl[0]; i++) { - DPRINTF("IDL %zu", idl[i]); - } - } -#endif - /* write to last page of freeDB */ - key.mv_size = sizeof(pgno_t); - key.mv_data = &txn->mt_txnid; - /* The free list can still grow during this call, - * despite the pre-emptive touches above. So retry - * until the reserved space remains big enough. - */ - do { - assert(freecnt < txn->mt_free_pgs[0]); - freecnt = txn->mt_free_pgs[0]; - data.mv_size = MDB_IDL_SIZEOF(txn->mt_free_pgs); - rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); - if (rc) - goto fail; - } while (freecnt != txn->mt_free_pgs[0]); - mdb_midl_sort(txn->mt_free_pgs); - memcpy(data.mv_data, txn->mt_free_pgs, data.mv_size); - if (oldpg_txnid < env->me_pglast || (!env->me_pghead && id)) - goto free_pgfirst; /* used up freeDB[oldpg_txnid] */ - } - - /* Put back page numbers we took from freeDB but did not use */ - if (env->me_pghead) { - for (;;) { - MDB_val key, data; - pgno_t orig, *mop; - - mop = env->me_pghead; - id = env->me_pglast; - key.mv_size = sizeof(id); - key.mv_data = &id; - /* These steps may grow the freelist again - * due to freed overflow pages... - */ - i = 2; - do { - orig = mop[0]; - if (orig > env->me_maxfree_1pg && id > 4) - orig = env->me_maxfree_1pg; /* Do not use more than 1 page */ - data.mv_size = (orig + 1) * sizeof(pgno_t); - rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); - if (rc) - goto fail; - assert(!env->me_pghead || env->me_pglast); - /* mop could have been used again here */ - if (id != env->me_pglast || env->me_pghead == NULL) - goto again; /* was completely used up */ - assert(mop == env->me_pghead); - } while (mop[0] < orig && --i); - memcpy(data.mv_data, mop, data.mv_size); - if (mop[0] <= orig) - break; - *(pgno_t *)data.mv_data = orig; - mop[orig] = mop[0] - orig; - env->me_pghead = mop += orig; - /* Save more oldpages at the previous txnid. 
*/ - assert(env->me_pglast == id && id == oldpg_txnid); - env->me_pglast = --oldpg_txnid; - } - } - - /* Check for growth of freelist again */ - if (freecnt != txn->mt_free_pgs[0]) - goto free2; - - free(env->me_pgfree); - env->me_pghead = env->me_pgfree = NULL; - - if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) { - if (mdb_midl_shrink(&txn->mt_free_pgs)) - env->me_free_pgs = txn->mt_free_pgs; - } + mdb_midl_free(env->me_pghead); + env->me_pghead = NULL; + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; #if MDB_DEBUG > 2 mdb_audit(txn); #endif - if (env->me_flags & MDB_WRITEMAP) { - for (i=1; i<=txn->mt_u.dirty_list[0].mid; i++) { - dp = txn->mt_u.dirty_list[i].mptr; - /* clear dirty flag */ - dp->mp_flags &= ~P_DIRTY; - txn->mt_u.dirty_list[i].mid = 0; - } - txn->mt_u.dirty_list[0].mid = 0; - goto sync; - } - - /* Commit up to MDB_COMMIT_PAGES dirty pages to disk until done. - */ - next = 0; - i = 1; - do { -#ifdef _WIN32 - /* Windows actually supports scatter/gather I/O, but only on - * unbuffered file handles. Since we're relying on the OS page - * cache for all our data, that's self-defeating. So we just - * write pages one at a time. We use the ov structure to set - * the write offset, to at least save the overhead of a Seek - * system call. - */ - OVERLAPPED ov; - memset(&ov, 0, sizeof(ov)); - for (; i<=txn->mt_u.dirty_list[0].mid; i++) { - size_t wsize; - dp = txn->mt_u.dirty_list[i].mptr; - DPRINTF("committing page %zu", dp->mp_pgno); - size = dp->mp_pgno * env->me_psize; - ov.Offset = size & 0xffffffff; - ov.OffsetHigh = size >> 16; - ov.OffsetHigh >>= 16; - /* clear dirty flag */ - dp->mp_flags &= ~P_DIRTY; - wsize = env->me_psize; - if (IS_OVERFLOW(dp)) wsize *= dp->mp_pages; - rc = WriteFile(env->me_fd, dp, wsize, NULL, &ov); - if (!rc) { - n = ErrCode(); - DPRINTF("WriteFile: %d", n); - mdb_txn_abort(txn); - return n; - } - } - done = 1; -#else - struct iovec iov[MDB_COMMIT_PAGES]; - n = 0; - done = 1; - size = 0; - for (; i<=txn->mt_u.dirty_list[0].mid; i++) { - dp = txn->mt_u.dirty_list[i].mptr; - if (dp->mp_pgno != next) { - if (n) { - rc = writev(env->me_fd, iov, n); - if (rc != size) { - n = ErrCode(); - if (rc > 0) - DPUTS("short write, filesystem full?"); - else - DPRINTF("writev: %s", strerror(n)); - mdb_txn_abort(txn); - return n; - } - n = 0; - size = 0; - } - lseek(env->me_fd, dp->mp_pgno * env->me_psize, SEEK_SET); - next = dp->mp_pgno; - } - DPRINTF("committing page %zu", dp->mp_pgno); - iov[n].iov_len = env->me_psize; - if (IS_OVERFLOW(dp)) iov[n].iov_len *= dp->mp_pages; - iov[n].iov_base = (char *)dp; - size += iov[n].iov_len; - next = dp->mp_pgno + (IS_OVERFLOW(dp) ? dp->mp_pages : 1); - /* clear dirty flag */ - dp->mp_flags &= ~P_DIRTY; - if (++n >= MDB_COMMIT_PAGES) { - done = 0; - i++; - break; - } - } - - if (n == 0) - break; - - rc = writev(env->me_fd, iov, n); - if (rc != size) { - n = ErrCode(); - if (rc > 0) - DPUTS("short write, filesystem full?"); - else - DPRINTF("writev: %s", strerror(n)); - mdb_txn_abort(txn); - return n; - } -#endif - } while (!done); - - /* Drop the dirty pages. 
- */ - for (i=1; i<=txn->mt_u.dirty_list[0].mid; i++) { - dp = txn->mt_u.dirty_list[i].mptr; - if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) { - mdb_page_free(txn->mt_env, dp); - } else { - VGMEMP_FREE(txn->mt_env, dp); - free(dp); - } - txn->mt_u.dirty_list[i].mid = 0; - } - txn->mt_u.dirty_list[0].mid = 0; - -sync: - if ((n = mdb_env_sync(env, 0)) != 0 || - (n = mdb_env_write_meta(txn)) != MDB_SUCCESS) { - mdb_txn_abort(txn); - return n; - } + if ((rc = mdb_page_flush(txn)) || + (rc = mdb_env_sync(env, 0)) || + (rc = mdb_env_write_meta(txn))) + goto fail; done: env->me_pglast = 0; @@ -2544,6 +2481,10 @@ done: free(txn); return MDB_SUCCESS; + +fail: + mdb_txn_abort(txn); + return rc; } /** Read the environment parameters of a DB environment before @@ -2558,27 +2499,28 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) MDB_pagebuf pbuf; MDB_page *p; MDB_meta *m; - int i, rc, err; + int i, rc, off; /* We don't know the page size yet, so use a minimum value. * Read both meta pages so we can use the latest one. */ - for (i=0; i<2; i++) { + for (i=off=0; i<2; i++, off = meta->mm_psize) { #ifdef _WIN32 - if (!ReadFile(env->me_fd, &pbuf, MDB_PAGESIZE, (DWORD *)&rc, NULL) || rc == 0) + DWORD len; + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + rc = ReadFile(env->me_fd,&pbuf,MDB_PAGESIZE,&len,&ov) ? (int)len : -1; #else - if ((rc = read(env->me_fd, &pbuf, MDB_PAGESIZE)) == 0) + rc = pread(env->me_fd, &pbuf, MDB_PAGESIZE, off); #endif - { - return ENOENT; - } - else if (rc != MDB_PAGESIZE) { - err = ErrCode(); - if (rc > 0) - err = MDB_INVALID; - DPRINTF("read: %s", strerror(err)); - return err; + if (rc != MDB_PAGESIZE) { + if (rc == 0 && off == 0) + return ENOENT; + rc = rc < 0 ? (int) ErrCode() : MDB_INVALID; + DPRINTF("read: %s", mdb_strerror(rc)); + return rc; } p = (MDB_page *)&pbuf; @@ -2600,18 +2542,8 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) return MDB_VERSION_MISMATCH; } - if (i) { - if (m->mm_txnid > meta->mm_txnid) - memcpy(meta, m, sizeof(*m)); - } else { - memcpy(meta, m, sizeof(*m)); -#ifdef _WIN32 - if (SetFilePointer(env->me_fd, meta->mm_psize, NULL, FILE_BEGIN) != meta->mm_psize) -#else - if (lseek(env->me_fd, meta->mm_psize, SEEK_SET) != meta->mm_psize) -#endif - return ErrCode(); - } + if (off == 0 || m->mm_txnid > meta->mm_txnid) + *meta = *m; } return 0; } @@ -2625,7 +2557,6 @@ static int mdb_env_init_meta(MDB_env *env, MDB_meta *meta) { MDB_page *p, *q; - MDB_meta *m; int rc; unsigned int psize; @@ -2646,29 +2577,24 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) p = calloc(2, psize); p->mp_pgno = 0; p->mp_flags = P_META; - - m = METADATA(p); - memcpy(m, meta, sizeof(*meta)); + *(MDB_meta *)METADATA(p) = *meta; q = (MDB_page *)((char *)p + psize); - q->mp_pgno = 1; q->mp_flags = P_META; - - m = METADATA(q); - memcpy(m, meta, sizeof(*meta)); + *(MDB_meta *)METADATA(q) = *meta; #ifdef _WIN32 { DWORD len; - SetFilePointer(env->me_fd, 0, NULL, FILE_BEGIN); - rc = WriteFile(env->me_fd, p, psize * 2, &len, NULL); - rc = (len == psize * 2) ? MDB_SUCCESS : ErrCode(); + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); + rc = WriteFile(env->me_fd, p, psize * 2, &len, &ov); + rc = rc ? (len == psize * 2 ? MDB_SUCCESS : EIO) : ErrCode(); } #else - lseek(env->me_fd, 0, SEEK_SET); - rc = write(env->me_fd, p, psize * 2); - rc = (rc == (int)psize * 2) ? MDB_SUCCESS : ErrCode(); + rc = pwrite(env->me_fd, p, psize * 2, 0); + rc = (rc == (int)psize * 2) ? MDB_SUCCESS : rc < 0 ? 
ErrCode() : EIO; #endif free(p); return rc; @@ -2689,6 +2615,8 @@ mdb_env_write_meta(MDB_txn *txn) HANDLE mfd; #ifdef _WIN32 OVERLAPPED ov; +#else + int r2; #endif assert(txn != NULL); @@ -2751,14 +2679,14 @@ mdb_env_write_meta(MDB_txn *txn) { memset(&ov, 0, sizeof(ov)); ov.Offset = off; - WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov); + if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov)) + rc = -1; } #else rc = pwrite(mfd, ptr, len, off); #endif if (rc != len) { - int r2; - rc = ErrCode(); + rc = rc < 0 ? ErrCode() : EIO; DPUTS("write failed, disk error?"); /* On a failure, the pagecache still contains the new data. * Write some old data back, to prevent it from being used. @@ -2767,6 +2695,8 @@ mdb_env_write_meta(MDB_txn *txn) meta.mm_last_pg = metab.mm_last_pg; meta.mm_txnid = metab.mm_txnid; #ifdef _WIN32 + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; WriteFile(env->me_fd, ptr, len, NULL, &ov); #else r2 = pwrite(env->me_fd, ptr, len, off); @@ -2782,7 +2712,7 @@ done: * readers will get consistent data regardless of how fresh or * how stale their view of these values is. */ - txn->mt_env->me_txns->mti_txnid = txn->mt_txnid; + env->me_txns->mti_txnid = txn->mt_txnid; return MDB_SUCCESS; } @@ -2865,9 +2795,12 @@ static int mdb_env_open2(MDB_env *env) { unsigned int flags = env->me_flags; - int i, newenv = 0, prot; + int i, newenv = 0; MDB_meta meta; MDB_page *p; +#ifndef _WIN32 + int prot; +#endif memset(&meta, 0, sizeof(meta)); @@ -2895,19 +2828,19 @@ mdb_env_open2(MDB_env *env) #ifdef _WIN32 { + int rc; HANDLE mh; LONG sizelo, sizehi; sizelo = env->me_mapsize & 0xffffffff; - sizehi = env->me_mapsize >> 16; /* pointless on WIN32, only needed on W64 */ - sizehi >>= 16; + sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */ /* Windows won't create mappings for zero length files. * Just allocate the maxsize right now. */ if (newenv) { - SetFilePointer(env->me_fd, sizelo, sizehi ? &sizehi : NULL, 0); - if (!SetEndOfFile(env->me_fd)) + if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo + || !SetEndOfFile(env->me_fd) + || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) return ErrCode(); - SetFilePointer(env->me_fd, 0, NULL, 0); } mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ? PAGE_READWRITE : PAGE_READONLY, @@ -2917,9 +2850,10 @@ mdb_env_open2(MDB_env *env) env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? FILE_MAP_WRITE : FILE_MAP_READ, 0, 0, env->me_mapsize, meta.mm_address); + rc = env->me_map ? 
0 : ErrCode(); CloseHandle(mh); - if (!env->me_map) - return ErrCode(); + if (rc) + return rc; } #else i = MAP_SHARED; @@ -3288,12 +3222,14 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) size = GetFileSize(env->me_lfd, NULL); #else size = lseek(env->me_lfd, 0, SEEK_END); + if (size == -1) goto fail_errno; #endif rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); if (size < rsize && *excl > 0) { #ifdef _WIN32 - SetFilePointer(env->me_lfd, rsize, NULL, 0); - if (!SetEndOfFile(env->me_lfd)) goto fail_errno; + if (SetFilePointer(env->me_lfd, rsize, NULL, FILE_BEGIN) != rsize + || !SetEndOfFile(env->me_lfd)) + goto fail_errno; #else if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno; #endif @@ -3408,7 +3344,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) goto fail; } rc = ErrCode(); - if (rc != EACCES && rc != EAGAIN) { + if (rc && rc != EACCES && rc != EAGAIN) { goto fail; } #ifdef _WIN32 @@ -3572,8 +3508,7 @@ mdb_env_close0(MDB_env *env, int excl) free(env->me_dbxs); free(env->me_path); free(env->me_dirty_list); - if (env->me_free_pgs) - mdb_midl_free(env->me_free_pgs); + mdb_midl_free(env->me_free_pgs); if (env->me_flags & MDB_ENV_TXKEY) { pthread_key_delete(env->me_txkey); @@ -3592,9 +3527,9 @@ mdb_env_close0(MDB_env *env, int excl) munmap(env->me_map, env->me_mapsize); } if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) - close(env->me_mfd); + (void) close(env->me_mfd); if (env->me_fd != INVALID_HANDLE_VALUE) - close(env->me_fd); + (void) close(env->me_fd); if (env->me_txns) { pid_t pid = env->me_pid; /* Clearing readers is done in this function because @@ -3638,19 +3573,99 @@ mdb_env_close0(MDB_env *env, int excl) UnlockFile(env->me_lfd, 0, 0, 1, 0); } #endif - close(env->me_lfd); + (void) close(env->me_lfd); } env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); } int -mdb_env_copy(MDB_env *env, const char *path) +mdb_env_copyfd(MDB_env *env, HANDLE fd) { MDB_txn *txn = NULL; - int rc, len; + int rc; size_t wsize; - char *lpath, *ptr; + char *ptr; + + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. + */ + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + if (env->me_txns) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn, "reset-stage1"); + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } + } + + wsize = env->me_psize * 2; +#ifdef _WIN32 + { + DWORD len; + rc = WriteFile(fd, env->me_map, wsize, &len, NULL); + rc = rc ? (len == wsize ? MDB_SUCCESS : EIO) : ErrCode(); + } +#else + rc = write(fd, env->me_map, wsize); + rc = rc == (int)wsize ? MDB_SUCCESS : rc < 0 ? ErrCode() : EIO; +#endif + if (env->me_txns) + UNLOCK_MUTEX_W(env); + + if (rc) + goto leave; + + ptr = env->me_map + wsize; + wsize = txn->mt_next_pgno * env->me_psize - wsize; +#ifdef _WIN32 + while (wsize > 0) { + DWORD len, w2; + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + rc = WriteFile(fd, ptr, w2, &len, NULL); + rc = rc ? (len == w2 ? MDB_SUCCESS : EIO) : ErrCode(); + if (rc) break; + wsize -= w2; + ptr += w2; + } +#else + while (wsize > 0) { + size_t w2; + ssize_t wres; + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + wres = write(fd, ptr, w2); + rc = wres == (ssize_t)w2 ? MDB_SUCCESS : wres < 0 ? 
ErrCode() : EIO; + if (rc) break; + wsize -= wres; + ptr += wres; + } +#endif + +leave: + mdb_txn_abort(txn); + return rc; +} + +int +mdb_env_copy(MDB_env *env, const char *path) +{ + int rc, len; + char *lpath; HANDLE newfd = INVALID_HANDLE_VALUE; if (env->me_flags & MDB_NOSUBDIR) { @@ -3678,8 +3693,6 @@ mdb_env_copy(MDB_env *env, const char *path) #endif , 0666); #endif - if (!(env->me_flags & MDB_NOSUBDIR)) - free(lpath); if (newfd == INVALID_HANDLE_VALUE) { rc = ErrCode(); goto leave; @@ -3693,80 +3706,14 @@ mdb_env_copy(MDB_env *env, const char *path) } #endif - /* Do the lock/unlock of the reader mutex before starting the - * write txn. Otherwise other read txns could block writers. - */ - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) - goto leave; - - if (env->me_txns) { - /* We must start the actual read txn after blocking writers */ - mdb_txn_reset0(txn); - - /* Temporarily block writers until we snapshot the meta pages */ - LOCK_MUTEX_W(env); - - rc = mdb_txn_renew0(txn); - if (rc) { - UNLOCK_MUTEX_W(env); - goto leave; - } - } - - wsize = env->me_psize * 2; -#ifdef _WIN32 - { - DWORD len; - rc = WriteFile(newfd, env->me_map, wsize, &len, NULL); - rc = (len == wsize) ? MDB_SUCCESS : ErrCode(); - } -#else - rc = write(newfd, env->me_map, wsize); - rc = (rc == (int)wsize) ? MDB_SUCCESS : ErrCode(); -#endif - if (env->me_txns) - UNLOCK_MUTEX_W(env); - - if (rc) - goto leave; - - ptr = env->me_map + wsize; - wsize = txn->mt_next_pgno * env->me_psize - wsize; -#define MAX_WRITE 2147483648U -#ifdef _WIN32 - while (wsize > 0) { - DWORD len, w2; - if (wsize > MAX_WRITE) - w2 = MAX_WRITE; - else - w2 = wsize; - rc = WriteFile(newfd, ptr, w2, &len, NULL); - rc = (len == w2) ? MDB_SUCCESS : ErrCode(); - if (rc) break; - wsize -= w2; - ptr += w2; - } -#else - while (wsize > 0) { - size_t w2; - ssize_t wres; - if (wsize > MAX_WRITE) - w2 = MAX_WRITE; - else - w2 = wsize; - wres = write(newfd, ptr, w2); - rc = (wres > 0) ? MDB_SUCCESS : ErrCode(); - if (rc) break; - wsize -= wres; - ptr += wres; - } -#endif + rc = mdb_env_copyfd(env, newfd); leave: - mdb_txn_abort(txn); + if (!(env->me_flags & MDB_NOSUBDIR)) + free(lpath); if (newfd != INVALID_HANDLE_VALUE) - close(newfd); + if (close(newfd) < 0 && rc == MDB_SUCCESS) + rc = ErrCode(); return rc; } @@ -4032,17 +3979,20 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) * @param[in] txn the transaction for this access. * @param[in] pgno the page number for the page to retrieve. * @param[out] ret address of a pointer where the page's address will be stored. + * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. * @return 0 on success, non-zero on failure. */ static int -mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret) +mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) { MDB_page *p = NULL; + int level; if (!((txn->mt_flags & MDB_TXN_RDONLY) | (txn->mt_env->me_flags & MDB_WRITEMAP))) { MDB_txn *tx2 = txn; + level = 1; do { MDB_ID2L dl = tx2->mt_u.dirty_list; if (dl[0].mid) { @@ -4052,19 +4002,24 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret) goto done; } } + level++; } while ((tx2 = tx2->mt_parent) != NULL); } if (pgno < txn->mt_next_pgno) { + level = 0; p = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno); } else { DPRINTF("page %zu not found", pgno); assert(p != NULL); + return MDB_PAGE_NOTFOUND; } done: *ret = p; - return (p != NULL) ? 
MDB_SUCCESS : MDB_PAGE_NOTFOUND; + if (lvl) + *lvl = level; + return MDB_SUCCESS; } /** Search for the page a given key should be in. @@ -4118,7 +4073,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int modify) assert(i < NUMKEYS(mp)); node = NODEPTR(mp, i); - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp))) + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) return rc; mc->mc_ki[mc->mc_top] = i; @@ -4157,7 +4112,7 @@ mdb_page_search_lowest(MDB_cursor *mc) MDB_node *node = NODEPTR(mp, 0); int rc; - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp))) + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) return rc; mc->mc_ki[mc->mc_top] = 0; @@ -4209,7 +4164,9 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) &mc->mc_dbx->md_name, &exact); if (!exact) return MDB_NOTFOUND; - mdb_node_read(mc->mc_txn, leaf, &data); + rc = mdb_node_read(mc->mc_txn, leaf, &data); + if (rc) + return rc; memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)), sizeof(uint16_t)); /* The txn may not know this DBI, or another process may @@ -4235,7 +4192,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) assert(root > 1); if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) - if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0]))) + if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) return rc; mc->mc_snum = 1; @@ -4255,6 +4212,63 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) return mdb_page_search_root(mc, key, flags); } +static int +mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) +{ + MDB_txn *txn = mc->mc_txn; + pgno_t pg = mp->mp_pgno; + unsigned i, ovpages = mp->mp_pages; + MDB_env *env = txn->mt_env; + int rc; + + DPRINTF("free ov page %zu (%d)", pg, ovpages); + /* If the page is dirty we just acquired it, so we should + * give it back to our current free list, if any. + * Not currently supported in nested txns. + * Otherwise put it onto the list of pages we freed in this txn. + */ + if ((mp->mp_flags & P_DIRTY) && !txn->mt_parent && env->me_pghead) { + unsigned j, x; + pgno_t *mop; + MDB_ID2 *dl, ix, iy; + rc = mdb_midl_need(&env->me_pghead, ovpages); + if (rc) + return rc; + /* Remove from dirty list */ + dl = txn->mt_u.dirty_list; + x = dl[0].mid--; + for (ix = dl[x]; ix.mptr != mp; ix = iy) { + if (x > 1) { + x--; + iy = dl[x]; + dl[x] = ix; + } else { + assert(x > 1); + j = ++(dl[0].mid); + dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */ + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + } + if (!(env->me_flags & MDB_WRITEMAP)) + mdb_dpage_free(env, mp); + /* Insert in me_pghead */ + mop = env->me_pghead; + j = mop[0] + ovpages; + for (i = mop[0]; i && mop[i] < pg; i--) + mop[j--] = mop[i]; + while (j>i) + mop[j--] = pg++; + mop[0] += ovpages; + } else { + rc = mdb_midl_append_range(&txn->mt_free_pgs, pg, ovpages); + if (rc) + return rc; + } + mc->mc_db->md_overflow_pages -= ovpages; + return 0; +} + /** Return the data associated with a given node. * @param[in] txn The transaction for this operation. * @param[in] leaf The node being read. 
@@ -4278,7 +4292,7 @@ mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data) */ data->mv_size = NODEDSZ(leaf); memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); - if ((rc = mdb_page_get(txn, pgno, &omp))) { + if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) { DPRINTF("read overflow page %zu failed", pgno); return rc; } @@ -4355,7 +4369,7 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) assert(IS_BRANCH(mc->mc_pg[mc->mc_top])); indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp))) + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL) != 0)) return rc; mdb_cursor_push(mc, mp); @@ -4386,7 +4400,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { if (op == MDB_NEXT || op == MDB_NEXT_DUP) { rc = mdb_cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT); - if (op != MDB_NEXT || rc == MDB_SUCCESS) + if (op != MDB_NEXT || rc != MDB_NOTFOUND) return rc; } } else { @@ -4400,10 +4414,10 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) { DPUTS("=====> move to next sibling page"); - if (mdb_cursor_sibling(mc, 1) != MDB_SUCCESS) { + if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS) { mc->mc_flags |= C_EOF; mc->mc_flags &= ~C_INITIALIZED; - return MDB_NOTFOUND; + return rc; } mp = mc->mc_pg[mc->mc_top]; DPRINTF("next page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); @@ -4457,7 +4471,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) if (op == MDB_PREV || op == MDB_PREV_DUP) { if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV); - if (op != MDB_PREV || rc == MDB_SUCCESS) + if (op != MDB_PREV || rc != MDB_NOTFOUND) return rc; } else { mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED; @@ -4471,9 +4485,9 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) if (mc->mc_ki[mc->mc_top] == 0) { DPUTS("=====> move to prev sibling page"); - if (mdb_cursor_sibling(mc, 0) != MDB_SUCCESS) { + if ((rc = mdb_cursor_sibling(mc, 0)) != MDB_SUCCESS) { mc->mc_flags &= ~C_INITIALIZED; - return MDB_NOTFOUND; + return rc; } mp = mc->mc_pg[mc->mc_top]; mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; @@ -4887,6 +4901,8 @@ fetchm: case MDB_PREV_NODUP: if (!(mc->mc_flags & C_INITIALIZED)) { rc = mdb_cursor_last(mc, key, data); + if (rc) + break; mc->mc_flags |= C_INITIALIZED; mc->mc_ki[mc->mc_top]++; } @@ -5114,6 +5130,7 @@ more: MDB_page *mp; unsigned int offset; unsigned int i; + uint16_t fp_flags; fp = NODEDATA(leaf); if (flags == MDB_CURRENT) { @@ -5133,6 +5150,7 @@ reuse: offset = NODESIZE + sizeof(indx_t) + data->mv_size; } offset += offset & 1; + fp_flags = fp->mp_flags; if (NODESIZE + sizeof(indx_t) + NODEKSZ(leaf) + NODEDSZ(leaf) + offset >= mc->mc_txn->mt_env->me_nodemax) { /* yes, convert it */ @@ -5156,6 +5174,7 @@ reuse: offset = mc->mc_txn->mt_env->me_psize - NODEDSZ(leaf); flags |= F_DUPDATA|F_SUBDATA; dummy.md_root = mp->mp_pgno; + fp_flags &= ~P_SUBP; } else { /* no, just grow it */ rdata = &xdata; @@ -5165,7 +5184,7 @@ reuse: mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno; flags |= F_DUPDATA; } - mp->mp_flags = fp->mp_flags | P_DIRTY; + mp->mp_flags = fp_flags | P_DIRTY; mp->mp_pad = fp->mp_pad; mp->mp_lower = fp->mp_lower; mp->mp_upper = fp->mp_upper + offset; @@ -5190,32 +5209,51 @@ current: if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { 
MDB_page *omp; pgno_t pg; - int ovpages, dpages; + unsigned psize = mc->mc_txn->mt_env->me_psize; + int level, ovpages, dpages = OVPAGES(data->mv_size, psize); - ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize); - dpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize); memcpy(&pg, NODEDATA(leaf), sizeof(pg)); - mdb_page_get(mc->mc_txn, pg, &omp); + if ((rc2 = mdb_page_get(mc->mc_txn, pg, &omp, &level)) != 0) + return rc2; + ovpages = omp->mp_pages; + /* Is the ov page writable and large enough? */ if ((omp->mp_flags & P_DIRTY) && ovpages >= dpages) { /* yes, overwrite it. Note in this case we don't - * bother to try shrinking the node if the new data + * bother to try shrinking the page if the new data * is smaller than the overflow threshold. */ + if (level > 1) { + /* It is writable only in a parent txn */ + size_t sz = (size_t) psize * ovpages, off; + MDB_page *np = mdb_page_malloc(mc->mc_txn, ovpages); + MDB_ID2 id2; + if (!np) + return ENOMEM; + id2.mid = pg; + id2.mptr = np; + mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); + if (!(flags & MDB_RESERVE)) { + /* Copy end of page, adjusting alignment so + * compiler may copy words instead of bytes. + */ + off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t); + memcpy((size_t *)((char *)np + off), + (size_t *)((char *)omp + off), sz - off); + sz = PAGEHDRSZ; + } + memcpy(np, omp, sz); /* Copy beginning of page */ + omp = np; + } + SETDSZ(leaf, data->mv_size); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = METADATA(omp); else memcpy(METADATA(omp), data->mv_data, data->mv_size); goto done; } else { - /* no, free ovpages */ - int i; - mc->mc_db->md_overflow_pages -= ovpages; - for (i=0; imc_txn->mt_free_pgs, pg); - pg++; - } + if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS) + return rc2; } } else if (NODEDSZ(leaf) == data->mv_size) { /* same size, just replace it. 
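
Since the overflow-page rewrite above is reached through ordinary mdb_put() calls with MDB_RESERVE, a hedged usage sketch may help; put_reserved() and its parameters are invented for illustration.

    /* Sketch: MDB_RESERVE asks mdb_put() for data->mv_size bytes of space
     * and returns a pointer to it in data->mv_data, so the value can be
     * filled in place (possibly inside an overflow page). */
    #include <string.h>
    #include "lmdb.h"

    int put_reserved(MDB_txn *txn, MDB_dbi dbi, const char *name,
        const void *blob, size_t len)
    {
        MDB_val key, data;
        int rc;

        key.mv_size = strlen(name);
        key.mv_data = (void *)name;
        data.mv_size = len;    /* only the size is meaningful on input */
        data.mv_data = NULL;

        rc = mdb_put(txn, dbi, &key, &data, MDB_RESERVE);
        if (rc == MDB_SUCCESS)
            memcpy(data.mv_data, blob, len);    /* fill the reserved slot */
        return rc;
    }
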
Note that we could @@ -5377,8 +5415,18 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) void *db = NODEDATA(leaf); memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); } else { + MDB_cursor *m2; /* shrink fake page */ mdb_node_shrink(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + /* fix other sub-DB cursors pointed at this fake page */ + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mc->mc_pg[mc->mc_top] && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } } mc->mc_db->md_entries--; return rc; @@ -6126,11 +6174,11 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst) dbi--; for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (m2 == csrc) continue; if (csrc->mc_flags & C_SUB) m3 = &m2->mc_xcursor->mx_cursor; else m3 = m2; + if (m3 == csrc) continue; if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] == csrc->mc_ki[csrc->mc_top]) { m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; @@ -6284,7 +6332,10 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) return rc; } - mdb_midl_append(&csrc->mc_txn->mt_free_pgs, csrc->mc_pg[csrc->mc_top]->mp_pgno); + rc = mdb_midl_append(&csrc->mc_txn->mt_free_pgs, + csrc->mc_pg[csrc->mc_top]->mp_pgno); + if (rc) + return rc; if (IS_LEAF(csrc->mc_pg[csrc->mc_top])) csrc->mc_db->md_leaf_pages--; else @@ -6385,11 +6436,13 @@ mdb_rebalance(MDB_cursor *mc) mc->mc_db->md_root = P_INVALID; mc->mc_db->md_depth = 0; mc->mc_db->md_leaf_pages = 0; - mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; + /* Adjust cursors pointing to mp */ mc->mc_snum = 0; mc->mc_top = 0; { - /* Adjust other cursors pointing to mp */ MDB_cursor *m2, *m3; MDB_dbi dbi = mc->mc_dbi; @@ -6397,7 +6450,6 @@ mdb_rebalance(MDB_cursor *mc) dbi--; for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (m2 == mc) continue; if (mc->mc_flags & C_SUB) m3 = &m2->mc_xcursor->mx_cursor; else @@ -6411,10 +6463,12 @@ mdb_rebalance(MDB_cursor *mc) } } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) { DPUTS("collapsing root page!"); - mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); - if ((rc = mdb_page_get(mc->mc_txn, mc->mc_db->md_root, - &mc->mc_pg[0]))) + rc = mdb_page_get(mc->mc_txn,mc->mc_db->md_root,&mc->mc_pg[0],NULL); + if (rc) return rc; mc->mc_db->md_depth--; mc->mc_db->md_branch_pages--; @@ -6427,12 +6481,11 @@ mdb_rebalance(MDB_cursor *mc) dbi--; for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (m2 == mc) continue; if (mc->mc_flags & C_SUB) m3 = &m2->mc_xcursor->mx_cursor; else m3 = m2; - if (m3->mc_snum < mc->mc_snum) continue; + if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; if (m3->mc_pg[0] == mp) { m3->mc_pg[0] = mc->mc_pg[0]; m3->mc_snum = 1; @@ -6467,7 +6520,8 @@ mdb_rebalance(MDB_cursor *mc) DPUTS("reading right neighbor"); mn.mc_ki[ptop]++; node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mn.mc_pg[mn.mc_top]))) + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) return rc; mn.mc_ki[mn.mc_top] = 0; mc->mc_ki[mc->mc_top] = 
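
The cursor fix-ups above are about keeping other cursors in the same transaction valid when a delete shrinks a DUPSORT sub-page or moves nodes between pages; as a reminder of the user-visible API involved, a small sketch follows (drop_all_dups() and the key value are illustrative only).

    /* Sketch: position a cursor on one key of an MDB_DUPSORT database and
     * remove all of its duplicates at once with MDB_NODUPDATA. */
    #include <string.h>
    #include "lmdb.h"

    int drop_all_dups(MDB_txn *txn, MDB_dbi dbi, const char *name)
    {
        MDB_cursor *mc;
        MDB_val key, data;
        int rc;

        key.mv_size = strlen(name);
        key.mv_data = (void *)name;

        if ((rc = mdb_cursor_open(txn, dbi, &mc)) != 0)
            return rc;
        rc = mdb_cursor_get(mc, &key, &data, MDB_SET);
        if (rc == MDB_SUCCESS)
            rc = mdb_cursor_del(mc, MDB_NODUPDATA);    /* drops the whole dup list */
        mdb_cursor_close(mc);
        return rc;
    }
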
NUMKEYS(mc->mc_pg[mc->mc_top]); @@ -6477,7 +6531,8 @@ mdb_rebalance(MDB_cursor *mc) DPUTS("reading left neighbor"); mn.mc_ki[ptop]--; node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mn.mc_pg[mn.mc_top]))) + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) return rc; mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; mc->mc_ki[mc->mc_top] = 0; @@ -6511,17 +6566,13 @@ mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf) /* add overflow pages to free list */ if (!IS_LEAF2(mc->mc_pg[mc->mc_top]) && F_ISSET(leaf->mn_flags, F_BIGDATA)) { - int i, ovpages; + MDB_page *omp; pgno_t pg; memcpy(&pg, NODEDATA(leaf), sizeof(pg)); - ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize); - mc->mc_db->md_overflow_pages -= ovpages; - for (i=0; imc_txn->mt_free_pgs, pg); - pg++; - } + if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) || + (rc = mdb_ovpage_free(mc, omp))) + return rc; } mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], mc->mc_db->md_pad); mc->mc_db->md_entries--; @@ -6857,7 +6908,7 @@ newsep: /* Move half of the keys to the right sibling. */ /* grab a page to hold a temporary copy */ - copy = mdb_page_malloc(mc); + copy = mdb_page_malloc(mc->mc_txn, 1); if (copy == NULL) return ENOMEM; @@ -6960,11 +7011,12 @@ done: dbi--; for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (m2 == mc) continue; if (mc->mc_flags & C_SUB) m3 = &m2->mc_xcursor->mx_cursor; else m3 = m2; + if (m3 == mc) + continue; if (!(m3->mc_flags & C_INITIALIZED)) continue; if (m3->mc_flags & C_SPLITTING) @@ -7188,6 +7240,10 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs) return MDB_DBS_FULL; + /* Cannot mix named databases with some mainDB flags */ + if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY)) + return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND; + /* Find the DB info */ dbflag = DB_NEW|DB_VALID; exact = 0; @@ -7236,6 +7292,12 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs) return EINVAL; + if (txn->mt_dbflags[dbi] & DB_STALE) { + MDB_cursor mc; + MDB_xcursor mx; + /* Stale, must read the DB's root. cursor_init does it for us. 
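
The mdb_dbi_open() and mdb_stat() changes above are easiest to see from the caller's side. A sketch follows; the database name "accounts" and the helper name are invented for illustration, and the environment is assumed to have been configured with a non-zero mdb_env_set_maxdbs().

    /* Sketch: open (or create) a named sub-database and read its stats.
     * mdb_dbi_open() now fails with MDB_INCOMPATIBLE (or MDB_NOTFOUND
     * without MDB_CREATE) if the main DB was created with MDB_DUPSORT
     * or MDB_INTEGERKEY, and mdb_stat() refreshes a stale DB handle. */
    #include <stdio.h>
    #include "lmdb.h"

    int show_subdb_stat(MDB_env *env)
    {
        MDB_txn *txn;
        MDB_dbi dbi;
        MDB_stat st;
        int rc;

        if ((rc = mdb_txn_begin(env, NULL, 0, &txn)) != 0)
            return rc;
        rc = mdb_dbi_open(txn, "accounts", MDB_CREATE, &dbi);
        if (rc == 0 && (rc = mdb_stat(txn, dbi, &st)) == 0)
            printf("depth %u, %zu entries\n", st.ms_depth, st.ms_entries);
        mdb_txn_abort(txn);
        return rc;
    }
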
*/ + mdb_cursor_init(&mc, txn, dbi, &mx); + } return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); } @@ -7263,6 +7325,7 @@ mdb_drop0(MDB_cursor *mc, int subs) rc = mdb_page_search(mc, NULL, 0); if (rc == MDB_SUCCESS) { + MDB_txn *txn = mc->mc_txn; MDB_node *ni; MDB_cursor mx; unsigned int i; @@ -7273,17 +7336,23 @@ mdb_drop0(MDB_cursor *mc, int subs) mdb_cursor_copy(mc, &mx); while (mc->mc_snum > 0) { - if (IS_LEAF(mc->mc_pg[mc->mc_top])) { - for (i=0; imc_pg[mc->mc_top]); i++) { - ni = NODEPTR(mc->mc_pg[mc->mc_top], i); + MDB_page *mp = mc->mc_pg[mc->mc_top]; + unsigned n = NUMKEYS(mp); + if (IS_LEAF(mp)) { + for (i=0; imn_flags & F_BIGDATA) { - int j, ovpages = OVPAGES(NODEDSZ(ni), mc->mc_txn->mt_env->me_psize); + MDB_page *omp; pgno_t pg; memcpy(&pg, NODEDATA(ni), sizeof(pg)); - for (j=0; jmc_txn->mt_free_pgs, pg); - pg++; - } + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc != 0) + return rc; + assert(IS_OVERFLOW(omp)); + rc = mdb_midl_append_range(&txn->mt_free_pgs, + pg, omp->mp_pages); + if (rc) + return rc; } else if (subs && (ni->mn_flags & F_SUBDATA)) { mdb_xcursor_init1(mc, ni); rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); @@ -7292,12 +7361,14 @@ mdb_drop0(MDB_cursor *mc, int subs) } } } else { - for (i=0; imc_pg[mc->mc_top]); i++) { + if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0) + return rc; + for (i=0; imc_pg[mc->mc_top], i); + ni = NODEPTR(mp, i); pg = NODEPGNO(ni); /* free it */ - mdb_midl_append(&mc->mc_txn->mt_free_pgs, pg); + mdb_midl_xappend(txn->mt_free_pgs, pg); } } if (!mc->mc_top) @@ -7317,15 +7388,16 @@ mdb_drop0(MDB_cursor *mc, int subs) } } /* free it */ - mdb_midl_append(&mc->mc_txn->mt_free_pgs, - mc->mc_db->md_root); + rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root); + } else if (rc == MDB_NOTFOUND) { + rc = MDB_SUCCESS; } - return 0; + return rc; } int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) { - MDB_cursor *mc; + MDB_cursor *mc, *m2; int rc; if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID)) @@ -7339,6 +7411,9 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) return rc; rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT); + /* Invalidate the dropped DB's cursors */ + for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) + m2->mc_flags &= ~C_INITIALIZED; if (rc) goto leave; diff --git a/libraries/liblmdb/mdb_copy.1 b/libraries/liblmdb/mdb_copy.1 index 2b3d421e78..7837de5f6b 100644 --- a/libraries/liblmdb/mdb_copy.1 +++ b/libraries/liblmdb/mdb_copy.1 @@ -5,12 +5,19 @@ mdb_copy \- LMDB environment copy tool .SH SYNOPSIS .B mdb_copy -.I srcpath\ dstpath +.I srcpath\ [dstpath] .SH DESCRIPTION The .B mdb_copy utility copies an LMDB environment. The environment can be copied regardless of whether it is currently in use. + +If +.I dstpath +is specified it must be the path of an empty directory +for storing the backup. Otherwise, the backup will be +written to stdout. + .SH DIAGNOSTICS Exit status is zero if no errors occur. Errors result in a non-zero exit status and diff --git a/libraries/liblmdb/mdb_copy.c b/libraries/liblmdb/mdb_copy.c index bd0b859110..ca92009cff 100644 --- a/libraries/liblmdb/mdb_copy.c +++ b/libraries/liblmdb/mdb_copy.c @@ -11,28 +11,52 @@ * top-level directory of the distribution or, alternatively, at * . 
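
To round out the mdb_drop() changes above (cursors on a dropped DB are now de-initialized), a usage sketch; clear_db() and its arguments are illustrative only.

    /* Sketch: empty a named database (del=0) or delete it entirely (del=1).
     * With del=1 the DBI handle must not be used again after this txn. */
    #include "lmdb.h"

    int clear_db(MDB_env *env, const char *name, int del)
    {
        MDB_txn *txn;
        MDB_dbi dbi;
        int rc;

        if ((rc = mdb_txn_begin(env, NULL, 0, &txn)) != 0)
            return rc;
        rc = mdb_dbi_open(txn, name, 0, &dbi);
        if (rc == 0)
            rc = mdb_drop(txn, dbi, del);
        if (rc == 0)
            rc = mdb_txn_commit(txn);
        else
            mdb_txn_abort(txn);
        return rc;
    }
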
 */
+#ifdef _WIN32
+#include <windows.h>
+#define MDB_STDOUT GetStdHandle(STD_OUTPUT_HANDLE)
+#else
+#define MDB_STDOUT 1
+#endif
 #include <stdio.h>
 #include <stdlib.h>
+#include <signal.h>
 #include "lmdb.h"
+static void
+sighandle(int sig)
+{
+}
+
 int main(int argc,char * argv[])
 {
 	int rc;
 	MDB_env *env;
 	char *envname = argv[1];
-	if (argc != 3) {
-		fprintf(stderr, "usage: %s srcpath dstpath\n", argv[0]);
+	if (argc<2 || argc>3) {
+		fprintf(stderr, "usage: %s srcpath [dstpath]\n", argv[0]);
 		exit(EXIT_FAILURE);
 	}
+#ifdef SIGPIPE
+	signal(SIGPIPE, sighandle);
+#endif
+#ifdef SIGHUP
+	signal(SIGHUP, sighandle);
+#endif
+	signal(SIGINT, sighandle);
+	signal(SIGTERM, sighandle);
+
 	rc = mdb_env_create(&env);
 	rc = mdb_env_open(env, envname, MDB_RDONLY, 0);
 	if (rc) {
 		printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
 	} else {
-		rc = mdb_env_copy(env, argv[2]);
+		if (argc == 2)
+			rc = mdb_env_copyfd(env, MDB_STDOUT);
+		else
+			rc = mdb_env_copy(env, argv[2]);
 		if (rc)
 			printf("mdb_env_copy failed, error %d %s\n", rc, mdb_strerror(rc));
 	}
diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c
index dd0735f242..3e6be21597 100644
--- a/libraries/liblmdb/mdb_stat.c
+++ b/libraries/liblmdb/mdb_stat.c
@@ -193,9 +193,12 @@ int main(int argc, char *argv[])
 			printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
 			goto txn_abort;
 		}
-		while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT)) == 0) {
-			char *str = malloc(key.mv_size+1);
+		while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) {
+			char *str;
 			MDB_dbi db2;
+			if (memchr(key.mv_data, '\0', key.mv_size))
+				continue;
+			str = malloc(key.mv_size+1);
 			memcpy(str, key.mv_data, key.mv_size);
 			str[key.mv_size] = '\0';
 			rc = mdb_open(txn, str, 0, &db2);
@@ -214,6 +217,9 @@ int main(int argc, char *argv[])
 		mdb_cursor_close(cursor);
 	}
 
+	if (rc == MDB_NOTFOUND)
+		rc = MDB_SUCCESS;
+
 	mdb_close(env, dbi);
 txn_abort:
 	mdb_txn_abort(txn);
diff --git a/libraries/liblmdb/midl.c b/libraries/liblmdb/midl.c
index 57f1e049a2..e7bd680cb0 100644
--- a/libraries/liblmdb/midl.c
+++ b/libraries/liblmdb/midl.c
@@ -71,17 +71,6 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
 {
 	unsigned x, i;
 
-	if (MDB_IDL_IS_RANGE( ids )) {
-		/* if already in range, treat as a dup */
-		if (id >= MDB_IDL_RANGE_FIRST(ids) && id <= MDB_IDL_RANGE_LAST(ids))
-			return -1;
-		if (id < MDB_IDL_RANGE_FIRST(ids))
-			ids[1] = id;
-		else if (id > MDB_IDL_RANGE_LAST(ids))
-			ids[2] = id;
-		return 0;
-	}
-
 	x = mdb_midl_search( ids, id );
 	assert( x > 0 );
 
@@ -97,15 +86,9 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
 	}
 
 	if ( ++ids[0] >= MDB_IDL_DB_MAX ) {
-		if( id < ids[1] ) {
-			ids[1] = id;
-			ids[2] = ids[ids[0]-1];
-		} else if ( ids[ids[0]-1] < id ) {
-			ids[2] = id;
-		} else {
-			ids[2] = ids[ids[0]-1];
-		}
-		ids[0] = MDB_NOID;
+		/* no room */
+		--ids[0];
+		return -2;
 	} else {
 		/* insert id */
@@ -121,8 +104,10 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
 MDB_IDL mdb_midl_alloc(int num)
 {
 	MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID));
-	if (ids)
+	if (ids) {
 		*ids++ = num;
+		*ids = 0;
+	}
 	return ids;
 }
@@ -135,8 +120,9 @@ void mdb_midl_free(MDB_IDL ids)
 int mdb_midl_shrink( MDB_IDL *idp )
 {
 	MDB_IDL ids = *idp;
-	if (*(--ids) > MDB_IDL_UM_MAX) {
-		ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID));
+	if (*(--ids) > MDB_IDL_UM_MAX &&
+		(ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID))))
+	{
 		*ids++ = MDB_IDL_UM_MAX;
 		*idp = ids;
 		return 1;
@@ -144,7 +130,7 @@ int mdb_midl_shrink( MDB_IDL *idp )
 	return 0;
 }
 
-int mdb_midl_grow( MDB_IDL *idp, int num )
+static int mdb_midl_grow( MDB_IDL
*idp, int num ) { MDB_IDL idn = *idp-1; /* grow it */ @@ -156,6 +142,20 @@ int mdb_midl_grow( MDB_IDL *idp, int num ) return 0; } +int mdb_midl_need( MDB_IDL *idp, unsigned num ) +{ + MDB_IDL ids = *idp; + num += ids[0]; + if (num > ids[-1]) { + num = (num + num/4 + (256 + 2)) & -256; + if (!(ids = realloc(ids-1, num * sizeof(MDB_ID)))) + return ENOMEM; + *ids++ = num -= 2; + *idp = ids; + } + return 0; +} + int mdb_midl_append( MDB_IDL *idp, MDB_ID id ) { MDB_IDL ids = *idp; @@ -184,6 +184,22 @@ int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ) return 0; } +int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ) +{ + MDB_ID *ids = *idp, len = ids[0]; + /* Too big? */ + if (len + n > ids[-1]) { + if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0] = len + n; + ids += len; + while (n) + ids[n--] = id++; + return 0; +} + /* Quicksort + Insertion sort for small arrays */ #define SMALL 8 diff --git a/libraries/liblmdb/midl.h b/libraries/liblmdb/midl.h index 792e6ab938..9ce7133c6e 100644 --- a/libraries/liblmdb/midl.h +++ b/libraries/liblmdb/midl.h @@ -52,64 +52,33 @@ typedef size_t MDB_ID; */ typedef MDB_ID *MDB_IDL; -#define MDB_NOID (~(MDB_ID)0) - /* IDL sizes - likely should be even bigger * limiting factors: sizeof(ID), thread stack size */ #define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ #define MDB_IDL_DB_SIZE (1<bi_lastid) ) -#define MDB_IDL_ALL( bdb, ids ) MDB_IDL_RANGE( ids, 1, ((bdb)->bi_lastid) ) - #define MDB_IDL_FIRST( ids ) ( (ids)[1] ) -#define MDB_IDL_LAST( ids ) ( MDB_IDL_IS_RANGE(ids) \ - ? (ids)[2] : (ids)[(ids)[0]] ) +#define MDB_IDL_LAST( ids ) ( (ids)[(ids)[0]] ) -#define MDB_IDL_N( ids ) ( MDB_IDL_IS_RANGE(ids) \ - ? ((ids)[2]-(ids)[1])+1 : (ids)[0] ) + /** Append ID to IDL. The IDL must be big enough. */ +#define mdb_midl_xappend(idl, id) do { \ + MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \ + xidl[xlen] = (id); \ + } while (0) #if 0 /* superseded by append/sort */ /** Insert an ID into an IDL. * @param[in,out] ids The IDL to insert into. * @param[in] id The ID to insert. - * @return 0 on success, -1 if the ID was already present in the IDL. + * @return 0 on success, -1 if ID was already present, -2 on error. */ int mdb_midl_insert( MDB_IDL ids, MDB_ID id ); #endif @@ -132,28 +101,35 @@ void mdb_midl_free(MDB_IDL ids); */ int mdb_midl_shrink(MDB_IDL *idp); - /** Grow an IDL. - * Add room for num additional elements. - * @param[in,out] idp Address of the IDL to grow. - * @param[in] num Number of elements to add. - * @return 0 on success, -1 on failure. + /** Make room for num additional elements in an IDL. + * @param[in,out] idp Address of the IDL. + * @param[in] num Number of elements to make room for. + * @return 0 on success, ENOMEM on failure. */ -int mdb_midl_grow(MDB_IDL *idp, int num); +int mdb_midl_need(MDB_IDL *idp, unsigned num); /** Append an ID onto an IDL. * @param[in,out] idp Address of the IDL to append to. * @param[in] id The ID to append. - * @return 0 on success, -1 if the IDL is too large. + * @return 0 on success, ENOMEM if the IDL is too large. */ int mdb_midl_append( MDB_IDL *idp, MDB_ID id ); /** Append an IDL onto an IDL. * @param[in,out] idp Address of the IDL to append to. * @param[in] app The IDL to append. - * @return 0 on success, -1 if the IDL is too large. + * @return 0 on success, ENOMEM if the IDL is too large. */ int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ); + /** Append an ID range onto an IDL. + * @param[in,out] idp Address of the IDL to append to. 
+	 * @param[in] id The lowest ID to append.
+	 * @param[in] n Number of IDs to append.
+	 * @return 0 on success, ENOMEM if the IDL is too large.
+	 */
+int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n );
+
 	/** Sort an IDL.
 	 * @param[in,out] ids The IDL to sort.
 	 */
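
Finally, a small self-contained sketch of the reworked IDL primitives declared above; this is test-style code written for illustration and is not part of the library.

    /* Sketch: build, extend and sort a free-standing IDL.
     * Element 0 of an IDL holds the current count. */
    #include <stdio.h>
    #include "midl.h"

    int main(void)
    {
        MDB_IDL idl = mdb_midl_alloc(MDB_IDL_UM_MAX);
        if (!idl)
            return 1;
        if (mdb_midl_append(&idl, 42) ||
            mdb_midl_append_range(&idl, 100, 8)) {    /* appends IDs 100..107 */
            mdb_midl_free(idl);
            return 1;
        }
        mdb_midl_sort(idl);    /* put the list in the order the library expects */
        printf("%zu ids, first %zu, last %zu\n",
            (size_t)idl[0], (size_t)MDB_IDL_FIRST(idl), (size_t)MDB_IDL_LAST(idl));
        mdb_midl_free(idl);
        return 0;
    }
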