mirror of
https://git.openldap.org/openldap/openldap.git
synced 2025-01-24 13:24:56 +08:00
Merge branch 'mdb.master' of ssh://git-master.openldap.org/~git/git/openldap
This commit is contained in:
commit
8623c98726
@ -316,8 +316,8 @@ typedef uint16_t indx_t;
|
||||
* Since the database uses multi-version concurrency control, readers don't
|
||||
* actually need any locking. This table is used to keep track of which
|
||||
* readers are using data from which old transactions, so that we'll know
|
||||
* when a particular old transaction is no longer in use, Old transactions
|
||||
* that have freed any data pages can then have their freed pages reclaimed
|
||||
* when a particular old transaction is no longer in use. Old transactions
|
||||
* that have discarded any data pages can then have those pages reclaimed
|
||||
* for use by a later write transaction.
|
||||
*
|
||||
* The lock table is constructed such that reader slots are aligned with the
|
||||
@ -469,10 +469,12 @@ typedef struct MDB_txninfo {
|
||||
* headers on any page after the first.
|
||||
*/
|
||||
typedef struct MDB_page {
|
||||
union {
|
||||
pgno_t mp_pgno; /**< page number */
|
||||
void * mp_next; /**< for in-memory list of freed structs */
|
||||
};
|
||||
#define mp_pgno mp_p.p_pgno
|
||||
#define mp_next mp_p.p_next
|
||||
union padded {
|
||||
pgno_t p_pgno; /**< page number */
|
||||
void * p_next; /**< for in-memory list of freed structs */
|
||||
} mp_p;
|
||||
#define P_BRANCH 0x01 /**< branch page */
|
||||
#define P_LEAF 0x02 /**< leaf page */
|
||||
#define P_OVERFLOW 0x04 /**< overflow page */
|
||||
@ -480,13 +482,16 @@ typedef struct MDB_page {
|
||||
#define P_DIRTY 0x10 /**< dirty page */
|
||||
#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */
|
||||
uint32_t mp_flags;
|
||||
union {
|
||||
#define mp_lower mp_pb.pb.pb_lower
|
||||
#define mp_upper mp_pb.pb.pb_upper
|
||||
#define mp_pages mp_pb.pb_pages
|
||||
union page_bounds {
|
||||
struct {
|
||||
indx_t mp_lower; /**< lower bound of free space */
|
||||
indx_t mp_upper; /**< upper bound of free space */
|
||||
};
|
||||
uint32_t mp_pages; /**< number of overflow pages */
|
||||
};
|
||||
indx_t pb_lower; /**< lower bound of free space */
|
||||
indx_t pb_upper; /**< upper bound of free space */
|
||||
} pb;
|
||||
uint32_t pb_pages; /**< number of overflow pages */
|
||||
} mp_pb;
|
||||
indx_t mp_ptrs[1]; /**< dynamic size */
|
||||
} MDB_page;
|
||||
|
||||
@ -543,17 +548,17 @@ typedef struct MDB_node {
|
||||
/** Size of the node header, excluding dynamic data at the end */
|
||||
#define NODESIZE offsetof(MDB_node, mn_data)
|
||||
|
||||
/** Size of a node in a branch page.
|
||||
/** Size of a node in a branch page with a given key.
|
||||
* This is just the node header plus the key, there is no data.
|
||||
*/
|
||||
#define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size))
|
||||
|
||||
/** Size of a node in a leaf page.
|
||||
/** Size of a node in a leaf page with a given key and data.
|
||||
* This is node header plus key plus data size.
|
||||
*/
|
||||
#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size)
|
||||
|
||||
/** Address of node \i in page \p */
|
||||
/** Address of node \b i in page \b p */
|
||||
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i]))
|
||||
|
||||
/** Address of the key for the node */
|
||||
@ -748,6 +753,7 @@ struct MDB_env {
|
||||
HANDLE me_fd; /**< The main data file */
|
||||
HANDLE me_lfd; /**< The lock file */
|
||||
HANDLE me_mfd; /**< just for writing the meta pages */
|
||||
/** Failed to update the meta page. Probably an I/O error. */
|
||||
#define MDB_FATAL_ERROR 0x80000000U
|
||||
uint32_t me_flags;
|
||||
uint32_t me_extrapad; /**< unused for now */
|
||||
@ -805,8 +811,8 @@ static int mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
|
||||
|
||||
static int mdb_rebalance(MDB_cursor *mc);
|
||||
static int mdb_update_key(MDB_page *mp, indx_t indx, MDB_val *key);
|
||||
static int mdb_move_node(MDB_cursor *csrcrc, MDB_cursor *cdstst);
|
||||
static int mdb_merge(MDB_cursor *csrcrc, MDB_cursor *cdstst);
|
||||
static int mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst);
|
||||
static int mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst);
|
||||
static int mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata,
|
||||
pgno_t newpgno);
|
||||
static MDB_page *mdb_new_page(MDB_cursor *mc, uint32_t flags, int num);
|
||||
@ -842,6 +848,7 @@ static SECURITY_ATTRIBUTES mdb_all_sa;
|
||||
static int mdb_sec_inited;
|
||||
#endif
|
||||
|
||||
/** Return the library version info. */
|
||||
char *
|
||||
mdb_version(int *major, int *minor, int *patch)
|
||||
{
|
||||
@ -851,7 +858,7 @@ mdb_version(int *major, int *minor, int *patch)
|
||||
return MDB_VERSION_STRING;
|
||||
}
|
||||
|
||||
/** Table of descriptions for MDB @ref error codes */
|
||||
/** Table of descriptions for MDB @ref errors */
|
||||
static char *const mdb_errstr[] = {
|
||||
"MDB_KEYEXIST: Key/data pair already exists",
|
||||
"MDB_NOTFOUND: No matching key/data pair found",
|
||||
@ -874,7 +881,12 @@ mdb_strerror(int err)
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
static char *
|
||||
/** Display a key in hexadecimal and return the address of the result.
|
||||
* @param[in] key the key to display
|
||||
* @param[in] buf the buffer to write into. Should always be #DKBUF.
|
||||
* @return The key in hexadecimal form.
|
||||
*/
|
||||
char *
|
||||
mdb_dkey(MDB_val *key, char *buf)
|
||||
{
|
||||
char *ptr = buf;
|
||||
@ -882,6 +894,9 @@ mdb_dkey(MDB_val *key, char *buf)
|
||||
unsigned int i;
|
||||
if (key->mv_size > MAXKEYSIZE)
|
||||
return "MAXKEYSIZE";
|
||||
/* may want to make this a dynamic check: if the key is mostly
|
||||
* printable characters, print it as-is instead of converting to hex.
|
||||
*/
|
||||
#if 1
|
||||
for (i=0; i<key->mv_size; i++)
|
||||
ptr += sprintf(ptr, "%02x", *c++);
|
||||
@ -898,6 +913,15 @@ mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
|
||||
return txn->mt_dbxs[dbi].md_cmp(a, b);
|
||||
}
|
||||
|
||||
/** Compare two data items according to a particular database.
|
||||
* This returns a comparison as if the two items were data items of
|
||||
* a sorted duplicates #MDB_DUPSORT database.
|
||||
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
|
||||
* @param[in] dbi A database handle returned by #mdb_open()
|
||||
* @param[in] a The first item to compare
|
||||
* @param[in] b The second item to compare
|
||||
* @return < 0 if a < b, 0 if a == b, > 0 if a > b
|
||||
*/
|
||||
int
|
||||
mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
|
||||
{
|
||||
@ -907,7 +931,15 @@ mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
|
||||
return EINVAL; /* too bad you can't distinguish this from a valid result */
|
||||
}
|
||||
|
||||
/* Allocate new page(s) for writing */
|
||||
/** Allocate pages for writing.
|
||||
* If there are free pages available from older transactions, they
|
||||
* will be re-used first. Otherwise a new page will be allocated.
|
||||
* @param[in] mc cursor A cursor handle identifying the transaction and
|
||||
* database for which we are allocating.
|
||||
* @param[in] num the number of pages to allocate.
|
||||
* @return Address of the allocated page(s). Requests for multiple pages
|
||||
* will always be satisfied by a single contiguous chunk of memory.
|
||||
*/
|
||||
static MDB_page *
|
||||
mdb_alloc_page(MDB_cursor *mc, int num)
|
||||
{
|
||||
@ -1025,7 +1057,9 @@ mdb_alloc_page(MDB_cursor *mc, int num)
|
||||
return np;
|
||||
}
|
||||
|
||||
/* Touch a page: make it dirty and re-insert into tree with updated pgno.
|
||||
/** Touch a page: make it dirty and re-insert into tree with updated pgno.
|
||||
* @param[in] mc cursor pointing to the page to be touched
|
||||
* @return 0 on success, non-zero on failure.
|
||||
*/
|
||||
static int
|
||||
mdb_touch(MDB_cursor *mc)
|
||||
@ -1047,7 +1081,9 @@ mdb_touch(MDB_cursor *mc)
|
||||
mp->mp_flags |= P_DIRTY;
|
||||
|
||||
mc->mc_pg[mc->mc_top] = mp;
|
||||
/* Update the page number to new touched page. */
|
||||
/** If this page has a parent, update the parent to point to
|
||||
* this new page.
|
||||
*/
|
||||
if (mc->mc_top)
|
||||
SETPGNO(NODEPTR(mc->mc_pg[mc->mc_top-1], mc->mc_ki[mc->mc_top-1]), mp->mp_pgno);
|
||||
}
|
||||
@ -1068,6 +1104,12 @@ mdb_env_sync(MDB_env *env, int force)
|
||||
static inline void
|
||||
mdb_txn_reset0(MDB_txn *txn);
|
||||
|
||||
/** Common code for #mdb_txn_begin() and #mdb_txn_renew().
|
||||
* @param[in] txn the transaction handle to initialize
|
||||
* @return 0 on success, non-zero on failure. This can only
|
||||
* fail for read-only transactions, and then only if the
|
||||
* reader table is full.
|
||||
*/
|
||||
static inline int
|
||||
mdb_txn_renew0(MDB_txn *txn)
|
||||
{
|
||||
@ -1181,6 +1223,9 @@ mdb_txn_begin(MDB_env *env, unsigned int flags, MDB_txn **ret)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Common code for #mdb_txn_reset() and #mdb_txn_abort().
|
||||
* @param[in] txn the transaction handle to reset
|
||||
*/
|
||||
static inline void
|
||||
mdb_txn_reset0(MDB_txn *txn)
|
||||
{
|
||||
@ -1213,6 +1258,7 @@ mdb_txn_reset0(MDB_txn *txn)
|
||||
env->me_txn = NULL;
|
||||
for (i=2; i<env->me_numdbs; i++)
|
||||
env->me_dbxs[i].md_dirty = 0;
|
||||
/* The writer mutex was locked in mdb_txn_begin. */
|
||||
UNLOCK_MUTEX_W(env);
|
||||
}
|
||||
}
|
||||
@ -1510,6 +1556,12 @@ done:
|
||||
return MDB_SUCCESS;
|
||||
}
|
||||
|
||||
/** Read the environment parameters of a DB environment before
|
||||
* mapping it into memory.
|
||||
* @param[in] env the environment handle
|
||||
* @param[out] meta address of where to store the meta information
|
||||
* @return 0 on success, non-zero on failure.
|
||||
*/
|
||||
static int
|
||||
mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
||||
{
|
||||
@ -1560,6 +1612,11 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** Write the environment parameters of a freshly created DB environment.
|
||||
* @param[in] env the environment handle
|
||||
* @param[out] meta address of where to store the meta information
|
||||
* @return 0 on success, non-zero on failure.
|
||||
*/
|
||||
static int
|
||||
mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
||||
{
|
||||
@ -1610,6 +1667,10 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Update the environment info to commit a transaction.
|
||||
* @param[in] txn the transaction that's being committed
|
||||
* @return 0 on success, non-zero on failure.
|
||||
*/
|
||||
static int
|
||||
mdb_env_write_meta(MDB_txn *txn)
|
||||
{
|
||||
@ -1690,6 +1751,11 @@ mdb_env_write_meta(MDB_txn *txn)
|
||||
return MDB_SUCCESS;
|
||||
}
|
||||
|
||||
/** Check both meta pages to see which one is newer.
|
||||
* @param[in] env the environment handle
|
||||
* @param[out] which address of where to store the meta toggle ID
|
||||
* @return 0 on success, non-zero on failure.
|
||||
*/
|
||||
static int
|
||||
mdb_env_read_meta(MDB_env *env, int *which)
|
||||
{
|
||||
@ -1759,6 +1825,8 @@ mdb_env_get_maxreaders(MDB_env *env, int *readers)
|
||||
return MDB_SUCCESS;
|
||||
}
|
||||
|
||||
/** Further setup required for opening an MDB environment
|
||||
*/
|
||||
static int
|
||||
mdb_env_open2(MDB_env *env, unsigned int flags)
|
||||
{
|
||||
@ -2834,7 +2902,6 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
||||
set1:
|
||||
if (exactp)
|
||||
*exactp = 1;
|
||||
rc = 0;
|
||||
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
||||
goto set3;
|
||||
}
|
||||
@ -3216,9 +3283,11 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
||||
goto top;
|
||||
} else {
|
||||
int exact = 0;
|
||||
rc = mdb_cursor_set(mc, key, NULL, MDB_SET, &exact);
|
||||
MDB_val d2;
|
||||
rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact);
|
||||
if (flags == MDB_NOOVERWRITE && rc == 0) {
|
||||
DPRINTF("duplicate key [%s]", DKEY(key));
|
||||
*data = d2;
|
||||
return MDB_KEYEXIST;
|
||||
}
|
||||
if (rc && rc != MDB_NOTFOUND)
|
||||
@ -3273,6 +3342,9 @@ top:
|
||||
rdata = &xdata;
|
||||
xdata.mv_size = sizeof(MDB_db);
|
||||
xdata.mv_data = &dummy;
|
||||
/* new sub-DB, must fully init xcursor */
|
||||
if (flags == MDB_CURRENT)
|
||||
flags = 0;
|
||||
goto new_sub;
|
||||
}
|
||||
goto put_sub;
|
||||
@ -4493,6 +4565,10 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
|
||||
int
|
||||
mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff)
|
||||
{
|
||||
/** Only a subset of the @ref mdb_env flags can be changed
|
||||
* at runtime. Changing other flags requires closing the environment
|
||||
* and re-opening it with the new flags.
|
||||
*/
|
||||
#define CHANGEABLE (MDB_NOSYNC)
|
||||
if ((flag & CHANGEABLE) != flag)
|
||||
return EINVAL;
|
||||
|
@ -151,7 +151,8 @@ typedef void (MDB_rel_func)(void *newptr, void *oldptr, size_t size);
|
||||
#define MDB_REVERSEKEY 0x02
|
||||
/** use sorted duplicates */
|
||||
#define MDB_DUPSORT 0x04
|
||||
/** numeric keys in native byte order */
|
||||
/** numeric keys in native byte order.
|
||||
* The keys must all be of the same size. */
|
||||
#define MDB_INTEGERKEY 0x08
|
||||
/** with #MDB_DUPSORT, sorted dup items have fixed size */
|
||||
#define MDB_DUPFIXED 0x10
|
||||
@ -200,7 +201,7 @@ typedef enum MDB_cursor_op {
|
||||
MDB_PREV_DUP, /**< Position at previous data item of current key.
|
||||
Only for #MDB_DUPSORT */
|
||||
MDB_PREV_NODUP, /**< Position at last data item of previous key.
|
||||
only for #MDB_DUPSORT */
|
||||
Only for #MDB_DUPSORT */
|
||||
MDB_SET, /**< Position at specified key */
|
||||
MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */
|
||||
} MDB_cursor_op;
|
||||
@ -493,7 +494,7 @@ void mdb_txn_abort(MDB_txn *txn);
|
||||
/** Reset a read-only transaction.
|
||||
* This releases the current reader lock but doesn't free the
|
||||
* transaction handle, allowing it to be used again later by #mdb_txn_renew().
|
||||
* It otherwise has the same affect as #mdb_txn_abort() but saves some memory
|
||||
* It otherwise has the same effect as #mdb_txn_abort() but saves some memory
|
||||
* allocation/deallocation overhead if a thread is going to start a new
|
||||
* read-only transaction again soon.
|
||||
* All cursors opened within the transaction must be closed before the transaction
|
||||
@ -539,11 +540,9 @@ int mdb_txn_renew(MDB_txn *txn);
|
||||
* keys may have multiple data items, stored in sorted order.) By default
|
||||
* keys must be unique and may have only a single data item.
|
||||
* <li>#MDB_INTEGERKEY
|
||||
* Keys are binary integers in native byte order. On Big-Endian systems
|
||||
* this flag has no effect. On Little-Endian systems this flag behaves
|
||||
* the same as #MDB_REVERSEKEY. This flag is simply provided as a
|
||||
* convenience so that applications don't need to detect Endianness themselves
|
||||
* when using integers as keys.
|
||||
* Keys are binary integers in native byte order. Setting this option
|
||||
* requires all keys to be the same size, typically sizeof(int)
|
||||
* or sizeof(long).
|
||||
* <li>#MDB_DUPFIXED
|
||||
* This flag may only be used in combination with #MDB_DUPSORT. This option
|
||||
* tells the library that the data items for this database are all the same
|
||||
@ -553,6 +552,9 @@ int mdb_txn_renew(MDB_txn *txn);
|
||||
* <li>#MDB_INTEGERDUP
|
||||
* This option specifies that duplicate data items are also integers, and
|
||||
* should be sorted as such.
|
||||
* <li>#MDB_REVERSEDUP
|
||||
* This option specifies that duplicate data items should be compared as
|
||||
* strings in reverse order.
|
||||
* <li>#MDB_CREATE
|
||||
* Create the named database if it doesn't exist. This option is not
|
||||
* allowed in a read-only transaction or a read-only environment.
|
||||
@ -655,6 +657,11 @@ int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel);
|
||||
* If the database supports duplicate keys (#MDB_DUPSORT) then the
|
||||
* first data item for the key will be returned. Retrieval of other
|
||||
* items requires the use of #mdb_cursor_get().
|
||||
*
|
||||
* @note The memory pointed to by the returned values is owned by the
|
||||
* database. The caller need not dispose of the memory, and may not
|
||||
* modify it in any way. For values returned in a read-only transaction
|
||||
* any modification attempts will cause a SIGSEGV.
|
||||
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
|
||||
* @param[in] dbi A database handle returned by #mdb_open()
|
||||
* @param[in] key The key to search for in the database
|
||||
@ -676,7 +683,7 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
|
||||
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
|
||||
* @param[in] dbi A database handle returned by #mdb_open()
|
||||
* @param[in] key The key to store in the database
|
||||
* @param[in] data The data to store
|
||||
* @param[in,out] data The data to store
|
||||
* @param[in] flags Special options for this operation. This parameter
|
||||
* must be set to 0 or by bitwise OR'ing together one or more of the
|
||||
* values described here.
|
||||
@ -689,7 +696,8 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
|
||||
* <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
|
||||
* does not already appear in the database. The function will return
|
||||
* #MDB_KEYEXIST if the key already appears in the database, even if
|
||||
* the database supports duplicates (#MDB_DUPSORT).
|
||||
* the database supports duplicates (#MDB_DUPSORT). The \b data
|
||||
* parameter will be set to point to the existing item.
|
||||
* </ul>
|
||||
* @return A non-zero error value on failure and 0 on success. Some possible
|
||||
* errors are:
|
||||
|
Loading…
Reference in New Issue
Block a user