cache LRU concurrency improvements, reorder DB accesses for consistency

This commit is contained in:
Howard Chu 2005-12-09 00:20:18 +00:00
parent 3e062a3f35
commit 553b2884ab
7 changed files with 200 additions and 156 deletions

View File

@ -312,6 +312,23 @@ retry: /* transaction retry */
goto return_results;
}
/* attribute indexes */
rs->sr_err = bdb_index_entry_add( op, lt2, op->oq_add.rs_e );
if ( rs->sr_err != LDAP_SUCCESS ) {
Debug( LDAP_DEBUG_TRACE,
LDAP_XSTRING(bdb_add) ": index_entry_add failed\n",
0, 0, 0 );
switch( rs->sr_err ) {
case DB_LOCK_DEADLOCK:
case DB_LOCK_NOTGRANTED:
goto retry;
default:
rs->sr_err = LDAP_OTHER;
}
rs->sr_text = "index generation failed";
goto return_results;
}
/* id2entry index */
rs->sr_err = bdb_id2entry_add( op->o_bd, lt2, op->oq_add.rs_e );
if ( rs->sr_err != 0 ) {
@ -329,22 +346,6 @@ retry: /* transaction retry */
goto return_results;
}
/* attribute indexes */
rs->sr_err = bdb_index_entry_add( op, lt2, op->oq_add.rs_e );
if ( rs->sr_err != LDAP_SUCCESS ) {
Debug( LDAP_DEBUG_TRACE,
LDAP_XSTRING(bdb_add) ": index_entry_add failed\n",
0, 0, 0 );
switch( rs->sr_err ) {
case DB_LOCK_DEADLOCK:
case DB_LOCK_NOTGRANTED:
goto retry;
default:
rs->sr_err = LDAP_OTHER;
}
rs->sr_text = "index generation failed";
goto return_results;
}
if ( TXN_COMMIT( lt2, 0 ) != 0 ) {
rs->sr_err = LDAP_OTHER;
rs->sr_text = "txn_commit(2) failed";

View File

@ -123,6 +123,7 @@ typedef struct bdb_entry_info {
typedef struct bdb_cache {
int c_maxsize;
int c_cursize;
int c_minfree;
int c_eiused; /* EntryInfo's in use */
int c_leaves; /* EntryInfo leaf nodes */
EntryInfo c_dntree;
@ -131,7 +132,8 @@ typedef struct bdb_cache {
EntryInfo *c_lruhead; /* lru - add accessed entries here */
EntryInfo *c_lrutail; /* lru - rem lru entries from here */
ldap_pvt_thread_rdwr_t c_rwlock;
ldap_pvt_thread_mutex_t lru_mutex;
ldap_pvt_thread_mutex_t lru_head_mutex;
ldap_pvt_thread_mutex_t lru_tail_mutex;
u_int32_t c_locker; /* used by lru cleaner */
#ifdef SLAP_ZONE_ALLOC
void *c_zctx;
@ -175,6 +177,7 @@ struct bdb_info {
u_int32_t bi_txn_cp_kbyte;
void *bi_txn_cp_task;
void *bi_index_task;
void *bi_cache_task;
int bi_lock_detect;
long bi_shm_key;

View File

@ -26,9 +26,12 @@
#include "back-bdb.h"
#include "ldap_rq.h"
#ifdef BDB_HIER
#define bdb_cache_lru_add hdb_cache_lru_add
#endif
static void bdb_cache_lru_add( struct bdb_info *bdb, EntryInfo *ei );
static int bdb_cache_delete_internal(Cache *cache, EntryInfo *e, int decr);
#ifdef LDAP_DEBUG
@ -460,9 +463,8 @@ hdb_cache_find_parent(
ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
if ( addlru ) {
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex );
LRU_ADD( &bdb->bi_cache, ein );
ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_mutex );
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_head_mutex );
bdb_cache_lru_add( bdb, ein );
}
addlru = 1;
@ -527,94 +529,131 @@ int hdb_cache_load(
}
#endif
/* caller must have lru_mutex locked. mutex
* will be unlocked on return.
*/
static void
bdb_cache_lru_add(
struct bdb_info *bdb,
u_int32_t locker,
EntryInfo *ei )
static void *
bdb_cache_lru_purge(void *ctx, void *arg)
{
struct re_s *rtask = arg;
struct bdb_info *bdb = rtask->arg;
DB_LOCK lock, *lockp;
EntryInfo *elru, *elprev;
int count = 0;
if ( locker ) {
if ( bdb->bi_cache.c_locker ) {
lockp = &lock;
} else {
lockp = NULL;
}
/* See if we're above the cache size limit */
if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) {
EntryInfo *elru, *elprev;
int i = 0;
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_tail_mutex );
/* Look for an unused entry to remove */
for (elru = bdb->bi_cache.c_lrutail; elru; elru = elprev, i++ ) {
elprev = elru->bei_lruprev;
/* Look for an unused entry to remove */
for (elru = bdb->bi_cache.c_lrutail; elru; elru = elprev ) {
elprev = elru->bei_lruprev;
/* Too many probes, not enough idle, give up */
if (i > 10)
break;
/* If we can successfully writelock it, then
* the object is idle.
*/
if ( bdb_cache_entry_db_lock( bdb->bi_dbenv,
bdb->bi_cache.c_locker, elru, 1, 1, lockp ) == 0 ) {
/* If we can successfully writelock it, then
* the object is idle.
int stop = 0;
/* If this node is in the process of linking into the cache,
* or this node is being deleted, skip it.
*/
if ( bdb_cache_entry_db_lock( bdb->bi_dbenv,
bdb->bi_cache.c_locker, elru, 1, 1, lockp ) == 0 ) {
int stop = 0, decr = 0;
/* If this node is in the process of linking into the cache,
* or this node is being deleted, skip it.
*/
if ( elru->bei_state &
( CACHE_ENTRY_NOT_LINKED | CACHE_ENTRY_DELETED )) {
bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp );
continue;
}
/* Free entry for this node if it's present */
if ( elru->bei_e ) {
elru->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
#else
bdb_entry_return( elru->bei_e );
#endif
elru->bei_e = NULL;
decr = 1;
}
/* ITS#4010 if we're in slapcat, and this node is a leaf
* node, free it.
*
* FIXME: we need to do this for slapd as well, (which is
* why we compute bi_cache.c_leaves now) but at the moment
* we can't because it causes unresolvable deadlocks.
*/
if ( slapMode & SLAP_TOOL_READONLY ) {
if ( !elru->bei_kids ) {
/* This does LRU_DELETE for us */
bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 );
bdb_cache_delete_cleanup( &bdb->bi_cache, elru );
}
/* Leave node on LRU list for a future pass */
} else {
LRU_DELETE( &bdb->bi_cache, elru );
}
if ( elru->bei_state &
( CACHE_ENTRY_NOT_LINKED | CACHE_ENTRY_DELETED )) {
bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp );
ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
if ( decr )
--bdb->bi_cache.c_cursize;
if (bdb->bi_cache.c_cursize <= bdb->bi_cache.c_maxsize)
stop = 1;
ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
if (stop) break;
continue;
}
/* Free entry for this node if it's present */
if ( elru->bei_e ) {
elru->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
#else
bdb_entry_return( elru->bei_e );
#endif
elru->bei_e = NULL;
count++;
}
/* ITS#4010 if we're in slapcat, and this node is a leaf
* node, free it.
*
* FIXME: we need to do this for slapd as well, (which is
* why we compute bi_cache.c_leaves now) but at the moment
* we can't because it causes unresolvable deadlocks.
*/
if ( slapMode & SLAP_TOOL_READONLY ) {
if ( !elru->bei_kids ) {
/* This does LRU_DELETE for us */
bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 );
bdb_cache_delete_cleanup( &bdb->bi_cache, elru );
}
/* Leave node on LRU list for a future pass */
} else {
LRU_DELETE( &bdb->bi_cache, elru );
}
bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp );
if ( count == bdb->bi_cache.c_minfree ) {
ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
bdb->bi_cache.c_cursize -= bdb->bi_cache.c_minfree;
if ( bdb->bi_cache.c_maxsize - bdb->bi_cache.c_cursize >=
bdb->bi_cache.c_minfree )
stop = 1;
count = 0;
ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
}
if (stop) break;
}
}
ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_tail_mutex );
/* If we're running as a task, drop the task */
if ( ctx ) {
ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
ldap_pvt_runqueue_stoptask( &slapd_rq, rtask );
/* Defer processing till we're needed again */
ldap_pvt_runqueue_resched( &slapd_rq, rtask, 1 );
ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
}
}
/* caller must have lru_head_mutex locked. mutex
* will be unlocked on return.
*/
static void
bdb_cache_lru_add(
struct bdb_info *bdb,
EntryInfo *ei )
{
LRU_ADD( &bdb->bi_cache, ei );
ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_mutex );
ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_head_mutex );
/* See if we're above the cache size limit */
if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) {
if ( slapMode & SLAP_TOOL_MODE ) {
struct re_s rtask;
rtask.arg = bdb;
bdb_cache_lru_purge( NULL, &rtask );
} else {
ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
if ( bdb->bi_cache_task ) {
if ( !ldap_pvt_runqueue_isrunning( &slapd_rq,
bdb->bi_cache_task ))
ldap_pvt_runqueue_resched( &slapd_rq, bdb->bi_cache_task,
0 );
} else {
bdb->bi_cache_task = ldap_pvt_runqueue_insert( &slapd_rq, 0,
bdb_cache_lru_purge, bdb, "bdb_cache_lru_purge",
bdb->bi_dbenv_home );
}
ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
}
}
}
EntryInfo *
@ -821,22 +860,23 @@ load1:
ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
}
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex );
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_head_mutex );
/* If the LRU list has only one entry and this is it, it
* doesn't need to be added again.
*/
if ( bdb->bi_cache.c_lruhead == bdb->bi_cache.c_lrutail &&
bdb->bi_cache.c_lruhead == *eip ) {
ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_mutex );
ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_head_mutex );
} else {
/* if entry is on LRU list, remove from old spot */
if ( (*eip)->bei_lrunext || (*eip)->bei_lruprev ) {
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_tail_mutex );
LRU_DELETE( &bdb->bi_cache, *eip );
ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_tail_mutex );
}
/* lru_mutex is unlocked for us */
bdb_cache_lru_add( bdb, locker, *eip );
/* lru_head_mutex is unlocked for us */
bdb_cache_lru_add( bdb, *eip );
}
}
@ -933,10 +973,10 @@ bdb_cache_add(
ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
/* set lru mutex */
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex );
ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_head_mutex );
/* lru_mutex is unlocked for us */
bdb_cache_lru_add( bdb, locker, new );
/* lru_head_mutex is unlocked for us */
bdb_cache_lru_add( bdb, new );
return rc;
}
@ -1082,7 +1122,7 @@ bdb_cache_delete(
e->e_id, 0, 0 );
/* set lru mutex */
ldap_pvt_thread_mutex_lock( &cache->lru_mutex );
ldap_pvt_thread_mutex_lock( &cache->lru_tail_mutex );
/* set cache write lock */
ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock );
@ -1093,7 +1133,7 @@ bdb_cache_delete(
ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );
/* free lru mutex */
ldap_pvt_thread_mutex_unlock( &cache->lru_mutex );
ldap_pvt_thread_mutex_unlock( &cache->lru_tail_mutex );
/* Leave entry info locked */
@ -1201,7 +1241,7 @@ bdb_cache_release_all( Cache *cache )
/* set cache write lock */
ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock );
/* set lru mutex */
ldap_pvt_thread_mutex_lock( &cache->lru_mutex );
ldap_pvt_thread_mutex_lock( &cache->lru_tail_mutex );
Debug( LDAP_DEBUG_TRACE, "====> bdb_cache_release_all\n", 0, 0, 0 );
@ -1220,7 +1260,7 @@ bdb_cache_release_all( Cache *cache )
cache->c_dntree.bei_kids = NULL;
/* free lru mutex */
ldap_pvt_thread_mutex_unlock( &cache->lru_mutex );
ldap_pvt_thread_mutex_unlock( &cache->lru_tail_mutex );
/* free cache write lock */
ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );
}

View File

@ -55,6 +55,11 @@ static ConfigTable bdbcfg[] = {
"DESC 'Directory for database content' "
"EQUALITY caseIgnoreMatch "
"SYNTAX OMsDirectoryString SINGLE-VALUE )", NULL, NULL },
{ "cachefree", "size", 2, 2, 0, ARG_INT|ARG_OFFSET,
(void *)offsetof(struct bdb_info, bi_cache.c_minfree),
"( OLcfgDbAt:1.11 NAME 'olcDbCacheFree' "
"DESC 'Number of extra entries to free when max is reached' "
"SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL },
{ "cachesize", "size", 2, 2, 0, ARG_INT|ARG_OFFSET,
(void *)offsetof(struct bdb_info, bi_cache.c_maxsize),
"( OLcfgDbAt:1.1 NAME 'olcDbCacheSize' "
@ -134,7 +139,8 @@ static ConfigOCs bdbocs[] = {
"MAY ( olcDbCacheSize $ olcDbCheckpoint $ olcDbConfig $ "
"olcDbNoSync $ olcDbDirtyRead $ olcDbIDLcacheSize $ "
"olcDbIndex $ olcDbLinearIndex $ olcDbLockDetect $ "
"olcDbMode $ olcDbSearchStack $ olcDbShmKey ) )",
"olcDbMode $ olcDbSearchStack $ olcDbShmKey $ "
" olcDbCacheFree ) )",
Cft_Database, bdbcfg },
{ NULL, 0, NULL }
};

View File

@ -369,26 +369,10 @@ retry: /* transaction retry */
goto return_results;
}
/* delete from id2entry */
rs->sr_err = bdb_id2entry_delete( op->o_bd, lt2, e );
if ( rs->sr_err != 0 ) {
Debug(LDAP_DEBUG_TRACE,
"<=- " LDAP_XSTRING(bdb_delete) ": id2entry failed: "
"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
switch( rs->sr_err ) {
case DB_LOCK_DEADLOCK:
case DB_LOCK_NOTGRANTED:
goto retry;
}
rs->sr_text = "entry delete failed";
rs->sr_err = LDAP_OTHER;
goto return_results;
}
/* delete indices for old attributes */
rs->sr_err = bdb_index_entry_del( op, lt2, e );
if ( rs->sr_err != LDAP_SUCCESS ) {
Debug( LDAP_DEBUG_TRACE,
Debug(LDAP_DEBUG_TRACE,
"<=- " LDAP_XSTRING(bdb_delete) ": index failed: "
"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
switch( rs->sr_err ) {
@ -401,6 +385,41 @@ retry: /* transaction retry */
goto return_results;
}
/* fixup delete CSN */
if ( !SLAP_SHADOW( op->o_bd )) {
struct berval vals[2];
vals[0] = op->o_csn;
BER_BVZERO( vals+1 );
rs->sr_err = bdb_index_values( op, lt2, slap_schema.si_ad_entryCSN,
vals, 0, SLAP_INDEX_ADD_OP );
if ( rs->sr_err != LDAP_SUCCESS ) {
switch( rs->sr_err ) {
case DB_LOCK_DEADLOCK:
case DB_LOCK_NOTGRANTED:
goto retry;
}
rs->sr_text = "entryCSN index update failed";
rs->sr_err = LDAP_OTHER;
goto return_results;
}
}
/* delete from id2entry */
rs->sr_err = bdb_id2entry_delete( op->o_bd, lt2, e );
if ( rs->sr_err != 0 ) {
Debug( LDAP_DEBUG_TRACE,
"<=- " LDAP_XSTRING(bdb_delete) ": id2entry failed: "
"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
switch( rs->sr_err ) {
case DB_LOCK_DEADLOCK:
case DB_LOCK_NOTGRANTED:
goto retry;
}
rs->sr_text = "entry delete failed";
rs->sr_err = LDAP_OTHER;
goto return_results;
}
if ( pdn.bv_len != 0 ) {
parent_is_glue = is_entry_glue(p);
rs->sr_err = bdb_cache_children( op, lt2, p );

View File

@ -69,7 +69,8 @@ bdb_db_init( BackendDB *be )
#ifdef BDB_HIER
ldap_pvt_thread_mutex_init( &bdb->bi_modrdns_mutex );
#endif
ldap_pvt_thread_mutex_init( &bdb->bi_cache.lru_mutex );
ldap_pvt_thread_mutex_init( &bdb->bi_cache.lru_head_mutex );
ldap_pvt_thread_mutex_init( &bdb->bi_cache.lru_tail_mutex );
ldap_pvt_thread_mutex_init( &bdb->bi_cache.c_dntree.bei_kids_mutex );
ldap_pvt_thread_rdwr_init ( &bdb->bi_cache.c_rwlock );
ldap_pvt_thread_rdwr_init( &bdb->bi_idl_tree_rwlock );
@ -614,7 +615,8 @@ bdb_db_destroy( BackendDB *be )
bdb_attr_index_destroy( bdb );
ldap_pvt_thread_rdwr_destroy ( &bdb->bi_cache.c_rwlock );
ldap_pvt_thread_mutex_destroy( &bdb->bi_cache.lru_mutex );
ldap_pvt_thread_mutex_destroy( &bdb->bi_cache.lru_head_mutex );
ldap_pvt_thread_mutex_destroy( &bdb->bi_cache.lru_tail_mutex );
ldap_pvt_thread_mutex_destroy( &bdb->bi_cache.c_dntree.bei_kids_mutex );
#ifdef BDB_HIER
ldap_pvt_thread_mutex_destroy( &bdb->bi_modrdns_mutex );

View File

@ -483,11 +483,11 @@ ID bdb_tool_entry_put(
goto done;
}
/* id2entry index */
rc = bdb_id2entry_add( be, tid, e );
if ( !bdb->bi_linear_index )
rc = bdb_tool_index_add( &op, tid, e );
if( rc != 0 ) {
snprintf( text->bv_val, text->bv_len,
"id2entry_add failed: %s (%d)",
"index_entry_add failed: %s (%d)",
db_strerror(rc), rc );
Debug( LDAP_DEBUG_ANY,
"=> " LDAP_XSTRING(bdb_tool_entry_put) ": %s\n",
@ -495,11 +495,11 @@ ID bdb_tool_entry_put(
goto done;
}
if ( !bdb->bi_linear_index )
rc = bdb_tool_index_add( &op, tid, e );
/* id2entry index */
rc = bdb_id2entry_add( be, tid, e );
if( rc != 0 ) {
snprintf( text->bv_val, text->bv_len,
"index_entry_add failed: %s (%d)",
"id2entry_add failed: %s (%d)",
db_strerror(rc), rc );
Debug( LDAP_DEBUG_ANY,
"=> " LDAP_XSTRING(bdb_tool_entry_put) ": %s\n",
@ -688,33 +688,6 @@ ID bdb_tool_entry_modify(
goto done;
}
#if 0
/* FIXME: this is bogus, we don't have the old values to delete
* from the index because the given entry has already been modified.
*/
rc = bdb_index_entry_del( &op, tid, e );
if( rc != 0 ) {
snprintf( text->bv_val, text->bv_len,
"index_entry_del failed: %s (%d)",
db_strerror(rc), rc );
Debug( LDAP_DEBUG_ANY,
"=> " LDAP_XSTRING(bdb_tool_entry_modify) ": %s\n",
text->bv_val, 0, 0 );
goto done;
}
#endif
rc = bdb_index_entry_add( &op, tid, e );
if( rc != 0 ) {
snprintf( text->bv_val, text->bv_len,
"index_entry_add failed: %s (%d)",
db_strerror(rc), rc );
Debug( LDAP_DEBUG_ANY,
"=> " LDAP_XSTRING(bdb_tool_entry_modify) ": %s\n",
text->bv_val, 0, 0 );
goto done;
}
done:
if( rc == 0 ) {
if (! (slapMode & SLAP_TOOL_QUICK)) {