ITS#9479 asyncmeta: fix hanging ops

This fixes two separate problems:
  1) ops that were never sent because a Bind was still pending
  2) errors that were never returned because an op was still active
This commit is contained in:
Howard Chu 2021-03-10 00:18:59 +00:00
parent 180f5b62cb
commit 28a04795f6
4 changed files with 52 additions and 9 deletions

View File

@ -590,6 +590,7 @@ extern LDAP_URLLIST_PROC asyncmeta_back_default_urllist;
#define META_MSGID_NEED_BIND (-2)
#define META_MSGID_CONNECTING (-3)
#define META_MSGID_UNDEFINED (-4)
#define META_MSGID_GOT_BIND (-5)
typedef enum meta_search_candidate_t {
META_SEARCH_UNDEFINED = -2,

View File

@ -1403,8 +1403,8 @@ asyncmeta_dobind_init(Operation *op, SlapReply *rs, bm_context_t *bc, a_metaconn
meta_search_candidate_t retcode;
Debug( LDAP_DEBUG_TRACE, "%s >>> asyncmeta_dobind_init[%d]\n",
op->o_log_prefix, candidate );
Debug( LDAP_DEBUG_TRACE, "%s >>> asyncmeta_dobind_init[%d] msc %p\n",
op->o_log_prefix, candidate, msc );
if ( mc->mc_authz_target == META_BOUND_ALL ) {
return META_SEARCH_CANDIDATE;
@ -1573,6 +1573,19 @@ retry_bind:
Debug( asyncmeta_debug, "[%s] asyncmeta_dobind_init rc=%d msc: %p\n",
time_buf, rc, msc );
}
if ( LogTest( LDAP_DEBUG_TRACE )) {
ber_socket_t s;
char sockname[LUTIL_ADDRLEN];
struct berval sockbv = BER_BVC( sockname );
Sockaddr addr;
socklen_t len = sizeof( addr );
ldap_get_option( msc->msc_ld, LDAP_OPT_DESC, &s );
getsockname( s, &addr.sa_addr, &len );
lutil_sockaddrstr( &addr, &sockbv );
Debug( LDAP_DEBUG_TRACE, "%s asyncmeta_dobind_init msc %p ld %p ldr %p fd %d addr %s\n",
op->o_log_prefix, msc, msc->msc_ld, msc->msc_ldr, s, sockname );
}
if (rc == LDAP_SERVER_DOWN ) {
goto down;
@ -1670,6 +1683,9 @@ asyncmeta_dobind_init_with_retry(Operation *op, SlapReply *rs, bm_context_t *bc,
}
if ( LDAP_BACK_CONN_ISBOUND( msc ) || LDAP_BACK_CONN_ISANON( msc ) ) {
if ( mc->pending_ops > 1 ) {
asyncmeta_send_all_pending_ops( mc, candidate, op->o_threadctx, 1 );
}
return META_SEARCH_CANDIDATE;
}

View File

@ -642,7 +642,9 @@ asyncmeta_send_all_pending_ops(a_metaconn_t *mc, int candidate, void *ctx, int d
for (bc = LDAP_STAILQ_FIRST(&mc->mc_om_list); bc; bc = onext) {
meta_search_candidate_t ret;
onext = LDAP_STAILQ_NEXT(bc, bc_next);
if (bc->candidates[candidate].sr_msgid != META_MSGID_NEED_BIND || bc->bc_active > 0 || bc->op->o_abandon > 0) {
if (bc->candidates[candidate].sr_msgid == META_MSGID_NEED_BIND)
bc->candidates[candidate].sr_msgid = META_MSGID_GOT_BIND;
if (bc->candidates[candidate].sr_msgid != META_MSGID_GOT_BIND || bc->bc_active > 0 || bc->op->o_abandon > 0) {
continue;
}
bc->op->o_threadctx = ctx;
@ -1380,6 +1382,7 @@ asyncmeta_op_read_error(a_metaconn_t *mc, int candidate, int error, void* ctx)
Operation *op;
SlapReply *rs;
SlapReply *candidates;
/* no outstanding ops, nothing to do but log */
Debug( LDAP_DEBUG_TRACE,
"asyncmeta_op_read_error: ldr=%p, err=%d\n",
@ -1412,6 +1415,7 @@ asyncmeta_op_read_error(a_metaconn_t *mc, int candidate, int error, void* ctx)
}
if (bc->bc_active > 0) {
bc->bc_invalid = 1;
continue;
}
@ -1619,22 +1623,21 @@ retry_bc:
ldap_pvt_thread_mutex_lock( &mc->mc_om_mutex );
rc = --mc->mc_active;
ldap_pvt_thread_mutex_unlock( &mc->mc_om_mutex );
if (rc) {
i++;
ldap_pvt_thread_mutex_unlock( &mc->mc_om_mutex );
goto again;
}
slap_sl_mem_setctx(ctx, oldctx);
if (mc->mc_conns) {
ldap_pvt_thread_mutex_lock( &mc->mc_om_mutex );
for (i=0; i<ntargets; i++) {
if (!slapd_shutdown && !META_BACK_CONN_INVALID(msc)
&& mc->mc_conns[i].msc_ldr && mc->mc_conns[i].conn) {
connection_client_enable(mc->mc_conns[i].conn);
}
}
ldap_pvt_thread_mutex_unlock( &mc->mc_om_mutex );
}
ldap_pvt_thread_mutex_unlock( &mc->mc_om_mutex );
return NULL;
}

View File

@ -335,6 +335,7 @@ asyncmeta_back_search_start(
LDAPControl **ctrls = NULL;
BerElement *ber = NULL;
ber_int_t msgid;
ber_socket_t s = -1;
#ifdef SLAPD_META_CLIENT_PR
LDAPControl **save_ctrls = NULL;
#endif /* SLAPD_META_CLIENT_PR */
@ -572,7 +573,6 @@ done_pr:;
if (ber) {
struct timeval tv = {0, mt->mt_network_timeout*1000};
ber_socket_t s;
if (!( LDAP_BACK_CONN_ISBOUND( msc )
|| LDAP_BACK_CONN_ISANON( msc )) || msc->msc_ld == NULL ) {
@ -664,7 +664,7 @@ done:;
}
doreturn:;
Debug( LDAP_DEBUG_TRACE, "%s <<< asyncmeta_back_search_start[%p]=%d\n", op->o_log_prefix, msc, candidates[candidate].sr_msgid );
Debug( LDAP_DEBUG_TRACE, "%s <<< asyncmeta_back_search_start[%p] (fd %d)=%d\n", op->o_log_prefix, msc, s, candidates[candidate].sr_msgid );
return retcode;
}
@ -682,6 +682,7 @@ asyncmeta_back_search( Operation *op, SlapReply *rs )
a_metaconn_t *mc;
int msc_decr = 0;
int max_pending_ops = (mi->mi_max_pending_ops == 0) ? META_BACK_CFG_MAX_PENDING_OPS : mi->mi_max_pending_ops;
int check_bind = 0;
rs_assert_ready( rs );
rs->sr_flags &= ~REP_ENTRY_MASK; /* paranoia, we can set rs = non-entry */
@ -836,7 +837,8 @@ retry:
case META_SEARCH_NEED_BIND:
case META_SEARCH_BINDING:
Debug( LDAP_DEBUG_TRACE, "%s asyncmeta_back_search: BINDING "
"cnd=\"%ld\" %p\n", op->o_log_prefix, i , &mc->mc_conns[i]);
"cnd=\"%ld\" mc %p msc %p\n", op->o_log_prefix, i , mc, &mc->mc_conns[i]);
check_bind++;
ncandidates++;
/* Todo add the context to the message queue but do not send the request
the receiver must send this when we are done binding */
@ -915,6 +917,27 @@ retry:
send_ldap_result(op, rs);
goto finish;
}
/* If we were processing many targets the result from a pending Bind
* on an earlier target may have arrived while we were sending to a
* later target. See if we can now send our pending request.
*/
if ( check_bind ) {
for ( i = 0; i < mi->mi_ntargets; i++ ) {
if ( candidates[ i ].sr_msgid == META_MSGID_GOT_BIND ) {
rc = asyncmeta_back_search_start( op, rs, mc, bc, i, NULL, 0, 1 );
if ( rc == META_SEARCH_ERR ) {
META_CANDIDATE_CLEAR( &candidates[i] );
candidates[ i ].sr_msgid = META_MSGID_IGNORE;
if ( META_BACK_ONERR_STOP( mi ) ) {
asyncmeta_handle_onerr_stop(op,rs,mc,bc,i);
goto finish;
}
}
}
}
}
ldap_pvt_thread_mutex_lock( &mc->mc_om_mutex);
for ( i = 0; i < mi->mi_ntargets; i++ ) {
mc->mc_conns[i].msc_active--;